1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29
30 #include <stdint.h>
31 #include <string.h>
32
33 #include "common/intops.h"
34
35 #include "src/wedge.h"
36
// Direction of a wedge partition boundary. The numeric suffix of the
// oblique entries is the approximate angle of the boundary in degrees.
// Only WEDGE_VERTICAL and WEDGE_OBLIQUE63 templates are built directly;
// the other four are derived by transpose/hflip (see dav1d_init_wedge_masks).
enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0,
    WEDGE_VERTICAL = 1,
    WEDGE_OBLIQUE27 = 2,
    WEDGE_OBLIQUE63 = 3,
    WEDGE_OBLIQUE117 = 4,
    WEDGE_OBLIQUE153 = 5,
    N_WEDGE_DIRECTIONS
};
46
// One entry of a wedge codebook: which master template to crop, and where
// the boundary is anchored inside the block.
typedef struct {
    uint8_t /* enum WedgeDirectionType */ direction; // master template index
    uint8_t x_offset; // horizontal anchor, in eighths of the block width
    uint8_t y_offset; // vertical anchor, in eighths of the block height
} wedge_code_type;
52
// 16-shape wedge codebook used for blocks with height > width
// (see the fill() calls in dav1d_init_wedge_masks).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
63
// 16-shape wedge codebook used for blocks with height < width.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
    { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
74
// 16-shape wedge codebook used for square blocks (height == width).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
    { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
85
// Backing storage for the wedge masks, one buffer per (chroma layout, block
// size) pair. Each buffer holds 2 sign variants x 16 wedge shapes x (w * h)
// bytes (see the stride computation in fill2d_16x2). The 444 buffers are
// also where the full-resolution masks are generated in the first place.
static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 64);
static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 64);
static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 64);

// 4:2:2 masks: half the width of the corresponding 444 buffer.
static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 64);
static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 64);
static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_4x8 [2 * 16 * 4 * 8], 32);

// 4:2:0 masks: half the width and half the height of the 444 buffer.
static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 64);
static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 64);
static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 64);
static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 64);
static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_4x8 [2 * 16 * 4 * 8], 32);
static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 16);

// Externally visible pointer table, populated by dav1d_init_wedge_masks().
// Indexed as [block size][chroma layout (0=444,1=422,2=420)][sign][shape].
const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16];
117
// Write one 64-px row of a master template: zeros to the left, the 8-px
// transition ramp from src centered at ctr, and 64s to the right. The ramp
// is clipped when it overlaps either edge of the row.
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    const int lo = ctr - 4; // first ramp pixel; negative when clipped left
    const int hi = ctr + 4; // one past the last ramp pixel
    if (lo > 0) memset(dst, 0, lo);
    const int dst_pos = lo > 0 ? lo : 0;
    const int src_pos = lo < 0 ? -lo : 0;
    const int copy_len = 64 - ctr < 8 ? 64 - ctr : 8;
    memcpy(dst + dst_pos, src + src_pos, copy_len);
    if (hi < 64) memset(dst + hi, 64, 64 - hi);
}
126
// Transpose a 64x64 byte matrix: dst[x][y] = src[y][x].
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    for (int y = 0; y < 64; y++)
        for (int x = 0; x < 64; x++)
            dst[x * 64 + y] = src[y * 64 + x];
}
132
// Mirror each row of a 64x64 byte matrix left<->right.
static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int y = 0; y < 64; y++) {
        const uint8_t *const s = &src[y * 64];
        uint8_t *const d = &dst[y * 64];
        for (int x = 0; x < 64; x++)
            d[x] = s[63 - x];
    }
}
138
// Complement a w*h mask against the maximum blend weight of 64
// (dst = 64 - src, element-wise over the packed buffer).
static void invert(uint8_t *const dst, const uint8_t *const src,
                   const int w, const int h)
{
    const int n = w * h;
    for (int i = 0; i < n; i++)
        dst[i] = 64 - src[i];
}
146
// Crop a w*h window at (x_off, y_off) out of a 64-px-wide master template
// into a tightly packed (stride == w) destination buffer.
static void copy2d(uint8_t *dst, const uint8_t *src,
                   const int w, const int h, const int x_off, const int y_off)
{
    const uint8_t *row = &src[y_off * 64 + x_off];
    for (int rows = h; rows > 0; rows--) {
        memcpy(dst, row, w);
        row += 64;
        dst += w;
    }
}
157
init_chroma(uint8_t * chroma,const uint8_t * luma,const int sign,const int w,const int h,const int ss_ver)158 static COLD void init_chroma(uint8_t *chroma, const uint8_t *luma,
159 const int sign, const int w, const int h,
160 const int ss_ver)
161 {
162 for (int y = 0; y < h; y += 1 + ss_ver) {
163 for (int x = 0; x < w; x += 2) {
164 int sum = luma[x] + luma[x + 1] + 1;
165 if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
166 chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
167 }
168 luma += w << ss_ver;
169 chroma += w >> 1;
170 }
171 }
172
/*
 * Build all masks for one block size and publish them in dav1d_wedge_masks.
 *
 * dst/masks_444: full-resolution mask buffer (both arguments point at the
 *                same 444 array at every call site, see the fill() macro);
 *                receives 16 cropped shapes followed by their 16 inverses.
 * bs:            block-size index into dav1d_wedge_masks.
 * master:        the six 64x64 direction templates.
 * cb:            16-entry codebook selecting direction and anchor per shape.
 * masks_422/420: buffers receiving the subsampled chroma masks.
 * signs:         bit n selects which of the two variants of shape n is
 *                exposed as sign 0 (the chroma planes use !sign for sign 1
 *                so the two variants get opposite rounding).
 */
static COLD void fill2d_16x2(uint8_t *dst, const int w, const int h,
                             const enum BlockSize bs,
                             const uint8_t (*const master)[64 * 64],
                             const wedge_code_type *const cb,
                             uint8_t *masks_444, uint8_t *masks_422,
                             uint8_t *masks_420, const unsigned signs)
{
    // Crop the 16 codebook shapes out of their master templates; the
    // template is sampled so its center lands on the codebook anchor.
    uint8_t *ptr = dst;
    for (int n = 0; n < 16; n++) {
        copy2d(ptr, master[cb[n].direction], w, h,
               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
        ptr += w * h;
    }
    // Append the 16 complementary (64 - weight) masks.
    for (int n = 0, off = 0; n < 16; n++, off += w * h)
        invert(ptr + off, dst + off, w, h);

    const int n_stride_444 = (w * h);
    const int n_stride_422 = n_stride_444 >> 1; // horizontally subsampled
    const int n_stride_420 = n_stride_444 >> 2; // subsampled both ways
    const int sign_stride_444 = 16 * n_stride_444;
    const int sign_stride_422 = 16 * n_stride_422;
    const int sign_stride_420 = 16 * n_stride_420;
    // assign pointers in externally visible array
    for (int n = 0; n < 16; n++) {
        const int sign = (signs >> n) & 1;
        dav1d_wedge_masks[bs][0][0][n] = &masks_444[ sign * sign_stride_444];
        // not using !sign is intentional here, since 444 does not require
        // any rounding since no chroma subsampling is applied.
        dav1d_wedge_masks[bs][0][1][n] = &masks_444[ sign * sign_stride_444];
        dav1d_wedge_masks[bs][1][0][n] = &masks_422[ sign * sign_stride_422];
        dav1d_wedge_masks[bs][1][1][n] = &masks_422[!sign * sign_stride_422];
        dav1d_wedge_masks[bs][2][0][n] = &masks_420[ sign * sign_stride_420];
        dav1d_wedge_masks[bs][2][1][n] = &masks_420[!sign * sign_stride_420];
        masks_444 += n_stride_444;
        masks_422 += n_stride_422;
        masks_420 += n_stride_420;

        // since the pointers come from inside, we know that
        // violation of the const is OK here. Any other approach
        // means we would have to duplicate the sign correction
        // logic in two places, which isn't very nice, or mark
        // the table faced externally as non-const, which also sucks
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][0][n],
                    dav1d_wedge_masks[bs][0][0][n], 0, w, h, 0);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][1][n],
                    dav1d_wedge_masks[bs][0][0][n], 1, w, h, 0);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][0][n],
                    dav1d_wedge_masks[bs][0][0][n], 0, w, h, 1);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][1][n],
                    dav1d_wedge_masks[bs][0][0][n], 1, w, h, 1);
    }
}
225
// Build the two base 64x64 wedge templates, derive the other four by
// transpose/mirror, then generate and publish the masks for every
// supported block size.
COLD void dav1d_init_wedge_masks(void) {
    // This function is guaranteed to be called only once

    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    // 8-px transition ramps between mask weight 0 and 64; separate shapes
    // for the odd and even scanlines of the oblique template, and one for
    // the vertical template.
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD] = { 1, 2, 6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = { 1, 4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = { 0, 2, 7, 21, 43, 57, 62, 64 },
    };
    uint8_t master[6][64 * 64]; // one template per WedgeDirectionType

    // create master templates
    // Vertical: the same ramp on every row, centered at x = 32.
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    // Oblique 63 degrees: the ramp center shifts left by one pixel every
    // two rows, alternating the even- and odd-row ramp shapes.
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    // The remaining four directions are transposes/mirrors of the above.
    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

// Generate the masks for one block size: the 444 buffer serves as both the
// generation destination and the published 444 masks; `signs` is the 16-bit
// per-shape sign map passed through to fill2d_16x2().
#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2((uint8_t *) wedge_masks_444_##w##x##h, w, h, BS_##w##x##h, \
                master, wedge_codebook_16_##hvsw, wedge_masks_444_##w##x##h, \
                wedge_masks_422_##sz_422, wedge_masks_420_##sz_420, signs)

    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8, hltw, 0x7beb);
    fill(32, 8, 16x8, 16x4, hltw, 0x6beb);
    fill(16, 32, 8x32, 8x16, hgtw, 0x7beb);
    fill(16, 16, 8x16, 8x8, heqw, 0x7bfb);
    fill(16, 8, 8x8, 8x4, hltw, 0x7beb);
    fill( 8, 32, 4x32, 4x16, hgtw, 0x7aeb);
    fill( 8, 16, 4x16, 4x8, hgtw, 0x7beb);
    fill( 8, 8, 4x8, 4x4, heqw, 0x7bfb);
#undef fill
}
275
// Inter-intra blend masks. II_DC_PRED uses one flat 32x32 mask shared by
// all sizes; the other prediction modes each get a per-size mask buffer
// (N_II_PRED_MODES excludes DC, whose mask is constant).
#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1)
static uint8_t ALIGN(ii_dc_mask[32 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_8x8  [N_II_PRED_MODES][ 8 * 8], 64);
static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_4x8  [N_II_PRED_MODES][ 4 * 8], 32);
static uint8_t ALIGN(ii_nondc_mask_4x4  [N_II_PRED_MODES][ 4 * 4], 16);
#undef N_II_PRED_MODES
288
// set1() builds one [prediction mode] pointer row for a given mask size;
// set() groups a 444/422/420 triple into one block-size entry.
#define set1(sz) \
    [II_DC_PRED] = ii_dc_mask, \
    [II_VERT_PRED] = ii_nondc_mask_##sz[II_VERT_PRED - 1], \
    [II_HOR_PRED] = ii_nondc_mask_##sz[II_HOR_PRED - 1], \
    [II_SMOOTH_PRED] = ii_nondc_mask_##sz[II_SMOOTH_PRED - 1]
#define set(sz_444, sz_422, sz_420) \
    { { set1(sz_444) }, { set1(sz_422) }, { set1(sz_420) } }
// Externally visible inter-intra mask table, indexed as
// [block size][chroma layout (0=444,1=422,2=420)][prediction mode].
const uint8_t *dav1d_ii_masks[N_BS_SIZES][3][N_INTER_INTRA_PRED_MODES] = {
    [BS_8x8] = set( 8x8, 4x8, 4x4),
    [BS_8x16] = set( 8x16, 4x16, 4x8),
    [BS_16x8] = set(16x16, 8x8, 8x8),
    [BS_16x16] = set(16x16, 8x16, 8x8),
    [BS_16x32] = set(16x32, 8x32, 8x16),
    [BS_32x16] = set(32x32, 16x16, 16x16),
    [BS_32x32] = set(32x32, 16x32, 16x16),
};
#undef set
#undef set1
307
build_nondc_ii_masks(uint8_t * const mask_v,uint8_t * const mask_h,uint8_t * const mask_sm,const int w,const int h,const int step)308 static COLD void build_nondc_ii_masks(uint8_t *const mask_v,
309 uint8_t *const mask_h,
310 uint8_t *const mask_sm,
311 const int w, const int h, const int step)
312 {
313 static const uint8_t ii_weights_1d[] = {
314 60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7,
315 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1,
316 };
317
318 for (int y = 0, off = 0; y < h; y++, off += w) {
319 memset(&mask_v[off], ii_weights_1d[y * step], w);
320 for (int x = 0; x < w; x++) {
321 mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
322 mask_h[off + x] = ii_weights_1d[x * step];
323 }
324 }
325 }
326
// Fill the inter-intra mask buffers referenced by dav1d_ii_masks.
COLD void dav1d_init_interintra_masks(void) {
    // This function is guaranteed to be called only once

    // DC mask: constant weight 32 over the whole 32x32 block.
    memset(ii_dc_mask, 32, 32 * 32);
    // For each size, build the vert/hor/smooth masks; the last argument is
    // the sampling step into build_nondc_ii_masks' 32-entry weight curve.
#define set(a) a[II_VERT_PRED - 1], a[II_HOR_PRED - 1], a[II_SMOOTH_PRED - 1]
    build_nondc_ii_masks(set(ii_nondc_mask_32x32), 32, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_16x32), 16, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_16x16), 16, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_8x32), 8, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_8x16), 8, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_8x8), 8, 8, 4);
    build_nondc_ii_masks(set(ii_nondc_mask_4x16), 4, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_4x8), 4, 8, 4);
    build_nondc_ii_masks(set(ii_nondc_mask_4x4), 4, 4, 8);
#undef set
}
343