1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/reconintra.h"
24 #include "av1/common/onyxc_int.h"
25 #include "av1/common/cfl.h"
26
// Bit flags naming the neighbouring reference edges an intra prediction mode
// reads. Flags are combined with bitwise OR (see extend_modes). Bit 0 is
// intentionally left unused: the lowest flag starts at bit 1.
enum {
  NEED_LEFT = 0x02,        // bit 1
  NEED_ABOVE = 0x04,       // bit 2
  NEED_ABOVERIGHT = 0x08,  // bit 3
  NEED_ABOVELEFT = 0x10,   // bit 4
  NEED_BOTTOMLEFT = 0x20,  // bit 5
};

// NOTE(review): the three constants below are not used in the visible part of
// this file; the descriptions are inferred from the names - confirm against
// the intra edge filter / upsample code.
#define INTRA_EDGE_FILT 3   // presumably the number of edge filter kernels
#define INTRA_EDGE_TAPS 5   // presumably the tap count of each edge filter
#define MAX_UPSAMPLE_SZ 16  // presumably the longest edge that is upsampled
38
39 static const uint8_t extend_modes[INTRA_MODES] = {
40 NEED_ABOVE | NEED_LEFT, // DC
41 NEED_ABOVE, // V
42 NEED_LEFT, // H
43 NEED_ABOVE | NEED_ABOVERIGHT, // D45
44 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
45 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
46 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
47 NEED_LEFT | NEED_BOTTOMLEFT, // D203
48 NEED_ABOVE | NEED_ABOVERIGHT, // D67
49 NEED_LEFT | NEED_ABOVE, // SMOOTH
50 NEED_LEFT | NEED_ABOVE, // SMOOTH_V
51 NEED_LEFT | NEED_ABOVE, // SMOOTH_H
52 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH
53 };
54
// Availability of the top-right reference pixels, one table per block size.
//
// Each flag is a single bit, packed eight to a byte. A block's bit position is
// its raster-order index among the equally sized blocks of the superblock, so
// the flag for index i is (table[i / 8] >> (i % 8)) & 1.
//
// Example: for the 32x32 blocks in a 128x128 superblock, the block marked "o"
// has raster index 10, so its flag is the 3rd bit of the 2nd table entry,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data and are only ever reached through
// pointers to const, hence they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129 // 4X4
130 has_tr_4x4,
131 // 4X8, 8X4, 8X8
132 has_tr_4x8, has_tr_8x4, has_tr_8x8,
133 // 8X16, 16X8, 16X16
134 has_tr_8x16, has_tr_16x8, has_tr_16x16,
135 // 16X32, 32X16, 32X32
136 has_tr_16x32, has_tr_32x16, has_tr_32x32,
137 // 32X64, 64X32, 64X64
138 has_tr_32x64, has_tr_64x32, has_tr_64x64,
139 // 64x128, 128x64, 128x128
140 has_tr_64x128, has_tr_128x64, has_tr_128x128,
141 // 4x16, 16x4, 8x32
142 has_tr_4x16, has_tr_16x4, has_tr_8x32,
143 // 32x8, 16x64, 64x16
144 has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146
// Top-right availability bitmaps for square blocks coded as part of a
// PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// ordinary has_tr_* tables. Declared const: they are read-only lookup data
// referenced only through pointers to const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167 // 4X4
168 NULL,
169 // 4X8, 8X4, 8X8
170 has_tr_4x8, NULL, has_tr_vert_8x8,
171 // 8X16, 16X8, 16X16
172 has_tr_8x16, NULL, has_tr_vert_16x16,
173 // 16X32, 32X16, 32X32
174 has_tr_16x32, NULL, has_tr_vert_32x32,
175 // 32X64, 64X32, 64X64
176 has_tr_32x64, NULL, has_tr_vert_64x64,
177 // 64x128, 128x64, 128x128
178 has_tr_64x128, NULL, has_tr_128x128
179 };
180
// Returns the packed top-right availability bitmap for a block of size
// `bsize` coded under `partition`.
//
// PARTITION_VERT_A and PARTITION_VERT_B use has_tr_vert_tables (which only
// covers the first BLOCK_SIZES block sizes); every other partition type uses
// has_tr_tables. The selected entry is asserted to be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_tr_tables[bsize];
  } else {
    // The vert tables have no entries for the extended block sizes.
    assert(bsize < BLOCK_SIZES);
    table = has_tr_vert_tables[bsize];
  }

  assert(table);
  return table;
}
194
// Decides whether the top-right reference pixels of a transform block are
// available for intra prediction.
//
//   cm                 - supplies the superblock size (seq_params.sb_size).
//   bsize              - size of the coding block containing the transform.
//   mi_row, mi_col     - position of the coding block, in mode-info units.
//   top_available,
//   right_available    - caller-supplied flags; if either is zero the answer
//                        is 0.
//   partition          - partition type, selects the availability table.
//   txsz               - transform size.
//   row_off, col_off   - offset of the transform block inside the coding
//                        block, compared against sizes expressed in
//                        tx_size_wide_unit / mi_size_* units.
//   ss_x, ss_y         - shifts applied to widths / heights for this plane.
//
// Returns 1 when the top-right pixels can be used, 0 otherwise.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int blk_w_units = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int plane_w_units = AOMMAX(blk_w_units >> ss_x, 1);
  const int tr_units = tx_size_wide_unit[txsz];
  // Column just past the top-right pixels this transform block needs.
  const int tr_end = col_off + tr_units;

  if (row_off > 0) {
    // Not in the first transform row: the pixels would have to come from
    // inside the current block, so only the horizontal extent matters.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tr_end < plane_w_units;
    }

    // Blocks wider than 64 are handled per 64-wide unit.
    const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: the transform unit whose top-right corner sits at the
    // centre of the block does have pixels available at that corner.
    if (row_off == h64_units && tr_end == w64_units) return 1;

    const int col_in_64 = col_off % w64_units;
    return col_in_64 + tr_units < w64_units;
  }

  // First transform row. If the needed pixels lie entirely above the current
  // block they belong to the block above, which is already available.
  if (tr_end < plane_w_units) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi_size - 1;
  const int blk_row_in_sb = (mi_row & sb_mask) >> h_log2;
  const int blk_col_in_sb = (mi_col & sb_mask) >> w_log2;

  // Top row of the superblock: the pixels are in the top and/or top-right
  // superblocks, both of which are already available.
  if (blk_row_in_sb == 0) return 1;

  // Rightmost column of the superblock (and not its top row): the pixels fall
  // in the right superblock, which is not available yet.
  if (((blk_col_in_sb + 1) << w_log2) >= sb_mi_size) return 0;

  // General case: look up whether the top-right block is coded before the
  // current block. Flags are packed eight per byte in raster order.
  const int blk_index =
      (blk_row_in_sb << (MAX_MIB_SIZE_LOG2 - w_log2)) + blk_col_in_sb;
  const int byte_idx = blk_index / 8;
  const int bit_idx = blk_index % 8;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[byte_idx] >> bit_idx) & 1;
}
250
// Similar to the has_tr_* tables, but store whether the bottom-left reference
// pixels are available. Flags are packed one bit per block, eight per byte:
// the flag for raster index i is (table[i / 8] >> (i % 8)) & 1.
//
// The tables are read-only lookup data and are only ever reached through
// pointers to const, hence they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314 // 4X4
315 has_bl_4x4,
316 // 4X8, 8X4, 8X8
317 has_bl_4x8, has_bl_8x4, has_bl_8x8,
318 // 8X16, 16X8, 16X16
319 has_bl_8x16, has_bl_16x8, has_bl_16x16,
320 // 16X32, 32X16, 32X32
321 has_bl_16x32, has_bl_32x16, has_bl_32x32,
322 // 32X64, 64X32, 64X64
323 has_bl_32x64, has_bl_64x32, has_bl_64x64,
324 // 64x128, 128x64, 128x128
325 has_bl_64x128, has_bl_128x64, has_bl_128x128,
326 // 4x16, 16x4, 8x32
327 has_bl_4x16, has_bl_16x4, has_bl_8x32,
328 // 32x8, 16x64, 64x16
329 has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331
// Bottom-left availability bitmaps for square blocks coded as part of a
// PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// ordinary has_bl_* tables. Declared const: they are read-only lookup data
// referenced only through pointers to const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352 // 4X4
353 NULL,
354 // 4X8, 8X4, 8X8
355 has_bl_4x8, NULL, has_bl_vert_8x8,
356 // 8X16, 16X8, 16X16
357 has_bl_8x16, NULL, has_bl_vert_16x16,
358 // 16X32, 32X16, 32X32
359 has_bl_16x32, NULL, has_bl_vert_32x32,
360 // 32X64, 64X32, 64X64
361 has_bl_32x64, NULL, has_bl_vert_64x64,
362 // 64x128, 128x64, 128x128
363 has_bl_64x128, NULL, has_bl_128x128
364 };
365
// Returns the packed bottom-left availability bitmap for a block of size
// `bsize` coded under `partition`.
//
// PARTITION_VERT_A and PARTITION_VERT_B use has_bl_vert_tables (which only
// covers the first BLOCK_SIZES block sizes); every other partition type uses
// has_bl_tables. The selected entry is asserted to be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_bl_tables[bsize];
  } else {
    // The vert tables have no entries for the extended block sizes.
    assert(bsize < BLOCK_SIZES);
    table = has_bl_vert_tables[bsize];
  }

  assert(table);
  return table;
}
379
// Decides whether the bottom-left reference pixels of a transform block are
// available for intra prediction.
//
//   cm                 - supplies the superblock size (seq_params.sb_size).
//   bsize              - size of the coding block containing the transform.
//   mi_row, mi_col     - position of the coding block, in mode-info units.
//   bottom_available,
//   left_available     - caller-supplied flags; if either is zero the answer
//                        is 0.
//   partition          - partition type, selects the availability table.
//   txsz               - transform size.
//   row_off, col_off   - offset of the transform block inside the coding
//                        block, compared against sizes expressed in
//                        tx_size_high_unit / mi_size_* units.
//   ss_x, ss_y         - shifts applied to widths / heights for this plane.
//
// Returns 1 when the bottom-left pixels can be used, 0 otherwise.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  if (col_off > 0) {
    // Special case for 128x* blocks, when col_off is half the block width.
    // This is needed because 128x* superblocks are divided into 64x* blocks
    // in raster order.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_units == 0) {
        // Left edge of the top-right or bottom-right 64x* block: the pixels
        // are usable only if they all lie in the left 64x* block, which is
        // already coded.
        const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;
        const int row_in_64 = row_off % h64_units;
        const int plane_h_units =
            AOMMIN(mi_size_high[bsize] >> ss_y, h64_units);
        return row_in_64 + tx_size_high_unit[txsz] < plane_h_units;
      }
    }
    // Otherwise the pixels are in the bottom-left block, which is not
    // available.
    return 0;
  }

  // col_off <= 0 from here on.
  const int blk_h_units = block_size_high[bsize] >> tx_size_high_log2[0];
  const int plane_h_units = AOMMAX(blk_h_units >> ss_y, 1);
  const int bl_units = tx_size_high_unit[txsz];

  // All bottom-left pixels are in the left block, which is already available.
  if (row_off + bl_units < plane_h_units) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi_size - 1;
  const int blk_row_in_sb = (mi_row & sb_mask) >> h_log2;
  const int blk_col_in_sb = (mi_col & sb_mask) >> w_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblocks, but only the left one is available, so check
  // that every required pixel falls inside it.
  if (blk_col_in_sb == 0) {
    const int blk_first_row =
        (blk_row_in_sb << (h_log2 + MI_SIZE_LOG2 - tx_size_wide_log2[0])) >>
        ss_y;
    const int row_in_sb = blk_first_row + row_off;
    const int sb_h_units = sb_mi_size >> ss_y;
    return row_in_sb + bl_units < sb_h_units;
  }

  // Bottom row of the superblock (and not its leftmost column): the pixels
  // fall in the bottom superblock, which is not available yet.
  if (((blk_row_in_sb + 1) << h_log2) >= sb_mi_size) return 0;

  // General case: look up whether the bottom-left block is coded before the
  // current block. Flags are packed eight per byte in raster order.
  const int blk_index =
      (blk_row_in_sb << (MAX_MIB_SIZE_LOG2 - w_log2)) + blk_col_in_sb;
  const int byte_idx = blk_index / 8;
  const int bit_idx = blk_index % 8;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[byte_idx] >> bit_idx) & 1;
}
449
450 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451 const uint8_t *above, const uint8_t *left);
452
453 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457 const uint16_t *above, const uint16_t *left,
458 int bd);
459 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
init_intra_predictors_internal(void)462 static void init_intra_predictors_internal(void) {
463 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
464
465 #define INIT_RECTANGULAR(p, type) \
466 p[TX_4X8] = aom_##type##_predictor_4x8; \
467 p[TX_8X4] = aom_##type##_predictor_8x4; \
468 p[TX_8X16] = aom_##type##_predictor_8x16; \
469 p[TX_16X8] = aom_##type##_predictor_16x8; \
470 p[TX_16X32] = aom_##type##_predictor_16x32; \
471 p[TX_32X16] = aom_##type##_predictor_32x16; \
472 p[TX_32X64] = aom_##type##_predictor_32x64; \
473 p[TX_64X32] = aom_##type##_predictor_64x32; \
474 p[TX_4X16] = aom_##type##_predictor_4x16; \
475 p[TX_16X4] = aom_##type##_predictor_16x4; \
476 p[TX_8X32] = aom_##type##_predictor_8x32; \
477 p[TX_32X8] = aom_##type##_predictor_32x8; \
478 p[TX_16X64] = aom_##type##_predictor_16x64; \
479 p[TX_64X16] = aom_##type##_predictor_64x16;
480
481 #define INIT_NO_4X4(p, type) \
482 p[TX_8X8] = aom_##type##_predictor_8x8; \
483 p[TX_16X16] = aom_##type##_predictor_16x16; \
484 p[TX_32X32] = aom_##type##_predictor_32x32; \
485 p[TX_64X64] = aom_##type##_predictor_64x64; \
486 INIT_RECTANGULAR(p, type)
487
488 #define INIT_ALL_SIZES(p, type) \
489 p[TX_4X4] = aom_##type##_predictor_4x4; \
490 INIT_NO_4X4(p, type)
491
492 INIT_ALL_SIZES(pred[V_PRED], v);
493 INIT_ALL_SIZES(pred[H_PRED], h);
494 INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
495 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
496 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
497 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
498 INIT_ALL_SIZES(dc_pred[0][0], dc_128);
499 INIT_ALL_SIZES(dc_pred[0][1], dc_top);
500 INIT_ALL_SIZES(dc_pred[1][0], dc_left);
501 INIT_ALL_SIZES(dc_pred[1][1], dc);
502
503 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
504 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
505 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
506 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
507 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
508 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
509 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
510 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
511 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
512 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
513 #undef intra_pred_allsizes
514 }
515
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`) using only the
// `above` edge; `left` and `dy` are unused (dy is asserted to be 1). For each
// row the start position advances by `dx` in 1/64 sample units; every output
// is a 32-step linear blend of two neighbouring `above` samples. Positions at
// or beyond index ((bw + bh) - 1) << upsample_above replicate the sample at
// that index.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;
  int x = dx;

  for (int r = 0; r < bh; ++r) {
    int idx = x >> frac_bits;
    const int w_next = ((x << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    if (idx >= last_idx) {
      // This row and every row below it lie entirely past the edge: fill
      // them with the last edge sample and finish.
      for (int i = r; i < bh; ++i) {
        memset(dst, above[last_idx], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        const int blend = above[idx] * w_cur + above[idx + 1] * w_next;
        dst[c] = ROUND_POWER_OF_TWO(blend, 5);
      } else {
        dst[c] = above[last_idx];
      }
      idx += idx_step;
    }

    dst += stride;
    x += dx;
  }
}
553
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`). Each output pixel
// is projected onto the `above` edge; if the projected index is below
// -(1 << upsample_above) the pixel is projected onto the `left` edge instead.
// Either way the result is a 32-step linear blend of two neighbouring edge
// samples. Both edges are read at negative indices, so the caller must supply
// valid samples before above[0] / left[0].
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_above_idx = -(1 << upsample_above);
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;
  const int above_step = 1 << upsample_above;
  int x = -dx;

  for (int r = 0; r < bh; ++r) {
    int above_idx = x >> frac_bits_x;
    int y = (r << 6) - dy;

    for (int c = 0; c < bw; ++c) {
      int blend;
      if (above_idx < min_above_idx) {
        // Projection falls off the above edge: use the left edge.
        const int left_idx = y >> frac_bits_y;
        assert(left_idx >= -(1 << upsample_left));
        // Multiply rather than shift: y may be negative.
        const int w = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        blend = left[left_idx] * (32 - w) + left[left_idx + 1] * w;
      } else {
        // Multiply rather than shift: x is negative.
        const int w = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        blend = above[above_idx] * (32 - w) + above[above_idx + 1] * w;
      }
      dst[c] = ROUND_POWER_OF_TWO(blend, 5);

      above_idx += above_step;
      y -= dy;
    }

    x -= dx;
    dst += stride;
  }
}
588
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`) using only the
// `left` edge; `above` and `dx` are unused (dx is asserted to be 1). The
// block is produced column by column: for each column the start position
// advances by `dy` in 1/64 sample units and every output is a 32-step linear
// blend of two neighbouring `left` samples. Positions at or beyond index
// (bw + bh - 1) << upsample_left replicate the sample at that index.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;
  int y = dy;

  for (int c = 0; c < bw; ++c) {
    int idx = y >> frac_bits;
    const int w_next = ((y << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    int r = 0;

    // Interpolated part of the column.
    while (r < bh && idx < last_idx) {
      const int blend = left[idx] * w_cur + left[idx + 1] * w_next;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(blend, 5);
      ++r;
      idx += idx_step;
    }

    // Remainder of the column (if any) lies past the edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[last_idx];

    y += dy;
  }
}
620
// Dispatches directional prediction for `angle` (asserted to be in (0, 270))
// on uint8_t samples:
//   0 < angle < 90     -> av1_dr_prediction_z1
//   angle == 90        -> pred[V_PRED]
//   90 < angle < 180   -> av1_dr_prediction_z2
//   angle == 180       -> pred[H_PRED]
//   180 < angle < 270  -> av1_dr_prediction_z3
// Block dimensions come from `tx_size`; dx/dy come from av1_get_dx() and
// av1_get_dy(). Angles outside (0, 270) write nothing when asserts are off.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Exact vertical / horizontal angles reuse the plain V / H predictors.
  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  // Out-of-range angles fall through without writing anything.
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
645
// Directional prediction, zone 1: 0 < angle < 90, uint16_t samples.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`) using only the
// `above` edge; `left`, `dy` and `bd` are unused (dy is asserted to be 1).
// For each row the start position advances by `dx` in 1/64 sample units;
// every output is a 32-step linear blend of two neighbouring `above` samples.
// Positions at or beyond index ((bw + bh) - 1) << upsample_above replicate
// the sample at that index.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;
  int x = dx;

  for (int r = 0; r < bh; ++r) {
    int idx = x >> frac_bits;
    const int w_next = ((x << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    if (idx >= last_idx) {
      // This row and every row below it lie entirely past the edge: fill
      // them with the last edge sample and finish.
      for (int i = r; i < bh; ++i) {
        aom_memset16(dst, above[last_idx], bw);
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        const int blend = above[idx] * w_cur + above[idx + 1] * w_next;
        dst[c] = ROUND_POWER_OF_TWO(blend, 5);
      } else {
        dst[c] = above[last_idx];
      }
      idx += idx_step;
    }

    dst += stride;
    x += dx;
  }
}
685
// Directional prediction, zone 2: 90 < angle < 180, uint16_t samples.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`); `bd` is unused.
// Each output pixel is projected onto the `above` edge; if the projected
// index is below -(1 << upsample_above) the pixel is projected onto the
// `left` edge instead. Either way the result is a 32-step linear blend of two
// neighbouring edge samples. Both edges are read at negative indices, so the
// caller must supply valid samples before above[0] / left[0].
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_above_idx = -(1 << upsample_above);
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      // Position along the above edge, in 1/64 sample units.
      const int pos_x = (c << 6) - (r + 1) * dx;
      const int above_idx = pos_x >> frac_bits_x;
      int blend;

      if (above_idx >= min_above_idx) {
        // Multiply rather than shift: pos_x may be negative.
        const int w = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
        blend = above[above_idx] * (32 - w) + above[above_idx + 1] * w;
      } else {
        // Projection falls off the above edge: use the left edge.
        const int pos_y = (r << 6) - (c + 1) * dy;
        const int left_idx = pos_y >> frac_bits_y;
        const int w = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
        blend = left[left_idx] * (32 - w) + left[left_idx + 1] * w;
      }

      dst[c] = ROUND_POWER_OF_TWO(blend, 5);
    }
  }
}
722
// Directional prediction, zone 3: 180 < angle < 270, uint16_t samples.
//
// Predicts a bw x bh block into `dst` (row pitch `stride`) using only the
// `left` edge; `above`, `dx` and `bd` are unused (dx is asserted to be 1).
// The block is produced column by column: for each column the start position
// advances by `dy` in 1/64 sample units and every output is a 32-step linear
// blend of two neighbouring `left` samples. Positions at or beyond index
// (bw + bh - 1) << upsample_left replicate the sample at that index.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;
  int y = dy;

  for (int c = 0; c < bw; ++c) {
    int idx = y >> frac_bits;
    const int w_next = ((y << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    int r = 0;

    // Interpolated part of the column.
    while (r < bh && idx < last_idx) {
      const int blend = left[idx] * w_cur + left[idx + 1] * w_next;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(blend, 5);
      ++r;
      idx += idx_step;
    }

    // Remainder of the column (if any) lies past the edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[last_idx];

    y += dy;
  }
}
755
// Dispatches directional prediction for `angle` (asserted to be in (0, 270))
// on uint16_t samples:
//   0 < angle < 90     -> av1_highbd_dr_prediction_z1
//   angle == 90        -> pred_high[V_PRED]
//   90 < angle < 180   -> av1_highbd_dr_prediction_z2
//   angle == 180       -> pred_high[H_PRED]
//   180 < angle < 270  -> av1_highbd_dr_prediction_z3
// Block dimensions come from `tx_size`; dx/dy come from av1_get_dx() and
// av1_get_dy(); `bd` is forwarded unchanged. Angles outside (0, 270) write
// nothing when asserts are off.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Exact vertical / horizontal angles reuse the plain V / H predictors.
  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }

  // Out-of-range angles fall through without writing anything.
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left,
                                upsample_left, dx, dy, bd);
  }
}
781
// Tap weights used by the filter-intra predictors, indexed as
// av1_filter_intra_taps[mode][k][tap]:
//   mode - selects one of the FILTER_INTRA_MODES weight sets.
//   k    - output position inside a 4x2 patch; the predictors in this file
//          map it to row offset (k >> 2) and column offset (k & 3).
//   tap  - entries 0..6 weight the seven neighbours p0..p6 read by those
//          predictors: p0 is the above-left pixel, p1..p4 the four pixels
//          above the patch, p5 and p6 the two pixels to its left.
//          Entry 7 is zero in every row and is not read by the C predictors
//          in this file (NOTE(review): presumably padding to 8 entries for
//          vectorised implementations - confirm).
// The seven weights of every row sum to 16.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
835
// 8-bit filter-intra predictor (C reference).
//
// The block is produced in 4x2 patches, scanned left-to-right and
// top-to-bottom. Each patch is a weighted sum of seven neighbours (one
// above-left, four above, two left) using av1_filter_intra_taps[mode]; the
// result is rounded by FILTER_INTRA_SCALE_BITS and clipped with clip_pixel().
// Patches after the first read pixels predicted by earlier patches, so the
// whole block is built in a scratch array first and then copied to dst.
//
//   dst, stride - output block and its row pitch.
//   tx_size     - transform size; width and height must not exceed 32.
//   above       - row above the block; above[-1] is the above-left pixel.
//   left        - column to the left of the block.
//   mode        - first index into av1_filter_intra_taps.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  // Row 0 / column 0 hold the reference edge; the prediction starts at [1][1].
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed the scratch array with the top edge (including the corner) and the
  // left edge.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours p0..p6 of the current 4x2 patch, read before it is written.
      const int nbr[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * nbr[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] =
            clip_pixel(ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  // Copy the predicted area (without the reference edge) to the destination.
  uint8_t *out_row = dst;
  for (int row = 1; row <= bh; ++row) {
    memcpy(out_row, &buffer[row][1], bw * sizeof(uint8_t));
    out_row += stride;
  }
}
883
// High-bitdepth filter-intra predictor.
//
// The block is produced in 4x2 patches, scanned left-to-right and
// top-to-bottom. Each patch is a weighted sum of seven neighbours (one
// above-left, four above, two left) using av1_filter_intra_taps[mode]; the
// result is rounded by FILTER_INTRA_SCALE_BITS and clipped with
// clip_pixel_highbd() for bit depth `bd`. Patches after the first read
// pixels predicted by earlier patches, so the whole block is built in a
// scratch array first and then copied to dst.
//
//   dst, stride - output block and its row pitch (in uint16_t elements).
//   tx_size     - transform size; width and height must not exceed 32.
//   above       - row above the block; above[-1] is the above-left pixel.
//   left        - column to the left of the block.
//   mode        - first index into av1_filter_intra_taps.
//   bd          - bit depth passed to clip_pixel_highbd().
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  // Row 0 / column 0 hold the reference edge; the prediction starts at [1][1].
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed the scratch array with the top edge (including the corner) and the
  // left edge.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours p0..p6 of the current 4x2 patch, read before it is written.
      const int nbr[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * nbr[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  // Copy the predicted area (without the reference edge) to the destination.
  uint16_t *out_row = dst;
  for (int row = 1; row <= bh; ++row) {
    memcpy(out_row, &buffer[row][1], bw * sizeof(dst[0]));
    out_row += stride;
  }
}
934
is_smooth(const MB_MODE_INFO * mbmi,int plane)935 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
936 if (plane == 0) {
937 const PREDICTION_MODE mode = mbmi->mode;
938 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
939 mode == SMOOTH_H_PRED);
940 } else {
941 // uv_mode is not set for inter blocks, so need to explicitly
942 // detect that case.
943 if (is_inter_block(mbmi)) return 0;
944
945 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
946 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
947 uv_mode == UV_SMOOTH_H_PRED);
948 }
949 }
950
get_filt_type(const MACROBLOCKD * xd,int plane)951 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
952 int ab_sm, le_sm;
953
954 if (plane == 0) {
955 const MB_MODE_INFO *ab = xd->above_mbmi;
956 const MB_MODE_INFO *le = xd->left_mbmi;
957 ab_sm = ab ? is_smooth(ab, plane) : 0;
958 le_sm = le ? is_smooth(le, plane) : 0;
959 } else {
960 const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
961 const MB_MODE_INFO *le = xd->chroma_left_mbmi;
962 ab_sm = ab ? is_smooth(ab, plane) : 0;
963 le_sm = le ? is_smooth(le, plane) : 0;
964 }
965
966 return (ab_sm || le_sm) ? 1 : 0;
967 }
968
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
//   bs0, bs1 - the two block dimensions; only their sum is used.
//   delta    - angle offset; only its magnitude is used.
//   type     - 0 selects the first threshold table, any other value the
//              second one.
//
// Within each size class the largest threshold that |delta| reaches wins.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return (mag >= 64) ? 2 : (mag >= 40) ? 1 : 0;
    if (size_sum <= 16) return (mag >= 48) ? 2 : (mag >= 20) ? 1 : 0;
    if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
  // Sums in (8, 12] and (12, 16] share the same threshold.
  if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
  if (size_sum <= 24) {
    return (mag >= 32) ? 3 : (mag >= 16) ? 2 : (mag >= 8) ? 1 : 0;
  }
  if (size_sum <= 32) {
    return (mag >= 32) ? 3 : (mag >= 4) ? 2 : (mag >= 1) ? 1 : 0;
  }
  return (mag >= 1) ? 3 : 0;
}
1007
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1008 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1009 if (!strength) return;
1010
1011 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1012 { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1013 };
1014 const int filt = strength - 1;
1015 uint8_t edge[129];
1016
1017 memcpy(edge, p, sz * sizeof(*p));
1018 for (int i = 1; i < sz; i++) {
1019 int s = 0;
1020 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1021 int k = i - 2 + j;
1022 k = (k < 0) ? 0 : k;
1023 k = (k > sz - 1) ? sz - 1 : k;
1024 s += edge[k] * kernel[filt][j];
1025 }
1026 s = (s + 8) >> 4;
1027 p[i] = s;
1028 }
1029 }
1030
// Filters the above-left corner sample with a {5, 6, 5} kernel over
// p_left[0], p_above[-1] and p_above[0], rounded with (s + 8) >> 4. The
// result is stored in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1040
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1041 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1042 if (!strength) return;
1043
1044 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1045 { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1046 };
1047 const int filt = strength - 1;
1048 uint16_t edge[129];
1049
1050 memcpy(edge, p, sz * sizeof(*p));
1051 for (int i = 1; i < sz; i++) {
1052 int s = 0;
1053 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1054 int k = i - 2 + j;
1055 k = (k < 0) ? 0 : k;
1056 k = (k > sz - 1) ? sz - 1 : k;
1057 s += edge[k] * kernel[filt][j];
1058 }
1059 s = (s + 8) >> 4;
1060 p[i] = s;
1061 }
1062 }
1063
// High-bitdepth corner filter: applies a {5, 6, 5} kernel over p_left[0],
// p_above[-1] and p_above[0], rounded with (s + 8) >> 4. The result is
// stored in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1073
av1_upsample_intra_edge_c(uint8_t * p,int sz)1074 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1075 // interpolate half-sample positions
1076 assert(sz <= MAX_UPSAMPLE_SZ);
1077
1078 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1079 // copy p[-1..(sz-1)] and extend first and last samples
1080 in[0] = p[-1];
1081 in[1] = p[-1];
1082 for (int i = 0; i < sz; i++) {
1083 in[i + 2] = p[i];
1084 }
1085 in[sz + 2] = p[sz - 1];
1086
1087 // interpolate half-sample edge positions
1088 p[-2] = in[0];
1089 for (int i = 0; i < sz; i++) {
1090 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1091 s = clip_pixel((s + 8) >> 4);
1092 p[2 * i - 1] = s;
1093 p[2 * i] = in[i + 2];
1094 }
1095 }
1096
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1097 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1098 // interpolate half-sample positions
1099 assert(sz <= MAX_UPSAMPLE_SZ);
1100
1101 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1102 // copy p[-1..(sz-1)] and extend first and last samples
1103 in[0] = p[-1];
1104 in[1] = p[-1];
1105 for (int i = 0; i < sz; i++) {
1106 in[i + 2] = p[i];
1107 }
1108 in[sz + 2] = p[sz - 1];
1109
1110 // interpolate half-sample edge positions
1111 p[-2] = in[0];
1112 for (int i = 0; i < sz; i++) {
1113 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1114 s = (s + 8) >> 4;
1115 s = clip_pixel_highbd(s, bd);
1116 p[2 * i - 1] = s;
1117 p[2 * i] = in[i + 2];
1118 }
1119 }
1120
// Builds the high-bitdepth intra prediction for one transform block.
//
// The function gathers the reference pixels above and to the left of `ref`
// into local edge arrays (replicating pixels or substituting default values
// where neighbours are unavailable), optionally filters and upsamples those
// edges for directional modes, and then calls the filter-intra, directional,
// DC or generic predictor that writes the block at `dst`.
//
//   xd                  - supplies the bit depth (xd->bd) and, through
//                         get_filt_type(), the neighbouring mode info.
//   ref8, ref_stride    - reconstructed frame at the block position; the edge
//                         pixels are read from the row above and the column
//                         to the left of it. Converted with
//                         CONVERT_TO_SHORTPTR before use.
//   dst8, dst_stride    - destination block, converted the same way.
//   mode, angle_delta   - prediction mode and, for directional modes, the
//                         offset added to the mode's base angle.
//   filter_intra_mode   - FILTER_INTRA_MODES means "filter intra not used".
//   tx_size             - transform size giving the block width and height.
//   disable_edge_filter - non-zero skips edge filtering and upsampling.
//   n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       - number of reference pixels that can be read from
//                         the above, above-right, left and bottom-left
//                         neighbours; 0 means that neighbour is unavailable.
//   plane               - plane index, forwarded to get_filt_type().
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their backing arrays so that negative
  // indices (the above-left corner at [-1], and [-2] written by the
  // upsampler) stay inside the arrays.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey for the current bit depth (128 at 8 bits).
  int base = 128 << (xd->bd - 8);

  // The default values if ref pixels are not available:
  // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // For directional modes the edges that are needed depend on the final
  // angle, not on the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter intra always reads both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Shortcut: the mode needs only one edge and that edge is unavailable, so
  // the whole block is filled with a single value taken from the other edge
  // if it exists, or from the defaults above.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  // Fill left_col[0..num_left_pixels_needed-1]: copy the available left (and
  // bottom-left) pixels, then repeat the last copied pixel for the rest.
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left neighbour: fall back to the first above pixel, or to the
      // default value when there is no above neighbour either.
      if (n_top_px > 0) {
        aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        aom_memset16(left_col, base + 1, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  // Fill above_row[0..num_top_pixels_needed-1]: copy the available above (and
  // above-right) pixels, then repeat the last copied pixel for the rest.
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else {
      // No above neighbour: fall back to the first left pixel, or to the
      // default value when there is no left neighbour either.
      if (n_left_px > 0) {
        aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        aom_memset16(above_row, base - 1, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner: the real corner pixel when both neighbours exist,
  // otherwise the first pixel of whichever edge exists, otherwise `base`.
  // The same value is stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Exactly vertical / horizontal prediction uses the edges unfiltered.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        // The corner is filtered first; the edge filters below start at the
        // corner sample (index -ab_le) but leave that first sample unchanged.
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      // Upsampling is decided per edge and runs after the filtering above.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  // DC prediction has a separate function for every combination of
  // left/above availability; all other modes use the generic table.
  if (mode == DC_PRED) {
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1305
// Builds the 8-bit intra prediction for one transform block.
//
// The function gathers the reference pixels above and to the left of `ref`
// into local edge arrays (replicating pixels or substituting default values
// where neighbours are unavailable), optionally filters and upsamples those
// edges for directional modes, and then calls the filter-intra, directional,
// DC or generic predictor that writes the block at `dst`.
//
//   xd                  - supplies the neighbouring mode info through
//                         get_filt_type().
//   ref, ref_stride     - reconstructed frame at the block position; the edge
//                         pixels are read from the row above and the column
//                         to the left of it.
//   dst, dst_stride     - destination block.
//   mode, angle_delta   - prediction mode and, for directional modes, the
//                         offset added to the mode's base angle.
//   filter_intra_mode   - FILTER_INTRA_MODES means "filter intra not used".
//   tx_size             - transform size giving the block width and height.
//   disable_edge_filter - non-zero skips edge filtering and upsampling.
//   n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       - number of reference pixels that can be read from
//                         the above, above-right, left and bottom-left
//                         neighbours; 0 means that neighbour is unavailable.
//   plane               - plane index, forwarded to get_filt_type().
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their backing arrays so that negative
  // indices (the above-left corner at [-1], and [-2] written by the
  // upsampler) stay inside the arrays.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

  // The default values if ref pixels are not available:
  // 127 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // For directional modes the edges that are needed depend on the final
  // angle, not on the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter intra always reads both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Shortcut: the mode needs only one edge and that edge is unavailable, so
  // the whole block is filled with a single value taken from the other edge
  // if it exists, or from the defaults above.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  // Fill left_col[0..num_left_pixels_needed-1]: copy the available left (and
  // bottom-left) pixels, then repeat the last copied pixel for the rest.
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left neighbour: fall back to the first above pixel, or to the
      // default value when there is no above neighbour either.
      if (n_top_px > 0) {
        memset(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        memset(left_col, 129, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  // Fill above_row[0..num_top_pixels_needed-1]: copy the available above (and
  // above-right) pixels, then repeat the last copied pixel for the rest.
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else {
      // No above neighbour: fall back to the first left pixel, or to the
      // default value when there is no left neighbour either.
      if (n_left_px > 0) {
        memset(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        memset(above_row, 127, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner: the real corner pixel when both neighbours exist,
  // otherwise the first pixel of whichever edge exists, otherwise 128.
  // The same value is stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Exactly vertical / horizontal prediction uses the edges unfiltered.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        // The corner is filtered first; the edge filters below start at the
        // corner sample (index -ab_le) but leave that first sample unchanged.
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      // Upsampling is decided per edge and runs after the filtering above.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  // DC prediction has a separate function for every combination of
  // left/above availability; all other modes use the generic table.
  if (mode == DC_PRED) {
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1488
// Predicts one transform block of an intra-coded block.
//
// With `use_palette` set, the block is filled directly from the palette
// colours through the colour index map. Otherwise the function works out
// which neighbouring pixels are available (above, above-right, left,
// bottom-left) and how many of each can be read, then hands over to
// build_intra_predictors() or, for high-bitdepth buffers,
// build_intra_predictors_high().
//
//   wpx, hpx         - width and height of the plane block in pixels.
//   col_off, row_off - position of the transform block inside the plane
//                      block, in units of the smallest transform size.
//   ref, dst         - reference and destination buffers with their strides.
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int x = col_off << tx_size_wide_log2[0];
  const int y = row_off << tx_size_high_log2[0];

  if (use_palette) {
    // The colour index map is shared by both chroma planes.
    const uint8_t *const color_map = xd->plane[plane != 0].color_index_map +
                                     xd->color_index_map_offset[plane != 0];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const idx_row = color_map + (row + y) * wpx + x;
        uint16_t *const out_row = dst16 + row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          out_row[col] = palette[idx_row[col]];
        }
      }
    } else {
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const idx_row = color_map + (row + y) * wpx + x;
        uint8_t *const out_row = dst + row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          out_row[col] = (uint8_t)palette[idx_row[col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // A transform block that is not in the first row/column of the plane block
  // always has a neighbour inside the same block.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx);
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx);

  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y);

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

  // Number of readable reference pixels on each side, limited by the
  // distance to the frame edge.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px, plane);
    return;
  }

  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                         angle_delta, filter_intra_mode, tx_size,
                         disable_edge_filter, n_top_px, n_topright_px,
                         n_left_px, n_bottomleft_px, plane);
}
1584
// Convenience entry point: derives the prediction parameters (mode, palette
// use, filter-intra mode, angle delta, destination pointer) for the given
// plane from the current block's mode info and runs the prediction in place
// on the plane's destination buffer.
//
// Chroma blocks coded with UV_CFL_PRED first obtain a DC prediction (either
// freshly computed or loaded from the CfL cache) and are then completed by
// cfl_predict_block().
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int is_luma = (plane == AOM_PLANE_Y);
  const int dst_stride = pd->dst.stride;
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  const int angle_delta = mbmi->angle_delta[!is_luma] * ANGLE_STEP;

  // Filter intra is only ever applied to the luma plane.
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  // Regular (non-CfL) prediction.
  if (is_luma || mbmi->uv_mode != UV_CFL_PRED) {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    return;
  }

  // Chroma-from-luma prediction.
#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif
  CFL_CTX *const cfl = &xd->cfl;
  CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    // Remember the DC prediction for later reuse when caching is enabled.
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1637
// Public initialisation entry point for the intra predictors: hands
// init_intra_predictors_internal to aom_once().
// NOTE(review): aom_once() presumably runs the callback at most once per
// process, which would make repeated calls harmless - confirm against
// aom_ports/aom_once.h.
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1641