1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/reconintra.h"
24 #include "av1/common/onyxc_int.h"
25 #include "av1/common/cfl.h"
26 
// Bit flags naming the groups of neighboring edge pixels that an intra
// prediction mode may read. The flags are OR-ed together into per-mode masks
// (see extend_modes). Bit 0 is not assigned to any flag here.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
34 
// NOTE(review): none of these three constants is referenced in the part of the
// file reviewed here; the descriptions below are inferred from the names only
// and should be confirmed against the code that uses them.
// Presumably the number of selectable intra edge filter strengths.
#define INTRA_EDGE_FILT 3
// Presumably the number of taps of each intra edge filter kernel.
#define INTRA_EDGE_TAPS 5
// Presumably the longest edge (in pixels) that is ever upsampled.
#define MAX_UPSAMPLE_SZ 16
38 
39 static const uint8_t extend_modes[INTRA_MODES] = {
40   NEED_ABOVE | NEED_LEFT,                   // DC
41   NEED_ABOVE,                               // V
42   NEED_LEFT,                                // H
43   NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47   NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48   NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49   NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53 };
54 
// Tables that record whether the top-right reference pixels of a block are
// available. There is one table per block size; each block inside the
// superblock owns one bit, and the bits are packed into 8-bit integers in
// raster order. E.g., for the 32x32 blocks in a 128x128 superblock, the index
// of the "o" block is 10 (in raster order), so its flag is stored at the 3rd
// bit of the 2nd entry in the table, i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are pure lookup data and are never written, so they are declared
// const; they are only reached through `const uint8_t *` pointers.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127 
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129   // 4X4
130   has_tr_4x4,
131   // 4X8,       8X4,            8X8
132   has_tr_4x8, has_tr_8x4, has_tr_8x8,
133   // 8X16,      16X8,           16X16
134   has_tr_8x16, has_tr_16x8, has_tr_16x16,
135   // 16X32,     32X16,          32X32
136   has_tr_16x32, has_tr_32x16, has_tr_32x32,
137   // 32X64,     64X32,          64X64
138   has_tr_32x64, has_tr_64x32, has_tr_64x64,
139   // 64x128,    128x64,         128x128
140   has_tr_64x128, has_tr_128x64, has_tr_128x128,
141   // 4x16,      16x4,            8x32
142   has_tr_4x16, has_tr_16x4, has_tr_8x32,
143   // 32x8,      16x64,           64x16
144   has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146 
// Top-right availability bit tables for square blocks that are coded as part
// of a PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// has_tr_* tables: one bit per block, raster order, 8 blocks per byte. The
// data is read-only, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156 
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167   // 4X4
168   NULL,
169   // 4X8,      8X4,         8X8
170   has_tr_4x8, NULL, has_tr_vert_8x8,
171   // 8X16,     16X8,        16X16
172   has_tr_8x16, NULL, has_tr_vert_16x16,
173   // 16X32,    32X16,       32X32
174   has_tr_16x32, NULL, has_tr_vert_32x32,
175   // 32X64,    64X32,       64X64
176   has_tr_32x64, NULL, has_tr_vert_64x64,
177   // 64x128,   128x64,      128x128
178   has_tr_64x128, NULL, has_tr_128x128
179 };
180 
// Returns the top-right availability bit table to use for a block of size
// |bsize| coded under |partition|.
//
// PARTITION_VERT_A and PARTITION_VERT_B take their table from
// has_tr_vert_tables (which only covers the first BLOCK_SIZES entries); every
// other partition type uses has_tr_tables. The selected table is asserted to
// be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  if (is_mixed_vert) {
    assert(bsize < BLOCK_SIZES);
  }
  const uint8_t *const table =
      is_mixed_vert ? has_tr_vert_tables[bsize] : has_tr_tables[bsize];
  assert(table);
  return table;
}
194 
// Reports whether the pixels above and to the right of a transform block are
// available as intra prediction references.
//
//   cm               - supplies the superblock size (seq_params.sb_size).
//   bsize            - size of the coding block containing the transform block.
//   mi_row, mi_col   - position of the coding block in mode-info units.
//   top_available,
//   right_available  - caller-computed availability of the rows above and the
//                      columns to the right; if either is 0 the answer is 0.
//   partition        - partition type of the coding block (selects the table).
//   txsz             - transform size; its width bounds the top-right span.
//   row_off, col_off - offset of the transform block inside the coding block,
//                      in the same units as tx_size_wide_unit[].
//   ss_x, ss_y       - subsampling shifts of the plane.
//
// Returns 1 if the top-right pixels are available, 0 otherwise.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int blk_w_unit = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int plane_w_unit = AOMMAX(blk_w_unit >> ss_x, 1);
  const int tr_unit = tx_size_wide_unit[txsz];
  // Column (in units) just past the right edge of this transform block.
  const int tr_start_col = col_off + tr_unit;

  if (row_off > 0) {
    // Not on the first row of the block: the top-right pixels lie inside this
    // block, so only the horizontal extent matters.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tr_start_col < plane_w_unit;
    }

    // Blocks wider than 64 pixels are handled per 64-wide half.
    const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;
    // Special case: the transform unit whose top-right corner is at the
    // center of the block does have pixels available at that corner.
    if (row_off == h64_unit && tr_start_col == w64_unit) return 1;

    const int col_in_64 = col_off % w64_unit;
    return col_in_64 + tr_unit < w64_unit;
  }

  // First row of the block. If the top-right span stays within the block
  // width, it lies in the block above, which is already available.
  if (tr_start_col < plane_w_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Top row of the superblock: the pixels are in the top and/or top-right
  // superblocks, both of which are already available.
  if (sb_blk_row == 0) return 1;

  // Rightmost column of the superblock (and not its top row): the pixels fall
  // in the superblock to the right, which is not available yet.
  if (((sb_blk_col + 1) << bw_log2) >= sb_mi) return 0;

  // General case: look up whether the top-right block is coded before the
  // current block.
  const int blk_idx =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const int byte_idx = blk_idx / 8;
  const int bit_idx = blk_idx % 8;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[byte_idx] >> bit_idx) & 1;
}
250 
// Similar to the has_tr_* tables, but these record whether the bottom-left
// reference pixels of a block are available. There is one table per block
// size, with one bit per block packed 8 to a byte in raster order, so block
// index i is read as (table[i / 8] >> (i % 8)) & 1.
//
// The tables are pure lookup data and are never written, so they are declared
// const; they are only reached through `const uint8_t *` pointers.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312 
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314   // 4X4
315   has_bl_4x4,
316   // 4X8,         8X4,         8X8
317   has_bl_4x8, has_bl_8x4, has_bl_8x8,
318   // 8X16,        16X8,        16X16
319   has_bl_8x16, has_bl_16x8, has_bl_16x16,
320   // 16X32,       32X16,       32X32
321   has_bl_16x32, has_bl_32x16, has_bl_32x32,
322   // 32X64,       64X32,       64X64
323   has_bl_32x64, has_bl_64x32, has_bl_64x64,
324   // 64x128,      128x64,      128x128
325   has_bl_64x128, has_bl_128x64, has_bl_128x128,
326   // 4x16,        16x4,        8x32
327   has_bl_4x16, has_bl_16x4, has_bl_8x32,
328   // 32x8,        16x64,       64x16
329   has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331 
// Bottom-left availability bit tables for square blocks that are coded as part
// of a PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// has_bl_* tables: one bit per block, raster order, 8 blocks per byte. The
// data is read-only, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341 
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352   // 4X4
353   NULL,
354   // 4X8,     8X4,         8X8
355   has_bl_4x8, NULL, has_bl_vert_8x8,
356   // 8X16,    16X8,        16X16
357   has_bl_8x16, NULL, has_bl_vert_16x16,
358   // 16X32,   32X16,       32X32
359   has_bl_16x32, NULL, has_bl_vert_32x32,
360   // 32X64,   64X32,       64X64
361   has_bl_32x64, NULL, has_bl_vert_64x64,
362   // 64x128,  128x64,      128x128
363   has_bl_64x128, NULL, has_bl_128x128
364 };
365 
// Returns the bottom-left availability bit table to use for a block of size
// |bsize| coded under |partition|.
//
// PARTITION_VERT_A and PARTITION_VERT_B take their table from
// has_bl_vert_tables (which only covers the first BLOCK_SIZES entries); every
// other partition type uses has_bl_tables. The selected table is asserted to
// be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  if (is_mixed_vert) {
    assert(bsize < BLOCK_SIZES);
  }
  const uint8_t *const table =
      is_mixed_vert ? has_bl_vert_tables[bsize] : has_bl_tables[bsize];
  assert(table);
  return table;
}
379 
// Reports whether the pixels below and to the left of a transform block are
// available as intra prediction references.
//
//   cm               - supplies the superblock size (seq_params.sb_size).
//   bsize            - size of the coding block containing the transform block.
//   mi_row, mi_col   - position of the coding block in mode-info units.
//   bottom_available,
//   left_available   - caller-computed availability of the rows below and the
//                      columns to the left; if either is 0 the answer is 0.
//   partition        - partition type of the coding block (selects the table).
//   txsz             - transform size; its height bounds the bottom-left span.
//   row_off, col_off - offset of the transform block inside the coding block,
//                      in the same units as tx_size_high_unit[].
//   ss_x, ss_y       - subsampling shifts of the plane.
//
// Returns 1 if the bottom-left pixels are available, 0 otherwise.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  if (col_off > 0) {
    // Special case for 128x* blocks, when col_off is a multiple of the 64-wide
    // half. This is needed because 128x* superblocks are divided into 64x*
    // blocks in raster order.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_unit == 0) {
        // Left edge of the top-right or bottom-right 64x* block: available if
        // all bottom-left pixels are in the left 64x* block (already coded).
        const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;
        const int row_in_64 = row_off % h64_unit;
        const int limit_unit = AOMMIN(mi_size_high[bsize] >> ss_y, h64_unit);
        return row_in_64 + tx_size_high_unit[txsz] < limit_unit;
      }
    }
    // Otherwise the pixels are in the bottom-left block, which is not
    // available.
    return 0;
  }

  const int blk_h_unit = block_size_high[bsize] >> tx_size_high_log2[0];
  const int plane_h_unit = AOMMAX(blk_h_unit >> ss_y, 1);
  const int bl_unit = tx_size_high_unit[txsz];

  // The bottom-left span stays within the block height, so it lies in the
  // left block, which is already available.
  if (row_off + bl_unit < plane_h_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblocks, but only the left one is available, so check
  // that all required pixels fall inside it.
  if (sb_blk_col == 0) {
    // NOTE(review): this height computation uses tx_size_wide_log2[0] while
    // blk_h_unit above uses tx_size_high_log2[0]; presumably both are equal
    // for the smallest transform. Left as in the original; confirm.
    const int row_shift = bh_log2 + MI_SIZE_LOG2 - tx_size_wide_log2[0];
    const int blk_first_row = (sb_blk_row << row_shift) >> ss_y;
    const int row_in_sb = blk_first_row + row_off;
    const int sb_h_unit = sb_mi >> ss_y;
    return row_in_sb + bl_unit < sb_h_unit;
  }

  // Bottom row of the superblock (and not its leftmost column): the pixels
  // fall in the superblock below, which is not available yet.
  if (((sb_blk_row + 1) << bh_log2) >= sb_mi) return 0;

  // General case: look up whether the bottom-left block is coded before the
  // current block.
  const int blk_idx =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const int byte_idx = blk_idx / 8;
  const int bit_idx = blk_idx % 8;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[byte_idx] >> bit_idx) & 1;
}
449 
// Signature of an 8-bit intra predictor: writes the prediction into |dst|
// (rows |stride| elements apart) from the |above| and |left| reference edges.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// 8-bit predictor dispatch tables, filled in by
// init_intra_predictors_internal(). pred[] is indexed by [mode][tx_size].
// dc_pred[] is indexed by [i][j][tx_size], where [0][0] holds the dc_128
// variants, [0][1] dc_top, [1][0] dc_left and [1][1] dc (presumably i/j are
// "left available" / "above available" -- confirm against the callers).
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

// Signature of a high-bitdepth intra predictor; same as intra_pred_fn but on
// 16-bit samples, with the bit depth passed as |bd|.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
// High-bitdepth counterparts of pred[] and dc_pred[], with the same indexing.
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461 
init_intra_predictors_internal(void)462 static void init_intra_predictors_internal(void) {
463   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
464 
465 #define INIT_RECTANGULAR(p, type)             \
466   p[TX_4X8] = aom_##type##_predictor_4x8;     \
467   p[TX_8X4] = aom_##type##_predictor_8x4;     \
468   p[TX_8X16] = aom_##type##_predictor_8x16;   \
469   p[TX_16X8] = aom_##type##_predictor_16x8;   \
470   p[TX_16X32] = aom_##type##_predictor_16x32; \
471   p[TX_32X16] = aom_##type##_predictor_32x16; \
472   p[TX_32X64] = aom_##type##_predictor_32x64; \
473   p[TX_64X32] = aom_##type##_predictor_64x32; \
474   p[TX_4X16] = aom_##type##_predictor_4x16;   \
475   p[TX_16X4] = aom_##type##_predictor_16x4;   \
476   p[TX_8X32] = aom_##type##_predictor_8x32;   \
477   p[TX_32X8] = aom_##type##_predictor_32x8;   \
478   p[TX_16X64] = aom_##type##_predictor_16x64; \
479   p[TX_64X16] = aom_##type##_predictor_64x16;
480 
481 #define INIT_NO_4X4(p, type)                  \
482   p[TX_8X8] = aom_##type##_predictor_8x8;     \
483   p[TX_16X16] = aom_##type##_predictor_16x16; \
484   p[TX_32X32] = aom_##type##_predictor_32x32; \
485   p[TX_64X64] = aom_##type##_predictor_64x64; \
486   INIT_RECTANGULAR(p, type)
487 
488 #define INIT_ALL_SIZES(p, type)           \
489   p[TX_4X4] = aom_##type##_predictor_4x4; \
490   INIT_NO_4X4(p, type)
491 
492   INIT_ALL_SIZES(pred[V_PRED], v);
493   INIT_ALL_SIZES(pred[H_PRED], h);
494   INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
495   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
496   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
497   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
498   INIT_ALL_SIZES(dc_pred[0][0], dc_128);
499   INIT_ALL_SIZES(dc_pred[0][1], dc_top);
500   INIT_ALL_SIZES(dc_pred[1][0], dc_left);
501   INIT_ALL_SIZES(dc_pred[1][1], dc);
502 
503   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
504   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
505   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
506   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
507   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
508   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
509   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
510   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
511   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
512   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
513 #undef intra_pred_allsizes
514 }
515 
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into |dst| using only the |above| edge. Row r is
// projected onto the above edge at position (r + 1) * dx, in 1/64 sample
// units (1/32 when |upsample_above| is 1); each output is a 5-bit weighted
// blend of two neighboring edge samples. Positions at or beyond
// above[max_base_x] are filled with above[max_base_x]. |left| is unused and
// |dy| must be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int w_next = ((x << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    if (base >= max_base_x) {
      // This row, and therefore every later row, starts past the end of the
      // edge: flood-fill the remainder of the block and stop.
      for (int rem = r; rem < bh; ++rem) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int sum = above[base] * w_cur + above[base + 1] * w_next;
        dst[c] = ROUND_POWER_OF_TWO(sum, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
553 
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into |dst| from both edges. Each pixel is first
// projected onto the |above| edge; if that position is at or right of
// min_base_x the above edge is used, otherwise the pixel is projected onto
// the |left| edge instead. Either way the output is a 5-bit weighted blend of
// two neighboring edge samples. Negative indices into |above| and |left| are
// read, so both pointers must have valid samples before element 0.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;
  const int base_inc_x = 1 << upsample_above;

  int x = -dx;
  for (int r = 0; r < bh; ++r) {
    int base_x = x >> frac_bits_x;
    // The horizontal blend weight depends on x only, so it is per-row.
    const int frac_x = ((x * (1 << upsample_above)) & 0x3F) >> 1;
    int y = (r << 6) - dy;

    for (int c = 0; c < bw; ++c) {
      const uint8_t *edge;
      int pos;
      int frac;
      if (base_x >= min_base_x) {
        edge = above;
        pos = base_x;
        frac = frac_x;
      } else {
        pos = y >> frac_bits_y;
        assert(pos >= -(1 << upsample_left));
        edge = left;
        frac = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }
      const int sum = edge[pos] * (32 - frac) + edge[pos + 1] * frac;
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);

      base_x += base_inc_x;
      y -= dy;
    }

    x -= dx;
    dst += stride;
  }
}
588 
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into |dst| using only the |left| edge, one column
// at a time. Column c is projected onto the left edge at position
// (c + 1) * dy, in 1/64 sample units (1/32 when |upsample_left| is 1); each
// output is a 5-bit weighted blend of two neighboring edge samples. Rows
// whose position reaches max_base_y are filled with left[max_base_y].
// |above| is unused and |dx| must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int w_next = ((y << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    // Interpolated part of the column: while the position is inside the edge.
    int r = 0;
    while (r < bh && base < max_base_y) {
      const int sum = left[base] * w_cur + left[base + 1] * w_next;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(sum, 5);
      ++r;
      base += base_inc;
    }
    // Remainder of the column (if any) lies past the end of the edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
620 
// Dispatches an 8-bit directional prediction of |tx_size| at |angle| degrees.
//
// Angles of exactly 90 and 180 go to the V_PRED / H_PRED entries of the pred
// table; all other angles in (0, 270) go to the zone 1/2/3 directional
// predictors with the step sizes returned by av1_get_dx() / av1_get_dy().
// Angles outside (0, 270) are asserted against and otherwise leave |dst|
// untouched.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
645 
// Directional prediction, zone 1: 0 < angle < 90 (high bitdepth).
//
// Predicts a bw x bh block of 16-bit samples into |dst| using only the
// |above| edge. Row r is projected onto the above edge at position
// (r + 1) * dx, in 1/64 sample units (1/32 when |upsample_above| is 1); each
// output is a 5-bit weighted blend of two neighboring edge samples. Positions
// at or beyond above[max_base_x] are filled with above[max_base_x]. |left|
// and |bd| are unused and |dy| must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int w_next = ((x << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    if (base >= max_base_x) {
      // This row, and therefore every later row, starts past the end of the
      // edge: flood-fill the remainder of the block and stop.
      for (int rem = r; rem < bh; ++rem) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int sum = above[base] * w_cur + above[base + 1] * w_next;
        dst[c] = ROUND_POWER_OF_TWO(sum, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
685 
// Directional prediction, zone 2: 90 < angle < 180 (high bitdepth).
//
// Predicts a bw x bh block of 16-bit samples into |dst| from both edges. Each
// pixel (r, c) is first projected onto the |above| edge at
// (c << 6) - (r + 1) * dx; if that position is at or right of min_base_x the
// above edge is used, otherwise the pixel is projected onto the |left| edge
// at (r << 6) - (c + 1) * dy. Either way the output is a 5-bit weighted blend
// of two neighboring edge samples. Negative indices into |above| and |left|
// are read. |bd| is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int pos_x = (c << 6) - (r + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;

      const uint16_t *edge;
      int idx;
      int frac;
      if (base_x >= min_base_x) {
        edge = above;
        idx = base_x;
        frac = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int pos_y = (r << 6) - (c + 1) * dy;
        edge = left;
        idx = pos_y >> frac_bits_y;
        frac = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int sum = edge[idx] * (32 - frac) + edge[idx + 1] * frac;
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
722 
// Directional prediction, zone 3: 180 < angle < 270
//
// Works column by column using only the left edge. Each column starts at a
// position that advances by dy (1/64 sample units) per column; samples down
// the column step one edge sample (two when upsampled) at a time. Once the
// index reaches max_base_y the rest of the column is filled with
// left[max_base_y].
//
//   above, dx, bd - unused by this C implementation (dx must be 1)
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int step = 1 << upsample_left;

  for (int col = 0, pos = dy; col < bw; ++col, pos += dy) {
    // The fractional weight is constant down a column.
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> frac_bits;
    int row = 0;

    // Interpolated part of the column.
    for (; row < bh && idx < max_base_y; ++row, idx += step) {
      const int blend = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
    }

    // Past the end of the edge: replicate the last usable sample.
    for (; row < bh; ++row) dst[row * stride + col] = left[max_base_y];
  }
}
755 
// Dispatches a high-bitdepth directional prediction by angle:
//   angle == 90            -> vertical predictor   (pred_high[V_PRED])
//   angle == 180           -> horizontal predictor (pred_high[H_PRED])
//   0   < angle < 90       -> zone 1
//   90  < angle < 180      -> zone 2
//   180 < angle < 270      -> zone 3
// Angles outside (0, 270) are rejected by the assert and otherwise ignored.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Axis-aligned angles use the plain V / H predictors.
  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }

  // Out-of-range angles produce no output (only reachable without asserts).
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
781 
// Filter-intra tap table, indexed as [mode][k][tap].
//
//  - mode: one of the FILTER_INTRA_MODES filter sets.
//  - k:    output sample within a 4x2 patch; the C predictors in this file
//          place it at row offset (k >> 2), column offset (k & 3).
//  - tap:  entries 0..6 weight the seven reference samples p0..p6 (p0 is the
//          above-left sample, p1..p4 the four samples above the patch, p5 and
//          p6 the two samples to its left). Entry 7 is zero in every row and
//          is not read by the C predictors in this file.
//          NOTE(review): presumably padding to 8 entries per row for
//          alignment / vector loads -- confirm against the SIMD versions.
//
// The weighted sum is rounded with FILTER_INTRA_SCALE_BITS by the predictors.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
835 
// 8-bit filter-intra predictor.
//
// The block is predicted in 4x2 patches, in raster order. Each patch is
// computed from seven already-known samples (the five directly above it,
// starting at the above-left corner, and the two to its left) using the tap
// set selected by `mode`. Results are written into a scratch grid so that
// later patches can use earlier patches as their references, then copied to
// `dst`.
//
//   above - above row; above[-1] (the above-left sample) is read as well
//   left  - left column, bh entries
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  // Row 0 / column 0 hold the edge samples; the block lives at [1..][1..].
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed the scratch grid with the above row (incl. corner) and left column.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Snapshot the seven references before the patch is overwritten.
      const uint8_t *const top = &buffer[row - 1][col - 1];
      const int ref[7] = { top[0],
                           top[1],
                           top[2],
                           top[3],
                           top[4],
                           buffer[row][col - 1],
                           buffer[row + 1][col - 1] };

      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * ref[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] =
            clip_pixel(ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  // Copy the predicted block (without the edge row/column) to the output.
  for (int row = 0; row < bh; ++row, dst += stride)
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(uint8_t));
}
883 
// High-bitdepth filter-intra predictor.
//
// Same scheme as the 8-bit version: the block is produced in 4x2 patches in
// raster order, each patch derived from seven neighbouring samples (five
// above starting at the above-left corner, two to the left) weighted by the
// tap set for `mode`, with the result clipped to `bd` bits.
//
//   above - above row; above[-1] (the above-left sample) is read as well
//   left  - left column, bh entries
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  // Row 0 / column 0 hold the edge samples; the block lives at [1..][1..].
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed the scratch grid with the above row (incl. corner) and left column.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Snapshot the seven references before the patch is overwritten.
      const uint16_t *const top = &buffer[row - 1][col - 1];
      const int ref[7] = { top[0],
                           top[1],
                           top[2],
                           top[3],
                           top[4],
                           buffer[row][col - 1],
                           buffer[row + 1][col - 1] };

      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * ref[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  // Copy the predicted block (without the edge row/column) to the output.
  for (int row = 0; row < bh; ++row, dst += stride)
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(dst[0]));
}
934 
is_smooth(const MB_MODE_INFO * mbmi,int plane)935 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
936   if (plane == 0) {
937     const PREDICTION_MODE mode = mbmi->mode;
938     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
939             mode == SMOOTH_H_PRED);
940   } else {
941     // uv_mode is not set for inter blocks, so need to explicitly
942     // detect that case.
943     if (is_inter_block(mbmi)) return 0;
944 
945     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
946     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
947             uv_mode == UV_SMOOTH_H_PRED);
948   }
949 }
950 
get_filt_type(const MACROBLOCKD * xd,int plane)951 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
952   int ab_sm, le_sm;
953 
954   if (plane == 0) {
955     const MB_MODE_INFO *ab = xd->above_mbmi;
956     const MB_MODE_INFO *le = xd->left_mbmi;
957     ab_sm = ab ? is_smooth(ab, plane) : 0;
958     le_sm = le ? is_smooth(le, plane) : 0;
959   } else {
960     const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
961     const MB_MODE_INFO *le = xd->chroma_left_mbmi;
962     ab_sm = ab ? is_smooth(ab, plane) : 0;
963     le_sm = le ? is_smooth(le, plane) : 0;
964   }
965 
966   return (ab_sm || le_sm) ? 1 : 0;
967 }
968 
// Chooses the intra edge filter strength (0 = no filtering, up to 3).
//
//   bs0, bs1 - the two block dimensions; only their sum is used
//   delta    - signed angle offset; only its magnitude is used
//   type     - 0 selects the first threshold set, any other value the second
//
// Larger blocks and larger angle offsets yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return mag >= 56;
    // Sums of 9..12 and 13..16 share the same threshold.
    if (size_sum <= 16) return mag >= 40;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return mag >= 8;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return mag >= 1;
    }
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return mag >= 40;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return mag >= 20;
  }
  if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
  return (mag >= 1) ? 3 : 0;
}
1007 
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1008 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1009   if (!strength) return;
1010 
1011   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1012     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1013   };
1014   const int filt = strength - 1;
1015   uint8_t edge[129];
1016 
1017   memcpy(edge, p, sz * sizeof(*p));
1018   for (int i = 1; i < sz; i++) {
1019     int s = 0;
1020     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1021       int k = i - 2 + j;
1022       k = (k < 0) ? 0 : k;
1023       k = (k > sz - 1) ? sz - 1 : k;
1024       s += edge[k] * kernel[filt][j];
1025     }
1026     s = (s + 8) >> 4;
1027     p[i] = s;
1028   }
1029 }
1030 
// Smooths the above-left corner sample with a (5, 6, 5) / 16 filter across
// left[0], the corner (p_above[-1]) and above[0], then stores the result as
// the corner of both edges (p_above[-1] and p_left[-1]).
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int first_left = p_left[0];
  const int corner = p_above[-1];
  const int first_above = p_above[0];

  const int weighted = 5 * first_left + 6 * corner + 5 * first_above;
  const int smoothed = (weighted + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1040 
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1041 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1042   if (!strength) return;
1043 
1044   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1045     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1046   };
1047   const int filt = strength - 1;
1048   uint16_t edge[129];
1049 
1050   memcpy(edge, p, sz * sizeof(*p));
1051   for (int i = 1; i < sz; i++) {
1052     int s = 0;
1053     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1054       int k = i - 2 + j;
1055       k = (k < 0) ? 0 : k;
1056       k = (k > sz - 1) ? sz - 1 : k;
1057       s += edge[k] * kernel[filt][j];
1058     }
1059     s = (s + 8) >> 4;
1060     p[i] = s;
1061   }
1062 }
1063 
// High-bitdepth corner smoothing: applies a (5, 6, 5) / 16 filter across
// left[0], the corner (p_above[-1]) and above[0], then stores the result as
// the corner of both edges (p_above[-1] and p_left[-1]).
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int first_left = p_left[0];
  const int corner = p_above[-1];
  const int first_above = p_above[0];

  const int weighted = 5 * first_left + 6 * corner + 5 * first_above;
  const int smoothed = (weighted + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1073 
av1_upsample_intra_edge_c(uint8_t * p,int sz)1074 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1075   // interpolate half-sample positions
1076   assert(sz <= MAX_UPSAMPLE_SZ);
1077 
1078   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1079   // copy p[-1..(sz-1)] and extend first and last samples
1080   in[0] = p[-1];
1081   in[1] = p[-1];
1082   for (int i = 0; i < sz; i++) {
1083     in[i + 2] = p[i];
1084   }
1085   in[sz + 2] = p[sz - 1];
1086 
1087   // interpolate half-sample edge positions
1088   p[-2] = in[0];
1089   for (int i = 0; i < sz; i++) {
1090     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1091     s = clip_pixel((s + 8) >> 4);
1092     p[2 * i - 1] = s;
1093     p[2 * i] = in[i + 2];
1094   }
1095 }
1096 
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1097 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1098   // interpolate half-sample positions
1099   assert(sz <= MAX_UPSAMPLE_SZ);
1100 
1101   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1102   // copy p[-1..(sz-1)] and extend first and last samples
1103   in[0] = p[-1];
1104   in[1] = p[-1];
1105   for (int i = 0; i < sz; i++) {
1106     in[i + 2] = p[i];
1107   }
1108   in[sz + 2] = p[sz - 1];
1109 
1110   // interpolate half-sample edge positions
1111   p[-2] = in[0];
1112   for (int i = 0; i < sz; i++) {
1113     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1114     s = (s + 8) >> 4;
1115     s = clip_pixel_highbd(s, bd);
1116     p[2 * i - 1] = s;
1117     p[2 * i] = in[i + 2];
1118   }
1119 }
1120 
// High-bitdepth intra prediction driver for one transform block.
//
// Gathers the above row and left column around `ref` into local padded
// buffers (substituting default values where neighbours are unavailable),
// optionally filters / upsamples those edges for directional modes, and then
// dispatches to the filter-intra, directional, DC or generic predictor, which
// writes the prediction into `dst`.
//
//   ref8, dst8          - buffers that are converted with CONVERT_TO_SHORTPTR
//                         and then accessed as uint16_t samples
//   ref_stride,
//   dst_stride          - row strides, applied to the uint16_t pointers
//   mode, angle_delta   - prediction mode; for directional modes the angle is
//                         mode_to_angle_map[mode] + angle_delta
//   filter_intra_mode   - FILTER_INTRA_MODES means filter-intra is not used
//   tx_size             - selects the block width / height
//   disable_edge_filter - non-zero skips edge filtering and upsampling
//   n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       - number of reference samples available on each edge
//   plane               - passed to get_filt_type() to pick the neighbours
//                         that decide the edge filter type
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // Index 0 of each edge sits 16 entries into its backing array, so negative
  // indices (the above-left sample at [-1], and entries written below it by
  // the filtering / upsampling steps) stay inside the buffer.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey for the current bit depth (128 at 8 bits).
  int base = 128 << (xd->bd - 8);

  // The default values if ref pixels are not available:
  // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // For directional modes the required edges follow from the final angle
  // rather than from the extend_modes[] table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always reads the above row, left column and corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Shortcut: the mode uses a single edge and that edge is unavailable. The
  // whole block is filled with one value taken from the other edge if it
  // exists, or from the defaults above if it does not.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy the available left (and bottom-left) samples, then pad the rest
      // by repeating the last copied sample.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left samples: fall back to the first above sample, or base + 1.
      if (n_top_px > 0) {
        aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        aom_memset16(left_col, base + 1, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the available above (and above-right) samples, then pad the rest
      // by repeating the last copied sample.
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else {
      // No above samples: fall back to the first left sample, or base - 1.
      if (n_left_px > 0) {
        aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        aom_memset16(above_row, base - 1, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner, stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for the purely vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // When the corner is in use the filtered span starts one sample
        // earlier, at index -1.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the resulting flags are passed
      // on to the directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  // DC has separate variants selected by which edges are actually available.
  if (mode == DC_PRED) {
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1305 
// 8-bit intra prediction driver for one transform block.
//
// Gathers the above row and left column around `ref` into local padded
// buffers (substituting default values where neighbours are unavailable),
// optionally filters / upsamples those edges for directional modes, and then
// dispatches to the filter-intra, directional, DC or generic predictor, which
// writes the prediction into `dst`.
//
//   ref, ref_stride     - reconstructed samples; ref points at the block's
//                         top-left sample, the row above and column to the
//                         left are read relative to it
//   dst, dst_stride     - output block
//   mode, angle_delta   - prediction mode; for directional modes the angle is
//                         mode_to_angle_map[mode] + angle_delta
//   filter_intra_mode   - FILTER_INTRA_MODES means filter-intra is not used
//   tx_size             - selects the block width / height
//   disable_edge_filter - non-zero skips edge filtering and upsampling
//   n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       - number of reference samples available on each edge
//   plane               - passed to get_filt_type() to pick the neighbours
//                         that decide the edge filter type
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // Index 0 of each edge sits 16 entries into its backing array, so negative
  // indices (the above-left sample at [-1], and entries written below it by
  // the filtering / upsampling steps) stay inside the buffer.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

  // The default values if ref pixels are not available:
  // 127 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // For directional modes the required edges follow from the final angle
  // rather than from the extend_modes[] table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always reads the above row, left column and corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Shortcut: the mode uses a single edge and that edge is unavailable. The
  // whole block is filled with one value taken from the other edge if it
  // exists, or from the defaults above if it does not.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy the available left (and bottom-left) samples, then pad the rest
      // by repeating the last copied sample.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left samples: fall back to the first above sample, or 129.
      if (n_top_px > 0) {
        memset(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        memset(left_col, 129, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the available above (and above-right) samples, then pad the rest
      // by repeating the last copied sample.
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else {
      // No above samples: fall back to the first left sample, or 127.
      if (n_left_px > 0) {
        memset(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        memset(above_row, 127, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner, stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for the purely vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // When the corner is in use the filtered span starts one sample
        // earlier, at index -1.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the resulting flags are passed
      // on to the directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  // DC has separate variants selected by which edges are actually available.
  if (mode == DC_PRED) {
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1488 
/*
 * Produces the intra prediction for a single transform block.
 *
 * Two paths exist:
 *  - use_palette != 0: every output pixel is looked up from the block's
 *    palette through the plane's color index map; no edge pixels are used.
 *  - otherwise: the availability of the top, top-right, left and bottom-left
 *    neighbours is worked out and the pixel counts for each edge are handed to
 *    build_intra_predictors() (or build_intra_predictors_high() when the
 *    current buffer carries YV12_FLAG_HIGHBITDEPTH).
 *
 * cm                 common state; used for the neighbour availability
 *                    queries and the sequence-level edge-filter switch.
 * xd                 per-block decoder/encoder state (mode info, planes, tile
 *                    limits, distances to the frame edges).
 * wpx, hpx           width and height, in pixels, of the plane block that
 *                    contains this transform block; wpx is also the row pitch
 *                    used to index the color index map.
 * tx_size            transform size of the block being predicted.
 * mode, angle_delta, filter_intra_mode
 *                    forwarded unchanged to the predictor builders.
 * ref, ref_stride    reference buffer and its stride, forwarded unchanged.
 * dst, dst_stride    destination buffer and its stride.
 * col_off, row_off   position of the transform block inside the plane block,
 *                    in units of 1 << tx_size_wide_log2[0] columns and
 *                    1 << tx_size_high_log2[0] rows.
 * plane              plane index.
 */
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel position of the transform block inside the plane block.
  const int x = col_off << tx_size_wide_log2[0];
  const int y = row_off << tx_size_high_log2[0];
  // Both paths below need to know the sample width, so test the flag once.
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) != 0;

  if (use_palette) {
    const int is_chroma = plane != 0;
    const uint8_t *const map = xd->plane[is_chroma].color_index_map +
                               xd->color_index_map_offset[is_chroma];
    // Each plane owns PALETTE_MAX_SIZE consecutive entries of palette_colors.
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;

    if (is_hbd) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const idx_row = map + (r + y) * wpx + x;
        uint16_t *const out_row = dst16 + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) {
          out_row[c] = palette[idx_row[c]];
        }
      }
    } else {
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const idx_row = map + (r + y) * wpx + x;
        uint8_t *const out_row = dst + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) {
          out_row[c] = (uint8_t)palette[idx_row[c]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // A block that is not in the first row/column of its plane block always has
  // a neighbour inside that plane block; otherwise defer to the availability
  // flags recorded in xd (chroma variants for subsampled planes).
  const int up_flag = ss_y ? xd->chroma_up_available : xd->up_available;
  const int left_flag = ss_x ? xd->chroma_left_available : xd->left_available;
  const int have_top = row_off || up_flag;
  const int have_left = col_off || left_flag;

  const int mi_row = (-xd->mb_to_top_edge) >> (3 + MI_SIZE_LOG2);
  const int mi_col = (-xd->mb_to_left_edge) >> (3 + MI_SIZE_LOG2);

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx);
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx);

  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y);

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  // Number of pixels handed to the predictor builder for each edge; every
  // count is capped at the transform dimension and is zero when the
  // corresponding neighbour is unavailable.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

  if (is_hbd) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px, plane);
  } else {
    build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                           angle_delta, filter_intra_mode, tx_size,
                           disable_edge_filter, n_top_px, n_topright_px,
                           n_left_px, n_bottomleft_px, plane);
  }
}
1584 
/*
 * Convenience entry point that predicts one transform block of `plane`
 * in place inside that plane's destination buffer.
 *
 * The prediction mode, palette usage, filter-intra mode and angle delta are
 * all taken from the current block's mode info (xd->mi[0]); the destination
 * buffer doubles as the reference buffer for av1_predict_intra_block().
 *
 * For a chroma plane whose uv_mode is UV_CFL_PRED the function additionally
 * runs cfl_predict_block() on the result. In that path the regular predictor
 * is only invoked while the plane's dc_pred_is_cached flag is clear; when
 * use_dc_pred_cache is set its output is passed to cfl_store_dc_pred() and the
 * flag is raised, so that later calls go through cfl_load_dc_pred() instead.
 *
 * cm                common state, forwarded to av1_predict_intra_block().
 * xd                per-block state; its CfL cache flags may be updated.
 * plane             plane index.
 * blk_col, blk_row  position of the transform block within the plane block,
 *                   in units of 1 << tx_size_wide_log2[0] pixels.
 * tx_size           transform size of the block being predicted.
 */
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int is_luma = (plane == AOM_PLANE_Y);
  const int stride = pd->dst.stride;
  uint8_t *const out =
      pd->dst.buf + ((blk_row * stride + blk_col) << tx_size_wide_log2[0]);

  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  const int angle_delta = mbmi->angle_delta[!is_luma] * ANGLE_STEP;

  // FILTER_INTRA_MODES is passed unless this is a luma block that has
  // use_filter_intra set.
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  const int is_cfl = !is_luma && mbmi->uv_mode == UV_CFL_PRED;
  if (!is_cfl) {
    // Ordinary path: predict straight into the destination buffer.
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, out,
                            stride, out, stride, blk_col, blk_row, plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);

  if (cfl->dc_pred_is_cached[pred_plane]) {
    // A previous call already stored the prediction for this plane.
    cfl_load_dc_pred(xd, out, stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, out,
                            stride, out, stride, blk_col, blk_row, plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, out, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }

  cfl_predict_block(xd, out, stride, tx_size, plane);
}
1637 
av1_init_intra_predictors(void)1638 void av1_init_intra_predictors(void) {
1639   aom_once(init_intra_predictors_internal);
1640 }
1641