1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19 
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/blockd.h"
25 #include "av1/common/mvref_common.h"
26 #include "av1/common/obmc.h"
27 #include "av1/common/reconinter.h"
28 #include "av1/common/reconintra.h"
29 
30 // This function will determine whether or not to create a warped
31 // prediction.
// Decides whether this block should use a warped prediction. Returns 1 when
// warping applies (and, if final_warp_params is non-NULL, fills it with the
// chosen motion parameters); returns 0 otherwise. When it returns 0 after the
// scale check, *final_warp_params has been reset to default_warp_params.
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, const struct scale_factors *const sf,
                   WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params) *final_warp_params = default_warp_params;

  // OBMC predictions never warp.
  if (build_for_obmc) return 0;

  // Local (per-block) warp takes precedence over global motion.
  const WarpedMotionParams *chosen = NULL;
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    chosen = &mbmi->wm_params;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    chosen = gm_params;
  }

  if (chosen == NULL) return 0;
  if (final_warp_params) *final_warp_params = *chosen;
  return 1;
}
58 
// Initializes the per-block inter prediction parameters. The prediction mode
// starts as plain translation with a uniform single reference; callers switch
// to compound/warp modes afterwards (av1_init_comp_mode/av1_init_warp_params).
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
                           int block_height, int pix_row, int pix_col,
                           int subsampling_x, int subsampling_y, int bit_depth,
                           int use_hbd_buf, int is_intrabc,
                           const struct scale_factors *sf,
                           const struct buf_2d *ref_buf,
                           int_interpfilters interp_filters) {
  inter_pred_params->mode = TRANSLATION_PRED;
  inter_pred_params->comp_mode = UNIFORM_SINGLE;

  inter_pred_params->block_width = block_width;
  inter_pred_params->block_height = block_height;
  inter_pred_params->pix_row = pix_row;
  inter_pred_params->pix_col = pix_col;
  inter_pred_params->subsampling_x = subsampling_x;
  inter_pred_params->subsampling_y = subsampling_y;
  inter_pred_params->bit_depth = bit_depth;
  inter_pred_params->use_hbd_buf = use_hbd_buf;
  inter_pred_params->is_intrabc = is_intrabc;
  inter_pred_params->scale_factors = sf;
  inter_pred_params->ref_frame_buf = *ref_buf;

  if (is_intrabc) {
    // Intra block copy uses its dedicated filter for both directions.
    for (int dir = 0; dir < 2; ++dir)
      inter_pred_params->interp_filter_params[dir] = &av1_intrabc_filter_params;
  } else {
    // Select x/y filters sized to the block's width/height respectively.
    inter_pred_params->interp_filter_params[0] =
        av1_get_interp_filter_params_with_block_size(
            interp_filters.as_filters.x_filter, block_width);
    inter_pred_params->interp_filter_params[1] =
        av1_get_interp_filter_params_with_block_size(
            interp_filters.as_filters.y_filter, block_height);
  }
}
92 
// Switches an already-initialized InterPredParams from single-reference to
// uniform compound (two-reference) mode.
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
  inter_pred_params->comp_mode = UNIFORM_COMP;
}
96 
// Upgrades the prediction mode to WARP_PRED when warping is applicable for
// this block (per av1_allow_warp), filling inter_pred_params->warp_params.
// No-op for blocks smaller than 8x8 or when integer-MV is forced.
void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  const int too_small = inter_pred_params->block_height < 8 ||
                        inter_pred_params->block_width < 8;
  if (too_small || xd->cur_frame_force_integer_mv) return;

  const int warp_ok =
      av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                     /*build_for_obmc=*/0, inter_pred_params->scale_factors,
                     &inter_pred_params->warp_params);
  if (warp_ok) inter_pred_params->mode = WARP_PRED;
}
110 
// Generates the inter prediction for one block into dst, dispatching on the
// mode stored in inter_pred_params: translational subpel filtering, or (in
// non-realtime builds) a warped prediction via av1_warp_plane.
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  // Compound predictions accumulate into conv_params.dst, so it must be set.
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    // High-bitdepth buffers take the highbd path; otherwise the 8-bit path.
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  }
#endif
}
158 
// 1-D cross-sections of the master wedge edge: weights ramp from 0 to 64
// across the transition. init_wedge_master_masks() builds each row of the
// 2-D master masks by shift-copying these profiles (odd/even rows for the
// oblique edge, a fixed profile for the vertical edge).
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
177 
// Copies `width` bytes from src to dst, shifted right by `shift` positions
// (left when shift is negative). Bytes shifted out are dropped; the vacated
// region is filled by replicating the nearest surviving edge byte.
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                                  int width) {
  if (shift < 0) {
    // Shift left: drop the first `amt` source bytes, replicate the last byte
    // into the tail.
    const int amt = -shift;
    memcpy(dst, src + amt, width - amt);
    memset(dst + width - amt, src[width - 1], amt);
  } else {
    // Shift right: replicate the first byte into the leading gap.
    memset(dst, src[0], shift);
    memcpy(dst + shift, src, width - shift);
  }
}
189 
/* clang-format off */
// Per-(block size, wedge type) sign flip, XORed with the wedge's `neg` bit
// when selecting the master mask (see get_wedge_mask_inplace). Rows follow
// BLOCK_SIZES_ALL order; "not used" rows belong to block sizes that have no
// wedge prediction (wedge_types == 0 in av1_wedge_params_lookup).
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
217 
// Master wedge masks, indexed [negative][direction]; index [0] holds the
// masters and [1] their complements. Filled by init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Backing storage for the per-block-size wedge masks cut from the masters
// (populated by init_wedge_masks; wedge_masks points into this buffer).
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Smooth inter-intra blend masks per (interintra mode, block size); filled
// by init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Per-block-size, per-sign wedge mask pointer tables (into wedge_mask_buf),
// set up by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
233 
// Wedge codebooks: each entry is { edge direction, x_offset, y_offset } with
// offsets in units of (block dimension / 8); get_wedge_mask_inplace scales
// them by bw/bh and shifts right by 3. Per av1_wedge_params_lookup, the
// three variants are used for blocks taller than wide (hgtw), wider than
// tall (hltw), and square (heqw).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
266 
// Per-block-size wedge parameters: { wedge type count, codebook, signflip
// row, mask pointer table }, in BLOCK_SIZES_ALL order. Sizes without wedge
// support get { 0, NULL, NULL, NULL }.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};
300 
get_wedge_mask_inplace(int wedge_index,int neg,BLOCK_SIZE sb_type)301 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
302                                              BLOCK_SIZE sb_type) {
303   const uint8_t *master;
304   const int bh = block_size_high[sb_type];
305   const int bw = block_size_wide[sb_type];
306   const wedge_code_type *a =
307       av1_wedge_params_lookup[sb_type].codebook + wedge_index;
308   int woff, hoff;
309   const uint8_t wsignflip =
310       av1_wedge_params_lookup[sb_type].signflip[wedge_index];
311 
312   assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
313   woff = (a->x_offset * bw) >> 3;
314   hoff = (a->y_offset * bh) >> 3;
315   master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
316            MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
317            MASK_MASTER_SIZE / 2 - woff;
318   return master;
319 }
320 
av1_get_compound_type_mask(const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type)321 const uint8_t *av1_get_compound_type_mask(
322     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
323   (void)sb_type;
324   switch (comp_data->type) {
325     case COMPOUND_WEDGE:
326       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
327                                           comp_data->wedge_sign, sb_type);
328     default: return comp_data->seg_mask;
329   }
330 }
331 
diffwtd_mask_d16(uint8_t * mask,int which_inverse,int mask_base,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,int h,int w,ConvolveParams * conv_params,int bd)332 static AOM_INLINE void diffwtd_mask_d16(
333     uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
334     int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
335     ConvolveParams *conv_params, int bd) {
336   int round =
337       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
338   int i, j, m, diff;
339   for (i = 0; i < h; ++i) {
340     for (j = 0; j < w; ++j) {
341       diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
342       diff = ROUND_POWER_OF_TWO(diff, round);
343       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
344       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
345     }
346   }
347 }
348 
// C reference for building a DIFFWTD compound mask from d16 predictions.
// Both supported mask types use mask_base 38; the _INV variant only inverts
// the resulting mask.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else {
    assert(0);
  }
}
365 
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)366 static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
367                                     int mask_base, const uint8_t *src0,
368                                     int src0_stride, const uint8_t *src1,
369                                     int src1_stride, int h, int w) {
370   int i, j, m, diff;
371   for (i = 0; i < h; ++i) {
372     for (j = 0; j < w; ++j) {
373       diff =
374           abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
375       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
376       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
377     }
378   }
379 }
380 
// C reference for building a DIFFWTD compound mask from 8-bit predictions.
// Both supported types use mask_base 38; _INV inverts the mask.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else {
    assert(0);
  }
}
396 
// High-bitdepth variant of diffwtd_mask. The four near-identical loops
// (bd==8 vs bd>8, inverse vs not) keep the bit-depth shift and the inverse
// test out of the inner loop — presumably deliberate for this force-inlined
// path; verify before collapsing them.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit content: no normalization shift is needed on the difference.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // Higher bit depths: scale the difference back to 8-bit range first.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
459 
// C reference for building a DIFFWTD compound mask from high-bitdepth
// predictions (src pointers are CONVERT_TO_SHORTPTR'd). Both supported types
// use mask_base 38; _INV inverts the mask.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
476 
init_wedge_master_masks()477 static AOM_INLINE void init_wedge_master_masks() {
478   int i, j;
479   const int w = MASK_MASTER_SIZE;
480   const int h = MASK_MASTER_SIZE;
481   const int stride = MASK_MASTER_STRIDE;
482   // Note: index [0] stores the masters, and [1] its complement.
483   // Generate prototype by shifting the masters
484   int shift = h / 4;
485   for (i = 0; i < h; i += 2) {
486     shift_copy(wedge_master_oblique_even,
487                &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
488                MASK_MASTER_SIZE);
489     shift--;
490     shift_copy(wedge_master_oblique_odd,
491                &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
492                MASK_MASTER_SIZE);
493     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
494            wedge_master_vertical,
495            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
496     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
497            wedge_master_vertical,
498            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
499   }
500 
501   for (i = 0; i < h; ++i) {
502     for (j = 0; j < w; ++j) {
503       const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
504       wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
505       wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
506           wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
507               (1 << WEDGE_WEIGHT_BITS) - msk;
508       wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
509           wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
510               (1 << WEDGE_WEIGHT_BITS) - msk;
511       wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
512           wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
513       const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
514       wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
515       wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
516           wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
517               (1 << WEDGE_WEIGHT_BITS) - mskx;
518     }
519   }
520 }
521 
init_wedge_masks()522 static AOM_INLINE void init_wedge_masks() {
523   uint8_t *dst = wedge_mask_buf;
524   BLOCK_SIZE bsize;
525   memset(wedge_masks, 0, sizeof(wedge_masks));
526   for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
527     const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
528     const int wtypes = wedge_params->wedge_types;
529     if (wtypes == 0) continue;
530     const uint8_t *mask;
531     const int bw = block_size_wide[bsize];
532     const int bh = block_size_high[bsize];
533     int w;
534     for (w = 0; w < wtypes; ++w) {
535       mask = get_wedge_mask_inplace(w, 0, bsize);
536       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
537                         bh);
538       wedge_params->masks[0][w] = dst;
539       dst += bw * bh;
540 
541       mask = get_wedge_mask_inplace(w, 1, bsize);
542       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
543                         bh);
544       wedge_params->masks[1][w] = dst;
545       dst += bw * bh;
546     }
547     assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
548   }
549 }
550 
/* clang-format off */
// 1-D inter-intra blend weights, indexed by (distance * ii_size_scales):
// values decay from 60 toward 1 as the distance from the intra edge grows.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
// Per-block-size step through ii_weights1d (see build_smooth_interintra_mask);
// smaller steps for larger blocks so the fade spans the block.
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */
567 
build_smooth_interintra_mask(uint8_t * mask,int stride,BLOCK_SIZE plane_bsize,INTERINTRA_MODE mode)568 static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
569                                                     BLOCK_SIZE plane_bsize,
570                                                     INTERINTRA_MODE mode) {
571   int i, j;
572   const int bw = block_size_wide[plane_bsize];
573   const int bh = block_size_high[plane_bsize];
574   const int size_scale = ii_size_scales[plane_bsize];
575 
576   switch (mode) {
577     case II_V_PRED:
578       for (i = 0; i < bh; ++i) {
579         memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
580         mask += stride;
581       }
582       break;
583 
584     case II_H_PRED:
585       for (i = 0; i < bh; ++i) {
586         for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
587         mask += stride;
588       }
589       break;
590 
591     case II_SMOOTH_PRED:
592       for (i = 0; i < bh; ++i) {
593         for (j = 0; j < bw; ++j)
594           mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
595         mask += stride;
596       }
597       break;
598 
599     case II_DC_PRED:
600     default:
601       for (i = 0; i < bh; ++i) {
602         memset(mask, 32, bw * sizeof(mask[0]));
603         mask += stride;
604       }
605       break;
606   }
607 }
608 
init_smooth_interintra_masks()609 static AOM_INLINE void init_smooth_interintra_masks() {
610   for (int m = 0; m < INTERINTRA_MODES; ++m) {
611     for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
612       const int bw = block_size_wide[bs];
613       const int bh = block_size_high[bs];
614       if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
615       build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
616                                    m);
617     }
618   }
619 }
620 
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-time initialization of all wedge and smooth inter-intra mask tables.
// Fix: use a (void) prototype — empty parentheses declare a non-prototype
// function in C (deprecated; semantics changed in C23).
void av1_init_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
627 
// Blends two d16 (pre-rounding convolve) predictions into dst using the
// compound's mask, honoring the plane's chroma subsampling. The mask is
// looked up at luma resolution (stride = block width of sb_type).
static AOM_INLINE void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}
654 
// Builds the second reference's prediction into a temporary d16 buffer, then
// blends it against the first reference's prediction (already accumulated in
// conv_params.dst by the caller) using the compound mask, writing the result
// to dst. Note: conv_params.dst/dst_stride are redirected to the temporary
// buffer for the inner call and are NOT restored afterwards.
static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                        uint8_t *dst, int dst_stride,
                                        InterPredParams *inter_pred_params,
                                        const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  // Save the caller's accumulation buffer before redirecting conv_params at
  // the temporary; tmp_buf is reused as both the byte dst and the d16 dst.
  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  // DIFFWTD masks are derived from the two predictions themselves; compute
  // them once on plane 0 (chroma reuses the stored seg_mask).
  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
694 
// Builds the inter prediction for one reference ('ref') of one block into
// 'dst'. Resolves the sub-pixel source position via the provided callback,
// then either makes a plain prediction or a masked-compound one depending on
// the compound mode in 'inter_pred_params'.
void av1_build_one_inter_predictor(
    uint8_t *dst, int dst_stride, const MV *const src_mv,
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
  uint8_t *src = NULL;
  int src_stride = 0;
  SubpelParams subpel_params;
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
                          mc_buf, &src, &subpel_params, &src_stride);

  const int uniform_blend =
      inter_pred_params->comp_mode == UNIFORM_SINGLE ||
      inter_pred_params->comp_mode == UNIFORM_COMP;
  if (uniform_blend) {
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
                             inter_pred_params, &subpel_params);
  } else {
    // MASK_COMP: the second reference is blended with the first via a mask.
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
                                inter_pred_params, &subpel_params);
  }
}
714 
// Assigns the forward/backward blending weights for distance-weighted
// compound averaging, based on the temporal distance (in order-hint units)
// of each reference frame from the current frame.
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int order_idx,
                                     int *fwd_offset, int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  // Single reference, or compound_idx set: plain equal-weight (8/8) average.
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  // Absolute temporal distances of each reference, clamped to
  // MAX_FRAME_DISTANCE.
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  // order == 1 when ref 0 is at least as close as ref 1.
  const int order = d0 <= d1;

  // Either distance zero: use the last (most lopsided) weight pair.
  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
    *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
    return;
  }

  // Find the first quantized-distance bucket whose weight ratio brackets the
  // actual distance ratio. NOTE: if no bucket matches, 'i' is intentionally
  // left at 3 after the loop and indexes the final table row.
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
  *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
}
764 
765 // True if the following hold:
766 //  1. Not intrabc and not build_for_obmc
767 //  2. At least one dimension is size 4 with subsampling
768 //  3. If sub-sampled, none of the previous blocks around the sub-sample
769 //     are intrabc or inter-blocks
is_sub8x8_inter(const MACROBLOCKD * xd,int plane,BLOCK_SIZE bsize,int is_intrabc,int build_for_obmc)770 static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
771                             int is_intrabc, int build_for_obmc) {
772   if (is_intrabc || build_for_obmc) {
773     return false;
774   }
775 
776   const struct macroblockd_plane *const pd = &xd->plane[plane];
777   const int ss_x = pd->subsampling_x;
778   const int ss_y = pd->subsampling_y;
779   const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
780   const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
781   if (!is_sub4_x && !is_sub4_y) {
782     return false;
783   }
784 
785   // For sub8x8 chroma blocks, we may be covering more than one luma block's
786   // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
787   // the top-left corner of the prediction source - the correct top-left corner
788   // is at (pre_x, pre_y).
789   const int row_start = is_sub4_y ? -1 : 0;
790   const int col_start = is_sub4_x ? -1 : 0;
791 
792   for (int row = row_start; row <= 0; ++row) {
793     for (int col = col_start; col <= 0; ++col) {
794       const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
795       if (!is_inter_block(this_mbmi)) return false;
796       if (is_intrabc_block(this_mbmi)) return false;
797     }
798   }
799   return true;
800 }
801 
// Builds chroma inter predictions for sub-8x8 blocks by iterating over the
// covered 4x4 luma units and predicting each from its own neighbor's motion
// info (see is_sub8x8_inter for when this path is taken).
static void build_inter_predictors_sub8x8(
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
    int mi_x, int mi_y, uint8_t **mc_buf,
    CalcSubpelParamsFunc calc_subpel_params_func) {
  const BLOCK_SIZE bsize = mi->bsize;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const bool ss_x = pd->subsampling_x;
  const bool ss_y = pd->subsampling_y;
  // b4_w/b4_h: size of one prediction unit in this plane's pixels.
  const int b4_w = block_size_wide[bsize] >> ss_x;
  const int b4_h = block_size_high[bsize] >> ss_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
  // b8_w/b8_h: full extent of the plane block being covered.
  const int b8_w = block_size_wide[plane_bsize];
  const int b8_h = block_size_high[plane_bsize];
  const int is_compound = has_second_ref(mi);
  // This path only handles single-reference, non-intrabc blocks.
  assert(!is_compound);
  assert(!is_intrabc_block(mi));

  // For sub8x8 chroma blocks, we may be covering more than one luma block's
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
  // the top-left corner of the prediction source - the correct top-left corner
  // is at (pre_x, pre_y).
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;

  int row = row_start;
  for (int y = 0; y < b8_h; y += b4_h) {
    int col = col_start;
    for (int x = 0; x < b8_w; x += b4_w) {
      // Motion info is taken from the neighbor covering this sub-unit.
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
      struct buf_2d *const dst_buf = &pd->dst;
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
      int ref = 0;
      const RefCntBuffer *ref_buf =
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
      const struct scale_factors *ref_scale_factors =
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
      const struct scale_factors *const sf = ref_scale_factors;
      // Chroma-only path: plane is 1 (U) or 2 (V), so pick the matching
      // chroma buffer and its crop dimensions/stride.
      const struct buf_2d pre_buf = {
        NULL,
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
        ref_buf->buf.uv_crop_width,
        ref_buf->buf.uv_crop_height,
        ref_buf->buf.uv_stride,
      };

      const MV mv = this_mbmi->mv[ref].as_mv;

      InterPredParams inter_pred_params;
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
                            &pre_buf, this_mbmi->interp_filters);
      inter_pred_params.conv_params =
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);

      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
                                    ref, mc_buf, calc_subpel_params_func);

      ++col;
    }
    ++row;
  }
}
868 
// Builds the inter prediction for one plane of a block at least 8x8 in size
// (or any block during an OBMC build), handling single and compound
// references, global/warped motion, and masked compound modes.
static void build_inter_predictors_8x8_and_bigger(
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
    CalcSubpelParamsFunc calc_subpel_params_func) {
  const int is_compound = has_second_ref(mi);
  const int is_intrabc = is_intrabc_block(mi);
  assert(IMPLIES(is_intrabc, !is_compound));
  struct macroblockd_plane *const pd = &xd->plane[plane];
  struct buf_2d *const dst_buf = &pd->dst;
  uint8_t *const dst = dst_buf->buf;

  // Per-reference flag: is this a global-motion block for that reference?
  int is_global[2] = { 0, 0 };
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
  }

  const BLOCK_SIZE bsize = mi->bsize;
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  // Sub-sampled size-4 dimensions shift the prediction origin by one MI
  // (except for OBMC builds); see the sub8x8 path for the same adjustment.
  const int row_start =
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
  const int col_start =
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;

  for (int ref = 0; ref < 1 + is_compound; ++ref) {
    // Intrabc predicts from the current frame's own reconstructed pixels.
    const struct scale_factors *const sf =
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
    const MV mv = mi->mv[ref].as_mv;
    const WarpTypesAllowed warp_types = { is_global[ref],
                                          mi->motion_mode == WARPED_CAUSAL };

    InterPredParams inter_pred_params;
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
                          mi->interp_filters);
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
    inter_pred_params.conv_params = get_conv_params_no_round(
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);

    av1_dist_wtd_comp_weight_assign(
        cm, mi, 0, &inter_pred_params.conv_params.fwd_offset,
        &inter_pred_params.conv_params.bck_offset,
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);

    // Warped motion is never applied to OBMC-extended predictions.
    if (!build_for_obmc)
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);

    if (is_masked_compound_type(mi->interinter_comp.type)) {
      inter_pred_params.sb_type = mi->bsize;
      inter_pred_params.mask_comp = mi->interinter_comp;
      // Only the second reference takes the masked-blend path; the first is
      // written straight into the conv buffer.
      if (ref == 1) {
        inter_pred_params.conv_params.do_average = 0;
        inter_pred_params.comp_mode = MASK_COMP;
      }
      // Assign physical buffer.
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
    }

    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
                                  xd, mi_x, mi_y, ref, mc_buf,
                                  calc_subpel_params_func);
  }
}
937 
// Top-level entry for building one plane's inter prediction: routes to the
// sub-8x8 chroma path when applicable, otherwise to the general path.
void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                int plane, const MB_MODE_INFO *mi,
                                int build_for_obmc, int bw, int bh, int mi_x,
                                int mi_y, uint8_t **mc_buf,
                                CalcSubpelParamsFunc calc_subpel_params_func) {
  const bool use_sub8x8_path = is_sub8x8_inter(
      xd, plane, mi->bsize, is_intrabc_block(mi), build_for_obmc);
  if (!use_sub8x8_path) {
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
                                          bh, mi_x, mi_y, mc_buf,
                                          calc_subpel_params_func);
    return;
  }
  assert(bw < 8 || bh < 8);
  build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
                                calc_subpel_params_func);
}
// Points each plane's 'dst' buffer at the corresponding plane of 'src',
// positioned at (mi_row, mi_col).
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  const int last_plane = AOMMIN(plane_end, MAX_MB_PLANE);
  for (int p = plane_start; p < last_plane; ++p) {
    struct macroblockd_plane *const pd = &planes[p];
    // Planes 1 and 2 (chroma) share the UV crop dimensions and stride.
    const int is_uv = p > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[p], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}
967 
// Points each plane's pre[idx] buffer at the corresponding plane of the
// reference frame 'src', positioned at (mi_row, mi_col) with scaling 'sf'.
// A NULL 'src' is a no-op.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src == NULL) return;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  const int n = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int p = 0; p < n; ++p) {
    struct macroblockd_plane *const pd = &xd->plane[p];
    // Planes 1 and 2 (chroma) share the UV crop dimensions and stride.
    const int is_uv = p > 0;
    setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[p],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
985 
// obmc_mask_N[overlap_position]
// Per-pixel blend weights (out of 64) for an overlap region N pixels deep;
// index 0 is the edge nearest the neighbor, and weights grow toward 64
// (i.e. fully the current block's own prediction) away from it.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
1008 
av1_get_obmc_mask(int length)1009 const uint8_t *av1_get_obmc_mask(int length) {
1010   switch (length) {
1011     case 1: return obmc_mask_1;
1012     case 2: return obmc_mask_2;
1013     case 4: return obmc_mask_4;
1014     case 8: return obmc_mask_8;
1015     case 16: return obmc_mask_16;
1016     case 32: return obmc_mask_32;
1017     case 64: return obmc_mask_64;
1018     default: assert(0); return NULL;
1019   }
1020 }
1021 
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)1022 static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
1023                                      int rel_mi_col, uint8_t op_mi_size,
1024                                      int dir, MB_MODE_INFO *mi, void *fun_ctxt,
1025                                      const int num_planes) {
1026   (void)xd;
1027   (void)rel_mi_row;
1028   (void)rel_mi_col;
1029   (void)op_mi_size;
1030   (void)dir;
1031   (void)mi;
1032   ++*(int *)fun_ctxt;
1033   (void)num_planes;
1034 }
1035 
// Counts how many overlappable neighbors the current block has, storing the
// result in mbmi->overlappable_neighbors. Stops after the above row if any
// were found there, since the caller only needs to know whether the count is
// non-zero.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const mbmi = xd->mi[0];
  mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors != 0) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
1049 
1050 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1051 // block-size of current plane is smaller than 8x8, always only blend with the
1052 // left neighbor(s) (skip blending with the above side).
1053 #define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1054 
// Returns nonzero when OBMC blending should be skipped for this plane in
// direction 'dir' (0 = above, 1 = left) because the plane block is smaller
// than 8x8 (see DISABLE_CHROMA_U8X8_OBMC above).
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_sub8x8 = bsize_plane == BLOCK_4X4 ||
                        bsize_plane == BLOCK_8X4 || bsize_plane == BLOCK_4X8;
  if (!is_sub8x8) return 0;
#if DISABLE_CHROMA_U8X8_OBMC
  return 1;
#else
  // One-sided OBMC: skip only the above direction for tiny plane blocks.
  return dir == 0;
#endif
}
1074 
// Simplifies a neighbor's mode info before using it for OBMC prediction:
// drop the second reference and force plain compound averaging.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}
1081 
// Context passed through the overlappable-neighbor iterators to the OBMC
// blending callbacks: per-plane adjacent predictor buffers and their strides.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;      // one prediction buffer per plane
  int *adjacent_stride;    // matching stride per plane
};
1086 
// Callback run once per overlappable above-neighbor: blends that neighbor's
// prediction (ctxt->adjacent) into the top rows of the current block's dst
// using a vertical OBMC mask of height 'overlap'.
static INLINE void build_obmc_inter_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
  (void)above_mi;
  (void)rel_mi_row;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  // Overlap depth: half the block height, capped at half of 64.
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    // Blend in place: dst is both a source and the destination.
    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}
1125 
// Callback run once per overlappable left-neighbor: blends that neighbor's
// prediction (ctxt->adjacent) into the leftmost columns of the current
// block's dst using a horizontal OBMC mask of width 'overlap'.
static INLINE void build_obmc_inter_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
  (void)left_mi;
  (void)rel_mi_col;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  // Overlap depth: half the block width, capped at half of 64.
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    // Blend in place: dst is both a source and the destination.
    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}
1165 
1166 // This function combines motion compensated predictions that are generated by
1167 // top/left neighboring blocks' inter predictors with the regular inter
1168 // prediction. We assume the original prediction (bmc) is stored in
1169 // xd->plane[].dst.buf
// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int max_above = max_neighbor_obmc[mi_size_wide_log2[bsize]];
  const int max_left = max_neighbor_obmc[mi_size_high_log2[bsize]];

  // Blend in contributions from the above row of neighbors.
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd, max_above, build_obmc_inter_pred_above,
                                &ctxt_above);

  // Blend in contributions from the left column of neighbors.
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd, max_left, build_obmc_inter_pred_left,
                               &ctxt_left);
}
1189 
// Splits the two scratch OBMC buffers (xd->tmp_obmc_bufs[0..1]) into three
// per-plane destination pointers each, converting to byte pointers in the
// high-bit-depth case.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  uint8_t **const dsts[2] = { dst_buf1, dst_buf2 };
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    // HBD samples are uint16_t, so each plane occupies
    // MAX_SB_SQUARE * sizeof(uint16_t) bytes of the scratch buffer.
    for (int buf = 0; buf < 2; ++buf) {
      for (int plane = 0; plane < 3; ++plane) {
        dsts[buf][plane] = CONVERT_TO_BYTEPTR(
            xd->tmp_obmc_bufs[buf] + MAX_SB_SQUARE * plane * sizeof(uint16_t));
      }
    }
    return;
  }
#endif  // CONFIG_AV1_HIGHBITDEPTH
  for (int buf = 0; buf < 2; ++buf) {
    for (int plane = 0; plane < 3; ++plane) {
      dsts[buf][plane] = xd->tmp_obmc_bufs[buf] + MAX_SB_SQUARE * plane;
    }
  }
}
1217 
// Re-targets xd so that a subsequent prediction build produces the above
// neighbor's prediction into ctxt->tmp_buf: redirects dst planes, sets up the
// neighbor's reference planes/scale factors, and adjusts the MV clamp edges.
// NOTE: mutates xd and above_mbmi; callers restore state afterwards.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  // Neighbors smaller than 8x8 are treated as 8x8 for OBMC purposes.
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  // Drop the neighbor's second reference / compound type (see
  // av1_modify_neighbor_predictor_for_obmc).
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  // Point each plane's dst at the temporary OBMC buffers.
  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  // Install the neighbor's reference buffers and scale factors.
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if ((!av1_is_valid_scale(sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // Update horizontal MV limits to match the neighbor's position/extent
  // (distances are in 1/8-pel units, hence the * 8).
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}
1255 
// Re-targets xd so that a subsequent prediction build produces the left
// neighbor's prediction into ctxt->tmp_buf: redirects dst planes, sets up the
// neighbor's reference planes/scale factors, and adjusts the MV clamp edges.
// NOTE: mutates xd and left_mbmi; callers restore state afterwards.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  // Neighbors smaller than 8x8 are treated as 8x8 for OBMC purposes.
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  // Drop the neighbor's second reference / compound type (see
  // av1_modify_neighbor_predictor_for_obmc).
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  // Point each plane's dst at the temporary OBMC buffers.
  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  // Install the neighbor's reference buffers and scale factors.
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if ((!av1_is_valid_scale(ref_scale_factors)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  // Update vertical MV limits to match the neighbor's position/extent
  // (GET_MV_SUBPEL converts MI-unit distances to 1/8-pel units).
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
1295 
combine_interintra(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred,int compstride,const uint8_t * interpred,int interstride,const uint8_t * intrapred,int intrastride)1296 static AOM_INLINE void combine_interintra(
1297     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1298     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1299     uint8_t *comppred, int compstride, const uint8_t *interpred,
1300     int interstride, const uint8_t *intrapred, int intrastride) {
1301   const int bw = block_size_wide[plane_bsize];
1302   const int bh = block_size_high[plane_bsize];
1303 
1304   if (use_wedge_interintra) {
1305     if (av1_is_wedge_used(bsize)) {
1306       const uint8_t *mask =
1307           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1308       const int subw = 2 * mi_size_wide[bsize] == bw;
1309       const int subh = 2 * mi_size_high[bsize] == bh;
1310       aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1311                          interpred, interstride, mask, block_size_wide[bsize],
1312                          bw, bh, subw, subh);
1313     }
1314     return;
1315   }
1316 
1317   const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1318   aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1319                      interstride, mask, bw, bw, bh, 0, 0);
1320 }
1321 
#if CONFIG_AV1_HIGHBITDEPTH
// High-bit-depth counterpart of combine_interintra(): blends the inter and
// intra predictors (CONVERT_TO_BYTEPTR-style uint8_t* handles) into
// 'comppred8' at bit depth 'bd'. Unlike the 8-bit path, the smooth mask is
// built on the fly into a stack buffer.
static AOM_INLINE void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (!use_wedge_interintra) {
    // Smooth blend: generate the mode-dependent mask into a local buffer.
    uint8_t smooth_mask[MAX_SB_SQUARE];
    build_smooth_interintra_mask(smooth_mask, bw, plane_bsize, mode);
    aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                              interpred8, interstride, smooth_mask, bw, bw, bh,
                              0, 0, bd);
    return;
  }

  // Wedge path: blocks without wedge support produce no blend at all.
  if (!av1_is_wedge_used(bsize)) return;

  const uint8_t *const wedge_mask =
      av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
  // The mask is sized for 'bsize'; subw/subh flag a 2x subsampled plane.
  const int subw = 2 * mi_size_wide[bsize] == bw;
  const int subh = 2 * mi_size_high[bsize] == bh;
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, wedge_mask,
                            block_size_wide[bsize], bw, bh, subw, subh, bd);
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
1351 
// Produce the intra component of an inter-intra predictor for one plane into
// dst/dst_stride, reading neighbouring reconstructed pixels from 'ctx'.
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  // Map the inter-intra mode onto the equivalent regular intra mode.
  const PREDICTION_MODE mode =
      interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  // Inter-intra blocks never carry angle deltas, filter-intra, or intrabc.
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  av1_predict_intra_block(cm, xd, pd->width, pd->height,
                          max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
                          FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
1372 
// Blend a previously computed inter predictor with an intra predictor for one
// plane, writing the result into the plane's current destination buffer.
// Dispatches to the high-bit-depth kernel when the frame buffer is HBD.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                              mbmi->interintra_wedge_index,
                              INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
                              pd->dst.buf, pd->dst.stride, inter_pred,
                              inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1395 
1396 // build interintra_predictors for one plane
av1_build_interintra_predictor(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * pred,int stride,const BUFFER_SET * ctx,int plane,BLOCK_SIZE bsize)1397 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1398                                     uint8_t *pred, int stride,
1399                                     const BUFFER_SET *ctx, int plane,
1400                                     BLOCK_SIZE bsize) {
1401   assert(bsize < BLOCK_SIZES_ALL);
1402   if (is_cur_buf_hbd(xd)) {
1403     DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1404     av1_build_intra_predictors_for_interintra(
1405         cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1406         MAX_SB_SIZE);
1407     av1_combine_interintra(xd, bsize, plane, pred, stride,
1408                            CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1409   } else {
1410     DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1411     av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1412                                               intrapredictor, MAX_SB_SIZE);
1413     av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1414                            MAX_SB_SIZE);
1415   }
1416 }
1417