1 /*
2 * Copyright(c) 2019 Intel Corporation
3 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 *
5 * This source code is subject to the terms of the BSD 2 Clause License and
6 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7 * was not distributed with this source code in the LICENSE file, you can
8 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9 * Media Patent License 1.0 was not distributed with this source code in the
10 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11 */
12
13 #include <assert.h>
14
15 #include "EbDefinitions.h"
16
17 #include "common_dsp_rtcd.h"
18
19 // Blending with alpha mask. Mask values come from the range [0, 64],
20 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
21 // be the same as dst, or dst can be different from both sources.
22
23 // NOTE(david.barker): The input and output of aom_blend_a64_d16_mask_c() are
24 // in a higher intermediate precision, and will later be rounded down to pixel
25 // precision.
26 // Thus, in order to avoid double-rounding, we want to use normal right shifts
27 // within this function, not ROUND_POWER_OF_TWO.
28 // This works because of the identity:
29 // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
30 //
31 // In contrast, the output of the non-d16 functions will not be further rounded,
32 // so we *should* use ROUND_POWER_OF_TWO there.
33
/* Blend two d16 (higher intermediate precision) sources into an 8-bit
 * destination using an alpha mask in [0, AOM_BLEND_A64_MAX_ALPHA].
 * subw/subh select whether the mask is sampled at 2x horizontal and/or
 * vertical resolution relative to the output block.
 * A plain right shift (not ROUND_POWER_OF_TWO) is applied to the blend sum
 * to avoid double rounding; the final ROUND_POWER_OF_TWO then drops the
 * remaining intermediate precision before clipping to pixel range. */
void svt_aom_lowbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride,
                                        const CONV_BUF_TYPE *src0, uint32_t src0_stride,
                                        const CONV_BUF_TYPE *src1, uint32_t src1_stride,
                                        const uint8_t *mask, uint32_t mask_stride, int w, int h,
                                        int subw, int subh, ConvolveParams *conv_params) {
    const int bd          = 8;
    const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
    /* Bias added by the upstream d16 convolution path; removed here before
     * the final rounding shift. */
    const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
        (1 << (offset_bits - conv_params->round_1 - 1));
    const int round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;

    assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));

    assert(h >= 4);
    assert(w >= 4);

    if (subw == 0 && subh == 0) {
        /* Mask at full resolution. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = mask[row * mask_stride + col];
                int32_t   sum = (m * (int32_t)src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) *
                                   (int32_t)src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                dst[row * dst_stride + col] = clip_pixel(ROUND_POWER_OF_TWO(sum, round_bits));
            }
        }
    } else if (subw == 1 && subh == 1) {
        /* Mask at 2x resolution both ways: average each 2x2 mask patch. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[(2 * row) * mask_stride + (2 * col)] +
                                                     mask[(2 * row + 1) * mask_stride + (2 * col)] +
                                                     mask[(2 * row) * mask_stride + (2 * col + 1)] +
                                                     mask[(2 * row + 1) * mask_stride +
                                                          (2 * col + 1)],
                                                 2);
                int32_t   sum = (m * (int32_t)src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) *
                                   (int32_t)src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                dst[row * dst_stride + col] = clip_pixel(ROUND_POWER_OF_TWO(sum, round_bits));
            }
        }
    } else if (subw == 1 && subh == 0) {
        /* Mask at 2x horizontal resolution: average horizontal pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = AOM_BLEND_AVG(mask[row * mask_stride + (2 * col)],
                                            mask[row * mask_stride + (2 * col + 1)]);
                int32_t   sum = (m * (int32_t)src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) *
                                   (int32_t)src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                dst[row * dst_stride + col] = clip_pixel(ROUND_POWER_OF_TWO(sum, round_bits));
            }
        }
    } else {
        /* Mask at 2x vertical resolution: average vertical pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = AOM_BLEND_AVG(mask[(2 * row) * mask_stride + col],
                                            mask[(2 * row + 1) * mask_stride + col]);
                int32_t   sum = (m * (int32_t)src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) *
                                   (int32_t)src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                dst[row * dst_stride + col] = clip_pixel(ROUND_POWER_OF_TWO(sum, round_bits));
            }
        }
    }
}
109
/* High-bit-depth d16 mask blend: sources carry extra intermediate precision;
 * each blended value is debiased, rounded down to pixel precision and
 * saturated to (1 << bd) - 1. dst_8 aliases a uint16_t pixel buffer.
 * subw/subh select 2x mask subsampling, as in the lowbd variant. */
void svt_aom_highbd_blend_a64_d16_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                         const CONV_BUF_TYPE *src0, uint32_t src0_stride,
                                         const CONV_BUF_TYPE *src1, uint32_t src1_stride,
                                         const uint8_t *mask, uint32_t mask_stride, int w, int h,
                                         int subw, int subh, ConvolveParams *conv_params,
                                         const int bd) {
    const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
    /* Bias added by the upstream d16 convolution path. */
    const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
        (1 << (offset_bits - conv_params->round_1 - 1));
    const int round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
    uint16_t *dst        = (uint16_t *)dst_8;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    /* Saturation value (1 << bd) - 1; excerpt from clip_pixel_highbd(). */
    unsigned int saturation_value;
    switch (bd) {
    case 8:
    default: saturation_value = 255; break;
    case 10: saturation_value = 1023; break;
    case 12: saturation_value = 4095; break;
    }

    if (subw == 0 && subh == 0) {
        /* Mask at full resolution. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = mask[row * mask_stride + col];
                int32_t   sum = (m * src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) * src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                const unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(sum, round_bits));
                dst[row * dst_stride + col] = AOMMIN(v, saturation_value);
            }
        }
    } else if (subw == 1 && subh == 1) {
        /* Mask at 2x resolution both ways: average each 2x2 mask patch. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[(2 * row) * mask_stride + (2 * col)] +
                                                     mask[(2 * row + 1) * mask_stride + (2 * col)] +
                                                     mask[(2 * row) * mask_stride + (2 * col + 1)] +
                                                     mask[(2 * row + 1) * mask_stride +
                                                          (2 * col + 1)],
                                                 2);
                int32_t   sum = (m * src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) * src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                const unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(sum, round_bits));
                dst[row * dst_stride + col] = AOMMIN(v, saturation_value);
            }
        }
    } else if (subw == 1 && subh == 0) {
        /* Mask at 2x horizontal resolution: average horizontal pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = AOM_BLEND_AVG(mask[row * mask_stride + (2 * col)],
                                            mask[row * mask_stride + (2 * col + 1)]);
                int32_t   sum = (m * src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) * src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                const unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(sum, round_bits));
                dst[row * dst_stride + col] = AOMMIN(v, saturation_value);
            }
        }
    } else {
        /* Mask at 2x vertical resolution: average vertical pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m   = AOM_BLEND_AVG(mask[(2 * row) * mask_stride + col],
                                            mask[(2 * row + 1) * mask_stride + col]);
                int32_t   sum = (m * src0[row * src0_stride + col] +
                               (AOM_BLEND_A64_MAX_ALPHA - m) * src1[row * src1_stride + col]) >>
                    AOM_BLEND_A64_ROUND_BITS;
                sum -= round_offset;
                const unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(sum, round_bits));
                dst[row * dst_stride + col] = AOMMIN(v, saturation_value);
            }
        }
    }
}
209
210 // Blending with alpha mask. Mask values come from the range [0, 64],
211 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
212 // be the same as dst, or dst can be different from both sources.
213
/* Blend two 8-bit sources into dst with an alpha mask in
 * [0, AOM_BLEND_A64_MAX_ALPHA]; subw/subh select 2x mask subsampling.
 * Row pointers advance by their strides after each output row. */
void svt_aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                              uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                              const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw,
                              int subh) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    if (subw == 0 && subh == 0) {
        /* Mask at full resolution. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col)
                dst[col] = AOM_BLEND_A64(mask[col], src0[col], src1[col]);
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 1) {
        /* Mask at 2x resolution both ways: average each 2x2 mask patch. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[2 * col] + mask[mask_stride + 2 * col] +
                                                     mask[2 * col + 1] +
                                                     mask[mask_stride + 2 * col + 1],
                                                 2);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 0) {
        /* Mask at 2x horizontal resolution: average horizontal pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else {
        /* Mask at 2x vertical resolution: average vertical pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    }
}
268
/* High-bit-depth (uint16_t pixel) variant of svt_aom_blend_a64_mask_c.
 * The uint8_t pointer parameters alias uint16_t pixel buffers; bd is only
 * validated, not otherwise used. */
void svt_aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8,
                                     uint32_t src0_stride, const uint8_t *src1_8,
                                     uint32_t src1_stride, const uint8_t *mask,
                                     uint32_t mask_stride, int w, int h, int subw, int subh,
                                     int bd) {
    uint16_t       *dst  = (uint16_t *)dst_8;
    const uint16_t *src0 = (const uint16_t *)src0_8;
    const uint16_t *src1 = (const uint16_t *)src1_8;
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    if (subw == 0 && subh == 0) {
        /* Mask at full resolution. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col)
                dst[col] = AOM_BLEND_A64(mask[col], src0[col], src1[col]);
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 1) {
        /* Mask at 2x resolution both ways: average each 2x2 mask patch. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[2 * col] + mask[mask_stride + 2 * col] +
                                                     mask[2 * col + 1] +
                                                     mask[mask_stride + 2 * col + 1],
                                                 2);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 0) {
        /* Mask at 2x horizontal resolution: average horizontal pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else {
        /* Mask at 2x vertical resolution: average vertical pairs. */
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    }
}
330
331 /*Vertical mask related blend functions*/
/* Blend with a per-row (vertical) alpha mask: mask[i] applies to all of
 * output row i. Mask values are in [0, AOM_BLEND_A64_MAX_ALPHA]. */
void svt_aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                               uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                               const uint8_t *mask, int w, int h) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    for (int row = 0; row < h; ++row) {
        const int m = mask[row];
        for (int col = 0; col < w; ++col)
            dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
        dst += dst_stride;
        src0 += src0_stride;
        src1 += src1_stride;
    }
}
353
/* High-bit-depth per-row (vertical) mask blend; the uint8_t pointer
 * parameters alias uint16_t pixel buffers (CONVERT_TO_SHORTPTR-style). */
void svt_aom_highbd_blend_a64_vmask_8bit_c(uint8_t *dst_8, uint32_t dst_stride,
                                           const uint8_t *src0_8, uint32_t src0_stride,
                                           const uint8_t *src1_8, uint32_t src1_stride,
                                           const uint8_t *mask, int w, int h, int bd) {
    uint16_t       *dst  = (uint16_t *)dst_8;
    const uint16_t *src0 = (const uint16_t *)src0_8;
    const uint16_t *src1 = (const uint16_t *)src1_8;
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        const int m = mask[row];
        for (int col = 0; col < w; ++col)
            dst[col] = AOM_BLEND_A64(m, src0[col], src1[col]);
        dst += dst_stride;
        src0 += src0_stride;
        src1 += src1_stride;
    }
}
382
383 /*Horizontal mask related blend functions*/
/* Blend with a per-column (horizontal) alpha mask: mask[j] applies to all
 * of output column j. Mask values are in [0, AOM_BLEND_A64_MAX_ALPHA]. */
void svt_aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                               uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                               const uint8_t *mask, int w, int h) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col)
            dst[col] = AOM_BLEND_A64(mask[col], src0[col], src1[col]);
        dst += dst_stride;
        src0 += src0_stride;
        src1 += src1_stride;
    }
}
404
/* High-bit-depth per-column (horizontal) mask blend; the uint8_t pointer
 * parameters alias uint16_t pixel buffers (CONVERT_TO_SHORTPTR-style). */
void svt_aom_highbd_blend_a64_hmask_8bit_c(uint8_t *dst_8, uint32_t dst_stride,
                                           const uint8_t *src0_8, uint32_t src0_stride,
                                           const uint8_t *src1_8, uint32_t src1_stride,
                                           const uint8_t *mask, int w, int h, int bd) {
    uint16_t       *dst  = (uint16_t *)dst_8;
    const uint16_t *src0 = (const uint16_t *)src0_8;
    const uint16_t *src1 = (const uint16_t *)src1_8;
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col)
            dst[col] = AOM_BLEND_A64(mask[col], src0[col], src1[col]);
        dst += dst_stride;
        src0 += src0_stride;
        src1 += src1_stride;
    }
}
432