1 /*
2  * Copyright(c) 2019 Intel Corporation
3  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4  *
5  * This source code is subject to the terms of the BSD 2 Clause License and
6  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7  * was not distributed with this source code in the LICENSE file, you can
8  * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9  * Media Patent License 1.0 was not distributed with this source code in the
10  * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11  */
12 
13 #include <assert.h>
14 
15 #include "EbDefinitions.h"
16 
17 #include "common_dsp_rtcd.h"
18 
19 // Blending with alpha mask. Mask values come from the range [0, 64],
20 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
21 // be the same as dst, or dst can be different from both sources.
22 
23 // NOTE(david.barker): The input and output of aom_blend_a64_d16_mask_c() are
24 // in a higher intermediate precision, and will later be rounded down to pixel
25 // precision.
26 // Thus, in order to avoid double-rounding, we want to use normal right shifts
27 // within this function, not ROUND_POWER_OF_TWO.
28 // This works because of the identity:
29 // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
30 //
31 // In contrast, the output of the non-d16 functions will not be further rounded,
32 // so we *should* use ROUND_POWER_OF_TWO there.
33 
// Blend two d16 (intermediate-precision) sources into an 8-bit destination
// using a per-pixel alpha mask in [0, AOM_BLEND_A64_MAX_ALPHA]. subw/subh
// select whether the mask is 2x subsampled horizontally/vertically relative
// to the output. Per the note above, the alpha blend uses a plain right
// shift here so that all rounding is folded into the single final
// ROUND_POWER_OF_TWO, avoiding double-rounding.
void svt_aom_lowbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride,
                                        const CONV_BUF_TYPE *src0, uint32_t src0_stride,
                                        const CONV_BUF_TYPE *src1, uint32_t src1_stride,
                                        const uint8_t *mask, uint32_t mask_stride, int w, int h,
                                        int subw, int subh, ConvolveParams *conv_params) {
    const int bd = 8;
    // Offset and rounding terms introduced by the d16 convolve pipeline.
    const int offset_bits  = bd + 2 * FILTER_BITS - conv_params->round_0;
    const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
        (1 << (offset_bits - conv_params->round_1 - 1));
    const int round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;

    assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));

    assert(h >= 4);
    assert(w >= 4);

    if (subw == 0 && subh == 0) {
        // Full-resolution mask: one mask byte per output pixel.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int     m   = mask[col];
                const int32_t sum = m * (int32_t)src0[col] +
                    (AOM_BLEND_A64_MAX_ALPHA - m) * (int32_t)src1[col];
                const int32_t res = (sum >> AOM_BLEND_A64_ROUND_BITS) - round_offset;
                dst[col]          = clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 1) {
        // Mask subsampled in both directions: average the 2x2 footprint.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[2 * col] + mask[mask_stride + 2 * col] +
                                                     mask[2 * col + 1] +
                                                     mask[mask_stride + 2 * col + 1],
                                                 2);
                const int32_t sum = m * (int32_t)src0[col] +
                    (AOM_BLEND_A64_MAX_ALPHA - m) * (int32_t)src1[col];
                const int32_t res = (sum >> AOM_BLEND_A64_ROUND_BITS) - round_offset;
                dst[col]          = clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 0) {
        // Mask subsampled horizontally only: average two adjacent columns.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int     m   = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
                const int32_t sum = m * (int32_t)src0[col] +
                    (AOM_BLEND_A64_MAX_ALPHA - m) * (int32_t)src1[col];
                const int32_t res = (sum >> AOM_BLEND_A64_ROUND_BITS) - round_offset;
                dst[col]          = clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else {
        // Mask subsampled vertically only: average two adjacent rows.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int     m   = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
                const int32_t sum = m * (int32_t)src0[col] +
                    (AOM_BLEND_A64_MAX_ALPHA - m) * (int32_t)src1[col];
                const int32_t res = (sum >> AOM_BLEND_A64_ROUND_BITS) - round_offset;
                dst[col]          = clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    }
}
109 
// High-bitdepth counterpart of the d16 mask blend: dst_8 actually points to
// uint16_t pixels; bd selects the saturation range (8/10/12 bits). Like the
// lowbd version, the alpha blend uses a plain right shift and the final
// rounding happens once in ROUND_POWER_OF_TWO.
void svt_aom_highbd_blend_a64_d16_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                         const CONV_BUF_TYPE *src0, uint32_t src0_stride,
                                         const CONV_BUF_TYPE *src1, uint32_t src1_stride,
                                         const uint8_t *mask, uint32_t mask_stride, int w, int h,
                                         int subw, int subh, ConvolveParams *conv_params,
                                         const int bd) {
    const int offset_bits  = bd + 2 * FILTER_BITS - conv_params->round_0;
    const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
        (1 << (offset_bits - conv_params->round_1 - 1));
    const int round_bits = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
    uint16_t *dst        = (uint16_t *)dst_8;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    // Inlined clip_pixel_highbd(): saturate the result to (1 << bd) - 1.
    unsigned int max_pixel;
    switch (bd) {
    case 10: max_pixel = 1023; break;
    case 12: max_pixel = 4095; break;
    case 8:
    default: max_pixel = 255; break;
    }

    if (subw == 0 && subh == 0) {
        // Full-resolution mask: one mask byte per output pixel.
        for (int i = 0; i < h; ++i) {
            for (int j = 0; j < w; ++j) {
                const int     m   = mask[i * mask_stride + j];
                const int32_t res = ((m * src0[i * src0_stride + j] +
                                      (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
                                     AOM_BLEND_A64_ROUND_BITS) -
                    round_offset;
                const unsigned int v    = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
                dst[i * dst_stride + j] = AOMMIN(v, max_pixel);
            }
        }
    } else if (subw == 1 && subh == 1) {
        // Mask subsampled in both directions: average the 2x2 footprint.
        for (int i = 0; i < h; ++i) {
            for (int j = 0; j < w; ++j) {
                const int m = ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + (2 * j)] +
                                                     mask[(2 * i + 1) * mask_stride + (2 * j)] +
                                                     mask[(2 * i) * mask_stride + (2 * j + 1)] +
                                                     mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
                                                 2);
                const int32_t res = ((m * src0[i * src0_stride + j] +
                                      (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
                                     AOM_BLEND_A64_ROUND_BITS) -
                    round_offset;
                const unsigned int v    = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
                dst[i * dst_stride + j] = AOMMIN(v, max_pixel);
            }
        }
    } else if (subw == 1 && subh == 0) {
        // Mask subsampled horizontally only: average two adjacent columns.
        for (int i = 0; i < h; ++i) {
            for (int j = 0; j < w; ++j) {
                const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
                                            mask[i * mask_stride + (2 * j + 1)]);
                const int32_t res = ((m * src0[i * src0_stride + j] +
                                      (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
                                     AOM_BLEND_A64_ROUND_BITS) -
                    round_offset;
                const unsigned int v    = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
                dst[i * dst_stride + j] = AOMMIN(v, max_pixel);
            }
        }
    } else {
        // Mask subsampled vertically only: average two adjacent rows.
        for (int i = 0; i < h; ++i) {
            for (int j = 0; j < w; ++j) {
                const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
                                            mask[(2 * i + 1) * mask_stride + j]);
                const int32_t res = ((m * src0[i * src0_stride + j] +
                                      (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
                                     AOM_BLEND_A64_ROUND_BITS) -
                    round_offset;
                const unsigned int v    = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
                dst[i * dst_stride + j] = AOMMIN(v, max_pixel);
            }
        }
    }
}
209 
210 // Blending with alpha mask. Mask values come from the range [0, 64],
211 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
212 // be the same as dst, or dst can be different from both sources.
213 
// Blend two 8-bit sources into dst with a [0, 64] alpha mask via
// AOM_BLEND_A64. subw/subh indicate the mask is 2x subsampled
// horizontally/vertically relative to the output block.
void svt_aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                              uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                              const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw,
                              int subh) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    if (subw == 0 && subh == 0) {
        // Full-resolution mask: one mask byte per output pixel.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = mask[col];
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 1) {
        // Mask subsampled in both directions: average the 2x2 footprint.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[2 * col] + mask[mask_stride + 2 * col] +
                                                     mask[2 * col + 1] +
                                                     mask[mask_stride + 2 * col + 1],
                                                 2);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 0) {
        // Mask subsampled horizontally only: average two adjacent columns.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else {
        // Mask subsampled vertically only: average two adjacent rows.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    }
}
268 
// High-bitdepth mask blend: the uint8_t* arguments actually point to
// uint16_t pixel buffers. AOM_BLEND_A64 itself is depth-agnostic, so bd is
// only used for the debug assertion.
void svt_aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8,
                                     uint32_t src0_stride, const uint8_t *src1_8,
                                     uint32_t src1_stride, const uint8_t *mask,
                                     uint32_t mask_stride, int w, int h, int subw, int subh,
                                     int bd) {
    uint16_t *      dst  = (uint16_t *)dst_8;
    const uint16_t *src0 = (uint16_t *)src0_8;
    const uint16_t *src1 = (uint16_t *)src1_8;
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    if (subw == 0 && subh == 0) {
        // Full-resolution mask: one mask byte per output pixel.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = mask[col];
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 1) {
        // Mask subsampled in both directions: average the 2x2 footprint.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = ROUND_POWER_OF_TWO(mask[2 * col] + mask[mask_stride + 2 * col] +
                                                     mask[2 * col + 1] +
                                                     mask[mask_stride + 2 * col + 1],
                                                 2);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else if (subw == 1 && subh == 0) {
        // Mask subsampled horizontally only: average two adjacent columns.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[2 * col], mask[2 * col + 1]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    } else {
        // Mask subsampled vertically only: average two adjacent rows.
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const int m = AOM_BLEND_AVG(mask[col], mask[mask_stride + col]);
                dst[col]    = AOM_BLEND_A64(m, src0[col], src1[col]);
            }
            mask += 2 * mask_stride;
            src0 += src0_stride;
            src1 += src1_stride;
            dst += dst_stride;
        }
    }
}
330 
331 /*Vertical mask related blend functions*/
// Vertical-mask blend: one alpha value per row, applied across the whole row.
void svt_aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                               uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                               const uint8_t *mask, int w, int h) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    for (int row = 0; row < h; ++row) {
        const int alpha = mask[row];
        for (int col = 0; col < w; ++col) {
            dst[col] = AOM_BLEND_A64(alpha, src0[col], src1[col]);
        }
        src0 += src0_stride;
        src1 += src1_stride;
        dst += dst_stride;
    }
}
353 
// High-bitdepth vertical-mask blend: the uint8_t* arguments actually point
// to uint16_t pixel buffers; bd is used only for the debug assertion.
void svt_aom_highbd_blend_a64_vmask_8bit_c(uint8_t *dst_8, uint32_t dst_stride,
                                           const uint8_t *src0_8, uint32_t src0_stride,
                                           const uint8_t *src1_8, uint32_t src1_stride,
                                           const uint8_t *mask, int w, int h, int bd) {
    uint16_t *      dst  = (uint16_t *)(dst_8); // CONVERT_TO_SHORTPTR(dst_8);
    const uint16_t *src0 = (uint16_t *)(src0_8); //CONVERT_TO_SHORTPTR(src0_8);
    const uint16_t *src1 = (uint16_t *)(src1_8); //CONVERT_TO_SHORTPTR(src1_8);
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        const int alpha = mask[row];
        for (int col = 0; col < w; ++col) {
            dst[col] = AOM_BLEND_A64(alpha, src0[col], src1[col]);
        }
        src0 += src0_stride;
        src1 += src1_stride;
        dst += dst_stride;
    }
}
382 
383 /*Horizontal mask related blend functions*/
// Horizontal-mask blend: one alpha value per column, reused on every row.
void svt_aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                               uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
                               const uint8_t *mask, int w, int h) {
    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col) {
            const int alpha = mask[col];
            dst[col]        = AOM_BLEND_A64(alpha, src0[col], src1[col]);
        }
        src0 += src0_stride;
        src1 += src1_stride;
        dst += dst_stride;
    }
}
404 
// High-bitdepth horizontal-mask blend: the uint8_t* arguments actually point
// to uint16_t pixel buffers; bd is used only for the debug assertion.
void svt_aom_highbd_blend_a64_hmask_8bit_c(uint8_t *dst_8, uint32_t dst_stride,
                                           const uint8_t *src0_8, uint32_t src0_stride,
                                           const uint8_t *src1_8, uint32_t src1_stride,
                                           const uint8_t *mask, int w, int h, int bd) {
    uint16_t *      dst  = (uint16_t *)(dst_8); // CONVERT_TO_SHORTPTR(dst_8);
    const uint16_t *src0 = (uint16_t *)(src0_8); //CONVERT_TO_SHORTPTR(src0_8);
    const uint16_t *src1 = (uint16_t *)(src1_8); //CONVERT_TO_SHORTPTR(src1_8);
    (void)bd;

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col) {
            const int alpha = mask[col];
            dst[col]        = AOM_BLEND_A64(alpha, src0[col], src1[col]);
        }
        src0 += src0_stride;
        src1 += src1_stride;
        dst += dst_stride;
    }
}
432