1 /*
2 * Copyright (c) 2012-2019 Fredrik Mellbin
3 *
4 * This file is part of VapourSynth.
5 *
6 * VapourSynth is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * VapourSynth is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with VapourSynth; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 
21 #define VS_MERGE_IMPL
22 #include "merge.h"
23 #include "VSHelper.h"
24 
/* Right shift applied by the integer merge kernels after multiplying by the
 * weight, i.e. the weight is treated as having this many fractional bits. */
#define MERGESHIFT 15
/* Half of one fixed-point unit (2^(MERGESHIFT - 1)); added before the shift so
 * that the shifted result is rounded instead of truncated. */
#define ROUND (1U << (MERGESHIFT - 1))
27 
vs_merge_byte_c(const void * src1,const void * src2,void * dst,union vs_merge_weight weight,unsigned n)28 void vs_merge_byte_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
29 {
30     const uint8_t *srcp1 = src1;
31     const uint8_t *srcp2 = src2;
32     uint8_t *dstp = dst;
33     unsigned w = weight.u;
34     unsigned i;
35 
36     for (i = 0; i < n; i++) {
37         unsigned v1 = srcp1[i];
38         unsigned v2 = srcp2[i];
39         dstp[i] = v1 + (((v2 - v1) * w + ROUND) >> MERGESHIFT);
40     }
41 }
42 
vs_merge_word_c(const void * src1,const void * src2,void * dst,union vs_merge_weight weight,unsigned n)43 void vs_merge_word_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
44 {
45     const uint16_t *srcp1 = src1;
46     const uint16_t *srcp2 = src2;
47     uint16_t *dstp = dst;
48     unsigned w = weight.u;
49     unsigned i;
50 
51     for (i = 0; i < n; i++) {
52         unsigned v1 = srcp1[i];
53         unsigned v2 = srcp2[i];
54         dstp[i] = v1 + (((v2 - v1) * w + ROUND) >> MERGESHIFT);
55     }
56 }
57 
/*
 * Blend two rows of float samples with a constant weight:
 *
 *   dst[i] = src1[i] + (src2[i] - src1[i]) * weight.f
 *
 * src1, src2: input rows, read as n float samples each.
 * dst:        output row, written as n float samples.
 * weight:     only the float member (.f) is read.
 * n:          number of samples to process.
 */
void vs_merge_float_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
{
    const float *a = src1;
    const float *b = src2;
    float *out = dst;
    const float scale = weight.f;
    unsigned x;

    for (x = 0; x < n; ++x) {
        const float base = a[x];

        out[x] = base + (b[x] - base) * scale;
    }
}
72 
73 
/*
 * Per-sample masked blend of two rows of 8-bit samples:
 *
 *   dst[i] = ((255 - mask[i]) * src1[i] + mask[i] * src2[i] + 127) / 255
 *
 * A mask value of 0 selects src1, 255 selects src2.
 *
 * src1, src2, mask: input rows, read as n uint8_t samples each.
 * dst:              output row, written as n uint8_t samples.
 * depth, offset:    unused by this variant.
 * n:                number of samples to process.
 */
void vs_mask_merge_byte_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const uint8_t *a = src1;
    const uint8_t *b = src2;
    const uint8_t *m = mask;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;
    (void)offset;

    for (x = 0; x < n; ++x) {
        const uint8_t weight_b = m[x];
        const uint8_t weight_a = UINT8_MAX - weight_b;
        /* The two weights sum to 255, so the accumulator never exceeds
         * 255 * 255 + 127 and fits in 16 bits. */
        const uint16_t acc = weight_a * a[x] + weight_b * b[x] + UINT8_MAX / 2;

        out[x] = acc / 255;
    }
}
94 
vs_mask_merge_word_c(const void * src1,const void * src2,const void * mask,void * dst,unsigned depth,unsigned offset,unsigned n)95 void vs_mask_merge_word_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
96 {
97     const uint16_t *srcp1 = src1;
98     const uint16_t *srcp2 = src2;
99     const uint16_t *maskp = mask;
100     uint16_t *dstp = dst;
101     unsigned i;
102 
103     uint16_t maxval = (1U << depth) - 1;
104     uint32_t div = div_table[depth - 9];
105     uint8_t shift = shift_table[depth - 9];
106 
107     (void)offset;
108 
109     for (i = 0; i < n; i++) {
110         uint16_t v1 = srcp1[i];
111         uint16_t v2 = srcp2[i];
112         uint16_t mask = maskp[i];
113         uint16_t invmask = maxval - mask;
114         uint32_t tmp = (uint32_t)invmask * v1 + (uint32_t)mask * v2 + maxval / 2;
115         dstp[i] = (uint16_t)(((uint64_t)tmp * div) >> (32 + shift));
116     }
117 }
118 
/*
 * Per-sample masked blend of two rows of float samples:
 *
 *   dst[i] = src1[i] + (src2[i] - src1[i]) * mask[i]
 *
 * src1, src2, mask: input rows, read as n float samples each.
 * dst:              output row, written as n float samples.
 * depth, offset:    unused by this variant.
 * n:                number of samples to process.
 */
void vs_mask_merge_float_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const float *a = src1;
    const float *b = src2;
    const float *m = mask;
    float *out = dst;
    unsigned x;

    (void)offset;
    (void)depth;

    for (x = 0; x < n; ++x) {
        const float base = a[x];
        const float delta = b[x] - base;

        out[x] = base + delta * m[x];
    }
}
136 
/*
 * Masked blend of 8-bit samples where src2 is added unscaled:
 *
 *   dst[i] = src2[i] + sign * ((|src1[i] - offset| * (255 - mask[i]) + 127) / 255)
 *
 * where sign is the sign of (src1[i] - offset). The scaling is done on the
 * magnitude so that rounding is symmetric around offset.
 *
 * src1, src2, mask: input rows, read as n uint8_t samples each.
 * dst:              output row, written as n uint8_t samples (the sum is
 *                   truncated to 8 bits, not clamped).
 * depth:            unused by this variant.
 * offset:           value subtracted from src1 before scaling.
 * n:                number of samples to process.
 */
void vs_mask_merge_premul_byte_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const uint8_t *a = src1;
    const uint8_t *b = src2;
    const uint8_t *m = mask;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        const uint8_t keep = UINT8_MAX - m[x];
        /* Difference kept in 16 bits; its top bit tells whether it is negative. */
        uint16_t mag = a[x] - offset;
        const int negative = (int16_t)mag < 0;

        if (negative) {
            mag = -mag;
        }
        mag = (mag * keep + UINT8_MAX / 2) / 255;
        if (negative) {
            mag = -mag;
        }

        out[x] = mag + b[x];
    }
}
161 
vs_mask_merge_premul_word_c(const void * src1,const void * src2,const void * mask,void * dst,unsigned depth,unsigned offset,unsigned n)162 void vs_mask_merge_premul_word_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
163 {
164     const uint16_t *srcp1 = src1;
165     const uint16_t *srcp2 = src2;
166     const uint16_t *maskp = mask;
167     uint16_t *dstp = dst;
168     unsigned i;
169 
170     uint16_t maxval = (1U << depth) - 1;
171     uint32_t div = div_table[depth - 9];
172     uint8_t shift = shift_table[depth - 9];
173 
174     for (i = 0; i < n; i++) {
175         uint16_t v1 = srcp1[i];
176         uint16_t v2 = srcp2[i];
177         uint16_t invmask = maxval - maskp[i];
178 #pragma warning(push)
179 #pragma warning(disable:4146)
180         uint32_t tmp = v1 - offset;
181         int sign = (int32_t)tmp < 0;
182         tmp = sign ? -tmp : tmp;
183         tmp = (((uint64_t)tmp * invmask + maxval / 2) * div) >> (32 + shift);
184         tmp = sign ? -tmp : tmp;
185 #pragma warning(pop)
186         dstp[i] = tmp + v2;
187     }
188 }
189 
/*
 * Masked blend of float samples where src2 is added unscaled:
 *
 *   dst[i] = (1 - mask[i]) * src1[i] + src2[i]
 *
 * src1, src2, mask: input rows, read as n float samples each.
 * dst:              output row, written as n float samples.
 * depth, offset:    unused by this variant.
 * n:                number of samples to process.
 */
void vs_mask_merge_premul_float_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const float *a = src1;
    const float *b = src2;
    const float *m = mask;
    float *out = dst;
    unsigned x;

    (void)offset;
    (void)depth;

    for (x = 0; x < n; ++x) {
        const float keep = 1.0f - m[x];

        out[x] = keep * a[x] + b[x];
    }
}
207 
/*
 * Biased difference of two rows of 8-bit samples:
 *
 *   dst[i] = clamp(src1[i] - src2[i] + 128, 0, 255)
 *
 * src1, src2: input rows, read as n uint8_t samples each.
 * dst:        output row, written as n uint8_t samples.
 * depth:      unused by this variant.
 * n:          number of samples to process.
 */
void vs_makediff_byte_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint8_t *a = src1;
    const uint8_t *b = src2;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so a negative difference can be clamped to 0. */
        int diff = (int)a[x] - (int)b[x] + 128;

        diff = VSMAX(diff, 0);
        out[x] = VSMIN(diff, 255);
    }
}
223 
/*
 * Biased difference of two rows of 16-bit-container samples:
 *
 *   dst[i] = clamp(src1[i] - src2[i] + 2^(depth - 1), 0, 2^depth - 1)
 *
 * src1, src2: input rows, read as n uint16_t samples each.
 * dst:        output row, written as n uint16_t samples.
 * depth:      bit depth; determines the bias and the upper clamp limit.
 * n:          number of samples to process.
 */
void vs_makediff_word_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint16_t *a = src1;
    const uint16_t *b = src2;
    uint16_t *out = dst;
    unsigned x;

    const int32_t peak = (1U << depth) - 1;
    const int32_t mid = 1U << (depth - 1);

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so a negative difference can be clamped to 0. */
        int32_t diff = (int32_t)a[x] - (int32_t)b[x] + mid;

        diff = VSMAX(diff, 0);
        out[x] = VSMIN(diff, peak);
    }
}
241 
/*
 * Plain difference of two rows of float samples: dst[i] = src1[i] - src2[i].
 * No bias is added and no clamping is performed.
 *
 * src1, src2: input rows, read as n float samples each.
 * dst:        output row, written as n float samples.
 * depth:      unused by this variant.
 * n:          number of samples to process.
 */
void vs_makediff_float_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const float *a = src1;
    const float *b = src2;
    float *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        const float minuend = a[x];
        const float subtrahend = b[x];

        out[x] = minuend - subtrahend;
    }
}
255 
/*
 * Add a biased difference row to a row of 8-bit samples:
 *
 *   dst[i] = clamp(src1[i] + src2[i] - 128, 0, 255)
 *
 * src1, src2: input rows, read as n uint8_t samples each.
 * dst:        output row, written as n uint8_t samples.
 * depth:      unused by this variant.
 * n:          number of samples to process.
 */
void vs_mergediff_byte_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint8_t *a = src1;
    const uint8_t *b = src2;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so the result can be clamped on both ends. */
        int sum = (int)a[x] + (int)b[x] - 128;

        sum = VSMAX(sum, 0);
        out[x] = VSMIN(sum, 255);
    }
}
271 
/*
 * Add a biased difference row to a row of 16-bit-container samples:
 *
 *   dst[i] = clamp(src1[i] + src2[i] - 2^(depth - 1), 0, 2^depth - 1)
 *
 * src1, src2: input rows, read as n uint16_t samples each.
 * dst:        output row, written as n uint16_t samples.
 * depth:      bit depth; determines the bias and the upper clamp limit.
 * n:          number of samples to process.
 */
void vs_mergediff_word_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint16_t *a = src1;
    const uint16_t *b = src2;
    uint16_t *out = dst;
    unsigned x;

    const int32_t peak = (1U << depth) - 1;
    const int32_t mid = 1U << (depth - 1);

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so the result can be clamped on both ends. */
        int32_t sum = (int32_t)a[x] + (int32_t)b[x] - mid;

        sum = VSMAX(sum, 0);
        out[x] = VSMIN(sum, peak);
    }
}
289 
/*
 * Plain sum of two rows of float samples: dst[i] = src1[i] + src2[i].
 * No bias is removed and no clamping is performed.
 *
 * src1, src2: input rows, read as n float samples each.
 * dst:        output row, written as n float samples.
 * depth:      unused by this variant.
 * n:          number of samples to process.
 */
void vs_mergediff_float_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const float *a = src1;
    const float *b = src2;
    float *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        const float lhs = a[x];
        const float rhs = b[x];

        out[x] = lhs + rhs;
    }
}
303