1 /*
2 * Copyright (c) 2012-2019 Fredrik Mellbin
3 *
4 * This file is part of VapourSynth.
5 *
6 * VapourSynth is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * VapourSynth is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with VapourSynth; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #define VS_MERGE_IMPL
22 #include "merge.h"
23 #include "VSHelper.h"
24
/*
 * The integer merge kernels multiply a sample difference by the weight and
 * then shift right by MERGESHIFT, so the weight acts as a fixed-point
 * fraction with MERGESHIFT fractional bits.
 */
#define MERGESHIFT 15
/* Half of (1 << MERGESHIFT); added before the right shift so the result rounds instead of truncating. */
#define ROUND (1U << (MERGESHIFT - 1))
27
vs_merge_byte_c(const void * src1,const void * src2,void * dst,union vs_merge_weight weight,unsigned n)28 void vs_merge_byte_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
29 {
30 const uint8_t *srcp1 = src1;
31 const uint8_t *srcp2 = src2;
32 uint8_t *dstp = dst;
33 unsigned w = weight.u;
34 unsigned i;
35
36 for (i = 0; i < n; i++) {
37 unsigned v1 = srcp1[i];
38 unsigned v2 = srcp2[i];
39 dstp[i] = v1 + (((v2 - v1) * w + ROUND) >> MERGESHIFT);
40 }
41 }
42
vs_merge_word_c(const void * src1,const void * src2,void * dst,union vs_merge_weight weight,unsigned n)43 void vs_merge_word_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
44 {
45 const uint16_t *srcp1 = src1;
46 const uint16_t *srcp2 = src2;
47 uint16_t *dstp = dst;
48 unsigned w = weight.u;
49 unsigned i;
50
51 for (i = 0; i < n; i++) {
52 unsigned v1 = srcp1[i];
53 unsigned v2 = srcp2[i];
54 dstp[i] = v1 + (((v2 - v1) * w + ROUND) >> MERGESHIFT);
55 }
56 }
57
/*
 * Weighted blend of two rows of float samples.
 *
 * Each output sample is src1 + (src2 - src1) * weight.f.
 *
 * src1, src2 - input rows, read as float
 * dst        - output row, written as float
 * weight     - only the .f member is read
 * n          - number of samples to process
 */
void vs_merge_float_c(const void *src1, const void *src2, void *dst, union vs_merge_weight weight, unsigned n)
{
    const float *first = src1;
    const float *second = src2;
    float *out = dst;
    const float blend = weight.f;
    unsigned x;

    for (x = 0; x < n; ++x) {
        float from = first[x];
        float to = second[x];

        out[x] = from + (to - from) * blend;
    }
}
72
73
/*
 * Per-sample masked blend of two rows of 8-bit samples.
 *
 * Each output sample is
 *     ((255 - mask) * src1 + mask * src2 + 127) / 255
 * so a mask of 0 yields src1 and a mask of 255 yields src2.
 *
 * src1, src2 - input rows, read as uint8_t
 * mask       - per-sample blend factor, read as uint8_t
 * dst        - output row, written as uint8_t
 * depth      - unused
 * offset     - unused
 * n          - number of samples to process
 */
void vs_mask_merge_byte_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const uint8_t *base = src1;
    const uint8_t *overlay = src2;
    const uint8_t *alpha = mask;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;
    (void)offset;

    for (x = 0; x < n; ++x) {
        uint8_t m = alpha[x];
        uint8_t inv = UINT8_MAX - m;
        /* Largest possible sum is 255 * 255 + 127, which still fits in 16 bits. */
        uint16_t acc = inv * base[x] + m * overlay[x] + UINT8_MAX / 2;

        out[x] = acc / 255;
    }
}
94
vs_mask_merge_word_c(const void * src1,const void * src2,const void * mask,void * dst,unsigned depth,unsigned offset,unsigned n)95 void vs_mask_merge_word_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
96 {
97 const uint16_t *srcp1 = src1;
98 const uint16_t *srcp2 = src2;
99 const uint16_t *maskp = mask;
100 uint16_t *dstp = dst;
101 unsigned i;
102
103 uint16_t maxval = (1U << depth) - 1;
104 uint32_t div = div_table[depth - 9];
105 uint8_t shift = shift_table[depth - 9];
106
107 (void)offset;
108
109 for (i = 0; i < n; i++) {
110 uint16_t v1 = srcp1[i];
111 uint16_t v2 = srcp2[i];
112 uint16_t mask = maskp[i];
113 uint16_t invmask = maxval - mask;
114 uint32_t tmp = (uint32_t)invmask * v1 + (uint32_t)mask * v2 + maxval / 2;
115 dstp[i] = (uint16_t)(((uint64_t)tmp * div) >> (32 + shift));
116 }
117 }
118
/*
 * Per-sample masked blend of two rows of float samples.
 *
 * Each output sample is src1 + (src2 - src1) * mask.
 *
 * src1, src2 - input rows, read as float
 * mask       - per-sample blend factor, read as float
 * dst        - output row, written as float
 * depth      - unused
 * offset     - unused
 * n          - number of samples to process
 */
void vs_mask_merge_float_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const float *base = src1;
    const float *overlay = src2;
    const float *alpha = mask;
    float *out = dst;
    unsigned x;

    (void)offset;
    (void)depth;

    for (x = 0; x < n; ++x) {
        float from = base[x];
        float to = overlay[x];

        out[x] = from + (to - from) * alpha[x];
    }
}
136
/*
 * Masked merge of 8-bit samples where src2 is added unscaled.
 *
 * For each sample the distance of src1 from `offset` is scaled by
 * (255 - mask) / 255 (rounded, on the magnitude, with the sign restored
 * afterwards) and src2 is then added. The sum is stored narrowed to uint8_t
 * without clamping.
 *
 * src1, src2 - input rows, read as uint8_t
 * mask       - per-sample mask, read as uint8_t
 * dst        - output row, written as uint8_t
 * depth      - unused
 * offset     - value subtracted from src1 before scaling
 * n          - number of samples to process
 */
void vs_mask_merge_premul_byte_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const uint8_t *base = src1;
    const uint8_t *overlay = src2;
    const uint8_t *alpha = mask;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        uint8_t below = base[x];
        uint8_t above = overlay[x];
        uint8_t keep = UINT8_MAX - alpha[x];
        uint16_t mag = below - offset;
        /* Reinterpret the 16-bit difference as signed to detect src1 < offset. */
        int negative = (int16_t)mag < 0;

        /* Scale the magnitude so rounding is symmetric around zero. */
        if (negative)
            mag = -mag;
        mag = (mag * keep + UINT8_MAX / 2) / 255;
        if (negative)
            mag = -mag;

        out[x] = mag + above;
    }
}
161
vs_mask_merge_premul_word_c(const void * src1,const void * src2,const void * mask,void * dst,unsigned depth,unsigned offset,unsigned n)162 void vs_mask_merge_premul_word_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
163 {
164 const uint16_t *srcp1 = src1;
165 const uint16_t *srcp2 = src2;
166 const uint16_t *maskp = mask;
167 uint16_t *dstp = dst;
168 unsigned i;
169
170 uint16_t maxval = (1U << depth) - 1;
171 uint32_t div = div_table[depth - 9];
172 uint8_t shift = shift_table[depth - 9];
173
174 for (i = 0; i < n; i++) {
175 uint16_t v1 = srcp1[i];
176 uint16_t v2 = srcp2[i];
177 uint16_t invmask = maxval - maskp[i];
178 #pragma warning(push)
179 #pragma warning(disable:4146)
180 uint32_t tmp = v1 - offset;
181 int sign = (int32_t)tmp < 0;
182 tmp = sign ? -tmp : tmp;
183 tmp = (((uint64_t)tmp * invmask + maxval / 2) * div) >> (32 + shift);
184 tmp = sign ? -tmp : tmp;
185 #pragma warning(pop)
186 dstp[i] = tmp + v2;
187 }
188 }
189
/*
 * Masked merge of float samples where src2 is added unscaled.
 *
 * Each output sample is (1.0f - mask) * src1 + src2.
 *
 * src1, src2 - input rows, read as float
 * mask       - per-sample mask, read as float
 * dst        - output row, written as float
 * depth      - unused
 * offset     - unused
 * n          - number of samples to process
 */
void vs_mask_merge_premul_float_c(const void *src1, const void *src2, const void *mask, void *dst, unsigned depth, unsigned offset, unsigned n)
{
    const float *base = src1;
    const float *overlay = src2;
    const float *alpha = mask;
    float *out = dst;
    unsigned x;

    (void)offset;
    (void)depth;

    for (x = 0; x < n; ++x) {
        float below = base[x];
        float above = overlay[x];

        out[x] = (1.0f - alpha[x]) * below + above;
    }
}
207
/*
 * Biased difference of two rows of 8-bit samples.
 *
 * Each output sample is src1 - src2 + 128, passed through VSMAX(.., 0) and
 * then VSMIN(.., 255) before being stored.
 *
 * src1, src2 - input rows, read as uint8_t
 * dst        - output row, written as uint8_t
 * depth      - unused
 * n          - number of samples to process
 */
void vs_makediff_byte_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint8_t *minuend = src1;
    const uint8_t *subtrahend = src2;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so a negative difference survives until the clamp. */
        int diff = (int)minuend[x] - (int)subtrahend[x] + 128;

        diff = VSMAX(diff, 0);
        out[x] = VSMIN(diff, 255);
    }
}
223
/*
 * Biased difference of two rows of 16-bit samples.
 *
 * With half = 1 << (depth - 1) and maxval = (1 << depth) - 1, each output
 * sample is src1 - src2 + half, passed through VSMAX(.., 0) and then
 * VSMIN(.., maxval) before being stored.
 *
 * src1, src2 - input rows, read as uint16_t
 * dst        - output row, written as uint16_t
 * depth      - bit depth used to derive half and maxval
 * n          - number of samples to process
 */
void vs_makediff_word_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint16_t *minuend = src1;
    const uint16_t *subtrahend = src2;
    uint16_t *out = dst;
    unsigned x;

    const int32_t mid = 1U << (depth - 1);
    const int32_t peak = (1U << depth) - 1;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so a negative difference survives until the clamp. */
        int32_t diff = (int32_t)minuend[x] - (int32_t)subtrahend[x] + mid;

        diff = VSMAX(diff, 0);
        out[x] = VSMIN(diff, peak);
    }
}
241
/*
 * Difference of two rows of float samples: dst = src1 - src2.
 * No bias is added and no clamping is applied.
 *
 * src1, src2 - input rows, read as float
 * dst        - output row, written as float
 * depth      - unused
 * n          - number of samples to process
 */
void vs_makediff_float_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const float *minuend = src1;
    const float *subtrahend = src2;
    float *out = dst;
    unsigned left = n;

    (void)depth;

    while (left--) {
        float lhs = *minuend++;
        float rhs = *subtrahend++;

        *out++ = lhs - rhs;
    }
}
255
/*
 * Adds a biased 8-bit difference row to another 8-bit row.
 *
 * Each output sample is src1 + src2 - 128, passed through VSMAX(.., 0) and
 * then VSMIN(.., 255) before being stored.
 *
 * src1, src2 - input rows, read as uint8_t
 * dst        - output row, written as uint8_t
 * depth      - unused
 * n          - number of samples to process
 */
void vs_mergediff_byte_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint8_t *first = src1;
    const uint8_t *second = src2;
    uint8_t *out = dst;
    unsigned x;

    (void)depth;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so an out-of-range sum survives until the clamp. */
        int sum = (int)first[x] + (int)second[x] - 128;

        sum = VSMAX(sum, 0);
        out[x] = VSMIN(sum, 255);
    }
}
271
/*
 * Adds a biased 16-bit difference row to another 16-bit row.
 *
 * With half = 1 << (depth - 1) and maxval = (1 << depth) - 1, each output
 * sample is src1 + src2 - half, passed through VSMAX(.., 0) and then
 * VSMIN(.., maxval) before being stored.
 *
 * src1, src2 - input rows, read as uint16_t
 * dst        - output row, written as uint16_t
 * depth      - bit depth used to derive half and maxval
 * n          - number of samples to process
 */
void vs_mergediff_word_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const uint16_t *first = src1;
    const uint16_t *second = src2;
    uint16_t *out = dst;
    unsigned x;

    const int32_t mid = 1U << (depth - 1);
    const int32_t peak = (1U << depth) - 1;

    for (x = 0; x < n; ++x) {
        /* Signed arithmetic so an out-of-range sum survives until the clamp. */
        int32_t sum = (int32_t)first[x] + (int32_t)second[x] - mid;

        sum = VSMAX(sum, 0);
        out[x] = VSMIN(sum, peak);
    }
}
289
/*
 * Sum of two rows of float samples: dst = src1 + src2.
 * No bias is removed and no clamping is applied.
 *
 * src1, src2 - input rows, read as float
 * dst        - output row, written as float
 * depth      - unused
 * n          - number of samples to process
 */
void vs_mergediff_float_c(const void *src1, const void *src2, void *dst, unsigned depth, unsigned n)
{
    const float *first = src1;
    const float *second = src2;
    float *out = dst;
    unsigned left = n;

    (void)depth;

    while (left--) {
        float lhs = *first++;
        float rhs = *second++;

        *out++ = lhs + rhs;
    }
}
303