1 /*
2 This file is part of darktable,
3 Copyright (C) 2011-2021 darktable developers.
4
5 darktable is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 darktable is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with darktable. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #if defined(__GNUC__)
20 #pragma GCC optimize("unroll-loops", "tree-loop-if-convert", "tree-loop-distribution", "no-strict-aliasing", \
21 "loop-interchange", "tree-loop-im", "unswitch-loops", \
22 "tree-loop-ivcanon", "ira-loop-pressure", "split-ivs-in-unroller", "tree-loop-vectorize", \
23 "variable-expansion-in-unroller", "split-loops", "ivopts", "predictive-commoning", \
24 "finite-math-only", "fp-contract=fast", \
25 "fast-math", "no-math-errno")
26 #endif
27
28 #include "common/colorspaces_inline_conversions.h"
29 #include "common/imagebuf.h"
30 #include "develop/blend.h"
31 #include "develop/imageop.h"
32 #include "develop/openmp_maths.h"
33 #include <math.h>
34
// number of floats stored per pixel; the row helpers advance by this many floats per pixel
#define DT_BLENDIF_RGB_CH 4
// number of leading floats of a pixel that the blend operators blend; the float at this index
// receives the per-pixel opacity instead
#define DT_BLENDIF_RGB_BCH 3


// common signature of the per-row blend operators handed out by _choose_blend_func():
// `a` and `b` are the two pixel rows, `p` a scalar factor used by some operators, `out` the
// destination row, `mask` one opacity per pixel and `stride` the number of pixels in the row
typedef void(_blend_row_func)(const float *const restrict a, const float *const restrict b, const float p,
                              float *const restrict out, const float *const restrict mask, const size_t stride);
41
42
/*
 * Evaluate the trapezoidal response of one blend-if channel for `value`.
 *
 * parameters[0..3] are the four thresholds, parameters[4] the slope used on the rising edge
 * and parameters[5] the slope used on the falling edge. The response is 0 at or below the
 * first threshold and at or above the last one, and 1 between the two middle thresholds.
 * A non-zero invert_mask returns 1 - response instead.
 */
#ifdef _OPENMP
#pragma omp declare simd uniform(parameters, invert_mask)
#endif
static inline float _blendif_compute_factor(const float value, const unsigned int invert_mask,
                                            const float *const restrict parameters)
{
  // default covers everything outside of the trapezoid
  float weight = 0.0f;

  if(!(value <= parameters[0]))
  {
    if(value < parameters[1])
      weight = (value - parameters[0]) * parameters[4]; // rising edge
    else if(value <= parameters[2])
      weight = 1.0f; // plateau
    else if(value < parameters[3])
      weight = 1.0f - (value - parameters[2]) * parameters[5]; // falling edge
  }

  if(invert_mask) return 1.0f - weight;
  return weight;
}
77
78 #ifdef _OPENMP
79 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride, profile)
80 #endif
_blendif_gray(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask,const dt_iop_order_iccprofile_info_t * const restrict profile)81 static inline void _blendif_gray(const float *const restrict pixels, float *const restrict mask,
82 const size_t stride, const float *const restrict parameters,
83 const unsigned int invert_mask,
84 const dt_iop_order_iccprofile_info_t *const restrict profile)
85 {
86 for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
87 {
88 const float value = dt_ioppr_get_rgb_matrix_luminance(pixels + j, profile->matrix_in, profile->lut_in,
89 profile->unbounded_coeffs_in, profile->lutsize,
90 profile->nonlinearlut);
91 mask[x] *= _blendif_compute_factor(value, invert_mask, parameters);
92 }
93 }
94
95 #ifdef _OPENMP
96 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
97 #endif
_blendif_rgb_red(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)98 static inline void _blendif_rgb_red(const float *const restrict pixels, float *const restrict mask,
99 const size_t stride, const float *const restrict parameters,
100 const unsigned int invert_mask)
101 {
102 for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
103 {
104 mask[x] *= _blendif_compute_factor(pixels[j + 0], invert_mask, parameters);
105 }
106 }
107
108 #ifdef _OPENMP
109 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
110 #endif
_blendif_rgb_green(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)111 static inline void _blendif_rgb_green(const float *const restrict pixels, float *const restrict mask,
112 const size_t stride, const float *const restrict parameters,
113 const unsigned int invert_mask)
114 {
115 for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
116 {
117 mask[x] *= _blendif_compute_factor(pixels[j + 1], invert_mask, parameters);
118 }
119 }
120
121 #ifdef _OPENMP
122 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
123 #endif
_blendif_rgb_blue(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)124 static inline void _blendif_rgb_blue(const float *const restrict pixels, float *const restrict mask,
125 const size_t stride, const float *const restrict parameters,
126 const unsigned int invert_mask)
127 {
128 for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
129 {
130 mask[x] *= _blendif_compute_factor(pixels[j + 2], invert_mask, parameters);
131 }
132 }
133
134 #ifdef _OPENMP
135 #pragma omp declare simd aligned(pixels, invert_mask: 16) uniform(parameters, invert_mask, stride, profile)
136 #endif
_blendif_jzczhz(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int * const restrict invert_mask,const dt_iop_order_iccprofile_info_t * const restrict profile)137 static inline void _blendif_jzczhz(const float *const restrict pixels, float *const restrict mask,
138 const size_t stride, const float *const restrict parameters,
139 const unsigned int *const restrict invert_mask,
140 const dt_iop_order_iccprofile_info_t *const restrict profile)
141 {
142 for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
143 {
144 float XYZ_D65[3] DT_ALIGNED_PIXEL;
145 float JzAzBz[3] DT_ALIGNED_PIXEL;
146 float JzCzhz[3] DT_ALIGNED_PIXEL;
147
148 // use the matrix_out of the hacked profile for blending to use the
149 // conversion from RGB to XYZ D65 (instead of XYZ D50)
150 dt_ioppr_rgb_matrix_to_xyz(pixels + j, XYZ_D65, profile->matrix_out, profile->lut_in,
151 profile->unbounded_coeffs_in, profile->lutsize, profile->nonlinearlut);
152
153 dt_XYZ_2_JzAzBz(XYZ_D65, JzAzBz);
154 dt_JzAzBz_2_JzCzhz(JzAzBz, JzCzhz);
155
156 float factor = 1.0f;
157 for(size_t i = 0; i < 3; i++)
158 factor *= _blendif_compute_factor(JzCzhz[i], invert_mask[i],
159 parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * i);
160 mask[x] *= factor;
161 }
162 }
163
164 #ifdef _OPENMP
165 #pragma omp declare simd aligned(pixels: 16) uniform(stride, blendif, parameters, profile)
166 #endif
_blendif_combine_channels(const float * const restrict pixels,float * const restrict mask,const size_t stride,const unsigned int blendif,const float * const restrict parameters,const dt_iop_order_iccprofile_info_t * const restrict profile)167 static void _blendif_combine_channels(const float *const restrict pixels, float *const restrict mask,
168 const size_t stride, const unsigned int blendif,
169 const float *const restrict parameters,
170 const dt_iop_order_iccprofile_info_t *const restrict profile)
171 {
172 if(blendif & (1 << DEVELOP_BLENDIF_GRAY_in))
173 {
174 const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_GRAY_in);
175 _blendif_gray(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GRAY_in,
176 invert_mask, profile);
177 }
178
179 if(blendif & (1 << DEVELOP_BLENDIF_RED_in))
180 {
181 const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_RED_in);
182 _blendif_rgb_red(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_RED_in,
183 invert_mask);
184 }
185
186 if(blendif & (1 << DEVELOP_BLENDIF_GREEN_in))
187 {
188 const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_GREEN_in);
189 _blendif_rgb_green(pixels, mask, stride,
190 parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GREEN_in, invert_mask);
191 }
192
193 if(blendif & (1 << DEVELOP_BLENDIF_BLUE_in))
194 {
195 const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_BLUE_in);
196 _blendif_rgb_blue(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_BLUE_in,
197 invert_mask);
198 }
199
200 if(blendif & ((1 << DEVELOP_BLENDIF_Jz_in) | (1 << DEVELOP_BLENDIF_Cz_in) | (1 << DEVELOP_BLENDIF_hz_in)))
201 {
202 const unsigned int invert_mask[3] DT_ALIGNED_PIXEL = {
203 (blendif >> 16) & (1 << DEVELOP_BLENDIF_Jz_in),
204 (blendif >> 16) & (1 << DEVELOP_BLENDIF_Cz_in),
205 (blendif >> 16) & (1 << DEVELOP_BLENDIF_hz_in),
206 };
207 _blendif_jzczhz(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_Jz_in,
208 invert_mask, profile);
209 }
210 }
211
dt_develop_blendif_rgb_jzczhz_make_mask(struct dt_dev_pixelpipe_iop_t * piece,const float * const restrict a,const float * const restrict b,const struct dt_iop_roi_t * const roi_in,const struct dt_iop_roi_t * const roi_out,float * const restrict mask)212 void dt_develop_blendif_rgb_jzczhz_make_mask(struct dt_dev_pixelpipe_iop_t *piece,
213 const float *const restrict a,
214 const float *const restrict b,
215 const struct dt_iop_roi_t *const roi_in,
216 const struct dt_iop_roi_t *const roi_out,
217 float *const restrict mask)
218 {
219 const dt_develop_blend_params_t *const d = (const dt_develop_blend_params_t *const)piece->blendop_data;
220
221 if(piece->colors != DT_BLENDIF_RGB_CH) return;
222
223 const int xoffs = roi_out->x - roi_in->x;
224 const int yoffs = roi_out->y - roi_in->y;
225 const int iwidth = roi_in->width;
226 const int owidth = roi_out->width;
227 const int oheight = roi_out->height;
228
229 const unsigned int any_channel_active = d->blendif & DEVELOP_BLENDIF_RGB_MASK;
230 const unsigned int mask_inclusive = d->mask_combine & DEVELOP_COMBINE_INCL;
231 const unsigned int mask_inversed = d->mask_combine & DEVELOP_COMBINE_INV;
232
233 // invert the individual channels if the combine mode is inclusive
234 const unsigned int blendif = d->blendif ^ (mask_inclusive ? DEVELOP_BLENDIF_RGB_MASK << 16 : 0);
235
236 // a channel cancels the mask if the whole span is selected and the channel is inverted
237 const unsigned int canceling_channel = (blendif >> 16) & ~blendif & DEVELOP_BLENDIF_RGB_MASK;
238
239 const size_t buffsize = (size_t)owidth * oheight;
240
241 // get the clipped opacity value 0 - 1
242 const float global_opacity = clamp_simd(d->opacity / 100.0f);
243
244 if(!(d->mask_mode & DEVELOP_MASK_CONDITIONAL) || (!canceling_channel && !any_channel_active))
245 {
246 // mask is not conditional, invert the mask if required
247 if(mask_inversed)
248 {
249 #ifdef _OPENMP
250 #pragma omp parallel for simd default(none) dt_omp_firstprivate(mask, buffsize, global_opacity) schedule(static)
251 #endif
252 for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x]);
253 }
254 else
255 {
256 dt_iop_image_mul_const(mask,global_opacity,owidth,oheight,1); // mask[k] *= global_opacity;
257 }
258 }
259 else if(canceling_channel || !any_channel_active)
260 {
261 // one of the conditional channel selects nothing
262 // this means that the conditional opacity of all pixels is the same
263 // and depends on whether the mask combination is inclusive and whether the mask is inverted
264 const float opac = ((mask_inversed == 0) ^ (mask_inclusive == 0)) ? global_opacity : 0.0f;
265 dt_iop_image_fill(mask,opac,owidth,oheight,1); // mask[k] = opac;
266 }
267 else
268 {
269 // we need to process all conditional channels
270
271 // parameters, for every channel the 4 limits + pre-computed increasing slope and decreasing slope
272 float parameters[DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_SIZE] DT_ALIGNED_ARRAY;
273 dt_develop_blendif_process_parameters(parameters, d);
274
275 dt_iop_order_iccprofile_info_t blend_profile;
276 if(!dt_develop_blendif_init_masking_profile(piece, &blend_profile, DEVELOP_BLEND_CS_RGB_SCENE))
277 {
278 return;
279 }
280 const dt_iop_order_iccprofile_info_t *profile = &blend_profile;
281
282 // allocate space for a temporary mask buffer to split the computation of every channel
283 float *const restrict temp_mask = dt_alloc_align_float(buffsize);
284 if(!temp_mask)
285 {
286 return;
287 }
288
289 #ifdef _OPENMP
290 #pragma omp parallel default(none) \
291 dt_omp_firstprivate(temp_mask, mask, a, b, oheight, owidth, iwidth, yoffs, xoffs, buffsize, \
292 blendif, profile, parameters, mask_inclusive, mask_inversed, global_opacity)
293 #endif
294 {
295 #ifdef __SSE2__
296 // flush denormals to zero to avoid performance penalty if there are a lot of zero values in the mask
297 const int oldMode = _MM_GET_FLUSH_ZERO_MODE();
298 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
299 #endif
300
301 // initialize the parametric mask
302 #ifdef _OPENMP
303 #pragma omp for simd schedule(static) aligned(temp_mask:64)
304 #endif
305 for(size_t x = 0; x < buffsize; x++) temp_mask[x] = 1.0f;
306
307 // combine channels
308 #ifdef _OPENMP
309 #pragma omp for schedule(static)
310 #endif
311 for(size_t y = 0; y < oheight; y++)
312 {
313 const size_t start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
314 _blendif_combine_channels(a + start, temp_mask + (y * owidth), owidth, blendif, parameters, profile);
315 }
316 #ifdef _OPENMP
317 #pragma omp for schedule(static)
318 #endif
319 for(size_t y = 0; y < oheight; y++)
320 {
321 const size_t start = (y * owidth) * DT_BLENDIF_RGB_CH;
322 _blendif_combine_channels(b + start, temp_mask + (y * owidth), owidth, blendif >> DEVELOP_BLENDIF_GRAY_out,
323 parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GRAY_out,
324 profile);
325 }
326
327 // apply global opacity
328 if(mask_inclusive)
329 {
330 if(mask_inversed)
331 {
332 #ifdef _OPENMP
333 #pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
334 #endif
335 for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x]) * temp_mask[x];
336 }
337 else
338 {
339 #ifdef _OPENMP
340 #pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
341 #endif
342 for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - (1.0f - mask[x]) * temp_mask[x]);
343 }
344 }
345 else
346 {
347 if(mask_inversed)
348 {
349 #ifdef _OPENMP
350 #pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
351 #endif
352 for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x] * temp_mask[x]);
353 }
354 else
355 {
356 #ifdef _OPENMP
357 #pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
358 #endif
359 for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * mask[x] * temp_mask[x];
360 }
361 }
362
363 #ifdef __SSE2__
364 _MM_SET_FLUSH_ZERO_MODE(oldMode);
365 #endif
366 }
367
368 dt_free_align(temp_mask);
369 }
370 }
371
372
373 /* normal blend without any clamping */
374 #ifdef _OPENMP
375 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
376 #endif
_blend_normal(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)377 static void _blend_normal(const float *const restrict a, const float *const restrict b, const float p,
378 float *const restrict out, const float *const restrict mask, const size_t stride)
379 {
380 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
381 {
382 const float local_opacity = mask[i];
383 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
384 {
385 out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] * local_opacity;
386 }
387 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
388 }
389 }
390
391 /* multiply */
392 #ifdef _OPENMP
393 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
394 #endif
_blend_multiply(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)395 static void _blend_multiply(const float *const restrict a, const float *const restrict b, const float p,
396 float *const restrict out, const float *const restrict mask, const size_t stride)
397 {
398 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
399 {
400 const float local_opacity = mask[i];
401 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
402 {
403 out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] * b[j + k] * p) * local_opacity;
404 }
405 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
406 }
407 }
408
409 /* add */
410 #ifdef _OPENMP
411 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
412 #endif
_blend_add(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)413 static void _blend_add(const float *const restrict a, const float *const restrict b, const float p,
414 float *const restrict out, const float *const restrict mask, const size_t stride)
415 {
416 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
417 {
418 const float local_opacity = mask[i];
419 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
420 {
421 out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] + p * b[j + k]) * local_opacity;
422 }
423 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
424 }
425 }
426
427 /* subtract */
428 #ifdef _OPENMP
429 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
430 #endif
_blend_subtract(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)431 static void _blend_subtract(const float *const restrict a, const float *const restrict b, const float p,
432 float *const restrict out, const float *const restrict mask, const size_t stride)
433 {
434 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
435 {
436 const float local_opacity = mask[i];
437 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
438 {
439 out[j + k] = a[j + k] * (1.0f - local_opacity) + fmaxf(a[j + k] - p * b[j + k], 0.0f) * local_opacity;
440 }
441 out[j + 3] = local_opacity;
442 }
443 }
444
445 /* subtract inverse */
446 #ifdef _OPENMP
447 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
448 #endif
_blend_subtract_inverse(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)449 static void _blend_subtract_inverse(const float *const restrict a, const float *const restrict b, const float p,
450 float *const restrict out, const float *const restrict mask,
451 const size_t stride)
452 {
453 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
454 {
455 const float local_opacity = mask[i];
456 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
457 {
458 out[j + k] = a[j + k] * (1.0f - local_opacity) + fmaxf(b[j + k] - p * a[j + k], 0.0f) * local_opacity;
459 }
460 out[j + 3] = local_opacity;
461 }
462 }
463
464 /* difference */
465 #ifdef _OPENMP
466 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
467 #endif
_blend_difference(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)468 static void _blend_difference(const float *const restrict a, const float *const restrict b, const float p,
469 float *const restrict out, const float *const restrict mask, const size_t stride)
470 {
471 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
472 {
473 const float local_opacity = mask[i];
474 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
475 {
476 out[j + k] = a[j + k] * (1.0f - local_opacity) + fabsf(a[j + k] - b[j + k]) * local_opacity;
477 }
478 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
479 }
480 }
481
482 /* divide */
483 #ifdef _OPENMP
484 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
485 #endif
_blend_divide(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)486 static void _blend_divide(const float *const restrict a, const float *const restrict b, const float p,
487 float *const restrict out, const float *const restrict mask, const size_t stride)
488 {
489 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
490 {
491 const float local_opacity = mask[i];
492 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
493 {
494 out[j + k] = a[j + k] * (1.0f - local_opacity) + a[j + k] / fmaxf(p * b[j + k], 1e-6f) * local_opacity;
495 }
496 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
497 }
498 }
499
500 /* divide inverse */
501 #ifdef _OPENMP
502 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
503 #endif
_blend_divide_inverse(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)504 static void _blend_divide_inverse(const float *const restrict a, const float *const restrict b, const float p,
505 float *const restrict out, const float *const restrict mask, const size_t stride)
506 {
507 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
508 {
509 const float local_opacity = mask[i];
510 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
511 {
512 out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] / fmaxf(p * a[j + k], 1e-6f) * local_opacity;
513 }
514 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
515 }
516 }
517
518 /* average */
519 #ifdef _OPENMP
520 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
521 #endif
_blend_average(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)522 static void _blend_average(const float *const restrict a, const float *const restrict b, const float p,
523 float *const restrict out, const float *const restrict mask, const size_t stride)
524 {
525 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
526 {
527 const float local_opacity = mask[i];
528 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
529 {
530 out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] + b[j + k]) / 2.0f * local_opacity;
531 }
532 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
533 }
534 }
535
536 /* geometric mean */
537 #ifdef _OPENMP
538 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
539 #endif
_blend_geometric_mean(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)540 static void _blend_geometric_mean(const float *const restrict a, const float *const restrict b, const float p,
541 float *const restrict out, const float *const restrict mask, const size_t stride)
542 {
543 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
544 {
545 const float local_opacity = mask[i];
546 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
547 {
548 out[j + k] = a[j + k] * (1.0f - local_opacity) + sqrtf(fmax(a[j + k] * b[j + k], 0.0f)) * local_opacity;
549 }
550 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
551 }
552 }
553
554 /* harmonic mean */
555 #ifdef _OPENMP
556 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
557 #endif
_blend_harmonic_mean(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)558 static void _blend_harmonic_mean(const float *const restrict a, const float *const restrict b, const float p,
559 float *const restrict out, const float *const restrict mask, const size_t stride)
560 {
561 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
562 {
563 const float local_opacity = mask[i];
564 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
565 {
566 // consider that pixel values should be positive
567 out[j + k] = a[j + k] * (1.0f - local_opacity)
568 + 2.0f * a[j + k] * b[j + k] / (fmaxf(a[j + k], 5e-7f) + fmaxf(b[j + k], 5e-7f)) * local_opacity;
569 }
570 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
571 }
572 }
573
574 /* chromaticity */
575 #ifdef _OPENMP
576 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
577 #endif
_blend_chromaticity(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)578 static void _blend_chromaticity(const float *const restrict a, const float *const restrict b, const float p,
579 float *const restrict out, const float *const restrict mask, const size_t stride)
580 {
581 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
582 {
583 const float local_opacity = mask[i];
584 const float norm_a = fmax(sqrtf(sqf(a[j]) + sqf(a[j + 1]) + sqf(a[j + 2])), 1e-6f);
585 const float norm_b = fmax(sqrtf(sqf(b[j]) + sqf(b[j + 1]) + sqf(b[j + 2])), 1e-6f);
586 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
587 {
588 out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] * norm_a / norm_b * local_opacity;
589 }
590 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
591 }
592 }
593
594 /* luminance */
595 #ifdef _OPENMP
596 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
597 #endif
_blend_luminance(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)598 static void _blend_luminance(const float *const restrict a, const float *const restrict b, const float p,
599 float *const restrict out, const float *const restrict mask, const size_t stride)
600 {
601 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
602 {
603 const float local_opacity = mask[i];
604 const float norm_a = fmax(sqrtf(sqf(a[j]) + sqf(a[j + 1]) + sqf(a[j + 2])), 1e-6f);
605 const float norm_b = fmax(sqrtf(sqf(b[j]) + sqf(b[j + 1]) + sqf(b[j + 2])), 1e-6f);
606 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
607 {
608 out[j + k] = a[j + k] * (1.0f - local_opacity) + a[j + k] * norm_b / norm_a * local_opacity;
609 }
610 out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
611 }
612 }
613
614 /* blend only R-channel in RGB color space without any clamping */
615 #ifdef _OPENMP
616 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
617 #endif
_blend_RGB_R(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)618 static void _blend_RGB_R(const float *const restrict a, const float *const restrict b, const float p,
619 float *const restrict out, const float *const restrict mask, const size_t stride)
620 {
621 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
622 {
623 const float local_opacity = mask[i];
624 out[j + 0] = a[j + 0] * (1.0f - local_opacity) + p * b[j + 0] * local_opacity;
625 out[j + 1] = a[j + 1];
626 out[j + 2] = a[j + 2];
627 out[j + 3] = local_opacity;
628 }
629 }
630
631 /* blend only R-channel in RGB color space without any clamping */
632 #ifdef _OPENMP
633 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
634 #endif
_blend_RGB_G(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)635 static void _blend_RGB_G(const float *const restrict a, const float *const restrict b, const float p,
636 float *const restrict out, const float *const restrict mask, const size_t stride)
637 {
638 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
639 {
640 const float local_opacity = mask[i];
641 out[j + 0] = a[j + 0];
642 out[j + 1] = a[j + 1] * (1.0f - local_opacity) + p * b[j + 1] * local_opacity;
643 out[j + 2] = a[j + 2];
644 out[j + 3] = local_opacity;
645 }
646 }
647
648 /* blend only R-channel in RGB color space without any clamping */
649 #ifdef _OPENMP
650 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
651 #endif
_blend_RGB_B(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)652 static void _blend_RGB_B(const float *const restrict a, const float *const restrict b, const float p,
653 float *const restrict out, const float *const restrict mask, const size_t stride)
654 {
655 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
656 {
657 const float local_opacity = mask[i];
658 out[j + 0] = a[j + 0];
659 out[j + 1] = a[j + 1];
660 out[j + 2] = a[j + 2] * (1.0f - local_opacity) + p * b[j + 2] * local_opacity;
661 out[j + 3] = local_opacity;
662 }
663 }
664
665
_choose_blend_func(const unsigned int blend_mode)666 static _blend_row_func *_choose_blend_func(const unsigned int blend_mode)
667 {
668 _blend_row_func *blend = NULL;
669
670 /* select the blend operator */
671 switch(blend_mode & DEVELOP_BLEND_MODE_MASK)
672 {
673 case DEVELOP_BLEND_MULTIPLY:
674 blend = _blend_multiply;
675 break;
676 case DEVELOP_BLEND_AVERAGE:
677 blend = _blend_average;
678 break;
679 case DEVELOP_BLEND_ADD:
680 blend = _blend_add;
681 break;
682 case DEVELOP_BLEND_SUBTRACT:
683 blend = _blend_subtract;
684 break;
685 case DEVELOP_BLEND_SUBTRACT_INVERSE:
686 blend = _blend_subtract_inverse;
687 break;
688 case DEVELOP_BLEND_DIFFERENCE:
689 case DEVELOP_BLEND_DIFFERENCE2:
690 blend = _blend_difference;
691 break;
692 case DEVELOP_BLEND_DIVIDE:
693 blend = _blend_divide;
694 break;
695 case DEVELOP_BLEND_DIVIDE_INVERSE:
696 blend = _blend_divide_inverse;
697 break;
698 case DEVELOP_BLEND_LIGHTNESS:
699 blend = _blend_luminance;
700 break;
701 case DEVELOP_BLEND_CHROMATICITY:
702 blend = _blend_chromaticity;
703 break;
704 case DEVELOP_BLEND_RGB_R:
705 blend = _blend_RGB_R;
706 break;
707 case DEVELOP_BLEND_RGB_G:
708 blend = _blend_RGB_G;
709 break;
710 case DEVELOP_BLEND_RGB_B:
711 blend = _blend_RGB_B;
712 break;
713 case DEVELOP_BLEND_GEOMETRIC_MEAN:
714 blend = _blend_geometric_mean;
715 break;
716 case DEVELOP_BLEND_HARMONIC_MEAN:
717 blend = _blend_harmonic_mean;
718 break;
719
720 /* fallback to normal blend */
721 default:
722 blend = _blend_normal;
723 break;
724 }
725
726 return blend;
727 }
728
729
730 #ifdef _OPENMP
731 #pragma omp declare simd aligned(rgb: 16) uniform(profile)
732 #endif
_rgb_luminance(const float * const restrict rgb,const dt_iop_order_iccprofile_info_t * const restrict profile)733 static inline float _rgb_luminance(const float *const restrict rgb,
734 const dt_iop_order_iccprofile_info_t *const restrict profile)
735 {
736 float value = 0.0f;
737 if(profile)
738 value = dt_ioppr_get_rgb_matrix_luminance(rgb, profile->matrix_in, profile->lut_in,
739 profile->unbounded_coeffs_in, profile->lutsize,
740 profile->nonlinearlut);
741 else
742 value = 0.3f * rgb[0] + 0.59f * rgb[1] + 0.11f * rgb[2];
743 return value;
744 }
745
746 #ifdef _OPENMP
747 #pragma omp declare simd aligned(rgb, JzCzhz: 16) uniform(profile)
748 #endif
_rgb_to_JzCzhz(const float * const restrict rgb,float * const restrict JzCzhz,const dt_iop_order_iccprofile_info_t * const restrict profile)749 static inline void _rgb_to_JzCzhz(const float *const restrict rgb, float *const restrict JzCzhz,
750 const dt_iop_order_iccprofile_info_t *const restrict profile)
751 {
752 float JzAzBz[3] DT_ALIGNED_PIXEL = { 0.0f, 0.0f, 0.0f };
753
754 if(profile)
755 {
756 float XYZ_D65[3] DT_ALIGNED_PIXEL = { 0.0f, 0.0f, 0.0f };
757 // use the matrix_out of the hacked profile for blending to use the
758 // conversion from RGB to XYZ D65 (instead of XYZ D50)
759 dt_ioppr_rgb_matrix_to_xyz(rgb, XYZ_D65, profile->matrix_out, profile->lut_in, profile->unbounded_coeffs_in,
760 profile->lutsize, profile->nonlinearlut);
761 dt_XYZ_2_JzAzBz(XYZ_D65, JzAzBz);
762 }
763 else
764 {
765 // This should not happen (we don't know what RGB is), but use this when profile is not defined
766 dt_XYZ_2_JzAzBz(rgb, JzAzBz);
767 }
768
769 dt_JzAzBz_2_JzCzhz(JzAzBz, JzCzhz);
770 }
771
772
773 #ifdef _OPENMP
774 #pragma omp declare simd aligned(a, b:16) uniform(channel, profile, stride)
775 #endif
_display_channel(const float * const restrict a,float * const restrict b,const float * const restrict mask,const size_t stride,const int channel,const float * const restrict boost_factors,const dt_iop_order_iccprofile_info_t * const profile)776 static void _display_channel(const float *const restrict a, float *const restrict b,
777 const float *const restrict mask, const size_t stride, const int channel,
778 const float *const restrict boost_factors,
779 const dt_iop_order_iccprofile_info_t *const profile)
780 {
781 switch(channel)
782 {
783 case DT_DEV_PIXELPIPE_DISPLAY_R:
784 {
785 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_RED_in]);
786 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
787 {
788 const float c = clamp_simd(a[j + 0] * factor);
789 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
790 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
791 }
792 break;
793 }
794 case DT_DEV_PIXELPIPE_DISPLAY_R | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
795 {
796 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_RED_out]);
797 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
798 {
799 const float c = clamp_simd(b[j + 0] * factor);
800 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
801 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
802 }
803 break;
804 }
805 case DT_DEV_PIXELPIPE_DISPLAY_G:
806 {
807 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GREEN_in]);
808 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
809 {
810 const float c = clamp_simd(a[j + 1] * factor);
811 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
812 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
813 }
814 break;
815 }
816 case DT_DEV_PIXELPIPE_DISPLAY_G | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
817 {
818 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GREEN_out]);
819 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
820 {
821 const float c = clamp_simd(b[j + 1] * factor);
822 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
823 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
824 }
825 break;
826 }
827 case DT_DEV_PIXELPIPE_DISPLAY_B:
828 {
829 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_BLUE_in]);
830 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
831 {
832 const float c = clamp_simd(a[j + 2] * factor);
833 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
834 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
835 }
836 break;
837 }
838 case DT_DEV_PIXELPIPE_DISPLAY_B | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
839 {
840 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_BLUE_out]);
841 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
842 {
843 const float c = clamp_simd(b[j + 2] * factor);
844 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
845 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
846 }
847 break;
848 }
849 case DT_DEV_PIXELPIPE_DISPLAY_GRAY:
850 {
851 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GRAY_in]);
852 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
853 {
854 const float c = clamp_simd(_rgb_luminance(a + j, profile) * factor);
855 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
856 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
857 }
858 break;
859 }
860 case DT_DEV_PIXELPIPE_DISPLAY_GRAY | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
861 {
862 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GRAY_out]);
863 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
864 {
865 const float c = clamp_simd(_rgb_luminance(b + j, profile) * factor);
866 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
867 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
868 }
869 break;
870 }
871 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Jz:
872 {
873 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Jz_in]);
874 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
875 {
876 float JzCzhz[3] DT_ALIGNED_PIXEL;
877 _rgb_to_JzCzhz(a + j, JzCzhz, profile);
878 const float c = clamp_simd(JzCzhz[0] * factor);
879 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
880 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
881 }
882 break;
883 }
884 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Jz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
885 {
886 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Jz_out]);
887 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
888 {
889 float JzCzhz[3] DT_ALIGNED_PIXEL;
890 _rgb_to_JzCzhz(b + j, JzCzhz, profile);
891 const float c = clamp_simd(JzCzhz[0] * factor);
892 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
893 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
894 }
895 break;
896 }
897 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Cz:
898 {
899 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Cz_in]);
900 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
901 {
902 float JzCzhz[3] DT_ALIGNED_PIXEL;
903 _rgb_to_JzCzhz(a + j, JzCzhz, profile);
904 const float c = clamp_simd(JzCzhz[1] * factor);
905 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
906 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
907 }
908 break;
909 }
910 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Cz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
911 {
912 const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Cz_out]);
913 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
914 {
915 float JzCzhz[3] DT_ALIGNED_PIXEL;
916 _rgb_to_JzCzhz(b + j, JzCzhz, profile);
917 const float c = clamp_simd(JzCzhz[1] * factor);
918 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
919 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
920 }
921 break;
922 }
923 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_hz:
924 // no boost factor for hues
925 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
926 {
927 float JzCzhz[3] DT_ALIGNED_PIXEL;
928 _rgb_to_JzCzhz(a + j, JzCzhz, profile);
929 const float c = clamp_simd(JzCzhz[2]);
930 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
931 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
932 }
933 break;
934 case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_hz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
935 // no boost factor for hues
936 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
937 {
938 float JzCzhz[3] DT_ALIGNED_PIXEL;
939 _rgb_to_JzCzhz(b + j, JzCzhz, profile);
940 const float c = clamp_simd(JzCzhz[2]);
941 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
942 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
943 }
944 break;
945 default:
946 for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
947 {
948 for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = 0.0f;
949 b[j + DT_BLENDIF_RGB_BCH] = mask[i];
950 }
951 break;
952 }
953 }
954
955
956 #ifdef _OPENMP
957 #pragma omp declare simd aligned(a, b:16) uniform(stride)
958 #endif
_copy_mask(const float * const restrict a,float * const restrict b,const size_t stride)959 static inline void _copy_mask(const float *const restrict a, float *const restrict b, const size_t stride)
960 {
961 #ifdef _OPENMP
962 #pragma omp simd aligned(a, b: 16)
963 #endif
964 for(size_t x = DT_BLENDIF_RGB_BCH; x < stride; x += DT_BLENDIF_RGB_CH) b[x] = a[x];
965 }
966
dt_develop_blendif_rgb_jzczhz_blend(struct dt_dev_pixelpipe_iop_t * piece,const float * const restrict a,float * const restrict b,const struct dt_iop_roi_t * const roi_in,const struct dt_iop_roi_t * const roi_out,const float * const restrict mask,const dt_dev_pixelpipe_display_mask_t request_mask_display)967 void dt_develop_blendif_rgb_jzczhz_blend(struct dt_dev_pixelpipe_iop_t *piece, const float *const restrict a,
968 float *const restrict b, const struct dt_iop_roi_t *const roi_in,
969 const struct dt_iop_roi_t *const roi_out, const float *const restrict mask,
970 const dt_dev_pixelpipe_display_mask_t request_mask_display)
971 {
972 const dt_develop_blend_params_t *const d = (const dt_develop_blend_params_t *const)piece->blendop_data;
973
974 if(piece->colors != DT_BLENDIF_RGB_CH) return;
975
976 const int xoffs = roi_out->x - roi_in->x;
977 const int yoffs = roi_out->y - roi_in->y;
978 const int iwidth = roi_in->width;
979 const int owidth = roi_out->width;
980 const int oheight = roi_out->height;
981
982 // only non-zero if mask_display was set by an _earlier_ module
983 const dt_dev_pixelpipe_display_mask_t mask_display = piece->pipe->mask_display;
984
985 // process the blending operator
986 if(request_mask_display & DT_DEV_PIXELPIPE_DISPLAY_ANY)
987 {
988 dt_iop_order_iccprofile_info_t blend_profile;
989 const int use_profile = dt_develop_blendif_init_masking_profile(piece, &blend_profile,
990 DEVELOP_BLEND_CS_RGB_SCENE);
991 const dt_iop_order_iccprofile_info_t *profile = use_profile ? &blend_profile : NULL;
992 const float *const restrict boost_factors = d->blendif_boost_factors;
993 const dt_dev_pixelpipe_display_mask_t channel = request_mask_display & DT_DEV_PIXELPIPE_DISPLAY_ANY;
994
995 #ifdef _OPENMP
996 #pragma omp parallel for schedule(static) default(none) \
997 dt_omp_firstprivate(a, b, mask, channel, oheight, owidth, iwidth, xoffs, yoffs, boost_factors, profile)
998 #endif
999 for(size_t y = 0; y < oheight; y++)
1000 {
1001 const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1002 const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1003 const size_t m_start = y * owidth;
1004 _display_channel(a + a_start, b + b_start, mask + m_start, owidth, channel, boost_factors, profile);
1005 }
1006 }
1007 else
1008 {
1009 const float p = exp2f(d->blend_parameter);
1010 _blend_row_func *const blend = _choose_blend_func(d->blend_mode);
1011
1012 float *tmp_buffer = dt_alloc_align_float(owidth * oheight * DT_BLENDIF_RGB_CH);
1013 if (tmp_buffer != NULL)
1014 {
1015 dt_iop_image_copy(tmp_buffer, b, owidth * oheight * DT_BLENDIF_RGB_CH);
1016 if((d->blend_mode & DEVELOP_BLEND_REVERSE) == DEVELOP_BLEND_REVERSE)
1017 {
1018 #ifdef _OPENMP
1019 #pragma omp parallel for schedule(static) default(none) \
1020 dt_omp_firstprivate(a, b, tmp_buffer, mask, blend, oheight, owidth, iwidth, xoffs, yoffs, p)
1021 #endif
1022 for(size_t y = 0; y < oheight; y++)
1023 {
1024 const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1025 const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1026 const size_t m_start = y * owidth;
1027 blend(tmp_buffer + b_start, a + a_start, p, b + b_start, mask + m_start, owidth);
1028 }
1029 }
1030 else
1031 {
1032 #ifdef _OPENMP
1033 #pragma omp parallel for schedule(static) default(none) \
1034 dt_omp_firstprivate(a, b, tmp_buffer, mask, blend, oheight, owidth, iwidth, xoffs, yoffs, p)
1035 #endif
1036 for(size_t y = 0; y < oheight; y++)
1037 {
1038 const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1039 const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1040 const size_t m_start = y * owidth;
1041 blend(a + a_start, tmp_buffer + b_start, p, b + b_start, mask + m_start, owidth);
1042 }
1043 }
1044 dt_free_align(tmp_buffer);
1045 }
1046 }
1047
1048 if(mask_display & DT_DEV_PIXELPIPE_DISPLAY_MASK)
1049 {
1050 const size_t stride = owidth * DT_BLENDIF_RGB_CH;
1051 #ifdef _OPENMP
1052 #pragma omp parallel for schedule(static) default(none) \
1053 dt_omp_firstprivate(a, b, oheight, stride, iwidth, xoffs, yoffs)
1054 #endif
1055 for(size_t y = 0; y < oheight; y++)
1056 {
1057 const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1058 const size_t b_start = y * stride;
1059 _copy_mask(a + a_start, b + b_start, stride);
1060 }
1061 }
1062 }
1063
1064 // tools/update_modelines.sh
1065 // remove-trailing-space on;
1066 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
1067 // vim: shiftwidth=2 expandtab tabstop=2 cindent
1068 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
1069