1 /*
2     This file is part of darktable,
3     Copyright (C) 2011-2021 darktable developers.
4 
5     darktable is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     darktable is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with darktable.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
/* Per-file GCC optimisation settings: loop transformations (unrolling, vectorisation,
 * interchange, ...) plus relaxed floating-point rules for the code in this file.
 * NOTE(review): "fast-math"/"finite-math-only" let the compiler assume that no NaN or
 * infinite values occur - keep that in mind when reasoning about the clamps below. */
#if defined(__GNUC__)
#pragma GCC optimize("unroll-loops", "tree-loop-if-convert", "tree-loop-distribution", "no-strict-aliasing",      \
                     "loop-interchange",  "tree-loop-im", "unswitch-loops",                  \
                     "tree-loop-ivcanon", "ira-loop-pressure", "split-ivs-in-unroller", "tree-loop-vectorize",    \
                     "variable-expansion-in-unroller", "split-loops", "ivopts", "predictive-commoning",           \
                        "finite-math-only", "fp-contract=fast", \
                     "fast-math", "no-math-errno")
#endif
27 
28 #include "common/colorspaces_inline_conversions.h"
29 #include "common/imagebuf.h"
30 #include "develop/blend.h"
31 #include "develop/imageop.h"
32 #include "develop/openmp_maths.h"
33 #include <math.h>
34 
// number of floats stored per pixel in the interleaved pixel buffers (loop step of every row function)
#define DT_BLENDIF_RGB_CH 4
// number of colour channels that get blended; it is also the index of the 4th float of a pixel,
// into which the blend functions store the local opacity
#define DT_BLENDIF_RGB_BCH 3
37 
38 
/*
 * Common signature of all blend operators in this file. One call processes a row of pixels:
 *   a, b   - interleaved input buffers (DT_BLENDIF_RGB_CH floats per pixel)
 *   p      - scalar blend parameter; only some operators use it (e.g. as a multiplier on b)
 *   out    - interleaved result buffer, same layout as a and b
 *   mask   - one opacity value per pixel
 *   stride - number of pixels to process
 */
typedef void(_blend_row_func)(const float *const restrict a, const float *const restrict b, const float p,
                              float *const restrict out, const float *const restrict mask, const size_t stride);
41 
42 
/*
 * Evaluate the trapezoid-shaped response of one blendif channel for a single value.
 *
 * parameters[0..3] are the four thresholds, parameters[4] is the multiplier used on the
 * rising edge and parameters[5] the multiplier used on the falling edge:
 *   value <= parameters[0]                  -> 0
 *   parameters[0] < value <  parameters[1]  -> (value - parameters[0]) * parameters[4]
 *   parameters[1] <= value <= parameters[2] -> 1
 *   parameters[2] < value <  parameters[3]  -> 1 - (value - parameters[2]) * parameters[5]
 *   anything else                           -> 0
 * When invert_mask is non-zero the complement (1 - response) is returned.
 */
#ifdef _OPENMP
#pragma omp declare simd uniform(parameters, invert_mask)
#endif
static inline float _blendif_compute_factor(const float value, const unsigned int invert_mask,
                                            const float *const restrict parameters)
{
  // outside of the trapezoid unless one of the cases below applies
  float response = 0.0f;

  if(!(value <= parameters[0]))
  {
    if(value < parameters[1])
    {
      // rising edge
      response = (value - parameters[0]) * parameters[4];
    }
    else if(value <= parameters[2])
    {
      // plateau
      response = 1.0f;
    }
    else if(value < parameters[3])
    {
      // falling edge
      response = 1.0f - (value - parameters[2]) * parameters[5];
    }
  }

  // inverted channel?
  if(invert_mask) return 1.0f - response;
  return response;
}
77 
78 #ifdef _OPENMP
79 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride, profile)
80 #endif
_blendif_gray(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask,const dt_iop_order_iccprofile_info_t * const restrict profile)81 static inline void _blendif_gray(const float *const restrict pixels, float *const restrict mask,
82                                  const size_t stride, const float *const restrict parameters,
83                                  const unsigned int invert_mask,
84                                  const dt_iop_order_iccprofile_info_t *const restrict profile)
85 {
86   for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
87   {
88     const float value = dt_ioppr_get_rgb_matrix_luminance(pixels + j, profile->matrix_in, profile->lut_in,
89                                                           profile->unbounded_coeffs_in, profile->lutsize,
90                                                           profile->nonlinearlut);
91     mask[x] *= _blendif_compute_factor(value, invert_mask, parameters);
92   }
93 }
94 
95 #ifdef _OPENMP
96 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
97 #endif
_blendif_rgb_red(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)98 static inline void _blendif_rgb_red(const float *const restrict pixels, float *const restrict mask,
99                                     const size_t stride, const float *const restrict parameters,
100                                     const unsigned int invert_mask)
101 {
102   for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
103   {
104     mask[x] *= _blendif_compute_factor(pixels[j + 0], invert_mask, parameters);
105   }
106 }
107 
108 #ifdef _OPENMP
109 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
110 #endif
_blendif_rgb_green(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)111 static inline void _blendif_rgb_green(const float *const restrict pixels, float *const restrict mask,
112                                       const size_t stride, const float *const restrict parameters,
113                                       const unsigned int invert_mask)
114 {
115   for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
116   {
117     mask[x] *= _blendif_compute_factor(pixels[j + 1], invert_mask, parameters);
118   }
119 }
120 
121 #ifdef _OPENMP
122 #pragma omp declare simd aligned(pixels: 16) uniform(parameters, invert_mask, stride)
123 #endif
_blendif_rgb_blue(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int invert_mask)124 static inline void _blendif_rgb_blue(const float *const restrict pixels, float *const restrict mask,
125                                      const size_t stride, const float *const restrict parameters,
126                                      const unsigned int invert_mask)
127 {
128   for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
129   {
130     mask[x] *= _blendif_compute_factor(pixels[j + 2], invert_mask, parameters);
131   }
132 }
133 
134 #ifdef _OPENMP
135 #pragma omp declare simd aligned(pixels, invert_mask: 16) uniform(parameters, invert_mask, stride, profile)
136 #endif
_blendif_jzczhz(const float * const restrict pixels,float * const restrict mask,const size_t stride,const float * const restrict parameters,const unsigned int * const restrict invert_mask,const dt_iop_order_iccprofile_info_t * const restrict profile)137 static inline void _blendif_jzczhz(const float *const restrict pixels, float *const restrict mask,
138                                    const size_t stride, const float *const restrict parameters,
139                                    const unsigned int *const restrict invert_mask,
140                                    const dt_iop_order_iccprofile_info_t *const restrict profile)
141 {
142   for(size_t x = 0, j = 0; x < stride; x++, j += DT_BLENDIF_RGB_CH)
143   {
144     float XYZ_D65[3] DT_ALIGNED_PIXEL;
145     float JzAzBz[3] DT_ALIGNED_PIXEL;
146     float JzCzhz[3] DT_ALIGNED_PIXEL;
147 
148     // use the matrix_out of the hacked profile for blending to use the
149     // conversion from RGB to XYZ D65 (instead of XYZ D50)
150     dt_ioppr_rgb_matrix_to_xyz(pixels + j, XYZ_D65, profile->matrix_out, profile->lut_in,
151                                profile->unbounded_coeffs_in, profile->lutsize, profile->nonlinearlut);
152 
153     dt_XYZ_2_JzAzBz(XYZ_D65, JzAzBz);
154     dt_JzAzBz_2_JzCzhz(JzAzBz, JzCzhz);
155 
156     float factor = 1.0f;
157     for(size_t i = 0; i < 3; i++)
158       factor *= _blendif_compute_factor(JzCzhz[i], invert_mask[i],
159                                         parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * i);
160     mask[x] *= factor;
161   }
162 }
163 
164 #ifdef _OPENMP
165 #pragma omp declare simd aligned(pixels: 16) uniform(stride, blendif, parameters, profile)
166 #endif
_blendif_combine_channels(const float * const restrict pixels,float * const restrict mask,const size_t stride,const unsigned int blendif,const float * const restrict parameters,const dt_iop_order_iccprofile_info_t * const restrict profile)167 static void _blendif_combine_channels(const float *const restrict pixels, float *const restrict mask,
168                                       const size_t stride, const unsigned int blendif,
169                                       const float *const restrict parameters,
170                                       const dt_iop_order_iccprofile_info_t *const restrict profile)
171 {
172   if(blendif & (1 << DEVELOP_BLENDIF_GRAY_in))
173   {
174     const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_GRAY_in);
175     _blendif_gray(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GRAY_in,
176                   invert_mask, profile);
177   }
178 
179   if(blendif & (1 << DEVELOP_BLENDIF_RED_in))
180   {
181     const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_RED_in);
182     _blendif_rgb_red(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_RED_in,
183                      invert_mask);
184   }
185 
186   if(blendif & (1 << DEVELOP_BLENDIF_GREEN_in))
187   {
188     const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_GREEN_in);
189     _blendif_rgb_green(pixels, mask, stride,
190                        parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GREEN_in, invert_mask);
191   }
192 
193   if(blendif & (1 << DEVELOP_BLENDIF_BLUE_in))
194   {
195     const unsigned int invert_mask = (blendif >> 16) & (1 << DEVELOP_BLENDIF_BLUE_in);
196     _blendif_rgb_blue(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_BLUE_in,
197                       invert_mask);
198   }
199 
200   if(blendif & ((1 << DEVELOP_BLENDIF_Jz_in) | (1 << DEVELOP_BLENDIF_Cz_in) | (1 << DEVELOP_BLENDIF_hz_in)))
201   {
202     const unsigned int invert_mask[3] DT_ALIGNED_PIXEL = {
203         (blendif >> 16) & (1 << DEVELOP_BLENDIF_Jz_in),
204         (blendif >> 16) & (1 << DEVELOP_BLENDIF_Cz_in),
205         (blendif >> 16) & (1 << DEVELOP_BLENDIF_hz_in),
206     };
207     _blendif_jzczhz(pixels, mask, stride, parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_Jz_in,
208                     invert_mask, profile);
209   }
210 }
211 
/*
 * Turn the mask buffer into the final per-pixel blend opacity for the RGB/JzCzhz blendif path.
 *
 *   piece   - pixelpipe piece; its blendop_data is read as dt_develop_blend_params_t
 *   a       - interleaved pixel buffer addressed with the roi_in geometry
 *   b       - interleaved pixel buffer addressed with the roi_out geometry
 *   roi_in  - region of interest describing a
 *   roi_out - region of interest describing b and mask
 *   mask    - in/out, roi_out->width * roi_out->height floats; on entry the existing mask,
 *             on exit the mask combined with the parametric mask and the global opacity
 *
 * Three cases are handled:
 *   1. the mask is not conditional (or no channel is active): only inversion and global
 *      opacity are applied;
 *   2. a channel cancels the whole parametric mask: the mask is filled with a constant;
 *   3. otherwise the parametric mask is computed per pixel from a and b and combined with
 *      the incoming mask.
 *
 * The function returns without touching mask when piece->colors is not DT_BLENDIF_RGB_CH,
 * when the masking profile cannot be initialised or when the temporary buffer cannot be
 * allocated.
 */
void dt_develop_blendif_rgb_jzczhz_make_mask(struct dt_dev_pixelpipe_iop_t *piece,
                                             const float *const restrict a,
                                             const float *const restrict b,
                                             const struct dt_iop_roi_t *const roi_in,
                                             const struct dt_iop_roi_t *const roi_out,
                                             float *const restrict mask)
{
  const dt_develop_blend_params_t *const d = (const dt_develop_blend_params_t *const)piece->blendop_data;

  // only 4-float pixels are supported here
  if(piece->colors != DT_BLENDIF_RGB_CH) return;

  // position of the output region inside the input region, and the buffer dimensions
  const int xoffs = roi_out->x - roi_in->x;
  const int yoffs = roi_out->y - roi_in->y;
  const int iwidth = roi_in->width;
  const int owidth = roi_out->width;
  const int oheight = roi_out->height;

  const unsigned int any_channel_active = d->blendif & DEVELOP_BLENDIF_RGB_MASK;
  const unsigned int mask_inclusive = d->mask_combine & DEVELOP_COMBINE_INCL;
  const unsigned int mask_inversed = d->mask_combine & DEVELOP_COMBINE_INV;

  // invert the individual channels if the combine mode is inclusive
  const unsigned int blendif = d->blendif ^ (mask_inclusive ? DEVELOP_BLENDIF_RGB_MASK << 16 : 0);

  // a channel cancels the mask if the whole span is selected and the channel is inverted
  const unsigned int canceling_channel = (blendif >> 16) & ~blendif & DEVELOP_BLENDIF_RGB_MASK;

  // number of mask entries (one float per output pixel)
  const size_t buffsize = (size_t)owidth * oheight;

  // get the clipped opacity value  0 - 1
  const float global_opacity = clamp_simd(d->opacity / 100.0f);

  if(!(d->mask_mode & DEVELOP_MASK_CONDITIONAL) || (!canceling_channel && !any_channel_active))
  {
    // mask is not conditional, invert the mask if required
    if(mask_inversed)
    {
#ifdef _OPENMP
#pragma omp parallel for simd default(none) dt_omp_firstprivate(mask, buffsize, global_opacity) schedule(static)
#endif
      for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x]);
    }
    else
    {
      dt_iop_image_mul_const(mask,global_opacity,owidth,oheight,1); // mask[k] *= global_opacity;
    }
  }
  else if(canceling_channel || !any_channel_active)
  {
    // one of the conditional channel selects nothing
    // this means that the conditional opacity of all pixels is the same
    // and depends on whether the mask combination is inclusive and whether the mask is inverted
    const float opac = ((mask_inversed == 0) ^ (mask_inclusive == 0)) ? global_opacity : 0.0f;
    dt_iop_image_fill(mask,opac,owidth,oheight,1); // mask[k] = opac;
  }
  else
  {
    // we need to process all conditional channels

    // parameters, for every channel the 4 limits + pre-computed increasing slope and decreasing slope
    float parameters[DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_SIZE] DT_ALIGNED_ARRAY;
    dt_develop_blendif_process_parameters(parameters, d);

    // profile used to evaluate the channels; bail out (mask left untouched) if it is unavailable
    dt_iop_order_iccprofile_info_t blend_profile;
    if(!dt_develop_blendif_init_masking_profile(piece, &blend_profile, DEVELOP_BLEND_CS_RGB_SCENE))
    {
      return;
    }
    const dt_iop_order_iccprofile_info_t *profile = &blend_profile;

    // allocate space for a temporary mask buffer to split the computation of every channel
    float *const restrict temp_mask = dt_alloc_align_float(buffsize);
    if(!temp_mask)
    {
      // allocation failed: bail out, mask left untouched
      return;
    }

    // the worksharing loops below carry no nowait clause, so each one ends with a barrier
    // and every stage sees the complete result of the previous one
#ifdef _OPENMP
#pragma omp parallel default(none) \
  dt_omp_firstprivate(temp_mask, mask, a, b, oheight, owidth, iwidth, yoffs, xoffs, buffsize, \
                      blendif, profile, parameters, mask_inclusive, mask_inversed, global_opacity)
#endif
    {
#ifdef __SSE2__
      // flush denormals to zero to avoid performance penalty if there are a lot of zero values in the mask
      const int oldMode = _MM_GET_FLUSH_ZERO_MODE();
      _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

      // initialize the parametric mask
#ifdef _OPENMP
#pragma omp for simd schedule(static) aligned(temp_mask:64)
#endif
      for(size_t x = 0; x < buffsize; x++) temp_mask[x] = 1.0f;

      // combine channels
      // first pass: channels evaluated on buffer a, row by row, using the roi_in geometry
      // NOTE(review): y is size_t while oheight/yoffs/xoffs are int - assumes they are non-negative
#ifdef _OPENMP
#pragma omp for schedule(static)
#endif
      for(size_t y = 0; y < oheight; y++)
      {
        const size_t start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
        _blendif_combine_channels(a + start, temp_mask + (y * owidth), owidth, blendif, parameters, profile);
      }
      // second pass: channels evaluated on buffer b; the flags and the parameters are shifted
      // by DEVELOP_BLENDIF_GRAY_out so that the "_out" channels line up with the "_in" slots
#ifdef _OPENMP
#pragma omp for schedule(static)
#endif
      for(size_t y = 0; y < oheight; y++)
      {
        const size_t start = (y * owidth) * DT_BLENDIF_RGB_CH;
        _blendif_combine_channels(b + start, temp_mask + (y * owidth), owidth, blendif >> DEVELOP_BLENDIF_GRAY_out,
                                  parameters + DEVELOP_BLENDIF_PARAMETER_ITEMS * DEVELOP_BLENDIF_GRAY_out,
                                  profile);
      }

      // apply global opacity
      // (the incoming mask and the parametric mask are merged according to the
      // inclusive/exclusive combine mode and the inversion flag)
      if(mask_inclusive)
      {
        if(mask_inversed)
        {
#ifdef _OPENMP
#pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
#endif
          for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x]) * temp_mask[x];
        }
        else
        {
#ifdef _OPENMP
#pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
#endif
          for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - (1.0f - mask[x]) * temp_mask[x]);
        }
      }
      else
      {
        if(mask_inversed)
        {
#ifdef _OPENMP
#pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
#endif
          for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * (1.0f - mask[x] * temp_mask[x]);
        }
        else
        {
#ifdef _OPENMP
#pragma omp for simd schedule(static) aligned(mask, temp_mask:64)
#endif
          for(size_t x = 0; x < buffsize; x++) mask[x] = global_opacity * mask[x] * temp_mask[x];
        }
      }

#ifdef __SSE2__
      // restore the flush-to-zero mode saved at the top of the parallel region
      _MM_SET_FLUSH_ZERO_MODE(oldMode);
#endif
    }

    dt_free_align(temp_mask);
  }
}
371 
372 
373 /* normal blend without any clamping */
374 #ifdef _OPENMP
375 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
376 #endif
_blend_normal(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)377 static void _blend_normal(const float *const restrict a, const float *const restrict b, const float p,
378                           float *const restrict out, const float *const restrict mask, const size_t stride)
379 {
380   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
381   {
382     const float local_opacity = mask[i];
383     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
384     {
385       out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] * local_opacity;
386     }
387     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
388   }
389 }
390 
391 /* multiply */
392 #ifdef _OPENMP
393 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
394 #endif
_blend_multiply(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)395 static void _blend_multiply(const float *const restrict a, const float *const restrict b, const float p,
396                             float *const restrict out, const float *const restrict mask, const size_t stride)
397 {
398   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
399   {
400     const float local_opacity = mask[i];
401     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
402     {
403       out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] * b[j + k] * p) * local_opacity;
404     }
405     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
406   }
407 }
408 
409 /* add */
410 #ifdef _OPENMP
411 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
412 #endif
_blend_add(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)413 static void _blend_add(const float *const restrict a, const float *const restrict b, const float p,
414                        float *const restrict out, const float *const restrict mask, const size_t stride)
415 {
416   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
417   {
418     const float local_opacity = mask[i];
419     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
420     {
421       out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] + p * b[j + k]) * local_opacity;
422     }
423     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
424   }
425 }
426 
427 /* subtract */
428 #ifdef _OPENMP
429 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
430 #endif
_blend_subtract(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)431 static void _blend_subtract(const float *const restrict a, const float *const restrict b, const float p,
432                             float *const restrict out, const float *const restrict mask, const size_t stride)
433 {
434   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
435   {
436     const float local_opacity = mask[i];
437     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
438     {
439       out[j + k] = a[j + k] * (1.0f - local_opacity) + fmaxf(a[j + k] - p * b[j + k], 0.0f) * local_opacity;
440     }
441     out[j + 3] = local_opacity;
442   }
443 }
444 
445 /* subtract inverse */
446 #ifdef _OPENMP
447 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
448 #endif
_blend_subtract_inverse(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)449 static void _blend_subtract_inverse(const float *const restrict a, const float *const restrict b, const float p,
450                                     float *const restrict out, const float *const restrict mask,
451                                     const size_t stride)
452 {
453   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
454   {
455     const float local_opacity = mask[i];
456     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
457     {
458       out[j + k] = a[j + k] * (1.0f - local_opacity) + fmaxf(b[j + k] - p * a[j + k], 0.0f) * local_opacity;
459     }
460     out[j + 3] = local_opacity;
461   }
462 }
463 
464 /* difference */
465 #ifdef _OPENMP
466 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
467 #endif
_blend_difference(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)468 static void _blend_difference(const float *const restrict a, const float *const restrict b, const float p,
469                               float *const restrict out, const float *const restrict mask, const size_t stride)
470 {
471   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
472   {
473     const float local_opacity = mask[i];
474     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
475     {
476       out[j + k] = a[j + k] * (1.0f - local_opacity) + fabsf(a[j + k] - b[j + k]) * local_opacity;
477     }
478     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
479   }
480 }
481 
482 /* divide */
483 #ifdef _OPENMP
484 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
485 #endif
_blend_divide(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)486 static void _blend_divide(const float *const restrict a, const float *const restrict b, const float p,
487                           float *const restrict out, const float *const restrict mask, const size_t stride)
488 {
489   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
490   {
491     const float local_opacity = mask[i];
492     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
493     {
494       out[j + k] = a[j + k] * (1.0f - local_opacity) + a[j + k] / fmaxf(p * b[j + k], 1e-6f) * local_opacity;
495     }
496     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
497   }
498 }
499 
500 /* divide inverse */
501 #ifdef _OPENMP
502 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
503 #endif
_blend_divide_inverse(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)504 static void _blend_divide_inverse(const float *const restrict a, const float *const restrict b, const float p,
505                                   float *const restrict out, const float *const restrict mask, const size_t stride)
506 {
507   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
508   {
509     const float local_opacity = mask[i];
510     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
511     {
512       out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] / fmaxf(p * a[j + k], 1e-6f) * local_opacity;
513     }
514     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
515   }
516 }
517 
518 /* average */
519 #ifdef _OPENMP
520 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
521 #endif
_blend_average(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)522 static void _blend_average(const float *const restrict a, const float *const restrict b, const float p,
523                            float *const restrict out, const float *const restrict mask, const size_t stride)
524 {
525   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
526   {
527     const float local_opacity = mask[i];
528     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
529     {
530       out[j + k] = a[j + k] * (1.0f - local_opacity) + (a[j + k] + b[j + k]) / 2.0f * local_opacity;
531     }
532     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
533   }
534 }
535 
536 /* geometric mean */
537 #ifdef _OPENMP
538 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
539 #endif
_blend_geometric_mean(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)540 static void _blend_geometric_mean(const float *const restrict a, const float *const restrict b, const float p,
541                                   float *const restrict out, const float *const restrict mask, const size_t stride)
542 {
543   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
544   {
545     const float local_opacity = mask[i];
546     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
547     {
548       out[j + k] = a[j + k] * (1.0f - local_opacity) + sqrtf(fmax(a[j + k] * b[j + k], 0.0f)) * local_opacity;
549     }
550     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
551   }
552 }
553 
554 /* harmonic mean */
555 #ifdef _OPENMP
556 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
557 #endif
_blend_harmonic_mean(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)558 static void _blend_harmonic_mean(const float *const restrict a, const float *const restrict b, const float p,
559                                  float *const restrict out, const float *const restrict mask, const size_t stride)
560 {
561   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
562   {
563     const float local_opacity = mask[i];
564     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
565     {
566       // consider that pixel values should be positive
567       out[j + k] = a[j + k] * (1.0f - local_opacity)
568           + 2.0f * a[j + k] * b[j + k] / (fmaxf(a[j + k], 5e-7f) + fmaxf(b[j + k], 5e-7f)) * local_opacity;
569     }
570     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
571   }
572 }
573 
574 /* chromaticity */
575 #ifdef _OPENMP
576 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
577 #endif
_blend_chromaticity(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)578 static void _blend_chromaticity(const float *const restrict a, const float *const restrict b, const float p,
579                                 float *const restrict out, const float *const restrict mask, const size_t stride)
580 {
581   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
582   {
583     const float local_opacity = mask[i];
584     const float norm_a = fmax(sqrtf(sqf(a[j]) + sqf(a[j + 1]) + sqf(a[j + 2])), 1e-6f);
585     const float norm_b = fmax(sqrtf(sqf(b[j]) + sqf(b[j + 1]) + sqf(b[j + 2])), 1e-6f);
586     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
587     {
588       out[j + k] = a[j + k] * (1.0f - local_opacity) + b[j + k] * norm_a / norm_b * local_opacity;
589     }
590     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
591   }
592 }
593 
594 /* luminance */
595 #ifdef _OPENMP
596 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
597 #endif
_blend_luminance(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)598 static void _blend_luminance(const float *const restrict a, const float *const restrict b, const float p,
599                              float *const restrict out, const float *const restrict mask, const size_t stride)
600 {
601   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
602   {
603     const float local_opacity = mask[i];
604     const float norm_a = fmax(sqrtf(sqf(a[j]) + sqf(a[j + 1]) + sqf(a[j + 2])), 1e-6f);
605     const float norm_b = fmax(sqrtf(sqf(b[j]) + sqf(b[j + 1]) + sqf(b[j + 2])), 1e-6f);
606     for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++)
607     {
608       out[j + k] = a[j + k] * (1.0f - local_opacity) + a[j + k] * norm_b / norm_a * local_opacity;
609     }
610     out[j + DT_BLENDIF_RGB_BCH] = local_opacity;
611   }
612 }
613 
614 /* blend only R-channel in RGB color space without any clamping */
615 #ifdef _OPENMP
616 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
617 #endif
_blend_RGB_R(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)618 static void _blend_RGB_R(const float *const restrict a, const float *const restrict b, const float p,
619                          float *const restrict out, const float *const restrict mask, const size_t stride)
620 {
621   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
622   {
623     const float local_opacity = mask[i];
624     out[j + 0] = a[j + 0] * (1.0f - local_opacity) + p * b[j + 0] * local_opacity;
625     out[j + 1] = a[j + 1];
626     out[j + 2] = a[j + 2];
627     out[j + 3] = local_opacity;
628   }
629 }
630 
631 /* blend only R-channel in RGB color space without any clamping */
632 #ifdef _OPENMP
633 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
634 #endif
_blend_RGB_G(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)635 static void _blend_RGB_G(const float *const restrict a, const float *const restrict b, const float p,
636                          float *const restrict out, const float *const restrict mask, const size_t stride)
637 {
638   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
639   {
640     const float local_opacity = mask[i];
641     out[j + 0] = a[j + 0];
642     out[j + 1] = a[j + 1] * (1.0f - local_opacity) + p * b[j + 1] * local_opacity;
643     out[j + 2] = a[j + 2];
644     out[j + 3] = local_opacity;
645   }
646 }
647 
648 /* blend only R-channel in RGB color space without any clamping */
649 #ifdef _OPENMP
650 #pragma omp declare simd aligned(a, b, out:16) uniform(p, stride)
651 #endif
_blend_RGB_B(const float * const restrict a,const float * const restrict b,const float p,float * const restrict out,const float * const restrict mask,const size_t stride)652 static void _blend_RGB_B(const float *const restrict a, const float *const restrict b, const float p,
653                          float *const restrict out, const float *const restrict mask, const size_t stride)
654 {
655   for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
656   {
657     const float local_opacity = mask[i];
658     out[j + 0] = a[j + 0];
659     out[j + 1] = a[j + 1];
660     out[j + 2] = a[j + 2] * (1.0f - local_opacity) + p * b[j + 2] * local_opacity;
661     out[j + 3] = local_opacity;
662   }
663 }
664 
665 
_choose_blend_func(const unsigned int blend_mode)666 static _blend_row_func *_choose_blend_func(const unsigned int blend_mode)
667 {
668   _blend_row_func *blend = NULL;
669 
670   /* select the blend operator */
671   switch(blend_mode & DEVELOP_BLEND_MODE_MASK)
672   {
673     case DEVELOP_BLEND_MULTIPLY:
674       blend = _blend_multiply;
675       break;
676     case DEVELOP_BLEND_AVERAGE:
677       blend = _blend_average;
678       break;
679     case DEVELOP_BLEND_ADD:
680       blend = _blend_add;
681       break;
682     case DEVELOP_BLEND_SUBTRACT:
683       blend = _blend_subtract;
684       break;
685     case DEVELOP_BLEND_SUBTRACT_INVERSE:
686       blend = _blend_subtract_inverse;
687       break;
688     case DEVELOP_BLEND_DIFFERENCE:
689     case DEVELOP_BLEND_DIFFERENCE2:
690       blend = _blend_difference;
691       break;
692     case DEVELOP_BLEND_DIVIDE:
693       blend = _blend_divide;
694       break;
695     case DEVELOP_BLEND_DIVIDE_INVERSE:
696       blend = _blend_divide_inverse;
697       break;
698     case DEVELOP_BLEND_LIGHTNESS:
699       blend = _blend_luminance;
700       break;
701     case DEVELOP_BLEND_CHROMATICITY:
702       blend = _blend_chromaticity;
703       break;
704     case DEVELOP_BLEND_RGB_R:
705       blend = _blend_RGB_R;
706       break;
707     case DEVELOP_BLEND_RGB_G:
708       blend = _blend_RGB_G;
709       break;
710     case DEVELOP_BLEND_RGB_B:
711       blend = _blend_RGB_B;
712       break;
713     case DEVELOP_BLEND_GEOMETRIC_MEAN:
714       blend = _blend_geometric_mean;
715       break;
716     case DEVELOP_BLEND_HARMONIC_MEAN:
717       blend = _blend_harmonic_mean;
718       break;
719 
720     /* fallback to normal blend */
721     default:
722       blend = _blend_normal;
723       break;
724   }
725 
726   return blend;
727 }
728 
729 
730 #ifdef _OPENMP
731 #pragma omp declare simd aligned(rgb: 16) uniform(profile)
732 #endif
_rgb_luminance(const float * const restrict rgb,const dt_iop_order_iccprofile_info_t * const restrict profile)733 static inline float _rgb_luminance(const float *const restrict rgb,
734                                    const dt_iop_order_iccprofile_info_t *const restrict profile)
735 {
736   float value = 0.0f;
737   if(profile)
738     value = dt_ioppr_get_rgb_matrix_luminance(rgb, profile->matrix_in, profile->lut_in,
739                                               profile->unbounded_coeffs_in, profile->lutsize,
740                                               profile->nonlinearlut);
741   else
742     value = 0.3f * rgb[0] + 0.59f * rgb[1] + 0.11f * rgb[2];
743   return value;
744 }
745 
746 #ifdef _OPENMP
747 #pragma omp declare simd aligned(rgb, JzCzhz: 16) uniform(profile)
748 #endif
_rgb_to_JzCzhz(const float * const restrict rgb,float * const restrict JzCzhz,const dt_iop_order_iccprofile_info_t * const restrict profile)749 static inline void _rgb_to_JzCzhz(const float *const restrict rgb, float *const restrict JzCzhz,
750                                   const dt_iop_order_iccprofile_info_t *const restrict profile)
751 {
752   float JzAzBz[3] DT_ALIGNED_PIXEL = { 0.0f, 0.0f, 0.0f };
753 
754   if(profile)
755   {
756     float XYZ_D65[3] DT_ALIGNED_PIXEL = { 0.0f, 0.0f, 0.0f };
757     // use the matrix_out of the hacked profile for blending to use the
758     // conversion from RGB to XYZ D65 (instead of XYZ D50)
759     dt_ioppr_rgb_matrix_to_xyz(rgb, XYZ_D65, profile->matrix_out, profile->lut_in, profile->unbounded_coeffs_in,
760                                profile->lutsize, profile->nonlinearlut);
761     dt_XYZ_2_JzAzBz(XYZ_D65, JzAzBz);
762   }
763   else
764   {
765     // This should not happen (we don't know what RGB is), but use this when profile is not defined
766     dt_XYZ_2_JzAzBz(rgb, JzAzBz);
767   }
768 
769   dt_JzAzBz_2_JzCzhz(JzAzBz, JzCzhz);
770 }
771 
772 
773 #ifdef _OPENMP
774 #pragma omp declare simd aligned(a, b:16) uniform(channel, profile, stride)
775 #endif
_display_channel(const float * const restrict a,float * const restrict b,const float * const restrict mask,const size_t stride,const int channel,const float * const restrict boost_factors,const dt_iop_order_iccprofile_info_t * const profile)776 static void _display_channel(const float *const restrict a, float *const restrict b,
777                              const float *const restrict mask, const size_t stride, const int channel,
778                              const float *const restrict boost_factors,
779                              const dt_iop_order_iccprofile_info_t *const profile)
780 {
781   switch(channel)
782   {
783     case DT_DEV_PIXELPIPE_DISPLAY_R:
784     {
785       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_RED_in]);
786       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
787       {
788         const float c = clamp_simd(a[j + 0] * factor);
789         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
790         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
791       }
792       break;
793     }
794     case DT_DEV_PIXELPIPE_DISPLAY_R | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
795     {
796       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_RED_out]);
797       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
798       {
799         const float c = clamp_simd(b[j + 0] * factor);
800         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
801         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
802       }
803       break;
804     }
805     case DT_DEV_PIXELPIPE_DISPLAY_G:
806     {
807       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GREEN_in]);
808       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
809       {
810         const float c = clamp_simd(a[j + 1] * factor);
811         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
812         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
813       }
814       break;
815     }
816     case DT_DEV_PIXELPIPE_DISPLAY_G | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
817     {
818       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GREEN_out]);
819       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
820       {
821         const float c = clamp_simd(b[j + 1] * factor);
822         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
823         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
824       }
825       break;
826     }
827     case DT_DEV_PIXELPIPE_DISPLAY_B:
828     {
829       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_BLUE_in]);
830       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
831       {
832         const float c = clamp_simd(a[j + 2] * factor);
833         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
834         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
835       }
836       break;
837     }
838     case DT_DEV_PIXELPIPE_DISPLAY_B | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
839     {
840       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_BLUE_out]);
841       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
842       {
843         const float c = clamp_simd(b[j + 2] * factor);
844         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
845         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
846       }
847       break;
848     }
849     case DT_DEV_PIXELPIPE_DISPLAY_GRAY:
850     {
851       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GRAY_in]);
852       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
853       {
854         const float c = clamp_simd(_rgb_luminance(a + j, profile) * factor);
855         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
856         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
857       }
858       break;
859     }
860     case DT_DEV_PIXELPIPE_DISPLAY_GRAY | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
861     {
862       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_GRAY_out]);
863       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
864       {
865         const float c = clamp_simd(_rgb_luminance(b + j, profile) * factor);
866         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
867         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
868       }
869       break;
870     }
871     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Jz:
872     {
873       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Jz_in]);
874       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
875       {
876         float JzCzhz[3] DT_ALIGNED_PIXEL;
877         _rgb_to_JzCzhz(a + j, JzCzhz, profile);
878         const float c = clamp_simd(JzCzhz[0] * factor);
879         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
880         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
881       }
882       break;
883     }
884     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Jz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
885     {
886       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Jz_out]);
887       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
888       {
889         float JzCzhz[3] DT_ALIGNED_PIXEL;
890         _rgb_to_JzCzhz(b + j, JzCzhz, profile);
891         const float c = clamp_simd(JzCzhz[0] * factor);
892         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
893         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
894       }
895       break;
896     }
897     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Cz:
898     {
899       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Cz_in]);
900       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
901       {
902         float JzCzhz[3] DT_ALIGNED_PIXEL;
903         _rgb_to_JzCzhz(a + j, JzCzhz, profile);
904         const float c = clamp_simd(JzCzhz[1] * factor);
905         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
906         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
907       }
908       break;
909     }
910     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_Cz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
911     {
912       const float factor = 1.0f / exp2f(boost_factors[DEVELOP_BLENDIF_Cz_out]);
913       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
914       {
915         float JzCzhz[3] DT_ALIGNED_PIXEL;
916         _rgb_to_JzCzhz(b + j, JzCzhz, profile);
917         const float c = clamp_simd(JzCzhz[1] * factor);
918         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
919         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
920       }
921       break;
922     }
923     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_hz:
924       // no boost factor for hues
925       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
926       {
927         float JzCzhz[3] DT_ALIGNED_PIXEL;
928         _rgb_to_JzCzhz(a + j, JzCzhz, profile);
929         const float c = clamp_simd(JzCzhz[2]);
930         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
931         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
932       }
933       break;
934     case DT_DEV_PIXELPIPE_DISPLAY_JzCzhz_hz | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT:
935       // no boost factor for hues
936       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
937       {
938         float JzCzhz[3] DT_ALIGNED_PIXEL;
939         _rgb_to_JzCzhz(b + j, JzCzhz, profile);
940         const float c = clamp_simd(JzCzhz[2]);
941         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = c;
942         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
943       }
944       break;
945     default:
946       for(size_t i = 0, j = 0; i < stride; i++, j += DT_BLENDIF_RGB_CH)
947       {
948         for(int k = 0; k < DT_BLENDIF_RGB_BCH; k++) b[j + k] = 0.0f;
949         b[j + DT_BLENDIF_RGB_BCH] = mask[i];
950       }
951       break;
952   }
953 }
954 
955 
956 #ifdef _OPENMP
957 #pragma omp declare simd aligned(a, b:16) uniform(stride)
958 #endif
_copy_mask(const float * const restrict a,float * const restrict b,const size_t stride)959 static inline void _copy_mask(const float *const restrict a, float *const restrict b, const size_t stride)
960 {
961 #ifdef _OPENMP
962 #pragma omp simd aligned(a, b: 16)
963 #endif
964   for(size_t x = DT_BLENDIF_RGB_BCH; x < stride; x += DT_BLENDIF_RGB_CH) b[x] = a[x];
965 }
966 
dt_develop_blendif_rgb_jzczhz_blend(struct dt_dev_pixelpipe_iop_t * piece,const float * const restrict a,float * const restrict b,const struct dt_iop_roi_t * const roi_in,const struct dt_iop_roi_t * const roi_out,const float * const restrict mask,const dt_dev_pixelpipe_display_mask_t request_mask_display)967 void dt_develop_blendif_rgb_jzczhz_blend(struct dt_dev_pixelpipe_iop_t *piece, const float *const restrict a,
968                                          float *const restrict b, const struct dt_iop_roi_t *const roi_in,
969                                          const struct dt_iop_roi_t *const roi_out, const float *const restrict mask,
970                                          const dt_dev_pixelpipe_display_mask_t request_mask_display)
971 {
972   const dt_develop_blend_params_t *const d = (const dt_develop_blend_params_t *const)piece->blendop_data;
973 
974   if(piece->colors != DT_BLENDIF_RGB_CH) return;
975 
976   const int xoffs = roi_out->x - roi_in->x;
977   const int yoffs = roi_out->y - roi_in->y;
978   const int iwidth = roi_in->width;
979   const int owidth = roi_out->width;
980   const int oheight = roi_out->height;
981 
982   // only non-zero if mask_display was set by an _earlier_ module
983   const dt_dev_pixelpipe_display_mask_t mask_display = piece->pipe->mask_display;
984 
985   // process the blending operator
986   if(request_mask_display & DT_DEV_PIXELPIPE_DISPLAY_ANY)
987   {
988     dt_iop_order_iccprofile_info_t blend_profile;
989     const int use_profile = dt_develop_blendif_init_masking_profile(piece, &blend_profile,
990                                                                     DEVELOP_BLEND_CS_RGB_SCENE);
991     const dt_iop_order_iccprofile_info_t *profile = use_profile ? &blend_profile : NULL;
992     const float *const restrict boost_factors = d->blendif_boost_factors;
993     const dt_dev_pixelpipe_display_mask_t channel = request_mask_display & DT_DEV_PIXELPIPE_DISPLAY_ANY;
994 
995 #ifdef _OPENMP
996 #pragma omp parallel for schedule(static) default(none) \
997   dt_omp_firstprivate(a, b, mask, channel, oheight, owidth, iwidth, xoffs, yoffs, boost_factors, profile)
998 #endif
999     for(size_t y = 0; y < oheight; y++)
1000     {
1001       const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1002       const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1003       const size_t m_start = y * owidth;
1004       _display_channel(a + a_start, b + b_start, mask + m_start, owidth, channel, boost_factors, profile);
1005     }
1006   }
1007   else
1008   {
1009     const float p = exp2f(d->blend_parameter);
1010     _blend_row_func *const blend = _choose_blend_func(d->blend_mode);
1011 
1012     float *tmp_buffer = dt_alloc_align_float(owidth * oheight * DT_BLENDIF_RGB_CH);
1013     if (tmp_buffer != NULL)
1014     {
1015       dt_iop_image_copy(tmp_buffer, b, owidth * oheight * DT_BLENDIF_RGB_CH);
1016       if((d->blend_mode & DEVELOP_BLEND_REVERSE) == DEVELOP_BLEND_REVERSE)
1017       {
1018 #ifdef _OPENMP
1019 #pragma omp parallel for schedule(static) default(none) \
1020   dt_omp_firstprivate(a, b, tmp_buffer, mask, blend, oheight, owidth, iwidth, xoffs, yoffs, p)
1021 #endif
1022         for(size_t y = 0; y < oheight; y++)
1023         {
1024           const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1025           const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1026           const size_t m_start = y * owidth;
1027           blend(tmp_buffer + b_start, a + a_start, p, b + b_start, mask + m_start, owidth);
1028         }
1029       }
1030       else
1031       {
1032 #ifdef _OPENMP
1033 #pragma omp parallel for schedule(static) default(none) \
1034   dt_omp_firstprivate(a, b, tmp_buffer, mask, blend, oheight, owidth, iwidth, xoffs, yoffs, p)
1035 #endif
1036         for(size_t y = 0; y < oheight; y++)
1037         {
1038           const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1039           const size_t b_start = y * owidth * DT_BLENDIF_RGB_CH;
1040           const size_t m_start = y * owidth;
1041           blend(a + a_start, tmp_buffer + b_start, p, b + b_start, mask + m_start, owidth);
1042         }
1043       }
1044       dt_free_align(tmp_buffer);
1045     }
1046   }
1047 
1048   if(mask_display & DT_DEV_PIXELPIPE_DISPLAY_MASK)
1049   {
1050     const size_t stride = owidth * DT_BLENDIF_RGB_CH;
1051 #ifdef _OPENMP
1052 #pragma omp parallel for schedule(static) default(none) \
1053   dt_omp_firstprivate(a, b, oheight, stride, iwidth, xoffs, yoffs)
1054 #endif
1055     for(size_t y = 0; y < oheight; y++)
1056     {
1057       const size_t a_start = ((y + yoffs) * iwidth + xoffs) * DT_BLENDIF_RGB_CH;
1058       const size_t b_start = y * stride;
1059       _copy_mask(a + a_start, b + b_start, stride);
1060     }
1061   }
1062 }
1063 
1064 // tools/update_modelines.sh
1065 // remove-trailing-space on;
1066 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
1067 // vim: shiftwidth=2 expandtab tabstop=2 cindent
1068 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
1069