1/*
2    This file is part of darktable,
3    copyright (c) 2009--2014 Ulrich Pegelow
4
5    darktable is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9
10    darktable is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
17*/
18
19#include "common.h"
20#include "colorspace.h"
21#include "color_conversion.h"
22
23
24__kernel void
25graduatedndp (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float4 color,
26              const float density, const float length_base, const float length_inc_x, const float length_inc_y)
27{
28  const int x = get_global_id(0);
29  const int y = get_global_id(1);
30
31  if(x >= width || y >= height) return;
32
33  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
34
35  const float len = length_base + y*length_inc_y + x*length_inc_x;
36
37  const float t = 0.693147181f * (density * clamp(0.5f+len, 0.0f, 1.0f)/8.0f);
38  const float d1 = t * t * 0.5f;
39  const float d2 = d1 * t * 0.333333333f;
40  const float d3 = d2 * t * 0.25f;
41  float dens = 1.0f + t + d1 + d2 + d3;
42  dens *= dens;
43  dens *= dens;
44  dens *= dens;
45
46  pixel.xyz = fmax((float4)0.0f, pixel / (color + ((float4)1.0f - color) * (float4)dens)).xyz;
47
48  write_imagef (out, (int2)(x, y), pixel);
49}
50
51
52__kernel void
53graduatedndm (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float4 color,
54              const float density, const float length_base, const float length_inc_x, const float length_inc_y)
55{
56  const int x = get_global_id(0);
57  const int y = get_global_id(1);
58
59  if(x >= width || y >= height) return;
60
61  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
62
63  const float len = length_base + y*length_inc_y + x*length_inc_x;
64
65  const float t = 0.693147181f * (-density * clamp(0.5f-len, 0.0f, 1.0f)/8.0f);
66  const float d1 = t * t * 0.5f;
67  const float d2 = d1 * t * 0.333333333f;
68  const float d3 = d2 * t * 0.25f;
69  float dens = 1.0f + t + d1 + d2 + d3;
70  dens *= dens;
71  dens *= dens;
72  dens *= dens;
73
74  pixel.xyz = fmax((float4)0.0f, pixel * (color + ((float4)1.0f - color) * (float4)dens)).xyz;
75
76  write_imagef (out, (int2)(x, y), pixel);
77}
78
79__kernel void
80colorize (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
81          const float mix, const float L, const float a, const float b)
82{
83  const int x = get_global_id(0);
84  const int y = get_global_id(1);
85
86  if(x >= width || y >= height) return;
87
88  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
89
90  pixel.x = pixel.x * mix + L - 50.0f * mix;
91  pixel.y = a;
92  pixel.z = b;
93
94  write_imagef (out, (int2)(x, y), pixel);
95}
96
97
98float
99GAUSS(float center, float wings, float x)
100{
101  const float b = -1.0f + center * 2.0f;
102  const float c = (wings / 10.0f) / 2.0f;
103  return exp(-(x-b)*(x-b)/(c*c));
104}
105
106
107__kernel void
108relight (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
109         const float center, const float wings, const float ev)
110{
111  const int x = get_global_id(0);
112  const int y = get_global_id(1);
113
114  if(x >= width || y >= height) return;
115
116  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
117
118  const float lightness = pixel.x/100.0f;
119  const float value = -1.0f+(lightness*2.0f);
120  float gauss = GAUSS(center, wings, value);
121
122  if(isnan(gauss) || isinf(gauss))
123    gauss = 0.0f;
124
125  float relight = 1.0f / exp2(-ev * clamp(gauss, 0.0f, 1.0f));
126
127  if(isnan(relight) || isinf(relight))
128    relight = 1.0f;
129
130  pixel.x = 100.0f * clamp(lightness*relight, 0.0f, 1.0f);
131
132  write_imagef (out, (int2)(x, y), pixel);
133}
134
135
136typedef enum _channelmixer_operation_mode_t
137{
138  OPERATION_MODE_RGB = 0,
139  OPERATION_MODE_GRAY = 1,
140  OPERATION_MODE_HSL_V1 = 2,
141  OPERATION_MODE_HSL_V2 = 3,
142} _channelmixer_operation_mode_t;
143
144
145__kernel void
146channelmixer (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
147              const int operation_mode, global const float *hsl_matrix,
148              global const float *rgb_matrix)
149{
150  const int x = get_global_id(0);
151  const int y = get_global_id(1);
152
153  if(x >= width || y >= height) return;
154
155  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
156  float4 opixel = (float4)(0.0f, 0.0f, 0.0f, pixel.w);
157  float gray, hmix, smix, lmix;
158
159  switch(operation_mode)
160  {
161    case OPERATION_MODE_RGB:
162      opixel.x = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f);
163      opixel.y = fmax(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f);
164      opixel.z = fmax(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f);
165      break;
166
167    case OPERATION_MODE_GRAY:
168      gray = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f);
169      opixel = (float4)(gray, gray, gray, pixel.w);
170      break;
171
172    case OPERATION_MODE_HSL_V1:
173      hmix = clamp(pixel.x * hsl_matrix[0], 0.0f, 1.0f) + pixel.y * hsl_matrix[1] + pixel.z * hsl_matrix[2];
174      smix = clamp(pixel.x * hsl_matrix[3], 0.0f, 1.0f) + pixel.y * hsl_matrix[4] + pixel.z * hsl_matrix[5];
175      lmix = clamp(pixel.x * hsl_matrix[6], 0.0f, 1.0f) + pixel.y * hsl_matrix[7] + pixel.z * hsl_matrix[8];
176
177      if( hmix != 0.0f || smix != 0.0f || lmix != 0.0f )
178      {
179        float4 hsl = RGB_2_HSL(pixel);
180        hsl.x = (hmix != 0.0f ) ? hmix : hsl.x;
181        hsl.y = (smix != 0.0f ) ? smix : hsl.y;
182        hsl.z = (lmix != 0.0f ) ? lmix : hsl.z;
183        pixel = HSL_2_RGB(hsl);
184      }
185
186      opixel.x = clamp(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f, 1.0f);
187      opixel.y = clamp(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f, 1.0f);
188      opixel.z = clamp(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f, 1.0f);
189      break;
190
191    case OPERATION_MODE_HSL_V2:
192      hmix = clamp(pixel.x * hsl_matrix[0] + pixel.y * hsl_matrix[1] + pixel.z * hsl_matrix[2], 0.0f, 1.0f);
193      smix = clamp(pixel.x * hsl_matrix[3] + pixel.y * hsl_matrix[4] + pixel.z * hsl_matrix[5], 0.0f, 1.0f);
194      lmix = clamp(pixel.x * hsl_matrix[6] + pixel.y * hsl_matrix[7] + pixel.z * hsl_matrix[8], 0.0f, 1.0f);
195      if( hmix != 0.0f || smix != 0.0f || lmix != 0.0f )
196      {
197        pixel = (float4)(clamp(pixel.x, 0.0f, 1.0f), clamp(pixel.y, 0.0f, 1.0f), clamp(pixel.z, 0.0f, 1.0f), pixel.w);
198        float4 hsl = RGB_2_HSL(pixel);
199        hsl.x = (hmix != 0.0f ) ? hmix : hsl.x;
200        hsl.y = (smix != 0.0f ) ? smix : hsl.y;
201        hsl.z = (lmix != 0.0f ) ? lmix : hsl.z;
202        pixel = HSL_2_RGB(hsl);
203      }
204      opixel.x = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f);
205      opixel.y = fmax(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f);
206      opixel.z = fmax(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f);
207      break;
208  }
209
210  write_imagef (out, (int2)(x, y), opixel);
211}
212
213
214__kernel void
215velvia (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
216        const float strength, const float bias)
217{
218  const int x = get_global_id(0);
219  const int y = get_global_id(1);
220
221  if(x >= width || y >= height) return;
222
223  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
224
225  // calculate vibrance, and apply boost velvia saturation at least saturated pixels
226  float pmax = fmax(pixel.x, fmax(pixel.y, pixel.z));     // max value in RGB set
227  float pmin = fmin(pixel.x, fmin(pixel.y, pixel.z));     // min value in RGB set
228  float plum = (pmax + pmin) / 2.0f;                // pixel luminocity
229  float psat = (plum <= 0.5f) ? (pmax-pmin)/(1e-5f + pmax+pmin) : (pmax-pmin)/(1e-5f + fmax(0.0f, 2.0f-pmax-pmin));
230
231  float pweight = clamp(((1.0f- (1.5f*psat)) + ((1.0f+(fabs(plum-0.5f)*2.0f))*(1.0f-bias))) / (1.0f+(1.0f-bias)), 0.0f, 1.0f); // The weight of pixel
232  float saturation = strength*pweight;      // So lets calculate the final affection of filter on pixel
233
234  float4 opixel;
235
236  opixel.x = clamp(pixel.x + saturation*(pixel.x-0.5f*(pixel.y+pixel.z)), 0.0f, 1.0f);
237  opixel.y = clamp(pixel.y + saturation*(pixel.y-0.5f*(pixel.z+pixel.x)), 0.0f, 1.0f);
238  opixel.z = clamp(pixel.z + saturation*(pixel.z-0.5f*(pixel.x+pixel.y)), 0.0f, 1.0f);
239  opixel.w = pixel.w;
240
241  write_imagef (out, (int2)(x, y), opixel);
242}
243
244
245__kernel void
246colorcontrast (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
247               const float4 scale, const float4 offset, const int unbound)
248{
249  const int x = get_global_id(0);
250  const int y = get_global_id(1);
251
252  if(x >= width || y >= height) return;
253
254  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
255
256  pixel.xyz = (pixel * scale + offset).xyz;
257  pixel.y = unbound ? pixel.y : clamp(pixel.y, -128.0f, 128.0f);
258  pixel.z = unbound ? pixel.z : clamp(pixel.z, -128.0f, 128.0f);
259
260  write_imagef (out, (int2)(x, y), pixel);
261}
262
263
264__kernel void
265vibrance (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float amount)
266{
267  const int x = get_global_id(0);
268  const int y = get_global_id(1);
269
270  if(x >= width || y >= height) return;
271
272  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
273
274  const float sw = sqrt(pixel.y*pixel.y + pixel.z*pixel.z)/256.0f;
275  const float ls = 1.0f - amount * sw * 0.25f;
276  const float ss = 1.0f + amount * sw;
277
278  pixel.x *= ls;
279  pixel.y *= ss;
280  pixel.z *= ss;
281
282  write_imagef (out, (int2)(x, y), pixel);
283}
284
285#define TEA_ROUNDS 8
286
287void
288encrypt_tea(unsigned int *arg)
289{
290  const unsigned int key[] = {0xa341316c, 0xc8013ea4, 0xad90777d, 0x7e95761e};
291  unsigned int v0 = arg[0], v1 = arg[1];
292  unsigned int sum = 0;
293  unsigned int delta = 0x9e3779b9;
294  for(int i = 0; i < TEA_ROUNDS; i++)
295  {
296    sum += delta;
297    v0 += ((v1 << 4) + key[0]) ^ (v1 + sum) ^ ((v1 >> 5) + key[1]);
298    v1 += ((v0 << 4) + key[2]) ^ (v0 + sum) ^ ((v0 >> 5) + key[3]);
299  }
300  arg[0] = v0;
301  arg[1] = v1;
302}
303
304float
305tpdf(unsigned int urandom)
306{
307  float frandom = (float)urandom / (float)0xFFFFFFFFu;
308
309  return (frandom < 0.5f ? (sqrt(2.0f*frandom) - 1.0f) : (1.0f - sqrt(2.0f*(1.0f - frandom))));
310}
311
312
313__kernel void
314vignette (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
315          const float2 scale, const float2 roi_center_scaled, const float2 expt,
316          const float dscale, const float fscale, const float brightness, const float saturation,
317          const float dither, const int unbound)
318{
319  const int x = get_global_id(0);
320  const int y = get_global_id(1);
321
322  if(x >= width || y >= height) return;
323
324  unsigned int tea_state[2] = { mad24(y, width, x), 0 };
325  encrypt_tea(tea_state);
326
327  const float2 pv = fabs((float2)(x,y) * scale - roi_center_scaled);
328
329  const float cplen = pow(pow(pv.x, expt.x) + pow(pv.y, expt.x), expt.y);
330
331  float weight = 0.0f;
332  float dith = 0.0f;
333
334  if(cplen >= dscale)
335  {
336    weight = ((cplen - dscale) / fscale);
337
338    dith = (weight <= 1.0f && weight >= 0.0f) ? dither * tpdf(tea_state[0]) : 0.0f;
339
340    weight = weight >= 1.0f ? 1.0f : (weight <= 0.0f ? 0.0f : 0.5f - cos(M_PI_F * weight) / 2.0f);
341  }
342
343  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
344
345  if(weight > 0.0f)
346  {
347    float falloff = brightness < 0.0f ? 1.0f + (weight * brightness) : weight * brightness;
348
349    pixel.xyz = (brightness < 0.0f ? pixel * falloff + dith : pixel + falloff + dith).xyz;
350
351    pixel.xyz = unbound ? pixel.xyz : clamp(pixel, (float4)0.0f, (float4)1.0f).xyz;
352
353    float mv = (pixel.x + pixel.y + pixel.z) / 3.0f;
354    float wss = weight * saturation;
355
356    pixel.xyz = (pixel - (mv - pixel)* wss).xyz,
357
358    pixel.xyz = unbound ? pixel.xyz : clamp(pixel, (float4)0.0f, (float4)1.0f).xyz;
359  }
360
361  write_imagef (out, (int2)(x, y), pixel);
362}
363
364
365/* kernel for the splittoning plugin. */
366kernel void
367splittoning (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
368            const float compress, const float balance, const float shadow_hue, const float shadow_saturation,
369            const float highlight_hue, const float highlight_saturation)
370{
371  const int x = get_global_id(0);
372  const int y = get_global_id(1);
373
374  if(x >= width || y >= height) return;
375
376  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
377
378  float4 hsl = RGB_2_HSL(pixel);
379
380  if(hsl.z < balance - compress || hsl.z > balance + compress)
381  {
382    hsl.x = hsl.z < balance ? shadow_hue : highlight_hue;
383    hsl.y = hsl.z < balance ? shadow_saturation : highlight_saturation;
384    float ra = hsl.z < balance ? clamp(2.0f*fabs(-balance + compress + hsl.z), 0.0f, 1.0f) :
385               clamp(2.0f*fabs(-balance - compress + hsl.z), 0.0f, 1.0f);
386
387    float4 mixrgb = HSL_2_RGB(hsl);
388
389    pixel.xyz = clamp(pixel * (1.0f - ra) + mixrgb * ra, (float4)0.0f, (float4)1.0f).xyz;
390  }
391
392  write_imagef (out, (int2)(x, y), pixel);
393}
394
395/* kernels to get the maximum value of an image */
396kernel void
397pixelmax_first (read_only image2d_t in, const int width, const int height, global float *accu, local float *buffer)
398{
399  const int x = get_global_id(0);
400  const int y = get_global_id(1);
401  const int xlsz = get_local_size(0);
402  const int ylsz = get_local_size(1);
403  const int xlid = get_local_id(0);
404  const int ylid = get_local_id(1);
405
406  const int l = ylid * xlsz + xlid;
407
408  buffer[l] = (x < width && y < height) ? read_imagef(in, sampleri, (int2)(x, y)).x : -INFINITY;
409
410  barrier(CLK_LOCAL_MEM_FENCE);
411
412  const int lsz = mul24(xlsz, ylsz);
413
414  for(int offset = lsz / 2; offset > 0; offset = offset / 2)
415  {
416    if (l < offset)
417    {
418      float other = buffer[l + offset];
419      float mine =  buffer[l];
420      buffer[l] = (mine > other) ? mine : other;
421    }
422    barrier(CLK_LOCAL_MEM_FENCE);
423  }
424
425  const int xgid = get_group_id(0);
426  const int ygid = get_group_id(1);
427  const int xgsz = get_num_groups(0);
428
429  const int m = mad24(ygid, xgsz, xgid);
430  accu[m] = buffer[0];
431}
432
433
434
435__kernel void
436pixelmax_second(global float* input, global float *result, const int length, local float *buffer)
437{
438  int x = get_global_id(0);
439  float accu = -INFINITY;
440
441  while (x < length)
442  {
443    float element = input[x];
444    accu = (accu > element) ? accu : element;
445    x += get_global_size(0);
446  }
447
448  int lid = get_local_id(0);
449  buffer[lid] = accu;
450
451  barrier(CLK_LOCAL_MEM_FENCE);
452
453  for(int offset = get_local_size(0) / 2; offset > 0; offset = offset / 2)
454  {
455    if (lid < offset)
456    {
457      float other = buffer[lid + offset];
458      float mine = buffer[lid];
459      buffer[lid] = (mine > other) ? mine : other;
460    }
461    barrier(CLK_LOCAL_MEM_FENCE);
462  }
463
464  if (lid == 0)
465  {
466    result[get_group_id(0)] = buffer[0];
467  }
468}
469
470
471/* kernel for the global tonemap plugin: reinhard */
472kernel void
473global_tonemap_reinhard (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
474            const float4 parameters)
475{
476  const int x = get_global_id(0);
477  const int y = get_global_id(1);
478
479  if(x >= width || y >= height) return;
480
481  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
482
483  float l = pixel.x * 0.01f;
484
485  pixel.x = 100.0f * (l/(1.0f + l));
486
487  write_imagef (out, (int2)(x, y), pixel);
488}
489
490
491
492/* kernel for the global tonemap plugin: drago */
493kernel void
494global_tonemap_drago (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
495            const float4 parameters)
496{
497  const int x = get_global_id(0);
498  const int y = get_global_id(1);
499
500  if(x >= width || y >= height) return;
501
502  const float eps = parameters.x;
503  const float ldc = parameters.y;
504  const float bl = parameters.z;
505  const float lwmax = parameters.w;
506
507  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
508
509  float lw = pixel.x * 0.01f;
510
511  pixel.x = 100.0f * (ldc * log(fmax(eps, lw + 1.0f)) / log(fmax(eps, 2.0f + (pow(lw/lwmax,bl)) * 8.0f)));
512
513  write_imagef (out, (int2)(x, y), pixel);
514}
515
516
517/* kernel for the global tonemap plugin: filmic */
518kernel void
519global_tonemap_filmic (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
520            const float4 parameters)
521{
522  const int x = get_global_id(0);
523  const int y = get_global_id(1);
524
525  if(x >= width || y >= height) return;
526
527  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
528
529  float l = pixel.x * 0.01f;
530  float m = fmax(0.0f, l - 0.004f);
531
532  pixel.x = 100.0f * ((m*(6.2f*m+0.5f))/(m*(6.2f*m+1.7f)+0.06f));
533
534  write_imagef (out, (int2)(x, y), pixel);
535}
536
537
538/* kernels for the colormapping module */
539#define HISTN (1<<11)
540#define MAXN 5
541
542// inverse distant weighting according to D. Shepard's method; with power parameter 2.0
543void
544get_clusters(const float4 col, const int n, global float2 *mean, float *weight)
545{
546  float mdist = FLT_MAX;
547  for(int k=0; k<n; k++)
548  {
549    const float dist2 = (col.y-mean[k].x)*(col.y-mean[k].x) + (col.z-mean[k].y)*(col.z-mean[k].y);  // dist^2
550    weight[k] = dist2 > 1.0e-6f ? 1.0f/dist2 : -1.0f;                                                // direct hits marked as -1
551    if(dist2 < mdist) mdist = dist2;
552  }
553  if(mdist < 1.0e-6f) for(int k=0; k<n; k++) weight[k] = weight[k] < 0.0f ? 1.0f : 0.0f;             // correction in case of direct hits
554  float sum = 0.0f;
555  for(int k=0; k<n; k++) sum += weight[k];
556  if(sum > 0.0f) for(int k=0; k<n; k++) weight[k] /= sum;
557}
558
559kernel void
560colormapping_histogram (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
561            const float equalization, global int *target_hist, global float *source_ihist)
562{
563  const int x = get_global_id(0);
564  const int y = get_global_id(1);
565
566  if(x >= width || y >= height) return;
567
568  float L = read_imagef(in, sampleri, (int2)(x, y)).x;
569
570  float dL = 0.5f*((L * (1.0f - equalization) + source_ihist[target_hist[(int)clamp(HISTN*L/100.0f, 0.0f, (float)HISTN-1.0f)]] * equalization) - L) + 50.0f;
571  dL = clamp(dL, 0.0f, 100.0f);
572
573  write_imagef (out, (int2)(x, y), (float4)(dL, 0.0f, 0.0f, 0.0f));
574}
575
576kernel void
577colormapping_mapping (read_only image2d_t in, read_only image2d_t tmp, write_only image2d_t out, const int width, const int height,
578            const int clusters, global float2 *target_mean, global float2 *source_mean, global float2 *var_ratio, global int *mapio)
579{
580  const int x = get_global_id(0);
581  const int y = get_global_id(1);
582
583  if(x >= width || y >= height) return;
584
585  float4 ipixel = read_imagef(in, sampleri, (int2)(x, y));
586  float dL = read_imagef(tmp, sampleri, (int2)(x, y)).x;
587  float weight[MAXN];
588  float4 opixel = (float4)0.0f;
589
590  opixel.x = 2.0f*(dL - 50.0f) + ipixel.x;
591  opixel.x = clamp(opixel.x, 0.0f, 100.0f);
592
593  get_clusters(ipixel, clusters, target_mean, weight);
594
595  for(int c=0; c < clusters; c++)
596  {
597    opixel.y += weight[c] * ((ipixel.y - target_mean[c].x)*var_ratio[c].x + source_mean[mapio[c]].x);
598    opixel.z += weight[c] * ((ipixel.z - target_mean[c].y)*var_ratio[c].y + source_mean[mapio[c]].y);
599  }
600  opixel.w = ipixel.w;
601
602  write_imagef (out, (int2)(x, y), opixel);
603}
604
605#undef HISTN
606#undef MAXN
607
608
609/* kernel for the colorbalance module */
610kernel void
611colorbalance (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
612              const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey)
613{
614  const int x = get_global_id(0);
615  const int y = get_global_id(1);
616
617  if(x >= width || y >= height) return;
618
619  float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
620  float4 sRGB = XYZ_to_sRGB(Lab_to_XYZ(Lab));
621
622  // Lift gamma gain
623  sRGB = (sRGB <= (float4)0.0031308f) ? 12.92f * sRGB : (1.0f + 0.055f) * native_powr(sRGB, (float4)1.0f/2.4f) - (float4)0.055f;
624  sRGB = native_powr(fmax(((sRGB - (float4)1.0f) * lift + (float4)1.0f) * gain, (float4)0.0f), gamma_inv);
625  sRGB = (sRGB <= (float4)0.04045f) ? sRGB / 12.92f : native_powr((sRGB + (float4)0.055f) / (1.0f + 0.055f), (float4)2.4f);
626  Lab.xyz = XYZ_to_Lab(sRGB_to_XYZ(sRGB)).xyz;
627
628  write_imagef (out, (int2)(x, y), Lab);
629}
630
631kernel void
632colorbalance_lgg (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
633              const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey, const float saturation_out)
634{
635  const int x = get_global_id(0);
636  const int y = get_global_id(1);
637
638  if(x >= width || y >= height) return;
639
640  float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
641  const float4 XYZ = Lab_to_XYZ(Lab);
642  float4 RGB = XYZ_to_prophotorgb(XYZ);
643
644  // saturation input
645  if (saturation != 1.0f)
646  {
647    const float4 luma = XYZ.y;
648    const float4 saturation4 = saturation;
649    RGB = luma + saturation4 * (RGB - luma);
650  }
651
652  // Lift gamma gain
653  RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, (float4)1.0f/2.2f);
654  RGB = ((RGB - (float4)1.0f) * lift + (float4)1.0f) * gain;
655  RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, gamma_inv * (float4)2.2f);
656
657  // saturation output
658  if (saturation_out != 1.0f)
659  {
660    const float4 luma = prophotorgb_to_XYZ(RGB).y;
661    const float4 saturation_out4 = saturation_out;
662    RGB = luma + saturation_out4 * (RGB - luma);
663  }
664
665  // fulcrum contrast
666  if (contrast != 1.0f)
667  {
668    const float4 contrast4 = contrast;
669    const float4 grey4 = grey;
670    RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : pow(RGB / grey4, contrast4) * grey4;
671  }
672
673  Lab.xyz = prophotorgb_to_Lab(RGB).xyz;
674
675  write_imagef (out, (int2)(x, y), Lab);
676}
677
678kernel void
679colorbalance_cdl (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
680              const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey, const float saturation_out)
681{
682  const int x = get_global_id(0);
683  const int y = get_global_id(1);
684
685  if(x >= width || y >= height) return;
686
687  float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
688  const float4 XYZ = Lab_to_XYZ(Lab);
689  float4 RGB = XYZ_to_prophotorgb(XYZ);
690
691  // saturation input
692  if (saturation != 1.0f)
693  {
694    const float4 luma = XYZ.y;
695    const float4 saturation4 = saturation;
696    RGB = luma + saturation4 * (RGB - luma);
697  }
698
699  // lift power slope
700  RGB = RGB * gain + lift;
701  RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, gamma_inv);
702
703  // saturation output
704  if (saturation_out != 1.0f)
705  {
706    const float4 luma = prophotorgb_to_XYZ(RGB).y;
707    const float4 saturation_out4 = saturation_out;
708    RGB = luma + saturation_out4 * (RGB - luma);
709  }
710
711  // fulcrum contrast
712  if (contrast != 1.0f)
713  {
714    const float4 contrast4 = contrast;
715    const float4 grey4 = grey;
716    RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB / grey4, contrast4) * grey4;
717  }
718
719  Lab.xyz = prophotorgb_to_Lab(RGB).xyz;
720
721  write_imagef (out, (int2)(x, y), Lab);
722}
723
724
725static inline float sqf(const float x)
726{
727  return x * x;
728}
729
730
731static inline float4 opacity_masks(const float x,
732                                   const float shadows_weight, const float highlights_weight,
733                                   const float midtones_weight, const float mask_grey_fulcrum)
734{
735  float4 output;
736  const float x_offset = (x - mask_grey_fulcrum);
737  const float x_offset_norm = x_offset / mask_grey_fulcrum;
738  const float alpha = 1.f / (1.f + native_exp(x_offset_norm * shadows_weight));    // opacity of shadows
739  const float beta = 1.f / (1.f + native_exp(-x_offset_norm * highlights_weight)); // opacity of highlights
740  const float gamma = native_exp(-sqf(x_offset) * midtones_weight / 4.f) * sqf(1.f - alpha) * sqf(1.f - beta) * 8.f; // opacity of midtones
741
742  output.x = alpha;
743  output.y = gamma;
744  output.z = beta;
745  output.w = 0.f;
746
747  return output;
748}
749
750
751#define LUT_ELEM 360 // gamut LUT number of elements: resolution of 1°
752
753static inline float lookup_gamut(read_only image2d_t gamut_lut, const float x)
754{
755  // WARNING : x should be between [-pi ; pi ], which is the default output of atan2 anyway
756
757  // convert in LUT coordinate
758  const float x_test = (LUT_ELEM - 1) * (x + M_PI_F) / (2.f * M_PI_F);
759
760  // find the 2 closest integer coordinates (next/previous)
761  float x_prev = floor(x_test);
762  float x_next = ceil(x_test);
763
764  // get the 2 closest LUT elements at integer coordinates
765  // cycle on the hue ring if out of bounds
766  int xi = (int)x_prev;
767  if(xi < 0) xi = LUT_ELEM - 1;
768  else if(xi > LUT_ELEM - 1) xi = 0;
769
770  int xii = (int)x_next;
771  if(xii < 0) xii = LUT_ELEM - 1;
772  else if(xii > LUT_ELEM - 1) xii = 0;
773
774  // fetch the corresponding y values
775  const float y_prev = read_imagef(gamut_lut, sampleri, (int2)(xi, 0)).x;
776  const float y_next = read_imagef(gamut_lut, sampleri, (int2)(xii, 0)).x;
777
778  // assume that we are exactly on an integer LUT element
779  float out = y_prev;
780
781  if(x_next != x_prev)
782    // we are between 2 LUT elements : do linear interpolation
783    // actually, we only add the slope term on the previous one
784    out += (x_test - x_prev) * (y_next - y_prev) / (x_next - x_prev);
785
786  return out;
787}
788
789
790static inline float soft_clip(const float x, const float soft_threshold, const float hard_threshold)
791{
792  // use an exponential soft clipping above soft_threshold
793  // hard threshold must be > soft threshold
794  const float norm = hard_threshold - soft_threshold;
795  return (x > soft_threshold) ? soft_threshold + (1.f - native_exp(-(x - soft_threshold) / norm)) * norm : x;
796}
797
798
799kernel void
800colorbalancergb (read_only image2d_t in, write_only image2d_t out,
801                 const int width, const int height,
802                 constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info,
803                 constant const float *const matrix_in, constant const float *const matrix_out,
804                 read_only image2d_t gamut_lut,
805                 const float shadows_weight, const float highlights_weight, const float midtones_weight, const float mask_grey_fulcrum,
806                 const float hue_angle, const float chroma_global, const float4 chroma, const float vibrance,
807                 const float4 global_offset, const float4 shadows, const float4 highlights, const float4 midtones,
808                 const float white_fulcrum, const float midtones_Y,
809                 const float grey_fulcrum, const float contrast,
810                 const float brilliance_global, const float4 brilliance,
811                 const float saturation_global, const float4 saturation,
812                 const int mask_display, const int mask_type, const int checker_1, const int checker_2,
813                 const float4 checker_color_1, const float4 checker_color_2)
814{
815  const int x = get_global_id(0);
816  const int y = get_global_id(1);
817  if(x >= width || y >= height) return;
818  const float4 pix_in = read_imagef(in, sampleri, (int2)(x, y));
819
820  float4 XYZ_D65 = 0.f;
821  float4 LMS = 0.f;
822  float4 RGB = 0.f;
823  float4 Yrg = 0.f;
824  float4 Ych = 0.f;
825
826  // clip pipeline RGB
827  RGB = fmax(pix_in, 0.f);
828
829  // go to CIE 2006 LMS D65
830  LMS = matrix_product_float4(RGB, matrix_in);
831
832  // go to Filmlight Yrg
833  Yrg = LMS_to_Yrg(LMS);
834
835  // go to Ych
836  Ych = Yrg_to_Ych(Yrg);
837
838  // Sanitize input : no negative luminance
839  Ych.x = fmax(Ych.x, 0.f);
840  const float4 opacities = opacity_masks(native_powr(Ych.x, 0.4101205819200422f), // center middle grey in 50 %
841                                         shadows_weight, highlights_weight, midtones_weight, mask_grey_fulcrum);
842  const float4 opacities_comp = (float4)1.f - opacities;
843
844  // Hue shift - do it now because we need the gamut limit at output hue right after
845  Ych.z += hue_angle;
846
847  // Ensure hue ± correction is in [-PI; PI]
848  if(Ych.z > M_PI_F) Ych.z -= 2.f * M_PI_F;
849  else if(Ych.z < -M_PI_F) Ych.z += 2.f * M_PI_F;
850
851  // Linear chroma : distance to achromatic at constant luminance in scene-referred
852  const float chroma_boost = chroma_global + dot(opacities, chroma);
853  const float vib = vibrance * (1.0f - native_powr(Ych.y, fabs(vibrance)));
854  const float chroma_factor = fmax(1.f + chroma_boost + vib, 0.f);
855  Ych.y *= chroma_factor;
856
857  // Do a test conversion to Yrg
858  Yrg = Ych_to_Yrg(Ych);
859
860  // Gamut-clip in Yrg at constant hue and luminance
861  // e.g. find the max chroma value that fits in gamut at the current hue
862  const float D65[4] = { 0.21962576f, 0.54487092f, 0.23550333f, 0.f };
863  float max_c = Ych.y;
864  const float cos_h = native_cos(Ych.z);
865  const float sin_h = native_sin(Ych.z);
866
867  if(Yrg.y < 0.f)
868  {
869    max_c = fmin(-D65[0] / cos_h, max_c);
870  }
871  if(Yrg.z < 0.f)
872  {
873    max_c = fmin(-D65[1] / sin_h, max_c);
874  }
875  if(Yrg.y + Yrg.z > 1.f)
876  {
877    max_c = fmin((1.f - D65[0] - D65[1]) / (cos_h + sin_h), max_c);
878  }
879
880  // Overwrite chroma with the sanitized value and go to Yrg for real
881  Ych.y = max_c;
882  Yrg = Ych_to_Yrg(Ych);
883
884  // Go to LMS
885  LMS = Yrg_to_LMS(Yrg);
886
887  // Go to Filmlight RGB
888  RGB = LMS_to_gradingRGB(LMS);
889
890  // Color balance
891
892  // global : offset
893  RGB += global_offset;
894
895  // highlights, shadows : 2 slopes with masking
896  RGB *= opacities_comp.z * (opacities_comp.x + opacities.x * shadows) + opacities.z * highlights;
897  // factorization of : (RGB[c] * (1.f - alpha) + RGB[c] * d->shadows[c] * alpha) * (1.f - beta)  + RGB[c] * d->highlights[c] * beta;
898
899  // midtones : power with sign preservation
900  RGB = sign(RGB) * native_powr(fabs(RGB) / white_fulcrum, midtones) * white_fulcrum;
901
902  // for the non-linear ops we need to go in Yrg again because RGB doesn't preserve color
903  LMS = gradingRGB_to_LMS(RGB);
904  Yrg = LMS_to_Yrg(LMS);
905
906  // Y midtones power (gamma)
907  Yrg.x = native_powr(fmax(Yrg.x / white_fulcrum, 0.f), midtones_Y) * white_fulcrum;
908
909  // Y fulcrumed contrast
910  Yrg.x = grey_fulcrum * native_powr(Yrg.x / grey_fulcrum, contrast);
911
912  LMS = Yrg_to_LMS(Yrg);
913  XYZ_D65 = LMS_to_XYZ(LMS);
914
915  // Perceptual color adjustments
916
917  // Go to JzAzBz for perceptual saturation
918  float4 Jab = XYZ_to_JzAzBz(XYZ_D65);
919
920  // Convert to JCh
921  float JC[2] = { Jab.x, hypot(Jab.y, Jab.z) };               // brightness/chroma vector
922  const float h = atan2(Jab.z, Jab.y);  // hue : (a, b) angle
923
924  // Project JC onto S, the saturation eigenvector, with orthogonal vector O.
925  // Note : O should be = (C * cosf(T) - J * sinf(T)) = 0 since S is the eigenvector,
926  // so we add the chroma projected along the orthogonal axis to get some control value
927  const float T = atan2(JC[1], JC[0]); // angle of the eigenvector over the hue plane
928  const float sin_T = native_sin(T);
929  const float cos_T = native_cos(T);
930  const float M_rot_dir[2][2] = { {  cos_T,  sin_T },
931                                  { -sin_T,  cos_T } };
932  const float M_rot_inv[2][2] = { {  cos_T, -sin_T },
933                                  {  sin_T,  cos_T } };
934  float SO[2];
935
936  // brilliance & Saturation : mix of chroma and luminance
937  const float boosts[2] = { 1.f + brilliance_global + dot(opacities, brilliance),     // move in S direction
938                            saturation_global + dot(opacities, saturation) }; // move in O direction
939
940  SO[0] = JC[0] * M_rot_dir[0][0] + JC[1] * M_rot_dir[0][1];
941  SO[1] = SO[0] * clamp(T * boosts[1], -T, M_PI_F / 2.f - T);
942  SO[0] = fmax(SO[0] * boosts[0], 0.f);
943
944  // Project back to JCh, that is rotate back of -T angle
945  JC[0] = fmax(SO[0] * M_rot_inv[0][0] + SO[1] * M_rot_inv[0][1], 0.f);
946  JC[1] = fmax(SO[0] * M_rot_inv[1][0] + SO[1] * M_rot_inv[1][1], 0.f);
947
948  // Gamut mapping
949  const float out_max_sat_h = lookup_gamut(gamut_lut, h);
950  float sat = (JC[0] > 0.f) ? JC[1] / JC[0] : 0.f;
951  sat = soft_clip(sat, 0.8f * out_max_sat_h, out_max_sat_h);
952  const float max_C_at_sat = JC[0] * sat;
953  const float max_J_at_sat = (sat > 0.f) ? JC[1] / sat : 0.f;
954  JC[0] = (JC[0] + max_J_at_sat) / 2.f;
955  JC[1] = (JC[1] + max_C_at_sat) / 2.f;
956
957  // Gamut-clip in Jch at constant hue and lightness,
958  // e.g. find the max chroma available at current hue that doesn't
959  // yield negative L'M'S' values, which will need to be clipped during conversion
960  const float cos_H = native_cos(h);
961  const float sin_H = native_sin(h);
962
963  const float d0 = 1.6295499532821566e-11f;
964  const float d = -0.56f;
965  float Iz = JC[0] + d0;
966  Iz /= (1.f + d - d * Iz);
967  Iz = fmax(Iz, 0.f);
968
969  const float4 AI[3] = { {  1.0f,  0.1386050432715393f,  0.0580473161561189f, 0.0f },
970                         {  1.0f, -0.1386050432715393f, -0.0580473161561189f, 0.0f },
971                         {  1.0f, -0.0960192420263190f, -0.8118918960560390f, 0.0f } };
972
973  // Do a test conversion to L'M'S'
974  const float4 IzAzBz = { Iz, JC[1] * cos_H, JC[1] * sin_H, 0.f };
975  LMS.x = dot(AI[0], IzAzBz);
976  LMS.y = dot(AI[1], IzAzBz);
977  LMS.z = dot(AI[2], IzAzBz);
978
979  // Clip chroma
980  float max_C = JC[1];
981  if(LMS.x < 0.f)
982    max_C = fmin(-Iz / (AI[0].y * cos_H + AI[0].z * sin_H), max_C);
983
984  if(LMS.y < 0.f)
985    max_C = fmin(-Iz / (AI[1].y * cos_H + AI[1].z * sin_H), max_C);
986
987  if(LMS.z < 0.f)
988    max_C = fmin(-Iz / (AI[2].y * cos_H + AI[2].z * sin_H), max_C);
989
990  // Project back to JzAzBz for real
991  Jab.x = JC[0];
992  Jab.y = max_C * cos_H;
993  Jab.z = max_C * sin_H;
994
995  XYZ_D65 = JzAzBz_2_XYZ(Jab);
996
997  // Project back to D50 pipeline RGB
998  RGB = matrix_product_float4(XYZ_D65, matrix_out);
999
1000  if(mask_display)
1001  {
1002    // draw checkerboard
1003    float4 color;
1004    if(x % checker_1 < x % checker_2)
1005    {
1006      if(y % checker_1 < y % checker_2) color = checker_color_2;
1007      else color = checker_color_1;
1008    }
1009    else
1010    {
1011      if(y % checker_1 < y % checker_2) color = checker_color_1;
1012      else color = checker_color_2;
1013    }
1014    const float *op = (const float *)&opacities;
1015    float opacity = op[mask_type];
1016    const float opacity_comp = 1.0f - opacity;
1017
1018    RGB = opacity_comp * color + opacity * fmax(RGB, 0.f);
1019    RGB.w = 1.0f; // alpha is opaque, we need to preview it
1020  }
1021  else
1022  {
1023    RGB = fmax(RGB, 0.f);
1024    RGB.w = pix_in.w; // alpha copy
1025  }
1026
1027  write_imagef (out, (int2)(x, y), RGB);
1028}
1029
1030
1031/* helpers and kernel for the colorchecker module */
1032float fastlog2(float x)
1033{
1034  union { float f; unsigned int i; } vx = { x };
1035  union { unsigned int i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 };
1036
1037  float y = vx.i;
1038
1039  y *= 1.1920928955078125e-7f;
1040
1041  return y - 124.22551499f
1042    - 1.498030302f * mx.f
1043    - 1.72587999f / (0.3520887068f + mx.f);
1044}
1045
1046float fastlog(float x)
1047{
1048  return 0.69314718f * fastlog2(x);
1049}
1050
1051float thinplate(const float4 x, const float4 y)
1052{
1053  const float r2 =
1054      (x.x - y.x) * (x.x - y.x) +
1055      (x.y - y.y) * (x.y - y.y) +
1056      (x.z - y.z) * (x.z - y.z);
1057
1058  return r2 * fastlog(max(1e-8f, r2));
1059}
1060
1061kernel void
1062colorchecker (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
1063              const int num_patches, global float4 *params)
1064{
1065  const int x = get_global_id(0);
1066  const int y = get_global_id(1);
1067
1068  if(x >= width || y >= height) return;
1069
1070  global float4 *source_Lab = params;
1071  global float4 *coeff_Lab = params + num_patches;
1072  global float4 *poly_Lab = params + 2 * num_patches;
1073
1074  float4 ipixel = read_imagef(in, sampleri, (int2)(x, y));
1075
1076  const float w = ipixel.w;
1077
1078  float4 opixel = poly_Lab[0] + poly_Lab[1] * ipixel.x + poly_Lab[2] * ipixel.y + poly_Lab[3] * ipixel.z;
1079
1080  for(int k = 0; k < num_patches; k++)
1081  {
1082    const float phi = thinplate(ipixel, source_Lab[k]);
1083    opixel += coeff_Lab[k] * phi;
1084  }
1085
1086  opixel.w = w;
1087
1088  write_imagef (out, (int2)(x, y), opixel);
1089}
1090