1/* 2 This file is part of darktable, 3 copyright (c) 2009--2014 Ulrich Pegelow 4 5 darktable is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 darktable is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with darktable. If not, see <http://www.gnu.org/licenses/>. 17*/ 18 19#include "common.h" 20#include "colorspace.h" 21#include "color_conversion.h" 22 23 24__kernel void 25graduatedndp (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float4 color, 26 const float density, const float length_base, const float length_inc_x, const float length_inc_y) 27{ 28 const int x = get_global_id(0); 29 const int y = get_global_id(1); 30 31 if(x >= width || y >= height) return; 32 33 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 34 35 const float len = length_base + y*length_inc_y + x*length_inc_x; 36 37 const float t = 0.693147181f * (density * clamp(0.5f+len, 0.0f, 1.0f)/8.0f); 38 const float d1 = t * t * 0.5f; 39 const float d2 = d1 * t * 0.333333333f; 40 const float d3 = d2 * t * 0.25f; 41 float dens = 1.0f + t + d1 + d2 + d3; 42 dens *= dens; 43 dens *= dens; 44 dens *= dens; 45 46 pixel.xyz = fmax((float4)0.0f, pixel / (color + ((float4)1.0f - color) * (float4)dens)).xyz; 47 48 write_imagef (out, (int2)(x, y), pixel); 49} 50 51 52__kernel void 53graduatedndm (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float4 color, 54 const float density, const float length_base, const float length_inc_x, const float length_inc_y) 55{ 56 const int x = get_global_id(0); 57 const int y = get_global_id(1); 58 59 if(x >= width || y >= height) return; 60 61 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 62 63 const float len = length_base + y*length_inc_y + x*length_inc_x; 64 65 const float t = 0.693147181f * (-density * clamp(0.5f-len, 0.0f, 1.0f)/8.0f); 66 const float d1 = t * t * 0.5f; 67 const float d2 = d1 * t * 0.333333333f; 68 const float d3 = d2 * t * 0.25f; 69 float dens = 1.0f + t + d1 + d2 + d3; 70 dens *= dens; 71 dens *= dens; 72 dens *= dens; 73 74 pixel.xyz = fmax((float4)0.0f, pixel * (color + ((float4)1.0f - color) * (float4)dens)).xyz; 75 76 write_imagef (out, (int2)(x, y), pixel); 77} 78 79__kernel void 80colorize (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 81 const float mix, const float L, const float a, const float b) 82{ 83 const int x = get_global_id(0); 84 const int y = get_global_id(1); 85 86 if(x >= width || y >= height) return; 87 88 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 89 90 pixel.x = pixel.x * mix + L - 50.0f * mix; 91 pixel.y = a; 92 pixel.z = b; 93 94 write_imagef (out, (int2)(x, y), pixel); 95} 96 97 98float 99GAUSS(float center, float wings, float x) 100{ 101 const float b = -1.0f + center * 2.0f; 102 const float c = (wings / 10.0f) / 2.0f; 103 return exp(-(x-b)*(x-b)/(c*c)); 104} 105 106 107__kernel void 108relight (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 109 const float center, const float wings, const float ev) 110{ 111 const int x = get_global_id(0); 112 const int y = get_global_id(1); 113 114 if(x >= width || y >= height) return; 115 116 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 117 118 const float lightness = pixel.x/100.0f; 119 const float value = -1.0f+(lightness*2.0f); 120 float gauss = GAUSS(center, wings, value); 121 122 if(isnan(gauss) || isinf(gauss)) 123 gauss = 0.0f; 124 125 float relight = 1.0f / exp2(-ev * clamp(gauss, 0.0f, 1.0f)); 126 127 if(isnan(relight) || isinf(relight)) 128 relight = 1.0f; 129 130 pixel.x = 100.0f * clamp(lightness*relight, 0.0f, 1.0f); 131 132 write_imagef (out, (int2)(x, y), pixel); 133} 134 135 136typedef enum _channelmixer_operation_mode_t 137{ 138 OPERATION_MODE_RGB = 0, 139 OPERATION_MODE_GRAY = 1, 140 OPERATION_MODE_HSL_V1 = 2, 141 OPERATION_MODE_HSL_V2 = 3, 142} _channelmixer_operation_mode_t; 143 144 145__kernel void 146channelmixer (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 147 const int operation_mode, global const float *hsl_matrix, 148 global const float *rgb_matrix) 149{ 150 const int x = get_global_id(0); 151 const int y = get_global_id(1); 152 153 if(x >= width || y >= height) return; 154 155 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 156 float4 opixel = (float4)(0.0f, 0.0f, 0.0f, pixel.w); 157 float gray, hmix, smix, lmix; 158 159 switch(operation_mode) 160 { 161 case OPERATION_MODE_RGB: 162 opixel.x = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f); 163 opixel.y = fmax(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f); 164 opixel.z = fmax(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f); 165 break; 166 167 case OPERATION_MODE_GRAY: 168 gray = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f); 169 opixel = (float4)(gray, gray, gray, pixel.w); 170 break; 171 172 case OPERATION_MODE_HSL_V1: 173 hmix = clamp(pixel.x * hsl_matrix[0], 0.0f, 1.0f) + pixel.y * hsl_matrix[1] + pixel.z * hsl_matrix[2]; 174 smix = clamp(pixel.x * hsl_matrix[3], 0.0f, 1.0f) + pixel.y * hsl_matrix[4] + pixel.z * hsl_matrix[5]; 175 lmix = clamp(pixel.x * hsl_matrix[6], 0.0f, 1.0f) + pixel.y * hsl_matrix[7] + pixel.z * hsl_matrix[8]; 176 177 if( hmix != 0.0f || smix != 0.0f || lmix != 0.0f ) 178 { 179 float4 hsl = RGB_2_HSL(pixel); 180 hsl.x = (hmix != 0.0f ) ? hmix : hsl.x; 181 hsl.y = (smix != 0.0f ) ? smix : hsl.y; 182 hsl.z = (lmix != 0.0f ) ? lmix : hsl.z; 183 pixel = HSL_2_RGB(hsl); 184 } 185 186 opixel.x = clamp(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f, 1.0f); 187 opixel.y = clamp(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f, 1.0f); 188 opixel.z = clamp(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f, 1.0f); 189 break; 190 191 case OPERATION_MODE_HSL_V2: 192 hmix = clamp(pixel.x * hsl_matrix[0] + pixel.y * hsl_matrix[1] + pixel.z * hsl_matrix[2], 0.0f, 1.0f); 193 smix = clamp(pixel.x * hsl_matrix[3] + pixel.y * hsl_matrix[4] + pixel.z * hsl_matrix[5], 0.0f, 1.0f); 194 lmix = clamp(pixel.x * hsl_matrix[6] + pixel.y * hsl_matrix[7] + pixel.z * hsl_matrix[8], 0.0f, 1.0f); 195 if( hmix != 0.0f || smix != 0.0f || lmix != 0.0f ) 196 { 197 pixel = (float4)(clamp(pixel.x, 0.0f, 1.0f), clamp(pixel.y, 0.0f, 1.0f), clamp(pixel.z, 0.0f, 1.0f), pixel.w); 198 float4 hsl = RGB_2_HSL(pixel); 199 hsl.x = (hmix != 0.0f ) ? hmix : hsl.x; 200 hsl.y = (smix != 0.0f ) ? smix : hsl.y; 201 hsl.z = (lmix != 0.0f ) ? lmix : hsl.z; 202 pixel = HSL_2_RGB(hsl); 203 } 204 opixel.x = fmax(pixel.x * rgb_matrix[0] + pixel.y * rgb_matrix[1] + pixel.z * rgb_matrix[2], 0.0f); 205 opixel.y = fmax(pixel.x * rgb_matrix[3] + pixel.y * rgb_matrix[4] + pixel.z * rgb_matrix[5], 0.0f); 206 opixel.z = fmax(pixel.x * rgb_matrix[6] + pixel.y * rgb_matrix[7] + pixel.z * rgb_matrix[8], 0.0f); 207 break; 208 } 209 210 write_imagef (out, (int2)(x, y), opixel); 211} 212 213 214__kernel void 215velvia (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 216 const float strength, const float bias) 217{ 218 const int x = get_global_id(0); 219 const int y = get_global_id(1); 220 221 if(x >= width || y >= height) return; 222 223 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 224 225 // calculate vibrance, and apply boost velvia saturation at least saturated pixels 226 float pmax = fmax(pixel.x, fmax(pixel.y, pixel.z)); // max value in RGB set 227 float pmin = fmin(pixel.x, fmin(pixel.y, pixel.z)); // min value in RGB set 228 float plum = (pmax + pmin) / 2.0f; // pixel luminocity 229 float psat = (plum <= 0.5f) ? (pmax-pmin)/(1e-5f + pmax+pmin) : (pmax-pmin)/(1e-5f + fmax(0.0f, 2.0f-pmax-pmin)); 230 231 float pweight = clamp(((1.0f- (1.5f*psat)) + ((1.0f+(fabs(plum-0.5f)*2.0f))*(1.0f-bias))) / (1.0f+(1.0f-bias)), 0.0f, 1.0f); // The weight of pixel 232 float saturation = strength*pweight; // So lets calculate the final affection of filter on pixel 233 234 float4 opixel; 235 236 opixel.x = clamp(pixel.x + saturation*(pixel.x-0.5f*(pixel.y+pixel.z)), 0.0f, 1.0f); 237 opixel.y = clamp(pixel.y + saturation*(pixel.y-0.5f*(pixel.z+pixel.x)), 0.0f, 1.0f); 238 opixel.z = clamp(pixel.z + saturation*(pixel.z-0.5f*(pixel.x+pixel.y)), 0.0f, 1.0f); 239 opixel.w = pixel.w; 240 241 write_imagef (out, (int2)(x, y), opixel); 242} 243 244 245__kernel void 246colorcontrast (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 247 const float4 scale, const float4 offset, const int unbound) 248{ 249 const int x = get_global_id(0); 250 const int y = get_global_id(1); 251 252 if(x >= width || y >= height) return; 253 254 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 255 256 pixel.xyz = (pixel * scale + offset).xyz; 257 pixel.y = unbound ? pixel.y : clamp(pixel.y, -128.0f, 128.0f); 258 pixel.z = unbound ? pixel.z : clamp(pixel.z, -128.0f, 128.0f); 259 260 write_imagef (out, (int2)(x, y), pixel); 261} 262 263 264__kernel void 265vibrance (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float amount) 266{ 267 const int x = get_global_id(0); 268 const int y = get_global_id(1); 269 270 if(x >= width || y >= height) return; 271 272 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 273 274 const float sw = sqrt(pixel.y*pixel.y + pixel.z*pixel.z)/256.0f; 275 const float ls = 1.0f - amount * sw * 0.25f; 276 const float ss = 1.0f + amount * sw; 277 278 pixel.x *= ls; 279 pixel.y *= ss; 280 pixel.z *= ss; 281 282 write_imagef (out, (int2)(x, y), pixel); 283} 284 285#define TEA_ROUNDS 8 286 287void 288encrypt_tea(unsigned int *arg) 289{ 290 const unsigned int key[] = {0xa341316c, 0xc8013ea4, 0xad90777d, 0x7e95761e}; 291 unsigned int v0 = arg[0], v1 = arg[1]; 292 unsigned int sum = 0; 293 unsigned int delta = 0x9e3779b9; 294 for(int i = 0; i < TEA_ROUNDS; i++) 295 { 296 sum += delta; 297 v0 += ((v1 << 4) + key[0]) ^ (v1 + sum) ^ ((v1 >> 5) + key[1]); 298 v1 += ((v0 << 4) + key[2]) ^ (v0 + sum) ^ ((v0 >> 5) + key[3]); 299 } 300 arg[0] = v0; 301 arg[1] = v1; 302} 303 304float 305tpdf(unsigned int urandom) 306{ 307 float frandom = (float)urandom / (float)0xFFFFFFFFu; 308 309 return (frandom < 0.5f ? (sqrt(2.0f*frandom) - 1.0f) : (1.0f - sqrt(2.0f*(1.0f - frandom)))); 310} 311 312 313__kernel void 314vignette (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 315 const float2 scale, const float2 roi_center_scaled, const float2 expt, 316 const float dscale, const float fscale, const float brightness, const float saturation, 317 const float dither, const int unbound) 318{ 319 const int x = get_global_id(0); 320 const int y = get_global_id(1); 321 322 if(x >= width || y >= height) return; 323 324 unsigned int tea_state[2] = { mad24(y, width, x), 0 }; 325 encrypt_tea(tea_state); 326 327 const float2 pv = fabs((float2)(x,y) * scale - roi_center_scaled); 328 329 const float cplen = pow(pow(pv.x, expt.x) + pow(pv.y, expt.x), expt.y); 330 331 float weight = 0.0f; 332 float dith = 0.0f; 333 334 if(cplen >= dscale) 335 { 336 weight = ((cplen - dscale) / fscale); 337 338 dith = (weight <= 1.0f && weight >= 0.0f) ? dither * tpdf(tea_state[0]) : 0.0f; 339 340 weight = weight >= 1.0f ? 1.0f : (weight <= 0.0f ? 0.0f : 0.5f - cos(M_PI_F * weight) / 2.0f); 341 } 342 343 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 344 345 if(weight > 0.0f) 346 { 347 float falloff = brightness < 0.0f ? 1.0f + (weight * brightness) : weight * brightness; 348 349 pixel.xyz = (brightness < 0.0f ? pixel * falloff + dith : pixel + falloff + dith).xyz; 350 351 pixel.xyz = unbound ? pixel.xyz : clamp(pixel, (float4)0.0f, (float4)1.0f).xyz; 352 353 float mv = (pixel.x + pixel.y + pixel.z) / 3.0f; 354 float wss = weight * saturation; 355 356 pixel.xyz = (pixel - (mv - pixel)* wss).xyz, 357 358 pixel.xyz = unbound ? pixel.xyz : clamp(pixel, (float4)0.0f, (float4)1.0f).xyz; 359 } 360 361 write_imagef (out, (int2)(x, y), pixel); 362} 363 364 365/* kernel for the splittoning plugin. */ 366kernel void 367splittoning (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 368 const float compress, const float balance, const float shadow_hue, const float shadow_saturation, 369 const float highlight_hue, const float highlight_saturation) 370{ 371 const int x = get_global_id(0); 372 const int y = get_global_id(1); 373 374 if(x >= width || y >= height) return; 375 376 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 377 378 float4 hsl = RGB_2_HSL(pixel); 379 380 if(hsl.z < balance - compress || hsl.z > balance + compress) 381 { 382 hsl.x = hsl.z < balance ? shadow_hue : highlight_hue; 383 hsl.y = hsl.z < balance ? shadow_saturation : highlight_saturation; 384 float ra = hsl.z < balance ? clamp(2.0f*fabs(-balance + compress + hsl.z), 0.0f, 1.0f) : 385 clamp(2.0f*fabs(-balance - compress + hsl.z), 0.0f, 1.0f); 386 387 float4 mixrgb = HSL_2_RGB(hsl); 388 389 pixel.xyz = clamp(pixel * (1.0f - ra) + mixrgb * ra, (float4)0.0f, (float4)1.0f).xyz; 390 } 391 392 write_imagef (out, (int2)(x, y), pixel); 393} 394 395/* kernels to get the maximum value of an image */ 396kernel void 397pixelmax_first (read_only image2d_t in, const int width, const int height, global float *accu, local float *buffer) 398{ 399 const int x = get_global_id(0); 400 const int y = get_global_id(1); 401 const int xlsz = get_local_size(0); 402 const int ylsz = get_local_size(1); 403 const int xlid = get_local_id(0); 404 const int ylid = get_local_id(1); 405 406 const int l = ylid * xlsz + xlid; 407 408 buffer[l] = (x < width && y < height) ? read_imagef(in, sampleri, (int2)(x, y)).x : -INFINITY; 409 410 barrier(CLK_LOCAL_MEM_FENCE); 411 412 const int lsz = mul24(xlsz, ylsz); 413 414 for(int offset = lsz / 2; offset > 0; offset = offset / 2) 415 { 416 if (l < offset) 417 { 418 float other = buffer[l + offset]; 419 float mine = buffer[l]; 420 buffer[l] = (mine > other) ? mine : other; 421 } 422 barrier(CLK_LOCAL_MEM_FENCE); 423 } 424 425 const int xgid = get_group_id(0); 426 const int ygid = get_group_id(1); 427 const int xgsz = get_num_groups(0); 428 429 const int m = mad24(ygid, xgsz, xgid); 430 accu[m] = buffer[0]; 431} 432 433 434 435__kernel void 436pixelmax_second(global float* input, global float *result, const int length, local float *buffer) 437{ 438 int x = get_global_id(0); 439 float accu = -INFINITY; 440 441 while (x < length) 442 { 443 float element = input[x]; 444 accu = (accu > element) ? accu : element; 445 x += get_global_size(0); 446 } 447 448 int lid = get_local_id(0); 449 buffer[lid] = accu; 450 451 barrier(CLK_LOCAL_MEM_FENCE); 452 453 for(int offset = get_local_size(0) / 2; offset > 0; offset = offset / 2) 454 { 455 if (lid < offset) 456 { 457 float other = buffer[lid + offset]; 458 float mine = buffer[lid]; 459 buffer[lid] = (mine > other) ? mine : other; 460 } 461 barrier(CLK_LOCAL_MEM_FENCE); 462 } 463 464 if (lid == 0) 465 { 466 result[get_group_id(0)] = buffer[0]; 467 } 468} 469 470 471/* kernel for the global tonemap plugin: reinhard */ 472kernel void 473global_tonemap_reinhard (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 474 const float4 parameters) 475{ 476 const int x = get_global_id(0); 477 const int y = get_global_id(1); 478 479 if(x >= width || y >= height) return; 480 481 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 482 483 float l = pixel.x * 0.01f; 484 485 pixel.x = 100.0f * (l/(1.0f + l)); 486 487 write_imagef (out, (int2)(x, y), pixel); 488} 489 490 491 492/* kernel for the global tonemap plugin: drago */ 493kernel void 494global_tonemap_drago (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 495 const float4 parameters) 496{ 497 const int x = get_global_id(0); 498 const int y = get_global_id(1); 499 500 if(x >= width || y >= height) return; 501 502 const float eps = parameters.x; 503 const float ldc = parameters.y; 504 const float bl = parameters.z; 505 const float lwmax = parameters.w; 506 507 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 508 509 float lw = pixel.x * 0.01f; 510 511 pixel.x = 100.0f * (ldc * log(fmax(eps, lw + 1.0f)) / log(fmax(eps, 2.0f + (pow(lw/lwmax,bl)) * 8.0f))); 512 513 write_imagef (out, (int2)(x, y), pixel); 514} 515 516 517/* kernel for the global tonemap plugin: filmic */ 518kernel void 519global_tonemap_filmic (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 520 const float4 parameters) 521{ 522 const int x = get_global_id(0); 523 const int y = get_global_id(1); 524 525 if(x >= width || y >= height) return; 526 527 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 528 529 float l = pixel.x * 0.01f; 530 float m = fmax(0.0f, l - 0.004f); 531 532 pixel.x = 100.0f * ((m*(6.2f*m+0.5f))/(m*(6.2f*m+1.7f)+0.06f)); 533 534 write_imagef (out, (int2)(x, y), pixel); 535} 536 537 538/* kernels for the colormapping module */ 539#define HISTN (1<<11) 540#define MAXN 5 541 542// inverse distant weighting according to D. Shepard's method; with power parameter 2.0 543void 544get_clusters(const float4 col, const int n, global float2 *mean, float *weight) 545{ 546 float mdist = FLT_MAX; 547 for(int k=0; k<n; k++) 548 { 549 const float dist2 = (col.y-mean[k].x)*(col.y-mean[k].x) + (col.z-mean[k].y)*(col.z-mean[k].y); // dist^2 550 weight[k] = dist2 > 1.0e-6f ? 1.0f/dist2 : -1.0f; // direct hits marked as -1 551 if(dist2 < mdist) mdist = dist2; 552 } 553 if(mdist < 1.0e-6f) for(int k=0; k<n; k++) weight[k] = weight[k] < 0.0f ? 1.0f : 0.0f; // correction in case of direct hits 554 float sum = 0.0f; 555 for(int k=0; k<n; k++) sum += weight[k]; 556 if(sum > 0.0f) for(int k=0; k<n; k++) weight[k] /= sum; 557} 558 559kernel void 560colormapping_histogram (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 561 const float equalization, global int *target_hist, global float *source_ihist) 562{ 563 const int x = get_global_id(0); 564 const int y = get_global_id(1); 565 566 if(x >= width || y >= height) return; 567 568 float L = read_imagef(in, sampleri, (int2)(x, y)).x; 569 570 float dL = 0.5f*((L * (1.0f - equalization) + source_ihist[target_hist[(int)clamp(HISTN*L/100.0f, 0.0f, (float)HISTN-1.0f)]] * equalization) - L) + 50.0f; 571 dL = clamp(dL, 0.0f, 100.0f); 572 573 write_imagef (out, (int2)(x, y), (float4)(dL, 0.0f, 0.0f, 0.0f)); 574} 575 576kernel void 577colormapping_mapping (read_only image2d_t in, read_only image2d_t tmp, write_only image2d_t out, const int width, const int height, 578 const int clusters, global float2 *target_mean, global float2 *source_mean, global float2 *var_ratio, global int *mapio) 579{ 580 const int x = get_global_id(0); 581 const int y = get_global_id(1); 582 583 if(x >= width || y >= height) return; 584 585 float4 ipixel = read_imagef(in, sampleri, (int2)(x, y)); 586 float dL = read_imagef(tmp, sampleri, (int2)(x, y)).x; 587 float weight[MAXN]; 588 float4 opixel = (float4)0.0f; 589 590 opixel.x = 2.0f*(dL - 50.0f) + ipixel.x; 591 opixel.x = clamp(opixel.x, 0.0f, 100.0f); 592 593 get_clusters(ipixel, clusters, target_mean, weight); 594 595 for(int c=0; c < clusters; c++) 596 { 597 opixel.y += weight[c] * ((ipixel.y - target_mean[c].x)*var_ratio[c].x + source_mean[mapio[c]].x); 598 opixel.z += weight[c] * ((ipixel.z - target_mean[c].y)*var_ratio[c].y + source_mean[mapio[c]].y); 599 } 600 opixel.w = ipixel.w; 601 602 write_imagef (out, (int2)(x, y), opixel); 603} 604 605#undef HISTN 606#undef MAXN 607 608 609/* kernel for the colorbalance module */ 610kernel void 611colorbalance (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 612 const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey) 613{ 614 const int x = get_global_id(0); 615 const int y = get_global_id(1); 616 617 if(x >= width || y >= height) return; 618 619 float4 Lab = read_imagef(in, sampleri, (int2)(x, y)); 620 float4 sRGB = XYZ_to_sRGB(Lab_to_XYZ(Lab)); 621 622 // Lift gamma gain 623 sRGB = (sRGB <= (float4)0.0031308f) ? 12.92f * sRGB : (1.0f + 0.055f) * native_powr(sRGB, (float4)1.0f/2.4f) - (float4)0.055f; 624 sRGB = native_powr(fmax(((sRGB - (float4)1.0f) * lift + (float4)1.0f) * gain, (float4)0.0f), gamma_inv); 625 sRGB = (sRGB <= (float4)0.04045f) ? sRGB / 12.92f : native_powr((sRGB + (float4)0.055f) / (1.0f + 0.055f), (float4)2.4f); 626 Lab.xyz = XYZ_to_Lab(sRGB_to_XYZ(sRGB)).xyz; 627 628 write_imagef (out, (int2)(x, y), Lab); 629} 630 631kernel void 632colorbalance_lgg (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 633 const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey, const float saturation_out) 634{ 635 const int x = get_global_id(0); 636 const int y = get_global_id(1); 637 638 if(x >= width || y >= height) return; 639 640 float4 Lab = read_imagef(in, sampleri, (int2)(x, y)); 641 const float4 XYZ = Lab_to_XYZ(Lab); 642 float4 RGB = XYZ_to_prophotorgb(XYZ); 643 644 // saturation input 645 if (saturation != 1.0f) 646 { 647 const float4 luma = XYZ.y; 648 const float4 saturation4 = saturation; 649 RGB = luma + saturation4 * (RGB - luma); 650 } 651 652 // Lift gamma gain 653 RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, (float4)1.0f/2.2f); 654 RGB = ((RGB - (float4)1.0f) * lift + (float4)1.0f) * gain; 655 RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, gamma_inv * (float4)2.2f); 656 657 // saturation output 658 if (saturation_out != 1.0f) 659 { 660 const float4 luma = prophotorgb_to_XYZ(RGB).y; 661 const float4 saturation_out4 = saturation_out; 662 RGB = luma + saturation_out4 * (RGB - luma); 663 } 664 665 // fulcrum contrast 666 if (contrast != 1.0f) 667 { 668 const float4 contrast4 = contrast; 669 const float4 grey4 = grey; 670 RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : pow(RGB / grey4, contrast4) * grey4; 671 } 672 673 Lab.xyz = prophotorgb_to_Lab(RGB).xyz; 674 675 write_imagef (out, (int2)(x, y), Lab); 676} 677 678kernel void 679colorbalance_cdl (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 680 const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey, const float saturation_out) 681{ 682 const int x = get_global_id(0); 683 const int y = get_global_id(1); 684 685 if(x >= width || y >= height) return; 686 687 float4 Lab = read_imagef(in, sampleri, (int2)(x, y)); 688 const float4 XYZ = Lab_to_XYZ(Lab); 689 float4 RGB = XYZ_to_prophotorgb(XYZ); 690 691 // saturation input 692 if (saturation != 1.0f) 693 { 694 const float4 luma = XYZ.y; 695 const float4 saturation4 = saturation; 696 RGB = luma + saturation4 * (RGB - luma); 697 } 698 699 // lift power slope 700 RGB = RGB * gain + lift; 701 RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB, gamma_inv); 702 703 // saturation output 704 if (saturation_out != 1.0f) 705 { 706 const float4 luma = prophotorgb_to_XYZ(RGB).y; 707 const float4 saturation_out4 = saturation_out; 708 RGB = luma + saturation_out4 * (RGB - luma); 709 } 710 711 // fulcrum contrast 712 if (contrast != 1.0f) 713 { 714 const float4 contrast4 = contrast; 715 const float4 grey4 = grey; 716 RGB = (RGB <= (float4)0.0f) ? (float4)0.0f : native_powr(RGB / grey4, contrast4) * grey4; 717 } 718 719 Lab.xyz = prophotorgb_to_Lab(RGB).xyz; 720 721 write_imagef (out, (int2)(x, y), Lab); 722} 723 724 725static inline float sqf(const float x) 726{ 727 return x * x; 728} 729 730 731static inline float4 opacity_masks(const float x, 732 const float shadows_weight, const float highlights_weight, 733 const float midtones_weight, const float mask_grey_fulcrum) 734{ 735 float4 output; 736 const float x_offset = (x - mask_grey_fulcrum); 737 const float x_offset_norm = x_offset / mask_grey_fulcrum; 738 const float alpha = 1.f / (1.f + native_exp(x_offset_norm * shadows_weight)); // opacity of shadows 739 const float beta = 1.f / (1.f + native_exp(-x_offset_norm * highlights_weight)); // opacity of highlights 740 const float gamma = native_exp(-sqf(x_offset) * midtones_weight / 4.f) * sqf(1.f - alpha) * sqf(1.f - beta) * 8.f; // opacity of midtones 741 742 output.x = alpha; 743 output.y = gamma; 744 output.z = beta; 745 output.w = 0.f; 746 747 return output; 748} 749 750 751#define LUT_ELEM 360 // gamut LUT number of elements: resolution of 1° 752 753static inline float lookup_gamut(read_only image2d_t gamut_lut, const float x) 754{ 755 // WARNING : x should be between [-pi ; pi ], which is the default output of atan2 anyway 756 757 // convert in LUT coordinate 758 const float x_test = (LUT_ELEM - 1) * (x + M_PI_F) / (2.f * M_PI_F); 759 760 // find the 2 closest integer coordinates (next/previous) 761 float x_prev = floor(x_test); 762 float x_next = ceil(x_test); 763 764 // get the 2 closest LUT elements at integer coordinates 765 // cycle on the hue ring if out of bounds 766 int xi = (int)x_prev; 767 if(xi < 0) xi = LUT_ELEM - 1; 768 else if(xi > LUT_ELEM - 1) xi = 0; 769 770 int xii = (int)x_next; 771 if(xii < 0) xii = LUT_ELEM - 1; 772 else if(xii > LUT_ELEM - 1) xii = 0; 773 774 // fetch the corresponding y values 775 const float y_prev = read_imagef(gamut_lut, sampleri, (int2)(xi, 0)).x; 776 const float y_next = read_imagef(gamut_lut, sampleri, (int2)(xii, 0)).x; 777 778 // assume that we are exactly on an integer LUT element 779 float out = y_prev; 780 781 if(x_next != x_prev) 782 // we are between 2 LUT elements : do linear interpolation 783 // actually, we only add the slope term on the previous one 784 out += (x_test - x_prev) * (y_next - y_prev) / (x_next - x_prev); 785 786 return out; 787} 788 789 790static inline float soft_clip(const float x, const float soft_threshold, const float hard_threshold) 791{ 792 // use an exponential soft clipping above soft_threshold 793 // hard threshold must be > soft threshold 794 const float norm = hard_threshold - soft_threshold; 795 return (x > soft_threshold) ? soft_threshold + (1.f - native_exp(-(x - soft_threshold) / norm)) * norm : x; 796} 797 798 799kernel void 800colorbalancergb (read_only image2d_t in, write_only image2d_t out, 801 const int width, const int height, 802 constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, 803 constant const float *const matrix_in, constant const float *const matrix_out, 804 read_only image2d_t gamut_lut, 805 const float shadows_weight, const float highlights_weight, const float midtones_weight, const float mask_grey_fulcrum, 806 const float hue_angle, const float chroma_global, const float4 chroma, const float vibrance, 807 const float4 global_offset, const float4 shadows, const float4 highlights, const float4 midtones, 808 const float white_fulcrum, const float midtones_Y, 809 const float grey_fulcrum, const float contrast, 810 const float brilliance_global, const float4 brilliance, 811 const float saturation_global, const float4 saturation, 812 const int mask_display, const int mask_type, const int checker_1, const int checker_2, 813 const float4 checker_color_1, const float4 checker_color_2) 814{ 815 const int x = get_global_id(0); 816 const int y = get_global_id(1); 817 if(x >= width || y >= height) return; 818 const float4 pix_in = read_imagef(in, sampleri, (int2)(x, y)); 819 820 float4 XYZ_D65 = 0.f; 821 float4 LMS = 0.f; 822 float4 RGB = 0.f; 823 float4 Yrg = 0.f; 824 float4 Ych = 0.f; 825 826 // clip pipeline RGB 827 RGB = fmax(pix_in, 0.f); 828 829 // go to CIE 2006 LMS D65 830 LMS = matrix_product_float4(RGB, matrix_in); 831 832 // go to Filmlight Yrg 833 Yrg = LMS_to_Yrg(LMS); 834 835 // go to Ych 836 Ych = Yrg_to_Ych(Yrg); 837 838 // Sanitize input : no negative luminance 839 Ych.x = fmax(Ych.x, 0.f); 840 const float4 opacities = opacity_masks(native_powr(Ych.x, 0.4101205819200422f), // center middle grey in 50 % 841 shadows_weight, highlights_weight, midtones_weight, mask_grey_fulcrum); 842 const float4 opacities_comp = (float4)1.f - opacities; 843 844 // Hue shift - do it now because we need the gamut limit at output hue right after 845 Ych.z += hue_angle; 846 847 // Ensure hue ± correction is in [-PI; PI] 848 if(Ych.z > M_PI_F) Ych.z -= 2.f * M_PI_F; 849 else if(Ych.z < -M_PI_F) Ych.z += 2.f * M_PI_F; 850 851 // Linear chroma : distance to achromatic at constant luminance in scene-referred 852 const float chroma_boost = chroma_global + dot(opacities, chroma); 853 const float vib = vibrance * (1.0f - native_powr(Ych.y, fabs(vibrance))); 854 const float chroma_factor = fmax(1.f + chroma_boost + vib, 0.f); 855 Ych.y *= chroma_factor; 856 857 // Do a test conversion to Yrg 858 Yrg = Ych_to_Yrg(Ych); 859 860 // Gamut-clip in Yrg at constant hue and luminance 861 // e.g. find the max chroma value that fits in gamut at the current hue 862 const float D65[4] = { 0.21962576f, 0.54487092f, 0.23550333f, 0.f }; 863 float max_c = Ych.y; 864 const float cos_h = native_cos(Ych.z); 865 const float sin_h = native_sin(Ych.z); 866 867 if(Yrg.y < 0.f) 868 { 869 max_c = fmin(-D65[0] / cos_h, max_c); 870 } 871 if(Yrg.z < 0.f) 872 { 873 max_c = fmin(-D65[1] / sin_h, max_c); 874 } 875 if(Yrg.y + Yrg.z > 1.f) 876 { 877 max_c = fmin((1.f - D65[0] - D65[1]) / (cos_h + sin_h), max_c); 878 } 879 880 // Overwrite chroma with the sanitized value and go to Yrg for real 881 Ych.y = max_c; 882 Yrg = Ych_to_Yrg(Ych); 883 884 // Go to LMS 885 LMS = Yrg_to_LMS(Yrg); 886 887 // Go to Filmlight RGB 888 RGB = LMS_to_gradingRGB(LMS); 889 890 // Color balance 891 892 // global : offset 893 RGB += global_offset; 894 895 // highlights, shadows : 2 slopes with masking 896 RGB *= opacities_comp.z * (opacities_comp.x + opacities.x * shadows) + opacities.z * highlights; 897 // factorization of : (RGB[c] * (1.f - alpha) + RGB[c] * d->shadows[c] * alpha) * (1.f - beta) + RGB[c] * d->highlights[c] * beta; 898 899 // midtones : power with sign preservation 900 RGB = sign(RGB) * native_powr(fabs(RGB) / white_fulcrum, midtones) * white_fulcrum; 901 902 // for the non-linear ops we need to go in Yrg again because RGB doesn't preserve color 903 LMS = gradingRGB_to_LMS(RGB); 904 Yrg = LMS_to_Yrg(LMS); 905 906 // Y midtones power (gamma) 907 Yrg.x = native_powr(fmax(Yrg.x / white_fulcrum, 0.f), midtones_Y) * white_fulcrum; 908 909 // Y fulcrumed contrast 910 Yrg.x = grey_fulcrum * native_powr(Yrg.x / grey_fulcrum, contrast); 911 912 LMS = Yrg_to_LMS(Yrg); 913 XYZ_D65 = LMS_to_XYZ(LMS); 914 915 // Perceptual color adjustments 916 917 // Go to JzAzBz for perceptual saturation 918 float4 Jab = XYZ_to_JzAzBz(XYZ_D65); 919 920 // Convert to JCh 921 float JC[2] = { Jab.x, hypot(Jab.y, Jab.z) }; // brightness/chroma vector 922 const float h = atan2(Jab.z, Jab.y); // hue : (a, b) angle 923 924 // Project JC onto S, the saturation eigenvector, with orthogonal vector O. 925 // Note : O should be = (C * cosf(T) - J * sinf(T)) = 0 since S is the eigenvector, 926 // so we add the chroma projected along the orthogonal axis to get some control value 927 const float T = atan2(JC[1], JC[0]); // angle of the eigenvector over the hue plane 928 const float sin_T = native_sin(T); 929 const float cos_T = native_cos(T); 930 const float M_rot_dir[2][2] = { { cos_T, sin_T }, 931 { -sin_T, cos_T } }; 932 const float M_rot_inv[2][2] = { { cos_T, -sin_T }, 933 { sin_T, cos_T } }; 934 float SO[2]; 935 936 // brilliance & Saturation : mix of chroma and luminance 937 const float boosts[2] = { 1.f + brilliance_global + dot(opacities, brilliance), // move in S direction 938 saturation_global + dot(opacities, saturation) }; // move in O direction 939 940 SO[0] = JC[0] * M_rot_dir[0][0] + JC[1] * M_rot_dir[0][1]; 941 SO[1] = SO[0] * clamp(T * boosts[1], -T, M_PI_F / 2.f - T); 942 SO[0] = fmax(SO[0] * boosts[0], 0.f); 943 944 // Project back to JCh, that is rotate back of -T angle 945 JC[0] = fmax(SO[0] * M_rot_inv[0][0] + SO[1] * M_rot_inv[0][1], 0.f); 946 JC[1] = fmax(SO[0] * M_rot_inv[1][0] + SO[1] * M_rot_inv[1][1], 0.f); 947 948 // Gamut mapping 949 const float out_max_sat_h = lookup_gamut(gamut_lut, h); 950 float sat = (JC[0] > 0.f) ? JC[1] / JC[0] : 0.f; 951 sat = soft_clip(sat, 0.8f * out_max_sat_h, out_max_sat_h); 952 const float max_C_at_sat = JC[0] * sat; 953 const float max_J_at_sat = (sat > 0.f) ? JC[1] / sat : 0.f; 954 JC[0] = (JC[0] + max_J_at_sat) / 2.f; 955 JC[1] = (JC[1] + max_C_at_sat) / 2.f; 956 957 // Gamut-clip in Jch at constant hue and lightness, 958 // e.g. find the max chroma available at current hue that doesn't 959 // yield negative L'M'S' values, which will need to be clipped during conversion 960 const float cos_H = native_cos(h); 961 const float sin_H = native_sin(h); 962 963 const float d0 = 1.6295499532821566e-11f; 964 const float d = -0.56f; 965 float Iz = JC[0] + d0; 966 Iz /= (1.f + d - d * Iz); 967 Iz = fmax(Iz, 0.f); 968 969 const float4 AI[3] = { { 1.0f, 0.1386050432715393f, 0.0580473161561189f, 0.0f }, 970 { 1.0f, -0.1386050432715393f, -0.0580473161561189f, 0.0f }, 971 { 1.0f, -0.0960192420263190f, -0.8118918960560390f, 0.0f } }; 972 973 // Do a test conversion to L'M'S' 974 const float4 IzAzBz = { Iz, JC[1] * cos_H, JC[1] * sin_H, 0.f }; 975 LMS.x = dot(AI[0], IzAzBz); 976 LMS.y = dot(AI[1], IzAzBz); 977 LMS.z = dot(AI[2], IzAzBz); 978 979 // Clip chroma 980 float max_C = JC[1]; 981 if(LMS.x < 0.f) 982 max_C = fmin(-Iz / (AI[0].y * cos_H + AI[0].z * sin_H), max_C); 983 984 if(LMS.y < 0.f) 985 max_C = fmin(-Iz / (AI[1].y * cos_H + AI[1].z * sin_H), max_C); 986 987 if(LMS.z < 0.f) 988 max_C = fmin(-Iz / (AI[2].y * cos_H + AI[2].z * sin_H), max_C); 989 990 // Project back to JzAzBz for real 991 Jab.x = JC[0]; 992 Jab.y = max_C * cos_H; 993 Jab.z = max_C * sin_H; 994 995 XYZ_D65 = JzAzBz_2_XYZ(Jab); 996 997 // Project back to D50 pipeline RGB 998 RGB = matrix_product_float4(XYZ_D65, matrix_out); 999 1000 if(mask_display) 1001 { 1002 // draw checkerboard 1003 float4 color; 1004 if(x % checker_1 < x % checker_2) 1005 { 1006 if(y % checker_1 < y % checker_2) color = checker_color_2; 1007 else color = checker_color_1; 1008 } 1009 else 1010 { 1011 if(y % checker_1 < y % checker_2) color = checker_color_1; 1012 else color = checker_color_2; 1013 } 1014 const float *op = (const float *)&opacities; 1015 float opacity = op[mask_type]; 1016 const float opacity_comp = 1.0f - opacity; 1017 1018 RGB = opacity_comp * color + opacity * fmax(RGB, 0.f); 1019 RGB.w = 1.0f; // alpha is opaque, we need to preview it 1020 } 1021 else 1022 { 1023 RGB = fmax(RGB, 0.f); 1024 RGB.w = pix_in.w; // alpha copy 1025 } 1026 1027 write_imagef (out, (int2)(x, y), RGB); 1028} 1029 1030 1031/* helpers and kernel for the colorchecker module */ 1032float fastlog2(float x) 1033{ 1034 union { float f; unsigned int i; } vx = { x }; 1035 union { unsigned int i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 }; 1036 1037 float y = vx.i; 1038 1039 y *= 1.1920928955078125e-7f; 1040 1041 return y - 124.22551499f 1042 - 1.498030302f * mx.f 1043 - 1.72587999f / (0.3520887068f + mx.f); 1044} 1045 1046float fastlog(float x) 1047{ 1048 return 0.69314718f * fastlog2(x); 1049} 1050 1051float thinplate(const float4 x, const float4 y) 1052{ 1053 const float r2 = 1054 (x.x - y.x) * (x.x - y.x) + 1055 (x.y - y.y) * (x.y - y.y) + 1056 (x.z - y.z) * (x.z - y.z); 1057 1058 return r2 * fastlog(max(1e-8f, r2)); 1059} 1060 1061kernel void 1062colorchecker (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1063 const int num_patches, global float4 *params) 1064{ 1065 const int x = get_global_id(0); 1066 const int y = get_global_id(1); 1067 1068 if(x >= width || y >= height) return; 1069 1070 global float4 *source_Lab = params; 1071 global float4 *coeff_Lab = params + num_patches; 1072 global float4 *poly_Lab = params + 2 * num_patches; 1073 1074 float4 ipixel = read_imagef(in, sampleri, (int2)(x, y)); 1075 1076 const float w = ipixel.w; 1077 1078 float4 opixel = poly_Lab[0] + poly_Lab[1] * ipixel.x + poly_Lab[2] * ipixel.y + poly_Lab[3] * ipixel.z; 1079 1080 for(int k = 0; k < num_patches; k++) 1081 { 1082 const float phi = thinplate(ipixel, source_Lab[k]); 1083 opixel += coeff_Lab[k] * phi; 1084 } 1085 1086 opixel.w = w; 1087 1088 write_imagef (out, (int2)(x, y), opixel); 1089} 1090