1/* 2 This file is part of darktable, 3 copyright (c) 2009--2013 johannes hanika. 4 copyright (c) 2014 Ulrich Pegelow. 5 copyright (c) 2014 LebedevRI. 6 7 darktable is free software: you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation, either version 3 of the License, or 10 (at your option) any later version. 11 12 darktable is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with darktable. If not, see <http://www.gnu.org/licenses/>. 19*/ 20 21#include "colorspace.h" 22#include "color_conversion.h" 23#include "common.h" 24#include "rgb_norms.h" 25 26int 27BL(const int row, const int col) 28{ 29 return (((row & 1) << 1) + (col & 1)); 30} 31 32kernel void 33rawprepare_1f(read_only image2d_t in, write_only image2d_t out, 34 const int width, const int height, 35 const int cx, const int cy, 36 global const float *sub, global const float *div, 37 const int rx, const int ry) 38{ 39 const int x = get_global_id(0); 40 const int y = get_global_id(1); 41 42 if(x >= width || y >= height) return; 43 44 const float pixel = read_imageui(in, sampleri, (int2)(x + cx, y + cy)).x; 45 46 const int id = BL(ry+cy+y, rx+cx+x); 47 const float pixel_scaled = (pixel - sub[id]) / div[id]; 48 49 write_imagef(out, (int2)(x, y), (float4)(pixel_scaled, 0.0f, 0.0f, 0.0f)); 50} 51 52kernel void 53rawprepare_1f_unnormalized(read_only image2d_t in, write_only image2d_t out, 54 const int width, const int height, 55 const int cx, const int cy, 56 global const float *sub, global const float *div, 57 const int rx, const int ry) 58{ 59 const int x = get_global_id(0); 60 const int y = get_global_id(1); 61 62 if(x >= width || y >= height) return; 63 64 const float pixel = read_imagef(in, sampleri, (int2)(x + cx, y + cy)).x; 65 66 const int id = BL(ry+cy+y, rx+cx+x); 67 const float pixel_scaled = (pixel - sub[id]) / div[id]; 68 69 write_imagef(out, (int2)(x, y), (float4)(pixel_scaled, 0.0f, 0.0f, 0.0f)); 70} 71 72kernel void 73rawprepare_4f(read_only image2d_t in, write_only image2d_t out, 74 const int width, const int height, 75 const int cx, const int cy, 76 global const float *black, global const float *div) 77{ 78 const int x = get_global_id(0); 79 const int y = get_global_id(1); 80 81 if(x >= width || y >= height) return; 82 83 float4 pixel = read_imagef(in, sampleri, (int2)(x + cx, y + cy)); 84 pixel.xyz = (pixel.xyz - black[0]) / div[0]; 85 86 write_imagef(out, (int2)(x, y), pixel); 87} 88 89kernel void 90invert_1f(read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float *color, 91 const unsigned int filters, const int rx, const int ry) 92{ 93 const int x = get_global_id(0); 94 const int y = get_global_id(1); 95 if(x >= width || y >= height) return; 96 const float pixel = read_imagef(in, sampleri, (int2)(x, y)).x; 97 const float inv_pixel = color[FC(ry+y, rx+x, filters)] - pixel; 98 99 write_imagef (out, (int2)(x, y), (float4)(clamp(inv_pixel, 0.0f, 1.0f), 0.0f, 0.0f, 0.0f)); 100} 101 102kernel void 103invert_4f(read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float *color, 104 const unsigned int filters, const int rx, const int ry) 105{ 106 const int x = get_global_id(0); 107 const int y = get_global_id(1); 108 if(x >= width || y >= height) return; 109 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 110 pixel.x = color[0] - pixel.x; 111 pixel.y = color[1] - pixel.y; 112 pixel.z = color[2] - pixel.z; 113 pixel.xyz = clamp(pixel.xyz, 0.0f, 1.0f); 114 115 write_imagef (out, (int2)(x, y), pixel); 116} 117 118kernel void 119whitebalance_1f(read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float *coeffs, 120 const unsigned int filters, const int rx, const int ry, global const unsigned char (*const xtrans)[6]) 121{ 122 const int x = get_global_id(0); 123 const int y = get_global_id(1); 124 if(x >= width || y >= height) return; 125 const float pixel = read_imagef(in, sampleri, (int2)(x, y)).x; 126 write_imagef (out, (int2)(x, y), (float4)(pixel * coeffs[FC(ry+y, rx+x, filters)], 0.0f, 0.0f, 0.0f)); 127} 128 129kernel void 130whitebalance_1f_xtrans(read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float *coeffs, 131 const unsigned int filters, const int rx, const int ry, global const unsigned char (*const xtrans)[6]) 132{ 133 const int x = get_global_id(0); 134 const int y = get_global_id(1); 135 if(x >= width || y >= height) return; 136 const float pixel = read_imagef(in, sampleri, (int2)(x, y)).x; 137 write_imagef (out, (int2)(x, y), (float4)(pixel * coeffs[FCxtrans(ry+y, rx+x, xtrans)], 0.0f, 0.0f, 0.0f)); 138} 139 140 141kernel void 142whitebalance_4f(read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float *coeffs, 143 const unsigned int filters, const int rx, const int ry, global const unsigned char (*const xtrans)[6]) 144{ 145 const int x = get_global_id(0); 146 const int y = get_global_id(1); 147 if(x >= width || y >= height) return; 148 const float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 149 write_imagef (out, (int2)(x, y), (float4)(pixel.x * coeffs[0], pixel.y * coeffs[1], pixel.z * coeffs[2], pixel.w)); 150} 151 152/* kernel for the exposure plugin. should work transparently with float4 and float image2d. */ 153kernel void 154exposure (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float black, const float scale) 155{ 156 const int x = get_global_id(0); 157 const int y = get_global_id(1); 158 159 if(x >= width || y >= height) return; 160 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 161 pixel.xyz = ((pixel - black ) * scale).xyz; 162 write_imagef (out, (int2)(x, y), pixel); 163} 164 165/* kernel for the highlights plugin. */ 166kernel void 167highlights_4f_clip (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 168 const int mode, const float clip) 169{ 170 const int x = get_global_id(0); 171 const int y = get_global_id(1); 172 173 if(x >= width || y >= height) return; 174 175 // 4f/pixel means that this has been debayered already. 176 // it's thus hopeless to recover highlights here (this code path is just used for preview and non-raw images) 177 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 178 // default: // 0, DT_IOP_HIGHLIGHTS_CLIP 179 pixel.x = fmin(clip, pixel.x); 180 pixel.y = fmin(clip, pixel.y); 181 pixel.z = fmin(clip, pixel.z); 182 write_imagef (out, (int2)(x, y), pixel); 183} 184 185kernel void 186highlights_1f_clip (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 187 const float clip, const int rx, const int ry, const int filters) 188{ 189 const int x = get_global_id(0); 190 const int y = get_global_id(1); 191 192 if(x >= width || y >= height) return; 193 194 float pixel = read_imagef(in, sampleri, (int2)(x, y)).x; 195 196 pixel = fmin(clip, pixel); 197 198 write_imagef (out, (int2)(x, y), pixel); 199} 200 201#define SQRT3 1.7320508075688772935274463415058723669f 202#define SQRT12 3.4641016151377545870548926830117447339f // 2*SQRT3 203kernel void 204highlights_1f_lch_bayer (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 205 const float clip, const int rx, const int ry, const int filters) 206{ 207 const int x = get_global_id(0); 208 const int y = get_global_id(1); 209 210 if(x >= width || y >= height) return; 211 212 int clipped = 0; 213 float R = 0.0f; 214 float Gmin = FLT_MAX; 215 float Gmax = -FLT_MAX; 216 float B = 0.0f; 217 float pixel = 0.0f; 218 219 // sample 1 bayer block. thus we will have 2 green values. 220 for(int jj = 0; jj <= 1; jj++) 221 { 222 for(int ii = 0; ii <= 1; ii++) 223 { 224 const float val = read_imagef(in, sampleri, (int2)(x+ii, y+jj)).x; 225 226 pixel = (ii == 0 && jj == 0) ? val : pixel; 227 228 clipped = (clipped || (val > clip)); 229 230 const int c = FC(y + jj + ry, x + ii + rx, filters); 231 232 switch(c) 233 { 234 case 0: 235 R = val; 236 break; 237 case 1: 238 Gmin = min(Gmin, val); 239 Gmax = max(Gmax, val); 240 break; 241 case 2: 242 B = val; 243 break; 244 } 245 } 246 } 247 248 if(clipped) 249 { 250 const float Ro = min(R, clip); 251 const float Go = min(Gmin, clip); 252 const float Bo = min(B, clip); 253 254 const float L = (R + Gmax + B) / 3.0f; 255 256 float C = SQRT3 * (R - Gmax); 257 float H = 2.0f * B - Gmax - R; 258 259 const float Co = SQRT3 * (Ro - Go); 260 const float Ho = 2.0f * Bo - Go - Ro; 261 262 const float ratio = (R != Gmax && Gmax != B) ? sqrt((Co * Co + Ho * Ho) / (C * C + H * H)) : 1.0f; 263 264 C *= ratio; 265 H *= ratio; 266 267 /* 268 * backtransform proof, sage: 269 * 270 * R,G,B,L,C,H = var('R,G,B,L,C,H') 271 * solve([L==(R+G+B)/3, C==sqrt(3)*(R-G), H==2*B-G-R], R, G, B) 272 * 273 * result: 274 * [[R == 1/6*sqrt(3)*C - 1/6*H + L, G == -1/6*sqrt(3)*C - 1/6*H + L, B == 1/3*H + L]] 275 */ 276 const int c = FC(y + ry, x + rx, filters); 277 C = (c == 1) ? -C : C; 278 279 pixel = L; 280 pixel += (c == 2) ? H / 3.0f : -H / 6.0f + C / SQRT12; 281 } 282 283 write_imagef (out, (int2)(x, y), pixel); 284} 285 286 287kernel void 288highlights_1f_lch_xtrans (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 289 const float clip, const int rx, const int ry, global const unsigned char (*const xtrans)[6], 290 local float *buffer) 291{ 292 const int x = get_global_id(0); 293 const int y = get_global_id(1); 294 const int xlsz = get_local_size(0); 295 const int ylsz = get_local_size(1); 296 const int xlid = get_local_id(0); 297 const int ylid = get_local_id(1); 298 const int xgid = get_group_id(0); 299 const int ygid = get_group_id(1); 300 301 // individual control variable in this work group and the work group size 302 const int l = mad24(ylid, xlsz, xlid); 303 const int lsz = mul24(xlsz, ylsz); 304 305 // stride and maximum capacity of local buffer 306 // cells of 1*float per pixel with a surrounding border of 2 cells 307 const int stride = xlsz + 2*2; 308 const int maxbuf = mul24(stride, ylsz + 2*2); 309 310 // coordinates of top left pixel of buffer 311 // this is 2 pixel left and above of the work group origin 312 const int xul = mul24(xgid, xlsz) - 2; 313 const int yul = mul24(ygid, ylsz) - 2; 314 315 // populate local memory buffer 316 for(int n = 0; n <= maxbuf/lsz; n++) 317 { 318 const int bufidx = mad24(n, lsz, l); 319 if(bufidx >= maxbuf) continue; 320 const int xx = xul + bufidx % stride; 321 const int yy = yul + bufidx / stride; 322 buffer[bufidx] = read_imagef(in, sampleri, (int2)(xx, yy)).x; 323 } 324 325 // center buffer around current x,y-Pixel 326 buffer += mad24(ylid + 2, stride, xlid + 2); 327 328 barrier(CLK_LOCAL_MEM_FENCE); 329 330 if(x >= width || y >= height) return; 331 332 float pixel = 0.0f; 333 334 if(x < 2 || x > width - 3 || y < 2 || y > height - 3) 335 { 336 // fast path for border 337 pixel = min(clip, buffer[0]); 338 } 339 else 340 { 341 // if current pixel is clipped, always reconstruct 342 int clipped = (buffer[0] > clip); 343 344 if(!clipped) 345 { 346 clipped = 1; 347 // check if there is any 3x3 block touching the current 348 // pixel which has no clipping, as then we don't need to 349 // reconstruct the current pixel. This avoids zippering in 350 // edge transitions from clipped to unclipped areas. The 351 // X-Trans sensor seems prone to this, unlike Bayer, due 352 // to its irregular pattern. 353 for(int offset_j = -2; offset_j <= 0; offset_j++) 354 { 355 for(int offset_i = -2; offset_i <= 0; offset_i++) 356 { 357 if(clipped) 358 { 359 clipped = 0; 360 for(int jj = offset_j; jj <= offset_j + 2; jj++) 361 { 362 for(int ii = offset_i; ii <= offset_i + 2; ii++) 363 { 364 const float val = buffer[mad24(jj, stride, ii)]; 365 clipped = (clipped || (val > clip)); 366 } 367 } 368 } 369 } 370 } 371 } 372 373 if(clipped) 374 { 375 float mean[3] = { 0.0f, 0.0f, 0.0f }; 376 int cnt[3] = { 0, 0, 0 }; 377 float RGBmax[3] = { -FLT_MAX, -FLT_MAX, -FLT_MAX }; 378 379 for(int jj = -1; jj <= 1; jj++) 380 { 381 for(int ii = -1; ii <= 1; ii++) 382 { 383 const float val = buffer[mad24(jj, stride, ii)]; 384 const int c = FCxtrans(y + jj + ry, x + ii + rx, xtrans); 385 mean[c] += val; 386 cnt[c]++; 387 RGBmax[c] = max(RGBmax[c], val); 388 } 389 } 390 391 const float Ro = min(mean[0]/cnt[0], clip); 392 const float Go = min(mean[1]/cnt[1], clip); 393 const float Bo = min(mean[2]/cnt[2], clip); 394 395 const float R = RGBmax[0]; 396 const float G = RGBmax[1]; 397 const float B = RGBmax[2]; 398 399 const float L = (R + G + B) / 3.0f; 400 float C = SQRT3 * (R - G); 401 float H = 2.0f * B - G - R; 402 403 const float Co = SQRT3 * (Ro - Go); 404 const float Ho = 2.0f * Bo - Go - Ro; 405 406 if(R != G && G != B) 407 { 408 const float ratio = sqrt((Co * Co + Ho * Ho) / (C * C + H * H)); 409 C *= ratio; 410 H *= ratio; 411 } 412 413 float RGB[3] = { 0.0f, 0.0f, 0.0f }; 414 415 RGB[0] = L - H / 6.0f + C / SQRT12; 416 RGB[1] = L - H / 6.0f - C / SQRT12; 417 RGB[2] = L + H / 3.0f; 418 419 pixel = RGB[FCxtrans(y + ry, x + rx, xtrans)]; 420 } 421 else 422 pixel = buffer[0]; 423 } 424 425 write_imagef (out, (int2)(x, y), pixel); 426} 427#undef SQRT3 428#undef SQRT12 429 430float 431lookup_unbounded_twosided(read_only image2d_t lut, const float x, constant float *a) 432{ 433 // in case the tone curve is marked as linear, return the fast 434 // path to linear unbounded (does not clip x at 1) 435 if(a[0] >= 0.0f) 436 { 437 const float ar = 1.0f/a[0]; 438 const float al = 1.0f - 1.0f/a[3]; 439 if(x < ar && x >= al) 440 { 441 // lut lookup 442 const int xi = clamp((int)(x * 0x10000ul), 0, 0xffff); 443 const int2 p = (int2)((xi & 0xff), (xi >> 8)); 444 return read_imagef(lut, sampleri, p).x; 445 } 446 else 447 { 448 // two-sided extrapolation (with inverted x-axis for left side) 449 const float xx = (x >= ar) ? x : 1.0f - x; 450 constant float *aa = (x >= ar) ? a : a + 3; 451 return aa[1] * native_powr(xx*aa[0], aa[2]); 452 } 453 } 454 else return x; 455} 456 457float 458lerp_lookup_unbounded0(read_only image2d_t lut, const float x, global const float *a) 459{ 460 // in case the tone curve is marked as linear, return the fast 461 // path to linear unbounded (does not clip x at 1) 462 if(a[0] >= 0.0f) 463 { 464 if(x < 1.0f) 465 { 466 const float ft = clamp(x * (float)0xffff, 0.0f, (float)0xffff); 467 const int t = ft < 0xfffe ? ft : 0xfffe; 468 const float f = ft - t; 469 const int2 p1 = (int2)((t & 0xff), (t >> 8)); 470 const int2 p2 = (int2)(((t + 1) & 0xff), ((t + 1) >> 8)); 471 const float l1 = read_imagef(lut, sampleri, p1).x; 472 const float l2 = read_imagef(lut, sampleri, p2).x; 473 return l1 * (1.0f - f) + l2 * f; 474 } 475 else return a[1] * native_powr(x*a[0], a[2]); 476 } 477 else return x; 478} 479 480 481/* kernel for the plugin colorin: unbound processing */ 482kernel void 483colorin_unbound (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 484 global float *cmat, global float *lmat, 485 read_only image2d_t lutr, read_only image2d_t lutg, read_only image2d_t lutb, 486 const int blue_mapping, global const float (*const a)[3]) 487{ 488 const int x = get_global_id(0); 489 const int y = get_global_id(1); 490 491 if(x >= width || y >= height) return; 492 493 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 494 495 float cam[3], XYZ[3]; 496 cam[0] = lerp_lookup_unbounded0(lutr, pixel.x, a[0]); 497 cam[1] = lerp_lookup_unbounded0(lutg, pixel.y, a[1]); 498 cam[2] = lerp_lookup_unbounded0(lutb, pixel.z, a[2]); 499 500 if(blue_mapping) 501 { 502 const float YY = cam[0] + cam[1] + cam[2]; 503 if(YY > 0.0f) 504 { 505 // manual gamut mapping. these values cause trouble when converting back from Lab to sRGB: 506 const float zz = cam[2] / YY; 507 // lower amount and higher bound_z make the effect smaller. 508 // the effect is weakened the darker input values are, saturating at bound_Y 509 const float bound_z = 0.5f, bound_Y = 0.8f; 510 const float amount = 0.11f; 511 if (zz > bound_z) 512 { 513 const float t = (zz - bound_z) / (1.0f - bound_z) * fmin(1.0f, YY / bound_Y); 514 cam[1] += t * amount; 515 cam[2] -= t * amount; 516 } 517 } 518 } 519 520 // now convert camera to XYZ using the color matrix 521 for(int j=0;j<3;j++) 522 { 523 XYZ[j] = 0.0f; 524 for(int i=0;i<3;i++) XYZ[j] += cmat[3*j+i] * cam[i]; 525 } 526 float4 xyz = (float4)(XYZ[0], XYZ[1], XYZ[2], 0.0f); 527 pixel.xyz = XYZ_to_Lab(xyz).xyz; 528 write_imagef (out, (int2)(x, y), pixel); 529} 530 531/* kernel for the plugin colorin: with clipping */ 532kernel void 533colorin_clipping (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 534 global float *cmat, global float *lmat, 535 read_only image2d_t lutr, read_only image2d_t lutg, read_only image2d_t lutb, 536 const int blue_mapping, global const float (*const a)[3]) 537{ 538 const int x = get_global_id(0); 539 const int y = get_global_id(1); 540 541 if(x >= width || y >= height) return; 542 543 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 544 545 float cam[3], RGB[3], XYZ[3]; 546 cam[0] = lerp_lookup_unbounded0(lutr, pixel.x, a[0]); 547 cam[1] = lerp_lookup_unbounded0(lutg, pixel.y, a[1]); 548 cam[2] = lerp_lookup_unbounded0(lutb, pixel.z, a[2]); 549 550 if(blue_mapping) 551 { 552 const float YY = cam[0] + cam[1] + cam[2]; 553 if(YY > 0.0f) 554 { 555 // manual gamut mapping. these values cause trouble when converting back from Lab to sRGB: 556 const float zz = cam[2] / YY; 557 // lower amount and higher bound_z make the effect smaller. 558 // the effect is weakened the darker input values are, saturating at bound_Y 559 const float bound_z = 0.5f, bound_Y = 0.8f; 560 const float amount = 0.11f; 561 if (zz > bound_z) 562 { 563 const float t = (zz - bound_z) / (1.0f - bound_z) * fmin(1.0f, YY / bound_Y); 564 cam[1] += t * amount; 565 cam[2] -= t * amount; 566 } 567 } 568 } 569 570 // convert camera to RGB using the first color matrix 571 for(int j=0;j<3;j++) 572 { 573 RGB[j] = 0.0f; 574 for(int i=0;i<3;i++) RGB[j] += cmat[3*j+i] * cam[i]; 575 } 576 577 // clamp at this stage 578 for(int i=0; i<3; i++) RGB[i] = clamp(RGB[i], 0.0f, 1.0f); 579 580 // convert clipped RGB to XYZ 581 for(int j=0;j<3;j++) 582 { 583 XYZ[j] = 0.0f; 584 for(int i=0;i<3;i++) XYZ[j] += lmat[3*j+i] * RGB[i]; 585 } 586 587 float4 xyz = (float4)(XYZ[0], XYZ[1], XYZ[2], 0.0f); 588 pixel.xyz = XYZ_to_Lab(xyz).xyz; 589 write_imagef (out, (int2)(x, y), pixel); 590} 591 592/* kernel for the tonecurve plugin. */ 593kernel void 594tonecurve (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 595 read_only image2d_t table_L, read_only image2d_t table_a, read_only image2d_t table_b, 596 const int autoscale_ab, const int unbound_ab, constant float *coeffs_L, constant float *coeffs_ab, 597 const float low_approximation, const int preserve_colors, 598 constant dt_colorspaces_iccprofile_info_cl_t *profile_info, read_only image2d_t lut) 599{ 600 const int x = get_global_id(0); 601 const int y = get_global_id(1); 602 603 if(x >= width || y >= height) return; 604 605 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 606 const float L_in = pixel.x/100.0f; 607 // use lut or extrapolation: 608 const float L = lookup_unbounded(table_L, L_in, coeffs_L); 609 610 if (autoscale_ab == 0) 611 { 612 const float a_in = (pixel.y + 128.0f) / 256.0f; 613 const float b_in = (pixel.z + 128.0f) / 256.0f; 614 615 if (unbound_ab == 0) 616 { 617 pixel.y = lookup(table_a, a_in); 618 pixel.z = lookup(table_b, b_in); 619 } 620 else 621 { 622 // use lut or two-sided extrapolation 623 pixel.y = lookup_unbounded_twosided(table_a, a_in, coeffs_ab); 624 pixel.z = lookup_unbounded_twosided(table_b, b_in, coeffs_ab + 6); 625 } 626 pixel.x = L; 627 } 628 else if(autoscale_ab == 1) 629 { 630 if(L_in > 0.01f) 631 { 632 pixel.y *= L/pixel.x; 633 pixel.z *= L/pixel.x; 634 } 635 else 636 { 637 pixel.y *= low_approximation; 638 pixel.z *= low_approximation; 639 } 640 pixel.x = L; 641 } 642 else if(autoscale_ab == 2) 643 { 644 float4 xyz = Lab_to_XYZ(pixel); 645 xyz.x = lookup_unbounded(table_L, xyz.x, coeffs_L); 646 xyz.y = lookup_unbounded(table_L, xyz.y, coeffs_L); 647 xyz.z = lookup_unbounded(table_L, xyz.z, coeffs_L); 648 pixel.xyz = XYZ_to_Lab(xyz).xyz; 649 } 650 else if(autoscale_ab == 3) 651 { 652 float4 rgb = Lab_to_prophotorgb(pixel); 653 654 if (preserve_colors == DT_RGB_NORM_NONE) 655 { 656 rgb.x = lookup_unbounded(table_L, rgb.x, coeffs_L); 657 rgb.y = lookup_unbounded(table_L, rgb.y, coeffs_L); 658 rgb.z = lookup_unbounded(table_L, rgb.z, coeffs_L); 659 } 660 else 661 { 662 float ratio = 1.f; 663 float lum = dt_rgb_norm(rgb, preserve_colors, 1, profile_info, lut); 664 if(lum > 0.f) 665 { 666 const float curve_lum = lookup_unbounded(table_L, lum, coeffs_L); 667 ratio = curve_lum / lum; 668 } 669 rgb.xyz *= ratio; 670 } 671 pixel.xyz = prophotorgb_to_Lab(rgb).xyz; 672 } 673 674 write_imagef (out, (int2)(x, y), pixel); 675} 676 677 678/* kernel for the colorcorrection plugin. */ 679__kernel void 680colorcorrection (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 681 const float saturation, const float a_scale, const float a_base, 682 const float b_scale, const float b_base) 683{ 684 const int x = get_global_id(0); 685 const int y = get_global_id(1); 686 687 if(x >= width || y >= height) return; 688 689 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 690 pixel.y = saturation*(pixel.y + pixel.x * a_scale + a_base); 691 pixel.z = saturation*(pixel.z + pixel.x * b_scale + b_base); 692 write_imagef (out, (int2)(x, y), pixel); 693} 694 695 696void 697mul_mat_vec_2(const float4 m, const float2 *p, float2 *o) 698{ 699 (*o).x = (*p).x*m.x + (*p).y*m.y; 700 (*o).y = (*p).x*m.z + (*p).y*m.w; 701} 702 703void 704backtransform(float2 *p, float2 *o, const float4 m, const float2 t) 705{ 706 (*p).y /= (1.0f + (*p).x*t.x); 707 (*p).x /= (1.0f + (*p).y*t.y); 708 mul_mat_vec_2(m, p, o); 709} 710 711void 712keystone_backtransform(float2 *i, const float4 k_space, const float2 ka, const float4 ma, const float2 mb) 713{ 714 float xx = (*i).x - k_space.x; 715 float yy = (*i).y - k_space.y; 716 717 /*float u = ka.x-kb.x+kc.x-kd.x; 718 float v = ka.x-kb.x; 719 float w = ka.x-kd.x; 720 float z = ka.x; 721 //(*i).x = (xx/k_space.z)*(yy/k_space.w)*(ka.x-kb.x+kc.x-kd.x) - (xx/k_space.z)*(ka.x-kb.x) - (yy/k_space.w)*(ka.x-kd.x) + ka.x + k_space.x; 722 (*i).x = (xx/k_space.z)*(yy/k_space.w)*u - (xx/k_space.z)*v - (yy/k_space.w)*w + z + k_space.x; 723 u = ka.y-kb.y+kc.y-kd.y; 724 v = ka.y-kb.y; 725 w = ka.y-kd.y; 726 z = ka.y; 727 //(*i).y = (xx/k_space.z)*(yy/k_space.w)*(ka.y-kb.y+kc.y-kd.y) - (xx/k_space.z)*(ka.y-kb.y) - (yy/k_space.w)*(ka.y-kd.y) + ka.y + k_space.y; 728 (*i).y = (xx/k_space.z)*(yy/k_space.w)*u - (xx/k_space.z)*v - (yy/k_space.w)*w + z + k_space.y;*/ 729 float div = ((ma.z*xx-ma.x*yy)*mb.y+(ma.y*yy-ma.w*xx)*mb.x+ma.x*ma.w-ma.y*ma.z); 730 731 (*i).x = (ma.w*xx-ma.y*yy)/div + ka.x; 732 (*i).y =-(ma.z*xx-ma.x*yy)/div + ka.y; 733} 734 735 736float 737interpolation_func_bicubic(float t) 738{ 739 float r; 740 t = fabs(t); 741 742 r = (t >= 2.0f) ? 0.0f : ((t > 1.0f) ? (0.5f*(t*(-t*t + 5.0f*t - 8.0f) + 4.0f)) : (0.5f*(t*(3.0f*t*t - 5.0f*t) + 2.0f))); 743 744 return r; 745} 746 747#define DT_LANCZOS_EPSILON (1e-9f) 748 749#if 0 750float 751interpolation_func_lanczos(float width, float t) 752{ 753 float ta = fabs(t); 754 755 float r = (ta > width) ? 0.0f : ((ta < DT_LANCZOS_EPSILON) ? 1.0f : width*native_sin(M_PI_F*t)*native_sin(M_PI_F*t/width)/(M_PI_F*M_PI_F*t*t)); 756 757 return r; 758} 759#else 760float 761sinf_fast(float t) 762{ 763 /***** if you change this function, you must also change the copy in src/common/math.h *****/ 764 const float a = 4.0f/(M_PI_F*M_PI_F); 765 const float p = 0.225f; 766 767 t = a*t*(M_PI_F - fabs(t)); 768 769 return p*(t*fabs(t) - t) + t; 770} 771 772float 773interpolation_func_lanczos(float width, float t) 774{ 775 /* Compute a value for sinf(pi.t) in [-pi pi] for which the value will be 776 * correct */ 777 int a = (int)t; 778 float r = t - (float)a; 779 780 // Compute the correct sign for sinf(pi.r) 781 union { float f; unsigned int i; } sign; 782 sign.i = ((a&1)<<31) | 0x3f800000; 783 784 return (DT_LANCZOS_EPSILON + width*sign.f*sinf_fast(M_PI_F*r)*sinf_fast(M_PI_F*t/width))/(DT_LANCZOS_EPSILON + M_PI_F*M_PI_F*t*t); 785} 786#endif 787 788 789/* kernel for clip&rotate: bilinear interpolation */ 790__kernel void 791clip_rotate_bilinear(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 792 const int in_width, const int in_height, 793 const int2 roi_in, const float2 roi_out, const float scale_in, const float scale_out, 794 const int flip, const float2 t, const float2 k, const float4 mat, 795 const float4 k_space, const float2 ka, const float4 ma, const float2 mb) 796{ 797 const int x = get_global_id(0); 798 const int y = get_global_id(1); 799 800 if(x >= width || y >= height) return; 801 802 float2 pi, po; 803 804 pi.x = roi_out.x + x + 0.5f; 805 pi.y = roi_out.y + y + 0.5f; 806 807 pi.x -= flip ? t.y * scale_out : t.x * scale_out; 808 pi.y -= flip ? t.x * scale_out : t.y * scale_out; 809 810 pi /= scale_out; 811 backtransform(&pi, &po, mat, k); 812 po *= scale_in; 813 814 po.x += t.x * scale_in; 815 po.y += t.y * scale_in; 816 817 if (k_space.z > 0.0f) keystone_backtransform(&po,k_space,ka,ma,mb); 818 819 po.x -= roi_in.x + 0.5f; 820 po.y -= roi_in.y + 0.5f; 821 822 const int ii = (int)po.x; 823 const int jj = (int)po.y; 824 825 float4 o = (ii >=0 && jj >= 0 && ii < in_width && jj < in_height) ? read_imagef(in, samplerf, po) : (float4)0.0f; 826 827 write_imagef (out, (int2)(x, y), o); 828} 829 830 831 832/* kernel for clip&rotate: bicubic interpolation */ 833__kernel void 834clip_rotate_bicubic(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 835 const int in_width, const int in_height, 836 const int2 roi_in, const float2 roi_out, const float scale_in, const float scale_out, 837 const int flip, const float2 t, const float2 k, const float4 mat, 838 const float4 k_space, const float2 ka, const float4 ma, const float2 mb) 839{ 840 const int x = get_global_id(0); 841 const int y = get_global_id(1); 842 843 const int kwidth = 2; 844 845 if(x >= width || y >= height) return; 846 847 float2 pi, po; 848 849 pi.x = roi_out.x + x + 0.5f; 850 pi.y = roi_out.y + y + 0.5f; 851 852 pi.x -= flip ? t.y * scale_out : t.x * scale_out; 853 pi.y -= flip ? t.x * scale_out : t.y * scale_out; 854 855 pi /= scale_out; 856 backtransform(&pi, &po, mat, k); 857 po *= scale_in; 858 859 po.x += t.x * scale_in; 860 po.y += t.y * scale_in; 861 862 if (k_space.z > 0.0f) keystone_backtransform(&po,k_space,ka,ma,mb); 863 864 po.x -= roi_in.x + 0.5f; 865 po.y -= roi_in.y + 0.5f; 866 867 int tx = po.x; 868 int ty = po.y; 869 870 float4 pixel = (float4)0.0f; 871 float weight = 0.0f; 872 873 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 874 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 875 { 876 const int i = tx + ii; 877 const int j = ty + jj; 878 879 float wx = interpolation_func_bicubic((float)i - po.x); 880 float wy = interpolation_func_bicubic((float)j - po.y); 881 float w = wx * wy; 882 883 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 884 weight += w; 885 } 886 887 pixel = (tx >= 0 && ty >= 0 && tx < in_width && ty < in_height) ? pixel / weight : (float4)0.0f; 888 889 write_imagef (out, (int2)(x, y), pixel); 890} 891 892 893/* kernel for clip&rotate: lanczos2 interpolation */ 894__kernel void 895clip_rotate_lanczos2(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 896 const int in_width, const int in_height, 897 const int2 roi_in, const float2 roi_out, const float scale_in, const float scale_out, 898 const int flip, const float2 t, const float2 k, const float4 mat, 899 const float4 k_space, const float2 ka, const float4 ma, const float2 mb) 900{ 901 const int x = get_global_id(0); 902 const int y = get_global_id(1); 903 904 const int kwidth = 2; 905 906 if(x >= width || y >= height) return; 907 908 float2 pi, po; 909 910 pi.x = roi_out.x + x + 0.5f; 911 pi.y = roi_out.y + y + 0.5f; 912 913 pi.x -= flip ? t.y * scale_out : t.x * scale_out; 914 pi.y -= flip ? t.x * scale_out : t.y * scale_out; 915 916 pi /= scale_out; 917 backtransform(&pi, &po, mat, k); 918 po *= scale_in; 919 920 po.x += t.x * scale_in; 921 po.y += t.y * scale_in; 922 923 if (k_space.z > 0.0f) keystone_backtransform(&po,k_space,ka,ma,mb); 924 925 po.x -= roi_in.x + 0.5f; 926 po.y -= roi_in.y + 0.5f; 927 928 int tx = po.x; 929 int ty = po.y; 930 931 float4 pixel = (float4)0.0f; 932 float weight = 0.0f; 933 934 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 935 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 936 { 937 const int i = tx + ii; 938 const int j = ty + jj; 939 940 float wx = interpolation_func_lanczos(2, (float)i - po.x); 941 float wy = interpolation_func_lanczos(2, (float)j - po.y); 942 float w = wx * wy; 943 944 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 945 weight += w; 946 } 947 948 pixel = (tx >= 0 && ty >= 0 && tx < in_width && ty < in_height) ? pixel / weight : (float4)0.0f; 949 950 write_imagef (out, (int2)(x, y), pixel); 951} 952 953 954 955/* kernel for clip&rotate: lanczos3 interpolation */ 956__kernel void 957clip_rotate_lanczos3(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 958 const int in_width, const int in_height, 959 const int2 roi_in, const float2 roi_out, const float scale_in, const float scale_out, 960 const int flip, const float2 t, const float2 k, const float4 mat, 961 const float4 k_space, const float2 ka, const float4 ma, const float2 mb) 962{ 963 const int x = get_global_id(0); 964 const int y = get_global_id(1); 965 966 const int kwidth = 3; 967 968 if(x >= width || y >= height) return; 969 970 float2 pi, po; 971 972 pi.x = roi_out.x + x + 0.5f; 973 pi.y = roi_out.y + y + 0.5f; 974 975 pi.x -= flip ? t.y * scale_out : t.x * scale_out; 976 pi.y -= flip ? t.x * scale_out : t.y * scale_out; 977 978 pi /= scale_out; 979 backtransform(&pi, &po, mat, k); 980 po *= scale_in; 981 982 po.x += t.x * scale_in; 983 po.y += t.y * scale_in; 984 985 if (k_space.z > 0.0f) keystone_backtransform(&po,k_space,ka,ma,mb); 986 987 po.x -= roi_in.x + 0.5f; 988 po.y -= roi_in.y + 0.5f; 989 990 int tx = (int)po.x; 991 int ty = (int)po.y; 992 993 float4 pixel = (float4)0.0f; 994 float weight = 0.0f; 995 996 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 997 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 998 { 999 const int i = tx + ii; 1000 const int j = ty + jj; 1001 1002 float wx = interpolation_func_lanczos(3, (float)i - po.x); 1003 float wy = interpolation_func_lanczos(3, (float)j - po.y); 1004 float w = wx * wy; 1005 1006 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 1007 weight += w; 1008 } 1009 1010 pixel = (tx >= 0 && ty >= 0 && tx < in_width && ty < in_height) ? pixel / weight : (float4)0.0f; 1011 1012 write_imagef (out, (int2)(x, y), pixel); 1013} 1014 1015 1016/* kernels for the lens plugin: bilinear interpolation */ 1017kernel void 1018lens_distort_bilinear (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1019 const int iwidth, const int iheight, const int roi_in_x, const int roi_in_y, global float *pi, 1020 const int do_nan_checks) 1021{ 1022 const int x = get_global_id(0); 1023 const int y = get_global_id(1); 1024 1025 if(x >= width || y >= height) return; 1026 1027 float4 pixel; 1028 1029 float rx, ry; 1030 const int piwidth = 2*3*width; 1031 global float *ppi = pi + mad24(y, piwidth, 2*3*x); 1032 1033 if(do_nan_checks) 1034 { 1035 bool valid = true; 1036 1037 for(int i = 0; i < 6; i++) valid = valid && isfinite(ppi[i]); 1038 1039 if(!valid) 1040 { 1041 pixel = (float4)0.0f; 1042 write_imagef (out, (int2)(x, y), pixel); 1043 return; 1044 } 1045 } 1046 1047 rx = ppi[0] - roi_in_x; 1048 ry = ppi[1] - roi_in_y; 1049 rx = (rx >= 0) ? rx : 0; 1050 ry = (ry >= 0) ? ry : 0; 1051 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1052 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1053 pixel.x = read_imagef(in, samplerf, (float2)(rx, ry)).x; 1054 1055 rx = ppi[2] - roi_in_x; 1056 ry = ppi[3] - roi_in_y; 1057 rx = (rx >= 0) ? rx : 0; 1058 ry = (ry >= 0) ? ry : 0; 1059 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1060 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1061 pixel.yw = read_imagef(in, samplerf, (float2)(rx, ry)).yw; 1062 1063 rx = ppi[4] - roi_in_x; 1064 ry = ppi[5] - roi_in_y; 1065 rx = (rx >= 0) ? rx : 0; 1066 ry = (ry >= 0) ? ry : 0; 1067 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1068 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1069 pixel.z = read_imagef(in, samplerf, (float2)(rx, ry)).z; 1070 1071 pixel = all(isfinite(pixel.xyz)) ? pixel : (float4)0.0f; 1072 1073 write_imagef (out, (int2)(x, y), pixel); 1074} 1075 1076/* kernels for the lens plugin: bicubic interpolation */ 1077kernel void 1078lens_distort_bicubic (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1079 const int iwidth, const int iheight, const int roi_in_x, const int roi_in_y, global float *pi, 1080 const int do_nan_checks) 1081{ 1082 const int x = get_global_id(0); 1083 const int y = get_global_id(1); 1084 1085 const int kwidth = 2; 1086 1087 if(x >= width || y >= height) return; 1088 1089 float4 pixel = (float4)0.0f; 1090 1091 float rx, ry; 1092 int tx, ty; 1093 float sum, weight; 1094 float2 sum2; 1095 const int piwidth = 2*3*width; 1096 global float *ppi = pi + mad24(y, piwidth, 2*3*x); 1097 1098 if(do_nan_checks) 1099 { 1100 bool valid = true; 1101 1102 for(int i = 0; i < 6; i++) valid = valid && isfinite(ppi[i]); 1103 1104 if(!valid) 1105 { 1106 pixel = (float4)0.0f; 1107 write_imagef (out, (int2)(x, y), pixel); 1108 return; 1109 } 1110 } 1111 1112 1113 rx = ppi[0] - (float)roi_in_x; 1114 ry = ppi[1] - (float)roi_in_y; 1115 rx = (rx >= 0) ? rx : 0; 1116 ry = (ry >= 0) ? ry : 0; 1117 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1118 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1119 1120 tx = rx; 1121 ty = ry; 1122 1123 sum = 0.0f; 1124 weight = 0.0f; 1125 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1126 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1127 { 1128 int i = tx + ii; 1129 int j = ty + jj; 1130 i = (i >= 0) ? i : 0; 1131 j = (j >= 0) ? j : 0; 1132 i = (i <= iwidth - 1) ? i : iwidth - 1; 1133 j = (j <= iheight - 1) ? j : iheight - 1; 1134 1135 float wx = interpolation_func_bicubic((float)i - rx); 1136 float wy = interpolation_func_bicubic((float)j - ry); 1137 float w = wx * wy; 1138 1139 sum += read_imagef(in, samplerc, (int2)(i, j)).x * w; 1140 weight += w; 1141 } 1142 pixel.x = sum/weight; 1143 1144 1145 rx = ppi[2] - (float)roi_in_x; 1146 ry = ppi[3] - (float)roi_in_y; 1147 rx = (rx >= 0) ? rx : 0; 1148 ry = (ry >= 0) ? ry : 0; 1149 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1150 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1151 1152 tx = rx; 1153 ty = ry; 1154 1155 sum2 = (float2)0.0f; 1156 weight = 0.0f; 1157 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1158 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1159 { 1160 int i = tx + ii; 1161 int j = ty + jj; 1162 i = (i >= 0) ? i : 0; 1163 j = (j >= 0) ? j : 0; 1164 i = (i <= iwidth - 1) ? i : iwidth - 1; 1165 j = (j <= iheight - 1) ? j : iheight - 1; 1166 1167 float wx = interpolation_func_bicubic((float)i - rx); 1168 float wy = interpolation_func_bicubic((float)j - ry); 1169 float w = wx * wy; 1170 1171 sum2 += read_imagef(in, samplerc, (int2)(i, j)).yw * w; 1172 weight += w; 1173 } 1174 pixel.yw = sum2/weight; 1175 1176 1177 rx = ppi[4] - (float)roi_in_x; 1178 ry = ppi[5] - (float)roi_in_y; 1179 rx = (rx >= 0) ? rx : 0; 1180 ry = (ry >= 0) ? ry : 0; 1181 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1182 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1183 1184 tx = rx; 1185 ty = ry; 1186 1187 sum = 0.0f; 1188 weight = 0.0f; 1189 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1190 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1191 { 1192 int i = tx + ii; 1193 int j = ty + jj; 1194 i = (i >= 0) ? i : 0; 1195 j = (j >= 0) ? j : 0; 1196 i = (i <= iwidth - 1) ? i : iwidth - 1; 1197 j = (j <= iheight - 1) ? j : iheight - 1; 1198 1199 float wx = interpolation_func_bicubic((float)i - rx); 1200 float wy = interpolation_func_bicubic((float)j - ry); 1201 float w = wx * wy; 1202 1203 sum += read_imagef(in, samplerc, (int2)(i, j)).z * w; 1204 weight += w; 1205 } 1206 pixel.z = sum/weight; 1207 1208 pixel = all(isfinite(pixel.xyz)) ? pixel : (float4)0.0f; 1209 1210 write_imagef (out, (int2)(x, y), pixel); 1211} 1212 1213 1214/* kernels for the lens plugin: lanczos2 interpolation */ 1215kernel void 1216lens_distort_lanczos2 (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1217 const int iwidth, const int iheight, const int roi_in_x, const int roi_in_y, global float *pi, 1218 const int do_nan_checks) 1219{ 1220 const int x = get_global_id(0); 1221 const int y = get_global_id(1); 1222 1223 const int kwidth = 2; 1224 1225 if(x >= width || y >= height) return; 1226 1227 float4 pixel = (float4)0.0f; 1228 1229 float rx, ry; 1230 int tx, ty; 1231 float sum, weight; 1232 float2 sum2; 1233 const int piwidth = 2*3*width; 1234 global float *ppi = pi + mad24(y, piwidth, 2*3*x); 1235 1236 if(do_nan_checks) 1237 { 1238 bool valid = true; 1239 1240 for(int i = 0; i < 6; i++) valid = valid && isfinite(ppi[i]); 1241 1242 if(!valid) 1243 { 1244 pixel = (float4)0.0f; 1245 write_imagef (out, (int2)(x, y), pixel); 1246 return; 1247 } 1248 } 1249 1250 1251 rx = ppi[0] - (float)roi_in_x; 1252 ry = ppi[1] - (float)roi_in_y; 1253 rx = (rx >= 0) ? rx : 0; 1254 ry = (ry >= 0) ? ry : 0; 1255 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1256 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1257 1258 tx = rx; 1259 ty = ry; 1260 1261 sum = 0.0f; 1262 weight = 0.0f; 1263 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1264 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1265 { 1266 int i = tx + ii; 1267 int j = ty + jj; 1268 i = (i >= 0) ? i : 0; 1269 j = (j >= 0) ? j : 0; 1270 i = (i <= iwidth - 1) ? i : iwidth - 1; 1271 j = (j <= iheight - 1) ? j : iheight - 1; 1272 1273 float wx = interpolation_func_lanczos(2, (float)i - rx); 1274 float wy = interpolation_func_lanczos(2, (float)j - ry); 1275 float w = wx * wy; 1276 1277 sum += read_imagef(in, samplerc, (int2)(i, j)).x * w; 1278 weight += w; 1279 } 1280 pixel.x = sum/weight; 1281 1282 1283 rx = ppi[2] - (float)roi_in_x; 1284 ry = ppi[3] - (float)roi_in_y; 1285 rx = (rx >= 0) ? rx : 0; 1286 ry = (ry >= 0) ? ry : 0; 1287 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1288 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1289 1290 tx = rx; 1291 ty = ry; 1292 1293 sum2 = (float2)0.0f; 1294 weight = 0.0f; 1295 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1296 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1297 { 1298 int i = tx + ii; 1299 int j = ty + jj; 1300 i = (i >= 0) ? i : 0; 1301 j = (j >= 0) ? j : 0; 1302 i = (i <= iwidth - 1) ? i : iwidth - 1; 1303 j = (j <= iheight - 1) ? j : iheight - 1; 1304 1305 float wx = interpolation_func_lanczos(2, (float)i - rx); 1306 float wy = interpolation_func_lanczos(2, (float)j - ry); 1307 float w = wx * wy; 1308 1309 sum2 += read_imagef(in, samplerc, (int2)(i, j)).yw * w; 1310 weight += w; 1311 } 1312 pixel.yw = sum2/weight; 1313 1314 1315 rx = ppi[4] - (float)roi_in_x; 1316 ry = ppi[5] - (float)roi_in_y; 1317 rx = (rx >= 0) ? rx : 0; 1318 ry = (ry >= 0) ? ry : 0; 1319 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1320 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1321 1322 tx = rx; 1323 ty = ry; 1324 1325 sum = 0.0f; 1326 weight = 0.0f; 1327 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1328 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1329 { 1330 int i = tx + ii; 1331 int j = ty + jj; 1332 i = (i >= 0) ? i : 0; 1333 j = (j >= 0) ? j : 0; 1334 i = (i <= iwidth - 1) ? i : iwidth - 1; 1335 j = (j <= iheight - 1) ? j : iheight - 1; 1336 1337 float wx = interpolation_func_lanczos(2, (float)i - rx); 1338 float wy = interpolation_func_lanczos(2, (float)j - ry); 1339 float w = wx * wy; 1340 1341 sum += read_imagef(in, samplerc, (int2)(i, j)).z * w; 1342 weight += w; 1343 } 1344 pixel.z = sum/weight; 1345 1346 pixel = all(isfinite(pixel.xyz)) ? pixel : (float4)0.0f; 1347 1348 write_imagef (out, (int2)(x, y), pixel); 1349} 1350 1351 1352/* kernels for the lens plugin: lanczos3 interpolation */ 1353kernel void 1354lens_distort_lanczos3 (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1355 const int iwidth, const int iheight, const int roi_in_x, const int roi_in_y, global float *pi, 1356 const int do_nan_checks) 1357{ 1358 const int x = get_global_id(0); 1359 const int y = get_global_id(1); 1360 1361 const int kwidth = 3; 1362 1363 if(x >= width || y >= height) return; 1364 1365 float4 pixel = (float4)0.0f; 1366 1367 float rx, ry; 1368 int tx, ty; 1369 float sum, weight; 1370 float2 sum2; 1371 const int piwidth = 2*3*width; 1372 global float *ppi = pi + mad24(y, piwidth, 2*3*x); 1373 1374 if(do_nan_checks) 1375 { 1376 bool valid = true; 1377 1378 for(int i = 0; i < 6; i++) valid = valid && isfinite(ppi[i]); 1379 1380 if(!valid) 1381 { 1382 pixel = (float4)0.0f; 1383 write_imagef (out, (int2)(x, y), pixel); 1384 return; 1385 } 1386 } 1387 1388 rx = ppi[0] - (float)roi_in_x; 1389 ry = ppi[1] - (float)roi_in_y; 1390 rx = (rx >= 0) ? rx : 0; 1391 ry = (ry >= 0) ? ry : 0; 1392 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1393 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1394 1395 tx = rx; 1396 ty = ry; 1397 1398 sum = 0.0f; 1399 weight = 0.0f; 1400 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1401 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1402 { 1403 int i = tx + ii; 1404 int j = ty + jj; 1405 i = (i >= 0) ? i : 0; 1406 j = (j >= 0) ? j : 0; 1407 i = (i <= iwidth - 1) ? i : iwidth - 1; 1408 j = (j <= iheight - 1) ? j : iheight - 1; 1409 1410 float wx = interpolation_func_lanczos(3, (float)i - rx); 1411 float wy = interpolation_func_lanczos(3, (float)j - ry); 1412 float w = wx * wy; 1413 1414 sum += read_imagef(in, samplerc, (int2)(i, j)).x * w; 1415 weight += w; 1416 } 1417 pixel.x = sum/weight; 1418 1419 1420 rx = ppi[2] - (float)roi_in_x; 1421 ry = ppi[3] - (float)roi_in_y; 1422 rx = (rx >= 0) ? rx : 0; 1423 ry = (ry >= 0) ? ry : 0; 1424 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1425 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1426 1427 tx = rx; 1428 ty = ry; 1429 1430 sum2 = (float2)0.0f; 1431 weight = 0.0f; 1432 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1433 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1434 { 1435 int i = tx + ii; 1436 int j = ty + jj; 1437 i = (i >= 0) ? i : 0; 1438 j = (j >= 0) ? j : 0; 1439 i = (i <= iwidth - 1) ? i : iwidth - 1; 1440 j = (j <= iheight - 1) ? j : iheight - 1; 1441 1442 float wx = interpolation_func_lanczos(3, (float)i - rx); 1443 float wy = interpolation_func_lanczos(3, (float)j - ry); 1444 float w = wx * wy; 1445 1446 sum2 += read_imagef(in, samplerc, (int2)(i, j)).yw * w; 1447 weight += w; 1448 } 1449 pixel.yw = sum2/weight; 1450 1451 1452 rx = ppi[4] - (float)roi_in_x; 1453 ry = ppi[5] - (float)roi_in_y; 1454 rx = (rx >= 0) ? rx : 0; 1455 ry = (ry >= 0) ? ry : 0; 1456 rx = (rx <= iwidth - 1) ? rx : iwidth - 1; 1457 ry = (ry <= iheight - 1) ? ry : iheight - 1; 1458 1459 tx = rx; 1460 ty = ry; 1461 1462 sum = 0.0f; 1463 weight = 0.0f; 1464 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1465 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1466 { 1467 int i = tx + ii; 1468 int j = ty + jj; 1469 i = (i >= 0) ? i : 0; 1470 j = (j >= 0) ? j : 0; 1471 i = (i <= iwidth - 1) ? i : iwidth - 1; 1472 j = (j <= iheight - 1) ? j : iheight - 1; 1473 1474 float wx = interpolation_func_lanczos(3, (float)i - rx); 1475 float wy = interpolation_func_lanczos(3, (float)j - ry); 1476 float w = wx * wy; 1477 1478 sum += read_imagef(in, samplerc, (int2)(i, j)).z * w; 1479 weight += w; 1480 } 1481 pixel.z = sum/weight; 1482 1483 pixel = all(isfinite(pixel.xyz)) ? pixel : (float4)0.0f; 1484 1485 write_imagef (out, (int2)(x, y), pixel); 1486} 1487 1488 1489 1490/* kernel for the ashift module: bilinear interpolation */ 1491kernel void 1492ashift_bilinear(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1493 const int iwidth, const int iheight, const int2 roi_in, const int2 roi_out, 1494 const float in_scale, const float out_scale, const float2 clip, global float *homograph) 1495{ 1496 const int x = get_global_id(0); 1497 const int y = get_global_id(1); 1498 1499 if(x >= width || y >= height) return; 1500 1501 float pin[3], pout[3]; 1502 1503 // convert output pixel coordinates to original image coordinates 1504 pout[0] = roi_out.x + x + clip.x; 1505 pout[1] = roi_out.y + y + clip.y; 1506 pout[0] /= out_scale; 1507 pout[1] /= out_scale; 1508 pout[2] = 1.0f; 1509 1510 // apply homograph 1511 for(int i = 0; i < 3; i++) 1512 { 1513 pin[i] = 0.0f; 1514 for(int j = 0; j < 3; j++) pin[i] += homograph[3 * i + j] * pout[j]; 1515 } 1516 1517 // convert to input pixel coordinates 1518 pin[0] /= pin[2]; 1519 pin[1] /= pin[2]; 1520 pin[0] *= in_scale; 1521 pin[1] *= in_scale; 1522 pin[0] -= roi_in.x; 1523 pin[1] -= roi_in.y; 1524 1525 // get output values by interpolation from input image using fast hardware bilinear interpolation 1526 float rx = pin[0]; 1527 float ry = pin[1]; 1528 int tx = rx; 1529 int ty = ry; 1530 1531 float4 pixel = (tx >= 0 && ty >= 0 && tx < iwidth && ty < iheight) ? read_imagef(in, samplerf, (float2)(rx, ry)) : (float4)0.0f; 1532 1533 write_imagef (out, (int2)(x, y), pixel); 1534} 1535 1536/* kernel for the ashift module: bicubic interpolation */ 1537kernel void 1538ashift_bicubic (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1539 const int iwidth, const int iheight, const int2 roi_in, const int2 roi_out, 1540 const float in_scale, const float out_scale, const float2 clip, global float *homograph) 1541{ 1542 const int x = get_global_id(0); 1543 const int y = get_global_id(1); 1544 1545 const int kwidth = 2; 1546 1547 if(x >= width || y >= height) return; 1548 1549 float pin[3], pout[3]; 1550 1551 // convert output pixel coordinates to original image coordinates 1552 pout[0] = roi_out.x + x + clip.x; 1553 pout[1] = roi_out.y + y + clip.y; 1554 pout[0] /= out_scale; 1555 pout[1] /= out_scale; 1556 pout[2] = 1.0f; 1557 1558 // apply homograph 1559 for(int i = 0; i < 3; i++) 1560 { 1561 pin[i] = 0.0f; 1562 for(int j = 0; j < 3; j++) pin[i] += homograph[3 * i + j] * pout[j]; 1563 } 1564 1565 // convert to input pixel coordinates 1566 pin[0] /= pin[2]; 1567 pin[1] /= pin[2]; 1568 pin[0] *= in_scale; 1569 pin[1] *= in_scale; 1570 pin[0] -= roi_in.x; 1571 pin[1] -= roi_in.y; 1572 1573 // get output values by interpolation from input image 1574 float rx = pin[0]; 1575 float ry = pin[1]; 1576 int tx = rx; 1577 int ty = ry; 1578 1579 float4 pixel = (float4)0.0f; 1580 float weight = 0.0f; 1581 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1582 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1583 { 1584 const int i = tx + ii; 1585 const int j = ty + jj; 1586 1587 float wx = interpolation_func_bicubic((float)i - rx); 1588 float wy = interpolation_func_bicubic((float)j - ry); 1589 float w = wx * wy; 1590 1591 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 1592 weight += w; 1593 } 1594 1595 pixel = (tx >= 0 && ty >= 0 && tx < iwidth && ty < iheight) ? pixel/weight : (float4)0.0f; 1596 1597 write_imagef (out, (int2)(x, y), pixel); 1598} 1599 1600 1601/* kernel for the ashift module: lanczos2 interpolation */ 1602kernel void 1603ashift_lanczos2(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1604 const int iwidth, const int iheight, const int2 roi_in, const int2 roi_out, 1605 const float in_scale, const float out_scale, const float2 clip, global float *homograph) 1606{ 1607 const int x = get_global_id(0); 1608 const int y = get_global_id(1); 1609 1610 const int kwidth = 2; 1611 1612 if(x >= width || y >= height) return; 1613 1614 float pin[3], pout[3]; 1615 1616 // convert output pixel coordinates to original image coordinates 1617 pout[0] = roi_out.x + x + clip.x; 1618 pout[1] = roi_out.y + y + clip.y; 1619 pout[0] /= out_scale; 1620 pout[1] /= out_scale; 1621 pout[2] = 1.0f; 1622 1623 // apply homograph 1624 for(int i = 0; i < 3; i++) 1625 { 1626 pin[i] = 0.0f; 1627 for(int j = 0; j < 3; j++) pin[i] += homograph[3 * i + j] * pout[j]; 1628 } 1629 1630 // convert to input pixel coordinates 1631 pin[0] /= pin[2]; 1632 pin[1] /= pin[2]; 1633 pin[0] *= in_scale; 1634 pin[1] *= in_scale; 1635 pin[0] -= roi_in.x; 1636 pin[1] -= roi_in.y; 1637 1638 // get output values by interpolation from input image 1639 float rx = pin[0]; 1640 float ry = pin[1]; 1641 int tx = rx; 1642 int ty = ry; 1643 1644 float4 pixel = (float4)0.0f; 1645 float weight = 0.0f; 1646 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1647 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1648 { 1649 const int i = tx + ii; 1650 const int j = ty + jj; 1651 1652 float wx = interpolation_func_lanczos(2, (float)i - rx); 1653 float wy = interpolation_func_lanczos(2, (float)j - ry); 1654 float w = wx * wy; 1655 1656 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 1657 weight += w; 1658 } 1659 1660 pixel = (tx >= 0 && ty >= 0 && tx < iwidth && ty < iheight) ? pixel/weight : (float4)0.0f; 1661 1662 write_imagef (out, (int2)(x, y), pixel); 1663} 1664 1665 1666/* kernels for the ashift module: lanczos3 interpolation */ 1667kernel void 1668ashift_lanczos3(read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1669 const int iwidth, const int iheight, const int2 roi_in, const int2 roi_out, 1670 const float in_scale, const float out_scale, const float2 clip, global float *homograph) 1671{ 1672 const int x = get_global_id(0); 1673 const int y = get_global_id(1); 1674 1675 const int kwidth = 3; 1676 1677 if(x >= width || y >= height) return; 1678 1679 float pin[3], pout[3]; 1680 1681 // convert output pixel coordinates to original image coordinates 1682 pout[0] = roi_out.x + x + clip.x; 1683 pout[1] = roi_out.y + y + clip.y; 1684 pout[0] /= out_scale; 1685 pout[1] /= out_scale; 1686 pout[2] = 1.0f; 1687 1688 // apply homograph 1689 for(int i = 0; i < 3; i++) 1690 { 1691 pin[i] = 0.0f; 1692 for(int j = 0; j < 3; j++) pin[i] += homograph[3 * i + j] * pout[j]; 1693 } 1694 1695 // convert to input pixel coordinates 1696 pin[0] /= pin[2]; 1697 pin[1] /= pin[2]; 1698 pin[0] *= in_scale; 1699 pin[1] *= in_scale; 1700 pin[0] -= roi_in.x; 1701 pin[1] -= roi_in.y; 1702 1703 // get output values by interpolation from input image 1704 float rx = pin[0]; 1705 float ry = pin[1]; 1706 int tx = rx; 1707 int ty = ry; 1708 1709 float4 pixel = (float4)0.0f; 1710 float weight = 0.0f; 1711 for(int jj = 1 - kwidth; jj <= kwidth; jj++) 1712 for(int ii= 1 - kwidth; ii <= kwidth; ii++) 1713 { 1714 const int i = tx + ii; 1715 const int j = ty + jj; 1716 1717 float wx = interpolation_func_lanczos(3, (float)i - rx); 1718 float wy = interpolation_func_lanczos(3, (float)j - ry); 1719 float w = wx * wy; 1720 1721 pixel += read_imagef(in, sampleri, (int2)(i, j)) * w; 1722 weight += w; 1723 } 1724 1725 pixel = (tx >= 0 && ty >= 0 && tx < iwidth && ty < iheight) ? pixel/weight : (float4)0.0f; 1726 1727 write_imagef (out, (int2)(x, y), pixel); 1728} 1729 1730 1731kernel void 1732lens_vignette (read_only image2d_t in, write_only image2d_t out, const int width, const int height, global float4 *pi) 1733{ 1734 const int x = get_global_id(0); 1735 const int y = get_global_id(1); 1736 1737 if(x >= width || y >= height) return; 1738 1739 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 1740 float4 scale = pi[mad24(y, width, x)]/(float4)0.5f; 1741 1742 pixel.xyz *= scale.xyz; 1743 1744 write_imagef (out, (int2)(x, y), pixel); 1745} 1746 1747 1748 1749/* kernel for flip */ 1750__kernel void 1751flip(read_only image2d_t in, write_only image2d_t out, const int width, const int height, const int orientation) 1752{ 1753 const int x = get_global_id(0); 1754 const int y = get_global_id(1); 1755 1756 if(x >= width || y >= height) return; 1757 1758 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 1759 1760 // ORIENTATION_FLIP_X = 2 1761 int nx = (orientation & 2) ? width - x - 1 : x; 1762 1763 // ORIENTATION_FLIP_Y = 1 1764 int ny = (orientation & 1) ? height - y - 1 : y; 1765 1766 // ORIENTATION_SWAP_XY = 4 1767 if((orientation & 4) == 4) 1768 { 1769 const int tmp = nx; 1770 nx = ny; 1771 ny = tmp; 1772 } 1773 1774 write_imagef (out, (int2)(nx, ny), pixel); 1775} 1776 1777 1778/* we use this exp approximation to maintain full identity with cpu path */ 1779float 1780fast_expf(const float x) 1781{ 1782 // meant for the range [-100.0f, 0.0f]. largest error ~ -0.06 at 0.0f. 1783 // will get _a_lot_ worse for x > 0.0f (9000 at 10.0f).. 1784 const int i1 = 0x3f800000u; 1785 // e^x, the comment would be 2^x 1786 const int i2 = 0x402DF854u;//0x40000000u; 1787 // const int k = CLAMPS(i1 + x * (i2 - i1), 0x0u, 0x7fffffffu); 1788 // without max clamping (doesn't work for large x, but is faster): 1789 const int k0 = i1 + x * (i2 - i1); 1790 union { 1791 float f; 1792 int k; 1793 } u; 1794 u.k = k0 > 0 ? k0 : 0; 1795 return u.f; 1796} 1797 1798 1799float 1800envelope(const float L) 1801{ 1802 const float x = clamp(L/100.0f, 0.0f, 1.0f); 1803 // const float alpha = 2.0f; 1804 const float beta = 0.6f; 1805 if(x < beta) 1806 { 1807 // return 1.0f-fabsf(x/beta-1.0f)^2 1808 const float tmp = fabs(x/beta-1.0f); 1809 return 1.0f-tmp*tmp; 1810 } 1811 else 1812 { 1813 const float tmp1 = (1.0f-x)/(1.0f-beta); 1814 const float tmp2 = tmp1*tmp1; 1815 const float tmp3 = tmp2*tmp1; 1816 return 3.0f*tmp2 - 2.0f*tmp3; 1817 } 1818} 1819 1820/* kernel for monochrome */ 1821kernel void 1822monochrome_filter( 1823 read_only image2d_t in, 1824 write_only image2d_t out, 1825 const int width, 1826 const int height, 1827 const float a, 1828 const float b, 1829 const float size) 1830{ 1831 const int x = get_global_id(0); 1832 const int y = get_global_id(1); 1833 1834 if(x >= width || y >= height) return; 1835 1836 float4 pixel = read_imagef (in, sampleri, (int2)(x, y)); 1837 // TODO: this could be a native_expf, or exp2f, need to evaluate comparisons with cpu though: 1838 pixel.x = 100.0f*fast_expf(-clamp(((pixel.y - a)*(pixel.y - a) + (pixel.z - b)*(pixel.z - b))/(2.0f * size), 0.0f, 1.0f)); 1839 write_imagef (out, (int2)(x, y), pixel); 1840} 1841 1842kernel void 1843monochrome( 1844 read_only image2d_t in, 1845 read_only image2d_t base, 1846 write_only image2d_t out, 1847 const int width, 1848 const int height, 1849 const float a, 1850 const float b, 1851 const float size, 1852 float highlights) 1853{ 1854 const int x = get_global_id(0); 1855 const int y = get_global_id(1); 1856 1857 if(x >= width || y >= height) return; 1858 1859 float4 pixel = read_imagef (in, sampleri, (int2)(x, y)); 1860 float4 basep = read_imagef (base, sampleri, (int2)(x, y)); 1861 float filter = fast_expf(-clamp(((pixel.y - a)*(pixel.y - a) + (pixel.z - b)*(pixel.z - b))/(2.0f * size), 0.0f, 1.0f)); 1862 float tt = envelope(pixel.x); 1863 float t = tt + (1.0f-tt)*(1.0f-highlights); 1864 pixel.x = mix(pixel.x, pixel.x*basep.x/100.0f, t); 1865 pixel.y = pixel.z = 0.0f; 1866 write_imagef (out, (int2)(x, y), pixel); 1867} 1868 1869 1870 1871/* kernel for the plugin colorout, fast matrix + shaper path only */ 1872kernel void 1873colorout (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1874 global float *mat, read_only image2d_t lutr, read_only image2d_t lutg, read_only image2d_t lutb, 1875 global const float (*const a)[3]) 1876{ 1877 const int x = get_global_id(0); 1878 const int y = get_global_id(1); 1879 1880 if(x >= width || y >= height) return; 1881 1882 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 1883 float XYZ[3], rgb[3]; 1884 float4 xyz = Lab_to_XYZ(pixel); 1885 XYZ[0] = xyz.x; 1886 XYZ[1] = xyz.y; 1887 XYZ[2] = xyz.z; 1888 for(int i=0;i<3;i++) 1889 { 1890 rgb[i] = 0.0f; 1891 for(int j=0;j<3;j++) rgb[i] += mat[3*i+j]*XYZ[j]; 1892 } 1893 pixel.x = lerp_lookup_unbounded0(lutr, rgb[0], a[0]); 1894 pixel.y = lerp_lookup_unbounded0(lutg, rgb[1], a[1]); 1895 pixel.z = lerp_lookup_unbounded0(lutb, rgb[2], a[2]); 1896 write_imagef (out, (int2)(x, y), pixel); 1897} 1898 1899 1900/* kernel for the levels plugin */ 1901kernel void 1902levels (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 1903 read_only image2d_t lut, const float in_low, const float in_high, const float in_inv_gamma) 1904{ 1905 const int x = get_global_id(0); 1906 const int y = get_global_id(1); 1907 1908 if(x >= width || y >= height) return; 1909 1910 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 1911 const float L = pixel.x; 1912 const float L_in = pixel.x/100.0f; 1913 1914 if(L_in <= in_low) 1915 { 1916 pixel.x = 0.0f; 1917 } 1918 else if(L_in >= in_high) 1919 { 1920 float percentage = (L_in - in_low) / (in_high - in_low); 1921 pixel.x = 100.0f * pow(percentage, in_inv_gamma); 1922 } 1923 else 1924 { 1925 float percentage = (L_in - in_low) / (in_high - in_low); 1926 pixel.x = lookup(lut, percentage); 1927 } 1928 1929 if(L_in > 0.01f) 1930 { 1931 pixel.y *= pixel.x/L; 1932 pixel.z *= pixel.x/L; 1933 } 1934 else 1935 { 1936 pixel.y *= pixel.x; 1937 pixel.z *= pixel.x; 1938 } 1939 1940 write_imagef (out, (int2)(x, y), pixel); 1941} 1942 1943/* kernel for the colorzones plugin */ 1944enum 1945{ 1946 DT_IOP_COLORZONES_L = 0, 1947 DT_IOP_COLORZONES_C = 1, 1948 DT_IOP_COLORZONES_h = 2 1949}; 1950 1951 1952kernel void 1953colorzones_v3 (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const int channel, 1954 read_only image2d_t table_L, read_only image2d_t table_a, read_only image2d_t table_b) 1955{ 1956 const int x = get_global_id(0); 1957 const int y = get_global_id(1); 1958 1959 if(x >= width || y >= height) return; 1960 1961 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 1962 1963 const float a = pixel.y; 1964 const float b = pixel.z; 1965 const float h = fmod(atan2(b, a) + 2.0f*M_PI_F, 2.0f*M_PI_F)/(2.0f*M_PI_F); 1966 const float C = sqrt(b*b + a*a); 1967 1968 float select = 0.0f; 1969 float blend = 0.0f; 1970 1971 switch(channel) 1972 { 1973 case DT_IOP_COLORZONES_L: 1974 select = fmin(1.0f, pixel.x/100.0f); 1975 break; 1976 case DT_IOP_COLORZONES_C: 1977 select = fmin(1.0f, C/128.0f); 1978 break; 1979 default: 1980 case DT_IOP_COLORZONES_h: 1981 select = h; 1982 blend = pow(1.0f - C/128.0f, 2.0f); 1983 break; 1984 } 1985 1986 const float Lm = (blend * 0.5f + (1.0f-blend)*lookup(table_L, select)) - 0.5f; 1987 const float hm = (blend * 0.5f + (1.0f-blend)*lookup(table_b, select)) - 0.5f; 1988 blend *= blend; // saturation isn't as prone to artifacts: 1989 // const float Cm = 2.0f* (blend*0.5f + (1.0f-blend)*lookup(d->lut[1], select)); 1990 const float Cm = 2.0f * lookup(table_a, select); 1991 const float L = pixel.x * pow(2.0f, 4.0f*Lm); 1992 1993 pixel.x = L; 1994 pixel.y = cos(2.0f*M_PI_F*(h + hm)) * Cm * C; 1995 pixel.z = sin(2.0f*M_PI_F*(h + hm)) * Cm * C; 1996 1997 write_imagef (out, (int2)(x, y), pixel); 1998} 1999 2000kernel void 2001colorzones (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const int channel, 2002 read_only image2d_t table_L, read_only image2d_t table_C, read_only image2d_t table_h) 2003{ 2004 const int x = get_global_id(0); 2005 const int y = get_global_id(1); 2006 2007 if(x >= width || y >= height) return; 2008 2009 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2010 2011 float4 LCh; 2012 const float normalize_C = 1.f / (128.0f * sqrt(2.f)); 2013 2014 LCh = Lab_2_LCH(pixel); 2015 2016 float select = 0.0f; 2017 switch(channel) 2018 { 2019 case DT_IOP_COLORZONES_L: 2020 select = LCh.x * 0.01f; 2021 break; 2022 case DT_IOP_COLORZONES_C: 2023 select = LCh.y * normalize_C; 2024 break; 2025 case DT_IOP_COLORZONES_h: 2026 default: 2027 select = LCh.z; 2028 break; 2029 } 2030 select = clamp(select, 0.f, 1.f); 2031 2032 LCh.x *= native_powr(2.0f, 4.0f * (lookup(table_L, select) - .5f)); 2033 LCh.y *= 2.f * lookup(table_C, select); 2034 LCh.z += lookup(table_h, select) - .5f; 2035 2036 pixel.xyz = LCH_2_Lab(LCh).xyz; 2037 2038 write_imagef (out, (int2)(x, y), pixel); 2039} 2040 2041 2042/* kernel for the zonesystem plugin */ 2043kernel void 2044zonesystem (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const int size, 2045 global float *zonemap_offset, global float *zonemap_scale) 2046{ 2047 const int x = get_global_id(0); 2048 const int y = get_global_id(1); 2049 2050 if(x >= width || y >= height) return; 2051 2052 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2053 2054 const float rzscale = (float)(size-1)/100.0f; 2055 const int rz = clamp((int)(pixel.x*rzscale), 0, size-2); 2056 const float zs = ((rz > 0) ? (zonemap_offset[rz]/pixel.x) : 0) + zonemap_scale[rz]; 2057 2058 pixel.xyz *= zs; 2059 2060 write_imagef (out, (int2)(x, y), pixel); 2061} 2062 2063 2064 2065 2066/* kernel to fill an image with a color (for the borders plugin). */ 2067kernel void 2068borders_fill (write_only image2d_t out, const int left, const int top, const int width, const int height, const float4 color) 2069{ 2070 const int x = get_global_id(0); 2071 const int y = get_global_id(1); 2072 2073 if(x < left || y < top) return; 2074 if(x >= width + left || y >= height + top) return; 2075 2076 write_imagef (out, (int2)(x, y), color); 2077} 2078 2079 2080/* kernel for the overexposed plugin. */ 2081typedef enum dt_clipping_preview_mode_t 2082{ 2083 DT_CLIPPING_PREVIEW_GAMUT = 0, 2084 DT_CLIPPING_PREVIEW_ANYRGB = 1, 2085 DT_CLIPPING_PREVIEW_LUMINANCE = 2, 2086 DT_CLIPPING_PREVIEW_SATURATION = 3 2087} dt_clipping_preview_mode_t; 2088 2089kernel void 2090overexposed (read_only image2d_t in, write_only image2d_t out, read_only image2d_t tmp, const int width, const int height, 2091 const float lower, const float upper, const float4 lower_color, const float4 upper_color, 2092 constant dt_colorspaces_iccprofile_info_cl_t *profile_info, 2093 read_only image2d_t lut, const int use_work_profile, dt_clipping_preview_mode_t mode) 2094{ 2095 const int x = get_global_id(0); 2096 const int y = get_global_id(1); 2097 2098 if(x >= width || y >= height) return; 2099 2100 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2101 float4 pixel_tmp = read_imagef(tmp, sampleri, (int2)(x, y)); 2102 2103 if(mode == DT_CLIPPING_PREVIEW_ANYRGB) 2104 { 2105 if(pixel_tmp.x >= upper || pixel_tmp.y >= upper || pixel_tmp.z >= upper) 2106 pixel.xyz = upper_color.xyz; 2107 2108 else if(pixel_tmp.x <= lower && pixel_tmp.y <= lower && pixel_tmp.z <= lower) 2109 pixel.xyz = lower_color.xyz; 2110 2111 } 2112 else if(mode == DT_CLIPPING_PREVIEW_GAMUT && use_work_profile) 2113 { 2114 const float luminance = get_rgb_matrix_luminance(pixel, profile_info, profile_info->matrix_in, lut); 2115 2116 if(luminance >= upper) 2117 { 2118 pixel.xyz = upper_color.xyz; 2119 } 2120 else if(luminance <= lower) 2121 { 2122 pixel.xyz = lower_color.xyz; 2123 } 2124 else 2125 { 2126 float4 saturation = { 0.f, 0.f, 0.f, 0.f}; 2127 saturation = pixel_tmp - (float4)luminance; 2128 saturation = native_sqrt(saturation * saturation / ((float4)(luminance * luminance) + pixel_tmp * pixel_tmp)); 2129 2130 if(saturation.x > upper || saturation.y > upper || saturation.z > upper || 2131 pixel_tmp.x >= upper || pixel_tmp.y >= upper || pixel_tmp.z >= upper) 2132 pixel.xyz = upper_color.xyz; 2133 2134 else if(pixel_tmp.x <= lower && pixel_tmp.y <= lower && pixel_tmp.z <= lower) 2135 pixel.xyz = lower_color.xyz; 2136 } 2137 } 2138 else if(mode == DT_CLIPPING_PREVIEW_LUMINANCE && use_work_profile) 2139 { 2140 const float luminance = get_rgb_matrix_luminance(pixel, profile_info, profile_info->matrix_in, lut); 2141 2142 if(luminance >= upper) 2143 pixel.xyz = upper_color.xyz; 2144 2145 else if(luminance <= lower) 2146 pixel.xyz = lower_color.xyz; 2147 } 2148 else if(mode == DT_CLIPPING_PREVIEW_SATURATION && use_work_profile) 2149 { 2150 const float luminance = get_rgb_matrix_luminance(pixel, profile_info, profile_info->matrix_in, lut); 2151 2152 if(luminance < upper && luminance > lower) 2153 { 2154 float4 saturation = { 0.f, 0.f, 0.f, 0.f}; 2155 saturation = pixel_tmp - (float4)luminance; 2156 saturation = native_sqrt(saturation * saturation / ((float4)(luminance * luminance) + pixel_tmp * pixel_tmp)); 2157 2158 if(saturation.x > upper || saturation.y > upper || saturation.z > upper || 2159 pixel_tmp.x >= upper || pixel_tmp.y >= upper || pixel_tmp.z >= upper) 2160 pixel.xyz = upper_color.xyz; 2161 2162 else if(pixel_tmp.x <= lower && pixel_tmp.y <= lower && pixel_tmp.z <= lower) 2163 pixel.xyz = lower_color.xyz; 2164 } 2165 } 2166 2167 write_imagef (out, (int2)(x, y), pixel); 2168} 2169 2170 2171/* kernel for the rawoverexposed plugin. */ 2172kernel void 2173rawoverexposed_mark_cfa ( 2174 read_only image2d_t in, write_only image2d_t out, global float *pi, 2175 const int width, const int height, 2176 read_only image2d_t raw, const int raw_width, const int raw_height, 2177 const unsigned int filters, global const unsigned char (*const xtrans)[6], 2178 global unsigned int *threshold, global float *colors) 2179{ 2180 const int x = get_global_id(0); 2181 const int y = get_global_id(1); 2182 2183 if(x >= width || y >= height) return; 2184 2185 const int piwidth = 2*width; 2186 global float *ppi = pi + mad24(y, piwidth, 2*x); 2187 2188 const int raw_x = ppi[0]; 2189 const int raw_y = ppi[1]; 2190 2191 if(raw_x < 0 || raw_y < 0 || raw_x >= raw_width || raw_y >= raw_height) return; 2192 2193 const uint raw_pixel = read_imageui(raw, sampleri, (int2)(raw_x, raw_y)).x; 2194 2195 const int c = (filters == 9u) ? FCxtrans(raw_y, raw_x, xtrans) : FC(raw_y, raw_x, filters); 2196 2197 if(raw_pixel < threshold[c]) return; 2198 2199 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2200 2201 global float *color = colors + mad24(4, c, 0); 2202 2203 // cfa color 2204 pixel.x = color[0]; 2205 pixel.y = color[1]; 2206 pixel.z = color[2]; 2207 2208 write_imagef (out, (int2)(x, y), pixel); 2209} 2210 2211kernel void 2212rawoverexposed_mark_solid ( 2213 read_only image2d_t in, write_only image2d_t out, global float *pi, 2214 const int width, const int height, 2215 read_only image2d_t raw, const int raw_width, const int raw_height, 2216 const unsigned int filters, global const unsigned char (*const xtrans)[6], 2217 global unsigned int *threshold, const float4 solid_color) 2218{ 2219 const int x = get_global_id(0); 2220 const int y = get_global_id(1); 2221 2222 if(x >= width || y >= height) return; 2223 2224 const int piwidth = 2*width; 2225 global float *ppi = pi + mad24(y, piwidth, 2*x); 2226 2227 const int raw_x = ppi[0]; 2228 const int raw_y = ppi[1]; 2229 2230 if(raw_x < 0 || raw_y < 0 || raw_x >= raw_width || raw_y >= raw_height) return; 2231 2232 const uint raw_pixel = read_imageui(raw, sampleri, (int2)(raw_x, raw_y)).x; 2233 2234 const int c = (filters == 9u) ? FCxtrans(raw_y, raw_x, xtrans) : FC(raw_y, raw_x, filters); 2235 2236 if(raw_pixel < threshold[c]) return; 2237 2238 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2239 2240 // solid color 2241 pixel.xyz = solid_color.xyz; 2242 2243 write_imagef (out, (int2)(x, y), pixel); 2244} 2245 2246kernel void 2247rawoverexposed_falsecolor ( 2248 read_only image2d_t in, write_only image2d_t out, global float *pi, 2249 const int width, const int height, 2250 read_only image2d_t raw, const int raw_width, const int raw_height, 2251 const unsigned int filters, global const unsigned char (*const xtrans)[6], 2252 global unsigned int *threshold) 2253{ 2254 const int x = get_global_id(0); 2255 const int y = get_global_id(1); 2256 2257 if(x >= width || y >= height) return; 2258 2259 const int piwidth = 2*width; 2260 global float *ppi = pi + mad24(y, piwidth, 2*x); 2261 2262 const int raw_x = ppi[0]; 2263 const int raw_y = ppi[1]; 2264 2265 if(raw_x < 0 || raw_y < 0 || raw_x >= raw_width || raw_y >= raw_height) return; 2266 2267 const uint raw_pixel = read_imageui(raw, sampleri, (int2)(raw_x, raw_y)).x; 2268 2269 const int c = (filters == 9u) ? FCxtrans(raw_y, raw_x, xtrans) : FC(raw_y, raw_x, filters); 2270 2271 if(raw_pixel < threshold[c]) return; 2272 2273 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2274 2275 float p[4]; 2276 vstore4(pixel, 0, p); 2277 // falsecolor 2278 p[c] = 0.0f; 2279 pixel = vload4(0, p); 2280 2281 write_imagef (out, (int2)(x, y), pixel); 2282} 2283 2284 2285/* kernel for the lowlight plugin. */ 2286kernel void 2287lowlight (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 2288 const float4 XYZ_sw, read_only image2d_t lut) 2289{ 2290 const int x = get_global_id(0); 2291 const int y = get_global_id(1); 2292 2293 if(x >= width || y >= height) return; 2294 2295 const float c = 0.5f; 2296 const float threshold = 0.01f; 2297 2298 float V; 2299 float w; 2300 2301 float4 pixel = read_imagef(in, sampleri, (int2)(x, y)); 2302 2303 float4 XYZ = Lab_to_XYZ(pixel); 2304 2305 // calculate scotopic luminance 2306 if (XYZ.x > threshold) 2307 { 2308 // normal flow 2309 V = XYZ.y * ( 1.33f * ( 1.0f + (XYZ.y+XYZ.z)/XYZ.x) - 1.68f ); 2310 } 2311 else 2312 { 2313 // low red flow, avoids "snow" on dark noisy areas 2314 V = XYZ.y * ( 1.33f * ( 1.0f + (XYZ.y+XYZ.z)/threshold) - 1.68f ); 2315 } 2316 2317 // scale using empiric coefficient and fit inside limits 2318 V = clamp(c*V, 0.0f, 1.0f); 2319 2320 // blending coefficient from curve 2321 w = lookup(lut, pixel.x/100.0f); 2322 2323 XYZ = w * XYZ + (1.0f - w) * V * XYZ_sw; 2324 2325 pixel = XYZ_to_Lab(XYZ); 2326 2327 write_imagef (out, (int2)(x, y), pixel); 2328} 2329 2330 2331/* kernel for the contrast lightness saturation module */ 2332kernel void 2333colisa (read_only image2d_t in, write_only image2d_t out, unsigned int width, unsigned int height, const float saturation, 2334 read_only image2d_t ctable, constant float *ca, read_only image2d_t ltable, constant float *la) 2335{ 2336 const unsigned int x = get_global_id(0); 2337 const unsigned int y = get_global_id(1); 2338 2339 if(x >= width || y >= height) return; 2340 2341 float4 i = read_imagef(in, sampleri, (int2)(x, y)); 2342 float4 o; 2343 2344 o.x = lookup_unbounded(ctable, i.x/100.0f, ca); 2345 o.x = lookup_unbounded(ltable, o.x/100.0f, la); 2346 o.y = i.y*saturation; 2347 o.z = i.z*saturation; 2348 o.w = i.w; 2349 2350 write_imagef(out, (int2)(x, y), o); 2351} 2352 2353/* kernel for the unbreak input profile module - gamma version */ 2354 2355kernel void 2356profilegamma (read_only image2d_t in, write_only image2d_t out, int width, int height, 2357 read_only image2d_t table, constant float *ta) 2358{ 2359 const unsigned int x = get_global_id(0); 2360 const unsigned int y = get_global_id(1); 2361 2362 if(x >= width || y >= height) return; 2363 2364 float4 i = read_imagef(in, sampleri, (int2)(x, y)); 2365 float4 o; 2366 2367 o.x = lookup_unbounded(table, i.x, ta); 2368 o.y = lookup_unbounded(table, i.y, ta); 2369 o.z = lookup_unbounded(table, i.z, ta); 2370 o.w = i.w; 2371 2372 write_imagef(out, (int2)(x, y), o); 2373} 2374 2375/* kernel for the unbreak input profile module - log version */ 2376kernel void 2377profilegamma_log (read_only image2d_t in, write_only image2d_t out, int width, int height, const float dynamic_range, const float shadows_range, const float grey) 2378{ 2379 const unsigned int x = get_global_id(0); 2380 const unsigned int y = get_global_id(1); 2381 2382 if(x >= width || y >= height) return; 2383 2384 float4 i = read_imagef(in, sampleri, (int2)(x, y)); 2385 const float4 noise = pow((float4)2.0f, (float4)-16.0f); 2386 const float4 dynamic4 = dynamic_range; 2387 const float4 shadows4 = shadows_range; 2388 const float4 grey4 = grey; 2389 2390 float4 o; 2391 2392 o = (i < noise) ? noise : i / grey4; 2393 o = (log2(o) - shadows4) / dynamic4; 2394 o = (o < noise) ? noise : o; 2395 i.xyz = o.xyz; 2396 2397 write_imagef(out, (int2)(x, y), i); 2398} 2399 2400/* kernel for the interpolation resample helper */ 2401kernel void 2402interpolation_resample (read_only image2d_t in, write_only image2d_t out, const int width, const int height, 2403 const global int *hmeta, const global int *vmeta, 2404 const global int *hlength, const global int *vlength, 2405 const global int *hindex, const global int *vindex, 2406 const global float *hkernel, const global float *vkernel, 2407 const int htaps, const int vtaps, 2408 local float *lkernel, local int *lindex, 2409 local float4 *buffer) 2410{ 2411 const int x = get_global_id(0); 2412 const int yi = get_global_id(1); 2413 const int ylsz = get_local_size(1); 2414 const int xlid = get_local_id(0); 2415 const int ylid = get_local_id(1); 2416 const int y = yi / vtaps; 2417 const int iy = yi % vtaps; 2418 2419 // Initialize resampling indices 2420 const int xm = min(x, width - 1); 2421 const int ym = min(y, height - 1); 2422 const int hlidx = hmeta[xm*3]; // H(orizontal) L(ength) I(n)d(e)x 2423 const int hkidx = hmeta[xm*3+1]; // H(orizontal) K(ernel) I(n)d(e)x 2424 const int hiidx = hmeta[xm*3+2]; // H(orizontal) I(ndex) I(n)d(e)x 2425 const int vlidx = vmeta[ym*3]; // V(ertical) L(ength) I(n)d(e)x 2426 const int vkidx = vmeta[ym*3+1]; // V(ertical) K(ernel) I(n)d(e)x 2427 const int viidx = vmeta[ym*3+2]; // V(ertical) I(ndex) I(n)d(e)x 2428 2429 const int hl = hlength[hlidx]; // H(orizontal) L(ength) 2430 const int vl = vlength[vlidx]; // V(ertical) L(ength) 2431 2432 // generate local copy of horizontal index field and kernel 2433 for(int n = 0; n <= htaps/ylsz; n++) 2434 { 2435 int k = mad24(n, ylsz, ylid); 2436 if(k >= hl) continue; 2437 lindex[k] = hindex[hiidx+k]; 2438 lkernel[k] = hkernel[hkidx+k]; 2439 } 2440 2441 barrier(CLK_LOCAL_MEM_FENCE); 2442 2443 // horizontal convolution kernel; store intermediate result in local buffer 2444 if(x < width && y < height) 2445 { 2446 const int yvalid = iy < vl; 2447 2448 const int yy = yvalid ? vindex[viidx+iy] : -1; 2449 2450 float4 vpixel = (float4)0.0f; 2451 2452 for (int ix = 0; ix < hl && yvalid; ix++) 2453 { 2454 const int xx = lindex[ix]; 2455 float4 hpixel = read_imagef(in, sampleri,(int2)(xx, yy)); 2456 vpixel += hpixel * lkernel[ix]; 2457 } 2458 2459 buffer[ylid] = yvalid ? vpixel * vkernel[vkidx+iy] : (float4)0.0f; 2460 } 2461 else 2462 buffer[ylid] = (float4)0.0f; 2463 2464 barrier(CLK_LOCAL_MEM_FENCE); 2465 2466 // recursively reduce local buffer (vertical convolution kernel) 2467 for(int offset = vtaps / 2; offset > 0; offset >>= 1) 2468 { 2469 if (iy < offset) 2470 { 2471 buffer[ylid] += buffer[ylid + offset]; 2472 } 2473 barrier(CLK_LOCAL_MEM_FENCE); 2474 } 2475 2476 // store final result 2477 if (iy == 0 && x < width && y < height) 2478 { 2479 // Clip negative RGB that may be produced by Lanczos undershooting 2480 // Negative RGB are invalid values no matter the RGB space (light is positive) 2481 write_imagef (out, (int2)(x, y), fmax(buffer[ylid], 0.f)); 2482 } 2483} 2484