1 /* -*- C++ -*-
2 *
3 * This file is part of RawTherapee.
4 *
5 * Ported from LuminanceHDR by Alberto Griggio <alberto.griggio@gmail.com>
6 *
7 * RawTherapee is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * RawTherapee is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with RawTherapee. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 /**
22 * @file tmo_fattal02.cpp
23 * @brief TMO: Gradient Domain High Dynamic Range Compression
24 *
25 * Implementation of Gradient Domain High Dynamic Range Compression
26 * by Raanan Fattal, Dani Lischinski, Michael Werman.
27 *
28 * @author Grzegorz Krawczyk, <krawczyk@mpi-sb.mpg.de>
29 *
30 *
31 * This file is a part of LuminanceHDR package, based on pfstmo.
32 * ----------------------------------------------------------------------
33 * Copyright (C) 2003,2004 Grzegorz Krawczyk
34 *
35 * This program is free software; you can redistribute it and/or modify
36 * it under the terms of the GNU General Public License as published by
37 * the Free Software Foundation; either version 2 of the License, or
38 * (at your option) any later version.
39 *
40 * This program is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43 * GNU General Public License for more details.
44 *
45 * You should have received a copy of the GNU General Public License
46 * along with this program; if not, write to the Free Software
47 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
48 * ----------------------------------------------------------------------
49 *
50 * $Id: tmo_fattal02.cpp,v 1.3 2008/11/04 23:43:08 rafm Exp $
51 */
52
53
54 #ifdef _OPENMP
55 #include <omp.h>
56 #endif
57 #include <cstdio>
58 #include <iostream>
59 #include <iterator>
60 #include <vector>
61 #include <algorithm>
62 #include <limits>
63
64 #include <math.h>
65 #include <assert.h>
66 #include <fftw3.h>
67
68 #include "array2D.h"
69 #include "improcfun.h"
70 #include "settings.h"
71 #include "iccstore.h"
72 #include "StopWatch.h"
73 #include "sleef.h"
74 #include "opthelper.h"
75 #include "rt_algo.h"
76 #include "rescale.h"
77 #include "ipdenoise.h"
78
79 namespace rtengine
80 {
81
82 /******************************************************************************
83 * RT code
84 ******************************************************************************/
85
86 extern const Settings *settings;
87 extern MyMutex *fftwMutex;
88
89 using namespace std;
90
91 namespace
92 {
93
94 class Array2Df: public array2D<float>
95 {
96 typedef array2D<float> Super;
97 public:
Array2Df()98 Array2Df(): Super() {}
Array2Df(int w,int h)99 Array2Df(int w, int h): Super(w, h) {}
Array2Df(int w,int h,float ** data)100 Array2Df(int w, int h, float **data):
101 Super(w, h, data, ARRAY2D_BYREFERENCE) {}
102
operator ()(int w,int h)103 float &operator() (int w, int h)
104 {
105 return (*this)[h][w];
106 }
107
operator ()(int w,int h) const108 const float &operator() (int w, int h) const
109 {
110 return (*this)[h][w];
111 }
112
operator ()(int i)113 float &operator() (int i)
114 {
115 return static_cast<float *> (*this)[i];
116 }
117
operator ()(int i) const118 const float &operator() (int i) const
119 {
120 return const_cast<Array2Df &> (*this).operator() (i);
121 }
122
getRows() const123 int getRows() const
124 {
125 return const_cast<Array2Df &> (*this).height();
126 }
127
getCols() const128 int getCols() const
129 {
130 return const_cast<Array2Df &> (*this).width();
131 }
132
data()133 float *data()
134 {
135 return static_cast<float *> (*this);
136 }
137
data() const138 const float *data() const
139 {
140 return const_cast<Array2Df &> (*this).data();
141 }
142
143 // operator float **() const
144 // {
145 // return static_cast<float **>(const_cast<Array2Df &>(*this));
146 // }
147 };
148
149 // upper bound on image dimension used in tmo_fattal02 -- see the comment there
150 const int RT_dimension_cap = 1920;
151
152 void rescale_bilinear (const Array2Df &src, Array2Df &dst, bool multithread);
153
154
155 /******************************************************************************
156 * Luminance HDR code (modifications are marked with an RT comment)
157 ******************************************************************************/
158
downSample(const Array2Df & A,Array2Df & B)159 void downSample (const Array2Df& A, Array2Df& B)
160 {
161 const int width = B.getCols();
162 const int height = B.getRows();
163
164 // Note, I've uncommented all omp directives. They are all ok but are
165 // applied to too small problems and in total don't lead to noticeable
166 // speed improvements. The main issue is the pde solver and in case of the
167 // fft solver uses optimised threaded fftw routines.
168 //#pragma omp parallel for
169 for ( int y = 0 ; y < height ; y++ ) {
170 for ( int x = 0 ; x < width ; x++ ) {
171 float p = A (2 * x, 2 * y);
172 p += A (2 * x + 1, 2 * y);
173 p += A (2 * x, 2 * y + 1);
174 p += A (2 * x + 1, 2 * y + 1);
175 B (x, y) = p * 0.25f; // p / 4.0f;
176 }
177 }
178 }
179
gaussianBlur(const Array2Df & I,Array2Df & L,bool multithread)180 void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread)
181 {
182 const int width = I.getCols();
183 const int height = I.getRows();
184
185 if (width < 3 || height < 3) {
186 if (&I != &L) {
187 for (int i = 0, n = width * height; i < n; ++i) {
188 L (i) = I (i);
189 }
190 }
191
192 return;
193 }
194
195 Array2Df T (width, height);
196
197 //--- X blur
198 #ifdef _OPENMP
199 #pragma omp parallel for shared(I, T) if(multithread)
200 #endif
201
202 for ( int y = 0 ; y < height ; y++ ) {
203 for ( int x = 1 ; x < width - 1 ; x++ ) {
204 float t = 2.f * I (x, y);
205 t += I (x - 1, y);
206 t += I (x + 1, y);
207 T (x, y) = t * 0.25f; // t / 4.f;
208 }
209
210 T (0, y) = ( 3.f * I (0, y) + I (1, y) ) * 0.25f; // / 4.f;
211 T (width - 1, y) = ( 3.f * I (width - 1, y) + I (width - 2, y) ) * 0.25f; // / 4.f;
212 }
213
214 //--- Y blur
215 #ifdef _OPENMP
216 #pragma omp parallel for if(multithread)
217 #endif
218
219 for ( int x = 0 ; x < width - 7 ; x += 8 ) {
220 for ( int y = 1 ; y < height - 1 ; y++ ) {
221 for (int xx = 0; xx < 8; ++xx) {
222 float t = 2.f * T (x + xx, y);
223 t += T (x + xx, y - 1);
224 t += T (x + xx, y + 1);
225 L (x + xx, y) = t * 0.25f; // t/4.0f;
226 }
227 }
228
229 for (int xx = 0; xx < 8; ++xx) {
230 L (x + xx, 0) = ( 3.f * T (x + xx, 0) + T (x + xx, 1) ) * 0.25f; // / 4.0f;
231 L (x + xx, height - 1) = ( 3.f * T (x + xx, height - 1) + T (x + xx, height - 2) ) * 0.25f; // / 4.0f;
232 }
233 }
234
235 for ( int x = width - (width % 8) ; x < width ; x++ ) {
236 for ( int y = 1 ; y < height - 1 ; y++ ) {
237 float t = 2.f * T (x, y);
238 t += T (x, y - 1);
239 t += T (x, y + 1);
240 L (x, y) = t * 0.25f; // t/4.0f;
241 }
242
243 L (x, 0) = ( 3.f * T (x, 0) + T (x, 1) ) * 0.25f; // / 4.0f;
244 L (x, height - 1) = ( 3.f * T (x, height - 1) + T (x, height - 2) ) * 0.25f; // / 4.0f;
245 }
246 }
247
createGaussianPyramids(Array2Df ** pyramids,int nlevels,bool multithread)248 void createGaussianPyramids (Array2Df** pyramids, int nlevels, bool multithread)
249 {
250 // pyramids[0] is already set
251 int width = pyramids[0]->getCols();
252 int height = pyramids[0]->getRows();
253
254
255 Array2Df* L = new Array2Df (width, height);
256 gaussianBlur ( *pyramids[0], *L, multithread );
257
258 for ( int k = 1 ; k < nlevels ; k++ ) {
259 if (width > 2 && height > 2) {
260 width /= 2;
261 height /= 2;
262 pyramids[k] = new Array2Df (width, height);
263 downSample (*L, *pyramids[k]);
264 } else {
265 // RT - now nlevels is fixed in tmo_fattal02 (see the comment in
266 // there), so it might happen that we have to add some padding to
267 // the gaussian pyramids
268 pyramids[k] = new Array2Df (width, height);
269
270 for (int j = 0, n = width * height; j < n; ++j) {
271 (*pyramids[k]) (j) = (*L) (j);
272 }
273 }
274
275 if (k < nlevels - 1) {
276 delete L;
277 L = new Array2Df (width, height);
278 gaussianBlur ( *pyramids[k], *L, multithread );
279 }
280 }
281
282 delete L;
283 }
284
285 //--------------------------------------------------------------------
286
calculateGradients(Array2Df * H,Array2Df * G,int k,bool multithread)287 float calculateGradients (Array2Df* H, Array2Df* G, int k, bool multithread)
288 {
289 const int width = H->getCols();
290 const int height = H->getRows();
291 const float divider = pow ( 2.0f, k + 1 );
292 double avgGrad = 0.0; // use double precision for large summations
293
294 #ifdef _OPENMP
295 #pragma omp parallel for reduction(+:avgGrad) if(multithread)
296 #endif
297
298 for ( int y = 0 ; y < height ; y++ ) {
299 int n = (y == 0 ? 0 : y - 1);
300 int s = (y + 1 == height ? y : y + 1);
301
302 for ( int x = 0 ; x < width ; x++ ) {
303 float gx, gy;
304 int w, e;
305 w = (x == 0 ? 0 : x - 1);
306 e = (x + 1 == width ? x : x + 1);
307
308 gx = ((*H) (w, y) - (*H) (e, y));
309
310 gy = ((*H) (x, s) - (*H) (x, n));
311 // note this implicitly assumes that H(-1)=H(0)
312 // for the fft-pde slover this would need adjustment as H(-1)=H(1)
313 // is assumed, which means gx=0.0, gy=0.0 at the boundaries
314 // however, the impact is not visible so we ignore this here
315
316 (*G) (x, y) = sqrt (gx * gx + gy * gy) / divider;
317 avgGrad += (*G) (x, y);
318 }
319 }
320
321 return avgGrad / (width * height);
322 }
323
324 //--------------------------------------------------------------------
325
upSample(const Array2Df & A,Array2Df & B)326 void upSample (const Array2Df& A, Array2Df& B)
327 {
328 const int width = B.getCols();
329 const int height = B.getRows();
330 const int awidth = A.getCols();
331 const int aheight = A.getRows();
332
333 //#pragma omp parallel for shared(A, B)
334 for ( int y = 0 ; y < height ; y++ ) {
335 for ( int x = 0 ; x < width ; x++ ) {
336 int ax = static_cast<int> (x * 0.5f); //x / 2.f;
337 int ay = static_cast<int> (y * 0.5f); //y / 2.f;
338 ax = (ax < awidth) ? ax : awidth - 1;
339 ay = (ay < aheight) ? ay : aheight - 1;
340
341 B (x, y) = A (ax, ay);
342 }
343 }
344
345 //--- this code below produces 'use of uninitialized value error'
346 // int width = A->getCols();
347 // int height = A->getRows();
348 // int x,y;
349
350 // for( y=0 ; y<height ; y++ )
351 // for( x=0 ; x<width ; x++ )
352 // {
353 // (*B)(2*x,2*y) = (*A)(x,y);
354 // (*B)(2*x+1,2*y) = (*A)(x,y);
355 // (*B)(2*x,2*y+1) = (*A)(x,y);
356 // (*B)(2*x+1,2*y+1) = (*A)(x,y);
357 // }
358 }
359
360
calculateFiMatrix(Array2Df * FI,Array2Df * gradients[],float avgGrad[],int nlevels,int detail_level,float alfa,float beta,float noise,bool multithread)361 void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
362 float avgGrad[], int nlevels, int detail_level,
363 float alfa, float beta, float noise, bool multithread)
364 {
365 int width = gradients[nlevels - 1]->getCols();
366 int height = gradients[nlevels - 1]->getRows();
367 Array2Df** fi = new Array2Df*[nlevels];
368
369 fi[nlevels - 1] = new Array2Df (width, height);
370
371 #ifdef _OPENMP
372 #pragma omp parallel for shared(fi) if(multithread)
373 #endif
374 for ( int k = 0 ; k < width * height ; k++ ) {
375 (*fi[nlevels - 1]) (k) = 1.0f;
376 }
377
378 for ( int k = nlevels - 1; k >= 0 ; k-- ) {
379 width = gradients[k]->getCols();
380 height = gradients[k]->getRows();
381
382 // only apply gradients to levels>=detail_level but at least to the coarsest
383 if ((k >= detail_level || k == nlevels - 1) && beta != 1.f) {
384 const float a = alfa * avgGrad[k];
385 //DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl;
386 #ifdef _OPENMP
387 #pragma omp parallel for shared(fi,avgGrad) if(multithread)
388 #endif
389 for ( int y = 0; y < height; y++ ) {
390 for ( int x = 0; x < width; x++ ) {
391 float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y);
392 float value = pow ((grad + noise) / a, beta - 1.0f);
393
394 (*fi[k]) (x, y) *= value;
395 }
396 }
397 }
398
399 // create next level
400 if ( k > 1 ) {
401 width = gradients[k - 1]->getCols();
402 height = gradients[k - 1]->getRows();
403 fi[k - 1] = new Array2Df (width, height);
404 } else {
405 fi[0] = FI; // highest level -> result
406 }
407
408 if (k > 0) {
409 upSample (*fi[k], *fi[k - 1]); // upsample to next level
410 gaussianBlur (*fi[k - 1], *fi[k - 1], multithread);
411 }
412 }
413
414 for ( int k = 1 ; k < nlevels ; k++ ) {
415 delete fi[k];
416 }
417
418 delete[] fi;
419 }
420
421 void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread);
422
tmo_fattal02(size_t width,size_t height,const Array2Df & Y,Array2Df & L,float alfa,float beta,float noise,int detail_level,bool multithread)423 void tmo_fattal02 (size_t width,
424 size_t height,
425 const Array2Df& Y,
426 Array2Df& L,
427 float alfa,
428 float beta,
429 float noise,
430 int detail_level,
431 bool multithread)
432 {
433 // #ifdef TIMER_PROFILING
434 // msec_timer stop_watch;
435 // stop_watch.start();
436 // #endif
437 // static const float black_point = 0.1f;
438 // static const float white_point = 0.5f;
439 // static const float gamma = 1.0f; // 0.8f;
440
441 // static const int detail_level = 3;
442 if ( detail_level < 0 ) {
443 detail_level = 0;
444 }
445
446 if ( detail_level > 3 ) {
447 detail_level = 3;
448 }
449
450 // ph.setValue(2);
451 // if (ph.canceled()) return;
452
453 /* RT -- we use a hardcoded value for nlevels, to limit the
454 * dependency of the result on the image size. When using an auto computed
455 * nlevels value, you would get vastly different results with different
456 * image sizes, making it essentially impossible to preview the tool
457 * inside RT. With a hardcoded value, the results for the preview are much
458 * closer to those for the final image */
459 // int MSIZE = 32; // minimum size of gaussian pyramid
460 // // I believe a smaller value than 32 results in slightly better overall
461 // // quality but I'm only applying this if the newly implemented fft solver
462 // // is used in order not to change behaviour of the old version
463 // // TODO: best let the user decide this value
464 // // if (fftsolver)
465 // {
466 // MSIZE = 8;
467 // }
468
469
470 // int size = width * height;
471
472 // find max value, normalize to range 0..100 and take logarithm
473 // float minLum = Y (0, 0);
474 // float maxLum = Y (0, 0);
475
476 // #ifdef _OPENMP
477 // #pragma omp parallel for reduction(max:maxLum) if(multithread)
478 // #endif
479
480 // for ( int i = 0 ; i < size ; i++ ) {
481 // maxLum = std::max (maxLum, Y (i));
482 // }
483
484 Array2Df* H = new Array2Df (width, height);
485 //float temp = 100.f / maxLum;
486 float eps = 1e-4f;
487 #ifdef _OPENMP
488 #pragma omp parallel if(multithread)
489 #endif
490 {
491 #ifdef __SSE2__
492 vfloat epsv = F2V (eps);
493 //vfloat tempv = F2V (temp);
494 #endif
495 #ifdef _OPENMP
496 #pragma omp for schedule(dynamic,16)
497 #endif
498
499 for (size_t i = 0 ; i < height ; ++i) {
500 size_t j = 0;
501 #ifdef __SSE2__
502
503 for (; j < width - 3; j += 4) {
504 STVFU ((*H)[i][j], xlogf(/*tempv **/ LVFU (Y[i][j]) + epsv));
505 }
506
507 #endif
508
509 for (; j < width; ++j) {
510 (*H)[i][j] = xlogf(/*temp **/ Y[i][j] + eps);
511 }
512 }
513 }
514
515 /** RT - this is also here to reduce the dependency of the results on the
516 * input image size, with the primary aim of having a preview in RT that is
517 * reasonably close to the actual output image. Intuitively, what we do is
518 * to put a cap on the dimension of the image processed, so that it is close
519 * in size to the typical preview that you will see on a normal consumer
520 * monitor. (That's where the 1920 value for RT_dimension_cap comes from.)
521 * However, we can't simply downscale the input Y array and then upscale it
522 * on output, because that would cause a big loss of sharpness (confirmed by
523 * testing).
524 * So, we use a different method: we downscale the H array, so that we
525 * compute a downscaled gaussian pyramid and a downscaled FI matrix. Then,
526 * we upscale the FI matrix later on, before it gets combined with the
527 * original input luminance array H. This seems to preserve the input
528 * sharpness and at the same time significantly reduce the dependency of the
529 * result on the input size. Clearly this is a hack, and keep in mind that I
530 * do not really know how Fattal works (it comes from LuminanceHDR almost
531 * verbatim), so this should probably be revised/reviewed by someone who
532 * knows better... also, we use a quite naive bilinear interpolation
533 * algorithm (see rescale_bilinear below), which could definitely be
534 * improved */
535 int fullwidth = width;
536 int fullheight = height;
537 int dim = std::max (width, height);
538 Array2Df *fullH = nullptr;
539
540 if (dim > RT_dimension_cap) {
541 float s = float (RT_dimension_cap) / float (dim);
542 Array2Df *HH = new Array2Df (width * s, height * s);
543 rescale_bilinear (*H, *HH, multithread);
544 fullH = H;
545 H = HH;
546 width = H->getCols();
547 height = H->getRows();
548 }
549
550 /** RT */
551
552 const int nlevels = 7; // RT -- see above
553
554 Array2Df* pyramids[nlevels];
555 pyramids[0] = H;
556 createGaussianPyramids (pyramids, nlevels, multithread);
557
558 // calculate gradients and its average values on pyramid levels
559 Array2Df* gradients[nlevels];
560 float avgGrad[nlevels];
561
562 for ( int k = 0 ; k < nlevels ; k++ ) {
563 gradients[k] = new Array2Df (pyramids[k]->getCols(), pyramids[k]->getRows());
564 avgGrad[k] = calculateGradients (pyramids[k], gradients[k], k, multithread);
565 if(k != 0) // pyramids[0] is H. Will be deleted later
566 delete pyramids[k];
567 }
568
569
570 // calculate fi matrix
571 Array2Df* FI = new Array2Df (width, height);
572 calculateFiMatrix (FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise, multithread);
573
574 for ( int i = 0 ; i < nlevels ; i++ ) {
575 delete gradients[i];
576 }
577
578 /** - RT - bring back the FI image to the input size if it was downscaled */
579 if (fullH) {
580 delete H;
581 H = fullH;
582 Array2Df *FI2 = new Array2Df (fullwidth, fullheight);
583 rescale_bilinear (*FI, *FI2, multithread);
584 delete FI;
585 FI = FI2;
586 width = fullwidth;
587 height = fullheight;
588 }
589
590 /** RT */
591
592 // attenuate gradients
593 Array2Df* Gx = new Array2Df (width, height);
594 Array2Df* Gy = &L; // use L as buffer for Gy
595
596 // the fft solver solves the Poisson pde but with slightly different
597 // boundary conditions, so we need to adjust the assembly of the right hand
598 // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
599 // Neumann conditions assume U(-1)=U(0)), see also divergence calculation
600 #ifdef _OPENMP
601 #pragma omp parallel for if(multithread)
602 #endif
603
604 for ( size_t y = 0 ; y < height ; y++ ) {
605 // sets index+1 based on the boundary assumption H(N+1)=H(N-1)
606 unsigned int yp1 = (y + 1 >= height ? height - 2 : y + 1);
607
608 for ( size_t x = 0 ; x < width ; x++ ) {
609 // sets index+1 based on the boundary assumption H(N+1)=H(N-1)
610 unsigned int xp1 = (x + 1 >= width ? width - 2 : x + 1);
611 // forward differences in H, so need to use between-points approx of FI
612 (*Gx) (x, y) = ((*H) (xp1, y) - (*H) (x, y)) * 0.5 * ((*FI) (xp1, y) + (*FI) (x, y));
613 (*Gy) (x, y) = ((*H) (x, yp1) - (*H) (x, y)) * 0.5 * ((*FI) (x, yp1) + (*FI) (x, y));
614 }
615 }
616
617 delete H;
618
619 // calculate divergence
620 #ifdef _OPENMP
621 #pragma omp parallel for if(multithread)
622 #endif
623
624 for ( size_t y = 0; y < height; ++y ) {
625 for ( size_t x = 0; x < width; ++x ) {
626 (*FI) (x, y) = (*Gx) (x, y) + (*Gy) (x, y);
627
628 if ( x > 0 ) {
629 (*FI) (x, y) -= (*Gx) (x - 1, y);
630 }
631
632 if ( y > 0 ) {
633 (*FI) (x, y) -= (*Gy) (x, y - 1);
634 }
635
636 if (x == 0) {
637 (*FI) (x, y) += (*Gx) (x, y);
638 }
639
640 if (y == 0) {
641 (*FI) (x, y) += (*Gy) (x, y);
642 }
643
644 }
645 }
646
647 //delete Gx; // RT - reused as temp buffer in solve_pde_fft, deleted later
648
649 // solve pde and exponentiate (ie recover compressed image)
650 {
651 MyMutex::MyLock lock (*fftwMutex);
652 solve_pde_fft (FI, &L, Gx, multithread);
653 }
654 delete Gx;
655 delete FI;
656
657 #ifdef _OPENMP
658 #pragma omp parallel if(multithread)
659 #endif
660 {
661 // #ifdef __SSE2__
662 // vfloat gammav = F2V (gamma);
663 // #endif
664 #ifdef _OPENMP
665 #pragma omp for schedule(dynamic,16)
666 #endif
667
668 for (size_t i = 0 ; i < height ; i++) {
669 size_t j = 0;
670 #ifdef __SSE2__
671
672 for (; j < width - 3; j += 4) {
673 STVFU (L[i][j], xexpf (/*gammav **/ LVFU (L[i][j])));
674 }
675
676 #endif
677
678 for (; j < width; j++) {
679 L[i][j] = xexpf ( /*gamma **/ L[i][j]);
680 }
681 }
682 }
683 }
684
685
686 /**
687 *
688 * @file pde_fft.cpp
689 * @brief Direct Poisson solver using the discrete cosine transform
690 *
691 * @author Tino Kluge (tino.kluge@hrz.tu-chemnitz.de)
692 *
693 */
694
695 //////////////////////////////////////////////////////////////////////
696 // Direct Poisson solver using the discrete cosine transform
697 //////////////////////////////////////////////////////////////////////
698 // by Tino Kluge (tino.kluge@hrz.tu-chemnitz.de)
699 //
700 // let U and F be matrices of order (n1,n2), ie n1=height, n2=width
701 // and L_x of order (n2,n2) and L_y of order (n1,n1) and both
702 // representing the 1d Laplace operator with Neumann boundary conditions,
703 // ie L_x and L_y are tridiagonal matrices of the form
704 //
705 // ( -2 2 )
706 // ( 1 -2 1 )
707 // ( . . . )
708 // ( 1 -2 1 )
709 // ( 2 -2 )
710 //
711 // then this solver computes U given F based on the equation
712 //
713 // -------------------------
714 // L_y U + (L_x U^tr)^tr = F
715 // -------------------------
716 //
717 // Note, if the first and last row of L_x and L_y contained one's instead of
718 // two's then this equation would be exactly the 2d Poisson equation with
719 // Neumann boundary conditions. As a simple rule:
720 // - Neumann: assume U(-1)=U(0) --> U(i-1) - 2 U(i) + U(i+1) becomes
721 // i=0: U(0) - 2 U(0) + U(1) = -U(0) + U(1)
722 // - our system: assume U(-1)=U(1) --> this becomes
723 // i=0: U(1) - 2(0) + U(1) = -2 U(0) + 2 U(1)
724 //
725 // The multi grid solver solve_pde_multigrid() solves the 2d Poisson pde
726 // with the right Neumann boundary conditions, U(-1)=U(0), see function
727 // atimes(). This means the assembly of the right hand side F is different
728 // for both solvers.
729
730
731 // returns T = EVy A EVx^tr
732 // note, modifies input data
transform_ev2normal(Array2Df * A,Array2Df * T,bool multithread)733 void transform_ev2normal (Array2Df *A, Array2Df *T, bool multithread)
734 {
735 int width = A->getCols();
736 int height = A->getRows();
737 assert ((int)T->getCols() == width && (int)T->getRows() == height);
738
739 // the discrete cosine transform is not exactly the transform needed
740 // need to scale input values to get the right transformation
741 #ifdef _OPENMP
742 #pragma omp parallel for if(multithread)
743 #endif
744
745 for (int y = 1 ; y < height - 1 ; y++ )
746 for (int x = 1 ; x < width - 1 ; x++ ) {
747 (*A) (x, y) *= 0.25f;
748 }
749
750 for (int x = 1 ; x < width - 1 ; x++ ) {
751 (*A) (x, 0) *= 0.5f;
752 (*A) (x, height - 1) *= 0.5f;
753 }
754
755 for (int y = 1 ; y < height - 1 ; y++ ) {
756 (*A) (0, y) *= 0.5;
757 (*A) (width - 1, y) *= 0.5f;
758 }
759
760 // note, fftw provides its own memory allocation routines which
761 // ensure that memory is properly 16/32 byte aligned so it can
762 // use SSE/AVX operations (2/4 double ops in parallel), if our
763 // data is not properly aligned fftw won't use SSE/AVX
764 // (I believe new() aligns memory to 16 byte so avoid overhead here)
765 //
766 // double* in = (double*) fftwf_malloc(sizeof(double) * width*height);
767 // fftwf_free(in);
768
769 // executes 2d discrete cosine transform
770 fftwf_plan p;
771 p = fftwf_plan_r2r_2d (height, width, A->data(), T->data(),
772 FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE);
773 fftwf_execute (p);
774 fftwf_destroy_plan (p);
775 }
776
777
778 // returns T = EVy^-1 * A * (EVx^-1)^tr
transform_normal2ev(Array2Df * A,Array2Df * T,bool multithread)779 void transform_normal2ev (Array2Df *A, Array2Df *T, bool multithread)
780 {
781 int width = A->getCols();
782 int height = A->getRows();
783 assert ((int)T->getCols() == width && (int)T->getRows() == height);
784
785 // executes 2d discrete cosine transform
786 fftwf_plan p;
787 p = fftwf_plan_r2r_2d (height, width, A->data(), T->data(),
788 FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE);
789 fftwf_execute (p);
790 fftwf_destroy_plan (p);
791
792 // need to scale the output matrix to get the right transform
793 float factor = (1.0f / ((height - 1) * (width - 1)));
794 #ifdef _OPENMP
795 #pragma omp parallel for if(multithread)
796 #endif
797
798 for (int y = 0 ; y < height ; y++ )
799 for (int x = 0 ; x < width ; x++ ) {
800 (*T) (x, y) *= factor;
801 }
802
803 for (int x = 0 ; x < width ; x++ ) {
804 (*T) (x, 0) *= 0.5f;
805 (*T) (x, height - 1) *= 0.5f;
806 }
807
808 for (int y = 0 ; y < height ; y++ ) {
809 (*T) (0, y) *= 0.5f;
810 (*T) (width - 1, y) *= 0.5f;
811 }
812 }
813
814 // returns the eigenvalues of the 1d laplace operator
get_lambda(int n)815 std::vector<double> get_lambda (int n)
816 {
817 assert (n > 1);
818 std::vector<double> v (n);
819
820 for (int i = 0; i < n; i++) {
821 v[i] = -4.0 * SQR (sin ((double)i / (2 * (n - 1)) * RT_PI));
822 }
823
824 return v;
825 }
826
827 // // makes boundary conditions compatible so that a solution exists
828 // void make_compatible_boundary(Array2Df *F)
829 // {
830 // int width = F->getCols();
831 // int height = F->getRows();
832
833 // double sum=0.0;
834 // for(int y=1 ; y<height-1 ; y++ )
835 // for(int x=1 ; x<width-1 ; x++ )
836 // sum+=(*F)(x,y);
837
838 // for(int x=1 ; x<width-1 ; x++ )
839 // sum+=0.5*((*F)(x,0)+(*F)(x,height-1));
840
841 // for(int y=1 ; y<height-1 ; y++ )
842 // sum+=0.5*((*F)(0,y)+(*F)(width-1,y));
843
844 // sum+=0.25*((*F)(0,0)+(*F)(0,height-1)+(*F)(width-1,0)+(*F)(width-1,height-1));
845
846 // //DEBUG_STR << "compatible_boundary: int F = " << sum ;
847 // //DEBUG_STR << " (should be 0 to be solvable)" << std::endl;
848
849 // double add=-sum/(height+width-3);
850 // //DEBUG_STR << "compatible_boundary: adjusting boundary by " << add << std::endl;
851 // for(int x=0 ; x<width ; x++ )
852 // {
853 // (*F)(x,0)+=add;
854 // (*F)(x,height-1)+=add;
855 // }
856 // for(int y=1 ; y<height-1 ; y++ )
857 // {
858 // (*F)(0,y)+=add;
859 // (*F)(width-1,y)+=add;
860 // }
861 // }
862
863
864
865 // solves Laplace U = F with Neumann boundary conditions
866 // if adjust_bound is true then boundary values in F are modified so that
867 // the equation has a solution, if adjust_bound is set to false then F is
868 // not modified and the equation might not have a solution but an
869 // approximate solution with a minimum error is then calculated
870 // double precision version
solve_pde_fft(Array2Df * F,Array2Df * U,Array2Df * buf,bool multithread)871 void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*, pfs::Progress &ph,
872 bool adjust_bound)*/
873 {
874 // ph.setValue(20);
875 //DEBUG_STR << "solve_pde_fft: solving Laplace U = F ..." << std::endl;
876 int width = F->getCols();
877 int height = F->getRows();
878 assert ((int)U->getCols() == width && (int)U->getRows() == height);
879 assert (buf->getCols() == width && buf->getRows() == height);
880
881 // activate parallel execution of fft routines
882 #ifdef RT_FFTW3F_OMP
883
884 if (multithread) {
885 fftwf_init_threads();
886 fftwf_plan_with_nthreads ( omp_get_max_threads() );
887 }
888
889 // #else
890 // fftwf_plan_with_nthreads( 2 );
891 #endif
892
893 // in general there might not be a solution to the Poisson pde
894 // with Neumann boundary conditions unless the boundary satisfies
895 // an integral condition, this function modifies the boundary so that
896 // the condition is exactly satisfied
897 // if(adjust_bound)
898 // {
899 // //DEBUG_STR << "solve_pde_fft: checking boundary conditions" << std::endl;
900 // make_compatible_boundary(F);
901 // }
902
903 // transforms F into eigenvector space: Ftr =
904 //DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl;
905 Array2Df* F_tr = buf;
906 transform_normal2ev (F, F_tr, multithread);
907 // TODO: F no longer needed so could release memory, but as it is an
908 // input parameter we won't do that
909
910
911 // in the eigenvector space the solution is very simple
912 std::vector<double> l1 = get_lambda (height);
913 std::vector<double> l2 = get_lambda (width);
914
915 #ifdef _OPENMP
916 #pragma omp parallel for if(multithread)
917 #endif
918
919 for (int y = 0 ; y < height ; y++ ) {
920 for (int x = 0 ; x < width ; x++ ) {
921 (*F_tr) (x, y) = (*F_tr) (x, y) / (l1[y] + l2[x]);
922 }
923 }
924
925 (*F_tr) (0, 0) = 0.f; // any value ok, only adds a const to the solution
926
927 // transforms F_tr back to the normal space
928 transform_ev2normal (F_tr, U, multithread);
929
930 // the solution U as calculated will satisfy something like int U = 0
931 // since for any constant c, U-c is also a solution and we are mainly
932 // working in the logspace of (0,1) data we prefer to have
933 // a solution which has no positive values: U_new(x,y)=U(x,y)-max
934 // (not really needed but good for numerics as we later take exp(U))
935 //DEBUG_STR << "solve_pde_fft: removing constant from solution" << std::endl;
936 // float max = 0.f;
937 // #ifdef _OPENMP
938 // #pragma omp parallel for reduction(max:max) if(multithread)
939 // #endif
940
941 // for (int i = 0; i < width * height; i++) {
942 // max = std::max (max, (*U) (i));
943 // }
944
945 // #ifdef _OPENMP
946 // #pragma omp parallel for if(multithread)
947 // #endif
948
949 // for (int i = 0; i < width * height; i++) {
950 // (*U) (i) -= max;
951 // }
952 }
953
954
955 // ---------------------------------------------------------------------
956 // the functions below are only for test purposes to check the accuracy
957 // of the pde solvers
958
959
960 // // returns the norm of (Laplace U - F) of all interior points
961 // // useful to compare solvers
962 // float residual_pde(Array2Df* U, Array2Df* F)
963 // {
964 // int width = U->getCols();
965 // int height = U->getRows();
966 // assert((int)F->getCols()==width && (int)F->getRows()==height);
967
968 // double res=0.0;
969 // for(int y=1;y<height-1;y++)
970 // for(int x=1;x<width-1;x++)
971 // {
972 // double laplace=-4.0*(*U)(x,y)+(*U)(x-1,y)+(*U)(x+1,y)
973 // +(*U)(x,y-1)+(*U)(x,y+1);
974 // res += SQR( laplace-(*F)(x,y) );
975 // }
976 // return static_cast<float>( sqrt(res) );
977 // }
978
979
980 /*****************************************************************************
981 * RT code from here on
982 *****************************************************************************/
983
rescale_bilinear(const Array2Df & src,Array2Df & dst,bool multithread)984 inline void rescale_bilinear (const Array2Df &src, Array2Df &dst, bool multithread)
985 {
986 rescaleBilinear(src, dst, multithread);
987 }
988
rescale_nearest(const Array2Df & src,Array2Df & dst,bool multithread)989 inline void rescale_nearest (const Array2Df &src, Array2Df &dst, bool multithread)
990 {
991 rescaleNearest(src, dst, multithread);
992 }
993
994
luminance(float r,float g,float b,TMatrix ws)995 inline float luminance (float r, float g, float b, TMatrix ws)
996 {
997 return Color::rgbLuminance(r, g, b, ws);
998 }
999
1000
round_up_pow2(int dim)1001 inline int round_up_pow2 (int dim)
1002 {
1003 // from https://graphics.stanford.edu/~seander/bithacks.html
1004 assert (dim > 0);
1005 unsigned int v = dim;
1006 v--;
1007 v |= v >> 1;
1008 v |= v >> 2;
1009 v |= v >> 4;
1010 v |= v >> 8;
1011 v |= v >> 16;
1012 v++;
1013 return v;
1014 }
1015
find_fast_dim(int dim)1016 inline int find_fast_dim (int dim)
1017 {
1018 // as per the FFTW docs:
1019 //
1020 // FFTW is generally best at handling sizes of the form
1021 // 2^a 3^b 5^c 7^d 11^e 13^f,
1022 // where e+f is either 0 or 1.
1023 //
1024 // Here, we try to round up to the nearest dim that can be expressed in
1025 // the above form. This is not exhaustive, but should be ok for pictures
1026 // up to 100MPix at least
1027
1028 int d1 = round_up_pow2 (dim);
1029 std::vector<int> d = {
1030 d1 / 128 * 65,
1031 d1 / 64 * 33,
1032 d1 / 512 * 273,
1033 d1 / 16 * 9,
1034 d1 / 8 * 5,
1035 d1 / 16 * 11,
1036 d1 / 128 * 91,
1037 d1 / 4 * 3,
1038 d1 / 64 * 49,
1039 d1 / 16 * 13,
1040 d1 / 8 * 7,
1041 d1
1042 };
1043
1044 for (size_t i = 0; i < d.size(); ++i) {
1045 if (d[i] >= dim) {
1046 return d[i];
1047 }
1048 }
1049
1050 assert (false);
1051 return dim;
1052 }
1053
1054
ToneMapFattal02(Imagefloat * rgb,ImProcFunctions * ipf,const ProcParams * params,bool multiThread)1055 void ToneMapFattal02(Imagefloat *rgb, ImProcFunctions *ipf, const ProcParams *params, bool multiThread)
1056 {
1057 // BENCHFUN
1058 const int detail_level = 3;
1059
1060 float alpha = 1.f;
1061 if (params->fattal.threshold < 0) {
1062 alpha += (params->fattal.threshold * 0.9f) / 100.f;
1063 } else if (params->fattal.threshold > 0) {
1064 alpha += params->fattal.threshold / 100.f;
1065 }
1066
1067 float beta = 1.f - (params->fattal.amount * 0.3f) / 100.f;
1068
1069 // sanity check
1070 if (alpha <= 0 || beta <= 0) {
1071 return;
1072 }
1073
1074 int w = rgb->getWidth();
1075 int h = rgb->getHeight();
1076
1077 Array2Df Yr (w, h);
1078
1079 constexpr float epsilon = 1e-4f;
1080 constexpr float luminance_noise_floor = 65.535f;
1081 constexpr float min_luminance = 1.f;
1082
1083 TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix (params->icm.workingProfile);
1084
1085 #ifdef _OPENMP
1086 #pragma omp parallel for if(multiThread)
1087 #endif
1088 for (int y = 0; y < h; y++) {
1089 for (int x = 0; x < w; x++) {
1090 Yr (x, y) = std::max (luminance (rgb->r (y, x), rgb->g (y, x), rgb->b (y, x), ws), min_luminance); // clip really black pixels
1091 }
1092 }
1093
1094 // median filter on the deep shadows, to avoid boosting noise
1095 // because w2 >= w and h2 >= h, we can use the L buffer as temporary buffer for Median_Denoise()
1096 int w2 = find_fast_dim (w) + 1;
1097 int h2 = find_fast_dim (h) + 1;
1098 Array2Df L (w2, h2);
1099 {
1100 #ifdef _OPENMP
1101 int num_threads = multiThread ? omp_get_max_threads() : 1;
1102 #else
1103 int num_threads = 1;
1104 #endif
1105 float r = float (std::max (w, h)) / float (RT_dimension_cap);
1106 denoise::Median med;
1107
1108 if (r >= 3) {
1109 med = denoise::Median::TYPE_7X7;
1110 } else if (r >= 2) {
1111 med = denoise::Median::TYPE_5X5_STRONG;
1112 } else if (r >= 1) {
1113 med = denoise::Median::TYPE_5X5_SOFT;
1114 } else {
1115 med = denoise::Median::TYPE_3X3_STRONG;
1116 }
1117
1118 denoise::Median_Denoise(Yr, Yr, luminance_noise_floor, w, h, med, 1, num_threads, L);
1119 }
1120
1121 float noise = alpha * 0.01f;
1122
1123 if (settings->verbose) {
1124 std::cout << "ToneMapFattal02: alpha = " << alpha << ", beta = " << beta
1125 << ", detail_level = " << detail_level << std::endl;
1126 }
1127
1128 rescale_nearest (Yr, L, multiThread);
1129 tmo_fattal02 (w2, h2, L, L, alpha, beta, noise, detail_level, multiThread);
1130
1131 const float hr = float(h2) / float(h);
1132 const float wr = float(w2) / float(w);
1133
1134 float scale = 65535.f;
1135 float offset = 0.f;
1136 {
1137 float ratio = 0.f;
1138 int ww, hh;
1139 if (w >= h) {
1140 ratio = 200.f / w;
1141 ww = 200;
1142 hh = ratio * h;
1143 } else {
1144 ratio = 200.f / h;
1145 hh = 200;
1146 ww = ratio * w;
1147 }
1148 Array2Df tmp(ww, hh);
1149 int sz = ww * hh;
1150 int idx = sz / 2;
1151 int oidx = LIM(int(sz * 0.05f + 0.5f), 1, sz-1);
1152 rescale_nearest(Yr, tmp, multiThread);
1153 std::sort(tmp.data(), tmp.data() + sz);
1154 float oldMedian = tmp(idx);
1155 float old_min = 0.f;
1156 for (int i = 0; i <= oidx; ++i) {
1157 old_min += tmp(i);
1158 }
1159 old_min /= oidx;
1160 rescale_nearest(L, tmp, multiThread);
1161 std::sort(tmp.data(), tmp.data() + sz);
1162 float newMedian = tmp(idx);
1163 scale = (oldMedian == 0.f || newMedian == 0.f) ? 65535.f : (oldMedian / newMedian); // avoid Nan
1164 float new_min = 0.f;
1165 for (int i = 0; i <= oidx; ++i) {
1166 new_min += tmp(i);
1167 }
1168 new_min /= oidx;
1169 offset = old_min - new_min;
1170 }
1171
1172 const bool satcontrol = params->fattal.satcontrol;
1173
1174 #ifdef _OPENMP
1175 #pragma omp parallel for schedule(dynamic,16) if(multiThread)
1176 #endif
1177 for (int y = 0; y < h; y++) {
1178 int yy = std::min(int(y * hr + 1), h2-1);
1179
1180 for (int x = 0; x < w; x++) {
1181 int xx = std::min(int(x * wr + 1), w2-1);
1182
1183 float Y = std::max(Yr(x, y), epsilon);
1184 float l = std::max(L(xx, yy), epsilon) * (scale / Y);
1185 float &r = rgb->r(y, x);
1186 float &g = rgb->g(y, x);
1187 float &b = rgb->b(y, x);
1188 float s = 1.f;
1189 if (l > 1.f) {
1190 r = max(r * l - offset, r);
1191 g = max(g * l - offset, g);
1192 b = max(b * l - offset, b);
1193 if (satcontrol) {
1194 s = pow_F(1.f / l, 0.3f);
1195 }
1196 } else {
1197 r *= l;
1198 g *= l;
1199 b *= l;
1200 if (satcontrol) {
1201 s = pow_F(l, 0.3f);
1202 }
1203 }
1204 if (satcontrol && s != 1.f) {
1205 float ll = luminance(r, g, b, ws);
1206 float rl = r - ll;
1207 float gl = g - ll;
1208 float bl = b - ll;
1209 r = ll + s * rl;
1210 g = ll + s * gl;
1211 b = ll + s * bl;
1212 }
1213
1214 assert(std::isfinite(rgb->r(y, x)));
1215 assert(std::isfinite(rgb->g(y, x)));
1216 assert(std::isfinite(rgb->b(y, x)));
1217 }
1218 }
1219 }
1220
1221 } // namespace
1222
1223
dynamicRangeCompression(Imagefloat * rgb)1224 void ImProcFunctions::dynamicRangeCompression(Imagefloat *rgb)
1225 {
1226 if (params->fattal.enabled) {
1227 ToneMapFattal02(rgb, this, params, multiThread);
1228 }
1229 }
1230
1231
buildGradientsMask(int W,int H,float ** luminance,float ** out,float amount,int nlevels,int detail_level,float alfa,float beta,bool multithread)1232 void buildGradientsMask(int W, int H, float **luminance, float **out,
1233 float amount, int nlevels, int detail_level,
1234 float alfa, float beta, bool multithread)
1235 {
1236 Array2Df Y(W, H, luminance);
1237 const float noise = alfa * 0.01f;
1238
1239 Array2Df *pyramids[nlevels];
1240 pyramids[0] = &Y;
1241 createGaussianPyramids(pyramids, nlevels, multithread);
1242
1243 // calculate gradients and its average values on pyramid levels
1244 Array2Df *gradients[nlevels];
1245 float avgGrad[nlevels];
1246
1247 for (int k = 0 ; k < nlevels ; k++) {
1248 gradients[k] = new Array2Df(pyramids[k]->getCols(), pyramids[k]->getRows());
1249 avgGrad[k] = calculateGradients(pyramids[k], gradients[k], k, multithread);
1250 if (k != 0) { // pyramids[0] is Y
1251 delete pyramids[k];
1252 }
1253 }
1254
1255
1256 // calculate fi matrix
1257 Array2Df FI(W, H, out);
1258 calculateFiMatrix(&FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise, multithread);
1259
1260 for (int i = 0 ; i < nlevels ; i++) {
1261 delete gradients[i];
1262 }
1263
1264 // rescale the mask
1265 float m = out[0][0];
1266 #ifdef _OPENMP
1267 # pragma omp parallel for reduction(max:m) if (multithread)
1268 #endif
1269 for (int y = 0; y < H; ++y) {
1270 for (int x = 0; x < W; ++x) {
1271 float v = std::abs(out[y][x]);
1272 out[y][x] = v;
1273 m = std::max(v, m);
1274 }
1275 }
1276 if (m > 0.f) {
1277 const float f = amount / m;
1278 #ifdef _OPENMP
1279 # pragma omp parallel for reduction(max:m) if (multithread)
1280 #endif
1281 for (int y = 0; y < H; ++y) {
1282 for (int x = 0; x < W; ++x) {
1283 out[y][x] *= f;
1284 }
1285 }
1286 }
1287
1288 // {
1289 // Imagefloat tmp(W, H);
1290 // for (int y = 0; y < H; ++y) {
1291 // for (int x = 0; x < W; ++x) {
1292 // tmp.r(y, x) = tmp.g(y, x) = tmp.b(y, x) = out[y][x] * 65535.f;
1293 // }
1294 // }
1295 // std::ostringstream name;
1296 // name << "/tmp/FI-" << W << "x" << H << ".tif";
1297 // tmp.saveAsTIFF(name.str(), 16);
1298 // }
1299 }
1300
1301
1302 } // namespace rtengine
1303