1 /**
2  * @file tmo_fattal02.cpp
3  * @brief TMO: Gradient Domain High Dynamic Range Compression
4  *
5  * Implementation of Gradient Domain High Dynamic Range Compression
6  * by Raanan Fattal, Dani Lischinski, Michael Werman.
7  *
8  * @author Grzegorz Krawczyk, <krawczyk@mpi-sb.mpg.de>
9  *
10  *
11  * This file is a part of LuminanceHDR package, based on pfstmo.
12  * ----------------------------------------------------------------------
13  * Copyright (C) 2003,2004 Grzegorz Krawczyk
14  *
15  *  This program is free software; you can redistribute it and/or modify
16  *  it under the terms of the GNU General Public License as published by
17  *  the Free Software Foundation; either version 2 of the License, or
18  *  (at your option) any later version.
19  *
20  *  This program is distributed in the hope that it will be useful,
21  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *  GNU General Public License for more details.
24  *
25  *  You should have received a copy of the GNU General Public License
26  *  along with this program; if not, write to the Free Software
27  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28  * ----------------------------------------------------------------------
29  *
30  * $Id: tmo_fattal02.cpp,v 1.3 2008/11/04 23:43:08 rafm Exp $
31  */
32 
33 #include <algorithm>
34 #include <cstdio>
35 #include <iostream>
36 #include <iterator>
37 #include <vector>
38 
39 #include <assert.h>
40 #include <math.h>
41 
42 #include "Libpfs/array2d.h"
43 #include "Libpfs/rt_algo.h"
44 #include "Libpfs/progress.h"
45 #include "Libpfs/utils/msec_timer.h"
46 #include "TonemappingOperators/pfstmo.h"
47 #include "../../sleef.c"
48 #ifdef _OPENMP
49 #include <omp.h>
50 #endif
51 #include "opthelper.h"
52 
53 #include "pde.h"
54 #include "tmo_fattal02.h"
55 
56 using namespace std;
57 using namespace pfs;
58 using namespace utils;
59 
downSample(const pfs::Array2Df & A,pfs::Array2Df & B)60 void downSample(const pfs::Array2Df &A, pfs::Array2Df &B) {
61 
62     const int width = B.getCols();
63     const int height = B.getRows();
64 
65     // Note, I've uncommented all omp directives. They are all ok but are
66     // applied to too small problems and in total don't lead to noticable
67     // speed improvements. The main issue is the pde solver and in case of the
68     // fft solver uses optimised threaded fftw routines.
69     // Note from Ingo Weyrich. The above statement is wrong. There are some very expensive loops (exp, log, pow) which got a really good speedup by parallelized and vectorized code.
70     //#pragma omp parallel for
71     for (int y = 0; y < height; y++) {
72         for (int x = 0; x < width; x++) {
73             float p = A(2 * x, 2 * y);
74             p += A(2 * x + 1, 2 * y);
75             p += A(2 * x, 2 * y + 1);
76             p += A(2 * x + 1, 2 * y + 1);
77             B(x, y) = p * 0.25f;  // p / 4.0f;
78         }
79     }
80 }
81 
gaussianBlur(const pfs::Array2Df & I,pfs::Array2Df & L)82 void gaussianBlur(const pfs::Array2Df &I, pfs::Array2Df &L) {
83     const int width = I.getCols();
84     const int height = I.getRows();
85 
86     if (width < 3 || height < 3) {
87         if (&I != &L) {
88             for (int i = 0, n = width * height; i < n; ++i) {
89                 L (i) = I (i);
90             }
91         }
92 
93         return;
94     }
95 
96     pfs::Array2Df T(width, height);
97 
98     //--- X blur
99     #pragma omp parallel for
100 
101     for ( int y = 0 ; y < height ; y++ ) {
102         for ( int x = 1 ; x < width - 1 ; x++ ) {
103             float t = 2.f * I (x, y);
104             t += I (x - 1, y);
105             t += I (x + 1, y);
106             T (x, y) = t * 0.25f; // t / 4.f;
107         }
108 
109         T (0, y) = ( 3.f * I (0, y) + I (1, y) ) * 0.25f; // / 4.f;
110         T (width - 1, y) = ( 3.f * I (width - 1, y) + I (width - 2, y) ) * 0.25f; // / 4.f;
111     }
112 
113     //--- Y blur
114     #pragma omp parallel for
115 
116     for ( int x = 0 ; x < width - 7 ; x += 8 ) {
117         for ( int y = 1 ; y < height - 1 ; y++ ) {
118             for (int xx = 0; xx < 8; ++xx) {
119                 float t = 2.f * T (x + xx, y);
120                 t += T (x + xx, y - 1);
121                 t += T (x + xx, y + 1);
122                 L (x + xx, y) = t * 0.25f; // t/4.0f;
123             }
124         }
125 
126         for (int xx = 0; xx < 8; ++xx) {
127             L (x + xx, 0) = ( 3.f * T (x + xx, 0) + T (x + xx, 1) ) * 0.25f; // / 4.0f;
128             L (x + xx, height - 1) = ( 3.f * T (x + xx, height - 1) + T (x + xx, height - 2) ) * 0.25f; // / 4.0f;
129         }
130     }
131 
132     for ( int x = width - (width % 8) ; x < width ; x++ ) {
133         for ( int y = 1 ; y < height - 1 ; y++ ) {
134             float t = 2.f * T (x, y);
135             t += T (x, y - 1);
136             t += T (x, y + 1);
137             L (x, y) = t * 0.25f; // t/4.0f;
138         }
139 
140         L (x, 0) = ( 3.f * T (x, 0) + T (x, 1) ) * 0.25f; // / 4.0f;
141         L (x, height - 1) = ( 3.f * T (x, height - 1) + T (x, height - 2) ) * 0.25f; // / 4.0f;
142     }
143 }
144 
createGaussianPyramids(pfs::Array2Df & H,pfs::Array2Df ** pyramids,int nlevels)145 void createGaussianPyramids(pfs::Array2Df &H, pfs::Array2Df **pyramids,
146                             int nlevels) {
147 
148     int width = H.getCols();
149     int height = H.getRows();
150 
151     pfs::Array2Df *L = new pfs::Array2Df(width, height);
152     gaussianBlur(*pyramids[0], *L);
153 
154     for (int k = 1; k < nlevels; k++) {
155         width /= 2;
156         height /= 2;
157         pyramids[k] = new pfs::Array2Df(width, height);
158         downSample(*L, *pyramids[k]);
159         if(k < nlevels - 1) {
160             delete L;
161             L = new pfs::Array2Df(width, height);
162             gaussianBlur(*pyramids[k], *L);
163         }
164     }
165     delete L;
166 }
167 
168 //--------------------------------------------------------------------
169 
calculateGradients(pfs::Array2Df & H,pfs::Array2Df & G,int k)170 float calculateGradients(pfs::Array2Df &H, pfs::Array2Df &G, int k) {
171 
172     const int width = H.getCols();
173     const int height = H.getRows();
174     const float divider = pow(2.0f, k + 1);
175     double avgGrad = 0.0f; // use double precision for large summations
176 
177     #pragma omp parallel for reduction(+:avgGrad)
178     for (int y = 0; y < height; y++) {
179         for (int x = 0; x < width; x++) {
180             float gx, gy;
181             int w, n, e, s;
182             w = (x == 0 ? 0 : x - 1);
183             n = (y == 0 ? 0 : y - 1);
184             s = (y + 1 == height ? y : y + 1);
185             e = (x + 1 == width ? x : x + 1);
186 
187             gx = (H(w, y) - H(e, y)) / divider;
188 
189             gy = (H(x, s) - H(x, n)) / divider;
190             // note this implicitly assumes that H(-1)=H(0)
191             // for the fft-pde slover this would need adjustment as H(-1)=H(1)
192             // is assumed, which means gx=0.0, gy=0.0 at the boundaries
193             // however, the impact is not visible so we ignore this here
194 
195             G(x, y) = sqrt(gx * gx + gy * gy);
196             avgGrad += G(x, y);
197         }
198     }
199 
200     return avgGrad / (width * height);
201 }
202 
203 //--------------------------------------------------------------------
204 
upSample(const pfs::Array2Df & A,pfs::Array2Df & B)205 void upSample(const pfs::Array2Df &A, pfs::Array2Df &B) {
206 
207     const int width = B.getCols();
208     const int height = B.getRows();
209     const int awidth = A.getCols();
210     const int aheight = A.getRows();
211 
212     #pragma omp parallel for
213     for (int y = 0; y < height; y++) {
214         for (int x = 0; x < width; x++) {
215             int ax = static_cast<int>(x * 0.5f);  // x / 2.f;
216             int ay = static_cast<int>(y * 0.5f);  // y / 2.f;
217             ax = (ax < awidth) ? ax : awidth - 1;
218             ay = (ay < aheight) ? ay : aheight - 1;
219 
220             B(x, y) = A(ax, ay);
221         }
222     }
223 }
224 
calculateFiMatrix(pfs::Array2Df & FI,pfs::Array2Df * gradients[],float avgGrad[],int nlevels,int detail_level,float alfa,float beta,float noise,bool newfattal)225 void calculateFiMatrix(pfs::Array2Df &FI, pfs::Array2Df *gradients[],
226                        float avgGrad[], int nlevels, int detail_level,
227                        float alfa, float beta, float noise, bool newfattal) {
228 
229     int width = gradients[nlevels - 1]->getCols();
230     int height = gradients[nlevels - 1]->getRows();
231     pfs::Array2Df **fi = new pfs::Array2Df *[nlevels];
232 
233     fi[nlevels - 1] = new pfs::Array2Df(width, height);
234     if (newfattal) {
235         for (int k = 0; k < width * height; k++) {
236             (*fi[nlevels - 1])(k) = 1.0f;
237         }
238     }
239 
240     for (int k = nlevels - 1; k >= 0; k--) {
241         width = gradients[k]->getCols();
242         height = gradients[k]->getRows();
243 
244         // only apply gradients to levels>=detail_level but at least to the
245         // coarsest
246         if (k >= detail_level || k == nlevels - 1 || newfattal == false) {
247             #pragma omp parallel for
248             for (int y = 0; y < height; y++) {
249                 for (int x = 0; x < width; x++) {
250                     float grad = ((*gradients[k])(x, y) < 1e-4f)
251                                      ? 1e-4
252                                      : (*gradients[k])(x, y);
253                     float a = alfa * avgGrad[k];
254 
255                     float value = powf((grad + noise) / a, beta - 1.0f);
256 
257                     if (newfattal)
258                         (*fi[k])(x, y) *= value;
259                     else
260                         (*fi[k])(x, y) = value;
261                 }
262             }
263         }
264 
265         // create next level
266         if (k > 1) {
267             width = gradients[k - 1]->getCols();
268             height = gradients[k - 1]->getRows();
269             fi[k - 1] = new pfs::Array2Df(width, height);
270         } else
271             fi[0] = &FI;  // highest level -> result
272 
273         if (k > 0 && newfattal) {
274             upSample(*fi[k], *fi[k - 1]);  // upsample to next level
275             gaussianBlur(*fi[k - 1], *fi[k - 1]);
276         }
277     }
278 
279     for (int k = 1; k < nlevels; k++) {
280         delete fi[k];
281     }
282     delete[] fi;
283 }
284 
tmo_fattal02(size_t width,size_t height,const pfs::Array2Df & Y,pfs::Array2Df & L,float alfa,float beta,float noise,bool newfattal,bool fftsolver,int detail_level,pfs::Progress & ph)285 void tmo_fattal02(size_t width, size_t height, const pfs::Array2Df &Y,
286                   pfs::Array2Df &L, float alfa, float beta, float noise,
287                   bool newfattal, bool fftsolver, int detail_level,
288                   pfs::Progress &ph) {
289 #ifdef TIMER_PROFILING
290     msec_timer stop_watch;
291     stop_watch.start();
292 #endif
293     static const float black_point = 0.1f;
294     static const float white_point = 0.5f;
295     static const float gamma = 1.0f;  // 0.8f;
296     // static const int   detail_level = 3;
297     if (detail_level < 0) detail_level = 0;
298     if (detail_level > 3) detail_level = 3;
299 
300     ph.setValue(2);
301     if (ph.canceled()) return;
302 
303     int MSIZE = 32;  // minimum size of gaussian pyramid
304     // I believe a smaller value than 32 results in slightly better overall
305     // quality but I'm only applying this if the newly implemented fft solver
306     // is used in order not to change behaviour of the old version
307     // TODO: best let the user decide this value
308     if (fftsolver) {
309         MSIZE = 8;
310     }
311 
312     size_t size = width * height;
313 
314     // find max & min values, normalize to range 0..100 and take logarithm
315     float minLum = Y(0, 0);
316     float maxLum = Y(0, 0);
317 
318     for (size_t i = 0; i < size; i++) {
319         minLum = (Y(i) < minLum) ? Y(i) : minLum;
320         maxLum = (Y(i) > maxLum) ? Y(i) : maxLum;
321     }
322 
323     pfs::Array2Df H(width, height);
324 
325 #ifdef __SSE2__
326     const vfloat maxLumv = F2V(maxLum);
327     const vfloat c100v = F2V(100.f);
328     const vfloat epsv = F2V(1e-4f);
329 #endif
330         #pragma omp parallel for
331         for (size_t i = 0; i < height; ++i) {
332             size_t j = 0;
333 #ifdef __SSE2__
334             for (; j < width - 3; j += 4) {
335                 STVFU(H(j, i), xlogf(c100v * LVFU(Y(j, i)) / maxLumv + epsv));
336             }
337 #endif
338             for (; j < width; ++j) {
339                 H(j, i) = xlogf(100.0f * Y(j, i) / maxLum + 1e-4f);
340             }
341         }
342 
343     ph.setValue(4);
344 
345     // create gaussian pyramids
346     int mins = (width < height) ? width : height;  // smaller dimension
347     int nlevels = 0;
348     while (mins >= MSIZE) {
349         nlevels++;
350         mins /= 2;
351     }
352 
353     // The following lines solves a bug with images particularly small
354     if (nlevels == 0) nlevels = 1;
355 
356     pfs::Array2Df **pyramids = new pfs::Array2Df *[nlevels];
357     pyramids[0] = &H;
358     createGaussianPyramids(H, pyramids, nlevels);
359     ph.setValue(8);
360 
361     // calculate gradients and its average values on pyramid levels
362     pfs::Array2Df **gradients = new pfs::Array2Df *[nlevels];
363     float avgGrad[nlevels];
364     for (int k = 0; k < nlevels; k++) {
365         gradients[k] = new pfs::Array2Df(pyramids[k]->getCols(), pyramids[k]->getRows());
366         avgGrad[k] = calculateGradients(*pyramids[k], *gradients[k], k);
367     }
368     ph.setValue(12);
369 
370     // calculate fi matrix
371     pfs::Array2Df FI(width, height);
372     calculateFiMatrix(FI, gradients, avgGrad, nlevels, detail_level, alfa, beta,
373                       noise, newfattal);
374 
375     for (int i = 0; i < nlevels; i++) {
376         if(i != 0) // pyramids[0] is H. Will be deleted later
377             delete pyramids[i];
378         delete gradients[i];
379     }
380     delete[] pyramids;
381     delete[] gradients;
382     ph.setValue(16);
383     if (ph.canceled()) {
384         return;
385     }
386 
387     // attenuate gradients
388     pfs::Array2Df Gx(width, height);
389     pfs::Array2Df Gy(width, height);
390 
391     // the fft solver solves the Poisson pde but with slightly different
392     // boundary conditions, so we need to adjust the assembly of the right hand
393     // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
394     // Neumann conditions assume U(-1)=U(0)), see also divergence calculation
395 
396     if (fftsolver)
397         #pragma omp parallel for
398         for (size_t y = 0; y < height; y++)
399             for (size_t x = 0; x < width; x++) {
400                 // sets index+1 based on the boundary assumption H(N+1)=H(N-1)
401                 unsigned int yp1 = (y + 1 >= height ? height - 2 : y + 1);
402                 unsigned int xp1 = (x + 1 >= width ? width - 2 : x + 1);
403                 // forward differences in H, so need to use between-points
404                 // approx of FI
405                 Gx(x, y) =
406                     (H(xp1, y) - H(x, y)) * 0.5 * (FI(xp1, y) + FI(x, y));
407                 Gy(x, y) =
408                     (H(x, yp1) - H(x, y)) * 0.5 * (FI(x, yp1) + FI(x, y));
409             }
410     else
411         #pragma omp parallel for
412         for (size_t y = 0; y < height; y++)
413             for (size_t x = 0; x < width; x++) {
414                 int s, e;
415                 s = (y + 1 == height ? y : y + 1);
416                 e = (x + 1 == width ? x : x + 1);
417 
418                 Gx(x, y) = (H(e, y) - H(x, y)) * FI(x, y);
419                 Gy(x, y) = (H(x, s) - H(x, y)) * FI(x, y);
420             }
421 
422     ph.setValue(18);
423 
424     // calculate divergence
425 
426     pfs::Array2Df DivG(width, height);
427     #pragma omp parallel for
428     for (size_t y = 0; y < height; ++y) {
429         for (size_t x = 0; x < width; ++x) {
430             DivG(x, y) = Gx(x, y) + Gy(x, y);
431             if (x > 0) DivG(x, y) -= Gx(x - 1, y);
432             if (y > 0) DivG(x, y) -= Gy(x, y - 1);
433 
434             if (fftsolver) {
435                 if (x == 0) DivG(x, y) += Gx(x, y);
436                 if (y == 0) DivG(x, y) += Gy(x, y);
437             }
438         }
439     }
440 
441     ph.setValue(20);
442     if (ph.canceled()) {
443         return;
444     }
445 
446     // solve pde and exponentiate (ie recover compressed image)
447     {
448         pfs::Array2Df U(width, height);
449         if (fftsolver) {
450             solve_pde_fft(DivG, U, Gx, ph);
451         } else {
452             solve_pde_multigrid(&DivG, &U, ph);
453         }
454 #ifndef NDEBUG
455         printf("\npde residual error: %f\n", residual_pde(U, DivG));
456 #endif
457         ph.setValue(90);
458         if (ph.canceled()) {
459             return;
460         }
461 
462 #ifdef __SSE2__
463         const vfloat gammav = F2V(gamma);
464 #endif
465         #pragma omp parallel for
466         for (size_t i = 0; i < height; ++i) {
467             size_t j = 0;
468 #ifdef __SSE2__
469             for (; j < width - 3; j += 4) {
470                 STVFU(L(j, i), xexpf(gammav * LVFU(U(j, i))));
471             }
472 #endif
473             for (; j < width; ++j) {
474                 L(j, i) = xexpf(gamma * U(j, i));
475             }
476         }
477     }
478     ph.setValue(95);
479 
480     // remove percentile of min and max values and renormalize
481     float cut_min = 0.01f * black_point;
482     float cut_max = 1.0f - 0.01f * white_point;
483     assert(cut_min >= 0.0f && (cut_max <= 1.0f) && (cut_min < cut_max));
484     lhdrengine::findMinMaxPercentile(L.data(), width * height, cut_min, minLum, cut_max, maxLum, true);
485 
486     for (size_t idx = 0; idx < height * width; ++idx) {
487         L(idx) = (L(idx) - minLum) / (maxLum - minLum);
488         if (L(idx) <= 0.0f) {
489             L(idx) = 0.0;
490         }
491         // note, we intentionally do not cut off values > 1.0
492     }
493 
494     ph.setValue(96);
495 
496 #ifdef TIMER_PROFILING
497     stop_watch.stop_and_update();
498     cout << endl;
499     cout << "tmo_fattal02 = " << stop_watch.get_time() << " msec" << endl;
500 #endif
501 }
502