1 /**
2 * @file tmo_fattal02.cpp
3 * @brief TMO: Gradient Domain High Dynamic Range Compression
4 *
5 * Implementation of Gradient Domain High Dynamic Range Compression
6 * by Raanan Fattal, Dani Lischinski, Michael Werman.
7 *
8 * @author Grzegorz Krawczyk, <krawczyk@mpi-sb.mpg.de>
9 *
10 *
11 * This file is a part of LuminanceHDR package, based on pfstmo.
12 * ----------------------------------------------------------------------
13 * Copyright (C) 2003,2004 Grzegorz Krawczyk
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 * ----------------------------------------------------------------------
29 *
30 * $Id: tmo_fattal02.cpp,v 1.3 2008/11/04 23:43:08 rafm Exp $
31 */
32
33 #include <algorithm>
34 #include <cstdio>
35 #include <iostream>
36 #include <iterator>
37 #include <vector>
38
39 #include <assert.h>
40 #include <math.h>
41
42 #include "Libpfs/array2d.h"
43 #include "Libpfs/rt_algo.h"
44 #include "Libpfs/progress.h"
45 #include "Libpfs/utils/msec_timer.h"
46 #include "TonemappingOperators/pfstmo.h"
47 #include "../../sleef.c"
48 #ifdef _OPENMP
49 #include <omp.h>
50 #endif
51 #include "opthelper.h"
52
53 #include "pde.h"
54 #include "tmo_fattal02.h"
55
56 using namespace std;
57 using namespace pfs;
58 using namespace utils;
59
downSample(const pfs::Array2Df & A,pfs::Array2Df & B)60 void downSample(const pfs::Array2Df &A, pfs::Array2Df &B) {
61
62 const int width = B.getCols();
63 const int height = B.getRows();
64
65 // Note, I've uncommented all omp directives. They are all ok but are
66 // applied to too small problems and in total don't lead to noticable
67 // speed improvements. The main issue is the pde solver and in case of the
68 // fft solver uses optimised threaded fftw routines.
69 // Note from Ingo Weyrich. The above statement is wrong. There are some very expensive loops (exp, log, pow) which got a really good speedup by parallelized and vectorized code.
70 //#pragma omp parallel for
71 for (int y = 0; y < height; y++) {
72 for (int x = 0; x < width; x++) {
73 float p = A(2 * x, 2 * y);
74 p += A(2 * x + 1, 2 * y);
75 p += A(2 * x, 2 * y + 1);
76 p += A(2 * x + 1, 2 * y + 1);
77 B(x, y) = p * 0.25f; // p / 4.0f;
78 }
79 }
80 }
81
gaussianBlur(const pfs::Array2Df & I,pfs::Array2Df & L)82 void gaussianBlur(const pfs::Array2Df &I, pfs::Array2Df &L) {
83 const int width = I.getCols();
84 const int height = I.getRows();
85
86 if (width < 3 || height < 3) {
87 if (&I != &L) {
88 for (int i = 0, n = width * height; i < n; ++i) {
89 L (i) = I (i);
90 }
91 }
92
93 return;
94 }
95
96 pfs::Array2Df T(width, height);
97
98 //--- X blur
99 #pragma omp parallel for
100
101 for ( int y = 0 ; y < height ; y++ ) {
102 for ( int x = 1 ; x < width - 1 ; x++ ) {
103 float t = 2.f * I (x, y);
104 t += I (x - 1, y);
105 t += I (x + 1, y);
106 T (x, y) = t * 0.25f; // t / 4.f;
107 }
108
109 T (0, y) = ( 3.f * I (0, y) + I (1, y) ) * 0.25f; // / 4.f;
110 T (width - 1, y) = ( 3.f * I (width - 1, y) + I (width - 2, y) ) * 0.25f; // / 4.f;
111 }
112
113 //--- Y blur
114 #pragma omp parallel for
115
116 for ( int x = 0 ; x < width - 7 ; x += 8 ) {
117 for ( int y = 1 ; y < height - 1 ; y++ ) {
118 for (int xx = 0; xx < 8; ++xx) {
119 float t = 2.f * T (x + xx, y);
120 t += T (x + xx, y - 1);
121 t += T (x + xx, y + 1);
122 L (x + xx, y) = t * 0.25f; // t/4.0f;
123 }
124 }
125
126 for (int xx = 0; xx < 8; ++xx) {
127 L (x + xx, 0) = ( 3.f * T (x + xx, 0) + T (x + xx, 1) ) * 0.25f; // / 4.0f;
128 L (x + xx, height - 1) = ( 3.f * T (x + xx, height - 1) + T (x + xx, height - 2) ) * 0.25f; // / 4.0f;
129 }
130 }
131
132 for ( int x = width - (width % 8) ; x < width ; x++ ) {
133 for ( int y = 1 ; y < height - 1 ; y++ ) {
134 float t = 2.f * T (x, y);
135 t += T (x, y - 1);
136 t += T (x, y + 1);
137 L (x, y) = t * 0.25f; // t/4.0f;
138 }
139
140 L (x, 0) = ( 3.f * T (x, 0) + T (x, 1) ) * 0.25f; // / 4.0f;
141 L (x, height - 1) = ( 3.f * T (x, height - 1) + T (x, height - 2) ) * 0.25f; // / 4.0f;
142 }
143 }
144
createGaussianPyramids(pfs::Array2Df & H,pfs::Array2Df ** pyramids,int nlevels)145 void createGaussianPyramids(pfs::Array2Df &H, pfs::Array2Df **pyramids,
146 int nlevels) {
147
148 int width = H.getCols();
149 int height = H.getRows();
150
151 pfs::Array2Df *L = new pfs::Array2Df(width, height);
152 gaussianBlur(*pyramids[0], *L);
153
154 for (int k = 1; k < nlevels; k++) {
155 width /= 2;
156 height /= 2;
157 pyramids[k] = new pfs::Array2Df(width, height);
158 downSample(*L, *pyramids[k]);
159 if(k < nlevels - 1) {
160 delete L;
161 L = new pfs::Array2Df(width, height);
162 gaussianBlur(*pyramids[k], *L);
163 }
164 }
165 delete L;
166 }
167
168 //--------------------------------------------------------------------
169
calculateGradients(pfs::Array2Df & H,pfs::Array2Df & G,int k)170 float calculateGradients(pfs::Array2Df &H, pfs::Array2Df &G, int k) {
171
172 const int width = H.getCols();
173 const int height = H.getRows();
174 const float divider = pow(2.0f, k + 1);
175 double avgGrad = 0.0f; // use double precision for large summations
176
177 #pragma omp parallel for reduction(+:avgGrad)
178 for (int y = 0; y < height; y++) {
179 for (int x = 0; x < width; x++) {
180 float gx, gy;
181 int w, n, e, s;
182 w = (x == 0 ? 0 : x - 1);
183 n = (y == 0 ? 0 : y - 1);
184 s = (y + 1 == height ? y : y + 1);
185 e = (x + 1 == width ? x : x + 1);
186
187 gx = (H(w, y) - H(e, y)) / divider;
188
189 gy = (H(x, s) - H(x, n)) / divider;
190 // note this implicitly assumes that H(-1)=H(0)
191 // for the fft-pde slover this would need adjustment as H(-1)=H(1)
192 // is assumed, which means gx=0.0, gy=0.0 at the boundaries
193 // however, the impact is not visible so we ignore this here
194
195 G(x, y) = sqrt(gx * gx + gy * gy);
196 avgGrad += G(x, y);
197 }
198 }
199
200 return avgGrad / (width * height);
201 }
202
203 //--------------------------------------------------------------------
204
upSample(const pfs::Array2Df & A,pfs::Array2Df & B)205 void upSample(const pfs::Array2Df &A, pfs::Array2Df &B) {
206
207 const int width = B.getCols();
208 const int height = B.getRows();
209 const int awidth = A.getCols();
210 const int aheight = A.getRows();
211
212 #pragma omp parallel for
213 for (int y = 0; y < height; y++) {
214 for (int x = 0; x < width; x++) {
215 int ax = static_cast<int>(x * 0.5f); // x / 2.f;
216 int ay = static_cast<int>(y * 0.5f); // y / 2.f;
217 ax = (ax < awidth) ? ax : awidth - 1;
218 ay = (ay < aheight) ? ay : aheight - 1;
219
220 B(x, y) = A(ax, ay);
221 }
222 }
223 }
224
calculateFiMatrix(pfs::Array2Df & FI,pfs::Array2Df * gradients[],float avgGrad[],int nlevels,int detail_level,float alfa,float beta,float noise,bool newfattal)225 void calculateFiMatrix(pfs::Array2Df &FI, pfs::Array2Df *gradients[],
226 float avgGrad[], int nlevels, int detail_level,
227 float alfa, float beta, float noise, bool newfattal) {
228
229 int width = gradients[nlevels - 1]->getCols();
230 int height = gradients[nlevels - 1]->getRows();
231 pfs::Array2Df **fi = new pfs::Array2Df *[nlevels];
232
233 fi[nlevels - 1] = new pfs::Array2Df(width, height);
234 if (newfattal) {
235 for (int k = 0; k < width * height; k++) {
236 (*fi[nlevels - 1])(k) = 1.0f;
237 }
238 }
239
240 for (int k = nlevels - 1; k >= 0; k--) {
241 width = gradients[k]->getCols();
242 height = gradients[k]->getRows();
243
244 // only apply gradients to levels>=detail_level but at least to the
245 // coarsest
246 if (k >= detail_level || k == nlevels - 1 || newfattal == false) {
247 #pragma omp parallel for
248 for (int y = 0; y < height; y++) {
249 for (int x = 0; x < width; x++) {
250 float grad = ((*gradients[k])(x, y) < 1e-4f)
251 ? 1e-4
252 : (*gradients[k])(x, y);
253 float a = alfa * avgGrad[k];
254
255 float value = powf((grad + noise) / a, beta - 1.0f);
256
257 if (newfattal)
258 (*fi[k])(x, y) *= value;
259 else
260 (*fi[k])(x, y) = value;
261 }
262 }
263 }
264
265 // create next level
266 if (k > 1) {
267 width = gradients[k - 1]->getCols();
268 height = gradients[k - 1]->getRows();
269 fi[k - 1] = new pfs::Array2Df(width, height);
270 } else
271 fi[0] = &FI; // highest level -> result
272
273 if (k > 0 && newfattal) {
274 upSample(*fi[k], *fi[k - 1]); // upsample to next level
275 gaussianBlur(*fi[k - 1], *fi[k - 1]);
276 }
277 }
278
279 for (int k = 1; k < nlevels; k++) {
280 delete fi[k];
281 }
282 delete[] fi;
283 }
284
tmo_fattal02(size_t width,size_t height,const pfs::Array2Df & Y,pfs::Array2Df & L,float alfa,float beta,float noise,bool newfattal,bool fftsolver,int detail_level,pfs::Progress & ph)285 void tmo_fattal02(size_t width, size_t height, const pfs::Array2Df &Y,
286 pfs::Array2Df &L, float alfa, float beta, float noise,
287 bool newfattal, bool fftsolver, int detail_level,
288 pfs::Progress &ph) {
289 #ifdef TIMER_PROFILING
290 msec_timer stop_watch;
291 stop_watch.start();
292 #endif
293 static const float black_point = 0.1f;
294 static const float white_point = 0.5f;
295 static const float gamma = 1.0f; // 0.8f;
296 // static const int detail_level = 3;
297 if (detail_level < 0) detail_level = 0;
298 if (detail_level > 3) detail_level = 3;
299
300 ph.setValue(2);
301 if (ph.canceled()) return;
302
303 int MSIZE = 32; // minimum size of gaussian pyramid
304 // I believe a smaller value than 32 results in slightly better overall
305 // quality but I'm only applying this if the newly implemented fft solver
306 // is used in order not to change behaviour of the old version
307 // TODO: best let the user decide this value
308 if (fftsolver) {
309 MSIZE = 8;
310 }
311
312 size_t size = width * height;
313
314 // find max & min values, normalize to range 0..100 and take logarithm
315 float minLum = Y(0, 0);
316 float maxLum = Y(0, 0);
317
318 for (size_t i = 0; i < size; i++) {
319 minLum = (Y(i) < minLum) ? Y(i) : minLum;
320 maxLum = (Y(i) > maxLum) ? Y(i) : maxLum;
321 }
322
323 pfs::Array2Df H(width, height);
324
325 #ifdef __SSE2__
326 const vfloat maxLumv = F2V(maxLum);
327 const vfloat c100v = F2V(100.f);
328 const vfloat epsv = F2V(1e-4f);
329 #endif
330 #pragma omp parallel for
331 for (size_t i = 0; i < height; ++i) {
332 size_t j = 0;
333 #ifdef __SSE2__
334 for (; j < width - 3; j += 4) {
335 STVFU(H(j, i), xlogf(c100v * LVFU(Y(j, i)) / maxLumv + epsv));
336 }
337 #endif
338 for (; j < width; ++j) {
339 H(j, i) = xlogf(100.0f * Y(j, i) / maxLum + 1e-4f);
340 }
341 }
342
343 ph.setValue(4);
344
345 // create gaussian pyramids
346 int mins = (width < height) ? width : height; // smaller dimension
347 int nlevels = 0;
348 while (mins >= MSIZE) {
349 nlevels++;
350 mins /= 2;
351 }
352
353 // The following lines solves a bug with images particularly small
354 if (nlevels == 0) nlevels = 1;
355
356 pfs::Array2Df **pyramids = new pfs::Array2Df *[nlevels];
357 pyramids[0] = &H;
358 createGaussianPyramids(H, pyramids, nlevels);
359 ph.setValue(8);
360
361 // calculate gradients and its average values on pyramid levels
362 pfs::Array2Df **gradients = new pfs::Array2Df *[nlevels];
363 float avgGrad[nlevels];
364 for (int k = 0; k < nlevels; k++) {
365 gradients[k] = new pfs::Array2Df(pyramids[k]->getCols(), pyramids[k]->getRows());
366 avgGrad[k] = calculateGradients(*pyramids[k], *gradients[k], k);
367 }
368 ph.setValue(12);
369
370 // calculate fi matrix
371 pfs::Array2Df FI(width, height);
372 calculateFiMatrix(FI, gradients, avgGrad, nlevels, detail_level, alfa, beta,
373 noise, newfattal);
374
375 for (int i = 0; i < nlevels; i++) {
376 if(i != 0) // pyramids[0] is H. Will be deleted later
377 delete pyramids[i];
378 delete gradients[i];
379 }
380 delete[] pyramids;
381 delete[] gradients;
382 ph.setValue(16);
383 if (ph.canceled()) {
384 return;
385 }
386
387 // attenuate gradients
388 pfs::Array2Df Gx(width, height);
389 pfs::Array2Df Gy(width, height);
390
391 // the fft solver solves the Poisson pde but with slightly different
392 // boundary conditions, so we need to adjust the assembly of the right hand
393 // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
394 // Neumann conditions assume U(-1)=U(0)), see also divergence calculation
395
396 if (fftsolver)
397 #pragma omp parallel for
398 for (size_t y = 0; y < height; y++)
399 for (size_t x = 0; x < width; x++) {
400 // sets index+1 based on the boundary assumption H(N+1)=H(N-1)
401 unsigned int yp1 = (y + 1 >= height ? height - 2 : y + 1);
402 unsigned int xp1 = (x + 1 >= width ? width - 2 : x + 1);
403 // forward differences in H, so need to use between-points
404 // approx of FI
405 Gx(x, y) =
406 (H(xp1, y) - H(x, y)) * 0.5 * (FI(xp1, y) + FI(x, y));
407 Gy(x, y) =
408 (H(x, yp1) - H(x, y)) * 0.5 * (FI(x, yp1) + FI(x, y));
409 }
410 else
411 #pragma omp parallel for
412 for (size_t y = 0; y < height; y++)
413 for (size_t x = 0; x < width; x++) {
414 int s, e;
415 s = (y + 1 == height ? y : y + 1);
416 e = (x + 1 == width ? x : x + 1);
417
418 Gx(x, y) = (H(e, y) - H(x, y)) * FI(x, y);
419 Gy(x, y) = (H(x, s) - H(x, y)) * FI(x, y);
420 }
421
422 ph.setValue(18);
423
424 // calculate divergence
425
426 pfs::Array2Df DivG(width, height);
427 #pragma omp parallel for
428 for (size_t y = 0; y < height; ++y) {
429 for (size_t x = 0; x < width; ++x) {
430 DivG(x, y) = Gx(x, y) + Gy(x, y);
431 if (x > 0) DivG(x, y) -= Gx(x - 1, y);
432 if (y > 0) DivG(x, y) -= Gy(x, y - 1);
433
434 if (fftsolver) {
435 if (x == 0) DivG(x, y) += Gx(x, y);
436 if (y == 0) DivG(x, y) += Gy(x, y);
437 }
438 }
439 }
440
441 ph.setValue(20);
442 if (ph.canceled()) {
443 return;
444 }
445
446 // solve pde and exponentiate (ie recover compressed image)
447 {
448 pfs::Array2Df U(width, height);
449 if (fftsolver) {
450 solve_pde_fft(DivG, U, Gx, ph);
451 } else {
452 solve_pde_multigrid(&DivG, &U, ph);
453 }
454 #ifndef NDEBUG
455 printf("\npde residual error: %f\n", residual_pde(U, DivG));
456 #endif
457 ph.setValue(90);
458 if (ph.canceled()) {
459 return;
460 }
461
462 #ifdef __SSE2__
463 const vfloat gammav = F2V(gamma);
464 #endif
465 #pragma omp parallel for
466 for (size_t i = 0; i < height; ++i) {
467 size_t j = 0;
468 #ifdef __SSE2__
469 for (; j < width - 3; j += 4) {
470 STVFU(L(j, i), xexpf(gammav * LVFU(U(j, i))));
471 }
472 #endif
473 for (; j < width; ++j) {
474 L(j, i) = xexpf(gamma * U(j, i));
475 }
476 }
477 }
478 ph.setValue(95);
479
480 // remove percentile of min and max values and renormalize
481 float cut_min = 0.01f * black_point;
482 float cut_max = 1.0f - 0.01f * white_point;
483 assert(cut_min >= 0.0f && (cut_max <= 1.0f) && (cut_min < cut_max));
484 lhdrengine::findMinMaxPercentile(L.data(), width * height, cut_min, minLum, cut_max, maxLum, true);
485
486 for (size_t idx = 0; idx < height * width; ++idx) {
487 L(idx) = (L(idx) - minLum) / (maxLum - minLum);
488 if (L(idx) <= 0.0f) {
489 L(idx) = 0.0;
490 }
491 // note, we intentionally do not cut off values > 1.0
492 }
493
494 ph.setValue(96);
495
496 #ifdef TIMER_PROFILING
497 stop_watch.stop_and_update();
498 cout << endl;
499 cout << "tmo_fattal02 = " << stop_watch.get_time() << " msec" << endl;
500 #endif
501 }
502