1 /*
2 * This file is a part of Luminance HDR package, based on pfstmo.
3 * ----------------------------------------------------------------------
4 * Copyright (C) 2007 Grzegorz Krawczyk
5 * Copyright (C) 2010-2012 Davide Anastasia
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * ----------------------------------------------------------------------
21 *
22 */
23
24 //! \brief Contrast mapping TMO [Mantiuk06]
25 //!
26 //! From:
27 //!
28 //! Rafal Mantiuk, Karol Myszkowski, Hans-Peter Seidel.
29 //! A Perceptual Framework for Contrast Processing of High Dynamic Range Images
30 //! In: ACM Transactions on Applied Perception 3 (3), pp. 286-308, 2006
31 //! \ref http://www.mpi-inf.mpg.de/~mantiuk/contrast_domain/
32 //! \author Radoslaw Mantiuk, <radoslaw.mantiuk@gmail.com>
33 //! \author Rafal Mantiuk, <mantiuk@gmail.com>
34 //! Updated 2007/12/17 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
35 //! (more information on the changes:
36 //! http://www.damtp.cam.ac.uk/user/ejb48/hdr/index.html)
37 //! Updated 2008/06/25 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
38 //! bug fixes and OpenMP patches
39 //! more on this:
40 //! http://tinyurl.com/9plnn8c
41 //! Optimization improvements by Lebed Dmytry
42 //! Updated 2008/07/26 by Dejan Beric <dejan.beric@live.com>
43 //! Added the detail factor slider which offers more control over contrast in
44 //! details
45 //! Update 2010/10/06 by Axel Voitier <axel.voitier@gmail.com>
46 //! detail_factor patch in order to remove potential issues in a multithreading
47 //! environment
48 //! \author Davide Anastasia <davideanastasia@users.sourceforge.net>
49 //! Improvement & Clean up (August 2011)
50 //! Refactoring code structure to improve modularity (September 2012)
51 //! \author Bruce Guenter <bruce@untroubled.org>
52 //! Added trivial downsample and upsample functions when both dimension are
53 //! even
54 //!
55 //! \note This implementation of Mantiuk06, while originally based on the source
56 //! code available in PFSTMO, is different in many ways. For this reason, while
57 //! the file mentions the original authors (and history of the file as well,
58 //! as above), license applied to this file is uniquely GPL2. If you are looking
59 //! for an implementation with a less stringent license, please refer to the
60 //! original implementation of this algorithm in PFSTMO.
61
62 #include <algorithm>
63 #include <cassert>
64 #include <cmath>
65 #include <cstdio>
66 #include <cstdlib>
67 #include <cstring>
68 #include <iostream>
69 #include <vector>
70
71 #ifdef _OPENMP
72 #include <omp.h>
73 #endif
74
75 #include "../../sleef.c"
76 #include "../../opthelper.h"
77
78 #include "TonemappingOperators/pfstmo.h"
79 #include "arch/malloc.h"
80 #include "arch/math.h"
81 #include "contrast_domain.h"
82
83 #include "pyramid.h"
84
85 #include "Libpfs/progress.h"
86 #include "Libpfs/utils/dotproduct.h"
87 #include "Libpfs/utils/minmax.h"
88 #include "Libpfs/utils/msec_timer.h"
89 #include "Libpfs/utils/numeric.h"
90 #include "Libpfs/utils/sse.h"
91 #include "Libpfs/rt_algo.h"
92
93 using namespace pfs;
94
95 // divG_sum = A * x = sum(divG(x))
multiplyA(PyramidT & px,const PyramidT & pC,const Array2Df & x,Array2Df & sumOfDivG)96 void multiplyA(PyramidT &px, const PyramidT &pC, const Array2Df &x,
97 Array2Df &sumOfDivG) {
98 px.computeGradients(x);
99
100 // scale gradients by Cx,Cy from main pyramid
101 px.multiply(pC);
102
103 // calculate the sum of divergences
104 px.computeSumOfDivergence(sumOfDivG);
105 }
106
107 // conjugate linear equation solver overwrites pyramid!
108 //
109 // This version is a slightly modified version by
110 // Davide Anastasia <davideanastasia@users.sourceforge.net>
111 // March 25, 2011
112 //
113 namespace {
114 const int NUM_BACKWARDS_CEILING = 3;
115 }
116
lincg(PyramidT & pyramid,PyramidT & pC,const Array2Df & b,Array2Df & x,const int itmax,const float tol,Progress & ph)117 void lincg(PyramidT &pyramid, PyramidT &pC, const Array2Df &b, Array2Df &x,
118 const int itmax, const float tol, Progress &ph) {
119
120 float rdotr_curr;
121 float rdotr_prev;
122 float rdotr_best;
123 float alpha;
124 float beta;
125
126 const size_t rows = pyramid.getRows();
127 const size_t cols = pyramid.getCols();
128 const size_t n = rows * cols;
129 const float tol2 = tol * tol;
130
131 Array2Df x_best(cols, rows);
132 Array2Df r(cols, rows);
133 Array2Df p(cols, rows);
134 Array2Df Ap(cols, rows);
135
136 // bnrm2 = ||b||
137 const float bnrm2 = utils::dotProduct(b.data(), n);
138
139 // r = b - Ax
140 multiplyA(pyramid, pC, x, r); // r = A x
141 utils::vsub(b.data(), r.data(), r.data(), n); // r = b - r
142
143 // rdotr = r.r
144 rdotr_best = rdotr_curr = utils::dotProduct(r.data(), n);
145
146 // Setup initial vector
147 std::copy(r.begin(), r.end(), p.begin()); // p = r
148 std::copy(x.begin(), x.end(), x_best.begin()); // x_best = x
149
150 const float irdotr = rdotr_curr;
151 int phvalue = ph.value() + 8;
152 const float percent_sf = (100.0f - phvalue) / std::log(tol2 * bnrm2 / irdotr);
153
154 int iter = 0;
155 int num_backwards = 0;
156 for (; iter < itmax; ++iter) {
157 // TEST
158 ph.setValue(
159 static_cast<int>(phvalue + std::max(std::log(rdotr_curr / irdotr) * percent_sf, 0.f)));
160 // User requested abort
161 if (ph.canceled() && iter > 0) {
162 break;
163 }
164
165 // Ap = A p
166 multiplyA(pyramid, pC, p, Ap);
167
168 // alpha = r.r / (p . Ap)
169 alpha = rdotr_curr / utils::dotProduct(p.data(), Ap.data(), n);
170
171 // r = r - alpha Ap
172 utils::vsubs(r.data(), alpha, Ap.data(), r.data(), n);
173
174 // rdotr = r.r
175 rdotr_prev = rdotr_curr;
176 rdotr_curr = utils::dotProduct(r.data(), n);
177
178 // Have we gone unstable?
179 if (rdotr_curr > rdotr_prev) {
180 // Save where we've got to
181 if (num_backwards == 0 && rdotr_prev < rdotr_best) {
182 rdotr_best = rdotr_prev;
183 std::copy(x.begin(), x.end(), x_best.begin());
184 }
185
186 num_backwards++;
187 } else {
188 num_backwards = 0;
189 }
190
191 // x = x + alpha * p
192 utils::vadds(x.data(), alpha, p.data(), x.data(), n);
193
194 // Exit if we're done
195 // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(rdotr/bnrm2));
196 if (rdotr_curr / bnrm2 < tol2) break;
197
198 if (num_backwards > NUM_BACKWARDS_CEILING) {
199 // Reset
200 num_backwards = 0;
201 std::copy(x_best.begin(), x_best.end(), x.begin());
202
203 // r = Ax
204 multiplyA(pyramid, pC, x, r);
205
206 // r = b - r
207 utils::vsub(b.data(), r.data(), r.data(), n);
208
209 // rdotr = r.r
210 rdotr_best = rdotr_curr = utils::dotProduct(r.data(), r.size());
211
212 // p = r
213 std::copy(r.begin(), r.end(), p.begin());
214 } else {
215 // p = r + beta * p
216 beta = rdotr_curr / rdotr_prev;
217 utils::vadds(r.data(), beta, p.data(), p.data(), n);
218 }
219 }
220
221 // Use the best version we found
222 if (rdotr_curr > rdotr_best) {
223 rdotr_curr = rdotr_best;
224 std::copy(x_best.begin(), x_best.end(), x.begin());
225 }
226
227 if (rdotr_curr / bnrm2 > tol2) {
228 // Not converged
229 ph.setValue(
230 static_cast<int>(std::log(rdotr_curr / irdotr) * percent_sf));
231 if (iter == itmax) {
232 std::cerr << std::endl
233 << "pfstmo_mantiuk06: Warning: Not "
234 "converged (hit maximum iterations), error = "
235 << std::sqrt(rdotr_curr / bnrm2) << " (should be below "
236 << tol << ")" << std::endl;
237 } else {
238 std::cerr << std::endl
239 << "pfstmo_mantiuk06: Warning: Not converged "
240 "(going unstable), error = "
241 << std::sqrt(rdotr_curr / bnrm2) << " (should be below "
242 << tol << ")" << std::endl;
243 }
244 }
245 }
246
transformToLuminance(PyramidT & pp,Array2Df & Y,const int itmax,const float tol,Progress & ph)247 void transformToLuminance(PyramidT &pp, Array2Df &Y, const int itmax,
248 const float tol, Progress &ph) {
249 PyramidT pC = pp; // copy ctor
250
251 pp.computeScaleFactors(pC);
252
253 // pyramidScaleGradient(pp, pC);
254 pp.multiply(pC);
255
256 // size of the first level of the pyramid
257 Array2Df b(pp.getCols(), pp.getRows());
258
259 // calculate the sum of divergences (equal to b)
260 pp.computeSumOfDivergence(b);
261
262 // calculate luminances from gradients
263 lincg(pp, pC, b, Y, itmax, tol, ph);
264 }
265
266 struct HistData {
267 float data;
268 float cdf;
269 size_t index;
270 };
271
272 struct HistDataCompareData {
operator ()HistDataCompareData273 bool operator()(const HistData &v1, const HistData &v2) const {
274 return (v1.data < v2.data);
275 }
276 };
277
278 struct HistDataCompareIndex {
operator ()HistDataCompareIndex279 bool operator()(const HistData &v1, const HistData &v2) const {
280 return (v1.index < v2.index);
281 }
282 };
283
contrastEqualization(PyramidT & pp,const float contrastFactor)284 void contrastEqualization(PyramidT &pp, const float contrastFactor) {
285 // Count size
286 size_t totalPixels = 0;
287 for (PyramidT::const_iterator itCurr = pp.begin(), itEnd = pp.end();
288 itCurr != itEnd; ++itCurr) {
289 totalPixels += itCurr->size();
290 }
291
292 // Allocate memory
293 std::vector<HistData> hist(totalPixels);
294
295 // Build histogram info
296 size_t offset = 0;
297 for (PyramidT::const_iterator itCurr = pp.begin(), itEnd = pp.end();
298 itCurr != itEnd; ++itCurr) {
299 PyramidS::const_iterator xyGradIter = itCurr->begin();
300 PyramidS::const_iterator xyGradEnd = itCurr->end();
301
302 for (; xyGradIter != xyGradEnd; ++xyGradIter) {
303 hist[offset].data = std::sqrt(std::pow(xyGradIter->gX(), 2) +
304 std::pow(xyGradIter->gY(), 2));
305 hist[offset].index = offset;
306
307 offset++;
308 }
309 }
310
311 std::sort(hist.begin(), hist.end(), HistDataCompareData());
312 assert(hist[0].data < hist[totalPixels - 1].data);
313
314 // Calculate cdf
315 const float normalizationFactor = 1.0f / totalPixels;
316 for (size_t idx = 0; idx < totalPixels; ++idx) {
317 hist[idx].cdf = idx * normalizationFactor;
318 }
319
320 // Recalculate in terms of indexes
321 std::sort(hist.begin(), hist.end(), HistDataCompareIndex());
322 assert(hist[0].index < hist[totalPixels - 1].index);
323 assert(hist[0].index == 0);
324
325 // Remap gradient magnitudes
326 offset = 0;
327 for (PyramidT::iterator itCurr = pp.begin(), itEnd = pp.end();
328 itCurr != itEnd; ++itCurr) {
329 PyramidS::iterator xyGradIter = itCurr->begin();
330 PyramidS::iterator xyGradEnd = itCurr->end();
331
332 for (; xyGradIter != xyGradEnd; ++xyGradIter) {
333 float scaleFactor =
334 contrastFactor * hist[offset].cdf / hist[offset].data;
335
336 *xyGradIter *= scaleFactor;
337
338 offset++;
339 }
340 }
341 }
342
343 namespace {
344 const float CUT_MARGIN = 0.1f;
345 const float DISP_DYN_RANGE = 2.3f;
346
normalizeLuminanceAndRGB(Array2Df & R,Array2Df & G,Array2Df & B,Array2Df & Y)347 void normalizeLuminanceAndRGB(Array2Df &R, Array2Df &G, Array2Df &B,
348 Array2Df &Y) {
349 const float Ymax = utils::maxElement(Y.data(), Y.size());
350 const float clip_min = 1e-7f * Ymax;
351
352 // std::cout << "clip_min = " << clip_min << std::endl;
353 // std::cout << "Ymax = " << Ymax << std::endl;
354 #pragma omp parallel for
355 for (int idx = 0; idx < static_cast<int>(Y.size()); idx++) {
356 if (R(idx) < clip_min) R(idx) = clip_min;
357 if (G(idx) < clip_min) G(idx) = clip_min;
358 if (B(idx) < clip_min) B(idx) = clip_min;
359 if (Y(idx) < clip_min) Y(idx) = clip_min;
360
361 float currY = 1.f / Y(idx);
362
363 R(idx) *= currY;
364 G(idx) *= currY;
365 B(idx) *= currY;
366 Y(idx) = std::log10(Y(idx));
367 }
368 }
369
370 /* Renormalize luminance */
denormalizeLuminance(Array2Df & Y)371 void denormalizeLuminance(Array2Df &Y) {
372 const size_t size = Y.size();
373
374 float lumMin, lumMax;
375 lhdrengine::findMinMaxPercentile(Y.data(), size, CUT_MARGIN * 0.01f, lumMin, 1.f - CUT_MARGIN * 0.01f, lumMax, true);
376
377 const float lumRange = 1.f / (lumMax - lumMin) * DISP_DYN_RANGE;
378
379 #pragma omp parallel for // shared(lumRange, lumMin)
380 for (int j = 0; j < static_cast<int>(size); j++) {
381 Y(j) = (Y(j) - lumMin) * lumRange - DISP_DYN_RANGE; // x scaled
382 }
383 }
384
385 template <typename T>
fastDecode(const T & value)386 inline T fastDecode(const T &value) {
387 if (value <= -5.766466716f) {
388 return (xexpf(value) * 12.92f);
389 }
390 return (1.055f * xexpf(1.f / 2.4f * value) - 0.055f);
391 }
392
393 #ifdef __SSE2__
fastDecode(const vfloat & valuev,const vfloat & c0,const vfloat & c1,const vfloat & c2,const vfloat & c3,const vfloat & c4)394 inline vfloat fastDecode(const vfloat &valuev, const vfloat &c0, const vfloat &c1, const vfloat &c2, const vfloat &c3, const vfloat &c4) {
395 vmask selmask = vmaskf_le(valuev, c0);
396 vfloat tempv = vself(selmask, valuev, valuev * c1);
397 tempv = xexpf(tempv);
398 return vself(selmask, tempv * c2, tempv * c3 - c4);
399 }
400 #endif
401
denormalizeRGB(Array2Df & R,Array2Df & G,Array2Df & B,const Array2Df & Y,float saturationFactor)402 void denormalizeRGB(Array2Df &R, Array2Df &G, Array2Df &B, const Array2Df &Y,
403 float saturationFactor) {
404
405 const float log10 = std::log(10.f);
406
407 #ifdef __SSE2__
408 const vfloat log10v = F2V(log10);
409 const vfloat saturationFactorv = F2V(saturationFactor);
410 const vfloat c0 = F2V(-5.7664667f);
411 const vfloat c1 = F2V(0.416666667f);
412 const vfloat c2 = F2V(12.92f);
413 const vfloat c3 = F2V(1.055f);
414 const vfloat c4 = F2V(0.055f);
415
416 #endif
417 /* Transform to sRGB */
418 #pragma omp parallel for
419 for (size_t i = 0; i < Y.getRows(); ++i) {
420 size_t j = 0;
421 #ifdef __SSE2__
422 for (; j < Y.getCols() - 3; j += 4) {
423 vfloat myYv = LVFU(Y(j, i)) * log10v;
424 STVFU(R(j, i), fastDecode(saturationFactorv * xlogf(LVFU(R(j, i))) + myYv, c0, c1, c2, c3, c4));
425 STVFU(G(j, i), fastDecode(saturationFactorv * xlogf(LVFU(G(j, i))) + myYv, c0, c1, c2, c3, c4));
426 STVFU(B(j, i), fastDecode(saturationFactorv * xlogf(LVFU(B(j, i))) + myYv, c0, c1, c2, c3, c4));
427 }
428 #endif
429 for (; j < Y.getCols(); ++j) {
430 float myY = Y(j, i) * log10;
431 R(j, i) = fastDecode(saturationFactor * xlogf(R(j, i)) + myY);
432 G(j, i) = fastDecode(saturationFactor * xlogf(G(j, i)) + myY);
433 B(j, i) = fastDecode(saturationFactor * xlogf(B(j, i)) + myY);
434 }
435 }
436 }
437 }
438 // tone mapping
tmo_mantiuk06_contmap(Array2Df & R,Array2Df & G,Array2Df & B,Array2Df & Y,const float contrastFactor,const float saturationFactor,float detailfactor,const int itmax,const float tol,Progress & ph)439 int tmo_mantiuk06_contmap(Array2Df &R, Array2Df &G, Array2Df &B, Array2Df &Y,
440 const float contrastFactor,
441 const float saturationFactor, float detailfactor,
442 const int itmax, const float tol, Progress &ph) {
443 #ifdef TIMER_PROFILING
444 msec_timer stop_watch;
445 stop_watch.start();
446 #endif
447 assert(R.getCols() == G.getCols());
448 assert(G.getCols() == B.getCols());
449 assert(B.getCols() == Y.getCols());
450 assert(R.getRows() == G.getRows());
451 assert(G.getRows() == B.getRows());
452 assert(B.getRows() == Y.getRows());
453
454 const size_t r = R.getRows();
455 const size_t c = R.getCols();
456 // const size_t n = r*c;
457
458 normalizeLuminanceAndRGB(R, G, B, Y);
459 ph.setValue(2);
460
461 // create pyramid
462 PyramidT pp(r, c);
463 ph.setValue(6);
464
465 // calculate gradients for pyramid (Y won't be changed)
466 pp.computeGradients(Y);
467
468 // transform gradients to R
469 pp.transformToR(detailfactor);
470 ph.setValue(13);
471
472 // Contrast map
473 if (contrastFactor > 0.0f) {
474 // Contrast mapping
475 pp.scale(contrastFactor);
476 } else {
477 // Contrast equalization
478 contrastEqualization(pp, -contrastFactor);
479 }
480
481 // transform R to gradients
482 pp.transformToG(detailfactor);
483 ph.setValue(40);
484
485 // transform gradients to luminance Y (pp -> Y)
486 transformToLuminance(pp, Y, itmax, tol, ph);
487 denormalizeLuminance(Y);
488 denormalizeRGB(R, G, B, Y, saturationFactor);
489
490 #ifdef TIMER_PROFILING
491 stop_watch.stop_and_update();
492 cout << endl;
493 cout << "tmo_mantiuk06 = " << stop_watch.get_time() << " msec" << endl;
494 #endif
495
496 return PFSTMO_OK;
497 }
498