1 /*
2  * This file is a part of Luminance HDR package, based on pfstmo.
3  * ----------------------------------------------------------------------
4  * Copyright (C) 2007 Grzegorz Krawczyk
5  * Copyright (C) 2010-2012 Davide Anastasia
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  * ----------------------------------------------------------------------
21  *
22  */
23 
24 //! \brief Contrast mapping TMO [Mantiuk06]
25 //!
26 //! From:
27 //!
28 //! Rafal Mantiuk, Karol Myszkowski, Hans-Peter Seidel.
29 //! A Perceptual Framework for Contrast Processing of High Dynamic Range Images
30 //! In: ACM Transactions on Applied Perception 3 (3), pp. 286-308, 2006
31 //! \ref http://www.mpi-inf.mpg.de/~mantiuk/contrast_domain/
32 //! \author Radoslaw Mantiuk, <radoslaw.mantiuk@gmail.com>
33 //! \author Rafal Mantiuk, <mantiuk@gmail.com>
34 //! Updated 2007/12/17 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
35 //!  (more information on the changes:
36 //!  http://www.damtp.cam.ac.uk/user/ejb48/hdr/index.html)
37 //! Updated 2008/06/25 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
38 //!  bug fixes and OpenMP patches
39 //!  more on this:
40 //!  http://tinyurl.com/9plnn8c
41 //!  Optimization improvements by Lebed Dmytry
42 //! Updated 2008/07/26 by Dejan Beric <dejan.beric@live.com>
43 //!  Added the detail factor slider which offers more control over contrast in
44 //!  details
45 //! Update 2010/10/06 by Axel Voitier <axel.voitier@gmail.com>
46 //!  detail_factor patch in order to remove potential issues in a multithreading
47 //!  environment
48 //! \author Davide Anastasia <davideanastasia@users.sourceforge.net>
49 //!  Improvement & Clean up (August 2011)
50 //!  Refactoring code structure to improve modularity (September 2012)
51 //! \author Bruce Guenter <bruce@untroubled.org>
52 //!  Added trivial downsample and upsample functions when both dimension are
53 //!  even
54 //!
55 //! \note This implementation of Mantiuk06, while originally based on the source
56 //! code available in PFSTMO, is different in many ways. For this reason, while
57 //! the file mentions the original authors (and history of the file as well,
58 //! as above), license applied to this file is uniquely GPL2. If you are looking
59 //! for an implementation with a less stringent license, please refer to the
60 //! original implementation of this algorithm in PFSTMO.
61 
62 #include <algorithm>
63 #include <cassert>
64 #include <cmath>
65 #include <cstdio>
66 #include <cstdlib>
67 #include <cstring>
68 #include <iostream>
69 #include <vector>
70 
71 #ifdef _OPENMP
72 #include <omp.h>
73 #endif
74 
75 #include "../../sleef.c"
76 #include "../../opthelper.h"
77 
78 #include "TonemappingOperators/pfstmo.h"
79 #include "arch/malloc.h"
80 #include "arch/math.h"
81 #include "contrast_domain.h"
82 
83 #include "pyramid.h"
84 
85 #include "Libpfs/progress.h"
86 #include "Libpfs/utils/dotproduct.h"
87 #include "Libpfs/utils/minmax.h"
88 #include "Libpfs/utils/msec_timer.h"
89 #include "Libpfs/utils/numeric.h"
90 #include "Libpfs/utils/sse.h"
91 #include "Libpfs/rt_algo.h"
92 
93 using namespace pfs;
94 
95 // divG_sum = A * x = sum(divG(x))
multiplyA(PyramidT & px,const PyramidT & pC,const Array2Df & x,Array2Df & sumOfDivG)96 void multiplyA(PyramidT &px, const PyramidT &pC, const Array2Df &x,
97                Array2Df &sumOfDivG) {
98     px.computeGradients(x);
99 
100     // scale gradients by Cx,Cy from main pyramid
101     px.multiply(pC);
102 
103     // calculate the sum of divergences
104     px.computeSumOfDivergence(sumOfDivG);
105 }
106 
107 // conjugate linear equation solver overwrites pyramid!
108 //
109 // This version is a slightly modified version by
110 // Davide Anastasia <davideanastasia@users.sourceforge.net>
111 // March 25, 2011
112 //
113 namespace {
114 const int NUM_BACKWARDS_CEILING = 3;
115 }
116 
lincg(PyramidT & pyramid,PyramidT & pC,const Array2Df & b,Array2Df & x,const int itmax,const float tol,Progress & ph)117 void lincg(PyramidT &pyramid, PyramidT &pC, const Array2Df &b, Array2Df &x,
118            const int itmax, const float tol, Progress &ph) {
119 
120     float rdotr_curr;
121     float rdotr_prev;
122     float rdotr_best;
123     float alpha;
124     float beta;
125 
126     const size_t rows = pyramid.getRows();
127     const size_t cols = pyramid.getCols();
128     const size_t n = rows * cols;
129     const float tol2 = tol * tol;
130 
131     Array2Df x_best(cols, rows);
132     Array2Df r(cols, rows);
133     Array2Df p(cols, rows);
134     Array2Df Ap(cols, rows);
135 
136     // bnrm2 = ||b||
137     const float bnrm2 = utils::dotProduct(b.data(), n);
138 
139     // r = b - Ax
140     multiplyA(pyramid, pC, x, r);                  // r = A x
141     utils::vsub(b.data(), r.data(), r.data(), n);  // r = b - r
142 
143     // rdotr = r.r
144     rdotr_best = rdotr_curr = utils::dotProduct(r.data(), n);
145 
146     // Setup initial vector
147     std::copy(r.begin(), r.end(), p.begin());       // p = r
148     std::copy(x.begin(), x.end(), x_best.begin());  // x_best = x
149 
150     const float irdotr = rdotr_curr;
151     int phvalue = ph.value() + 8;
152     const float percent_sf = (100.0f - phvalue) / std::log(tol2 * bnrm2 / irdotr);
153 
154     int iter = 0;
155     int num_backwards = 0;
156     for (; iter < itmax; ++iter) {
157         // TEST
158         ph.setValue(
159             static_cast<int>(phvalue + std::max(std::log(rdotr_curr / irdotr) * percent_sf, 0.f)));
160         // User requested abort
161         if (ph.canceled() && iter > 0) {
162             break;
163         }
164 
165         // Ap = A p
166         multiplyA(pyramid, pC, p, Ap);
167 
168         // alpha = r.r / (p . Ap)
169         alpha = rdotr_curr / utils::dotProduct(p.data(), Ap.data(), n);
170 
171         // r = r - alpha Ap
172         utils::vsubs(r.data(), alpha, Ap.data(), r.data(), n);
173 
174         // rdotr = r.r
175         rdotr_prev = rdotr_curr;
176         rdotr_curr = utils::dotProduct(r.data(), n);
177 
178         // Have we gone unstable?
179         if (rdotr_curr > rdotr_prev) {
180             // Save where we've got to
181             if (num_backwards == 0 && rdotr_prev < rdotr_best) {
182                 rdotr_best = rdotr_prev;
183                 std::copy(x.begin(), x.end(), x_best.begin());
184             }
185 
186             num_backwards++;
187         } else {
188             num_backwards = 0;
189         }
190 
191         // x = x + alpha * p
192         utils::vadds(x.data(), alpha, p.data(), x.data(), n);
193 
194         // Exit if we're done
195         // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(rdotr/bnrm2));
196         if (rdotr_curr / bnrm2 < tol2) break;
197 
198         if (num_backwards > NUM_BACKWARDS_CEILING) {
199             // Reset
200             num_backwards = 0;
201             std::copy(x_best.begin(), x_best.end(), x.begin());
202 
203             // r = Ax
204             multiplyA(pyramid, pC, x, r);
205 
206             // r = b - r
207             utils::vsub(b.data(), r.data(), r.data(), n);
208 
209             // rdotr = r.r
210             rdotr_best = rdotr_curr = utils::dotProduct(r.data(), r.size());
211 
212             // p = r
213             std::copy(r.begin(), r.end(), p.begin());
214         } else {
215             // p = r + beta * p
216             beta = rdotr_curr / rdotr_prev;
217             utils::vadds(r.data(), beta, p.data(), p.data(), n);
218         }
219     }
220 
221     // Use the best version we found
222     if (rdotr_curr > rdotr_best) {
223         rdotr_curr = rdotr_best;
224         std::copy(x_best.begin(), x_best.end(), x.begin());
225     }
226 
227     if (rdotr_curr / bnrm2 > tol2) {
228         // Not converged
229         ph.setValue(
230             static_cast<int>(std::log(rdotr_curr / irdotr) * percent_sf));
231         if (iter == itmax) {
232             std::cerr << std::endl
233                       << "pfstmo_mantiuk06: Warning: Not "
234                          "converged (hit maximum iterations), error = "
235                       << std::sqrt(rdotr_curr / bnrm2) << " (should be below "
236                       << tol << ")" << std::endl;
237         } else {
238             std::cerr << std::endl
239                       << "pfstmo_mantiuk06: Warning: Not converged "
240                          "(going unstable), error = "
241                       << std::sqrt(rdotr_curr / bnrm2) << " (should be below "
242                       << tol << ")" << std::endl;
243         }
244     }
245 }
246 
transformToLuminance(PyramidT & pp,Array2Df & Y,const int itmax,const float tol,Progress & ph)247 void transformToLuminance(PyramidT &pp, Array2Df &Y, const int itmax,
248                           const float tol, Progress &ph) {
249     PyramidT pC = pp;  // copy ctor
250 
251     pp.computeScaleFactors(pC);
252 
253     // pyramidScaleGradient(pp, pC);
254     pp.multiply(pC);
255 
256     // size of the first level of the pyramid
257     Array2Df b(pp.getCols(), pp.getRows());
258 
259     // calculate the sum of divergences (equal to b)
260     pp.computeSumOfDivergence(b);
261 
262     // calculate luminances from gradients
263     lincg(pp, pC, b, Y, itmax, tol, ph);
264 }
265 
266 struct HistData {
267     float data;
268     float cdf;
269     size_t index;
270 };
271 
272 struct HistDataCompareData {
operator ()HistDataCompareData273     bool operator()(const HistData &v1, const HistData &v2) const {
274         return (v1.data < v2.data);
275     }
276 };
277 
278 struct HistDataCompareIndex {
operator ()HistDataCompareIndex279     bool operator()(const HistData &v1, const HistData &v2) const {
280         return (v1.index < v2.index);
281     }
282 };
283 
contrastEqualization(PyramidT & pp,const float contrastFactor)284 void contrastEqualization(PyramidT &pp, const float contrastFactor) {
285     // Count size
286     size_t totalPixels = 0;
287     for (PyramidT::const_iterator itCurr = pp.begin(), itEnd = pp.end();
288          itCurr != itEnd; ++itCurr) {
289         totalPixels += itCurr->size();
290     }
291 
292     // Allocate memory
293     std::vector<HistData> hist(totalPixels);
294 
295     // Build histogram info
296     size_t offset = 0;
297     for (PyramidT::const_iterator itCurr = pp.begin(), itEnd = pp.end();
298          itCurr != itEnd; ++itCurr) {
299         PyramidS::const_iterator xyGradIter = itCurr->begin();
300         PyramidS::const_iterator xyGradEnd = itCurr->end();
301 
302         for (; xyGradIter != xyGradEnd; ++xyGradIter) {
303             hist[offset].data = std::sqrt(std::pow(xyGradIter->gX(), 2) +
304                                           std::pow(xyGradIter->gY(), 2));
305             hist[offset].index = offset;
306 
307             offset++;
308         }
309     }
310 
311     std::sort(hist.begin(), hist.end(), HistDataCompareData());
312     assert(hist[0].data < hist[totalPixels - 1].data);
313 
314     // Calculate cdf
315     const float normalizationFactor = 1.0f / totalPixels;
316     for (size_t idx = 0; idx < totalPixels; ++idx) {
317         hist[idx].cdf = idx * normalizationFactor;
318     }
319 
320     // Recalculate in terms of indexes
321     std::sort(hist.begin(), hist.end(), HistDataCompareIndex());
322     assert(hist[0].index < hist[totalPixels - 1].index);
323     assert(hist[0].index == 0);
324 
325     // Remap gradient magnitudes
326     offset = 0;
327     for (PyramidT::iterator itCurr = pp.begin(), itEnd = pp.end();
328          itCurr != itEnd; ++itCurr) {
329         PyramidS::iterator xyGradIter = itCurr->begin();
330         PyramidS::iterator xyGradEnd = itCurr->end();
331 
332         for (; xyGradIter != xyGradEnd; ++xyGradIter) {
333             float scaleFactor =
334                 contrastFactor * hist[offset].cdf / hist[offset].data;
335 
336             *xyGradIter *= scaleFactor;
337 
338             offset++;
339         }
340     }
341 }
342 
343 namespace {
344 const float CUT_MARGIN = 0.1f;
345 const float DISP_DYN_RANGE = 2.3f;
346 
normalizeLuminanceAndRGB(Array2Df & R,Array2Df & G,Array2Df & B,Array2Df & Y)347 void normalizeLuminanceAndRGB(Array2Df &R, Array2Df &G, Array2Df &B,
348                               Array2Df &Y) {
349     const float Ymax = utils::maxElement(Y.data(), Y.size());
350     const float clip_min = 1e-7f * Ymax;
351 
352 // std::cout << "clip_min = " << clip_min << std::endl;
353 // std::cout << "Ymax = " << Ymax << std::endl;
354 #pragma omp parallel for
355     for (int idx = 0; idx < static_cast<int>(Y.size()); idx++) {
356         if (R(idx) < clip_min) R(idx) = clip_min;
357         if (G(idx) < clip_min) G(idx) = clip_min;
358         if (B(idx) < clip_min) B(idx) = clip_min;
359         if (Y(idx) < clip_min) Y(idx) = clip_min;
360 
361         float currY = 1.f / Y(idx);
362 
363         R(idx) *= currY;
364         G(idx) *= currY;
365         B(idx) *= currY;
366         Y(idx) = std::log10(Y(idx));
367     }
368 }
369 
370 /* Renormalize luminance */
denormalizeLuminance(Array2Df & Y)371 void denormalizeLuminance(Array2Df &Y) {
372     const size_t size = Y.size();
373 
374     float lumMin, lumMax;
375     lhdrengine::findMinMaxPercentile(Y.data(), size, CUT_MARGIN * 0.01f, lumMin, 1.f - CUT_MARGIN * 0.01f, lumMax, true);
376 
377     const float lumRange = 1.f / (lumMax - lumMin) * DISP_DYN_RANGE;
378 
379 #pragma omp parallel for  // shared(lumRange, lumMin)
380     for (int j = 0; j < static_cast<int>(size); j++) {
381         Y(j) = (Y(j) - lumMin) * lumRange - DISP_DYN_RANGE;  // x scaled
382     }
383 }
384 
385 template <typename T>
fastDecode(const T & value)386 inline T fastDecode(const T &value) {
387     if (value <= -5.766466716f) {
388         return (xexpf(value) * 12.92f);
389     }
390     return (1.055f * xexpf(1.f / 2.4f * value) - 0.055f);
391 }
392 
393 #ifdef __SSE2__
fastDecode(const vfloat & valuev,const vfloat & c0,const vfloat & c1,const vfloat & c2,const vfloat & c3,const vfloat & c4)394 inline vfloat fastDecode(const vfloat &valuev, const vfloat &c0, const vfloat &c1, const vfloat &c2, const vfloat &c3, const vfloat &c4) {
395     vmask selmask = vmaskf_le(valuev, c0);
396     vfloat tempv = vself(selmask, valuev, valuev * c1);
397     tempv = xexpf(tempv);
398     return vself(selmask, tempv * c2, tempv * c3 - c4);
399 }
400 #endif
401 
denormalizeRGB(Array2Df & R,Array2Df & G,Array2Df & B,const Array2Df & Y,float saturationFactor)402 void denormalizeRGB(Array2Df &R, Array2Df &G, Array2Df &B, const Array2Df &Y,
403                     float saturationFactor) {
404 
405     const float log10 = std::log(10.f);
406 
407 #ifdef __SSE2__
408     const vfloat log10v = F2V(log10);
409     const vfloat saturationFactorv = F2V(saturationFactor);
410     const vfloat c0 = F2V(-5.7664667f);
411     const vfloat c1 = F2V(0.416666667f);
412     const vfloat c2 = F2V(12.92f);
413     const vfloat c3 = F2V(1.055f);
414     const vfloat c4 = F2V(0.055f);
415 
416 #endif
417 /* Transform to sRGB */
418 #pragma omp parallel for
419     for (size_t i = 0; i < Y.getRows(); ++i) {
420         size_t j = 0;
421 #ifdef __SSE2__
422         for (; j < Y.getCols() - 3; j += 4) {
423             vfloat myYv = LVFU(Y(j, i)) * log10v;
424             STVFU(R(j, i), fastDecode(saturationFactorv * xlogf(LVFU(R(j, i))) + myYv, c0, c1, c2, c3, c4));
425             STVFU(G(j, i), fastDecode(saturationFactorv * xlogf(LVFU(G(j, i))) + myYv, c0, c1, c2, c3, c4));
426             STVFU(B(j, i), fastDecode(saturationFactorv * xlogf(LVFU(B(j, i))) + myYv, c0, c1, c2, c3, c4));
427         }
428 #endif
429         for (; j < Y.getCols(); ++j) {
430             float myY = Y(j, i) * log10;
431             R(j, i) = fastDecode(saturationFactor * xlogf(R(j, i)) + myY);
432             G(j, i) = fastDecode(saturationFactor * xlogf(G(j, i)) + myY);
433             B(j, i) = fastDecode(saturationFactor * xlogf(B(j, i)) + myY);
434         }
435     }
436 }
437 }
438 // tone mapping
tmo_mantiuk06_contmap(Array2Df & R,Array2Df & G,Array2Df & B,Array2Df & Y,const float contrastFactor,const float saturationFactor,float detailfactor,const int itmax,const float tol,Progress & ph)439 int tmo_mantiuk06_contmap(Array2Df &R, Array2Df &G, Array2Df &B, Array2Df &Y,
440                           const float contrastFactor,
441                           const float saturationFactor, float detailfactor,
442                           const int itmax, const float tol, Progress &ph) {
443 #ifdef TIMER_PROFILING
444     msec_timer stop_watch;
445     stop_watch.start();
446 #endif
447     assert(R.getCols() == G.getCols());
448     assert(G.getCols() == B.getCols());
449     assert(B.getCols() == Y.getCols());
450     assert(R.getRows() == G.getRows());
451     assert(G.getRows() == B.getRows());
452     assert(B.getRows() == Y.getRows());
453 
454     const size_t r = R.getRows();
455     const size_t c = R.getCols();
456     // const size_t n = r*c;
457 
458     normalizeLuminanceAndRGB(R, G, B, Y);
459     ph.setValue(2);
460 
461     // create pyramid
462     PyramidT pp(r, c);
463     ph.setValue(6);
464 
465     // calculate gradients for pyramid (Y won't be changed)
466     pp.computeGradients(Y);
467 
468     // transform gradients to R
469     pp.transformToR(detailfactor);
470     ph.setValue(13);
471 
472     // Contrast map
473     if (contrastFactor > 0.0f) {
474         // Contrast mapping
475         pp.scale(contrastFactor);
476     } else {
477         // Contrast equalization
478         contrastEqualization(pp, -contrastFactor);
479     }
480 
481     // transform R to gradients
482     pp.transformToG(detailfactor);
483     ph.setValue(40);
484 
485     // transform gradients to luminance Y (pp -> Y)
486     transformToLuminance(pp, Y, itmax, tol, ph);
487     denormalizeLuminance(Y);
488     denormalizeRGB(R, G, B, Y, saturationFactor);
489 
490 #ifdef TIMER_PROFILING
491     stop_watch.stop_and_update();
492     cout << endl;
493     cout << "tmo_mantiuk06 = " << stop_watch.get_time() << " msec" << endl;
494 #endif
495 
496     return PFSTMO_OK;
497 }
498