/**
 * @brief Contrast mapping TMO
 *
 * From:
 *
 * Rafal Mantiuk, Karol Myszkowski, Hans-Peter Seidel.
 * A Perceptual Framework for Contrast Processing of High Dynamic Range Images
 * In: ACM Transactions on Applied Perception 3 (3), pp. 286-308, 2006
 * http://www.mpi-inf.mpg.de/~mantiuk/contrast_domain/
 *
 * This file is a part of the LuminanceHDR package, based on pfstmo.
 * ----------------------------------------------------------------------
 * Copyright (C) 2007 Grzegorz Krawczyk
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * ----------------------------------------------------------------------
 *
 * @author Radoslaw Mantiuk, <radoslaw.mantiuk@gmail.com>
 * @author Rafal Mantiuk, <mantiuk@gmail.com>
 * Updated 2007/12/17 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
 *  (more information on the changes:
 *  http://www.damtp.cam.ac.uk/user/ejb48/hdr/index.html)
 * Updated 2008/06/25 by Ed Brambley <E.J.Brambley@damtp.cam.ac.uk>
 *  bug fixes and OpenMP patches
 *  more on this:
 *  http://groups.google.com/group/pfstools/browse_thread/thread/de2378af98ec6185/0dee5304fc14e99d?hl=en#0dee5304fc14e99d
 *  Optimization improvements by Lebed Dmytry
 *
 * Updated 2008/07/26 by Dejan Beric <dejan.beric@live.com>
 *  Added the detail factor slider which offers more control over contrast in details
 * Updated 2010/10/06 by Axel Voitier <axel.voitier@gmail.com>
 *  detail_factor patch in order to remove potential issues in a multithreading environment
 * @author Davide Anastasia <davideanastasia@users.sourceforge.net>
 *  Improvements and clean up
 * @author Bruce Guenter <bruce@untroubled.org>
 *  Added trivial downsample and upsample functions when both dimensions are even
 *
 * $Id: contrast_domain.cpp,v 1.14 2008/08/26 17:08:49 rafm Exp $
 */

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <iostream>
#include <cassert>

#include "contrast_domain.h"
#include "arch/malloc.h"
#include "arch/math.h"
#include "TonemappingOperators/pfstmo.h"

#include "Libpfs/utils/sse.h"
#include "Libpfs/utils/numeric.h"
#include "Libpfs/utils/dotproduct.h"
#include "Libpfs/utils/msec_timer.h"

namespace test_mantiuk06
{

#define PYRAMID_MIN_PIXELS      3
#define LOOKUP_W_TO_R           107
//#define DEBUG_MANTIUK06

void swap_pointers(float* &pOne, float* &pTwo); // utility function

#ifdef DEBUG_MANTIUK06

void dump_matrix_to_file(const int width, const int height, const float* const m, const char * const file_name);
void matrix_show(const char* const text, int rows, int cols, const float* const data);
void pyramid_show(pyramid_t* pyramid);

#endif

static float W_table[] = {0.000000f,0.010000f,0.021180f,0.031830f,0.042628f,0.053819f,0.065556f,0.077960f,0.091140f,0.105203f,0.120255f,0.136410f,0.153788f,0.172518f,0.192739f,0.214605f,0.238282f,0.263952f,0.291817f,0.322099f,0.355040f,0.390911f,0.430009f,0.472663f,0.519238f,0.570138f,0.625811f,0.686754f,0.753519f,0.826720f,0.907041f,0.995242f,1.092169f,1.198767f,1.316090f,1.445315f,1.587756f,1.744884f,1.918345f,2.109983f,2.321863f,2.556306f,2.815914f,3.103613f,3.422694f,3.776862f,4.170291f,4.607686f,5.094361f,5.636316f,6.240338f,6.914106f,7.666321f,8.506849f,9.446889f,10.499164f,11.678143f,13.000302f,14.484414f,16.151900f,18.027221f,20.138345f,22.517282f,25.200713f,28.230715f,31.655611f,35.530967f,39.920749f,44.898685f,50.549857f,56.972578f,64.280589f,72.605654f,82.100619f,92.943020f,105.339358f,119.530154f,135.795960f,154.464484f,175.919088f,200.608905f,229.060934f,261.894494f,299.838552f,343.752526f,394.651294f,453.735325f,522.427053f,602.414859f,695.706358f,804.693100f,932.229271f,1081.727632f,1257.276717f,1463.784297f,1707.153398f,1994.498731f,2334.413424f,2737.298517f,3215.770944f,3785.169959f,4464.187290f,5275.653272f,6247.520102f,7414.094945f,8817.590551f,10510.080619f};
static float R_table[] = {0.000000f,0.009434f,0.018868f,0.028302f,0.037736f,0.047170f,0.056604f,0.066038f,0.075472f,0.084906f,0.094340f,0.103774f,0.113208f,0.122642f,0.132075f,0.141509f,0.150943f,0.160377f,0.169811f,0.179245f,0.188679f,0.198113f,0.207547f,0.216981f,0.226415f,0.235849f,0.245283f,0.254717f,0.264151f,0.273585f,0.283019f,0.292453f,0.301887f,0.311321f,0.320755f,0.330189f,0.339623f,0.349057f,0.358491f,0.367925f,0.377358f,0.386792f,0.396226f,0.405660f,0.415094f,0.424528f,0.433962f,0.443396f,0.452830f,0.462264f,0.471698f,0.481132f,0.490566f,0.500000f,0.509434f,0.518868f,0.528302f,0.537736f,0.547170f,0.556604f,0.566038f,0.575472f,0.584906f,0.594340f,0.603774f,0.613208f,0.622642f,0.632075f,0.641509f,0.650943f,0.660377f,0.669811f,0.679245f,0.688679f,0.698113f,0.707547f,0.716981f,0.726415f,0.735849f,0.745283f,0.754717f,0.764151f,0.773585f,0.783019f,0.792453f,0.801887f,0.811321f,0.820755f,0.830189f,0.839623f,0.849057f,0.858491f,0.867925f,0.877358f,0.886792f,0.896226f,0.905660f,0.915094f,0.924528f,0.933962f,0.943396f,0.952830f,0.962264f,0.971698f,0.981132f,0.990566f,1.000000f};
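// W_table and R_table tabulate the monotonic mapping between physical
// contrast (W) and the response of the visual transducer (R) used in the
// Mantiuk et al. 2006 framework, sampled at LOOKUP_W_TO_R points;
// lookup_table() below interpolates linearly between neighbouring samples
// in either direction.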

int imin(int a, int b)
{
  return a < b ? a : b;
}

// upsample the matrix
// the upsampled matrix is twice as large in each direction as the input in[]
// out must point to memory already allocated for the larger matrix
// outCols and outRows are the dimensions of the output matrix
void matrix_upsample_full(const int outCols, const int outRows, const float* const in, float* const out)
{
  const int inRows = outRows/2;
  const int inCols = outCols/2;

  // Transpose of experimental downsampling matrix (theoretically the correct thing to do)

  const float dx = (float)inCols / ((float)outCols);
  const float dy = (float)inRows / ((float)outRows);
  const float factor = 1.0f / (dx*dy); // This gives a genuine upsampling matrix, not the transpose of the downsampling matrix
  // const float factor = 1.0f; // Theoretically, this should be the best.

  for (int y = 0; y < outRows; y++)
  {
    const float sy = y * dy;
    const int iy1 =      (  y   * inRows) / outRows;
    const int iy2 = imin(((y+1) * inRows) / outRows, inRows-1);

    for (int x = 0; x < outCols; x++)
    {
      const float sx = x * dx;
      const int ix1 =      (  x   * inCols) / outCols;
      const int ix2 = imin(((x+1) * inCols) / outCols, inCols-1);

      out[x + y*outCols] = (((ix1+1) - sx)*((iy1+1 - sy)) * in[ix1 + iy1*inCols] +
                            ((ix1+1) - sx)*(sy+dy - (iy1+1)) * in[ix1 + iy2*inCols] +
                            (sx+dx - (ix1+1))*((iy1+1 - sy)) * in[ix2 + iy1*inCols] +
                            (sx+dx - (ix1+1))*(sy+dy - (iy1+1)) * in[ix2 + iy2*inCols])*factor;
    }
  }
}

void matrix_upsample_simple(const int outCols, const int outRows, const float* const in, float* const out)
{
  for (int y = 0; y < outRows; y++)
  {
    const int iy1 = y / 2;
    float* outp = out + y*outCols;
    const float* inp = in + iy1*(outCols/2);
    for (int x = 0; x < outCols; x+=2)
    {
      const int ix1 = x / 2;
      outp[x] = outp[x+1] = inp[ix1];
    }
  }
}

void matrix_upsample(const int outCols, const int outRows, const float* const in, float* const out)
{
  if (outRows%2 == 0 && outCols%2 == 0)
    matrix_upsample_simple(outCols, outRows, in, out);
  else
    matrix_upsample_full(outCols, outRows, in, out);
}

// downsample the matrix
void matrix_downsample_full(const int inCols, const int inRows, const float* const data, float* const res)
{
  const int outRows = inRows / 2;
  const int outCols = inCols / 2;

  const float dx = (float)inCols / ((float)outCols);
  const float dy = (float)inRows / ((float)outRows);

  // New downsampling by Ed Brambley:
  // Experimental downsampling that assumes pixels are square and
  // integrates over each new pixel to find the average value of the
  // underlying pixels.
  //
  // Consider the original pixels laid out, and the new (larger)
  // pixels laid out over the top of them.  Then the new value for
  // the larger pixels is just the integral over that pixel of what
  // shows through; i.e., the values of the pixels underneath
  // multiplied by how much of that pixel is showing.
  //
  // (ix1, iy1) is the coordinate of the top left visible pixel.
  // (ix2, iy2) is the coordinate of the bottom right visible pixel.
  // (fx1, fy1) is the fraction of the top left pixel showing.
  // (fx2, fy2) is the fraction of the bottom right pixel showing.

  const float normalize = 1.0f/(dx*dy);
  for (int y = 0; y < outRows; y++)
  {
      const int iy1 = (  y   * inRows) / outRows;
      const int iy2 = ((y+1) * inRows) / outRows;
      const float fy1 = (iy1+1) - y * dy;
      const float fy2 = (y+1) * dy - iy2;

      for (int x = 0; x < outCols; x++)
      {
          const int ix1 = (  x   * inCols) / outCols;
          const int ix2 = ((x+1) * inCols) / outCols;
          const float fx1 = (ix1+1) - x * dx;
          const float fx2 = (x+1) * dx - ix2;

          float pixVal = 0.0f;
          float factorx, factory;
          for (int i = iy1; i <= iy2 && i < inRows; i++)
          {
              if (i == iy1)
                  factory = fy1;  // We're just getting the bottom edge of this pixel
              else if (i == iy2)
                  factory = fy2;  // We're just getting the top edge of this pixel
              else
                  factory = 1.0f; // We've got the full height of this pixel
              for (int j = ix1; j <= ix2 && j < inCols; j++)
              {
                  if (j == ix1)
                      factorx = fx1;  // We've just got the right edge of this pixel
                  else if (j == ix2)
                      factorx = fx2; // We've just got the left edge of this pixel
                  else
                      factorx = 1.0f; // We've got the full width of this pixel

                  pixVal += data[j + i*inCols] * factorx * factory;
              }
          }

          res[x + y * outCols] = pixVal * normalize;  // Normalize by the area of the new pixel
      }
  }
}

void matrix_downsample_simple(const int inCols, const int inRows, const float* const data, float* const res)
{
    const int outRows = inRows / 2;
    const int outCols = inCols / 2;

    // Simplified downsampling by Bruce Guenter:
    //
    // Follows exactly the same math as the full downsampling above,
    // except that inRows and inCols are known to be even.  This allows
    // for all of the boundary cases to be eliminated, reducing the
    // sampling to a simple average.

    for (int y = 0; y < outRows; y++)
    {
        const int iy1 = y * 2;
        const float* datap = data + iy1 * inCols;
        float* resp = res + y * outCols;

        for (int x = 0; x < outCols; x++)
        {
            const int ix1 = x*2;

            resp[x] = ( datap[ix1]
                        + datap[ix1+1]
                        + datap[ix1   + inCols]
                        + datap[ix1+1 + inCols]) / 4.0f;
        }
    }
}

void matrix_downsample(const int inCols, const int inRows, const float* const data, float* const res)
{
  if (inCols % 2 == 0 && inRows % 2 == 0)
    matrix_downsample_simple(inCols, inRows, data, res);
  else
    matrix_downsample_full(inCols, inRows, data, res);
}

// return = a - b
void matrix_subtract(const int n, const float* const a, float* const b)
{
    pfs::utils::vsub(a, b, b, n);
}

// copy matrix a to b, return = a
void matrix_copy(const int n, const float* const a, float* const b)
{
     std::copy(a, a + n, b);
}

// multiply matrix a by scalar val
void matrix_multiply_const(const int n, float* const a, const float val)
{
     pfs::utils::vsmul(a, val, a, n);
}

// allocate memory for a float matrix
float* matrix_alloc(int size)
{
#ifdef __APPLE__
  float* m  = (float*)malloc      (sizeof(float)*size);
#else
  float* m    = (float*)_mm_malloc  (sizeof(float)*size, 32);
#endif
  if (m == NULL)
  {
    fprintf(stderr, "ERROR: malloc in matrix_alloc() (size:%d)", size);
    exit(155);
  }
  return m;
}

// free memory for matrix
void matrix_free(float* m)
{
  if (m != NULL)
  {
#ifdef __APPLE__
    free(m);
#else
    _mm_free(m);
#endif
    m = NULL;
  }
  else
  {
    fprintf(stderr, "ERROR: This pointer has already been freed");
  }
}

// dot product of two vectors stored as matrices (each with one dimension equal to 1)
float matrix_DotProduct(const int n, const float* const a, const float* const b)
{
    return pfs::utils::dotProduct(a, b, n);
}

// set all matrix elements to zero
void matrix_zero(int n, float* m)
{
  //bzero(m, n*sizeof(float));
  memset(m, 0, n*sizeof(float));
}

// Davide Anastasia <davideanastasia@users.sourceforge.net> (2010 08 31)
// calculate divergence of two gradient maps (Gx and Gy)
// divG(x,y) = Gx(x,y) - Gx(x-1,y) + Gy(x,y) - Gy(x,y-1)
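// Note: at a single pyramid level, combining the forward-difference gradients
// produced by calculate_gradient() with this backward-difference divergence
// yields the standard five-point Laplacian stencil; multiplyA() below applies
// that operator with the (Cx,Cy) weights and accumulates it over all pyramid
// levels, which is the linear operator inverted by lincg().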
void calculate_and_add_divergence(const int COLS, const int ROWS, const float* const Gx, const float* const Gy, float* const divG)
{
    float divGx, divGy;
    {
        {
            // kx = 0 AND ky = 0;
            divG[0] += Gx[0] + Gy[0];                       // OUT

            // ky = 0
            for (int kx=1; kx<COLS; kx++)
            {
                divGx = Gx[kx] - Gx[kx - 1];
                divGy = Gy[kx];
                divG[kx] += divGx + divGy;                    // OUT
            }
        }
        {
            for (int ky=1; ky<ROWS; ky++)
            {
                // kx = 0
                divGx = Gx[ky*COLS];
                divGy = Gy[ky*COLS] - Gy[ky*COLS - COLS];
                divG[ky*COLS] += divGx + divGy;               // OUT

                // kx > 0
                for(int kx=1; kx<COLS; kx++)
                {
                    divGx = Gx[kx + ky*COLS] - Gx[kx + ky*COLS-1];
                    divGy = Gy[kx + ky*COLS] - Gy[kx + ky*COLS - COLS];
                    divG[kx + ky*COLS] += divGx + divGy;        // OUT
                }
            }
        }
    }   // END PARALLEL SECTIONS
}

// Calculate the sum of divergences for all pyramid levels
// The coarser divergence map is upsampled and added to the divergence map of the next finer level
void pyramid_calculate_divergence_sum(pyramid_t* pyramid, float* divG_sum)
{
  float* temp = matrix_alloc((pyramid->rows*pyramid->cols)/4);

  // Find the coarsest pyramid, and the number of pyramid levels
  bool swap = true;
  while (pyramid->next != NULL)
  {
    swap = (!swap);
    pyramid = pyramid->next;
  }

  // For every level, we swap temp and divG_sum
  // So, if there are an odd number of levels...
  if (swap) swap_pointers(divG_sum, temp);

  if (pyramid)
  {
    matrix_zero(pyramid->rows * pyramid->cols, temp);
    calculate_and_add_divergence(pyramid->cols, pyramid->rows, pyramid->Gx, pyramid->Gy, temp);

    swap_pointers(divG_sum, temp);
    pyramid = pyramid->prev;
  }

  while (pyramid)
  {
    matrix_upsample(pyramid->cols, pyramid->rows, divG_sum, temp);
    calculate_and_add_divergence(pyramid->cols, pyramid->rows, pyramid->Gx, pyramid->Gy, temp);

    swap_pointers(divG_sum, temp);
    pyramid = pyramid->prev;
  }

  matrix_free(temp);
}

// calculate scale factors (Cx,Cy) for gradients (Gx,Gy)
// C = 1 / (a * max(|G|, detectT)^b); the smaller the gradient magnitude, the larger the scale factor
void calculate_scale_factor(const int n, const float* const G, float* const C)
{
  //  float GFIXATE = 0.1f;
  //  float EDGE_WEIGHT = 0.01f;
  const float detectT = 0.001f;
  const float a = 0.038737f;
  const float b = 0.537756f;

  for(int i=0; i<n; i++)
  {
    //#if 1
    const float g = std::max( detectT, fabsf(G[i]) );
    C[i] = 1.0f / (a*powf(g,b));
    //#else
    //    if(fabsf(G[i]) < GFIXATE)
    //      C[i] = 1.0f / EDGE_WEIGHT;
    //    else
    //      C[i] = 1.0f;
    //#endif
  }
}

// calculate scale factor for the whole pyramid
void pyramid_calculate_scale_factor(const pyramid_t* pyramid, pyramid_t* pC)
{
  while (pyramid != NULL)
  {
    calculate_scale_factor(pyramid->rows * pyramid->cols, pyramid->Gx, pC->Gx);
    calculate_scale_factor(pyramid->rows * pyramid->cols, pyramid->Gy, pC->Gy);

    pyramid = pyramid->next;
    pC = pC->next;
  }
}

// Scale gradient (Gx and Gy) by C (Cx and Cy)
// G = G * C
void scale_gradient(const int n, float* G, const float* C)
{
    pfs::utils::vmul(G, C, G, n);
}

// scale the gradients of one pyramid using the (Cx,Cy) scale factors from another pyramid
void pyramid_scale_gradient(pyramid_t* pyramid, const pyramid_t* pC)
{
  while (pyramid != NULL)
  {
    scale_gradient(pyramid->rows * pyramid->cols, pyramid->Gx, pC->Gx);
    scale_gradient(pyramid->rows * pyramid->cols, pyramid->Gy, pC->Gy);

    pyramid = pyramid->next;
    pC = pC->next;
  }
}


// free memory allocated for the pyramid
void pyramid_free(pyramid_t* pyramid)
{
  pyramid_t* t_next; // = pyramid->next;

  while (pyramid)
  {
    t_next = pyramid->next;

    if (pyramid->Gx != NULL)
    {
      matrix_free(pyramid->Gx);   //free(pyramid->Gx);
      pyramid->Gx = NULL;
    }
    if (pyramid->Gy != NULL)
    {
      matrix_free(pyramid->Gy);   //free(pyramid->Gy);
      pyramid->Gy = NULL;
    }

    //pyramid->prev = NULL;
    //pyramid->next = NULL;

    free(pyramid);
    pyramid = t_next;
  }
}


// allocate memory for the pyramid
pyramid_t * pyramid_allocate(int cols, int rows)
{
  pyramid_t* level = NULL;
  pyramid_t* pyramid = NULL;
  pyramid_t* prev = NULL;

  while (rows >= PYRAMID_MIN_PIXELS && cols >= PYRAMID_MIN_PIXELS)
  {
      assert(rows != 0);
      assert(cols != 0);

    level = (pyramid_t *) malloc(sizeof(pyramid_t));
    if (level == NULL)
    {
      fprintf(stderr, "ERROR: malloc in pyramid_alloc() (size:%zu)", sizeof(pyramid_t) );
      exit(155);
    }
    memset( level, 0, sizeof(pyramid_t) );

    level->rows = rows;
    level->cols = cols;
    const int size = level->rows * level->cols;

    assert(size != 0);

    level->Gx = matrix_alloc(size);
    if (level->Gx == NULL)
    {
      fprintf(stderr, "ERROR: malloc in pyramid_alloc() (size:%zu)", sizeof(pyramid_t) );
      exit(155);
    }
    level->Gy = matrix_alloc(size);
    if (level->Gy == NULL)
    {
      fprintf(stderr, "ERROR: malloc in pyramid_alloc() (size:%zu)", sizeof(pyramid_t) );
      exit(155);
    }

    level->prev = prev;
    if (prev != NULL)
      prev->next = level;
    prev = level;

    if (pyramid == NULL)
      pyramid = level;

    rows /= 2;
    cols /= 2;
  }

  return pyramid;
}


// calculate gradients
void calculate_gradient(const int COLS, const int ROWS, const float* const lum, float* const Gx, float* const Gy)
{
  int Y_IDX, IDX;

  for (int ky = 0; ky < ROWS-1; ky++)
  {
    Y_IDX = ky*COLS;
    for (int kx = 0; kx < COLS-1; kx++)
    {
      IDX = Y_IDX + kx;

      Gx[IDX] = lum[IDX + 1]    - lum[IDX];
      Gy[IDX] = lum[IDX + COLS] - lum[IDX];
    }

    Gx[Y_IDX + COLS - 1] = 0.0f; // last columns (kx = COLS - 1)
    Gy[Y_IDX + COLS - 1] = lum[Y_IDX + COLS - 1 + COLS] - lum[Y_IDX + COLS - 1];
  }

  // last row (ky = ROWS-1)
  for (int kx = 0; kx < (COLS-1); kx++)
  {
    IDX = (ROWS - 1)*COLS + kx;

    Gx[IDX] = lum[IDX + 1] - lum[IDX];
    Gy[IDX] = 0.0f;
  }

  // last row & last col = last element
  Gx[ROWS*COLS - 1] = 0.0f;
  Gy[ROWS*COLS - 1] = 0.0f;
}
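
// Note: the forward differences above leave the last column of Gx and the last
// row of Gy at zero, i.e. the gradient field is implicitly zero-padded at the
// image border.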

void swap_pointers(float* &pOne, float* &pTwo)
{
  float* pTemp = pOne;
  pOne = pTwo;
  pTwo = pTemp;
}

// calculate gradients for the pyramid
// Y (the input luminance) WILL NOT BE overwritten!
void pyramid_calculate_gradient(pyramid_t* pyramid, const float* Y)
{
  float* buffer1 = matrix_alloc((pyramid->rows*pyramid->cols)/4); // /4
  float* buffer2 = matrix_alloc((pyramid->rows*pyramid->cols)/16); // /16

  float* p_t1 = buffer1;
  float* p_t2 = buffer2;

  calculate_gradient(pyramid->cols, pyramid->rows, Y, pyramid->Gx, pyramid->Gy);

  pyramid_t* py_curr = pyramid->next;
  pyramid_t* py_prev = pyramid;

  if (py_curr)
  {
    matrix_downsample(py_prev->cols, py_prev->rows, Y, p_t1);
    calculate_gradient(py_curr->cols, py_curr->rows, p_t1, py_curr->Gx, py_curr->Gy);

    py_prev = py_curr;
    py_curr = py_curr->next;
  }

  while (py_curr)
  {
    matrix_downsample(py_prev->cols, py_prev->rows, p_t1, p_t2);
    calculate_gradient(py_curr->cols, py_curr->rows, p_t2, py_curr->Gx, py_curr->Gy);

    // swap pointers
    swap_pointers(p_t1, p_t2);

    py_prev = py_curr;
    py_curr = py_curr->next;
  }

  matrix_free(buffer1);
  matrix_free(buffer2);
}

// divG_sum = A * x = sum(divG(x))
void multiplyA(pyramid_t* px, pyramid_t* pC, const float* x, float* divG_sum)
{
  pyramid_calculate_gradient(px, x);                // x won't be changed
  pyramid_scale_gradient(px, pC);                   // scale gradients by Cx,Cy from main pyramid
  pyramid_calculate_divergence_sum(px, divG_sum);   // calculate the sum of divergences
}


// conjugate gradient linear equation solver
// overwrites pyramid!
// This version is a slightly modified version by Davide Anastasia <davideanastasia@users.sourceforge.net>
// March 25, 2011
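//
// The loop below implements the standard (unpreconditioned) conjugate
// gradient recurrences for A x = b, with A applied through multiplyA():
//   alpha_k = (r_k . r_k) / (p_k . A p_k)
//   x_{k+1} = x_k + alpha_k p_k
//   r_{k+1} = r_k - alpha_k A p_k
//   beta_k  = (r_{k+1} . r_{k+1}) / (r_k . r_k)
//   p_{k+1} = r_{k+1} + beta_k p_k
// plus a heuristic that remembers the best iterate seen so far and restarts
// from it when the residual norm keeps growing for more than
// num_backwards_ceiling consecutive iterations.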
void lincg(pyramid_t* pyramid, pyramid_t* pC, const float* const b, float* const x, const int itmax, const float tol, ProgressHelper *ph)
{
  const int num_backwards_ceiling = 3;

  float rdotr_curr, rdotr_prev, rdotr_best;
  float alpha, beta;

#ifdef TIMER_PROFILING
  msec_timer f_timer;
  f_timer.start();
#endif

  const int rows = pyramid->rows;
  const int cols = pyramid->cols;
  const int n = rows*cols;
  const float tol2 = tol*tol;

  float* const x_best = matrix_alloc(n);
  float* const r = matrix_alloc(n);
  float* const p = matrix_alloc(n);
  float* const Ap = matrix_alloc(n);

  // bnrm2 = ||b||
  const float bnrm2 = matrix_DotProduct(n, b, b);

  // r = b - Ax
  multiplyA(pyramid, pC, x, r);     // r = A x
  matrix_subtract(n, b, r);         // r = b - r

  // rdotr = r.r
  rdotr_best = rdotr_curr = matrix_DotProduct(n, r, r);

  // Setup initial vector
  matrix_copy(n, r, p);             // p = r
  matrix_copy(n, x, x_best);

  const float irdotr = rdotr_curr;
  const float percent_sf = 100.0f/logf(tol2*bnrm2/irdotr);

  int iter = 0;
  int num_backwards = 0;
  for (; iter < itmax; iter++)
  {
    // report progress
    ph->setValue( (int) (logf(rdotr_curr/irdotr)*percent_sf));
    // User requested abort
    if (ph->canceled() && iter > 0 )
    {
      break;
    }

    // Ap = A p
    multiplyA(pyramid, pC, p, Ap);

    // alpha = r.r / (p . Ap)
    alpha = rdotr_curr / matrix_DotProduct(n, p, Ap);

    // r = r - alpha Ap
    for (int i = 0; i < n; i++)
    {
      r[i] -= alpha * Ap[i];
    }

    // rdotr = r.r
    rdotr_prev = rdotr_curr;
    rdotr_curr = matrix_DotProduct(n, r, r);

    // Have we gone unstable?
    if (rdotr_curr > rdotr_prev)
    {
      // Save where we've got to
      if (num_backwards == 0 && rdotr_prev < rdotr_best)
      {
        rdotr_best = rdotr_prev;
        matrix_copy(n, x, x_best);
      }

      num_backwards++;
    }
    else
    {
      num_backwards = 0;
    }

    // x = x + alpha * p
    for (int i = 0; i < n; i++)
    {
      x[i] += alpha * p[i];
    }

    // Exit if we're done
    // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(rdotr/bnrm2));
    if (rdotr_curr/bnrm2 < tol2)
      break;

    if (num_backwards > num_backwards_ceiling)
    {
      // Reset
      num_backwards = 0;
      matrix_copy(n, x_best, x);

      // r = Ax
      multiplyA(pyramid, pC, x, r);

      // r = b - r
      matrix_subtract(n, b, r);

      // rdotr = r.r
      rdotr_best = rdotr_curr = matrix_DotProduct(n, r, r);

      // p = r
      matrix_copy(n, r, p);
    }
    else
    {
      // p = r + beta p
      beta = rdotr_curr/rdotr_prev;

      for (int i = 0; i < n; i++)
      {
        p[i] = r[i] + beta*p[i];
      }
    }
  }

  // Use the best version we found
  if (rdotr_curr > rdotr_best)
  {
    rdotr_curr = rdotr_best;
    matrix_copy(n, x_best, x);
  }

  if (rdotr_curr/bnrm2 > tol2)
  {
    // Not converged
    ph->setValue( (int) (logf(rdotr_curr/irdotr)*percent_sf));
    if (iter == itmax)
    {
      fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (hit maximum iterations), error = %g (should be below %g).\n", sqrtf(rdotr_curr/bnrm2), tol);
    }
    else
    {
      fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (going unstable), error = %g (should be below %g).\n", sqrtf(rdotr_curr/bnrm2), tol);
    }
  }
  else
  {
    ph->setValue( itmax );
  }

  matrix_free(x_best);
  matrix_free(p);
  matrix_free(Ap);
  matrix_free(r);

#ifdef TIMER_PROFILING
  f_timer.stop_and_update();
  std::cout << "lincg() = " << f_timer.get_time() << " msec" << std::endl;
#endif
}

// in_tab and out_tab should contain increasing float values
float lookup_table(const int n, const float* const in_tab, const float* const out_tab, const float val)
{
  if (unlikely(val < in_tab[0]))
    return out_tab[0];

  for (int j = 1; j < n; j++)
  {
    if (val < in_tab[j])
    {
      const float dd = (val - in_tab[j-1]) / (in_tab[j] - in_tab[j-1]);
      return out_tab[j-1] + (out_tab[j] - out_tab[j-1]) * dd;
    }
  }

  return out_tab[n-1];
}
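
// Worked example: with in_tab = {0, 1, 2} and out_tab = {0, 10, 40}, a query
// of val = 1.5 falls between the last two samples, so dd = 0.5 and the result
// is 10 + (40 - 10) * 0.5 = 25.  Values outside [in_tab[0], in_tab[n-1]] are
// clamped to the first/last output sample.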


// transform gradient (Gx,Gy) to R
void transform_to_R(const int n, float* const G, float detail_factor)
{
  const float log10=2.3025850929940456840179914546844*detail_factor;

  for (int j=0; j<n; j++)
  {
    // G to W
    float Curr_G = G[j];

    if (Curr_G < 0.0f)
    {
      Curr_G = -(powf(10, (-Curr_G) * log10) - 1.0f);
    } else {
      Curr_G = (powf(10, Curr_G * log10) - 1.0f);
    }
    // W to RESP
    if (Curr_G < 0.0f)
    {
      Curr_G = -lookup_table(LOOKUP_W_TO_R, W_table, R_table, -Curr_G);
    } else {
      Curr_G = lookup_table(LOOKUP_W_TO_R, W_table, R_table, Curr_G);
    }

    G[j] = Curr_G;
  }
}

// transform from R to G
void transform_to_G(const int n, float* const R, float detail_factor)
{
  // here we are actually changing the base of logarithm
  const float log10 = 2.3025850929940456840179914546844*detail_factor;

  for(int j=0; j<n; j++)
  {
    float Curr_R = R[j];

    // RESP to W
    if (Curr_R < 0.0f)
    {
      Curr_R = -lookup_table(LOOKUP_W_TO_R, R_table, W_table, -Curr_R);
    } else {
      Curr_R = lookup_table(LOOKUP_W_TO_R, R_table, W_table, Curr_R);
    }

    // W to G
    if (Curr_R < 0.0f)
    {
      Curr_R = -log((-Curr_R) + 1.0f) / log10;
    } else {
      Curr_R = log(Curr_R + 1.0f) / log10;
    }

    R[j] = Curr_R;
  }
}


// transform gradient (Gx,Gy) to R for the whole pyramid
void pyramid_transform_to_R(pyramid_t* pyramid, float detail_factor)
{
  while (pyramid != NULL)
  {
    transform_to_R(pyramid->rows * pyramid->cols, pyramid->Gx, detail_factor);
    transform_to_R(pyramid->rows * pyramid->cols, pyramid->Gy, detail_factor);

    pyramid = pyramid->next;
  }
}



// transform from R to G for the pyramid
void pyramid_transform_to_G(pyramid_t* pyramid, float detail_factor)
{
  while (pyramid != NULL)
  {
    transform_to_G(pyramid->rows*pyramid->cols, pyramid->Gx, detail_factor);
    transform_to_G(pyramid->rows*pyramid->cols, pyramid->Gy, detail_factor);

    pyramid = pyramid->next;
  }
}

// multiply gradient (Gx,Gy) values by float scalar value for the whole pyramid
void pyramid_gradient_multiply(pyramid_t* pyramid, const float val)
{
  while (pyramid != NULL)
  {
    matrix_multiply_const(pyramid->rows*pyramid->cols, pyramid->Gx, val);
    matrix_multiply_const(pyramid->rows*pyramid->cols, pyramid->Gy, val);

    pyramid = pyramid->next;
  }
}


int sort_float(const void* const v1, const void* const v2)
{
  if (*((float*)v1) < *((float*)v2))
    return -1;

  if(likely(*((float*)v1) > *((float*)v2)))
    return 1;

  return 0;
}


// transform gradients to luminance
void transform_to_luminance(pyramid_t* pp, float* x, ProgressHelper *ph, const int itmax, const float tol)
{
  pyramid_t* pC = pyramid_allocate(pp->cols, pp->rows);
  pyramid_calculate_scale_factor(pp, pC);             // calculate (Cx,Cy)
  pyramid_scale_gradient(pp, pC);                     // scale small gradients by (Cx,Cy);

  float* b = matrix_alloc(pp->cols * pp->rows);
  pyramid_calculate_divergence_sum(pp, b);            // calculate the sum of divergences (equal to b)

  // calculate luminances from gradients
  lincg(pp, pC, b, x, itmax, tol, ph);

  matrix_free(b);
  pyramid_free(pC);
}


struct hist_data
{
  float size;
  float cdf;
  int index;
};

int hist_data_order(const void* const v1, const void* const v2)
{
  if (((struct hist_data*) v1)->size < ((struct hist_data*) v2)->size)
    return -1;

  if (((struct hist_data*) v1)->size > ((struct hist_data*) v2)->size)
    return 1;

  return 0;
}


int hist_data_index(const void* const v1, const void* const v2)
{
  return ((struct hist_data*) v1)->index - ((struct hist_data*) v2)->index;
}

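// Histogram-equalize the gradient magnitudes across all pyramid levels:
// after the remapping below, the magnitude of each gradient becomes
// contrastFactor * cdf(magnitude), so the remapped magnitudes are
// (approximately) uniformly distributed while the gradient directions
// are preserved.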
void contrast_equalization(pyramid_t *pp, const float contrastFactor)
{
  // Count sizes
  int total_pixels = 0;
  pyramid_t* l = pp;
  while (l != NULL)
  {
    total_pixels += l->rows * l->cols;
    l = l->next;
  }

  // Allocate memory
  struct hist_data* hist = (struct hist_data*) malloc(sizeof(struct hist_data) * total_pixels);
  if (hist == NULL)
  {
    fprintf(stderr, "ERROR: malloc in contrast_equalization() (size:%zu)", sizeof(struct hist_data) * total_pixels);
    exit(155);
  }

  // Build histogram info
  l = pp;
  int index = 0;
  while ( l != NULL )
  {
    const int pixels = l->rows*l->cols;
    const int offset = index;
    for(int c = 0; c < pixels; c++)
    {
      hist[c+offset].size = sqrtf( l->Gx[c]*l->Gx[c] + l->Gy[c]*l->Gy[c] );
      hist[c+offset].index = c + offset;
    }
    index += pixels;
    l = l->next;
  }

  // Generate histogram
  qsort(hist, total_pixels, sizeof(struct hist_data), hist_data_order);
  assert( hist[0].size < hist[total_pixels-1].size );

  // Calculate cdf
  const float norm = 1.0f / (float) total_pixels;
  for (int i = 0; i < total_pixels; i++)
  {
    hist[i].cdf = ((float) i) * norm;
  }

  // Recalculate in terms of indexes
  qsort(hist, total_pixels, sizeof(struct hist_data), hist_data_index);
  assert( hist[0].index < hist[total_pixels-1].index );
  assert( hist[0].index == 0 );


  // Remap gradient magnitudes
  l = pp;
  index = 0;
  while  ( l != NULL )
  {
    const int pixels = l->rows*l->cols;
    const int offset = index;

    for( int c = 0; c < pixels; c++)
    {
      const float scale = contrastFactor * hist[c+offset].cdf/hist[c+offset].size;
      l->Gx[c] *= scale;
      l->Gy[c] *= scale;
    }
    index += pixels;
    l = l->next;
  }

  free(hist);
}

// tone mapping
int tmo_mantiuk06_contmap(const int c, const int r, float* const R, float* const G, float* const B, float* const Y, const float contrastFactor, const float saturationFactor, float detailfactor, const int itmax, const float tol, ProgressHelper *ph)
{
  const int n = c*r;

  /* Normalize */
  float Ymax = Y[0];
  for (int j = 1; j < n; j++)
    if (Y[j] > Ymax)
      Ymax = Y[j];

  const float clip_min = 1e-7f*Ymax;

  for (int j = 0; j < n; j++)
  {
    if ( unlikely(R[j] < clip_min) ) R[j] = clip_min;
    if ( unlikely(G[j] < clip_min) ) G[j] = clip_min;
    if ( unlikely(B[j] < clip_min) ) B[j] = clip_min;
    if ( unlikely(Y[j] < clip_min) ) Y[j] = clip_min;
  }

  for(int j=0;j<n;j++)
  {
    R[j] /= Y[j];
    G[j] /= Y[j];
    B[j] /= Y[j];
    Y[j] = log10f(Y[j]);
  }

  pyramid_t* pp = pyramid_allocate(c, r);                 // create pyramid

  pyramid_calculate_gradient(pp, Y);                      // calculate gradients for pyramid (Y won't be changed)
  pyramid_transform_to_R(pp, detailfactor);               // transform gradients to R

  /* Contrast map */
  if (contrastFactor > 0.0f)
  {
    pyramid_gradient_multiply(pp, contrastFactor);        // Contrast mapping
  }
  else
  {
    contrast_equalization(pp, -contrastFactor);           // Contrast equalization
  }

  pyramid_transform_to_G(pp, detailfactor);               // transform R to gradients
  transform_to_luminance(pp, Y, ph, itmax, tol);          // transform gradients to luminance Y
  pyramid_free(pp);

  /* Renormalize luminance */
  float* temp = matrix_alloc(n);

  matrix_copy(n, Y, temp);                                // copy Y to temp
  qsort(temp, n, sizeof(float), sort_float);              // sort temp in ascending order

  // const float median = (temp[(int)((n-1)/2)] + temp[(int)((n-1)/2+1)]) * 0.5f; // calculate median
  const float CUT_MARGIN = 0.1f;

  float trim = (n-1) * CUT_MARGIN * 0.01f;
  float delta = trim - floorf(trim);
  const float l_min = temp[(int)floorf(trim)] * delta + temp[(int)ceilf(trim)] * (1.0f-delta);

  trim = (n-1) * (100.0f - CUT_MARGIN) * 0.01f;
  delta = trim - floorf(trim);
  const float l_max = temp[(int)floorf(trim)] * delta + temp[(int)ceilf(trim)] * (1.0f-delta);

  matrix_free(temp);

  const float disp_dyn_range = 2.3f;

  for(int j=0; j<n; j++)
  {
    Y[j] = (Y[j] - l_min) / (l_max - l_min) * disp_dyn_range - disp_dyn_range; // x scaled
  }

  /* Transform to linear scale RGB */
  for(int j=0;j<n;j++)
  {
    Y[j] = powf(10, Y[j]);
    R[j] = powf( R[j], saturationFactor) * Y[j];
    G[j] = powf( G[j], saturationFactor) * Y[j];
    B[j] = powf( B[j], saturationFactor) * Y[j];
  }

  return PFSTMO_OK;
}
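
// Illustrative usage sketch (an assumption, not part of the operator itself):
// the RGB and Y planes are c*r floats holding linear HDR values, and the
// ProgressHelper instance comes from the host application.  The parameter
// values below are only examples, not LuminanceHDR defaults.
//
//   void example_tonemap(int cols, int rows,
//                        float* R, float* G, float* B, float* Y,
//                        ProgressHelper& ph)
//   {
//     const float contrastFactor   = 0.1f;  // > 0: contrast mapping, <= 0: equalization with -contrastFactor
//     const float saturationFactor = 0.8f;
//     const float detailFactor     = 1.0f;
//     tmo_mantiuk06_contmap(cols, rows, R, G, B, Y,
//                           contrastFactor, saturationFactor, detailFactor,
//                           200 /* itmax */, 1e-3f /* tol */, &ph);
//     // R, G and B now hold display-referred linear values (roughly 0..1).
//   }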

#ifdef DEBUG_MANTIUK06

#define PFSEOL "\x0a"
void dump_matrix_to_file(const int width, const int height, const float* const m, const char * const file_name)
{
  FILE *fh = fopen( file_name, "wb" );
  // assert( fh != NULL );

  fprintf( fh, "PFS1" PFSEOL "%d %d" PFSEOL "1" PFSEOL "0" PFSEOL
          "%s" PFSEOL "0" PFSEOL "ENDH", width, height, "Y");

  for( int y = 0; y < height; y++ )
    for( int x = 0; x < width; x++ ) {
      int idx = x + y*width;
      fwrite( &(m[idx]), sizeof( float ), 1, fh );
    }

  fclose( fh );
}

// display matrix in the console (debugging)
void matrix_show(const char* const text, int cols, int rows, const float* const data)
{
  const int _cols = cols;

  if(rows > 8)
    rows = 8;
  if(cols > 8)
    cols = 8;

  printf("\n%s\n", text);
  for(int ky=0; ky<rows; ky++)
  {
    for(int kx=0; kx<cols; kx++)
    {
      printf("%.06f  ", data[kx + ky*_cols]);
    }
    printf("\n");
  }
}

// display pyramid in the console (debugging)
void pyramid_show(pyramid_t* pyramid)
{
  char ss[30];

  while (pyramid->next != NULL)
  {
    pyramid = pyramid->next;
  }

  while (pyramid != NULL)
  {
    printf("\n----- pyramid_t level %d,%d\n", pyramid->cols, pyramid->rows);

    sprintf(ss, "Gx %p ", pyramid->Gx);
    if (pyramid->Gx != NULL)
    {
      matrix_show(ss,pyramid->cols, pyramid->rows, pyramid->Gx);
    }
    sprintf(ss, "Gy %p ", pyramid->Gy);
    if (pyramid->Gy != NULL)
    {
      matrix_show(ss,pyramid->cols, pyramid->rows, pyramid->Gy);
    }

    pyramid = pyramid->prev;
  }
}

#endif

} // namespace test_mantiuk06
