1 /**
2  * @brief Tone-mapping optimized for backward-compatible HDR image and video
3  * compression
4  *
5  * From:
6  * Mai, Z., Mansour, H., Mantiuk, R., Nasiopoulos, P., Ward, R., & Heidrich, W.
7  * Optimizing a tone curve for backward-compatible high dynamic range image and
8  * video compression.
9  * IEEE Transactions on Image Processing, 20(6), 1558 – 1571.
10  * doi:10.1109/TIP.2010.2095866, 2011
11  *
12  * This file is a part of LuminanceHDR package, based on pfstmo.
13  * ----------------------------------------------------------------------
14  *  This program is free software; you can redistribute it and/or modify
15  *  it under the terms of the GNU General Public License as published by
16  *  the Free Software Foundation; either version 2 of the License, or
17  *  (at your option) any later version.
18  *
19  *  This program is distributed in the hope that it will be useful,
20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  *  GNU General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License
25  *  along with this program; if not, write to the Free Software
26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27  * ----------------------------------------------------------------------
28  *
29  * @author Rafal Mantiuk, <mantiuk@gmail.com>
30  *
31  * $Id: pfstmo_mantiuk08.cpp,v 1.19 2013/12/28 14:00:54 rafm Exp $
32  */
33 
34 #ifdef _OPENMP
35 #include <omp.h>
36 #endif
37 #include "opthelper.h"
38 #include "sleef.c"
39 
40 #include <math.h>
41 #include <string.h>
42 #include <algorithm>
43 #include <iostream>
44 
45 #include "Libpfs/utils/msec_timer.h"
46 #include "compression_tmo.h"
47 
48 #ifdef BRANCH_PREDICTION
49 #define likely(x) __builtin_expect((x), 1)
50 #define unlikely(x) __builtin_expect((x), 0)
51 #else
52 #define likely(x) (x)
53 #define unlikely(x) (x)
54 #endif
55 
56 /**
57  * Lookup table on a uniform array & interpolation
58  *
59  * x_i must be at least two elements
60  * y_i must be initialized after creating an object
61  */
62 namespace mai {
63 class UniformArrayLUT {
64     double start_v;
65     size_t lut_size;
66     double delta;
67 
68     bool own_y_i;
69 
70    public:
71     double *y_i;
72 
UniformArrayLUT(double from,double to,int lut_size,double * y_i=NULL)73     UniformArrayLUT(double from, double to, int lut_size, double *y_i = NULL)
74         : start_v(from),
75           lut_size(lut_size),
76           delta((to - from) / (double)lut_size) {
77         if (y_i == NULL) {
78             this->y_i = new double[lut_size];
79             own_y_i = true;
80         } else {
81             this->y_i = y_i;
82             own_y_i = false;
83         }
84     }
85 
UniformArrayLUT()86     UniformArrayLUT() : start_v(0.), lut_size(0), delta(0.), own_y_i(false), y_i(NULL) {}
87 
UniformArrayLUT(const UniformArrayLUT & other)88     UniformArrayLUT(const UniformArrayLUT &other)
89         : start_v(other.start_v), lut_size(other.lut_size), delta(other.delta) {
90         this->y_i = new double[lut_size];
91         own_y_i = true;
92         memcpy(this->y_i, other.y_i, lut_size * sizeof(double));
93     }
94 
operator =(const UniformArrayLUT & other)95     UniformArrayLUT &operator=(const UniformArrayLUT &other) {
96         if (&other != this) {
97             this->lut_size = other.lut_size;
98             this->delta = other.delta;
99             this->start_v = other.start_v;
100             this->y_i = new double[lut_size];
101             own_y_i = true;
102             memcpy(this->y_i, other.y_i, lut_size * sizeof(double));
103         }
104         return *this;
105     }
106 
~UniformArrayLUT()107     ~UniformArrayLUT() {
108         if (own_y_i) delete[] y_i;
109     }
110 
interp(double x)111     double interp(double x) {
112         const double ind_f = (x - start_v) / delta;
113         const size_t ind_low = (size_t)(ind_f);
114         const size_t ind_hi = (size_t)ceil(ind_f);
115 
116         if (unlikely(ind_f < 0))  // Out of range checks
117             return y_i[0];
118         if (unlikely(ind_hi >= lut_size)) return y_i[lut_size - 1];
119 
120         if (unlikely(ind_low == ind_hi))
121             return y_i[ind_low];  // No interpolation necessary
122 
123         return y_i[ind_low] +
124                (y_i[ind_hi] - y_i[ind_low]) *
125                    (ind_f - (double)ind_low);  // Interpolation
126     }
127 };
128 
129 class ImgHistogram {
130    public:
131     const float L_min, L_max;
132     const float delta;
133     int *bins;
134     double *p;
135     int bin_count;
136 
ImgHistogram()137     ImgHistogram() : L_min(-6.f), L_max(9.f), delta(0.1), bins(NULL), p(NULL) {
138         bin_count = (int)ceil((L_max - L_min) / delta);
139         bins = new int[bin_count];
140         p = new double[bin_count];
141     }
142 
~ImgHistogram()143     ~ImgHistogram() {
144         delete[] bins;
145         delete[] p;
146     }
147 
compute(const float * img,size_t pixel_count)148     void compute(const float *img, size_t pixel_count) {
149         std::fill(bins, bins + bin_count, 0);
150 
151         int pp_count = 0;
152 
153 #pragma omp parallel
154 {
155         int *binsThr = new int[bin_count];
156         std::fill(binsThr, binsThr + bin_count, 0);
157 
158         int pp_countThr = 0;
159         #pragma omp for nowait
160         for (size_t pp = 0; pp < pixel_count; pp++) {
161             int bin_index = (img[pp] - L_min) / delta;
162             // ignore anything outside the range
163             if (bin_index < 0 || bin_index >= bin_count) continue;
164             binsThr[bin_index]++;
165             pp_countThr++;
166         }
167         #pragma omp critical
168         {
169             pp_count += pp_countThr;
170             for (int bb = 0; bb < bin_count; bb++) {
171                 bins[bb] += binsThr[bb];
172             }
173             delete [] binsThr;
174         }
175 }
176         for (int bb = 0; bb < bin_count; bb++) {
177             p[bb] = (double)bins[bb] / (double)pp_count;
178         }
179     }
180 };
181 
safelog10f(float x)182 inline float safelog10f(float x) {
183     return xlogf(std::max(x, 1e-5f)) * 0.43429448190325182765112891891661f;
184 }
185 
186 #ifdef __SSE2__
safelog10f(vfloat xv,vfloat log10v,vfloat minv)187 inline vfloat safelog10f(vfloat xv, vfloat log10v, vfloat minv) {
188     return xlogf(vmaxf(xv, minv)) * log10v;
189 }
190 #endif
191 
tonemap(const float * R_in,const float * G_in,float * B_in,size_t width,size_t height,float * R_out,float * G_out,float * B_out,const float * L_in,pfs::Progress & ph)192 void CompressionTMO::tonemap(const float *R_in, const float *G_in, float *B_in,
193                              size_t width, size_t height, float *R_out, float *G_out,
194                              float *B_out, const float *L_in,
195                              pfs::Progress &ph) {
196 #ifdef TIMER_PROFILING
197     msec_timer stop_watch;
198     stop_watch.start();
199 #endif
200     const size_t pix_count = width * height;
201 
202     ph.setValue(2);
203     // Compute log of Luminance
204     float *logL = new float[pix_count];
205 
206 #ifdef __SSE2__
207     const vfloat log10v = F2V(0.43429448190325182765112891891661f);
208     const vfloat minv = F2V(1e-5f);
209     #pragma omp parallel for
210     for (size_t pp = 0; pp < pix_count - 3; pp += 4) {
211         STVFU(logL[pp], safelog10f(LVFU(L_in[pp]), log10v, minv));
212     }
213 
214     for (size_t pp = pix_count - (pix_count % 4); pp < pix_count; pp++) {
215         logL[pp] = safelog10f(L_in[pp]);
216     }
217 #else
218     #pragma omp parallel for
219     for (size_t pp = 0; pp < pix_count; pp++) {
220         logL[pp] = safelog10f(L_in[pp]);
221     }
222 
223 #endif
224     ImgHistogram H;
225     H.compute(logL, pix_count);
226 
227     // Instantiate LUT
228     UniformArrayLUT lut(H.L_min, H.L_max, H.bin_count);
229 
230     // Compute slopes
231     //    std::unique_ptr<double[]> s(new double[H.bin_count]);
232     double *s = new double[H.bin_count];
233     {
234         double d = 0;
235 
236         for (int bb = 0; bb < H.bin_count; bb++) {
237             d += cbrt(H.p[bb]);
238         }
239 
240         d *= H.delta;
241 
242         for (int bb = 0; bb < H.bin_count; bb++) {
243             s[bb] = cbrt(H.p[bb]) / d;
244         }
245     }
246     ph.setValue(33);
247 
248 #if 0
249     // TODO: Handling of degenerated cases, e.g. when an image contains uniform color
250     const double s_max = 2.; // Maximum slope, to avoid enhancing noise
251     double s_renorm = 1;
252     for( int bb = 0; bb < H.bin_count; bb++ ) {
253         if( s[bb] >= s_max ) {
254             s[bb] = s_max;
255             s_renorm -= s_max * H.delta;
256         }
257     }
258     for( int bb = 0; bb < H.bin_count; bb++ ) {
259         if( s[bb] < s_max ) {
260             s[bb] = s_max;
261             s_renorm -= s_max * H.delta;
262         }
263 
264     }
265 
266 #endif
267 
268     // Create a tone-curve
269     lut.y_i[0] = 0;
270     for (int bb = 1; bb < H.bin_count; bb++) {
271         lut.y_i[bb] = lut.y_i[bb - 1] + s[bb] * H.delta;
272     }
273     ph.setValue(66);
274 
275 // Apply the tone-curve
276 
277 #pragma omp parallel for
278     for (int pp = 0; pp < static_cast<int>(pix_count); pp++) {
279 #ifdef __SSE2__
280         vfloat rgbv = _mm_set_ps(R_in[pp], G_in[pp], B_in[pp], 0);
281         rgbv = safelog10f(rgbv, log10v, minv);
282         float temp[4];
283         STVFU(temp[0], rgbv);
284         R_out[pp] = lut.interp(temp[3]);
285         G_out[pp] = lut.interp(temp[2]);
286         B_out[pp] = lut.interp(temp[1]);
287 #else
288         R_out[pp] = lut.interp(safelog10f(R_in[pp]));
289         G_out[pp] = lut.interp(safelog10f(G_in[pp]));
290         B_out[pp] = lut.interp(safelog10f(B_in[pp]));
291 #endif
292     }
293     ph.setValue(99);
294     delete[] s;
295     delete[] logL;
296 
297 #ifdef TIMER_PROFILING
298     stop_watch.stop_and_update();
299     std::cout << std::endl;
300     std::cout << "tmo_mai11 = " << stop_watch.get_time() << " msec"
301               << std::endl;
302 #endif
303 }
304 }
305