1 /**
2 * @brief Tone-mapping optimized for backward-compatible HDR image and video
3 * compression
4 *
5 * From:
6 * Mai, Z., Mansour, H., Mantiuk, R., Nasiopoulos, P., Ward, R., & Heidrich, W.
7 * Optimizing a tone curve for backward-compatible high dynamic range image and
8 * video compression.
9 * IEEE Transactions on Image Processing, 20(6), 1558 – 1571.
10 * doi:10.1109/TIP.2010.2095866, 2011
11 *
12 * This file is a part of LuminanceHDR package, based on pfstmo.
13 * ----------------------------------------------------------------------
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 * ----------------------------------------------------------------------
28 *
29 * @author Rafal Mantiuk, <mantiuk@gmail.com>
30 *
31 * $Id: pfstmo_mantiuk08.cpp,v 1.19 2013/12/28 14:00:54 rafm Exp $
32 */
33
34 #ifdef _OPENMP
35 #include <omp.h>
36 #endif
37 #include "opthelper.h"
38 #include "sleef.c"
39
40 #include <math.h>
41 #include <string.h>
42 #include <algorithm>
43 #include <iostream>
44
45 #include "Libpfs/utils/msec_timer.h"
46 #include "compression_tmo.h"
47
48 #ifdef BRANCH_PREDICTION
49 #define likely(x) __builtin_expect((x), 1)
50 #define unlikely(x) __builtin_expect((x), 0)
51 #else
52 #define likely(x) (x)
53 #define unlikely(x) (x)
54 #endif
55
56 /**
57 * Lookup table on a uniform array & interpolation
58 *
59 * x_i must be at least two elements
60 * y_i must be initialized after creating an object
61 */
62 namespace mai {
63 class UniformArrayLUT {
64 double start_v;
65 size_t lut_size;
66 double delta;
67
68 bool own_y_i;
69
70 public:
71 double *y_i;
72
UniformArrayLUT(double from,double to,int lut_size,double * y_i=NULL)73 UniformArrayLUT(double from, double to, int lut_size, double *y_i = NULL)
74 : start_v(from),
75 lut_size(lut_size),
76 delta((to - from) / (double)lut_size) {
77 if (y_i == NULL) {
78 this->y_i = new double[lut_size];
79 own_y_i = true;
80 } else {
81 this->y_i = y_i;
82 own_y_i = false;
83 }
84 }
85
UniformArrayLUT()86 UniformArrayLUT() : start_v(0.), lut_size(0), delta(0.), own_y_i(false), y_i(NULL) {}
87
UniformArrayLUT(const UniformArrayLUT & other)88 UniformArrayLUT(const UniformArrayLUT &other)
89 : start_v(other.start_v), lut_size(other.lut_size), delta(other.delta) {
90 this->y_i = new double[lut_size];
91 own_y_i = true;
92 memcpy(this->y_i, other.y_i, lut_size * sizeof(double));
93 }
94
operator =(const UniformArrayLUT & other)95 UniformArrayLUT &operator=(const UniformArrayLUT &other) {
96 if (&other != this) {
97 this->lut_size = other.lut_size;
98 this->delta = other.delta;
99 this->start_v = other.start_v;
100 this->y_i = new double[lut_size];
101 own_y_i = true;
102 memcpy(this->y_i, other.y_i, lut_size * sizeof(double));
103 }
104 return *this;
105 }
106
~UniformArrayLUT()107 ~UniformArrayLUT() {
108 if (own_y_i) delete[] y_i;
109 }
110
interp(double x)111 double interp(double x) {
112 const double ind_f = (x - start_v) / delta;
113 const size_t ind_low = (size_t)(ind_f);
114 const size_t ind_hi = (size_t)ceil(ind_f);
115
116 if (unlikely(ind_f < 0)) // Out of range checks
117 return y_i[0];
118 if (unlikely(ind_hi >= lut_size)) return y_i[lut_size - 1];
119
120 if (unlikely(ind_low == ind_hi))
121 return y_i[ind_low]; // No interpolation necessary
122
123 return y_i[ind_low] +
124 (y_i[ind_hi] - y_i[ind_low]) *
125 (ind_f - (double)ind_low); // Interpolation
126 }
127 };
128
129 class ImgHistogram {
130 public:
131 const float L_min, L_max;
132 const float delta;
133 int *bins;
134 double *p;
135 int bin_count;
136
ImgHistogram()137 ImgHistogram() : L_min(-6.f), L_max(9.f), delta(0.1), bins(NULL), p(NULL) {
138 bin_count = (int)ceil((L_max - L_min) / delta);
139 bins = new int[bin_count];
140 p = new double[bin_count];
141 }
142
~ImgHistogram()143 ~ImgHistogram() {
144 delete[] bins;
145 delete[] p;
146 }
147
compute(const float * img,size_t pixel_count)148 void compute(const float *img, size_t pixel_count) {
149 std::fill(bins, bins + bin_count, 0);
150
151 int pp_count = 0;
152
153 #pragma omp parallel
154 {
155 int *binsThr = new int[bin_count];
156 std::fill(binsThr, binsThr + bin_count, 0);
157
158 int pp_countThr = 0;
159 #pragma omp for nowait
160 for (size_t pp = 0; pp < pixel_count; pp++) {
161 int bin_index = (img[pp] - L_min) / delta;
162 // ignore anything outside the range
163 if (bin_index < 0 || bin_index >= bin_count) continue;
164 binsThr[bin_index]++;
165 pp_countThr++;
166 }
167 #pragma omp critical
168 {
169 pp_count += pp_countThr;
170 for (int bb = 0; bb < bin_count; bb++) {
171 bins[bb] += binsThr[bb];
172 }
173 delete [] binsThr;
174 }
175 }
176 for (int bb = 0; bb < bin_count; bb++) {
177 p[bb] = (double)bins[bb] / (double)pp_count;
178 }
179 }
180 };
181
safelog10f(float x)182 inline float safelog10f(float x) {
183 return xlogf(std::max(x, 1e-5f)) * 0.43429448190325182765112891891661f;
184 }
185
186 #ifdef __SSE2__
safelog10f(vfloat xv,vfloat log10v,vfloat minv)187 inline vfloat safelog10f(vfloat xv, vfloat log10v, vfloat minv) {
188 return xlogf(vmaxf(xv, minv)) * log10v;
189 }
190 #endif
191
tonemap(const float * R_in,const float * G_in,float * B_in,size_t width,size_t height,float * R_out,float * G_out,float * B_out,const float * L_in,pfs::Progress & ph)192 void CompressionTMO::tonemap(const float *R_in, const float *G_in, float *B_in,
193 size_t width, size_t height, float *R_out, float *G_out,
194 float *B_out, const float *L_in,
195 pfs::Progress &ph) {
196 #ifdef TIMER_PROFILING
197 msec_timer stop_watch;
198 stop_watch.start();
199 #endif
200 const size_t pix_count = width * height;
201
202 ph.setValue(2);
203 // Compute log of Luminance
204 float *logL = new float[pix_count];
205
206 #ifdef __SSE2__
207 const vfloat log10v = F2V(0.43429448190325182765112891891661f);
208 const vfloat minv = F2V(1e-5f);
209 #pragma omp parallel for
210 for (size_t pp = 0; pp < pix_count - 3; pp += 4) {
211 STVFU(logL[pp], safelog10f(LVFU(L_in[pp]), log10v, minv));
212 }
213
214 for (size_t pp = pix_count - (pix_count % 4); pp < pix_count; pp++) {
215 logL[pp] = safelog10f(L_in[pp]);
216 }
217 #else
218 #pragma omp parallel for
219 for (size_t pp = 0; pp < pix_count; pp++) {
220 logL[pp] = safelog10f(L_in[pp]);
221 }
222
223 #endif
224 ImgHistogram H;
225 H.compute(logL, pix_count);
226
227 // Instantiate LUT
228 UniformArrayLUT lut(H.L_min, H.L_max, H.bin_count);
229
230 // Compute slopes
231 // std::unique_ptr<double[]> s(new double[H.bin_count]);
232 double *s = new double[H.bin_count];
233 {
234 double d = 0;
235
236 for (int bb = 0; bb < H.bin_count; bb++) {
237 d += cbrt(H.p[bb]);
238 }
239
240 d *= H.delta;
241
242 for (int bb = 0; bb < H.bin_count; bb++) {
243 s[bb] = cbrt(H.p[bb]) / d;
244 }
245 }
246 ph.setValue(33);
247
248 #if 0
249 // TODO: Handling of degenerated cases, e.g. when an image contains uniform color
250 const double s_max = 2.; // Maximum slope, to avoid enhancing noise
251 double s_renorm = 1;
252 for( int bb = 0; bb < H.bin_count; bb++ ) {
253 if( s[bb] >= s_max ) {
254 s[bb] = s_max;
255 s_renorm -= s_max * H.delta;
256 }
257 }
258 for( int bb = 0; bb < H.bin_count; bb++ ) {
259 if( s[bb] < s_max ) {
260 s[bb] = s_max;
261 s_renorm -= s_max * H.delta;
262 }
263
264 }
265
266 #endif
267
268 // Create a tone-curve
269 lut.y_i[0] = 0;
270 for (int bb = 1; bb < H.bin_count; bb++) {
271 lut.y_i[bb] = lut.y_i[bb - 1] + s[bb] * H.delta;
272 }
273 ph.setValue(66);
274
275 // Apply the tone-curve
276
277 #pragma omp parallel for
278 for (int pp = 0; pp < static_cast<int>(pix_count); pp++) {
279 #ifdef __SSE2__
280 vfloat rgbv = _mm_set_ps(R_in[pp], G_in[pp], B_in[pp], 0);
281 rgbv = safelog10f(rgbv, log10v, minv);
282 float temp[4];
283 STVFU(temp[0], rgbv);
284 R_out[pp] = lut.interp(temp[3]);
285 G_out[pp] = lut.interp(temp[2]);
286 B_out[pp] = lut.interp(temp[1]);
287 #else
288 R_out[pp] = lut.interp(safelog10f(R_in[pp]));
289 G_out[pp] = lut.interp(safelog10f(G_in[pp]));
290 B_out[pp] = lut.interp(safelog10f(B_in[pp]));
291 #endif
292 }
293 ph.setValue(99);
294 delete[] s;
295 delete[] logL;
296
297 #ifdef TIMER_PROFILING
298 stop_watch.stop_and_update();
299 std::cout << std::endl;
300 std::cout << "tmo_mai11 = " << stop_watch.get_time() << " msec"
301 << std::endl;
302 #endif
303 }
304 }
305