1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // * Redistribution's of source code must retain the above copyright notice,
18 // this list of conditions and the following disclaimer.
19 //
20 // * Redistribution's in binary form must reproduce the above copyright notice,
21 // this list of conditions and the following disclaimer in the documentation
22 // and/or other materials provided with the distribution.
23 //
24 // * The name of Intel Corporation may not be used to endorse or promote products
25 // derived from this software without specific prior written permission.
26 //
27 // This software is provided by the copyright holders and contributors "as is" and
28 // any express or implied warranties, including, but not limited to, the implied
29 // warranties of merchantability and fitness for a particular purpose are disclaimed.
30 // In no event shall the Intel Corporation or contributors be liable for any direct,
31 // indirect, incidental, special, exemplary, or consequential damages
32 // (including, but not limited to, procurement of substitute goods or services;
33 // loss of use, data, or profits; or business interruption) however caused
34 // and on any theory of liability, whether in contract, strict liability,
35 // or tort (including negligence or otherwise) arising in any way out of
36 // the use of this software, even if advised of the possibility of such damage.
37 //
38 //M*/
39
40 #include "opencv2/core.hpp"
41 #include "opencv2/core/hal/intrin.hpp"
42 #include "opencv2/xphoto.hpp"
43
44 namespace cv
45 {
46 namespace xphoto
47 {
48
49 void calculateChannelSums(uint &sumB, uint &sumG, uint &sumR, uchar *src_data, int src_len, float thresh);
50 void calculateChannelSums(uint64 &sumB, uint64 &sumG, uint64 &sumR, ushort *src_data, int src_len, float thresh);
51
52 class GrayworldWBImpl CV_FINAL : public GrayworldWB
53 {
54 private:
55 float thresh;
56
57 public:
GrayworldWBImpl()58 GrayworldWBImpl() { thresh = 0.9f; }
getSaturationThreshold() const59 float getSaturationThreshold() const CV_OVERRIDE { return thresh; }
setSaturationThreshold(float val)60 void setSaturationThreshold(float val) CV_OVERRIDE { thresh = val; }
balanceWhite(InputArray _src,OutputArray _dst)61 void balanceWhite(InputArray _src, OutputArray _dst) CV_OVERRIDE
62 {
63 CV_Assert(!_src.empty());
64 CV_Assert(_src.isContinuous());
65 CV_Assert(_src.type() == CV_8UC3 || _src.type() == CV_16UC3);
66 Mat src = _src.getMat();
67
68 int N = src.cols * src.rows, N3 = N * 3;
69
70 double dsumB = 0.0, dsumG = 0.0, dsumR = 0.0;
71 if (src.type() == CV_8UC3)
72 {
73 uint sumB = 0, sumG = 0, sumR = 0;
74 calculateChannelSums(sumB, sumG, sumR, src.ptr<uchar>(), N3, thresh);
75 dsumB = (double)sumB;
76 dsumG = (double)sumG;
77 dsumR = (double)sumR;
78 }
79 else if (src.type() == CV_16UC3)
80 {
81 uint64 sumB = 0, sumG = 0, sumR = 0;
82 calculateChannelSums(sumB, sumG, sumR, src.ptr<ushort>(), N3, thresh);
83 dsumB = (double)sumB;
84 dsumG = (double)sumG;
85 dsumR = (double)sumR;
86 }
87
88 // Find inverse of averages
89 double max_sum = max(dsumB, max(dsumR, dsumG));
90 const double eps = 0.1;
91 float dinvB = dsumB < eps ? 0.f : (float)(max_sum / dsumB),
92 dinvG = dsumG < eps ? 0.f : (float)(max_sum / dsumG),
93 dinvR = dsumR < eps ? 0.f : (float)(max_sum / dsumR);
94
95 // Use the inverse of averages as channel gains:
96 applyChannelGains(src, _dst, dinvB, dinvG, dinvR);
97 }
98 };
99
100 /* Computes sums for each channel, while ignoring saturated pixels which are determined by thresh
101 * (version for CV_8UC3)
102 */
calculateChannelSums(uint & sumB,uint & sumG,uint & sumR,uchar * src_data,int src_len,float thresh)103 void calculateChannelSums(uint &sumB, uint &sumG, uint &sumR, uchar *src_data, int src_len, float thresh)
104 {
105 sumB = sumG = sumR = 0;
106 ushort thresh255 = (ushort)cvRound(thresh * 255);
107 int i = 0;
108 #if CV_SIMD128
109 v_uint8x16 v_inB, v_inG, v_inR, v_min_val, v_max_val;
110 v_uint16x8 v_iB1, v_iB2, v_iG1, v_iG2, v_iR1, v_iR2;
111 v_uint16x8 v_min1, v_min2, v_max1, v_max2, v_m1, v_m2;
112 v_uint16x8 v_255 = v_setall_u16(255), v_thresh = v_setall_u16(thresh255);
113 v_uint32x4 v_uint1, v_uint2;
114 v_uint32x4 v_SB = v_setzero_u32(), v_SG = v_setzero_u32(), v_SR = v_setzero_u32();
115
116 for (; i < src_len - 47; i += 48)
117 {
118 // Load 3x uint8x16 and deinterleave into vectors of each channel
119 v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
120
121 // Get min and max
122 v_min_val = v_min(v_inB, v_min(v_inG, v_inR));
123 v_max_val = v_max(v_inB, v_max(v_inG, v_inR));
124
125 // Split into two ushort vectors per channel
126 v_expand(v_inB, v_iB1, v_iB2);
127 v_expand(v_inG, v_iG1, v_iG2);
128 v_expand(v_inR, v_iR1, v_iR2);
129 v_expand(v_min_val, v_min1, v_min2);
130 v_expand(v_max_val, v_max1, v_max2);
131
132 // Calculate masks
133 v_m1 = ~(v_mul_wrap(v_max1 - v_min1, v_255) > v_mul_wrap(v_thresh, v_max1));
134 v_m2 = ~(v_mul_wrap(v_max2 - v_min2, v_255) > v_mul_wrap(v_thresh, v_max2));
135
136 // Apply masks
137 v_iB1 = (v_iB1 & v_m1) + (v_iB2 & v_m2);
138 v_iG1 = (v_iG1 & v_m1) + (v_iG2 & v_m2);
139 v_iR1 = (v_iR1 & v_m1) + (v_iR2 & v_m2);
140
141 // Split and add to the sums:
142 v_expand(v_iB1, v_uint1, v_uint2);
143 v_SB += v_uint1 + v_uint2;
144 v_expand(v_iG1, v_uint1, v_uint2);
145 v_SG += v_uint1 + v_uint2;
146 v_expand(v_iR1, v_uint1, v_uint2);
147 v_SR += v_uint1 + v_uint2;
148 }
149
150 sumB = v_reduce_sum(v_SB);
151 sumG = v_reduce_sum(v_SG);
152 sumR = v_reduce_sum(v_SR);
153 #endif
154 unsigned int minRGB, maxRGB;
155 for (; i < src_len; i += 3)
156 {
157 minRGB = min(src_data[i], min(src_data[i + 1], src_data[i + 2]));
158 maxRGB = max(src_data[i], max(src_data[i + 1], src_data[i + 2]));
159 if ((maxRGB - minRGB) * 255 > thresh255 * maxRGB)
160 continue;
161 sumB += src_data[i];
162 sumG += src_data[i + 1];
163 sumR += src_data[i + 2];
164 }
165 }
166
167 /* Computes sums for each channel, while ignoring saturated pixels which are determined by thresh
168 * (version for CV_16UC3)
169 */
calculateChannelSums(uint64 & sumB,uint64 & sumG,uint64 & sumR,ushort * src_data,int src_len,float thresh)170 void calculateChannelSums(uint64 &sumB, uint64 &sumG, uint64 &sumR, ushort *src_data, int src_len, float thresh)
171 {
172 sumB = sumG = sumR = 0;
173 uint thresh65535 = cvRound(thresh * 65535);
174 int i = 0;
175 #if CV_SIMD128
176 v_uint16x8 v_inB, v_inG, v_inR, v_min_val, v_max_val;
177 v_uint32x4 v_iB1, v_iB2, v_iG1, v_iG2, v_iR1, v_iR2;
178 v_uint32x4 v_min1, v_min2, v_max1, v_max2, v_m1, v_m2;
179 v_uint32x4 v_65535 = v_setall_u32(65535), v_thresh = v_setall_u32(thresh65535);
180 v_uint64x2 v_u64_1, v_u64_2;
181 v_uint64x2 v_SB = v_setzero_u64(), v_SG = v_setzero_u64(), v_SR = v_setzero_u64();
182
183 for (; i < src_len - 23; i += 24)
184 {
185 // Load 3x uint16x8 and deinterleave into vectors of each channel
186 v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
187
188 // Get min and max
189 v_min_val = v_min(v_inB, v_min(v_inG, v_inR));
190 v_max_val = v_max(v_inB, v_max(v_inG, v_inR));
191
192 // Split into two uint vectors per channel
193 v_expand(v_inB, v_iB1, v_iB2);
194 v_expand(v_inG, v_iG1, v_iG2);
195 v_expand(v_inR, v_iR1, v_iR2);
196 v_expand(v_min_val, v_min1, v_min2);
197 v_expand(v_max_val, v_max1, v_max2);
198
199 // Calculate masks
200 v_m1 = ~((v_max1 - v_min1) * v_65535 > v_thresh * v_max1);
201 v_m2 = ~((v_max2 - v_min2) * v_65535 > v_thresh * v_max2);
202
203 // Apply masks
204 v_iB1 = (v_iB1 & v_m1) + (v_iB2 & v_m2);
205 v_iG1 = (v_iG1 & v_m1) + (v_iG2 & v_m2);
206 v_iR1 = (v_iR1 & v_m1) + (v_iR2 & v_m2);
207
208 // Split and add to the sums:
209 v_expand(v_iB1, v_u64_1, v_u64_2);
210 v_SB += v_u64_1 + v_u64_2;
211 v_expand(v_iG1, v_u64_1, v_u64_2);
212 v_SG += v_u64_1 + v_u64_2;
213 v_expand(v_iR1, v_u64_1, v_u64_2);
214 v_SR += v_u64_1 + v_u64_2;
215 }
216
217 // Perform final reduction
218 uint64 sum_arr[2];
219 v_store(sum_arr, v_SB);
220 sumB = sum_arr[0] + sum_arr[1];
221 v_store(sum_arr, v_SG);
222 sumG = sum_arr[0] + sum_arr[1];
223 v_store(sum_arr, v_SR);
224 sumR = sum_arr[0] + sum_arr[1];
225 #endif
226 unsigned int minRGB, maxRGB;
227 for (; i < src_len; i += 3)
228 {
229 minRGB = min(src_data[i], min(src_data[i + 1], src_data[i + 2]));
230 maxRGB = max(src_data[i], max(src_data[i + 1], src_data[i + 2]));
231 if ((maxRGB - minRGB) * 65535 > thresh65535 * maxRGB)
232 continue;
233 sumB += src_data[i];
234 sumG += src_data[i + 1];
235 sumR += src_data[i + 2];
236 }
237 }
238
applyChannelGains(InputArray _src,OutputArray _dst,float gainB,float gainG,float gainR)239 void applyChannelGains(InputArray _src, OutputArray _dst, float gainB, float gainG, float gainR)
240 {
241 Mat src = _src.getMat();
242 CV_Assert(!src.empty());
243 CV_Assert(src.isContinuous());
244 CV_Assert(src.type() == CV_8UC3 || src.type() == CV_16UC3);
245
246 _dst.create(src.size(), src.type());
247 Mat dst = _dst.getMat();
248 int N3 = 3 * src.cols * src.rows;
249 int i = 0;
250
251 // Scale gains by their maximum (fixed point approximation works only when all gains are <=1)
252 float gain_max = max(gainB, max(gainG, gainR));
253 if (gain_max > 0)
254 {
255 gainB /= gain_max;
256 gainG /= gain_max;
257 gainR /= gain_max;
258 }
259
260 if (src.type() == CV_8UC3)
261 {
262 // Fixed point arithmetic, mul by 2^8 then shift back 8 bits
263 int i_gainB = cvRound(gainB * (1 << 8)), i_gainG = cvRound(gainG * (1 << 8)),
264 i_gainR = cvRound(gainR * (1 << 8));
265 const uchar *src_data = src.ptr<uchar>();
266 uchar *dst_data = dst.ptr<uchar>();
267 #if CV_SIMD128
268 v_uint8x16 v_inB, v_inG, v_inR;
269 v_uint8x16 v_outB, v_outG, v_outR;
270 v_uint16x8 v_sB1, v_sB2, v_sG1, v_sG2, v_sR1, v_sR2;
271 v_uint16x8 v_gainB = v_setall_u16((ushort)i_gainB), v_gainG = v_setall_u16((ushort)i_gainG),
272 v_gainR = v_setall_u16((ushort)i_gainR);
273
274 for (; i < N3 - 47; i += 48)
275 {
276 // Load 3x uint8x16 and deinterleave into vectors of each channel
277 v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
278
279 // Split into two ushort vectors per channel
280 v_expand(v_inB, v_sB1, v_sB2);
281 v_expand(v_inG, v_sG1, v_sG2);
282 v_expand(v_inR, v_sR1, v_sR2);
283
284 // Multiply by gains
285 v_sB1 = v_mul_wrap(v_sB1, v_gainB) >> 8;
286 v_sB2 = v_mul_wrap(v_sB2, v_gainB) >> 8;
287 v_sG1 = v_mul_wrap(v_sG1, v_gainG) >> 8;
288 v_sG2 = v_mul_wrap(v_sG2, v_gainG) >> 8;
289 v_sR1 = v_mul_wrap(v_sR1, v_gainR) >> 8;
290 v_sR2 = v_mul_wrap(v_sR2, v_gainR) >> 8;
291
292 // Pack into vectors of v_uint8x16
293 v_store_interleave(&dst_data[i], v_pack(v_sB1, v_sB2), v_pack(v_sG1, v_sG2), v_pack(v_sR1, v_sR2));
294 }
295 #endif
296 for (; i < N3; i += 3)
297 {
298 dst_data[i] = (uchar)((src_data[i] * i_gainB) >> 8);
299 dst_data[i + 1] = (uchar)((src_data[i + 1] * i_gainG) >> 8);
300 dst_data[i + 2] = (uchar)((src_data[i + 2] * i_gainR) >> 8);
301 }
302 }
303 else if (src.type() == CV_16UC3)
304 {
305 // Fixed point arithmetic, mul by 2^16 then shift back 16 bits
306 int i_gainB = cvRound(gainB * (1 << 16)), i_gainG = cvRound(gainG * (1 << 16)),
307 i_gainR = cvRound(gainR * (1 << 16));
308 const ushort *src_data = src.ptr<ushort>();
309 ushort *dst_data = dst.ptr<ushort>();
310 #if CV_SIMD128
311 v_uint16x8 v_inB, v_inG, v_inR;
312 v_uint16x8 v_outB, v_outG, v_outR;
313 v_uint32x4 v_sB1, v_sB2, v_sG1, v_sG2, v_sR1, v_sR2;
314 v_uint32x4 v_gainB = v_setall_u32((uint)i_gainB), v_gainG = v_setall_u32((uint)i_gainG),
315 v_gainR = v_setall_u32((uint)i_gainR);
316
317 for (; i < N3 - 23; i += 24)
318 {
319 // Load 3x uint16x8 and deinterleave into vectors of each channel
320 v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
321
322 // Split into two uint vectors per channel
323 v_expand(v_inB, v_sB1, v_sB2);
324 v_expand(v_inG, v_sG1, v_sG2);
325 v_expand(v_inR, v_sR1, v_sR2);
326
327 // Multiply by scaling factors
328 v_sB1 = (v_sB1 * v_gainB) >> 16;
329 v_sB2 = (v_sB2 * v_gainB) >> 16;
330 v_sG1 = (v_sG1 * v_gainG) >> 16;
331 v_sG2 = (v_sG2 * v_gainG) >> 16;
332 v_sR1 = (v_sR1 * v_gainR) >> 16;
333 v_sR2 = (v_sR2 * v_gainR) >> 16;
334
335 // Pack into vectors of v_uint16x8
336 v_store_interleave(&dst_data[i], v_pack(v_sB1, v_sB2), v_pack(v_sG1, v_sG2), v_pack(v_sR1, v_sR2));
337 }
338 #endif
339 for (; i < N3; i += 3)
340 {
341 dst_data[i] = (ushort)((src_data[i] * i_gainB) >> 16);
342 dst_data[i + 1] = (ushort)((src_data[i + 1] * i_gainG) >> 16);
343 dst_data[i + 2] = (ushort)((src_data[i + 2] * i_gainR) >> 16);
344 }
345 }
346 }
347
createGrayworldWB()348 Ptr<GrayworldWB> createGrayworldWB() { return makePtr<GrayworldWBImpl>(); }
349 }
350 }
351