1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 //   * Redistribution's of source code must retain the above copyright notice,
18 //     this list of conditions and the following disclaimer.
19 //
20 //   * Redistribution's in binary form must reproduce the above copyright notice,
21 //     this list of conditions and the following disclaimer in the documentation
22 //     and/or other materials provided with the distribution.
23 //
24 //   * The name of Intel Corporation may not be used to endorse or promote products
25 //     derived from this software without specific prior written permission.
26 //
27 // This software is provided by the copyright holders and contributors "as is" and
28 // any express or implied warranties, including, but not limited to, the implied
29 // warranties of merchantability and fitness for a particular purpose are disclaimed.
30 // In no event shall the Intel Corporation or contributors be liable for any direct,
31 // indirect, incidental, special, exemplary, or consequential damages
32 // (including, but not limited to, procurement of substitute goods or services;
33 // loss of use, data, or profits; or business interruption) however caused
34 // and on any theory of liability, whether in contract, strict liability,
35 // or tort (including negligence or otherwise) arising in any way out of
36 // the use of this software, even if advised of the possibility of such damage.
37 //
38 //M*/
39 
40 #include "opencv2/core.hpp"
41 #include "opencv2/core/hal/intrin.hpp"
42 #include "opencv2/xphoto.hpp"
43 
44 namespace cv
45 {
46 namespace xphoto
47 {
48 
49 void calculateChannelSums(uint &sumB, uint &sumG, uint &sumR, uchar *src_data, int src_len, float thresh);
50 void calculateChannelSums(uint64 &sumB, uint64 &sumG, uint64 &sumR, ushort *src_data, int src_len, float thresh);
51 
52 class GrayworldWBImpl CV_FINAL : public GrayworldWB
53 {
54   private:
55     float thresh;
56 
57   public:
GrayworldWBImpl()58     GrayworldWBImpl() { thresh = 0.9f; }
getSaturationThreshold() const59     float getSaturationThreshold() const CV_OVERRIDE { return thresh; }
setSaturationThreshold(float val)60     void setSaturationThreshold(float val) CV_OVERRIDE { thresh = val; }
balanceWhite(InputArray _src,OutputArray _dst)61     void balanceWhite(InputArray _src, OutputArray _dst) CV_OVERRIDE
62     {
63         CV_Assert(!_src.empty());
64         CV_Assert(_src.isContinuous());
65         CV_Assert(_src.type() == CV_8UC3 || _src.type() == CV_16UC3);
66         Mat src = _src.getMat();
67 
68         int N = src.cols * src.rows, N3 = N * 3;
69 
70         double dsumB = 0.0, dsumG = 0.0, dsumR = 0.0;
71         if (src.type() == CV_8UC3)
72         {
73             uint sumB = 0, sumG = 0, sumR = 0;
74             calculateChannelSums(sumB, sumG, sumR, src.ptr<uchar>(), N3, thresh);
75             dsumB = (double)sumB;
76             dsumG = (double)sumG;
77             dsumR = (double)sumR;
78         }
79         else if (src.type() == CV_16UC3)
80         {
81             uint64 sumB = 0, sumG = 0, sumR = 0;
82             calculateChannelSums(sumB, sumG, sumR, src.ptr<ushort>(), N3, thresh);
83             dsumB = (double)sumB;
84             dsumG = (double)sumG;
85             dsumR = (double)sumR;
86         }
87 
88         // Find inverse of averages
89         double max_sum = max(dsumB, max(dsumR, dsumG));
90         const double eps = 0.1;
91         float dinvB = dsumB < eps ? 0.f : (float)(max_sum / dsumB),
92               dinvG = dsumG < eps ? 0.f : (float)(max_sum / dsumG),
93               dinvR = dsumR < eps ? 0.f : (float)(max_sum / dsumR);
94 
95         // Use the inverse of averages as channel gains:
96         applyChannelGains(src, _dst, dinvB, dinvG, dinvR);
97     }
98 };
99 
100 /* Computes sums for each channel, while ignoring saturated pixels which are determined by thresh
101  * (version for CV_8UC3)
102  */
calculateChannelSums(uint & sumB,uint & sumG,uint & sumR,uchar * src_data,int src_len,float thresh)103 void calculateChannelSums(uint &sumB, uint &sumG, uint &sumR, uchar *src_data, int src_len, float thresh)
104 {
105     sumB = sumG = sumR = 0;
106     ushort thresh255 = (ushort)cvRound(thresh * 255);
107     int i = 0;
108 #if CV_SIMD128
109     v_uint8x16 v_inB, v_inG, v_inR, v_min_val, v_max_val;
110     v_uint16x8 v_iB1, v_iB2, v_iG1, v_iG2, v_iR1, v_iR2;
111     v_uint16x8 v_min1, v_min2, v_max1, v_max2, v_m1, v_m2;
112     v_uint16x8 v_255 = v_setall_u16(255), v_thresh = v_setall_u16(thresh255);
113     v_uint32x4 v_uint1, v_uint2;
114     v_uint32x4 v_SB = v_setzero_u32(), v_SG = v_setzero_u32(), v_SR = v_setzero_u32();
115 
116     for (; i < src_len - 47; i += 48)
117     {
118         // Load 3x uint8x16 and deinterleave into vectors of each channel
119         v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
120 
121         // Get min and max
122         v_min_val = v_min(v_inB, v_min(v_inG, v_inR));
123         v_max_val = v_max(v_inB, v_max(v_inG, v_inR));
124 
125         // Split into two ushort vectors per channel
126         v_expand(v_inB, v_iB1, v_iB2);
127         v_expand(v_inG, v_iG1, v_iG2);
128         v_expand(v_inR, v_iR1, v_iR2);
129         v_expand(v_min_val, v_min1, v_min2);
130         v_expand(v_max_val, v_max1, v_max2);
131 
132         // Calculate masks
133         v_m1 = ~(v_mul_wrap(v_max1 - v_min1, v_255) > v_mul_wrap(v_thresh, v_max1));
134         v_m2 = ~(v_mul_wrap(v_max2 - v_min2, v_255) > v_mul_wrap(v_thresh, v_max2));
135 
136         // Apply masks
137         v_iB1 = (v_iB1 & v_m1) + (v_iB2 & v_m2);
138         v_iG1 = (v_iG1 & v_m1) + (v_iG2 & v_m2);
139         v_iR1 = (v_iR1 & v_m1) + (v_iR2 & v_m2);
140 
141         // Split and add to the sums:
142         v_expand(v_iB1, v_uint1, v_uint2);
143         v_SB += v_uint1 + v_uint2;
144         v_expand(v_iG1, v_uint1, v_uint2);
145         v_SG += v_uint1 + v_uint2;
146         v_expand(v_iR1, v_uint1, v_uint2);
147         v_SR += v_uint1 + v_uint2;
148     }
149 
150     sumB = v_reduce_sum(v_SB);
151     sumG = v_reduce_sum(v_SG);
152     sumR = v_reduce_sum(v_SR);
153 #endif
154     unsigned int minRGB, maxRGB;
155     for (; i < src_len; i += 3)
156     {
157         minRGB = min(src_data[i], min(src_data[i + 1], src_data[i + 2]));
158         maxRGB = max(src_data[i], max(src_data[i + 1], src_data[i + 2]));
159         if ((maxRGB - minRGB) * 255 > thresh255 * maxRGB)
160             continue;
161         sumB += src_data[i];
162         sumG += src_data[i + 1];
163         sumR += src_data[i + 2];
164     }
165 }
166 
167 /* Computes sums for each channel, while ignoring saturated pixels which are determined by thresh
168  * (version for CV_16UC3)
169  */
calculateChannelSums(uint64 & sumB,uint64 & sumG,uint64 & sumR,ushort * src_data,int src_len,float thresh)170 void calculateChannelSums(uint64 &sumB, uint64 &sumG, uint64 &sumR, ushort *src_data, int src_len, float thresh)
171 {
172     sumB = sumG = sumR = 0;
173     uint thresh65535 = cvRound(thresh * 65535);
174     int i = 0;
175 #if CV_SIMD128
176     v_uint16x8 v_inB, v_inG, v_inR, v_min_val, v_max_val;
177     v_uint32x4 v_iB1, v_iB2, v_iG1, v_iG2, v_iR1, v_iR2;
178     v_uint32x4 v_min1, v_min2, v_max1, v_max2, v_m1, v_m2;
179     v_uint32x4 v_65535 = v_setall_u32(65535), v_thresh = v_setall_u32(thresh65535);
180     v_uint64x2 v_u64_1, v_u64_2;
181     v_uint64x2 v_SB = v_setzero_u64(), v_SG = v_setzero_u64(), v_SR = v_setzero_u64();
182 
183     for (; i < src_len - 23; i += 24)
184     {
185         // Load 3x uint16x8 and deinterleave into vectors of each channel
186         v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
187 
188         // Get min and max
189         v_min_val = v_min(v_inB, v_min(v_inG, v_inR));
190         v_max_val = v_max(v_inB, v_max(v_inG, v_inR));
191 
192         // Split into two uint vectors per channel
193         v_expand(v_inB, v_iB1, v_iB2);
194         v_expand(v_inG, v_iG1, v_iG2);
195         v_expand(v_inR, v_iR1, v_iR2);
196         v_expand(v_min_val, v_min1, v_min2);
197         v_expand(v_max_val, v_max1, v_max2);
198 
199         // Calculate masks
200         v_m1 = ~((v_max1 - v_min1) * v_65535 > v_thresh * v_max1);
201         v_m2 = ~((v_max2 - v_min2) * v_65535 > v_thresh * v_max2);
202 
203         // Apply masks
204         v_iB1 = (v_iB1 & v_m1) + (v_iB2 & v_m2);
205         v_iG1 = (v_iG1 & v_m1) + (v_iG2 & v_m2);
206         v_iR1 = (v_iR1 & v_m1) + (v_iR2 & v_m2);
207 
208         // Split and add to the sums:
209         v_expand(v_iB1, v_u64_1, v_u64_2);
210         v_SB += v_u64_1 + v_u64_2;
211         v_expand(v_iG1, v_u64_1, v_u64_2);
212         v_SG += v_u64_1 + v_u64_2;
213         v_expand(v_iR1, v_u64_1, v_u64_2);
214         v_SR += v_u64_1 + v_u64_2;
215     }
216 
217     // Perform final reduction
218     uint64 sum_arr[2];
219     v_store(sum_arr, v_SB);
220     sumB = sum_arr[0] + sum_arr[1];
221     v_store(sum_arr, v_SG);
222     sumG = sum_arr[0] + sum_arr[1];
223     v_store(sum_arr, v_SR);
224     sumR = sum_arr[0] + sum_arr[1];
225 #endif
226     unsigned int minRGB, maxRGB;
227     for (; i < src_len; i += 3)
228     {
229         minRGB = min(src_data[i], min(src_data[i + 1], src_data[i + 2]));
230         maxRGB = max(src_data[i], max(src_data[i + 1], src_data[i + 2]));
231         if ((maxRGB - minRGB) * 65535 > thresh65535 * maxRGB)
232             continue;
233         sumB += src_data[i];
234         sumG += src_data[i + 1];
235         sumR += src_data[i + 2];
236     }
237 }
238 
applyChannelGains(InputArray _src,OutputArray _dst,float gainB,float gainG,float gainR)239 void applyChannelGains(InputArray _src, OutputArray _dst, float gainB, float gainG, float gainR)
240 {
241     Mat src = _src.getMat();
242     CV_Assert(!src.empty());
243     CV_Assert(src.isContinuous());
244     CV_Assert(src.type() == CV_8UC3 || src.type() == CV_16UC3);
245 
246     _dst.create(src.size(), src.type());
247     Mat dst = _dst.getMat();
248     int N3 = 3 * src.cols * src.rows;
249     int i = 0;
250 
251     // Scale gains by their maximum (fixed point approximation works only when all gains are <=1)
252     float gain_max = max(gainB, max(gainG, gainR));
253     if (gain_max > 0)
254     {
255         gainB /= gain_max;
256         gainG /= gain_max;
257         gainR /= gain_max;
258     }
259 
260     if (src.type() == CV_8UC3)
261     {
262         // Fixed point arithmetic, mul by 2^8 then shift back 8 bits
263         int i_gainB = cvRound(gainB * (1 << 8)), i_gainG = cvRound(gainG * (1 << 8)),
264             i_gainR = cvRound(gainR * (1 << 8));
265         const uchar *src_data = src.ptr<uchar>();
266         uchar *dst_data = dst.ptr<uchar>();
267 #if CV_SIMD128
268         v_uint8x16 v_inB, v_inG, v_inR;
269         v_uint8x16 v_outB, v_outG, v_outR;
270         v_uint16x8 v_sB1, v_sB2, v_sG1, v_sG2, v_sR1, v_sR2;
271         v_uint16x8 v_gainB = v_setall_u16((ushort)i_gainB), v_gainG = v_setall_u16((ushort)i_gainG),
272                    v_gainR = v_setall_u16((ushort)i_gainR);
273 
274         for (; i < N3 - 47; i += 48)
275         {
276             // Load 3x uint8x16 and deinterleave into vectors of each channel
277             v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
278 
279             // Split into two ushort vectors per channel
280             v_expand(v_inB, v_sB1, v_sB2);
281             v_expand(v_inG, v_sG1, v_sG2);
282             v_expand(v_inR, v_sR1, v_sR2);
283 
284             // Multiply by gains
285             v_sB1 = v_mul_wrap(v_sB1, v_gainB) >> 8;
286             v_sB2 = v_mul_wrap(v_sB2, v_gainB) >> 8;
287             v_sG1 = v_mul_wrap(v_sG1, v_gainG) >> 8;
288             v_sG2 = v_mul_wrap(v_sG2, v_gainG) >> 8;
289             v_sR1 = v_mul_wrap(v_sR1, v_gainR) >> 8;
290             v_sR2 = v_mul_wrap(v_sR2, v_gainR) >> 8;
291 
292             // Pack into vectors of v_uint8x16
293             v_store_interleave(&dst_data[i], v_pack(v_sB1, v_sB2), v_pack(v_sG1, v_sG2), v_pack(v_sR1, v_sR2));
294         }
295 #endif
296         for (; i < N3; i += 3)
297         {
298             dst_data[i] = (uchar)((src_data[i] * i_gainB) >> 8);
299             dst_data[i + 1] = (uchar)((src_data[i + 1] * i_gainG) >> 8);
300             dst_data[i + 2] = (uchar)((src_data[i + 2] * i_gainR) >> 8);
301         }
302     }
303     else if (src.type() == CV_16UC3)
304     {
305         // Fixed point arithmetic, mul by 2^16 then shift back 16 bits
306         int i_gainB = cvRound(gainB * (1 << 16)), i_gainG = cvRound(gainG * (1 << 16)),
307             i_gainR = cvRound(gainR * (1 << 16));
308         const ushort *src_data = src.ptr<ushort>();
309         ushort *dst_data = dst.ptr<ushort>();
310 #if CV_SIMD128
311         v_uint16x8 v_inB, v_inG, v_inR;
312         v_uint16x8 v_outB, v_outG, v_outR;
313         v_uint32x4 v_sB1, v_sB2, v_sG1, v_sG2, v_sR1, v_sR2;
314         v_uint32x4 v_gainB = v_setall_u32((uint)i_gainB), v_gainG = v_setall_u32((uint)i_gainG),
315                    v_gainR = v_setall_u32((uint)i_gainR);
316 
317         for (; i < N3 - 23; i += 24)
318         {
319             // Load 3x uint16x8 and deinterleave into vectors of each channel
320             v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
321 
322             // Split into two uint vectors per channel
323             v_expand(v_inB, v_sB1, v_sB2);
324             v_expand(v_inG, v_sG1, v_sG2);
325             v_expand(v_inR, v_sR1, v_sR2);
326 
327             // Multiply by scaling factors
328             v_sB1 = (v_sB1 * v_gainB) >> 16;
329             v_sB2 = (v_sB2 * v_gainB) >> 16;
330             v_sG1 = (v_sG1 * v_gainG) >> 16;
331             v_sG2 = (v_sG2 * v_gainG) >> 16;
332             v_sR1 = (v_sR1 * v_gainR) >> 16;
333             v_sR2 = (v_sR2 * v_gainR) >> 16;
334 
335             // Pack into vectors of v_uint16x8
336             v_store_interleave(&dst_data[i], v_pack(v_sB1, v_sB2), v_pack(v_sG1, v_sG2), v_pack(v_sR1, v_sR2));
337         }
338 #endif
339         for (; i < N3; i += 3)
340         {
341             dst_data[i] = (ushort)((src_data[i] * i_gainB) >> 16);
342             dst_data[i + 1] = (ushort)((src_data[i + 1] * i_gainG) >> 16);
343             dst_data[i + 2] = (ushort)((src_data[i + 2] * i_gainR) >> 16);
344         }
345     }
346 }
347 
createGrayworldWB()348 Ptr<GrayworldWB> createGrayworldWB() { return makePtr<GrayworldWBImpl>(); }
349 }
350 }
351