1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4 
5 #include "precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7 
8 namespace cv {
9 namespace hal {
10 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
11 // forward declarations
12 
// Forward declaration only; the definition is outside the visible part of this file.
// NOTE(review): the notes below are inferred from the parameter names and must be
// confirmed against the definition:
//   src_data/src_step, dst_data/dst_step - presumably image base pointers and row strides
//   width, height                        - presumably image size in pixels
//   depth                                - presumably the element depth (8-bit vs float)
//   scn                                  - presumably the number of source channels
//   swapBlue                             - presumably selects the position of the blue channel
//   isFullRange                          - presumably selects the hue range
//   isHSV                                - presumably selects HSV vs HLS output
void cvtBGRtoHSV(const uchar * src_data, size_t src_step,
                 uchar * dst_data, size_t dst_step,
                 int width, int height,
                 int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV);
// Forward declaration only; the definition is outside the visible part of this file.
// NOTE(review): the notes below are inferred from the parameter names and must be
// confirmed against the definition:
//   src_data/src_step, dst_data/dst_step - presumably image base pointers and row strides
//   width, height                        - presumably image size in pixels
//   depth                                - presumably the element depth (8-bit vs float)
//   dcn                                  - presumably the number of destination channels
//   swapBlue                             - presumably selects the position of the blue channel
//   isFullRange                          - presumably selects the hue range
//   isHSV                                - presumably selects HSV vs HLS input
void cvtHSVtoBGR(const uchar * src_data, size_t src_step,
                 uchar * dst_data, size_t dst_step,
                 int width, int height,
                 int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
21 
22 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
23 
24 #if defined(CV_CPU_BASELINE_MODE)
25 // included in color.hpp
26 #else
27 #include "color.simd_helpers.hpp"
28 #endif
29 
30 namespace {
31 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
32 
33 
34 struct RGB2HSV_b
35 {
36     typedef uchar channel_type;
37 
RGB2HSV_bcv::hal::__anon673258420111::RGB2HSV_b38     RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
39     : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
40     {
41         CV_Assert( hrange == 180 || hrange == 256 );
42     }
43 
operator ()cv::hal::__anon673258420111::RGB2HSV_b44     void operator()(const uchar* src, uchar* dst, int n) const
45     {
46         CV_INSTRUMENT_REGION();
47 
48         int i, bidx = blueIdx, scn = srccn;
49         const int hsv_shift = 12;
50 
51         static int sdiv_table[256];
52         static int hdiv_table180[256];
53         static int hdiv_table256[256];
54         static volatile bool initialized = false;
55 
56         int hr = hrange;
57         const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
58 
59         if( !initialized )
60         {
61             sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
62             for( i = 1; i < 256; i++ )
63             {
64                 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
65                 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
66                 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
67             }
68             initialized = true;
69         }
70 
71         i = 0;
72 
73 #if CV_SIMD
74         const int vsize = v_uint8::nlanes;
75         for ( ; i <= n - vsize;
76               i += vsize, src += scn*vsize, dst += 3*vsize)
77         {
78             v_uint8 b, g, r;
79             if(scn == 4)
80             {
81                 v_uint8 a;
82                 v_load_deinterleave(src, b, g, r, a);
83             }
84             else
85             {
86                 v_load_deinterleave(src, b, g, r);
87             }
88 
89             if(bidx)
90                 swap(b, r);
91 
92             v_uint8 h, s, v;
93             v_uint8 vmin;
94             v = v_max(b, v_max(g, r));
95             vmin = v_min(b, v_min(g, r));
96 
97             v_uint8 diff, vr, vg;
98             diff = v - vmin;
99             v_uint8 v255 = vx_setall_u8(0xff), vz = vx_setzero_u8();
100             vr = v_select(v == r, v255, vz);
101             vg = v_select(v == g, v255, vz);
102 
103             // sdiv = sdiv_table[v]
104             v_int32 sdiv[4];
105             v_uint16 vd[2];
106             v_expand(v, vd[0], vd[1]);
107             v_int32 vq[4];
108             v_expand(v_reinterpret_as_s16(vd[0]), vq[0], vq[1]);
109             v_expand(v_reinterpret_as_s16(vd[1]), vq[2], vq[3]);
110             {
111                 int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) storevq[vsize];
112                 for (int k = 0; k < 4; k++)
113                 {
114                     v_store_aligned(storevq + k*vsize/4, vq[k]);
115                 }
116 
117                 for(int k = 0; k < 4; k++)
118                 {
119                     sdiv[k] = vx_lut(sdiv_table, storevq + k*vsize/4);
120                 }
121             }
122 
123             // hdiv = hdiv_table[diff]
124             v_int32 hdiv[4];
125             v_uint16 diffd[2];
126             v_expand(diff, diffd[0], diffd[1]);
127             v_int32 diffq[4];
128             v_expand(v_reinterpret_as_s16(diffd[0]), diffq[0], diffq[1]);
129             v_expand(v_reinterpret_as_s16(diffd[1]), diffq[2], diffq[3]);
130             {
131                 int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) storediffq[vsize];
132                 for (int k = 0; k < 4; k++)
133                 {
134                     v_store_aligned(storediffq + k*vsize/4, diffq[k]);
135                 }
136 
137                 for (int k = 0; k < 4; k++)
138                 {
139                     hdiv[k] = vx_lut((int32_t*)hdiv_table, storediffq + k*vsize/4);
140                 }
141             }
142 
143             // s = (diff * sdiv + (1 << (hsv_shift-1))) >> hsv_shift;
144             v_int32 sq[4];
145             v_int32 vdescale = vx_setall_s32(1 << (hsv_shift-1));
146             for (int k = 0; k < 4; k++)
147             {
148                 sq[k] = (diffq[k]*sdiv[k] + vdescale) >> hsv_shift;
149             }
150             v_int16 sd[2];
151             sd[0] = v_pack(sq[0], sq[1]);
152             sd[1] = v_pack(sq[2], sq[3]);
153             s = v_pack_u(sd[0], sd[1]);
154 
155             // expand all to 16 bits
156             v_uint16 bdu[2], gdu[2], rdu[2];
157             v_expand(b, bdu[0], bdu[1]);
158             v_expand(g, gdu[0], gdu[1]);
159             v_expand(r, rdu[0], rdu[1]);
160             v_int16 bd[2], gd[2], rd[2];
161             bd[0] = v_reinterpret_as_s16(bdu[0]);
162             bd[1] = v_reinterpret_as_s16(bdu[1]);
163             gd[0] = v_reinterpret_as_s16(gdu[0]);
164             gd[1] = v_reinterpret_as_s16(gdu[1]);
165             rd[0] = v_reinterpret_as_s16(rdu[0]);
166             rd[1] = v_reinterpret_as_s16(rdu[1]);
167 
168             v_int16 vrd[2], vgd[2];
169             v_expand(v_reinterpret_as_s8(vr), vrd[0], vrd[1]);
170             v_expand(v_reinterpret_as_s8(vg), vgd[0], vgd[1]);
171             v_int16 diffsd[2];
172             diffsd[0] = v_reinterpret_as_s16(diffd[0]);
173             diffsd[1] = v_reinterpret_as_s16(diffd[1]);
174 
175             v_int16 hd[2];
176             // h before division
177             for (int k = 0; k < 2; k++)
178             {
179                 v_int16 gb = gd[k] - bd[k];
180                 v_int16 br = bd[k] - rd[k] + (diffsd[k] << 1);
181                 v_int16 rg = rd[k] - gd[k] + (diffsd[k] << 2);
182                 hd[k] = (vrd[k] & gb) + ((~vrd[k]) & ((vgd[k] & br) + ((~vgd[k]) & rg)));
183             }
184 
185             // h div and fix
186             v_int32 hq[4];
187             v_expand(hd[0], hq[0], hq[1]);
188             v_expand(hd[1], hq[2], hq[3]);
189             for(int k = 0; k < 4; k++)
190             {
191                 hq[k] = (hq[k]*hdiv[k] + vdescale) >> hsv_shift;
192             }
193             hd[0] = v_pack(hq[0], hq[1]);
194             hd[1] = v_pack(hq[2], hq[3]);
195             v_int16 vhr = vx_setall_s16((short)hr);
196             v_int16 vzd = vx_setzero_s16();
197             hd[0] += v_select(hd[0] < vzd, vhr, vzd);
198             hd[1] += v_select(hd[1] < vzd, vhr, vzd);
199             h = v_pack_u(hd[0], hd[1]);
200 
201             v_store_interleave(dst, h, s, v);
202         }
203 #endif
204 
205         for( ; i < n; i++, src += scn, dst += 3 )
206         {
207             int b = src[bidx], g = src[1], r = src[bidx^2];
208             int h, s, v = b;
209             int vmin = b;
210             int vr, vg;
211 
212             CV_CALC_MAX_8U( v, g );
213             CV_CALC_MAX_8U( v, r );
214             CV_CALC_MIN_8U( vmin, g );
215             CV_CALC_MIN_8U( vmin, r );
216 
217             uchar diff = saturate_cast<uchar>(v - vmin);
218             vr = v == r ? -1 : 0;
219             vg = v == g ? -1 : 0;
220 
221             s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
222             h = (vr & (g - b)) +
223                 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
224             h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
225             h += h < 0 ? hr : 0;
226 
227             dst[0] = saturate_cast<uchar>(h);
228             dst[1] = (uchar)s;
229             dst[2] = (uchar)v;
230         }
231     }
232 
233     int srccn, blueIdx, hrange;
234 };
235 
236 
237 struct RGB2HSV_f
238 {
239     typedef float channel_type;
240 
RGB2HSV_fcv::hal::__anon673258420111::RGB2HSV_f241     RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
242     : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
243     { }
244 
245     #if CV_SIMD
processcv::hal::__anon673258420111::RGB2HSV_f246     inline void process(const v_float32& v_r, const v_float32& v_g, const v_float32& v_b,
247                         v_float32& v_h, v_float32& v_s, v_float32& v_v,
248                         float hscale) const
249     {
250         v_float32 v_min_rgb = v_min(v_min(v_r, v_g), v_b);
251         v_float32 v_max_rgb = v_max(v_max(v_r, v_g), v_b);
252 
253         v_float32 v_eps = vx_setall_f32(FLT_EPSILON);
254         v_float32 v_diff = v_max_rgb - v_min_rgb;
255         v_s = v_diff / (v_abs(v_max_rgb) + v_eps);
256 
257         v_float32 v_r_eq_max = v_r == v_max_rgb;
258         v_float32 v_g_eq_max = v_g == v_max_rgb;
259         v_h = v_select(v_r_eq_max, v_g - v_b,
260               v_select(v_g_eq_max, v_b - v_r, v_r - v_g));
261         v_float32 v_res = v_select(v_r_eq_max, (v_g < v_b) & vx_setall_f32(360.0f),
262                           v_select(v_g_eq_max, vx_setall_f32(120.0f), vx_setall_f32(240.0f)));
263         v_float32 v_rev_diff = vx_setall_f32(60.0f) / (v_diff + v_eps);
264         v_h = v_muladd(v_h, v_rev_diff, v_res) * vx_setall_f32(hscale);
265 
266         v_v = v_max_rgb;
267     }
268     #endif
269 
operator ()cv::hal::__anon673258420111::RGB2HSV_f270     void operator()(const float* src, float* dst, int n) const
271     {
272         CV_INSTRUMENT_REGION();
273 
274         int i = 0, bidx = blueIdx, scn = srccn;
275         float hscale = hrange*(1.f/360.f);
276         n *= 3;
277 
278 #if CV_SIMD
279         const int vsize = v_float32::nlanes;
280         for ( ; i <= n - 3*vsize; i += 3*vsize, src += scn * vsize)
281         {
282             v_float32 r, g, b, a;
283             if(scn == 4)
284             {
285                 v_load_deinterleave(src, r, g, b, a);
286             }
287             else // scn == 3
288             {
289                 v_load_deinterleave(src, r, g, b);
290             }
291 
292             if(bidx)
293                 swap(b, r);
294 
295             v_float32 h, s, v;
296             process(b, g, r, h, s, v, hscale);
297 
298             v_store_interleave(dst + i, h, s, v);
299         }
300 #endif
301 
302         for( ; i < n; i += 3, src += scn )
303         {
304             float b = src[bidx], g = src[1], r = src[bidx^2];
305             float h, s, v;
306 
307             float vmin, diff;
308 
309             v = vmin = r;
310             if( v < g ) v = g;
311             if( v < b ) v = b;
312             if( vmin > g ) vmin = g;
313             if( vmin > b ) vmin = b;
314 
315             diff = v - vmin;
316             s = diff/(float)(fabs(v) + FLT_EPSILON);
317             diff = (float)(60./(diff + FLT_EPSILON));
318             if( v == r )
319                 h = (g - b)*diff;
320             else if( v == g )
321                 h = (b - r)*diff + 120.f;
322             else
323                 h = (r - g)*diff + 240.f;
324 
325             if( h < 0 ) h += 360.f;
326 
327             dst[i] = h*hscale;
328             dst[i+1] = s;
329             dst[i+2] = v;
330         }
331     }
332 
333     int srccn, blueIdx;
334     float hrange;
335 };
336 
337 
338 #if CV_SIMD
HSV2RGB_simd(const v_float32 & h,const v_float32 & s,const v_float32 & v,v_float32 & b,v_float32 & g,v_float32 & r,float hscale)339 inline void HSV2RGB_simd(const v_float32& h, const v_float32& s, const v_float32& v,
340                          v_float32& b, v_float32& g, v_float32& r, float hscale)
341 {
342     v_float32 v_h = h;
343     v_float32 v_s = s;
344     v_float32 v_v = v;
345 
346     v_h = v_h * vx_setall_f32(hscale);
347 
348     v_float32 v_pre_sector = v_cvt_f32(v_trunc(v_h));
349     v_h = v_h - v_pre_sector;
350     v_float32 v_tab0 = v_v;
351     v_float32 v_one = vx_setall_f32(1.0f);
352     v_float32 v_tab1 = v_v * (v_one - v_s);
353     v_float32 v_tab2 = v_v * (v_one - (v_s * v_h));
354     v_float32 v_tab3 = v_v * (v_one - (v_s * (v_one - v_h)));
355 
356     v_float32 v_one_sixth = vx_setall_f32(1.0f / 6.0f);
357     v_float32 v_sector = v_pre_sector * v_one_sixth;
358     v_sector = v_cvt_f32(v_trunc(v_sector));
359     v_float32 v_six = vx_setall_f32(6.0f);
360     v_sector = v_pre_sector - (v_sector * v_six);
361 
362     v_float32 v_two = vx_setall_f32(2.0f);
363     v_h = v_tab1 & (v_sector < v_two);
364     v_h = v_h | (v_tab3 & (v_sector == v_two));
365     v_float32 v_three = vx_setall_f32(3.0f);
366     v_h = v_h | (v_tab0 & (v_sector == v_three));
367     v_float32 v_four = vx_setall_f32(4.0f);
368     v_h = v_h | (v_tab0 & (v_sector == v_four));
369     v_h = v_h | (v_tab2 & (v_sector > v_four));
370 
371     v_s = v_tab3 & (v_sector < v_one);
372     v_s = v_s | (v_tab0 & (v_sector == v_one));
373     v_s = v_s | (v_tab0 & (v_sector == v_two));
374     v_s = v_s | (v_tab2 & (v_sector == v_three));
375     v_s = v_s | (v_tab1 & (v_sector > v_three));
376 
377     v_v = v_tab0 & (v_sector < v_one);
378     v_v = v_v | (v_tab2 & (v_sector == v_one));
379     v_v = v_v | (v_tab1 & (v_sector == v_two));
380     v_v = v_v | (v_tab1 & (v_sector == v_three));
381     v_v = v_v | (v_tab3 & (v_sector == v_four));
382     v_v = v_v | (v_tab0 & (v_sector > v_four));
383 
384     b = v_h;
385     g = v_s;
386     r = v_v;
387 }
388 #endif
389 
390 
HSV2RGB_native(float h,float s,float v,float & b,float & g,float & r,const float hscale)391 inline void HSV2RGB_native(float h, float s, float v,
392                            float& b, float& g, float& r,
393                            const float hscale)
394 {
395     if( s == 0 )
396         b = g = r = v;
397     else
398     {
399         static const int sector_data[][3]=
400             {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
401         float tab[4];
402         int sector;
403         h *= hscale;
404         h = fmod(h, 6.f);
405         sector = cvFloor(h);
406         h -= sector;
407         if( (unsigned)sector >= 6u )
408         {
409             sector = 0;
410             h = 0.f;
411         }
412 
413         tab[0] = v;
414         tab[1] = v*(1.f - s);
415         tab[2] = v*(1.f - s*h);
416         tab[3] = v*(1.f - s*(1.f - h));
417 
418         b = tab[sector_data[sector][0]];
419         g = tab[sector_data[sector][1]];
420         r = tab[sector_data[sector][2]];
421     }
422 }
423 
424 
425 struct HSV2RGB_f
426 {
427     typedef float channel_type;
428 
HSV2RGB_fcv::hal::__anon673258420111::HSV2RGB_f429     HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
430     : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange)
431     { }
432 
operator ()cv::hal::__anon673258420111::HSV2RGB_f433     void operator()(const float* src, float* dst, int n) const
434     {
435         CV_INSTRUMENT_REGION();
436 
437         int i = 0, bidx = blueIdx, dcn = dstcn;
438         float alpha = ColorChannel<float>::max();
439         float hs = hscale;
440         n *= 3;
441 
442 #if CV_SIMD
443         const int vsize = v_float32::nlanes;
444         v_float32 valpha = vx_setall_f32(alpha);
445         for (; i <= n - vsize*3; i += vsize*3, dst += dcn * vsize)
446         {
447             v_float32 h, s, v, b, g, r;
448             v_load_deinterleave(src + i, h, s, v);
449 
450             HSV2RGB_simd(h, s, v, b, g, r, hs);
451 
452             if(bidx)
453                 swap(b, r);
454 
455             if(dcn == 4)
456             {
457                 v_store_interleave(dst, b, g, r, valpha);
458             }
459             else // dcn == 3
460             {
461                 v_store_interleave(dst, b, g, r);
462             }
463         }
464 #endif
465         for( ; i < n; i += 3, dst += dcn )
466         {
467             float h = src[i + 0], s = src[i + 1], v = src[i + 2];
468             float b, g, r;
469             HSV2RGB_native(h, s, v, b, g, r, hs);
470 
471             dst[bidx] = b;
472             dst[1] = g;
473             dst[bidx^2] = r;
474             if(dcn == 4)
475                 dst[3] = alpha;
476         }
477     }
478 
479     int dstcn, blueIdx;
480     float hscale;
481 };
482 
483 
484 struct HSV2RGB_b
485 {
486     typedef uchar channel_type;
487 
HSV2RGB_bcv::hal::__anon673258420111::HSV2RGB_b488     HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
489     : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.0f / _hrange)
490     { }
491 
operator ()cv::hal::__anon673258420111::HSV2RGB_b492     void operator()(const uchar* src, uchar* dst, int n) const
493     {
494         CV_INSTRUMENT_REGION();
495 
496         int j = 0, dcn = dstcn;
497         uchar alpha = ColorChannel<uchar>::max();
498 
499 #if CV_SIMD
500         const int vsize = v_float32::nlanes;
501 
502         for (j = 0; j <= (n - vsize*4) * 3; j += 3 * 4 * vsize, dst += dcn * 4 * vsize)
503         {
504             v_uint8 h_b, s_b, v_b;
505             v_uint16 h_w[2], s_w[2], v_w[2];
506             v_uint32 h_u[4], s_u[4], v_u[4];
507             v_load_deinterleave(src + j, h_b, s_b, v_b);
508             v_expand(h_b, h_w[0], h_w[1]);
509             v_expand(s_b, s_w[0], s_w[1]);
510             v_expand(v_b, v_w[0], v_w[1]);
511             v_expand(h_w[0], h_u[0], h_u[1]);
512             v_expand(h_w[1], h_u[2], h_u[3]);
513             v_expand(s_w[0], s_u[0], s_u[1]);
514             v_expand(s_w[1], s_u[2], s_u[3]);
515             v_expand(v_w[0], v_u[0], v_u[1]);
516             v_expand(v_w[1], v_u[2], v_u[3]);
517 
518             v_int32 b_i[4], g_i[4], r_i[4];
519             v_float32 v_coeff0 = vx_setall_f32(1.0f / 255.0f);
520             v_float32 v_coeff1 = vx_setall_f32(255.0f);
521 
522             for( int k = 0; k < 4; k++ )
523             {
524                 v_float32 h = v_cvt_f32(v_reinterpret_as_s32(h_u[k]));
525                 v_float32 s = v_cvt_f32(v_reinterpret_as_s32(s_u[k]));
526                 v_float32 v = v_cvt_f32(v_reinterpret_as_s32(v_u[k]));
527 
528                 s *= v_coeff0;
529                 v *= v_coeff0;
530                 v_float32 b, g, r;
531                 HSV2RGB_simd(h, s, v, b, g, r, hscale);
532 
533                 b *= v_coeff1;
534                 g *= v_coeff1;
535                 r *= v_coeff1;
536                 b_i[k] = v_trunc(b);
537                 g_i[k] = v_trunc(g);
538                 r_i[k] = v_trunc(r);
539             }
540 
541             v_uint16 r_w[2], g_w[2], b_w[2];
542             v_uint8 r_b, g_b, b_b;
543 
544             r_w[0] = v_pack_u(r_i[0], r_i[1]);
545             r_w[1] = v_pack_u(r_i[2], r_i[3]);
546             r_b = v_pack(r_w[0], r_w[1]);
547             g_w[0] = v_pack_u(g_i[0], g_i[1]);
548             g_w[1] = v_pack_u(g_i[2], g_i[3]);
549             g_b = v_pack(g_w[0], g_w[1]);
550             b_w[0] = v_pack_u(b_i[0], b_i[1]);
551             b_w[1] = v_pack_u(b_i[2], b_i[3]);
552             b_b = v_pack(b_w[0], b_w[1]);
553 
554             if( dcn == 3 )
555             {
556                 if( blueIdx == 0 )
557                     v_store_interleave(dst, b_b, g_b, r_b);
558                 else
559                     v_store_interleave(dst, r_b, g_b, b_b);
560             }
561             else
562             {
563                 v_uint8 alpha_b = vx_setall_u8(alpha);
564                 if( blueIdx == 0 )
565                     v_store_interleave(dst, b_b, g_b, r_b, alpha_b);
566                 else
567                     v_store_interleave(dst, r_b, g_b, b_b, alpha_b);
568             }
569         }
570 #endif
571 
572         for( ; j < n * 3; j += 3, dst += dcn )
573         {
574             float h, s, v, b, g, r;
575             h = src[j];
576             s = src[j+1] * (1.0f / 255.0f);
577             v = src[j+2] * (1.0f / 255.0f);
578             HSV2RGB_native(h, s, v, b, g, r, hscale);
579 
580             dst[blueIdx]   = saturate_cast<uchar>(b * 255.0f);
581             dst[1]         = saturate_cast<uchar>(g * 255.0f);
582             dst[blueIdx^2] = saturate_cast<uchar>(r * 255.0f);
583 
584             if( dcn == 4 )
585                 dst[3] = alpha;
586         }
587     }
588 
589     int dstcn;
590     int blueIdx;
591     float hscale;
592 };
593 
594 
595 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
596 
597 struct RGB2HLS_f
598 {
599     typedef float channel_type;
600 
RGB2HLS_fcv::hal::__anon673258420111::RGB2HLS_f601     RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
602     : srccn(_srccn), blueIdx(_blueIdx), hscale(_hrange/360.f)
603     {
604     }
605 
606 #if CV_SIMD
processcv::hal::__anon673258420111::RGB2HLS_f607     inline void process(const v_float32& r, const v_float32& g, const v_float32& b,
608                         const v_float32& vhscale,
609                         v_float32& h, v_float32& l, v_float32& s) const
610     {
611         v_float32 maxRgb = v_max(v_max(r, g), b);
612         v_float32 minRgb = v_min(v_min(r, g), b);
613 
614         v_float32 diff = maxRgb - minRgb;
615         v_float32 msum = maxRgb + minRgb;
616         v_float32 vhalf = vx_setall_f32(0.5f);
617         l = msum * vhalf;
618 
619         s = diff / v_select(l < vhalf, msum, vx_setall_f32(2.0f) - msum);
620 
621         v_float32 rMaxMask = maxRgb == r;
622         v_float32 gMaxMask = maxRgb == g;
623 
624         h = v_select(rMaxMask, g - b, v_select(gMaxMask, b - r, r - g));
625         v_float32 hpart = v_select(rMaxMask, (g < b) & vx_setall_f32(360.0f),
626                           v_select(gMaxMask, vx_setall_f32(120.0f), vx_setall_f32(240.0f)));
627 
628         v_float32 invDiff = vx_setall_f32(60.0f) / diff;
629         h = v_muladd(h, invDiff, hpart) * vhscale;
630 
631         v_float32 diffEpsMask = diff > vx_setall_f32(FLT_EPSILON);
632 
633         h = diffEpsMask & h;
634         // l = l;
635         s = diffEpsMask & s;
636     }
637 #endif
638 
operator ()cv::hal::__anon673258420111::RGB2HLS_f639     void operator()(const float* src, float* dst, int n) const
640     {
641         CV_INSTRUMENT_REGION();
642 
643         int i = 0, bidx = blueIdx, scn = srccn;
644 
645 #if CV_SIMD
646         const int vsize = v_float32::nlanes;
647         v_float32 vhscale = vx_setall_f32(hscale);
648 
649         for ( ; i <= n - vsize;
650               i += vsize, src += scn * vsize, dst += 3 * vsize)
651         {
652             v_float32 r, g, b, h, l, s;
653 
654             if(scn == 4)
655             {
656                 v_float32 a;
657                 v_load_deinterleave(src, b, g, r, a);
658             }
659             else // scn == 3
660             {
661                 v_load_deinterleave(src, b, g, r);
662             }
663 
664             if(bidx)
665                 swap(r, b);
666 
667             process(r, g, b, vhscale, h, l, s);
668 
669             v_store_interleave(dst, h, l, s);
670         }
671 #endif
672 
673         for( ; i < n; i++, src += scn, dst += 3 )
674         {
675             float b = src[bidx], g = src[1], r = src[bidx^2];
676             float h = 0.f, s = 0.f, l;
677             float vmin, vmax, diff;
678 
679             vmax = vmin = r;
680             if( vmax < g ) vmax = g;
681             if( vmax < b ) vmax = b;
682             if( vmin > g ) vmin = g;
683             if( vmin > b ) vmin = b;
684 
685             diff = vmax - vmin;
686             l = (vmax + vmin)*0.5f;
687 
688             if( diff > FLT_EPSILON )
689             {
690                 s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
691                 diff = 60.f/diff;
692 
693                 if( vmax == r )
694                     h = (g - b)*diff;
695                 else if( vmax == g )
696                     h = (b - r)*diff + 120.f;
697                 else
698                     h = (r - g)*diff + 240.f;
699 
700                 if( h < 0.f ) h += 360.f;
701             }
702 
703             dst[0] = h*hscale;
704             dst[1] = l;
705             dst[2] = s;
706         }
707     }
708 
709     int srccn, blueIdx;
710     float hscale;
711 };
712 
713 
// Row functor converting 8-bit 3- or 4-channel colour pixels to 3-channel 8-bit HLS.
//
// The row is processed in blocks of at most BLOCK_SIZE pixels. For each block:
//   1. the first three channels of every pixel are converted to float and scaled by
//      1/255 into a temporary buffer (a fourth source channel, if any, is dropped);
//   2. the float converter `cvt` (RGB2HLS_f) is run in place on that buffer;
//   3. the result is written back as bytes: H is stored as is, L and S are
//      multiplied by 255.
// NOTE(review): BLOCK_SIZE is defined outside the visible part of this file.
struct RGB2HLS_b
{
    typedef uchar channel_type;
    // number of channels per pixel in the temporary float buffer
    static const int bufChannels = 3;

    // _srccn   - number of source channels
    // _blueIdx - forwarded to the float converter
    // _hrange  - hue range, forwarded to the float converter as a float
    // The float converter is constructed for bufChannels channels because the
    // temporary buffer always holds 3 floats per pixel.
    RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
    : srccn(_srccn), cvt(bufChannels, _blueIdx, (float)_hrange)
    { }

    // Converts n pixels from src (srccn bytes each) to dst (3 bytes each).
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        CV_INSTRUMENT_REGION();

        int scn = srccn;

        // temporary float buffer for one block; aligned for the aligned vector
        // loads/stores used below
#if CV_SIMD
        float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE];
#else
        float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE];
#endif

#if CV_SIMD
        static const int fsize = v_float32::nlanes;
        // Build three vectors holding the repeating per-channel output multipliers
        // (1, 255, 255, 1, 255, 255, ...) by interleaving constants through memory.
        //TODO: fix that when v_interleave is available
        float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3];
        v_store_interleave(interTmpM, vx_setall_f32(1.f), vx_setall_f32(255.f), vx_setall_f32(255.f));
        v_float32 mhls[3];
        for(int k = 0; k < 3; k++)
        {
            mhls[k] = vx_load_aligned(interTmpM + k*fsize);
        }
#endif

        // dst advances by a full block each iteration; src is advanced inside the
        // input-conversion loops below
        for(int i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
        {
            // number of pixels in this block
            int dn = std::min(n - i, (int)BLOCK_SIZE);

#if CV_SIMD
            v_float32 v255inv = vx_setall_f32(1.f/255.f);
            if (scn == 3)
            {
                // 3-channel input: src and buf have the same layout, so bytes are
                // converted as a flat stream, fsize*2 values per iteration
                int j = 0;
                static const int nBlock = fsize*2;
                for ( ; j <= (dn * bufChannels - nBlock);
                      j += nBlock, src += nBlock)
                {
                    v_uint16 drgb = vx_load_expand(src);
                    v_int32 qrgb0, qrgb1;
                    v_expand(v_reinterpret_as_s16(drgb), qrgb0, qrgb1);
                    v_store_aligned(buf + j + 0*fsize, v_cvt_f32(qrgb0)*v255inv);
                    v_store_aligned(buf + j + 1*fsize, v_cvt_f32(qrgb1)*v255inv);
                }
                // scalar tail of the flat stream
                for( ; j < dn*3; j++, src++ )
                {
                    buf[j] = src[0]*(1.f/255.f);
                }
            }
            else // if (scn == 4)
            {
                // 4-channel input: deinterleave, drop the fourth channel, and
                // re-interleave the remaining three as floats; fsize*4 pixels per iteration
                int j = 0;
                static const int nBlock = fsize*4;
                for ( ; j <= dn*bufChannels - nBlock*bufChannels;
                      j += nBlock*bufChannels, src += nBlock*4)
                {
                    v_uint8 rgb[3], dummy;
                    v_load_deinterleave(src, rgb[0], rgb[1], rgb[2], dummy);

                    // widen 8 -> 16 bits: channel k occupies d[2k], d[2k+1]
                    v_uint16 d[3*2];
                    for(int k = 0; k < 3; k++)
                    {
                        v_expand(rgb[k], d[k*2+0], d[k*2+1]);
                    }
                    // widen 16 -> 32 bits: channel k occupies q[4k] .. q[4k+3]
                    v_int32 q[3*4];
                    for(int k = 0; k < 3*2; k++)
                    {
                        v_expand(v_reinterpret_as_s16(d[k]), q[k*2+0], q[k*2+1]);
                    }

                    v_float32 f[3*4];
                    for(int k = 0; k < 3*4; k++)
                    {
                        f[k] = v_cvt_f32(q[k])*v255inv;
                    }

                    // quarter k of each channel is interleaved into the buffer
                    for(int k = 0; k < 4; k++)
                    {
                        v_store_interleave(buf + j + k*bufChannels*fsize, f[0*4+k], f[1*4+k], f[2*4+k]);
                    }
                }
                // scalar tail: copy three channels, skip the fourth
                for( ; j < dn*3; j += 3, src += 4 )
                {
                    buf[j+0] = src[0]*(1.f/255.f);
                    buf[j+1] = src[1]*(1.f/255.f);
                    buf[j+2] = src[2]*(1.f/255.f);
                }
            }
#else
            // no SIMD: copy the first three channels of every pixel
            for(int j = 0; j < dn*3; j += 3, src += scn )
            {
                buf[j+0] = src[0]*(1.f/255.f);
                buf[j+1] = src[1]*(1.f/255.f);
                buf[j+2] = src[2]*(1.f/255.f);
            }
#endif
            // float conversion, in place
            cvt(buf, buf, dn);

            int j = 0;
#if CV_SIMD
            // write back fsize*3*4 values per iteration
            for( ; j <= dn*3 - fsize*3*4; j += fsize*3*4)
            {
                v_float32 f[3*4];
                for(int k = 0; k < 3*4; k++)
                {
                    f[k] = vx_load_aligned(buf + j + k*fsize);
                }

                // the multiplier pattern repeats every three vectors
                for(int k = 0; k < 4; k++)
                {
                    for(int l = 0; l < 3; l++)
                    {
                        f[k*3+l] = f[k*3+l] * mhls[l];
                    }
                }

                v_int32 q[3*4];
                for(int k = 0; k < 3*4; k++)
                {
                    q[k] = v_round(f[k]);
                }

                // narrow 32 -> 16 -> 8 bits and store three byte vectors
                for(int k = 0; k < 3; k++)
                {
                    v_store(dst + j + k*fsize*4, v_pack_u(v_pack(q[k*4+0], q[k*4+1]),
                                                          v_pack(q[k*4+2], q[k*4+3])));
                }
            }
#endif
            // scalar path: whole block without SIMD, or the tail left by the vector loop
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
            }
        }
    }

    int srccn;
    RGB2HLS_f cvt;
};
863 
864 
865 struct HLS2RGB_f
866 {
867     typedef float channel_type;
868 
HLS2RGB_fcv::hal::__anon673258420111::HLS2RGB_f869     HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
870     : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange)
871     { }
872 
873 #if CV_SIMD
processcv::hal::__anon673258420111::HLS2RGB_f874     inline void process(const v_float32& h, const v_float32& l, const v_float32& s,
875                         v_float32& b, v_float32& g, v_float32& r) const
876     {
877         v_float32 v1 = vx_setall_f32(1.0f), v2 = vx_setall_f32(2.0f), v4 = vx_setall_f32(4.0f);
878 
879         v_float32 lBelowHalfMask = l <= vx_setall_f32(0.5f);
880         v_float32 ls = l * s;
881         v_float32 elem0 = v_select(lBelowHalfMask, ls, s - ls);
882 
883         v_float32 hsRaw = h * vx_setall_f32(hscale);
884         v_float32 preHs = v_cvt_f32(v_trunc(hsRaw));
885         v_float32 hs = hsRaw - preHs;
886         v_float32 sector = preHs - vx_setall_f32(6.0f) * v_cvt_f32(v_trunc(hsRaw * vx_setall_f32(1.0f / 6.0f)));
887         v_float32 elem1 = hs + hs;
888 
889         v_float32 tab0 = l + elem0;
890         v_float32 tab1 = l - elem0;
891         v_float32 tab2 = l + elem0 - elem0 * elem1;
892         v_float32 tab3 = l - elem0 + elem0 * elem1;
893 
894         b = v_select(sector <  v2, tab1,
895             v_select(sector <= v2, tab3,
896             v_select(sector <= v4, tab0, tab2)));
897 
898         g = v_select(sector <  v1, tab3,
899             v_select(sector <= v2, tab0,
900             v_select(sector <  v4, tab2, tab1)));
901 
902         r = v_select(sector <  v1, tab0,
903             v_select(sector <  v2, tab2,
904             v_select(sector <  v4, tab1,
905             v_select(sector <= v4, tab3, tab0))));
906     }
907 #endif
908 
operator ()cv::hal::__anon673258420111::HLS2RGB_f909     void operator()(const float* src, float* dst, int n) const
910     {
911         CV_INSTRUMENT_REGION();
912 
913         int i = 0, bidx = blueIdx, dcn = dstcn;
914         float alpha = ColorChannel<float>::max();
915 
916 #if CV_SIMD
917         static const int vsize = v_float32::nlanes;
918         for (; i <= n - vsize; i += vsize, src += 3*vsize, dst += dcn*vsize)
919         {
920             v_float32 h, l, s, r, g, b;
921             v_load_deinterleave(src, h, l, s);
922 
923             process(h, l, s, b, g, r);
924 
925             if(bidx)
926                 swap(b, r);
927 
928             if(dcn == 3)
929             {
930                 v_store_interleave(dst, b, g, r);
931             }
932             else
933             {
934                 v_float32 a = vx_setall_f32(alpha);
935                 v_store_interleave(dst, b, g, r, a);
936             }
937         }
938 #endif
939 
940         for( ; i < n; i++, src += 3, dst += dcn )
941         {
942             float h = src[0], l = src[1], s = src[2];
943             float b, g, r;
944 
945             if( s == 0 )
946                 b = g = r = l;
947             else
948             {
949                 static const int sector_data[][3]=
950                 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
951                 float tab[4];
952                 int sector;
953 
954                 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
955                 float p1 = 2*l - p2;
956 
957                 h *= hscale;
958                 if( h < 0 )
959                     do h += 6; while( h < 0 );
960                 else if( h >= 6 )
961                     do h -= 6; while( h >= 6 );
962 
963                 assert( 0 <= h && h < 6 );
964                 sector = cvFloor(h);
965                 h -= sector;
966 
967                 tab[0] = p2;
968                 tab[1] = p1;
969                 tab[2] = p1 + (p2 - p1)*(1-h);
970                 tab[3] = p1 + (p2 - p1)*h;
971 
972                 b = tab[sector_data[sector][0]];
973                 g = tab[sector_data[sector][1]];
974                 r = tab[sector_data[sector][2]];
975             }
976 
977             dst[bidx] = b;
978             dst[1] = g;
979             dst[bidx^2] = r;
980             if( dcn == 4 )
981                 dst[3] = alpha;
982         }
983     }
984 
985     int dstcn, blueIdx;
986     float hscale;
987 };
988 
989 
990 struct HLS2RGB_b
991 {
992     typedef uchar channel_type;
993     static const int bufChannels = 3;
994 
HLS2RGB_bcv::hal::__anon673258420111::HLS2RGB_b995     HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
996     : dstcn(_dstcn), cvt(bufChannels, _blueIdx, (float)_hrange)
997     { }
998 
operator ()cv::hal::__anon673258420111::HLS2RGB_b999     void operator()(const uchar* src, uchar* dst, int n) const
1000     {
1001         CV_INSTRUMENT_REGION();
1002 
1003         int i, j, dcn = dstcn;
1004         uchar alpha = ColorChannel<uchar>::max();
1005 
1006 #if CV_SIMD
1007         float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE];
1008 #else
1009         float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE];
1010 #endif
1011 
1012 #if CV_SIMD
1013         static const int fsize = v_float32::nlanes;
1014         //TODO: fix that when v_interleave is available
1015         float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3];
1016         v_float32 v255inv = vx_setall_f32(1.f/255.f);
1017         v_store_interleave(interTmpM, vx_setall_f32(1.f), v255inv, v255inv);
1018         v_float32 mhls[3];
1019         for(int k = 0; k < 3; k++)
1020         {
1021             mhls[k] = vx_load_aligned(interTmpM + k*fsize);
1022         }
1023 #endif
1024 
1025         for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1026         {
1027             int dn = std::min(n - i, (int)BLOCK_SIZE);
1028             j = 0;
1029 
1030 #if CV_SIMD
1031             for( ; j <= dn*3 - 3*4*fsize; j += 3*4*fsize)
1032             {
1033                 // 3x uchar -> 3*4 float
1034                 v_uint8 u[3];
1035                 for(int k = 0; k < 3; k++)
1036                 {
1037                     u[k] = vx_load(src + j + k*4*fsize);
1038                 }
1039                 v_uint16 d[3*2];
1040                 for(int k = 0; k < 3; k++)
1041                 {
1042                     v_expand(u[k], d[k*2+0], d[k*2+1]);
1043                 }
1044                 v_int32 q[3*4];
1045                 for(int k = 0; k < 3*2; k++)
1046                 {
1047                     v_expand(v_reinterpret_as_s16(d[k]), q[k*2+0], q[k*2+1]);
1048                 }
1049 
1050                 v_float32 f[3*4];
1051                 for(int k = 0; k < 4; k++)
1052                 {
1053                     for(int l = 0; l < 3; l++)
1054                     {
1055                         f[k*3+l] = v_cvt_f32(q[k*3+l])*mhls[l];
1056                     }
1057                 }
1058 
1059                 for (int k = 0; k < 4*3; k++)
1060                 {
1061                     v_store_aligned(buf + j + k*fsize, f[k]);
1062                 }
1063             }
1064 #endif
1065             for( ; j < dn*3; j += 3 )
1066             {
1067                 buf[j] = src[j];
1068                 buf[j+1] = src[j+1]*(1.f/255.f);
1069                 buf[j+2] = src[j+2]*(1.f/255.f);
1070             }
1071             cvt(buf, buf, dn);
1072 
1073 #if CV_SIMD
1074             v_float32 v255 = vx_setall_f32(255.f);
1075             if(dcn == 3)
1076             {
1077                 int x = 0;
1078                 float* pbuf = buf;
1079                 for( ; x <= dn - 4*fsize; x += 4*fsize, dst += 4*fsize, pbuf += 4*fsize)
1080                 {
1081                     v_float32 vf[4];
1082                     vf[0] = vx_load_aligned(pbuf + 0*fsize);
1083                     vf[1] = vx_load_aligned(pbuf + 1*fsize);
1084                     vf[2] = vx_load_aligned(pbuf + 2*fsize);
1085                     vf[3] = vx_load_aligned(pbuf + 3*fsize);
1086                     v_int32 vi[4];
1087                     vi[0] = v_round(vf[0]*v255);
1088                     vi[1] = v_round(vf[1]*v255);
1089                     vi[2] = v_round(vf[2]*v255);
1090                     vi[3] = v_round(vf[3]*v255);
1091                     v_store(dst, v_pack_u(v_pack(vi[0], vi[1]),
1092                                           v_pack(vi[2], vi[3])));
1093                 }
1094                 for( ; x < dn*3; x++, dst++, pbuf++)
1095                 {
1096                     dst[0] = saturate_cast<uchar>(pbuf[0]*255.f);
1097                 }
1098             }
1099             else // dcn == 4
1100             {
1101                 int x = 0;
1102                 float* pbuf = buf;
1103                 for ( ; x <= dn - 4*fsize; x += fsize, dst += 4*fsize, pbuf += bufChannels*fsize)
1104                 {
1105                     v_float32 r[4], g[4], b[4];
1106                     v_int32 ir[4], ig[4], ib[4];
1107                     for(int k = 0; k < 4; k++)
1108                     {
1109                         v_load_deinterleave(pbuf, r[k], g[k], b[k]);
1110                         ir[k] = v_round(r[k]*v255);
1111                         ig[k] = v_round(g[k]*v255);
1112                         ib[k] = v_round(b[k]*v255);
1113                     }
1114                     v_uint8 ur, ug, ub;
1115                     ur = v_pack_u(v_pack(ir[0], ir[1]), v_pack(ir[2], ir[3]));
1116                     ug = v_pack_u(v_pack(ig[0], ig[1]), v_pack(ig[2], ig[3]));
1117                     ub = v_pack_u(v_pack(ib[0], ib[1]), v_pack(ib[2], ib[3]));
1118 
1119                     v_uint8 valpha = vx_setall_u8(alpha);
1120                     v_store_interleave(dst, ur, ug, ub, valpha);
1121                 }
1122 
1123                 for( ; x < dn; x++, dst += dcn, pbuf += bufChannels)
1124                 {
1125                     dst[0] = saturate_cast<uchar>(pbuf[0]*255.f);
1126                     dst[1] = saturate_cast<uchar>(pbuf[1]*255.f);
1127                     dst[2] = saturate_cast<uchar>(pbuf[2]*255.f);
1128                     dst[3] = alpha;
1129                 }
1130             }
1131 #else
1132             for(int x = 0; x < dn*3; x += 3, dst += dcn )
1133             {
1134                 dst[0] = saturate_cast<uchar>(buf[x+0]*255.f);
1135                 dst[1] = saturate_cast<uchar>(buf[x+1]*255.f);
1136                 dst[2] = saturate_cast<uchar>(buf[x+2]*255.f);
1137                 if( dcn == 4 )
1138                     dst[3] = alpha;
1139             }
1140 #endif
1141         }
1142     }
1143 
1144     int dstcn;
1145     HLS2RGB_f cvt;
1146 };
1147 
1148 } // namespace anon
1149 
1150 // 8u, 32f
cvtBGRtoHSV(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int width,int height,int depth,int scn,bool swapBlue,bool isFullRange,bool isHSV)1151 void cvtBGRtoHSV(const uchar * src_data, size_t src_step,
1152                  uchar * dst_data, size_t dst_step,
1153                  int width, int height,
1154                  int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV)
1155 {
1156     CV_INSTRUMENT_REGION();
1157 
1158     int hrange = depth == CV_32F ? 360 : isFullRange ? 256 : 180;
1159     int blueIdx = swapBlue ? 2 : 0;
1160     if(isHSV)
1161     {
1162         if(depth == CV_8U)
1163             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HSV_b(scn, blueIdx, hrange));
1164         else
1165             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HSV_f(scn, blueIdx, static_cast<float>(hrange)));
1166     }
1167     else
1168     {
1169         if( depth == CV_8U )
1170             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HLS_b(scn, blueIdx, hrange));
1171         else
1172             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HLS_f(scn, blueIdx, static_cast<float>(hrange)));
1173     }
1174 }
1175 
1176 // 8u, 32f
cvtHSVtoBGR(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int width,int height,int depth,int dcn,bool swapBlue,bool isFullRange,bool isHSV)1177 void cvtHSVtoBGR(const uchar * src_data, size_t src_step,
1178                  uchar * dst_data, size_t dst_step,
1179                  int width, int height,
1180                  int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV)
1181 {
1182     CV_INSTRUMENT_REGION();
1183 
1184     int hrange = depth == CV_32F ? 360 : isFullRange ? 255 : 180;
1185     int blueIdx = swapBlue ? 2 : 0;
1186     if(isHSV)
1187     {
1188         if( depth == CV_8U )
1189             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HSV2RGB_b(dcn, blueIdx, hrange));
1190         else
1191             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HSV2RGB_f(dcn, blueIdx, static_cast<float>(hrange)));
1192     }
1193     else
1194     {
1195         if( depth == CV_8U )
1196             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HLS2RGB_b(dcn, blueIdx, hrange));
1197         else
1198             CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HLS2RGB_f(dcn, blueIdx, static_cast<float>(hrange)));
1199     }
1200 }
1201 
1202 #endif
1203 CV_CPU_OPTIMIZATION_NAMESPACE_END
1204 }} // namespace
1205