1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4
5 #include "precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7
8 namespace cv {
9 namespace hal {
10 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
11 // forward declarations
12
13 void cvtBGRtoHSV(const uchar * src_data, size_t src_step,
14 uchar * dst_data, size_t dst_step,
15 int width, int height,
16 int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV);
17 void cvtHSVtoBGR(const uchar * src_data, size_t src_step,
18 uchar * dst_data, size_t dst_step,
19 int width, int height,
20 int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
21
22 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
23
24 #if defined(CV_CPU_BASELINE_MODE)
25 // included in color.hpp
26 #else
27 #include "color.simd_helpers.hpp"
28 #endif
29
30 namespace {
31 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
32
33
34 struct RGB2HSV_b
35 {
36 typedef uchar channel_type;
37
RGB2HSV_bcv::hal::__anon673258420111::RGB2HSV_b38 RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
39 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
40 {
41 CV_Assert( hrange == 180 || hrange == 256 );
42 }
43
operator ()cv::hal::__anon673258420111::RGB2HSV_b44 void operator()(const uchar* src, uchar* dst, int n) const
45 {
46 CV_INSTRUMENT_REGION();
47
48 int i, bidx = blueIdx, scn = srccn;
49 const int hsv_shift = 12;
50
51 static int sdiv_table[256];
52 static int hdiv_table180[256];
53 static int hdiv_table256[256];
54 static volatile bool initialized = false;
55
56 int hr = hrange;
57 const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
58
59 if( !initialized )
60 {
61 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
62 for( i = 1; i < 256; i++ )
63 {
64 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
65 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
66 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
67 }
68 initialized = true;
69 }
70
71 i = 0;
72
73 #if CV_SIMD
74 const int vsize = v_uint8::nlanes;
75 for ( ; i <= n - vsize;
76 i += vsize, src += scn*vsize, dst += 3*vsize)
77 {
78 v_uint8 b, g, r;
79 if(scn == 4)
80 {
81 v_uint8 a;
82 v_load_deinterleave(src, b, g, r, a);
83 }
84 else
85 {
86 v_load_deinterleave(src, b, g, r);
87 }
88
89 if(bidx)
90 swap(b, r);
91
92 v_uint8 h, s, v;
93 v_uint8 vmin;
94 v = v_max(b, v_max(g, r));
95 vmin = v_min(b, v_min(g, r));
96
97 v_uint8 diff, vr, vg;
98 diff = v - vmin;
99 v_uint8 v255 = vx_setall_u8(0xff), vz = vx_setzero_u8();
100 vr = v_select(v == r, v255, vz);
101 vg = v_select(v == g, v255, vz);
102
103 // sdiv = sdiv_table[v]
104 v_int32 sdiv[4];
105 v_uint16 vd[2];
106 v_expand(v, vd[0], vd[1]);
107 v_int32 vq[4];
108 v_expand(v_reinterpret_as_s16(vd[0]), vq[0], vq[1]);
109 v_expand(v_reinterpret_as_s16(vd[1]), vq[2], vq[3]);
110 {
111 int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) storevq[vsize];
112 for (int k = 0; k < 4; k++)
113 {
114 v_store_aligned(storevq + k*vsize/4, vq[k]);
115 }
116
117 for(int k = 0; k < 4; k++)
118 {
119 sdiv[k] = vx_lut(sdiv_table, storevq + k*vsize/4);
120 }
121 }
122
123 // hdiv = hdiv_table[diff]
124 v_int32 hdiv[4];
125 v_uint16 diffd[2];
126 v_expand(diff, diffd[0], diffd[1]);
127 v_int32 diffq[4];
128 v_expand(v_reinterpret_as_s16(diffd[0]), diffq[0], diffq[1]);
129 v_expand(v_reinterpret_as_s16(diffd[1]), diffq[2], diffq[3]);
130 {
131 int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) storediffq[vsize];
132 for (int k = 0; k < 4; k++)
133 {
134 v_store_aligned(storediffq + k*vsize/4, diffq[k]);
135 }
136
137 for (int k = 0; k < 4; k++)
138 {
139 hdiv[k] = vx_lut((int32_t*)hdiv_table, storediffq + k*vsize/4);
140 }
141 }
142
143 // s = (diff * sdiv + (1 << (hsv_shift-1))) >> hsv_shift;
144 v_int32 sq[4];
145 v_int32 vdescale = vx_setall_s32(1 << (hsv_shift-1));
146 for (int k = 0; k < 4; k++)
147 {
148 sq[k] = (diffq[k]*sdiv[k] + vdescale) >> hsv_shift;
149 }
150 v_int16 sd[2];
151 sd[0] = v_pack(sq[0], sq[1]);
152 sd[1] = v_pack(sq[2], sq[3]);
153 s = v_pack_u(sd[0], sd[1]);
154
155 // expand all to 16 bits
156 v_uint16 bdu[2], gdu[2], rdu[2];
157 v_expand(b, bdu[0], bdu[1]);
158 v_expand(g, gdu[0], gdu[1]);
159 v_expand(r, rdu[0], rdu[1]);
160 v_int16 bd[2], gd[2], rd[2];
161 bd[0] = v_reinterpret_as_s16(bdu[0]);
162 bd[1] = v_reinterpret_as_s16(bdu[1]);
163 gd[0] = v_reinterpret_as_s16(gdu[0]);
164 gd[1] = v_reinterpret_as_s16(gdu[1]);
165 rd[0] = v_reinterpret_as_s16(rdu[0]);
166 rd[1] = v_reinterpret_as_s16(rdu[1]);
167
168 v_int16 vrd[2], vgd[2];
169 v_expand(v_reinterpret_as_s8(vr), vrd[0], vrd[1]);
170 v_expand(v_reinterpret_as_s8(vg), vgd[0], vgd[1]);
171 v_int16 diffsd[2];
172 diffsd[0] = v_reinterpret_as_s16(diffd[0]);
173 diffsd[1] = v_reinterpret_as_s16(diffd[1]);
174
175 v_int16 hd[2];
176 // h before division
177 for (int k = 0; k < 2; k++)
178 {
179 v_int16 gb = gd[k] - bd[k];
180 v_int16 br = bd[k] - rd[k] + (diffsd[k] << 1);
181 v_int16 rg = rd[k] - gd[k] + (diffsd[k] << 2);
182 hd[k] = (vrd[k] & gb) + ((~vrd[k]) & ((vgd[k] & br) + ((~vgd[k]) & rg)));
183 }
184
185 // h div and fix
186 v_int32 hq[4];
187 v_expand(hd[0], hq[0], hq[1]);
188 v_expand(hd[1], hq[2], hq[3]);
189 for(int k = 0; k < 4; k++)
190 {
191 hq[k] = (hq[k]*hdiv[k] + vdescale) >> hsv_shift;
192 }
193 hd[0] = v_pack(hq[0], hq[1]);
194 hd[1] = v_pack(hq[2], hq[3]);
195 v_int16 vhr = vx_setall_s16((short)hr);
196 v_int16 vzd = vx_setzero_s16();
197 hd[0] += v_select(hd[0] < vzd, vhr, vzd);
198 hd[1] += v_select(hd[1] < vzd, vhr, vzd);
199 h = v_pack_u(hd[0], hd[1]);
200
201 v_store_interleave(dst, h, s, v);
202 }
203 #endif
204
205 for( ; i < n; i++, src += scn, dst += 3 )
206 {
207 int b = src[bidx], g = src[1], r = src[bidx^2];
208 int h, s, v = b;
209 int vmin = b;
210 int vr, vg;
211
212 CV_CALC_MAX_8U( v, g );
213 CV_CALC_MAX_8U( v, r );
214 CV_CALC_MIN_8U( vmin, g );
215 CV_CALC_MIN_8U( vmin, r );
216
217 uchar diff = saturate_cast<uchar>(v - vmin);
218 vr = v == r ? -1 : 0;
219 vg = v == g ? -1 : 0;
220
221 s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
222 h = (vr & (g - b)) +
223 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
224 h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
225 h += h < 0 ? hr : 0;
226
227 dst[0] = saturate_cast<uchar>(h);
228 dst[1] = (uchar)s;
229 dst[2] = (uchar)v;
230 }
231 }
232
233 int srccn, blueIdx, hrange;
234 };
235
236
237 struct RGB2HSV_f
238 {
239 typedef float channel_type;
240
RGB2HSV_fcv::hal::__anon673258420111::RGB2HSV_f241 RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
242 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
243 { }
244
245 #if CV_SIMD
processcv::hal::__anon673258420111::RGB2HSV_f246 inline void process(const v_float32& v_r, const v_float32& v_g, const v_float32& v_b,
247 v_float32& v_h, v_float32& v_s, v_float32& v_v,
248 float hscale) const
249 {
250 v_float32 v_min_rgb = v_min(v_min(v_r, v_g), v_b);
251 v_float32 v_max_rgb = v_max(v_max(v_r, v_g), v_b);
252
253 v_float32 v_eps = vx_setall_f32(FLT_EPSILON);
254 v_float32 v_diff = v_max_rgb - v_min_rgb;
255 v_s = v_diff / (v_abs(v_max_rgb) + v_eps);
256
257 v_float32 v_r_eq_max = v_r == v_max_rgb;
258 v_float32 v_g_eq_max = v_g == v_max_rgb;
259 v_h = v_select(v_r_eq_max, v_g - v_b,
260 v_select(v_g_eq_max, v_b - v_r, v_r - v_g));
261 v_float32 v_res = v_select(v_r_eq_max, (v_g < v_b) & vx_setall_f32(360.0f),
262 v_select(v_g_eq_max, vx_setall_f32(120.0f), vx_setall_f32(240.0f)));
263 v_float32 v_rev_diff = vx_setall_f32(60.0f) / (v_diff + v_eps);
264 v_h = v_muladd(v_h, v_rev_diff, v_res) * vx_setall_f32(hscale);
265
266 v_v = v_max_rgb;
267 }
268 #endif
269
operator ()cv::hal::__anon673258420111::RGB2HSV_f270 void operator()(const float* src, float* dst, int n) const
271 {
272 CV_INSTRUMENT_REGION();
273
274 int i = 0, bidx = blueIdx, scn = srccn;
275 float hscale = hrange*(1.f/360.f);
276 n *= 3;
277
278 #if CV_SIMD
279 const int vsize = v_float32::nlanes;
280 for ( ; i <= n - 3*vsize; i += 3*vsize, src += scn * vsize)
281 {
282 v_float32 r, g, b, a;
283 if(scn == 4)
284 {
285 v_load_deinterleave(src, r, g, b, a);
286 }
287 else // scn == 3
288 {
289 v_load_deinterleave(src, r, g, b);
290 }
291
292 if(bidx)
293 swap(b, r);
294
295 v_float32 h, s, v;
296 process(b, g, r, h, s, v, hscale);
297
298 v_store_interleave(dst + i, h, s, v);
299 }
300 #endif
301
302 for( ; i < n; i += 3, src += scn )
303 {
304 float b = src[bidx], g = src[1], r = src[bidx^2];
305 float h, s, v;
306
307 float vmin, diff;
308
309 v = vmin = r;
310 if( v < g ) v = g;
311 if( v < b ) v = b;
312 if( vmin > g ) vmin = g;
313 if( vmin > b ) vmin = b;
314
315 diff = v - vmin;
316 s = diff/(float)(fabs(v) + FLT_EPSILON);
317 diff = (float)(60./(diff + FLT_EPSILON));
318 if( v == r )
319 h = (g - b)*diff;
320 else if( v == g )
321 h = (b - r)*diff + 120.f;
322 else
323 h = (r - g)*diff + 240.f;
324
325 if( h < 0 ) h += 360.f;
326
327 dst[i] = h*hscale;
328 dst[i+1] = s;
329 dst[i+2] = v;
330 }
331 }
332
333 int srccn, blueIdx;
334 float hrange;
335 };
336
337
338 #if CV_SIMD
HSV2RGB_simd(const v_float32 & h,const v_float32 & s,const v_float32 & v,v_float32 & b,v_float32 & g,v_float32 & r,float hscale)339 inline void HSV2RGB_simd(const v_float32& h, const v_float32& s, const v_float32& v,
340 v_float32& b, v_float32& g, v_float32& r, float hscale)
341 {
342 v_float32 v_h = h;
343 v_float32 v_s = s;
344 v_float32 v_v = v;
345
346 v_h = v_h * vx_setall_f32(hscale);
347
348 v_float32 v_pre_sector = v_cvt_f32(v_trunc(v_h));
349 v_h = v_h - v_pre_sector;
350 v_float32 v_tab0 = v_v;
351 v_float32 v_one = vx_setall_f32(1.0f);
352 v_float32 v_tab1 = v_v * (v_one - v_s);
353 v_float32 v_tab2 = v_v * (v_one - (v_s * v_h));
354 v_float32 v_tab3 = v_v * (v_one - (v_s * (v_one - v_h)));
355
356 v_float32 v_one_sixth = vx_setall_f32(1.0f / 6.0f);
357 v_float32 v_sector = v_pre_sector * v_one_sixth;
358 v_sector = v_cvt_f32(v_trunc(v_sector));
359 v_float32 v_six = vx_setall_f32(6.0f);
360 v_sector = v_pre_sector - (v_sector * v_six);
361
362 v_float32 v_two = vx_setall_f32(2.0f);
363 v_h = v_tab1 & (v_sector < v_two);
364 v_h = v_h | (v_tab3 & (v_sector == v_two));
365 v_float32 v_three = vx_setall_f32(3.0f);
366 v_h = v_h | (v_tab0 & (v_sector == v_three));
367 v_float32 v_four = vx_setall_f32(4.0f);
368 v_h = v_h | (v_tab0 & (v_sector == v_four));
369 v_h = v_h | (v_tab2 & (v_sector > v_four));
370
371 v_s = v_tab3 & (v_sector < v_one);
372 v_s = v_s | (v_tab0 & (v_sector == v_one));
373 v_s = v_s | (v_tab0 & (v_sector == v_two));
374 v_s = v_s | (v_tab2 & (v_sector == v_three));
375 v_s = v_s | (v_tab1 & (v_sector > v_three));
376
377 v_v = v_tab0 & (v_sector < v_one);
378 v_v = v_v | (v_tab2 & (v_sector == v_one));
379 v_v = v_v | (v_tab1 & (v_sector == v_two));
380 v_v = v_v | (v_tab1 & (v_sector == v_three));
381 v_v = v_v | (v_tab3 & (v_sector == v_four));
382 v_v = v_v | (v_tab0 & (v_sector > v_four));
383
384 b = v_h;
385 g = v_s;
386 r = v_v;
387 }
388 #endif
389
390
HSV2RGB_native(float h,float s,float v,float & b,float & g,float & r,const float hscale)391 inline void HSV2RGB_native(float h, float s, float v,
392 float& b, float& g, float& r,
393 const float hscale)
394 {
395 if( s == 0 )
396 b = g = r = v;
397 else
398 {
399 static const int sector_data[][3]=
400 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
401 float tab[4];
402 int sector;
403 h *= hscale;
404 h = fmod(h, 6.f);
405 sector = cvFloor(h);
406 h -= sector;
407 if( (unsigned)sector >= 6u )
408 {
409 sector = 0;
410 h = 0.f;
411 }
412
413 tab[0] = v;
414 tab[1] = v*(1.f - s);
415 tab[2] = v*(1.f - s*h);
416 tab[3] = v*(1.f - s*(1.f - h));
417
418 b = tab[sector_data[sector][0]];
419 g = tab[sector_data[sector][1]];
420 r = tab[sector_data[sector][2]];
421 }
422 }
423
424
425 struct HSV2RGB_f
426 {
427 typedef float channel_type;
428
HSV2RGB_fcv::hal::__anon673258420111::HSV2RGB_f429 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
430 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange)
431 { }
432
operator ()cv::hal::__anon673258420111::HSV2RGB_f433 void operator()(const float* src, float* dst, int n) const
434 {
435 CV_INSTRUMENT_REGION();
436
437 int i = 0, bidx = blueIdx, dcn = dstcn;
438 float alpha = ColorChannel<float>::max();
439 float hs = hscale;
440 n *= 3;
441
442 #if CV_SIMD
443 const int vsize = v_float32::nlanes;
444 v_float32 valpha = vx_setall_f32(alpha);
445 for (; i <= n - vsize*3; i += vsize*3, dst += dcn * vsize)
446 {
447 v_float32 h, s, v, b, g, r;
448 v_load_deinterleave(src + i, h, s, v);
449
450 HSV2RGB_simd(h, s, v, b, g, r, hs);
451
452 if(bidx)
453 swap(b, r);
454
455 if(dcn == 4)
456 {
457 v_store_interleave(dst, b, g, r, valpha);
458 }
459 else // dcn == 3
460 {
461 v_store_interleave(dst, b, g, r);
462 }
463 }
464 #endif
465 for( ; i < n; i += 3, dst += dcn )
466 {
467 float h = src[i + 0], s = src[i + 1], v = src[i + 2];
468 float b, g, r;
469 HSV2RGB_native(h, s, v, b, g, r, hs);
470
471 dst[bidx] = b;
472 dst[1] = g;
473 dst[bidx^2] = r;
474 if(dcn == 4)
475 dst[3] = alpha;
476 }
477 }
478
479 int dstcn, blueIdx;
480 float hscale;
481 };
482
483
484 struct HSV2RGB_b
485 {
486 typedef uchar channel_type;
487
HSV2RGB_bcv::hal::__anon673258420111::HSV2RGB_b488 HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
489 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.0f / _hrange)
490 { }
491
operator ()cv::hal::__anon673258420111::HSV2RGB_b492 void operator()(const uchar* src, uchar* dst, int n) const
493 {
494 CV_INSTRUMENT_REGION();
495
496 int j = 0, dcn = dstcn;
497 uchar alpha = ColorChannel<uchar>::max();
498
499 #if CV_SIMD
500 const int vsize = v_float32::nlanes;
501
502 for (j = 0; j <= (n - vsize*4) * 3; j += 3 * 4 * vsize, dst += dcn * 4 * vsize)
503 {
504 v_uint8 h_b, s_b, v_b;
505 v_uint16 h_w[2], s_w[2], v_w[2];
506 v_uint32 h_u[4], s_u[4], v_u[4];
507 v_load_deinterleave(src + j, h_b, s_b, v_b);
508 v_expand(h_b, h_w[0], h_w[1]);
509 v_expand(s_b, s_w[0], s_w[1]);
510 v_expand(v_b, v_w[0], v_w[1]);
511 v_expand(h_w[0], h_u[0], h_u[1]);
512 v_expand(h_w[1], h_u[2], h_u[3]);
513 v_expand(s_w[0], s_u[0], s_u[1]);
514 v_expand(s_w[1], s_u[2], s_u[3]);
515 v_expand(v_w[0], v_u[0], v_u[1]);
516 v_expand(v_w[1], v_u[2], v_u[3]);
517
518 v_int32 b_i[4], g_i[4], r_i[4];
519 v_float32 v_coeff0 = vx_setall_f32(1.0f / 255.0f);
520 v_float32 v_coeff1 = vx_setall_f32(255.0f);
521
522 for( int k = 0; k < 4; k++ )
523 {
524 v_float32 h = v_cvt_f32(v_reinterpret_as_s32(h_u[k]));
525 v_float32 s = v_cvt_f32(v_reinterpret_as_s32(s_u[k]));
526 v_float32 v = v_cvt_f32(v_reinterpret_as_s32(v_u[k]));
527
528 s *= v_coeff0;
529 v *= v_coeff0;
530 v_float32 b, g, r;
531 HSV2RGB_simd(h, s, v, b, g, r, hscale);
532
533 b *= v_coeff1;
534 g *= v_coeff1;
535 r *= v_coeff1;
536 b_i[k] = v_trunc(b);
537 g_i[k] = v_trunc(g);
538 r_i[k] = v_trunc(r);
539 }
540
541 v_uint16 r_w[2], g_w[2], b_w[2];
542 v_uint8 r_b, g_b, b_b;
543
544 r_w[0] = v_pack_u(r_i[0], r_i[1]);
545 r_w[1] = v_pack_u(r_i[2], r_i[3]);
546 r_b = v_pack(r_w[0], r_w[1]);
547 g_w[0] = v_pack_u(g_i[0], g_i[1]);
548 g_w[1] = v_pack_u(g_i[2], g_i[3]);
549 g_b = v_pack(g_w[0], g_w[1]);
550 b_w[0] = v_pack_u(b_i[0], b_i[1]);
551 b_w[1] = v_pack_u(b_i[2], b_i[3]);
552 b_b = v_pack(b_w[0], b_w[1]);
553
554 if( dcn == 3 )
555 {
556 if( blueIdx == 0 )
557 v_store_interleave(dst, b_b, g_b, r_b);
558 else
559 v_store_interleave(dst, r_b, g_b, b_b);
560 }
561 else
562 {
563 v_uint8 alpha_b = vx_setall_u8(alpha);
564 if( blueIdx == 0 )
565 v_store_interleave(dst, b_b, g_b, r_b, alpha_b);
566 else
567 v_store_interleave(dst, r_b, g_b, b_b, alpha_b);
568 }
569 }
570 #endif
571
572 for( ; j < n * 3; j += 3, dst += dcn )
573 {
574 float h, s, v, b, g, r;
575 h = src[j];
576 s = src[j+1] * (1.0f / 255.0f);
577 v = src[j+2] * (1.0f / 255.0f);
578 HSV2RGB_native(h, s, v, b, g, r, hscale);
579
580 dst[blueIdx] = saturate_cast<uchar>(b * 255.0f);
581 dst[1] = saturate_cast<uchar>(g * 255.0f);
582 dst[blueIdx^2] = saturate_cast<uchar>(r * 255.0f);
583
584 if( dcn == 4 )
585 dst[3] = alpha;
586 }
587 }
588
589 int dstcn;
590 int blueIdx;
591 float hscale;
592 };
593
594
595 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
596
597 struct RGB2HLS_f
598 {
599 typedef float channel_type;
600
RGB2HLS_fcv::hal::__anon673258420111::RGB2HLS_f601 RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
602 : srccn(_srccn), blueIdx(_blueIdx), hscale(_hrange/360.f)
603 {
604 }
605
606 #if CV_SIMD
processcv::hal::__anon673258420111::RGB2HLS_f607 inline void process(const v_float32& r, const v_float32& g, const v_float32& b,
608 const v_float32& vhscale,
609 v_float32& h, v_float32& l, v_float32& s) const
610 {
611 v_float32 maxRgb = v_max(v_max(r, g), b);
612 v_float32 minRgb = v_min(v_min(r, g), b);
613
614 v_float32 diff = maxRgb - minRgb;
615 v_float32 msum = maxRgb + minRgb;
616 v_float32 vhalf = vx_setall_f32(0.5f);
617 l = msum * vhalf;
618
619 s = diff / v_select(l < vhalf, msum, vx_setall_f32(2.0f) - msum);
620
621 v_float32 rMaxMask = maxRgb == r;
622 v_float32 gMaxMask = maxRgb == g;
623
624 h = v_select(rMaxMask, g - b, v_select(gMaxMask, b - r, r - g));
625 v_float32 hpart = v_select(rMaxMask, (g < b) & vx_setall_f32(360.0f),
626 v_select(gMaxMask, vx_setall_f32(120.0f), vx_setall_f32(240.0f)));
627
628 v_float32 invDiff = vx_setall_f32(60.0f) / diff;
629 h = v_muladd(h, invDiff, hpart) * vhscale;
630
631 v_float32 diffEpsMask = diff > vx_setall_f32(FLT_EPSILON);
632
633 h = diffEpsMask & h;
634 // l = l;
635 s = diffEpsMask & s;
636 }
637 #endif
638
operator ()cv::hal::__anon673258420111::RGB2HLS_f639 void operator()(const float* src, float* dst, int n) const
640 {
641 CV_INSTRUMENT_REGION();
642
643 int i = 0, bidx = blueIdx, scn = srccn;
644
645 #if CV_SIMD
646 const int vsize = v_float32::nlanes;
647 v_float32 vhscale = vx_setall_f32(hscale);
648
649 for ( ; i <= n - vsize;
650 i += vsize, src += scn * vsize, dst += 3 * vsize)
651 {
652 v_float32 r, g, b, h, l, s;
653
654 if(scn == 4)
655 {
656 v_float32 a;
657 v_load_deinterleave(src, b, g, r, a);
658 }
659 else // scn == 3
660 {
661 v_load_deinterleave(src, b, g, r);
662 }
663
664 if(bidx)
665 swap(r, b);
666
667 process(r, g, b, vhscale, h, l, s);
668
669 v_store_interleave(dst, h, l, s);
670 }
671 #endif
672
673 for( ; i < n; i++, src += scn, dst += 3 )
674 {
675 float b = src[bidx], g = src[1], r = src[bidx^2];
676 float h = 0.f, s = 0.f, l;
677 float vmin, vmax, diff;
678
679 vmax = vmin = r;
680 if( vmax < g ) vmax = g;
681 if( vmax < b ) vmax = b;
682 if( vmin > g ) vmin = g;
683 if( vmin > b ) vmin = b;
684
685 diff = vmax - vmin;
686 l = (vmax + vmin)*0.5f;
687
688 if( diff > FLT_EPSILON )
689 {
690 s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
691 diff = 60.f/diff;
692
693 if( vmax == r )
694 h = (g - b)*diff;
695 else if( vmax == g )
696 h = (b - r)*diff + 120.f;
697 else
698 h = (r - g)*diff + 240.f;
699
700 if( h < 0.f ) h += 360.f;
701 }
702
703 dst[0] = h*hscale;
704 dst[1] = l;
705 dst[2] = s;
706 }
707 }
708
709 int srccn, blueIdx;
710 float hscale;
711 };
712
713
714 struct RGB2HLS_b
715 {
716 typedef uchar channel_type;
717 static const int bufChannels = 3;
718
RGB2HLS_bcv::hal::__anon673258420111::RGB2HLS_b719 RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
720 : srccn(_srccn), cvt(bufChannels, _blueIdx, (float)_hrange)
721 { }
722
operator ()cv::hal::__anon673258420111::RGB2HLS_b723 void operator()(const uchar* src, uchar* dst, int n) const
724 {
725 CV_INSTRUMENT_REGION();
726
727 int scn = srccn;
728
729 #if CV_SIMD
730 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE];
731 #else
732 float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE];
733 #endif
734
735 #if CV_SIMD
736 static const int fsize = v_float32::nlanes;
737 //TODO: fix that when v_interleave is available
738 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3];
739 v_store_interleave(interTmpM, vx_setall_f32(1.f), vx_setall_f32(255.f), vx_setall_f32(255.f));
740 v_float32 mhls[3];
741 for(int k = 0; k < 3; k++)
742 {
743 mhls[k] = vx_load_aligned(interTmpM + k*fsize);
744 }
745 #endif
746
747 for(int i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
748 {
749 int dn = std::min(n - i, (int)BLOCK_SIZE);
750
751 #if CV_SIMD
752 v_float32 v255inv = vx_setall_f32(1.f/255.f);
753 if (scn == 3)
754 {
755 int j = 0;
756 static const int nBlock = fsize*2;
757 for ( ; j <= (dn * bufChannels - nBlock);
758 j += nBlock, src += nBlock)
759 {
760 v_uint16 drgb = vx_load_expand(src);
761 v_int32 qrgb0, qrgb1;
762 v_expand(v_reinterpret_as_s16(drgb), qrgb0, qrgb1);
763 v_store_aligned(buf + j + 0*fsize, v_cvt_f32(qrgb0)*v255inv);
764 v_store_aligned(buf + j + 1*fsize, v_cvt_f32(qrgb1)*v255inv);
765 }
766 for( ; j < dn*3; j++, src++ )
767 {
768 buf[j] = src[0]*(1.f/255.f);
769 }
770 }
771 else // if (scn == 4)
772 {
773 int j = 0;
774 static const int nBlock = fsize*4;
775 for ( ; j <= dn*bufChannels - nBlock*bufChannels;
776 j += nBlock*bufChannels, src += nBlock*4)
777 {
778 v_uint8 rgb[3], dummy;
779 v_load_deinterleave(src, rgb[0], rgb[1], rgb[2], dummy);
780
781 v_uint16 d[3*2];
782 for(int k = 0; k < 3; k++)
783 {
784 v_expand(rgb[k], d[k*2+0], d[k*2+1]);
785 }
786 v_int32 q[3*4];
787 for(int k = 0; k < 3*2; k++)
788 {
789 v_expand(v_reinterpret_as_s16(d[k]), q[k*2+0], q[k*2+1]);
790 }
791
792 v_float32 f[3*4];
793 for(int k = 0; k < 3*4; k++)
794 {
795 f[k] = v_cvt_f32(q[k])*v255inv;
796 }
797
798 for(int k = 0; k < 4; k++)
799 {
800 v_store_interleave(buf + j + k*bufChannels*fsize, f[0*4+k], f[1*4+k], f[2*4+k]);
801 }
802 }
803 for( ; j < dn*3; j += 3, src += 4 )
804 {
805 buf[j+0] = src[0]*(1.f/255.f);
806 buf[j+1] = src[1]*(1.f/255.f);
807 buf[j+2] = src[2]*(1.f/255.f);
808 }
809 }
810 #else
811 for(int j = 0; j < dn*3; j += 3, src += scn )
812 {
813 buf[j+0] = src[0]*(1.f/255.f);
814 buf[j+1] = src[1]*(1.f/255.f);
815 buf[j+2] = src[2]*(1.f/255.f);
816 }
817 #endif
818 cvt(buf, buf, dn);
819
820 int j = 0;
821 #if CV_SIMD
822 for( ; j <= dn*3 - fsize*3*4; j += fsize*3*4)
823 {
824 v_float32 f[3*4];
825 for(int k = 0; k < 3*4; k++)
826 {
827 f[k] = vx_load_aligned(buf + j + k*fsize);
828 }
829
830 for(int k = 0; k < 4; k++)
831 {
832 for(int l = 0; l < 3; l++)
833 {
834 f[k*3+l] = f[k*3+l] * mhls[l];
835 }
836 }
837
838 v_int32 q[3*4];
839 for(int k = 0; k < 3*4; k++)
840 {
841 q[k] = v_round(f[k]);
842 }
843
844 for(int k = 0; k < 3; k++)
845 {
846 v_store(dst + j + k*fsize*4, v_pack_u(v_pack(q[k*4+0], q[k*4+1]),
847 v_pack(q[k*4+2], q[k*4+3])));
848 }
849 }
850 #endif
851 for( ; j < dn*3; j += 3 )
852 {
853 dst[j] = saturate_cast<uchar>(buf[j]);
854 dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
855 dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
856 }
857 }
858 }
859
860 int srccn;
861 RGB2HLS_f cvt;
862 };
863
864
865 struct HLS2RGB_f
866 {
867 typedef float channel_type;
868
HLS2RGB_fcv::hal::__anon673258420111::HLS2RGB_f869 HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
870 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange)
871 { }
872
873 #if CV_SIMD
processcv::hal::__anon673258420111::HLS2RGB_f874 inline void process(const v_float32& h, const v_float32& l, const v_float32& s,
875 v_float32& b, v_float32& g, v_float32& r) const
876 {
877 v_float32 v1 = vx_setall_f32(1.0f), v2 = vx_setall_f32(2.0f), v4 = vx_setall_f32(4.0f);
878
879 v_float32 lBelowHalfMask = l <= vx_setall_f32(0.5f);
880 v_float32 ls = l * s;
881 v_float32 elem0 = v_select(lBelowHalfMask, ls, s - ls);
882
883 v_float32 hsRaw = h * vx_setall_f32(hscale);
884 v_float32 preHs = v_cvt_f32(v_trunc(hsRaw));
885 v_float32 hs = hsRaw - preHs;
886 v_float32 sector = preHs - vx_setall_f32(6.0f) * v_cvt_f32(v_trunc(hsRaw * vx_setall_f32(1.0f / 6.0f)));
887 v_float32 elem1 = hs + hs;
888
889 v_float32 tab0 = l + elem0;
890 v_float32 tab1 = l - elem0;
891 v_float32 tab2 = l + elem0 - elem0 * elem1;
892 v_float32 tab3 = l - elem0 + elem0 * elem1;
893
894 b = v_select(sector < v2, tab1,
895 v_select(sector <= v2, tab3,
896 v_select(sector <= v4, tab0, tab2)));
897
898 g = v_select(sector < v1, tab3,
899 v_select(sector <= v2, tab0,
900 v_select(sector < v4, tab2, tab1)));
901
902 r = v_select(sector < v1, tab0,
903 v_select(sector < v2, tab2,
904 v_select(sector < v4, tab1,
905 v_select(sector <= v4, tab3, tab0))));
906 }
907 #endif
908
operator ()cv::hal::__anon673258420111::HLS2RGB_f909 void operator()(const float* src, float* dst, int n) const
910 {
911 CV_INSTRUMENT_REGION();
912
913 int i = 0, bidx = blueIdx, dcn = dstcn;
914 float alpha = ColorChannel<float>::max();
915
916 #if CV_SIMD
917 static const int vsize = v_float32::nlanes;
918 for (; i <= n - vsize; i += vsize, src += 3*vsize, dst += dcn*vsize)
919 {
920 v_float32 h, l, s, r, g, b;
921 v_load_deinterleave(src, h, l, s);
922
923 process(h, l, s, b, g, r);
924
925 if(bidx)
926 swap(b, r);
927
928 if(dcn == 3)
929 {
930 v_store_interleave(dst, b, g, r);
931 }
932 else
933 {
934 v_float32 a = vx_setall_f32(alpha);
935 v_store_interleave(dst, b, g, r, a);
936 }
937 }
938 #endif
939
940 for( ; i < n; i++, src += 3, dst += dcn )
941 {
942 float h = src[0], l = src[1], s = src[2];
943 float b, g, r;
944
945 if( s == 0 )
946 b = g = r = l;
947 else
948 {
949 static const int sector_data[][3]=
950 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
951 float tab[4];
952 int sector;
953
954 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
955 float p1 = 2*l - p2;
956
957 h *= hscale;
958 if( h < 0 )
959 do h += 6; while( h < 0 );
960 else if( h >= 6 )
961 do h -= 6; while( h >= 6 );
962
963 assert( 0 <= h && h < 6 );
964 sector = cvFloor(h);
965 h -= sector;
966
967 tab[0] = p2;
968 tab[1] = p1;
969 tab[2] = p1 + (p2 - p1)*(1-h);
970 tab[3] = p1 + (p2 - p1)*h;
971
972 b = tab[sector_data[sector][0]];
973 g = tab[sector_data[sector][1]];
974 r = tab[sector_data[sector][2]];
975 }
976
977 dst[bidx] = b;
978 dst[1] = g;
979 dst[bidx^2] = r;
980 if( dcn == 4 )
981 dst[3] = alpha;
982 }
983 }
984
985 int dstcn, blueIdx;
986 float hscale;
987 };
988
989
990 struct HLS2RGB_b
991 {
992 typedef uchar channel_type;
993 static const int bufChannels = 3;
994
HLS2RGB_bcv::hal::__anon673258420111::HLS2RGB_b995 HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
996 : dstcn(_dstcn), cvt(bufChannels, _blueIdx, (float)_hrange)
997 { }
998
operator ()cv::hal::__anon673258420111::HLS2RGB_b999 void operator()(const uchar* src, uchar* dst, int n) const
1000 {
1001 CV_INSTRUMENT_REGION();
1002
1003 int i, j, dcn = dstcn;
1004 uchar alpha = ColorChannel<uchar>::max();
1005
1006 #if CV_SIMD
1007 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE];
1008 #else
1009 float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE];
1010 #endif
1011
1012 #if CV_SIMD
1013 static const int fsize = v_float32::nlanes;
1014 //TODO: fix that when v_interleave is available
1015 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3];
1016 v_float32 v255inv = vx_setall_f32(1.f/255.f);
1017 v_store_interleave(interTmpM, vx_setall_f32(1.f), v255inv, v255inv);
1018 v_float32 mhls[3];
1019 for(int k = 0; k < 3; k++)
1020 {
1021 mhls[k] = vx_load_aligned(interTmpM + k*fsize);
1022 }
1023 #endif
1024
1025 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1026 {
1027 int dn = std::min(n - i, (int)BLOCK_SIZE);
1028 j = 0;
1029
1030 #if CV_SIMD
1031 for( ; j <= dn*3 - 3*4*fsize; j += 3*4*fsize)
1032 {
1033 // 3x uchar -> 3*4 float
1034 v_uint8 u[3];
1035 for(int k = 0; k < 3; k++)
1036 {
1037 u[k] = vx_load(src + j + k*4*fsize);
1038 }
1039 v_uint16 d[3*2];
1040 for(int k = 0; k < 3; k++)
1041 {
1042 v_expand(u[k], d[k*2+0], d[k*2+1]);
1043 }
1044 v_int32 q[3*4];
1045 for(int k = 0; k < 3*2; k++)
1046 {
1047 v_expand(v_reinterpret_as_s16(d[k]), q[k*2+0], q[k*2+1]);
1048 }
1049
1050 v_float32 f[3*4];
1051 for(int k = 0; k < 4; k++)
1052 {
1053 for(int l = 0; l < 3; l++)
1054 {
1055 f[k*3+l] = v_cvt_f32(q[k*3+l])*mhls[l];
1056 }
1057 }
1058
1059 for (int k = 0; k < 4*3; k++)
1060 {
1061 v_store_aligned(buf + j + k*fsize, f[k]);
1062 }
1063 }
1064 #endif
1065 for( ; j < dn*3; j += 3 )
1066 {
1067 buf[j] = src[j];
1068 buf[j+1] = src[j+1]*(1.f/255.f);
1069 buf[j+2] = src[j+2]*(1.f/255.f);
1070 }
1071 cvt(buf, buf, dn);
1072
1073 #if CV_SIMD
1074 v_float32 v255 = vx_setall_f32(255.f);
1075 if(dcn == 3)
1076 {
1077 int x = 0;
1078 float* pbuf = buf;
1079 for( ; x <= dn - 4*fsize; x += 4*fsize, dst += 4*fsize, pbuf += 4*fsize)
1080 {
1081 v_float32 vf[4];
1082 vf[0] = vx_load_aligned(pbuf + 0*fsize);
1083 vf[1] = vx_load_aligned(pbuf + 1*fsize);
1084 vf[2] = vx_load_aligned(pbuf + 2*fsize);
1085 vf[3] = vx_load_aligned(pbuf + 3*fsize);
1086 v_int32 vi[4];
1087 vi[0] = v_round(vf[0]*v255);
1088 vi[1] = v_round(vf[1]*v255);
1089 vi[2] = v_round(vf[2]*v255);
1090 vi[3] = v_round(vf[3]*v255);
1091 v_store(dst, v_pack_u(v_pack(vi[0], vi[1]),
1092 v_pack(vi[2], vi[3])));
1093 }
1094 for( ; x < dn*3; x++, dst++, pbuf++)
1095 {
1096 dst[0] = saturate_cast<uchar>(pbuf[0]*255.f);
1097 }
1098 }
1099 else // dcn == 4
1100 {
1101 int x = 0;
1102 float* pbuf = buf;
1103 for ( ; x <= dn - 4*fsize; x += fsize, dst += 4*fsize, pbuf += bufChannels*fsize)
1104 {
1105 v_float32 r[4], g[4], b[4];
1106 v_int32 ir[4], ig[4], ib[4];
1107 for(int k = 0; k < 4; k++)
1108 {
1109 v_load_deinterleave(pbuf, r[k], g[k], b[k]);
1110 ir[k] = v_round(r[k]*v255);
1111 ig[k] = v_round(g[k]*v255);
1112 ib[k] = v_round(b[k]*v255);
1113 }
1114 v_uint8 ur, ug, ub;
1115 ur = v_pack_u(v_pack(ir[0], ir[1]), v_pack(ir[2], ir[3]));
1116 ug = v_pack_u(v_pack(ig[0], ig[1]), v_pack(ig[2], ig[3]));
1117 ub = v_pack_u(v_pack(ib[0], ib[1]), v_pack(ib[2], ib[3]));
1118
1119 v_uint8 valpha = vx_setall_u8(alpha);
1120 v_store_interleave(dst, ur, ug, ub, valpha);
1121 }
1122
1123 for( ; x < dn; x++, dst += dcn, pbuf += bufChannels)
1124 {
1125 dst[0] = saturate_cast<uchar>(pbuf[0]*255.f);
1126 dst[1] = saturate_cast<uchar>(pbuf[1]*255.f);
1127 dst[2] = saturate_cast<uchar>(pbuf[2]*255.f);
1128 dst[3] = alpha;
1129 }
1130 }
1131 #else
1132 for(int x = 0; x < dn*3; x += 3, dst += dcn )
1133 {
1134 dst[0] = saturate_cast<uchar>(buf[x+0]*255.f);
1135 dst[1] = saturate_cast<uchar>(buf[x+1]*255.f);
1136 dst[2] = saturate_cast<uchar>(buf[x+2]*255.f);
1137 if( dcn == 4 )
1138 dst[3] = alpha;
1139 }
1140 #endif
1141 }
1142 }
1143
1144 int dstcn;
1145 HLS2RGB_f cvt;
1146 };
1147
1148 } // namespace anon
1149
1150 // 8u, 32f
cvtBGRtoHSV(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int width,int height,int depth,int scn,bool swapBlue,bool isFullRange,bool isHSV)1151 void cvtBGRtoHSV(const uchar * src_data, size_t src_step,
1152 uchar * dst_data, size_t dst_step,
1153 int width, int height,
1154 int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV)
1155 {
1156 CV_INSTRUMENT_REGION();
1157
1158 int hrange = depth == CV_32F ? 360 : isFullRange ? 256 : 180;
1159 int blueIdx = swapBlue ? 2 : 0;
1160 if(isHSV)
1161 {
1162 if(depth == CV_8U)
1163 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HSV_b(scn, blueIdx, hrange));
1164 else
1165 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HSV_f(scn, blueIdx, static_cast<float>(hrange)));
1166 }
1167 else
1168 {
1169 if( depth == CV_8U )
1170 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HLS_b(scn, blueIdx, hrange));
1171 else
1172 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2HLS_f(scn, blueIdx, static_cast<float>(hrange)));
1173 }
1174 }
1175
1176 // 8u, 32f
cvtHSVtoBGR(const uchar * src_data,size_t src_step,uchar * dst_data,size_t dst_step,int width,int height,int depth,int dcn,bool swapBlue,bool isFullRange,bool isHSV)1177 void cvtHSVtoBGR(const uchar * src_data, size_t src_step,
1178 uchar * dst_data, size_t dst_step,
1179 int width, int height,
1180 int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV)
1181 {
1182 CV_INSTRUMENT_REGION();
1183
1184 int hrange = depth == CV_32F ? 360 : isFullRange ? 255 : 180;
1185 int blueIdx = swapBlue ? 2 : 0;
1186 if(isHSV)
1187 {
1188 if( depth == CV_8U )
1189 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HSV2RGB_b(dcn, blueIdx, hrange));
1190 else
1191 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HSV2RGB_f(dcn, blueIdx, static_cast<float>(hrange)));
1192 }
1193 else
1194 {
1195 if( depth == CV_8U )
1196 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HLS2RGB_b(dcn, blueIdx, hrange));
1197 else
1198 CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, HLS2RGB_f(dcn, blueIdx, static_cast<float>(hrange)));
1199 }
1200 }
1201
1202 #endif
1203 CV_CPU_OPTIMIZATION_NAMESPACE_END
1204 }} // namespace
1205