1 /*
2 * Simd Library (http://ermig1979.github.io/Simd).
3 *
4 * Copyright (c) 2011-2019 Yermalayeu Ihar,
5 *               2014-2015 Antonenka Mikhail.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25 #ifndef __SimdConversion_h__
26 #define __SimdConversion_h__
27 
28 #include "Simd/SimdConst.h"
29 #include "Simd/SimdMath.h"
30 #include "Simd/SimdLoad.h"
31 
32 namespace Simd
33 {
34     namespace Base
35     {
BgrToGray(int blue,int green,int red)36         SIMD_INLINE int BgrToGray(int blue, int green, int red)
37         {
38             return (BLUE_TO_GRAY_WEIGHT*blue + GREEN_TO_GRAY_WEIGHT * green +
39                 RED_TO_GRAY_WEIGHT * red + BGR_TO_GRAY_ROUND_TERM) >> BGR_TO_GRAY_AVERAGING_SHIFT;
40         }
41 
RgbToGray(int red,int green,int blue)42         SIMD_INLINE int RgbToGray(int red, int green, int blue)
43         {
44             return (BLUE_TO_GRAY_WEIGHT*blue + GREEN_TO_GRAY_WEIGHT * green +
45                 RED_TO_GRAY_WEIGHT * red + BGR_TO_GRAY_ROUND_TERM) >> BGR_TO_GRAY_AVERAGING_SHIFT;
46         }
47     }
48 
49 #ifdef SIMD_SSSE3_ENABLE
50     namespace Ssse3
51     {
52         template <int index> __m128i InterleaveBgr(__m128i blue, __m128i green, __m128i red);
53 
54         template<> SIMD_INLINE __m128i InterleaveBgr<0>(__m128i blue, __m128i green, __m128i red)
55         {
56             return
57                 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR0),
58                     _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR0),
59                         _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR0)));
60         }
61 
62         template<> SIMD_INLINE __m128i InterleaveBgr<1>(__m128i blue, __m128i green, __m128i red)
63         {
64             return
65                 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR1),
66                     _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR1),
67                         _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR1)));
68         }
69 
70         template<> SIMD_INLINE __m128i InterleaveBgr<2>(__m128i blue, __m128i green, __m128i red)
71         {
72             return
73                 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR2),
74                     _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR2),
75                         _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR2)));
76         }
77 
BgrToBlue(__m128i bgr[3])78         SIMD_INLINE __m128i BgrToBlue(__m128i bgr[3])
79         {
80             return
81                 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE),
82                     _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE),
83                         _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE)));
84         }
85 
BgrToGreen(__m128i bgr[3])86         SIMD_INLINE __m128i BgrToGreen(__m128i bgr[3])
87         {
88             return
89                 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN),
90                     _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN),
91                         _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN)));
92         }
93 
BgrToRed(__m128i bgr[3])94         SIMD_INLINE __m128i BgrToRed(__m128i bgr[3])
95         {
96             return
97                 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED),
98                     _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED),
99                         _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED)));
100         }
101     }
102 #endif//SIMD_SSSE3_ENABLE
103 
104 #ifdef SIMD_AVX2_ENABLE
105     namespace Avx2
106     {
107         template <int index> __m256i GrayToBgr(__m256i gray);
108 
109         template<> SIMD_INLINE __m256i GrayToBgr<0>(__m256i gray)
110         {
111             return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x44), K8_SHUFFLE_GRAY_TO_BGR0);
112         }
113 
114         template<> SIMD_INLINE __m256i GrayToBgr<1>(__m256i gray)
115         {
116             return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x99), K8_SHUFFLE_GRAY_TO_BGR1);
117         }
118 
119         template<> SIMD_INLINE __m256i GrayToBgr<2>(__m256i gray)
120         {
121             return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0xEE), K8_SHUFFLE_GRAY_TO_BGR2);
122         }
123 
124         template <int index> __m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red);
125 
126         template<> SIMD_INLINE __m256i InterleaveBgr<0>(__m256i blue, __m256i green, __m256i red)
127         {
128             return
129                 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x44), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0),
130                     _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x44), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0),
131                         _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x44), K8_SHUFFLE_PERMUTED_RED_TO_BGR0)));
132         }
133 
134         template<> SIMD_INLINE __m256i InterleaveBgr<1>(__m256i blue, __m256i green, __m256i red)
135         {
136             return
137                 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x99), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1),
138                     _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x99), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1),
139                         _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x99), K8_SHUFFLE_PERMUTED_RED_TO_BGR1)));
140         }
141 
142         template<> SIMD_INLINE __m256i InterleaveBgr<2>(__m256i blue, __m256i green, __m256i red)
143         {
144             return
145                 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0xEE), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2),
146                     _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0xEE), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2),
147                         _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0xEE), K8_SHUFFLE_PERMUTED_RED_TO_BGR2)));
148         }
149 
BgrToBlue(__m256i bgr[3])150         SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3])
151         {
152             __m256i b0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE);
153             __m256i b2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE);
154             return
155                 _mm256_or_si256(_mm256_permute2x128_si256(b0, b2, 0x20),
156                     _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE),
157                         _mm256_permute2x128_si256(b0, b2, 0x31)));
158         }
159 
BgrToGreen(__m256i bgr[3])160         SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3])
161         {
162             __m256i g0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN);
163             __m256i g2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN);
164             return
165                 _mm256_or_si256(_mm256_permute2x128_si256(g0, g2, 0x20),
166                     _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN),
167                         _mm256_permute2x128_si256(g0, g2, 0x31)));
168         }
169 
BgrToRed(__m256i bgr[3])170         SIMD_INLINE __m256i BgrToRed(__m256i bgr[3])
171         {
172             __m256i r0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED);
173             __m256i r2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED);
174             return
175                 _mm256_or_si256(_mm256_permute2x128_si256(r0, r2, 0x20),
176                     _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED),
177                         _mm256_permute2x128_si256(r0, r2, 0x31)));
178         }
179 
180         template<bool tail> __m256i BgrToBgra(const __m256i & bgr, const __m256i & alpha);
181 
182         template<> SIMD_INLINE __m256i BgrToBgra<false>(const __m256i & bgr, const __m256i & alpha)
183         {
184             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0x94), K8_BGRA_TO_BGR_SHUFFLE), alpha);
185         }
186 
187         template<> SIMD_INLINE __m256i BgrToBgra<true>(const __m256i & bgr, const __m256i & alpha)
188         {
189             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0xE9), K8_BGRA_TO_BGR_SHUFFLE), alpha);
190         }
191 
192         template<bool tail> __m256i BgrToRgba(const __m256i & bgr, const __m256i & alpha);
193 
194         template<> SIMD_INLINE __m256i BgrToRgba<false>(const __m256i & bgr, const __m256i & alpha)
195         {
196             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0x94), K8_BGRA_TO_RGB_SHUFFLE), alpha);
197         }
198 
199         template<> SIMD_INLINE __m256i BgrToRgba<true>(const __m256i & bgr, const __m256i & alpha)
200         {
201             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0xE9), K8_BGRA_TO_RGB_SHUFFLE), alpha);
202         }
203 
BgraToRgba(const __m256i & bgra)204         SIMD_INLINE __m256i BgraToRgba(const __m256i & bgra)
205         {
206             return _mm256_shuffle_epi8(bgra, K8_BGRA_TO_RGBA_SHUFFLE);
207         }
208 
209         template<bool tail> __m256i RgbToBgra(const __m256i & rgb, const __m256i & alpha);
210 
211         template<> SIMD_INLINE __m256i RgbToBgra<false>(const __m256i & rgb, const __m256i & alpha)
212         {
213             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0x94), K8_BGRA_TO_RGB_SHUFFLE), alpha);
214         }
215 
216         template<> SIMD_INLINE __m256i RgbToBgra<true>(const __m256i & rgb, const __m256i & alpha)
217         {
218             return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0xE9), K8_BGRA_TO_RGB_SHUFFLE), alpha);
219         }
220     }
221 #endif// SIMD_AVX2_ENABLE
222 
223 #ifdef SIMD_NEON_ENABLE
224     namespace Neon
225     {
BgrToGray(const uint16x8_t & blue,const uint16x8_t & green,const uint16x8_t & red)226         template <int part> SIMD_INLINE uint32x4_t BgrToGray(const uint16x8_t & blue, const uint16x8_t & green, const uint16x8_t & red)
227         {
228             return vshrq_n_u32(vmlal_u16(vmlal_u16(vmlal_u16(K32_BGR_TO_GRAY_ROUND_TERM, Half<part>(blue), K16_BLUE_TO_GRAY_WEIGHT),
229                 Half<part>(green), K16_GREEN_TO_GRAY_WEIGHT), Half<part>(red), K16_RED_TO_GRAY_WEIGHT), Base::BGR_TO_GRAY_AVERAGING_SHIFT);
230         }
231 
BgrToGray(const uint16x8_t & blue,const uint16x8_t & green,const uint16x8_t & red)232         SIMD_INLINE uint16x8_t BgrToGray(const uint16x8_t & blue, const uint16x8_t & green, const uint16x8_t & red)
233         {
234             return PackU32(BgrToGray<0>(blue, green, red), BgrToGray<1>(blue, green, red));
235         }
236 
BgrToU(uint16x8_t blue,uint16x8_t green,uint16x8_t red)237         template <int part> SIMD_INLINE int32x4_t BgrToU(uint16x8_t blue, uint16x8_t green, uint16x8_t red)
238         {
239             return vshrq_n_s32(vmlal_s16(vmlal_s16(vmlal_s16(K32_BGR_TO_YUV_ROUND_TERM, (int16x4_t)Half<part>(blue), K16_BLUE_TO_U_WEIGHT),
240                 (int16x4_t)Half<part>(green), K16_GREEN_TO_U_WEIGHT), (int16x4_t)Half<part>(red), K16_RED_TO_U_WEIGHT), Base::BGR_TO_YUV_AVERAGING_SHIFT);
241         }
242     }
243 #endif// SIMD_NEON_ENABLE
244 }
245 #endif//__SimdConversion_h__
246