1 /* 2 * Simd Library (http://ermig1979.github.io/Simd). 3 * 4 * Copyright (c) 2011-2019 Yermalayeu Ihar, 5 * 2014-2015 Antonenka Mikhail. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 */ 25 #ifndef __SimdConversion_h__ 26 #define __SimdConversion_h__ 27 28 #include "Simd/SimdConst.h" 29 #include "Simd/SimdMath.h" 30 #include "Simd/SimdLoad.h" 31 32 namespace Simd 33 { 34 namespace Base 35 { BgrToGray(int blue,int green,int red)36 SIMD_INLINE int BgrToGray(int blue, int green, int red) 37 { 38 return (BLUE_TO_GRAY_WEIGHT*blue + GREEN_TO_GRAY_WEIGHT * green + 39 RED_TO_GRAY_WEIGHT * red + BGR_TO_GRAY_ROUND_TERM) >> BGR_TO_GRAY_AVERAGING_SHIFT; 40 } 41 RgbToGray(int red,int green,int blue)42 SIMD_INLINE int RgbToGray(int red, int green, int blue) 43 { 44 return (BLUE_TO_GRAY_WEIGHT*blue + GREEN_TO_GRAY_WEIGHT * green + 45 RED_TO_GRAY_WEIGHT * red + BGR_TO_GRAY_ROUND_TERM) >> BGR_TO_GRAY_AVERAGING_SHIFT; 46 } 47 } 48 49 #ifdef SIMD_SSSE3_ENABLE 50 namespace Ssse3 51 { 52 template <int index> __m128i InterleaveBgr(__m128i blue, __m128i green, __m128i red); 53 54 template<> SIMD_INLINE __m128i InterleaveBgr<0>(__m128i blue, __m128i green, __m128i red) 55 { 56 return 57 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR0), 58 _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR0), 59 _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR0))); 60 } 61 62 template<> SIMD_INLINE __m128i InterleaveBgr<1>(__m128i blue, __m128i green, __m128i red) 63 { 64 return 65 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR1), 66 _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR1), 67 _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR1))); 68 } 69 70 template<> SIMD_INLINE __m128i InterleaveBgr<2>(__m128i blue, __m128i green, __m128i red) 71 { 72 return 73 _mm_or_si128(_mm_shuffle_epi8(blue, K8_SHUFFLE_BLUE_TO_BGR2), 74 _mm_or_si128(_mm_shuffle_epi8(green, K8_SHUFFLE_GREEN_TO_BGR2), 75 _mm_shuffle_epi8(red, K8_SHUFFLE_RED_TO_BGR2))); 76 } 77 BgrToBlue(__m128i bgr[3])78 SIMD_INLINE __m128i BgrToBlue(__m128i bgr[3]) 79 { 80 return 81 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE), 82 _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE), 83 _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE))); 84 } 85 BgrToGreen(__m128i bgr[3])86 SIMD_INLINE __m128i BgrToGreen(__m128i bgr[3]) 87 { 88 return 89 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN), 90 _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN), 91 _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN))); 92 } 93 BgrToRed(__m128i bgr[3])94 SIMD_INLINE __m128i BgrToRed(__m128i bgr[3]) 95 { 96 return 97 _mm_or_si128(_mm_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED), 98 _mm_or_si128(_mm_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED), 99 _mm_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED))); 100 } 101 } 102 #endif//SIMD_SSSE3_ENABLE 103 104 #ifdef SIMD_AVX2_ENABLE 105 namespace Avx2 106 { 107 template <int index> __m256i GrayToBgr(__m256i gray); 108 109 template<> SIMD_INLINE __m256i GrayToBgr<0>(__m256i gray) 110 { 111 return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x44), K8_SHUFFLE_GRAY_TO_BGR0); 112 } 113 114 template<> SIMD_INLINE __m256i GrayToBgr<1>(__m256i gray) 115 { 116 return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x99), K8_SHUFFLE_GRAY_TO_BGR1); 117 } 118 119 template<> SIMD_INLINE __m256i GrayToBgr<2>(__m256i gray) 120 { 121 return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0xEE), K8_SHUFFLE_GRAY_TO_BGR2); 122 } 123 124 template <int index> __m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red); 125 126 template<> SIMD_INLINE __m256i InterleaveBgr<0>(__m256i blue, __m256i green, __m256i red) 127 { 128 return 129 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x44), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0), 130 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x44), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0), 131 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x44), K8_SHUFFLE_PERMUTED_RED_TO_BGR0))); 132 } 133 134 template<> SIMD_INLINE __m256i InterleaveBgr<1>(__m256i blue, __m256i green, __m256i red) 135 { 136 return 137 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x99), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1), 138 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x99), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1), 139 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x99), K8_SHUFFLE_PERMUTED_RED_TO_BGR1))); 140 } 141 142 template<> SIMD_INLINE __m256i InterleaveBgr<2>(__m256i blue, __m256i green, __m256i red) 143 { 144 return 145 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0xEE), K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2), 146 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0xEE), K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2), 147 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0xEE), K8_SHUFFLE_PERMUTED_RED_TO_BGR2))); 148 } 149 BgrToBlue(__m256i bgr[3])150 SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3]) 151 { 152 __m256i b0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE); 153 __m256i b2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE); 154 return 155 _mm256_or_si256(_mm256_permute2x128_si256(b0, b2, 0x20), 156 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE), 157 _mm256_permute2x128_si256(b0, b2, 0x31))); 158 } 159 BgrToGreen(__m256i bgr[3])160 SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3]) 161 { 162 __m256i g0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN); 163 __m256i g2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN); 164 return 165 _mm256_or_si256(_mm256_permute2x128_si256(g0, g2, 0x20), 166 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN), 167 _mm256_permute2x128_si256(g0, g2, 0x31))); 168 } 169 BgrToRed(__m256i bgr[3])170 SIMD_INLINE __m256i BgrToRed(__m256i bgr[3]) 171 { 172 __m256i r0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED); 173 __m256i r2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED); 174 return 175 _mm256_or_si256(_mm256_permute2x128_si256(r0, r2, 0x20), 176 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED), 177 _mm256_permute2x128_si256(r0, r2, 0x31))); 178 } 179 180 template<bool tail> __m256i BgrToBgra(const __m256i & bgr, const __m256i & alpha); 181 182 template<> SIMD_INLINE __m256i BgrToBgra<false>(const __m256i & bgr, const __m256i & alpha) 183 { 184 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0x94), K8_BGRA_TO_BGR_SHUFFLE), alpha); 185 } 186 187 template<> SIMD_INLINE __m256i BgrToBgra<true>(const __m256i & bgr, const __m256i & alpha) 188 { 189 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0xE9), K8_BGRA_TO_BGR_SHUFFLE), alpha); 190 } 191 192 template<bool tail> __m256i BgrToRgba(const __m256i & bgr, const __m256i & alpha); 193 194 template<> SIMD_INLINE __m256i BgrToRgba<false>(const __m256i & bgr, const __m256i & alpha) 195 { 196 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0x94), K8_BGRA_TO_RGB_SHUFFLE), alpha); 197 } 198 199 template<> SIMD_INLINE __m256i BgrToRgba<true>(const __m256i & bgr, const __m256i & alpha) 200 { 201 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0xE9), K8_BGRA_TO_RGB_SHUFFLE), alpha); 202 } 203 BgraToRgba(const __m256i & bgra)204 SIMD_INLINE __m256i BgraToRgba(const __m256i & bgra) 205 { 206 return _mm256_shuffle_epi8(bgra, K8_BGRA_TO_RGBA_SHUFFLE); 207 } 208 209 template<bool tail> __m256i RgbToBgra(const __m256i & rgb, const __m256i & alpha); 210 211 template<> SIMD_INLINE __m256i RgbToBgra<false>(const __m256i & rgb, const __m256i & alpha) 212 { 213 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0x94), K8_BGRA_TO_RGB_SHUFFLE), alpha); 214 } 215 216 template<> SIMD_INLINE __m256i RgbToBgra<true>(const __m256i & rgb, const __m256i & alpha) 217 { 218 return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0xE9), K8_BGRA_TO_RGB_SHUFFLE), alpha); 219 } 220 } 221 #endif// SIMD_AVX2_ENABLE 222 223 #ifdef SIMD_NEON_ENABLE 224 namespace Neon 225 { BgrToGray(const uint16x8_t & blue,const uint16x8_t & green,const uint16x8_t & red)226 template <int part> SIMD_INLINE uint32x4_t BgrToGray(const uint16x8_t & blue, const uint16x8_t & green, const uint16x8_t & red) 227 { 228 return vshrq_n_u32(vmlal_u16(vmlal_u16(vmlal_u16(K32_BGR_TO_GRAY_ROUND_TERM, Half<part>(blue), K16_BLUE_TO_GRAY_WEIGHT), 229 Half<part>(green), K16_GREEN_TO_GRAY_WEIGHT), Half<part>(red), K16_RED_TO_GRAY_WEIGHT), Base::BGR_TO_GRAY_AVERAGING_SHIFT); 230 } 231 BgrToGray(const uint16x8_t & blue,const uint16x8_t & green,const uint16x8_t & red)232 SIMD_INLINE uint16x8_t BgrToGray(const uint16x8_t & blue, const uint16x8_t & green, const uint16x8_t & red) 233 { 234 return PackU32(BgrToGray<0>(blue, green, red), BgrToGray<1>(blue, green, red)); 235 } 236 BgrToU(uint16x8_t blue,uint16x8_t green,uint16x8_t red)237 template <int part> SIMD_INLINE int32x4_t BgrToU(uint16x8_t blue, uint16x8_t green, uint16x8_t red) 238 { 239 return vshrq_n_s32(vmlal_s16(vmlal_s16(vmlal_s16(K32_BGR_TO_YUV_ROUND_TERM, (int16x4_t)Half<part>(blue), K16_BLUE_TO_U_WEIGHT), 240 (int16x4_t)Half<part>(green), K16_GREEN_TO_U_WEIGHT), (int16x4_t)Half<part>(red), K16_RED_TO_U_WEIGHT), Base::BGR_TO_YUV_AVERAGING_SHIFT); 241 } 242 } 243 #endif// SIMD_NEON_ENABLE 244 } 245 #endif//__SimdConversion_h__ 246