1 /* 2 * Simd Library (http://ermig1979.github.io/Simd). 3 * 4 * Copyright (c) 2011-2019 Yermalayeu Ihar, 5 * 2014-2015 Antonenka Mikhail. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 */ 25 #ifndef __SimdConst_h__ 26 #define __SimdConst_h__ 27 28 #include "Simd/SimdInit.h" 29 30 namespace Simd 31 { 32 const size_t HISTOGRAM_SIZE = UCHAR_MAX + 1; 33 34 namespace Base 35 { 36 const int LINEAR_SHIFT = 4; 37 const int LINEAR_ROUND_TERM = 1 << (LINEAR_SHIFT - 1); 38 39 const int BILINEAR_SHIFT = LINEAR_SHIFT * 2; 40 const int BILINEAR_ROUND_TERM = 1 << (BILINEAR_SHIFT - 1); 41 42 const int FRACTION_RANGE = 1 << LINEAR_SHIFT; 43 const double FRACTION_ROUND_TERM = 0.5 / FRACTION_RANGE; 44 45 const float KF_255_DIV_6 = 255.0f / 6.0f; 46 47 const int BGR_TO_GRAY_AVERAGING_SHIFT = 14; 48 const int BGR_TO_GRAY_ROUND_TERM = 1 << (BGR_TO_GRAY_AVERAGING_SHIFT - 1); 49 const int BLUE_TO_GRAY_WEIGHT = int(0.0722*(1 << BGR_TO_GRAY_AVERAGING_SHIFT) + 0.5); // 0.114 //modif SIMD lib coeffs to ViSP ones 50 const int GREEN_TO_GRAY_WEIGHT = int(0.7152*(1 << BGR_TO_GRAY_AVERAGING_SHIFT) + 0.5); // 0.587 51 const int RED_TO_GRAY_WEIGHT = int(0.2126*(1 << BGR_TO_GRAY_AVERAGING_SHIFT) + 0.5); // 0.299 52 53 const int Y_ADJUST = 16; 54 const int UV_ADJUST = 128; 55 const int YUV_TO_BGR_AVERAGING_SHIFT = 13; 56 const int YUV_TO_BGR_ROUND_TERM = 1 << (YUV_TO_BGR_AVERAGING_SHIFT - 1); 57 const int Y_TO_RGB_WEIGHT = int(1.164*(1 << YUV_TO_BGR_AVERAGING_SHIFT) + 0.5); 58 const int U_TO_BLUE_WEIGHT = int(2.018*(1 << YUV_TO_BGR_AVERAGING_SHIFT) + 0.5); 59 const int U_TO_GREEN_WEIGHT = -int(0.391*(1 << YUV_TO_BGR_AVERAGING_SHIFT) + 0.5); 60 const int V_TO_GREEN_WEIGHT = -int(0.813*(1 << YUV_TO_BGR_AVERAGING_SHIFT) + 0.5); 61 const int V_TO_RED_WEIGHT = int(1.596*(1 << YUV_TO_BGR_AVERAGING_SHIFT) + 0.5); 62 63 const int BGR_TO_YUV_AVERAGING_SHIFT = 14; 64 const int BGR_TO_YUV_ROUND_TERM = 1 << (BGR_TO_YUV_AVERAGING_SHIFT - 1); 65 const int BLUE_TO_Y_WEIGHT = int(0.098*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 66 const int GREEN_TO_Y_WEIGHT = int(0.504*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 67 const int RED_TO_Y_WEIGHT = int(0.257*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 68 const int BLUE_TO_U_WEIGHT = int(0.439*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 69 const int GREEN_TO_U_WEIGHT = -int(0.291*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 70 const int RED_TO_U_WEIGHT = -int(0.148*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 71 const int BLUE_TO_V_WEIGHT = -int(0.071*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 72 const int GREEN_TO_V_WEIGHT = -int(0.368*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 73 const int RED_TO_V_WEIGHT = int(0.439*(1 << BGR_TO_YUV_AVERAGING_SHIFT) + 0.5); 74 75 const int DIVISION_BY_9_SHIFT = 16; 76 const int DIVISION_BY_9_FACTOR = (1 << DIVISION_BY_9_SHIFT) / 9; 77 } 78 79 #ifdef SIMD_SSE_ENABLE 80 namespace Sse 81 { 82 const size_t F = sizeof(__m128) / sizeof(float); 83 const size_t DF = 2 * F; 84 const size_t QF = 4 * F; 85 const size_t HF = F / 2; 86 } 87 #endif// SIMD_SSE_ENABLE 88 89 #ifdef SIMD_SSE2_ENABLE 90 namespace Sse2 91 { 92 using namespace Sse; 93 #if defined(_MSC_VER) && _MSC_VER >= 1700 && _MSC_VER < 1900 // Visual Studio 2012/2013 compiler bug 94 using Sse::F; 95 using Sse::DF; 96 using Sse::QF; 97 #endif 98 99 const size_t A = sizeof(__m128i); 100 const size_t DA = 2 * A; 101 const size_t QA = 4 * A; 102 const size_t OA = 8 * A; 103 const size_t HA = A / 2; 104 105 const __m128i K_ZERO = SIMD_MM_SET1_EPI8(0); 106 const __m128i K_INV_ZERO = SIMD_MM_SET1_EPI8(0xFF); 107 108 const __m128i K8_01 = SIMD_MM_SET1_EPI8(0x01); 109 const __m128i K8_02 = SIMD_MM_SET1_EPI8(0x02); 110 const __m128i K8_03 = SIMD_MM_SET1_EPI8(0x03); 111 const __m128i K8_04 = SIMD_MM_SET1_EPI8(0x04); 112 const __m128i K8_07 = SIMD_MM_SET1_EPI8(0x07); 113 const __m128i K8_08 = SIMD_MM_SET1_EPI8(0x08); 114 const __m128i K8_10 = SIMD_MM_SET1_EPI8(0x10); 115 const __m128i K8_20 = SIMD_MM_SET1_EPI8(0x20); 116 const __m128i K8_40 = SIMD_MM_SET1_EPI8(0x40); 117 const __m128i K8_80 = SIMD_MM_SET1_EPI8(0x80); 118 119 const __m128i K8_01_FF = SIMD_MM_SET2_EPI8(0x01, 0xFF); 120 121 const __m128i K16_0001 = SIMD_MM_SET1_EPI16(0x0001); 122 const __m128i K16_0002 = SIMD_MM_SET1_EPI16(0x0002); 123 const __m128i K16_0003 = SIMD_MM_SET1_EPI16(0x0003); 124 const __m128i K16_0004 = SIMD_MM_SET1_EPI16(0x0004); 125 const __m128i K16_0005 = SIMD_MM_SET1_EPI16(0x0005); 126 const __m128i K16_0006 = SIMD_MM_SET1_EPI16(0x0006); 127 const __m128i K16_0008 = SIMD_MM_SET1_EPI16(0x0008); 128 const __m128i K16_0020 = SIMD_MM_SET1_EPI16(0x0020); 129 const __m128i K16_0080 = SIMD_MM_SET1_EPI16(0x0080); 130 const __m128i K16_00FF = SIMD_MM_SET1_EPI16(0x00FF); 131 const __m128i K16_FF00 = SIMD_MM_SET1_EPI16(0xFF00); 132 133 const __m128i K32_00000001 = SIMD_MM_SET1_EPI32(0x00000001); 134 const __m128i K32_00000002 = SIMD_MM_SET1_EPI32(0x00000002); 135 const __m128i K32_00000004 = SIMD_MM_SET1_EPI32(0x00000004); 136 const __m128i K32_00000008 = SIMD_MM_SET1_EPI32(0x00000008); 137 const __m128i K32_000000FF = SIMD_MM_SET1_EPI32(0x000000FF); 138 const __m128i K32_0000FFFF = SIMD_MM_SET1_EPI32(0x0000FFFF); 139 const __m128i K32_00010000 = SIMD_MM_SET1_EPI32(0x00010000); 140 const __m128i K32_01000000 = SIMD_MM_SET1_EPI32(0x01000000); 141 const __m128i K32_00FFFFFF = SIMD_MM_SET1_EPI32(0x00FFFFFF); 142 const __m128i K32_FFFFFF00 = SIMD_MM_SET1_EPI32(0xFFFFFF00); 143 144 const __m128i K64_00000000FFFFFFFF = SIMD_MM_SET2_EPI32(0xFFFFFFFF, 0); 145 146 const __m128i K16_Y_ADJUST = SIMD_MM_SET1_EPI16(Base::Y_ADJUST); 147 const __m128i K16_UV_ADJUST = SIMD_MM_SET1_EPI16(Base::UV_ADJUST); 148 149 const __m128i K16_YRGB_RT = SIMD_MM_SET2_EPI16(Base::Y_TO_RGB_WEIGHT, Base::YUV_TO_BGR_ROUND_TERM); 150 const __m128i K16_VR_0 = SIMD_MM_SET2_EPI16(Base::V_TO_RED_WEIGHT, 0); 151 const __m128i K16_UG_VG = SIMD_MM_SET2_EPI16(Base::U_TO_GREEN_WEIGHT, Base::V_TO_GREEN_WEIGHT); 152 const __m128i K16_UB_0 = SIMD_MM_SET2_EPI16(Base::U_TO_BLUE_WEIGHT, 0); 153 154 const __m128i K16_BY_RY = SIMD_MM_SET2_EPI16(Base::BLUE_TO_Y_WEIGHT, Base::RED_TO_Y_WEIGHT); 155 const __m128i K16_GY_RT = SIMD_MM_SET2_EPI16(Base::GREEN_TO_Y_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 156 const __m128i K16_BU_RU = SIMD_MM_SET2_EPI16(Base::BLUE_TO_U_WEIGHT, Base::RED_TO_U_WEIGHT); 157 const __m128i K16_GU_RT = SIMD_MM_SET2_EPI16(Base::GREEN_TO_U_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 158 const __m128i K16_BV_RV = SIMD_MM_SET2_EPI16(Base::BLUE_TO_V_WEIGHT, Base::RED_TO_V_WEIGHT); 159 const __m128i K16_GV_RT = SIMD_MM_SET2_EPI16(Base::GREEN_TO_V_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 160 161 const __m128i K16_DIVISION_BY_9_FACTOR = SIMD_MM_SET1_EPI16(Base::DIVISION_BY_9_FACTOR); 162 } 163 #endif// SIMD_SSE2_ENABLE 164 165 #ifdef SIMD_SSE3_ENABLE 166 namespace Sse3 167 { 168 using namespace Sse2; 169 #if defined(_MSC_VER) && _MSC_VER >= 1700 && _MSC_VER < 1900 // Visual Studio 2012/2013 compiler bug 170 using Sse::F; 171 using Sse::DF; 172 using Sse::QF; 173 #endif 174 } 175 #endif// SIMD_SSE3_ENABLE 176 177 #ifdef SIMD_SSSE3_ENABLE 178 namespace Ssse3 179 { 180 using namespace Sse3; 181 182 const __m128i K8_SHUFFLE_GRAY_TO_BGR0 = SIMD_MM_SETR_EPI8(0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x5); 183 const __m128i K8_SHUFFLE_GRAY_TO_BGR1 = SIMD_MM_SETR_EPI8(0x5, 0x5, 0x6, 0x6, 0x6, 0x7, 0x7, 0x7, 0x8, 0x8, 0x8, 0x9, 0x9, 0x9, 0xA, 0xA); 184 const __m128i K8_SHUFFLE_GRAY_TO_BGR2 = SIMD_MM_SETR_EPI8(0xA, 0xB, 0xB, 0xB, 0xC, 0xC, 0xC, 0xD, 0xD, 0xD, 0xE, 0xE, 0xE, 0xF, 0xF, 0xF); 185 186 const __m128i K8_SHUFFLE_BLUE_TO_BGR0 = SIMD_MM_SETR_EPI8(0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5); 187 const __m128i K8_SHUFFLE_BLUE_TO_BGR1 = SIMD_MM_SETR_EPI8(-1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1); 188 const __m128i K8_SHUFFLE_BLUE_TO_BGR2 = SIMD_MM_SETR_EPI8(-1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1, -1); 189 190 const __m128i K8_SHUFFLE_GREEN_TO_BGR0 = SIMD_MM_SETR_EPI8(-1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1); 191 const __m128i K8_SHUFFLE_GREEN_TO_BGR1 = SIMD_MM_SETR_EPI8(0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA); 192 const __m128i K8_SHUFFLE_GREEN_TO_BGR2 = SIMD_MM_SETR_EPI8(-1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1); 193 194 const __m128i K8_SHUFFLE_RED_TO_BGR0 = SIMD_MM_SETR_EPI8(-1, -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1); 195 const __m128i K8_SHUFFLE_RED_TO_BGR1 = SIMD_MM_SETR_EPI8(-1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1); 196 const __m128i K8_SHUFFLE_RED_TO_BGR2 = SIMD_MM_SETR_EPI8(0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF); 197 198 const __m128i K8_SHUFFLE_BGR0_TO_BLUE = SIMD_MM_SETR_EPI8(0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 199 const __m128i K8_SHUFFLE_BGR1_TO_BLUE = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1); 200 const __m128i K8_SHUFFLE_BGR2_TO_BLUE = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD); 201 202 const __m128i K8_SHUFFLE_BGR0_TO_GREEN = SIMD_MM_SETR_EPI8(0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 203 const __m128i K8_SHUFFLE_BGR1_TO_GREEN = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1); 204 const __m128i K8_SHUFFLE_BGR2_TO_GREEN = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE); 205 206 const __m128i K8_SHUFFLE_BGR0_TO_RED = SIMD_MM_SETR_EPI8(0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 207 const __m128i K8_SHUFFLE_BGR1_TO_RED = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1); 208 const __m128i K8_SHUFFLE_BGR2_TO_RED = SIMD_MM_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF); 209 } 210 #endif// SIMD_SSSE3_ENABLE 211 212 #ifdef SIMD_SSE41_ENABLE 213 namespace Sse41 214 { 215 using namespace Ssse3; 216 #if defined(_MSC_VER) && _MSC_VER >= 1700 && _MSC_VER < 1900 // Visual Studio 2012/2013 compiler bug 217 using Sse::F; 218 using Sse::DF; 219 using Sse::QF; 220 #endif 221 } 222 #endif// SIMD_SSE41_ENABLE 223 224 #ifdef SIMD_SSE42_ENABLE 225 namespace Sse42 226 { 227 using namespace Sse41; 228 } 229 #endif// SIMD_SSE42_ENABLE 230 231 #ifdef SIMD_AVX_ENABLE 232 namespace Avx 233 { 234 const size_t F = sizeof(__m256) / sizeof(float); 235 const size_t DF = 2 * F; 236 const size_t QF = 4 * F; 237 const size_t HF = F / 2; 238 } 239 #endif// SIMD_AVX_ENABLE 240 241 #ifdef SIMD_AVX2_ENABLE 242 namespace Avx2 243 { 244 using namespace Avx; 245 #if defined(_MSC_VER) && _MSC_VER >= 1700 && _MSC_VER < 1900 // Visual Studio 2012/2013 compiler bug 246 using Avx::F; 247 using Avx::DF; 248 using Avx::QF; 249 #endif 250 251 const size_t A = sizeof(__m256i); 252 const size_t DA = 2 * A; 253 const size_t QA = 4 * A; 254 const size_t OA = 8 * A; 255 const size_t HA = A / 2; 256 257 const __m256i K_ZERO = SIMD_MM256_SET1_EPI8(0); 258 const __m256i K_INV_ZERO = SIMD_MM256_SET1_EPI8(0xFF); 259 260 const __m256i K8_01 = SIMD_MM256_SET1_EPI8(0x01); 261 const __m256i K8_02 = SIMD_MM256_SET1_EPI8(0x02); 262 const __m256i K8_03 = SIMD_MM256_SET1_EPI8(0x03); 263 const __m256i K8_04 = SIMD_MM256_SET1_EPI8(0x04); 264 const __m256i K8_07 = SIMD_MM256_SET1_EPI8(0x07); 265 const __m256i K8_08 = SIMD_MM256_SET1_EPI8(0x08); 266 const __m256i K8_10 = SIMD_MM256_SET1_EPI8(0x10); 267 const __m256i K8_20 = SIMD_MM256_SET1_EPI8(0x20); 268 const __m256i K8_40 = SIMD_MM256_SET1_EPI8(0x40); 269 const __m256i K8_80 = SIMD_MM256_SET1_EPI8(0x80); 270 271 const __m256i K8_01_FF = SIMD_MM256_SET2_EPI8(0x01, 0xFF); 272 273 const __m256i K16_0001 = SIMD_MM256_SET1_EPI16(0x0001); 274 const __m256i K16_0002 = SIMD_MM256_SET1_EPI16(0x0002); 275 const __m256i K16_0003 = SIMD_MM256_SET1_EPI16(0x0003); 276 const __m256i K16_0004 = SIMD_MM256_SET1_EPI16(0x0004); 277 const __m256i K16_0005 = SIMD_MM256_SET1_EPI16(0x0005); 278 const __m256i K16_0006 = SIMD_MM256_SET1_EPI16(0x0006); 279 const __m256i K16_0008 = SIMD_MM256_SET1_EPI16(0x0008); 280 const __m256i K16_0010 = SIMD_MM256_SET1_EPI16(0x0010); 281 const __m256i K16_0018 = SIMD_MM256_SET1_EPI16(0x0018); 282 const __m256i K16_0020 = SIMD_MM256_SET1_EPI16(0x0020); 283 const __m256i K16_0080 = SIMD_MM256_SET1_EPI16(0x0080); 284 const __m256i K16_00FF = SIMD_MM256_SET1_EPI16(0x00FF); 285 const __m256i K16_FF00 = SIMD_MM256_SET1_EPI16(0xFF00); 286 287 const __m256i K32_00000001 = SIMD_MM256_SET1_EPI32(0x00000001); 288 const __m256i K32_00000002 = SIMD_MM256_SET1_EPI32(0x00000002); 289 const __m256i K32_00000004 = SIMD_MM256_SET1_EPI32(0x00000004); 290 const __m256i K32_00000008 = SIMD_MM256_SET1_EPI32(0x00000008); 291 const __m256i K32_000000FF = SIMD_MM256_SET1_EPI32(0x000000FF); 292 const __m256i K32_0000FFFF = SIMD_MM256_SET1_EPI32(0x0000FFFF); 293 const __m256i K32_00010000 = SIMD_MM256_SET1_EPI32(0x00010000); 294 const __m256i K32_01000000 = SIMD_MM256_SET1_EPI32(0x01000000); 295 const __m256i K32_FFFFFF00 = SIMD_MM256_SET1_EPI32(0xFFFFFF00); 296 297 const __m256i K16_Y_ADJUST = SIMD_MM256_SET1_EPI16(Base::Y_ADJUST); 298 const __m256i K16_UV_ADJUST = SIMD_MM256_SET1_EPI16(Base::UV_ADJUST); 299 300 const __m256i K16_YRGB_RT = SIMD_MM256_SET2_EPI16(Base::Y_TO_RGB_WEIGHT, Base::YUV_TO_BGR_ROUND_TERM); 301 const __m256i K16_VR_0 = SIMD_MM256_SET2_EPI16(Base::V_TO_RED_WEIGHT, 0); 302 const __m256i K16_UG_VG = SIMD_MM256_SET2_EPI16(Base::U_TO_GREEN_WEIGHT, Base::V_TO_GREEN_WEIGHT); 303 const __m256i K16_UB_0 = SIMD_MM256_SET2_EPI16(Base::U_TO_BLUE_WEIGHT, 0); 304 305 const __m256i K16_BY_RY = SIMD_MM256_SET2_EPI16(Base::BLUE_TO_Y_WEIGHT, Base::RED_TO_Y_WEIGHT); 306 const __m256i K16_GY_RT = SIMD_MM256_SET2_EPI16(Base::GREEN_TO_Y_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 307 const __m256i K16_BU_RU = SIMD_MM256_SET2_EPI16(Base::BLUE_TO_U_WEIGHT, Base::RED_TO_U_WEIGHT); 308 const __m256i K16_GU_RT = SIMD_MM256_SET2_EPI16(Base::GREEN_TO_U_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 309 const __m256i K16_BV_RV = SIMD_MM256_SET2_EPI16(Base::BLUE_TO_V_WEIGHT, Base::RED_TO_V_WEIGHT); 310 const __m256i K16_GV_RT = SIMD_MM256_SET2_EPI16(Base::GREEN_TO_V_WEIGHT, Base::BGR_TO_YUV_ROUND_TERM); 311 312 const __m256i K16_DIVISION_BY_9_FACTOR = SIMD_MM256_SET1_EPI16(Base::DIVISION_BY_9_FACTOR); 313 314 const __m256i K8_SHUFFLE_0 = SIMD_MM256_SETR_EPI8( 315 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 316 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0); 317 318 const __m256i K8_SHUFFLE_1 = SIMD_MM256_SETR_EPI8( 319 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 320 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70); 321 322 const __m256i K8_SHUFFLE_GRAY_TO_BGR0 = SIMD_MM256_SETR_EPI8( 323 0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x5, 324 0x5, 0x5, 0x6, 0x6, 0x6, 0x7, 0x7, 0x7, 0x8, 0x8, 0x8, 0x9, 0x9, 0x9, 0xA, 0xA); 325 const __m256i K8_SHUFFLE_GRAY_TO_BGR1 = SIMD_MM256_SETR_EPI8( 326 0x2, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x5, 0x5, 0x5, 0x6, 0x6, 0x6, 0x7, 0x7, 0x7, 327 0x8, 0x8, 0x8, 0x9, 0x9, 0x9, 0xA, 0xA, 0xA, 0xB, 0xB, 0xB, 0xC, 0xC, 0xC, 0xD); 328 const __m256i K8_SHUFFLE_GRAY_TO_BGR2 = SIMD_MM256_SETR_EPI8( 329 0x5, 0x5, 0x6, 0x6, 0x6, 0x7, 0x7, 0x7, 0x8, 0x8, 0x8, 0x9, 0x9, 0x9, 0xA, 0xA, 330 0xA, 0xB, 0xB, 0xB, 0xC, 0xC, 0xC, 0xD, 0xD, 0xD, 0xE, 0xE, 0xE, 0xF, 0xF, 0xF); 331 332 const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0 = SIMD_MM256_SETR_EPI8( 333 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, 334 -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1); 335 const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1 = SIMD_MM256_SETR_EPI8( 336 -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 337 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD); 338 const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2 = SIMD_MM256_SETR_EPI8( 339 -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, 340 -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1, -1); 341 342 const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0 = SIMD_MM256_SETR_EPI8( 343 -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 344 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA); 345 const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1 = SIMD_MM256_SETR_EPI8( 346 -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, 347 -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1); 348 const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2 = SIMD_MM256_SETR_EPI8( 349 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, 350 -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1); 351 352 const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR0 = SIMD_MM256_SETR_EPI8( 353 -1, -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, 354 -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1); 355 const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR1 = SIMD_MM256_SETR_EPI8( 356 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, 357 -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1); 358 const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR2 = SIMD_MM256_SETR_EPI8( 359 -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 360 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF); 361 362 const __m256i K8_SHUFFLE_BGR0_TO_BLUE = SIMD_MM256_SETR_EPI8( 363 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 364 -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1); 365 const __m256i K8_SHUFFLE_BGR1_TO_BLUE = SIMD_MM256_SETR_EPI8( 366 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, 367 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 368 const __m256i K8_SHUFFLE_BGR2_TO_BLUE = SIMD_MM256_SETR_EPI8( 369 -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, 370 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD); 371 372 const __m256i K8_SHUFFLE_BGR0_TO_GREEN = SIMD_MM256_SETR_EPI8( 373 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 374 -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1); 375 const __m256i K8_SHUFFLE_BGR1_TO_GREEN = SIMD_MM256_SETR_EPI8( 376 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, 377 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 378 const __m256i K8_SHUFFLE_BGR2_TO_GREEN = SIMD_MM256_SETR_EPI8( 379 -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, 380 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE); 381 382 const __m256i K8_SHUFFLE_BGR0_TO_RED = SIMD_MM256_SETR_EPI8( 383 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 384 -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1); 385 const __m256i K8_SHUFFLE_BGR1_TO_RED = SIMD_MM256_SETR_EPI8( 386 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, 387 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 388 const __m256i K8_SHUFFLE_BGR2_TO_RED = SIMD_MM256_SETR_EPI8( 389 -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, 390 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF); 391 392 const __m256i K8_BGRA_TO_BGR_SHUFFLE = SIMD_MM256_SETR_EPI8( 393 0x0, 0x1, 0x2, -1, 0x3, 0x4, 0x5, -1, 0x6, 0x7, 0x8, -1, 0x9, 0xA, 0xB, -1, 394 0x4, 0x5, 0x6, -1, 0x7, 0x8, 0x9, -1, 0xA, 0xB, 0xC, -1, 0xD, 0xE, 0xF, -1); 395 396 const __m256i K8_BGRA_TO_RGB_SHUFFLE = SIMD_MM256_SETR_EPI8( 397 0x2, 0x1, 0x0, -1, 0x5, 0x4, 0x3, -1, 0x8, 0x7, 0x6, -1, 0xB, 0xA, 0x9, -1, 398 0x6, 0x5, 0x4, -1, 0x9, 0x8, 0x7, -1, 0xC, 0xB, 0xA, -1, 0xF, 0xE, 0xD, -1); 399 400 const __m256i K8_BGRA_TO_RGBA_SHUFFLE = SIMD_MM256_SETR_EPI8( 401 0x2, 0x1, 0x0, 0x3, 0x6, 0x5, 0x4, 0x7, 0xA, 0x9, 0x8, 0xB, 0xE, 0xD, 0xC, 0xF, 402 0x2, 0x1, 0x0, 0x3, 0x6, 0x5, 0x4, 0x7, 0xA, 0x9, 0x8, 0xB, 0xE, 0xD, 0xC, 0xF); 403 404 const __m256i K32_TWO_UNPACK_PERMUTE = SIMD_MM256_SETR_EPI32(0, 2, 4, 6, 1, 3, 5, 7); 405 } 406 #endif// SIMD_AVX2_ENABLE 407 408 #ifdef SIMD_NEON_ENABLE 409 namespace Neon 410 { 411 const size_t A = sizeof(uint8x16_t); 412 const size_t DA = 2 * A; 413 const size_t QA = 4 * A; 414 const size_t OA = 8 * A; 415 const size_t HA = A / 2; 416 417 const size_t F = sizeof(float32x4_t) / sizeof(float); 418 const size_t DF = 2 * F; 419 const size_t QF = 4 * F; 420 const size_t HF = F / 2; 421 422 const uint8x16_t K8_00 = SIMD_VEC_SET1_EPI8(0x00); 423 const uint8x16_t K8_01 = SIMD_VEC_SET1_EPI8(0x01); 424 const uint8x16_t K8_02 = SIMD_VEC_SET1_EPI8(0x02); 425 const uint8x16_t K8_03 = SIMD_VEC_SET1_EPI8(0x03); 426 const uint8x16_t K8_04 = SIMD_VEC_SET1_EPI8(0x04); 427 const uint8x16_t K8_07 = SIMD_VEC_SET1_EPI8(0x07); 428 const uint8x16_t K8_08 = SIMD_VEC_SET1_EPI8(0x08); 429 const uint8x16_t K8_10 = SIMD_VEC_SET1_EPI8(0x10); 430 const uint8x16_t K8_20 = SIMD_VEC_SET1_EPI8(0x20); 431 const uint8x16_t K8_40 = SIMD_VEC_SET1_EPI8(0x40); 432 const uint8x16_t K8_80 = SIMD_VEC_SET1_EPI8(0x80); 433 const uint8x16_t K8_FF = SIMD_VEC_SET1_EPI8(0xFF); 434 435 const uint16x8_t K16_0000 = SIMD_VEC_SET1_EPI16(0x0000); 436 const uint16x8_t K16_0001 = SIMD_VEC_SET1_EPI16(0x0001); 437 const uint16x8_t K16_0002 = SIMD_VEC_SET1_EPI16(0x0002); 438 const uint16x8_t K16_0003 = SIMD_VEC_SET1_EPI16(0x0003); 439 const uint16x8_t K16_0004 = SIMD_VEC_SET1_EPI16(0x0004); 440 const uint16x8_t K16_0005 = SIMD_VEC_SET1_EPI16(0x0005); 441 const uint16x8_t K16_0006 = SIMD_VEC_SET1_EPI16(0x0006); 442 const uint16x8_t K16_0008 = SIMD_VEC_SET1_EPI16(0x0008); 443 const uint16x8_t K16_0010 = SIMD_VEC_SET1_EPI16(0x0010); 444 const uint16x8_t K16_0020 = SIMD_VEC_SET1_EPI16(0x0020); 445 const uint16x8_t K16_0080 = SIMD_VEC_SET1_EPI16(0x0080); 446 const uint16x8_t K16_00FF = SIMD_VEC_SET1_EPI16(0x00FF); 447 const uint16x8_t K16_0101 = SIMD_VEC_SET1_EPI16(0x0101); 448 const uint16x8_t K16_0800 = SIMD_VEC_SET1_EPI16(0x0800); 449 const uint16x8_t K16_FF00 = SIMD_VEC_SET1_EPI16(0xFF00); 450 451 const uint32x4_t K32_00000000 = SIMD_VEC_SET1_EPI32(0x00000000); 452 const uint32x4_t K32_00000001 = SIMD_VEC_SET1_EPI32(0x00000001); 453 const uint32x4_t K32_00000002 = SIMD_VEC_SET1_EPI32(0x00000002); 454 const uint32x4_t K32_00000003 = SIMD_VEC_SET1_EPI32(0x00000003); 455 const uint32x4_t K32_00000004 = SIMD_VEC_SET1_EPI32(0x00000004); 456 const uint32x4_t K32_00000005 = SIMD_VEC_SET1_EPI32(0x00000005); 457 const uint32x4_t K32_00000008 = SIMD_VEC_SET1_EPI32(0x00000008); 458 const uint32x4_t K32_00000010 = SIMD_VEC_SET1_EPI32(0x00000010); 459 const uint32x4_t K32_000000FF = SIMD_VEC_SET1_EPI32(0x000000FF); 460 const uint32x4_t K32_0000FFFF = SIMD_VEC_SET1_EPI32(0x0000FFFF); 461 const uint32x4_t K32_00010000 = SIMD_VEC_SET1_EPI32(0x00010000); 462 const uint32x4_t K32_01000000 = SIMD_VEC_SET1_EPI32(0x01000000); 463 const uint32x4_t K32_08080800 = SIMD_VEC_SET1_EPI32(0x08080800); 464 const uint32x4_t K32_FFFFFF00 = SIMD_VEC_SET1_EPI32(0xFFFFFF00); 465 const uint32x4_t K32_FFFFFFFF = SIMD_VEC_SET1_EPI32(0xFFFFFFFF); 466 const uint32x4_t K32_0123 = SIMD_VEC_SETR_EPI32(0, 1, 2, 3); 467 468 const uint64x2_t K64_0000000000000000 = SIMD_VEC_SET1_EPI64(0x0000000000000000); 469 470 const uint16x4_t K16_BLUE_TO_GRAY_WEIGHT = SIMD_VEC_SET1_PI16(Base::BLUE_TO_GRAY_WEIGHT); 471 const uint16x4_t K16_GREEN_TO_GRAY_WEIGHT = SIMD_VEC_SET1_PI16(Base::GREEN_TO_GRAY_WEIGHT); 472 const uint16x4_t K16_RED_TO_GRAY_WEIGHT = SIMD_VEC_SET1_PI16(Base::RED_TO_GRAY_WEIGHT); 473 const uint32x4_t K32_BGR_TO_GRAY_ROUND_TERM = SIMD_VEC_SET1_EPI32(Base::BGR_TO_GRAY_ROUND_TERM); 474 475 const int16x8_t K16_Y_ADJUST = SIMD_VEC_SET1_EPI16(Base::Y_ADJUST); 476 const int16x8_t K16_UV_ADJUST = SIMD_VEC_SET1_EPI16(Base::UV_ADJUST); 477 478 const int16x4_t K16_BLUE_TO_Y_WEIGHT = SIMD_VEC_SET1_PI16(Base::BLUE_TO_Y_WEIGHT); 479 const int16x4_t K16_GREEN_TO_Y_WEIGHT = SIMD_VEC_SET1_PI16(Base::GREEN_TO_Y_WEIGHT); 480 const int16x4_t K16_RED_TO_Y_WEIGHT = SIMD_VEC_SET1_PI16(Base::RED_TO_Y_WEIGHT); 481 482 const int16x4_t K16_BLUE_TO_U_WEIGHT = SIMD_VEC_SET1_PI16(Base::BLUE_TO_U_WEIGHT); 483 const int16x4_t K16_GREEN_TO_U_WEIGHT = SIMD_VEC_SET1_PI16(Base::GREEN_TO_U_WEIGHT); 484 const int16x4_t K16_RED_TO_U_WEIGHT = SIMD_VEC_SET1_PI16(Base::RED_TO_U_WEIGHT); 485 486 const int16x4_t K16_BLUE_TO_V_WEIGHT = SIMD_VEC_SET1_PI16(Base::BLUE_TO_V_WEIGHT); 487 const int16x4_t K16_GREEN_TO_V_WEIGHT = SIMD_VEC_SET1_PI16(Base::GREEN_TO_V_WEIGHT); 488 const int16x4_t K16_RED_TO_V_WEIGHT = SIMD_VEC_SET1_PI16(Base::RED_TO_V_WEIGHT); 489 490 const int32x4_t K32_BGR_TO_YUV_ROUND_TERM = SIMD_VEC_SET1_EPI32(Base::BGR_TO_YUV_ROUND_TERM); 491 492 const int16x4_t K16_Y_TO_RGB_WEIGHT = SIMD_VEC_SET1_PI16(Base::Y_TO_RGB_WEIGHT); 493 494 const int16x4_t K16_U_TO_BLUE_WEIGHT = SIMD_VEC_SET1_PI16(Base::U_TO_BLUE_WEIGHT); 495 const int16x4_t K16_U_TO_GREEN_WEIGHT = SIMD_VEC_SET1_PI16(Base::U_TO_GREEN_WEIGHT); 496 497 const int16x4_t K16_V_TO_GREEN_WEIGHT = SIMD_VEC_SET1_PI16(Base::V_TO_GREEN_WEIGHT); 498 const int16x4_t K16_V_TO_RED_WEIGHT = SIMD_VEC_SET1_PI16(Base::V_TO_RED_WEIGHT); 499 500 const int32x4_t K32_YUV_TO_BGR_ROUND_TERM = SIMD_VEC_SET1_EPI32(Base::YUV_TO_BGR_ROUND_TERM); 501 } 502 #endif//SIMD_NEON_ENABLE 503 } 504 #endif//__SimdConst_h__ 505