/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <string.h>  // For memset.

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// memset for temp is meant to clear the source buffer (not dest) so that
// SIMD that reads a full multiple of 16 bytes will not trigger msan errors.
// memset is not needed for production, as the garbage values are processed but
// not used, although there may be edge cases for subsampling.
// The size of the buffer is based on the largest read, which can be inferred
// by the source type (e.g. ARGB) and the mask (last parameter), or by examining
// the source code for how much the source pointers are advanced.

// Subsampled source needs to be increased by 1 if not even.
// SS(width, shift) is the number of samples a plane subsampled by
// (1 << shift) needs to cover `width` pixels: width / 2^shift, rounded up.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// Any 4 planes to 1 with yuvconstants.
//
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD:
//  - the leading (width & ~MASK) pixels are converted in place by ANY_SIMD;
//  - the remaining (width & MASK) pixels are copied into a zeroed scratch
//    buffer, converted there with a width of MASK + 1, and only the valid
//    part of the result is copied to dst_ptr.
// temp holds five 64-byte slots: Y, U, V and A sources, then the output.
// UVSHIFT scales the U/V offset and count, DUVSHIFT and BPP scale the
// destination offset and count.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                    \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr,  \
               const struct YuvConstants* yuvconstants, int width) {          \
    const int tail = width & MASK;                                            \
    const int bulk = width & ~MASK;                                           \
    const int uv_skip = bulk >> UVSHIFT;                                      \
    const int uv_tail = SS(tail, UVSHIFT);                                    \
    SIMD_ALIGNED(uint8_t temp[64 * 5]);                                       \
    memset(temp, 0, 64 * 4); /* for msan */                                   \
    if (bulk > 0) {                                                           \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, bulk);      \
    }                                                                         \
    memcpy(temp, y_buf + bulk, tail);                                         \
    memcpy(temp + 64, u_buf + uv_skip, uv_tail);                              \
    memcpy(temp + 128, v_buf + uv_skip, uv_tail);                             \
    memcpy(temp + 192, a_buf + bulk, tail);                                   \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256,             \
             yuvconstants, MASK + 1);                                         \
    memcpy(dst_ptr + (bulk >> DUVSHIFT) * BPP, temp + 256,                    \
           SS(tail, DUVSHIFT) * BPP);                                         \
  }

#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MMI
ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C

// Any 3 planes to 1.
// Generates NAMEANY, a wrapper that accepts any width for the 3-plane kernel
// ANY_SIMD. The leading (width & ~MASK) pixels go straight through ANY_SIMD;
// the remaining (width & MASK) pixels are staged in a zeroed scratch buffer,
// converted with a width of MASK + 1, and the valid part is copied out.
// temp holds four 64-byte slots: the three sources, then the output.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                    \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) {           \
    SIMD_ALIGNED(uint8_t temp[64 * 4]);                                       \
    memset(temp, 0, 64 * 3); /* for YUY2 and msan */                          \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n);                              \
    }                                                                         \
    memcpy(temp, y_buf + n, r);                                               \
    memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));                \
    memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1);              \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                       \
           SS(r, DUVSHIFT) * BPP);                                            \
  }

// Merge functions.
#ifdef HAS_MERGERGBROW_SSSE3
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_NEON
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_MMI
ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_AVX2
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_MMI
ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOUYVYROW_MMI
ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#ifdef HAS_BLENDPLANEROW_MMI
ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7)
#endif
#undef ANY31

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
//
// Same scheme as the other wrappers in this file, with 128-byte scratch slots
// (Y at 0, U at 128, V at 256, output at 384). When width is odd, the last
// copied U and V samples are duplicated into the following slot entry before
// the kernel runs on the scratch buffer.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                    \
               const uint8_t* v_buf, uint8_t* dst_ptr,                        \
               const struct YuvConstants* yuvconstants, int width) {          \
    SIMD_ALIGNED(uint8_t temp[128 * 4]);                                      \
    memset(temp, 0, 128 * 3); /* for YUY2 and msan */                         \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);                \
    }                                                                         \
    memcpy(temp, y_buf + n, r);                                               \
    memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
    memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
    if (width & 1) {                                                          \
      temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1];            \
      temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1];            \
    }                                                                         \
    ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants,          \
             MASK + 1);                                                       \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384,                       \
           SS(r, DUVSHIFT) * BPP);                                            \
  }

#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOAR30ROW_SSSE3
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOAR30ROW_AVX2
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif  // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MMI
ANY31C(I444ToARGBRow_Any_MMI, I444ToARGBRow_MMI, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MMI, I422ToARGBRow_MMI, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MMI, I422ToRGB24Row_MMI, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MMI, I422ToARGB4444Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MMI, I422ToARGB1555Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MMI, I422ToRGB565Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGBARow_Any_MMI, I422ToRGBARow_MMI, 1, 0, 4, 7)
#endif
#undef ANY31C

// Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider sharing this code with ANY31C
//
// Sources are of type T (SBPP bytes per element); temp holds three 16-element
// source slots and the converted remainder is produced in a separate 64-byte
// `out` buffer.
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)     \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf,                \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,     \
               int width) {                                                   \
    SIMD_ALIGNED(T temp[16 * 3]);                                             \
    SIMD_ALIGNED(uint8_t out[64]);                                            \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                   \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);                \
    }                                                                         \
    memcpy(temp, y_buf + n, r * SBPP);                                        \
    memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);         \
    memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);         \
    ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1);        \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);      \
  }

#ifdef HAS_I210TOAR30ROW_SSSE3
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_SSSE3
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_AVX2
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
#undef ANY31CT

// Any 2 planes to 1.
//
// Generates NAMEANY, a wrapper that accepts any width for the 2-plane kernel
// ANY_SIMD. The leading (width & ~MASK) pixels go straight through ANY_SIMD;
// the remaining (width & MASK) pixels are staged in a zeroed scratch buffer,
// converted with a width of MASK + 1, and the valid part is copied out.
//
// temp holds three 128-byte slots: first source, second source, output.
// The output slot must hold (MASK + 1) * BPP bytes. 64-byte slots are too
// small for NV21ToYUV24Row_Any_AVX2 (MASK 31, BPP 3), whose kernel writes
// 96 bytes, so the slots are 128 bytes wide.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               int width) {                                                   \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, n);                                    \
    }                                                                         \
    memcpy(temp, y_buf + n * SBPP, r * SBPP);                                 \
    memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2,                       \
           SS(r, UVSHIFT) * SBPP2);                                           \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                         \
    memcpy(dst_ptr + n * BPP, temp + 256, r * BPP);                           \
  }

// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_MSA
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
#ifdef HAS_NV21TOYUV24ROW_NEON
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_AVX2
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_MMI
ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_MMI
ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MMI
ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_SOBELROW_SSE2
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELROW_MSA
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_MMI
ANY21(SobelRow_Any_MMI, SobelRow_MMI, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_MSA
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#ifdef HAS_SOBELTOPLANEROW_MMI
ANY21(SobelToPlaneRow_Any_MMI, SobelToPlaneRow_MMI, 0, 1, 1, 1, 7)
#endif
#ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELXYROW_MSA
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_MMI
ANY21(SobelXYRow_Any_MMI, SobelXYRow_MMI, 0, 1, 1, 4, 7)
#endif
#undef ANY21

// Any 2 planes to 1 with yuvconstants.
//
// Generates NAMEANY, a wrapper that accepts any width for the 2-plane kernel
// ANY_SIMD. The leading (width & ~MASK) pixels are converted in place; the
// remaining (width & MASK) pixels are staged in a zeroed scratch buffer,
// converted with a width of MASK + 1, and the valid part is copied out.
// temp holds three 128-byte slots: first source, second source, output.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {          \
    const int tail = width & MASK;                                            \
    const int bulk = width & ~MASK;                                           \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    uint8_t* const tail_y = temp;                                             \
    uint8_t* const tail_uv = temp + 128;                                      \
    uint8_t* const tail_dst = temp + 256;                                     \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    if (bulk > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, bulk);                   \
    }                                                                         \
    memcpy(tail_y, y_buf + bulk * SBPP, tail * SBPP);                         \
    memcpy(tail_uv, uv_buf + (bulk >> UVSHIFT) * SBPP2,                       \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(tail_y, tail_uv, tail_dst, yuvconstants, MASK + 1);              \
    memcpy(dst_ptr + bulk * BPP, tail_dst, tail * BPP);                       \
  }

// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MMI
ANY21C(NV12ToARGBRow_Any_MMI, NV12ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_AVX2
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV21TOARGBROW_NEON
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MMI
ANY21C(NV21ToARGBRow_Any_MMI, NV21ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_NEON
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_NEON
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_SSSE3
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_MMI
ANY21C(NV12ToRGB24Row_Any_MMI, NV12ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_SSSE3
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_AVX2
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_AVX2
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_MMI
ANY21C(NV21ToRGB24Row_Any_MMI, NV21ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MSA
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MMI
ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C

// Any 1 to 1.
485 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ 486 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ 487 SIMD_ALIGNED(uint8_t temp[128 * 2]); \ 488 memset(temp, 0, 128); /* for YUY2 and msan */ \ 489 int r = width & MASK; \ 490 int n = width & ~MASK; \ 491 if (n > 0) { \ 492 ANY_SIMD(src_ptr, dst_ptr, n); \ 493 } \ 494 memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ 495 ANY_SIMD(temp, temp + 128, MASK + 1); \ 496 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ 497 } 498 499 #ifdef HAS_COPYROW_AVX 500 ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63) 501 #endif 502 #ifdef HAS_COPYROW_SSE2 503 ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31) 504 #endif 505 #ifdef HAS_COPYROW_NEON 506 ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31) 507 #endif 508 #if defined(HAS_ARGBTORGB24ROW_SSSE3) 509 ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15) 510 ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15) 511 ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) 512 ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) 513 ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) 514 #endif 515 #if defined(HAS_ARGBTORGB24ROW_AVX2) 516 ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) 517 #endif 518 #if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) 519 ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31) 520 #endif 521 #if defined(HAS_ARGBTORAWROW_AVX2) 522 ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31) 523 #endif 524 #if defined(HAS_ARGBTORGB565ROW_AVX2) 525 ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) 526 #endif 527 #if defined(HAS_ARGBTOARGB4444ROW_AVX2) 528 ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) 529 ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) 530 #endif 531 #if defined(HAS_ABGRTOAR30ROW_SSSE3) 532 
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3) 533 #endif 534 #if defined(HAS_ARGBTOAR30ROW_SSSE3) 535 ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) 536 #endif 537 #if defined(HAS_ABGRTOAR30ROW_AVX2) 538 ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7) 539 #endif 540 #if defined(HAS_ARGBTOAR30ROW_AVX2) 541 ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) 542 #endif 543 #if defined(HAS_J400TOARGBROW_SSE2) 544 ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) 545 #endif 546 #if defined(HAS_J400TOARGBROW_AVX2) 547 ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) 548 #endif 549 #if defined(HAS_RGB24TOARGBROW_SSSE3) 550 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) 551 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) 552 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) 553 ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) 554 ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) 555 #endif 556 #if defined(HAS_RAWTORGBAROW_SSSE3) 557 ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15) 558 #endif 559 #if defined(HAS_RAWTORGB24ROW_SSSE3) 560 ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7) 561 #endif 562 #if defined(HAS_RGB565TOARGBROW_AVX2) 563 ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) 564 #endif 565 #if defined(HAS_ARGB1555TOARGBROW_AVX2) 566 ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) 567 #endif 568 #if defined(HAS_ARGB4444TOARGBROW_AVX2) 569 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) 570 #endif 571 #if defined(HAS_ARGBTORGB24ROW_NEON) 572 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) 573 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) 574 ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7) 575 ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 
0, 4, 2, 7) 576 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) 577 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) 578 #endif 579 #if defined(HAS_ARGBTORGB24ROW_MSA) 580 ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15) 581 ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15) 582 ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7) 583 ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7) 584 ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) 585 ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) 586 #endif 587 #if defined(HAS_ARGBTORGB24ROW_MMI) 588 ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3) 589 ANY11(ARGBToRAWRow_Any_MMI, ARGBToRAWRow_MMI, 0, 4, 3, 3) 590 ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3) 591 ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3) 592 ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3) 593 ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3) 594 #endif 595 #if defined(HAS_RAWTORGB24ROW_NEON) 596 ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) 597 #endif 598 #if defined(HAS_RAWTORGB24ROW_MSA) 599 ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) 600 #endif 601 #if defined(HAS_RAWTORGB24ROW_MMI) 602 ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3) 603 #endif 604 #ifdef HAS_ARGBTOYROW_AVX2 605 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) 606 #endif 607 #ifdef HAS_ABGRTOYROW_AVX2 608 ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31) 609 #endif 610 #ifdef HAS_ARGBTOYJROW_AVX2 611 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) 612 #endif 613 #ifdef HAS_RGBATOYJROW_AVX2 614 ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31) 615 #endif 616 #ifdef HAS_UYVYTOYROW_AVX2 617 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31) 618 #endif 619 #ifdef HAS_YUY2TOYROW_AVX2 620 
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31) 621 #endif 622 #ifdef HAS_ARGBTOYROW_SSSE3 623 ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15) 624 #endif 625 #ifdef HAS_BGRATOYROW_SSSE3 626 ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15) 627 ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15) 628 ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15) 629 ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15) 630 ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15) 631 #endif 632 #ifdef HAS_ARGBTOYJROW_SSSE3 633 ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15) 634 #endif 635 #ifdef HAS_RGBATOYJROW_SSSE3 636 ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15) 637 #endif 638 #ifdef HAS_ARGBTOYROW_NEON 639 ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7) 640 #endif 641 #ifdef HAS_ARGBTOYROW_MSA 642 ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) 643 #endif 644 #ifdef HAS_ARGBTOYROW_MMI 645 ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7) 646 #endif 647 #ifdef HAS_ARGBTOYJROW_NEON 648 ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) 649 #endif 650 #ifdef HAS_RGBATOYJROW_NEON 651 ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 7) 652 #endif 653 #ifdef HAS_ARGBTOYJROW_MSA 654 ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) 655 #endif 656 #ifdef HAS_ARGBTOYJROW_MMI 657 ANY11(ARGBToYJRow_Any_MMI, ARGBToYJRow_MMI, 0, 4, 1, 7) 658 #endif 659 #ifdef HAS_BGRATOYROW_NEON 660 ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) 661 #endif 662 #ifdef HAS_BGRATOYROW_MSA 663 ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) 664 #endif 665 #ifdef HAS_BGRATOYROW_MMI 666 ANY11(BGRAToYRow_Any_MMI, BGRAToYRow_MMI, 0, 4, 1, 7) 667 #endif 668 #ifdef HAS_ABGRTOYROW_NEON 669 ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) 670 #endif 671 #ifdef HAS_ABGRTOYROW_MSA 672 ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) 673 #endif 674 #ifdef HAS_ABGRTOYROW_MMI 675 
ANY11(ABGRToYRow_Any_MMI, ABGRToYRow_MMI, 0, 4, 1, 7) 676 #endif 677 #ifdef HAS_RGBATOYROW_NEON 678 ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) 679 #endif 680 #ifdef HAS_RGBATOYROW_MSA 681 ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) 682 #endif 683 #ifdef HAS_RGBATOYROW_MMI 684 ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7) 685 #endif 686 #ifdef HAS_RGB24TOYROW_NEON 687 ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) 688 #endif 689 #ifdef HAS_RGB24TOYJROW_AVX2 690 ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31) 691 #endif 692 #ifdef HAS_RGB24TOYJROW_SSSE3 693 ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15) 694 #endif 695 #ifdef HAS_RGB24TOYJROW_NEON 696 ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7) 697 #endif 698 #ifdef HAS_RGB24TOYROW_MSA 699 ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) 700 #endif 701 #ifdef HAS_RGB24TOYROW_MMI 702 ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7) 703 #endif 704 #ifdef HAS_RAWTOYROW_NEON 705 ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) 706 #endif 707 #ifdef HAS_RAWTOYJROW_AVX2 708 ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31) 709 #endif 710 #ifdef HAS_RAWTOYJROW_SSSE3 711 ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15) 712 #endif 713 #ifdef HAS_RAWTOYJROW_NEON 714 ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7) 715 #endif 716 #ifdef HAS_RAWTOYROW_MSA 717 ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) 718 #endif 719 #ifdef HAS_RAWTOYROW_MMI 720 ANY11(RAWToYRow_Any_MMI, RAWToYRow_MMI, 0, 3, 1, 7) 721 #endif 722 #ifdef HAS_RGB565TOYROW_NEON 723 ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) 724 #endif 725 #ifdef HAS_RGB565TOYROW_MSA 726 ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) 727 #endif 728 #ifdef HAS_RGB565TOYROW_MMI 729 ANY11(RGB565ToYRow_Any_MMI, RGB565ToYRow_MMI, 0, 2, 1, 7) 730 #endif 731 #ifdef HAS_ARGB1555TOYROW_NEON 732 ANY11(ARGB1555ToYRow_Any_NEON, 
ARGB1555ToYRow_NEON, 0, 2, 1, 7) 733 #endif 734 #ifdef HAS_ARGB1555TOYROW_MSA 735 ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) 736 #endif 737 #ifdef HAS_ARGB1555TOYROW_MMI 738 ANY11(ARGB1555ToYRow_Any_MMI, ARGB1555ToYRow_MMI, 0, 2, 1, 7) 739 #endif 740 #ifdef HAS_ARGB4444TOYROW_NEON 741 ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) 742 #endif 743 #ifdef HAS_ARGB4444TOYROW_MMI 744 ANY11(ARGB4444ToYRow_Any_MMI, ARGB4444ToYRow_MMI, 0, 2, 1, 7) 745 #endif 746 #ifdef HAS_YUY2TOYROW_NEON 747 ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) 748 #endif 749 #ifdef HAS_UYVYTOYROW_NEON 750 ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) 751 #endif 752 #ifdef HAS_YUY2TOYROW_MSA 753 ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) 754 #endif 755 #ifdef HAS_YUY2TOYROW_MMI 756 ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7) 757 #endif 758 #ifdef HAS_UYVYTOYROW_MSA 759 ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) 760 #endif 761 #ifdef HAS_UYVYTOYROW_MMI 762 ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15) 763 #endif 764 #ifdef HAS_AYUVTOYROW_NEON 765 ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15) 766 #endif 767 #ifdef HAS_SWAPUVROW_SSSE3 768 ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15) 769 #endif 770 #ifdef HAS_SWAPUVROW_AVX2 771 ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31) 772 #endif 773 #ifdef HAS_SWAPUVROW_NEON 774 ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15) 775 #endif 776 #ifdef HAS_RGB24TOARGBROW_NEON 777 ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) 778 #endif 779 #ifdef HAS_RGB24TOARGBROW_MSA 780 ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) 781 #endif 782 #ifdef HAS_RGB24TOARGBROW_MMI 783 ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3) 784 #endif 785 #ifdef HAS_RAWTOARGBROW_NEON 786 ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) 787 #endif 788 #ifdef HAS_RAWTORGBAROW_NEON 789 
ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7) 790 #endif 791 #ifdef HAS_RAWTOARGBROW_MSA 792 ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) 793 #endif 794 #ifdef HAS_RAWTOARGBROW_MMI 795 ANY11(RAWToARGBRow_Any_MMI, RAWToARGBRow_MMI, 0, 3, 4, 3) 796 #endif 797 #ifdef HAS_RGB565TOARGBROW_NEON 798 ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) 799 #endif 800 #ifdef HAS_RGB565TOARGBROW_MSA 801 ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) 802 #endif 803 #ifdef HAS_RGB565TOARGBROW_MMI 804 ANY11(RGB565ToARGBRow_Any_MMI, RGB565ToARGBRow_MMI, 0, 2, 4, 3) 805 #endif 806 #ifdef HAS_ARGB1555TOARGBROW_NEON 807 ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) 808 #endif 809 #ifdef HAS_ARGB1555TOARGBROW_MSA 810 ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) 811 #endif 812 #ifdef HAS_ARGB1555TOARGBROW_MMI 813 ANY11(ARGB1555ToARGBRow_Any_MMI, ARGB1555ToARGBRow_MMI, 0, 2, 4, 3) 814 #endif 815 #ifdef HAS_ARGB4444TOARGBROW_NEON 816 ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) 817 #endif 818 #ifdef HAS_ARGB4444TOARGBROW_MSA 819 ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) 820 #endif 821 #ifdef HAS_ARGB4444TOARGBROW_MMI 822 ANY11(ARGB4444ToARGBRow_Any_MMI, ARGB4444ToARGBRow_MMI, 0, 2, 4, 3) 823 #endif 824 #ifdef HAS_ARGBATTENUATEROW_SSSE3 825 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) 826 #endif 827 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 828 ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3) 829 #endif 830 #ifdef HAS_ARGBATTENUATEROW_AVX2 831 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7) 832 #endif 833 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 834 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) 835 #endif 836 #ifdef HAS_ARGBATTENUATEROW_NEON 837 ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) 838 #endif 839 #ifdef 
HAS_ARGBATTENUATEROW_MSA 840 ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) 841 #endif 842 #ifdef HAS_ARGBATTENUATEROW_MMI 843 ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1) 844 #endif 845 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 846 ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) 847 #endif 848 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 849 ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31) 850 #endif 851 #ifdef HAS_ARGBEXTRACTALPHAROW_NEON 852 ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) 853 #endif 854 #ifdef HAS_ARGBEXTRACTALPHAROW_MSA 855 ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) 856 #endif 857 #ifdef HAS_ARGBEXTRACTALPHAROW_MMI 858 ANY11(ARGBExtractAlphaRow_Any_MMI, ARGBExtractAlphaRow_MMI, 0, 4, 1, 7) 859 #endif 860 #undef ANY11 861 862 // Any 1 to 1 blended. Destination is read, modify, write. 863 #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ 864 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ 865 SIMD_ALIGNED(uint8_t temp[64 * 2]); \ 866 memset(temp, 0, 64 * 2); /* for msan */ \ 867 int r = width & MASK; \ 868 int n = width & ~MASK; \ 869 if (n > 0) { \ 870 ANY_SIMD(src_ptr, dst_ptr, n); \ 871 } \ 872 memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ 873 memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ 874 ANY_SIMD(temp, temp + 64, MASK + 1); \ 875 memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ 876 } 877 878 #ifdef HAS_ARGBCOPYALPHAROW_AVX2 879 ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) 880 #endif 881 #ifdef HAS_ARGBCOPYALPHAROW_SSE2 882 ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) 883 #endif 884 #ifdef HAS_ARGBCOPYALPHAROW_MMI 885 ANY11B(ARGBCopyAlphaRow_Any_MMI, ARGBCopyAlphaRow_MMI, 0, 4, 4, 1) 886 #endif 887 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 888 ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, 
ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) 889 #endif 890 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 891 ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) 892 #endif 893 #ifdef HAS_ARGBCOPYYTOALPHAROW_MMI 894 ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7) 895 #endif 896 #undef ANY11B 897 898 // Any 1 to 1 with parameter. 899 #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ 900 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ 901 SIMD_ALIGNED(uint8_t temp[64 * 2]); \ 902 memset(temp, 0, 64); /* for msan */ \ 903 int r = width & MASK; \ 904 int n = width & ~MASK; \ 905 if (n > 0) { \ 906 ANY_SIMD(src_ptr, dst_ptr, param, n); \ 907 } \ 908 memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ 909 ANY_SIMD(temp, temp + 64, param, MASK + 1); \ 910 memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ 911 } 912 913 #if defined(HAS_I400TOARGBROW_SSE2) 914 ANY11P(I400ToARGBRow_Any_SSE2, 915 I400ToARGBRow_SSE2, 916 const struct YuvConstants*, 917 1, 918 4, 919 7) 920 #endif 921 #if defined(HAS_I400TOARGBROW_AVX2) 922 ANY11P(I400ToARGBRow_Any_AVX2, 923 I400ToARGBRow_AVX2, 924 const struct YuvConstants*, 925 1, 926 4, 927 15) 928 #endif 929 #if defined(HAS_I400TOARGBROW_NEON) 930 ANY11P(I400ToARGBRow_Any_NEON, 931 I400ToARGBRow_NEON, 932 const struct YuvConstants*, 933 1, 934 4, 935 7) 936 #endif 937 #if defined(HAS_I400TOARGBROW_MSA) 938 ANY11P(I400ToARGBRow_Any_MSA, 939 I400ToARGBRow_MSA, 940 const struct YuvConstants*, 941 1, 942 4, 943 15) 944 #endif 945 #if defined(HAS_I400TOARGBROW_MMI) 946 ANY11P(I400ToARGBRow_Any_MMI, 947 I400ToARGBRow_MMI, 948 const struct YuvConstants*, 949 1, 950 4, 951 7) 952 #endif 953 954 #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) 955 ANY11P(ARGBToRGB565DitherRow_Any_SSE2, 956 ARGBToRGB565DitherRow_SSE2, 957 const uint32_t, 958 4, 959 2, 960 3) 961 #endif 962 #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) 963 ANY11P(ARGBToRGB565DitherRow_Any_AVX2, 964 ARGBToRGB565DitherRow_AVX2, 965 
const uint32_t, 966 4, 967 2, 968 7) 969 #endif 970 #if defined(HAS_ARGBTORGB565DITHERROW_NEON) 971 ANY11P(ARGBToRGB565DitherRow_Any_NEON, 972 ARGBToRGB565DitherRow_NEON, 973 const uint32_t, 974 4, 975 2, 976 7) 977 #endif 978 #if defined(HAS_ARGBTORGB565DITHERROW_MSA) 979 ANY11P(ARGBToRGB565DitherRow_Any_MSA, 980 ARGBToRGB565DitherRow_MSA, 981 const uint32_t, 982 4, 983 2, 984 7) 985 #endif 986 #if defined(HAS_ARGBTORGB565DITHERROW_MMI) 987 ANY11P(ARGBToRGB565DitherRow_Any_MMI, 988 ARGBToRGB565DitherRow_MMI, 989 const uint32_t, 990 4, 991 2, 992 3) 993 #endif 994 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 995 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) 996 #endif 997 #ifdef HAS_ARGBSHUFFLEROW_AVX2 998 ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) 999 #endif 1000 #ifdef HAS_ARGBSHUFFLEROW_NEON 1001 ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) 1002 #endif 1003 #ifdef HAS_ARGBSHUFFLEROW_MSA 1004 ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) 1005 #endif 1006 #ifdef HAS_ARGBSHUFFLEROW_MMI 1007 ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1) 1008 #endif 1009 #undef ANY11P 1010 #undef ANY11P 1011 1012 // Any 1 to 1 with parameter and shorts. BPP measures in shorts. 
// Generates NAMEANY(src_ptr, dst_ptr, scale, width) around a kernel that
// reads STYPE elements and writes DTYPE elements.  The kernel is called
// directly on the first (width & ~MASK) elements, then once more on a
// zero-padded copy of the remaining (width & MASK) elements; only that many
// results are copied to dst_ptr.  The padded call is unconditional.
// SBPP / BPP are used as byte sizes of one STYPE / DTYPE element.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)             \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
    const int tail = width & MASK; /* elements left for the padded pass */   \
    const int bulk = width - tail; /* same value as width & ~MASK */         \
    SIMD_ALIGNED(STYPE tail_in[32]);                                         \
    SIMD_ALIGNED(DTYPE tail_out[32]);                                        \
    memset(tail_in, 0, 32 * SBPP); /* for msan */                            \
    if (bulk > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, scale, bulk);                               \
    }                                                                        \
    memcpy(tail_in, src_ptr + bulk, tail * SBPP);                            \
    ANY_SIMD(tail_in, tail_out, scale, MASK + 1);                            \
    memcpy(dst_ptr + bulk, tail_out, tail * BPP);                            \
  }

#ifdef HAS_CONVERT16TO8ROW_SSSE3
ANY11C(Convert16To8Row_Any_SSSE3,
       Convert16To8Row_SSSE3,
       2,
       1,
       uint16_t,
       uint8_t,
       15)
#endif
#ifdef HAS_CONVERT16TO8ROW_AVX2
ANY11C(Convert16To8Row_Any_AVX2,
       Convert16To8Row_AVX2,
       2,
       1,
       uint16_t,
       uint8_t,
       31)
#endif
#ifdef HAS_CONVERT8TO16ROW_SSE2
ANY11C(Convert8To16Row_Any_SSE2,
       Convert8To16Row_SSE2,
       1,
       2,
       uint8_t,
       uint16_t,
       15)
#endif
#ifdef HAS_CONVERT8TO16ROW_AVX2
ANY11C(Convert8To16Row_Any_AVX2,
       Convert8To16Row_AVX2,
       1,
       2,
       uint8_t,
       uint16_t,
       31)
#endif
#undef ANY11C

// Any 1 to 1 with float parameter and typed source/destination elements.
// SBPP and BPP are bytes per element.
// Generates NAMEANY(src_ptr, dst_ptr, param, width) around a kernel that reads
// ST elements and writes T elements, with a float parameter forwarded to
// ANY_SIMD.  The first (width & ~MASK) elements are processed in place; the
// remaining (width & MASK) elements are staged in a zeroed `temp`, processed
// as a full MASK + 1 group into `out`, and only the valid results copied out.
// src_ptr / dst_ptr are typed pointers, so offsets are in elements, while
// SBPP / BPP scale the memset/memcpy byte counts: they must equal
// sizeof(ST) / sizeof(T).
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK)             \
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
    SIMD_ALIGNED(ST temp[32]);                                          \
    SIMD_ALIGNED(T out[32]);                                            \
    memset(temp, 0, SBPP * 32); /* for msan */                          \
    int r = width & MASK;                                               \
    int n = width & ~MASK;                                              \
    if (n > 0) {                                                        \
      ANY_SIMD(src_ptr, dst_ptr, param, n);                             \
    }                                                                   \
    memcpy(temp, src_ptr + n, r * SBPP);                                \
    ANY_SIMD(temp, out, param, MASK + 1);                               \
    memcpy(dst_ptr + n, out, r * BPP);                                  \
  }

#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
ANY11P16(HalfFloat1Row_Any_F16C,
         HalfFloat1Row_F16C,
         uint16_t,
         uint16_t,
         2,
         2,
         15)
#endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
ANY11P16(HalfFloat1Row_Any_NEON,
         HalfFloat1Row_NEON,
         uint16_t,
         uint16_t,
         2,
         2,
         7)
#endif
#ifdef HAS_HALFFLOATROW_MSA
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
#endif
#ifdef HAS_BYTETOFLOATROW_NEON
// BPP is 4 == sizeof(float).  It was 3, which made the remainder copy write
// only 3 bytes per output float and drop the tail of the last values.
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
#endif
#undef ANY11P16

// Any 1 to 1 with yuvconstants
//
// Generates NAMEANY(src_ptr, dst_ptr, yuvconstants, width).  The first
// (width & ~MASK) pixels go straight through ANY_SIMD; the remainder is
// copied into the zeroed first 128 bytes of temp, converted as MASK + 1 pixels
// into temp + 128, and (width & MASK) pixels are copied to dst_ptr.
// UVSHIFT shifts the source offset and (rounded up via SS) the source length;
// SBPP / BPP are bytes per source unit / destination pixel.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr,                  \
               const struct YuvConstants* yuvconstants, int width) {      \
    SIMD_ALIGNED(uint8_t temp[128 * 2]);                                  \
    memset(temp, 0, 128); /* for YUY2 and msan */                         \
    int r = width & MASK;                                                 \
    int n = width & ~MASK;                                                \
    if (n > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n);                        \
    }                                                                     \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1);                   \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                       \
  }
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MSA)
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MMI)
ANY11C(YUY2ToARGBRow_Any_MMI, YUY2ToARGBRow_MMI, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#endif
#undef ANY11C

// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
// Generates NAMEANY(dst_ptr, src_ptr, src_stride_ptr, width,
// source_y_fraction).  The second source row is addressed as
// src_ptr + src_stride_ptr.  The first (width & ~MASK) pixels are handled by
// ANY_SIMD directly.  For the remainder, both rows are copied into a scratch
// buffer 64 bytes apart (so the scratch stride passed to ANY_SIMD is 64), a
// full MASK + 1 group is processed into the third 64-byte slot, and only the
// valid pixels are copied to dst_ptr.  The padded pass is unconditional.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                            \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr,                      \
               ptrdiff_t src_stride_ptr, int width, int source_y_fraction) {  \
    const int tail = width & MASK; /* pixels left for the padded pass */      \
    const int bulk = width - tail; /* same value as width & ~MASK */          \
    SIMD_ALIGNED(uint8_t scratch[64 * 3]);                                    \
    uint8_t* const row0 = scratch;                                            \
    uint8_t* const row1 = scratch + 64;                                       \
    uint8_t* const blended = scratch + 128;                                   \
    memset(scratch, 0, 64 * 2); /* for msan */                                \
    if (bulk > 0) {                                                           \
      ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, bulk, source_y_fraction);    \
    }                                                                         \
    memcpy(row0, src_ptr + bulk * SBPP, tail * SBPP);                         \
    memcpy(row1, src_ptr + src_stride_ptr + bulk * SBPP, tail * SBPP);        \
    ANY_SIMD(blended, row0, 64, MASK + 1, source_y_fraction);                 \
    memcpy(dst_ptr + bulk * BPP, blended, tail * BPP);                        \
  }

#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_MMI
ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11T

// Any 1 to 1 mirror.
// Generates NAMEANY(src_ptr, dst_ptr, width) for mirroring kernels.
// The kernel is first run on the LAST (width & ~MASK) source pixels (source
// offset = remainder), writing to the start of dst_ptr.  The FIRST
// (width & MASK) source pixels are then copied into a zero-padded scratch
// group of MASK + 1 pixels and run through the kernel; the valid results are
// taken from the END of that output group and appended to dst_ptr.
// BPP is bytes per pixel.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {      \
    const int tail = width & MASK; /* pixels left for the padded pass */   \
    const int bulk = width - tail; /* same value as width & ~MASK */       \
    SIMD_ALIGNED(uint8_t scratch[64 * 2]);                                 \
    uint8_t* const mirrored = scratch + 64;                                \
    memset(scratch, 0, 64); /* for msan */                                 \
    if (bulk > 0) {                                                        \
      ANY_SIMD(src_ptr + tail * BPP, dst_ptr, bulk);                       \
    }                                                                      \
    memcpy(scratch, src_ptr, tail * BPP);                                  \
    ANY_SIMD(scratch, mirrored, MASK + 1);                                 \
    /* padding ends up at the front of the output group; skip past it */   \
    memcpy(dst_ptr + bulk * BPP, mirrored + (MASK + 1 - tail) * BPP,       \
           tail * BPP);                                                    \
  }

#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#endif
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#ifdef HAS_MIRRORROW_MMI
ANY11M(MirrorRow_Any_MMI, MirrorRow_MMI, 1, 7)
#endif
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_MIRRORUVROW_NEON
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
#ifdef HAS_MIRRORUVROW_MSA
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
#ifdef HAS_ARGBMIRRORROW_MMI
ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1)
#endif
#ifdef HAS_RGB24MIRRORROW_SSSE3
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_NEON
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
#endif
#undef ANY11M

// Any 1 plane. (memset)
//
// Generates NAMEANY(dst_ptr, v32, width) for fill kernels.  The kernel fills
// the first (width & ~MASK) pixels of dst_ptr directly, then fills a MASK + 1
// pixel scratch group from which the remaining (width & MASK) pixels are
// copied.  T is the type of the fill value; BPP is bytes per pixel.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)                             \
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) {                      \
    const int tail = width & MASK; /* pixels left for the padded pass */  \
    const int bulk = width - tail; /* same value as width & ~MASK */      \
    SIMD_ALIGNED(uint8_t scratch[64]);                                    \
    memset(scratch, 0, 64); /* for msan */                                \
    if (bulk > 0) {                                                       \
      ANY_SIMD(dst_ptr, v32, bulk);                                       \
    }                                                                     \
    ANY_SIMD(scratch, v32, MASK + 1);                                     \
    memcpy(dst_ptr + bulk * BPP, scratch, tail * BPP);                    \
  }

#ifdef HAS_SETROW_X86
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MSA
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MMI
ANY1(ARGBSetRow_Any_MMI, ARGBSetRow_MMI, uint32_t, 4, 3)
#endif
#undef ANY1

// Any 1 to 2.  Outputs UV planes.
// Generates NAMEANY(src_ptr, dst_u, dst_v, width).  The first
// (width & ~MASK) pixels go straight through ANY_SIMD.  The remainder is
// copied into the zeroed first 128 bytes of temp, processed as MASK + 1 pixels
// with U written at temp + 128 and V at temp + 256, and the valid bytes are
// copied to dst_u / dst_v.
// UVSHIFT shifts the source offset and (rounded up via SS) source length,
// BPP is bytes per source unit, DUVSHIFT shifts the destination offset and
// (rounded up via SS) the number of output bytes.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v,  \
               int width) {                                             \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                \
    memset(temp, 0, 128); /* for msan */                                \
    int r = width & MASK;                                               \
    int n = width & ~MASK;                                              \
    if (n > 0) {                                                        \
      ANY_SIMD(src_ptr, dst_u, dst_v, n);                               \
    }                                                                   \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                   \
    memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT));       \
    memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT));       \
  }

#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_MMI
ANY12(SplitUVRow_Any_MMI, SplitUVRow_MMI, 0, 2, 0, 7)
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_AVX2
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
// NOTE(review): in the NEON/MSA/MMI groups below the ARGBToUV444 wrappers are
// guarded by HAS_YUY2TOUV422ROW_* rather than a HAS_ARGBTOUV444ROW_* macro.
// Left unchanged; confirm against the feature macros in the row header.
#ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_MSA
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_MMI
ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7)
ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15)
ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12

// Any 1 to 3.  Outputs RGB planes.
//
// Generates NAMEANY(src_ptr, dst_r, dst_g, dst_b, width).  The first
// (width & ~MASK) pixels go straight through ANY_SIMD.  The remainder is
// copied into the zeroed first 16 * 3 bytes of temp, processed as MASK + 1
// pixels into three 16-byte output slots (temp + 48, + 64, + 80), and
// (width & MASK) bytes of each slot are copied to the matching plane.
// BPP is bytes per source pixel.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                                \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,     \
               uint8_t* dst_b, int width) {                                \
    SIMD_ALIGNED(uint8_t temp[16 * 6]);                                    \
    memset(temp, 0, 16 * 3); /* for msan */                                \
    int r = width & MASK;                                                  \
    int n = width & ~MASK;                                                 \
    if (n > 0) {                                                           \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n);                           \
    }                                                                      \
    memcpy(temp, src_ptr + n * BPP, r * BPP);                              \
    ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
    memcpy(dst_r + n, temp + 16 * 3, r);                                   \
    memcpy(dst_g + n, temp + 16 * 4, r);                                   \
    memcpy(dst_b + n, temp + 16 * 5, r);                                   \
  }

#ifdef HAS_SPLITRGBROW_SSSE3
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_NEON
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_MMI
ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#endif
#undef ANY13

// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// Generates NAMEANY(src_ptr, src_stride_ptr, dst_u, dst_v, width).  The
// second source row is addressed as src_ptr + src_stride_ptr.  The first
// (width & ~MASK) pixels go straight through ANY_SIMD.  For the remainder,
// both rows are copied into a zeroed scratch buffer 128 bytes apart (so the
// scratch stride passed to ANY_SIMD is 128), MASK + 1 pixels are processed
// with U written at scratch + 256 and V at scratch + 384, and SS(tail, 1)
// bytes of each are copied out at offset (bulk >> 1).
// When width is odd and UVSHIFT is 0, the last copied pixel of each row is
// duplicated once so the kernel sees a complete pixel pair.
// UVSHIFT shifts the source offset and (rounded up via SS) source length;
// BPP is bytes per source unit.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u,    \
               uint8_t* dst_v, int width) {                                   \
    const int tail = width & MASK; /* pixels left for the padded pass */      \
    const int bulk = width - tail; /* same value as width & ~MASK */          \
    const int tail_off = (bulk >> UVSHIFT) * BPP;                             \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                           \
    SIMD_ALIGNED(uint8_t scratch[128 * 4]);                                   \
    uint8_t* const row0 = scratch;                                            \
    uint8_t* const row1 = scratch + 128;                                      \
    memset(scratch, 0, 128 * 2); /* for msan */                               \
    if (bulk > 0) {                                                           \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, bulk);                  \
    }                                                                         \
    memcpy(row0, src_ptr + tail_off, tail_bytes);                             \
    memcpy(row1, src_ptr + src_stride_ptr + tail_off, tail_bytes);            \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */  \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);                \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);                \
    }                                                                         \
    ANY_SIMD(row0, 128, scratch + 256, scratch + 384, MASK + 1);              \
    memcpy(dst_u + (bulk >> 1), scratch + 256, SS(tail, 1));                  \
    memcpy(dst_v + (bulk >> 1), scratch + 384, SS(tail, 1));                  \
  }

#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_AVX2
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_NEON
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_MMI
ANY12S(ARGBToUVRow_Any_MMI, ARGBToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_MMI
ANY12S(ARGBToUVJRow_Any_MMI, ARGBToUVJRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MSA
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MMI
ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MSA
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MMI
ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MSA
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MMI
ANY12S(RGBAToUVRow_Any_MMI, RGBAToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MMI
ANY12S(RGB24ToUVRow_Any_MMI, RGB24ToUVRow_MMI, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MMI
ANY12S(RAWToUVRow_Any_MMI, RAWToUVRow_MMI, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MMI
ANY12S(RGB565ToUVRow_Any_MMI, RGB565ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MMI
ANY12S(ARGB1555ToUVRow_Any_MMI, ARGB1555ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_MMI
ANY12S(ARGB4444ToUVRow_Any_MMI, ARGB4444ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_MMI
ANY12S(YUY2ToUVRow_Any_MMI, YUY2ToUVRow_MMI, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MMI
ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S

// Any 1 to 1 with source stride (2 rows of source).  Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
//
// Generates NAMEANY(src_ptr, src_stride_ptr, dst_vu, width).  The second
// source row is addressed as src_ptr + src_stride_ptr.  The first
// (width & ~MASK) pixels go straight through ANY_SIMD.  For the remainder,
// both rows are copied into a zeroed scratch buffer 128 bytes apart (so the
// scratch stride passed to ANY_SIMD is 128), MASK + 1 pixels are processed
// into scratch + 256, and SS(tail, 1) * 2 bytes are copied to dst_vu at byte
// offset (bulk >> 1) * 2.
// When width is odd and UVSHIFT is 0, the last copied pixel of each row is
// duplicated once so the kernel sees a complete pixel pair.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu,   \
               int width) {                                                   \
    const int tail = width & MASK; /* pixels left for the padded pass */      \
    const int bulk = width - tail; /* same value as width & ~MASK */          \
    const int tail_off = (bulk >> UVSHIFT) * BPP;                             \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                           \
    SIMD_ALIGNED(uint8_t scratch[128 * 3]);                                   \
    uint8_t* const row0 = scratch;                                            \
    uint8_t* const row1 = scratch + 128;                                      \
    uint8_t* const packed = scratch + 256;                                    \
    memset(scratch, 0, 128 * 2); /* for msan */                               \
    if (bulk > 0) {                                                           \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, bulk);                        \
    }                                                                         \
    memcpy(row0, src_ptr + tail_off, tail_bytes);                             \
    memcpy(row1, src_ptr + src_stride_ptr + tail_off, tail_bytes);            \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */  \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);                \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);                \
    }                                                                         \
    ANY_SIMD(row0, 128, packed, MASK + 1);                                    \
    memcpy(dst_vu + (bulk >> 1) * 2, packed, SS(tail, 1) * 2);                \
  }

#ifdef HAS_AYUVTOVUROW_NEON
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif