/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <string.h>  // For memset.

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// memset for temp is meant to clear the source buffer (not dest) so that
// SIMD that reads full multiple of 16 bytes will not trigger msan errors.
// memset is not needed for production, as the garbage values are processed but
// not used, although there may be edge cases for subsampling.
// The size of the buffer is based on the largest read, which can be inferred
// by the source type (e.g. ARGB) and the mask (last parameter), or by examining
// the source code for how much the source pointers are advanced.

// Subsampled source needs to be increased by 1 if not even.
// SS(width, shift) is ceil(width / 2^shift), e.g. SS(5, 1) == 3.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// Any 4 planes to 1.
// Defines NAMEANY(), which accepts any width for a kernel (ANY_SIMD) that is
// driven in steps of MASK + 1 pixels:
//   1. the leading (width & ~MASK) pixels go straight through ANY_SIMD;
//   2. the trailing (width & MASK) pixels are staged in a zeroed scratch
//      buffer, run as one MASK + 1 pixel step, and only the valid part of
//      the result is copied to dst_ptr.
// Scratch layout: y at +0, u at +64, v at +128, a at +192, output at +256.
// UVSHIFT scales the u/v source offsets, DUVSHIFT scales the destination
// offset, and BPP is the byte multiplier applied to destination positions.
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 5]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 64 * 4); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, body); \
    } \
    memcpy(temp, y_buf + body, tail); \
    memcpy(temp + 64, u_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 128, v_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 192, a_buf + body, tail); \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 256, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_MERGEARGBROW_SSE2
ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEARGBROW_AVX2
ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEARGBROW_NEON
ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
#endif

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
// Any 4 planes to 1 with yuvconstants
// Same leading-run / staged-tail scheme as the plain 4 plane wrapper, with
// yuvconstants forwarded to ANY_SIMD.  When width is odd, the last copied u
// and v bytes are duplicated into the following scratch byte before the
// scratch pass.
// Scratch layout: y at +0, u at +64, v at +128, a at +192, output at +256.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 5]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 64 * 4); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body, tail); \
    memcpy(temp + 64, u_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 128, v_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 192, a_buf + body, tail); \
    if (width & 1) { \
      temp[64 + uv_tail] = temp[64 + uv_tail - 1]; \
      temp[128 + uv_tail] = temp[128 + uv_tail - 1]; \
    } \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
             yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 256, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I444ALPHATOARGBROW_SSSE3
ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_AVX2
ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444ALPHATOARGBROW_NEON
ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_MSA
ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_MMI
ANY41C(I444AlphaToARGBRow_Any_MMI, I444AlphaToARGBRow_MMI, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MMI
ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C

// Any 4 planes to 1 plane of 8 bit with yuvconstants
// Source planes hold elements of type T (SBPP bytes each).  The scratch input
// gives each plane 16 elements: y at +0, u at +16, v at +32, a at +48.  The
// scratch pass writes into a separate 64 byte output buffer.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
               const T* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(T temp[16 * 4]); \
    SIMD_ALIGNED(uint8_t out[64]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body, tail * SBPP); \
    memcpy(temp + 16, u_buf + (body >> UVSHIFT), uv_tail * SBPP); \
    memcpy(temp + 32, v_buf + (body >> UVSHIFT), uv_tail * SBPP); \
    memcpy(temp + 48, a_buf + body, tail * SBPP); \
    ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants, \
             MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, out, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0,
        uint16_t, 2, 4, 7)
#endif

#ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0,
        uint16_t, 2, 4, 15)
#endif

#ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0,
        uint16_t, 2, 4, 7)
#endif

#ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0,
        uint16_t, 2, 4, 15)
#endif

#undef ANY41CT

// Any 4 planes to 1 plane with parameter
// The four source planes hold STYPE elements (SBPP bytes each) and are not
// subsampled; depth is passed through to ANY_SIMD unchanged.  BPP is the
// number of destination bytes per pixel, so destination offsets are computed
// in bytes whatever DTYPE is.
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE temp[16 * 4]); \
    SIMD_ALIGNED(DTYPE out[64]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, body); \
    } \
    memcpy(temp, r_buf + body, tail * SBPP); \
    memcpy(temp + 16, g_buf + body, tail * SBPP); \
    memcpy(temp + 32, b_buf + body, tail * SBPP); \
    memcpy(temp + 48, a_buf + body, tail * SBPP); \
    ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
    memcpy((uint8_t*)dst_ptr + body * BPP, out, tail * BPP); \
  }

#ifdef HAS_MERGEAR64ROW_AVX2
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#ifdef HAS_MERGEAR64ROW_NEON
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#ifdef HAS_MERGEARGB16TO8ROW_AVX2
ANY41PT(MergeARGB16To8Row_Any_AVX2, MergeARGB16To8Row_AVX2, uint16_t, 2,
        uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEARGB16TO8ROW_NEON
ANY41PT(MergeARGB16To8Row_Any_NEON, MergeARGB16To8Row_NEON, uint16_t, 2,
        uint8_t, 4, 7)
#endif

#undef ANY41PT

// Any 3 planes to 1.
// Defines NAMEANY() for a 3 source plane kernel (ANY_SIMD) stepped in blocks
// of MASK + 1 pixels.  The leading (width & ~MASK) pixels are processed in
// place; the remaining (width & MASK) pixels are staged through scratch.
// Scratch layout: y at +0, u at +64, v at +128, output at +192.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 4]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body); \
    } \
    memcpy(temp, y_buf + body, tail); \
    memcpy(temp + 64, u_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 128, v_buf + (body >> UVSHIFT), uv_tail); \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 192, \
           SS(tail, DUVSHIFT) * BPP); \
  }

// Merge functions.
#ifdef HAS_MERGERGBROW_SSSE3
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_NEON
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_MMI
ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7)
#endif
#ifdef HAS_MERGEXRGBROW_SSE2
ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEXRGBROW_AVX2
ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEXRGBROW_NEON
ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_AVX2
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_MMI
ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOUYVYROW_MMI
ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#ifdef HAS_BLENDPLANEROW_MMI
ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7)
#endif
#undef ANY31

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
// Scratch layout: y at +0, u at +128, v at +256, output at +384.  When width
// is odd, the last copied u and v bytes are duplicated into the following
// scratch byte before the scratch pass.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t temp[128 * 4]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body, tail); \
    memcpy(temp + 128, u_buf + (body >> UVSHIFT), uv_tail); \
    memcpy(temp + 256, v_buf + (body >> UVSHIFT), uv_tail); \
    if (width & 1) { \
      temp[128 + uv_tail] = temp[128 + uv_tail - 1]; \
      temp[256 + uv_tail] = temp[256 + uv_tail - 1]; \
    } \
    ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \
             MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 384, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TORGBAROW_SSSE3
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOARGB4444ROW_SSSE3
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_SSSE3
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_SSSE3
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB24ROW_SSSE3
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TOAR30ROW_SSSE3
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOAR30ROW_AVX2
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MMI
ANY31C(I444ToARGBRow_Any_MMI, I444ToARGBRow_MMI, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MMI, I422ToARGBRow_MMI, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MMI, I422ToRGB24Row_MMI, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MMI, I422ToARGB4444Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MMI, I422ToARGB1555Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MMI, I422ToRGB565Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGBARow_Any_MMI, I422ToRGBARow_MMI, 1, 0, 4, 7)
#endif
#undef ANY31C

// Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider sharing this code with ANY31C
// Source planes hold elements of type T (SBPP bytes each).  Scratch input
// gives each plane 16 elements (y at +0, u at +16, v at +32); the scratch
// pass writes into a separate 64 byte output buffer.
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) { \
    SIMD_ALIGNED(T temp[16 * 3]); \
    SIMD_ALIGNED(uint8_t out[64]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    const int uv_tail = SS(tail, UVSHIFT); \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body, tail * SBPP); \
    memcpy(temp + 16, u_buf + (body >> UVSHIFT), uv_tail * SBPP); \
    memcpy(temp + 32, v_buf + (body >> UVSHIFT), uv_tail * SBPP); \
    ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, out, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I210TOAR30ROW_SSSE3
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_SSSE3
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_AVX2
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_SSSE3
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_SSSE3
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_AVX2
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_AVX2
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOAR30ROW_SSSE3
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_SSSE3
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_AVX2
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I212TOAR30ROW_AVX2
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT

// Any 3 planes to 1 plane with parameter
// The three source planes hold STYPE elements (SBPP bytes each) and are not
// subsampled; depth is passed through to ANY_SIMD unchanged.  BPP is the
// number of destination bytes per pixel, so destination offsets are computed
// in bytes whatever DTYPE is.
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE temp[16 * 3]); \
    SIMD_ALIGNED(DTYPE out[64]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, body); \
    } \
    memcpy(temp, r_buf + body, tail * SBPP); \
    memcpy(temp + 16, g_buf + body, tail * SBPP); \
    memcpy(temp + 32, b_buf + body, tail * SBPP); \
    ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
    memcpy((uint8_t*)dst_ptr + body * BPP, out, tail * BPP); \
  }

#ifdef HAS_MERGEXR30ROW_AVX2
ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEXR30ROW_NEON
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON, MergeXR30Row_10_NEON, uint16_t, 2, uint8_t,
        4, 3)
#endif

#ifdef HAS_MERGEXR64ROW_AVX2
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#ifdef HAS_MERGEXR64ROW_NEON
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
ANY31PT(MergeXRGB16To8Row_Any_AVX2, MergeXRGB16To8Row_AVX2, uint16_t, 2,
        uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEXRGB16TO8ROW_NEON
ANY31PT(MergeXRGB16To8Row_Any_NEON, MergeXRGB16To8Row_NEON, uint16_t, 2,
        uint8_t, 4, 7)
#endif

#undef ANY31PT

// Any 2 planes to 1.
// Defines NAMEANY() for a 2 source plane kernel (ANY_SIMD) stepped in blocks
// of MASK + 1 pixels.  The leading (width & ~MASK) pixels are processed in
// place; the remaining (width & MASK) pixels are staged through scratch.
// SBPP / SBPP2 are the byte multipliers for the first / second source plane
// and BPP is the byte multiplier for the destination.
// Scratch layout: first plane at +0, second plane at +128, output at +256.
// Each slot is 128 bytes because the output of one step is (MASK + 1) * BPP
// bytes, which is 96 for the BPP 3, MASK 31 instantiation below and would
// not fit a 64 byte slot.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, \
               uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t temp[128 * 3]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 128 * 2); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, body); \
    } \
    memcpy(temp, y_buf + body * SBPP, tail * SBPP); \
    memcpy(temp + 128, uv_buf + (body >> UVSHIFT) * SBPP2, \
           SS(tail, UVSHIFT) * SBPP2); \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
    memcpy(dst_ptr + body * BPP, temp + 256, tail * BPP); \
  }

// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_MSA
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
#ifdef HAS_NV21TOYUV24ROW_NEON
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_AVX2
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_MMI
ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_MMI
ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MMI
ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_SOBELROW_SSE2
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELROW_MSA
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_MMI
ANY21(SobelRow_Any_MMI, SobelRow_MMI, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_MSA
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#ifdef HAS_SOBELTOPLANEROW_MMI
ANY21(SobelToPlaneRow_Any_MMI, SobelToPlaneRow_MMI, 0, 1, 1, 1, 7)
#endif
#ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELXYROW_MSA
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_MMI
ANY21(SobelXYRow_Any_MMI, SobelXYRow_MMI, 0, 1, 1, 4, 7)
#endif
#undef ANY21

// Any 2 planes to 1 with yuvconstants
// Defines NAMEANY() for a 2 source plane kernel (ANY_SIMD) that also takes
// yuvconstants and is stepped in blocks of MASK + 1 pixels.  SBPP / SBPP2 are
// the byte multipliers for the first / second source plane and BPP is the
// byte multiplier for the destination.
// Scratch layout: first plane at +0, second plane at +128, output at +256.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) { \
    SIMD_ALIGNED(uint8_t temp[128 * 3]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 128 * 2); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body * SBPP, tail * SBPP); \
    memcpy(temp + 128, uv_buf + (body >> UVSHIFT) * SBPP2, \
           SS(tail, UVSHIFT) * SBPP2); \
    ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + body * BPP, temp + 256, tail * BPP); \
  }

// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MMI
ANY21C(NV12ToARGBRow_Any_MMI, NV12ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_AVX2
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV21TOARGBROW_NEON
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MMI
ANY21C(NV21ToARGBRow_Any_MMI, NV21ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_NEON
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_NEON
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_SSSE3
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_MMI
ANY21C(NV12ToRGB24Row_Any_MMI, NV12ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_SSSE3
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_AVX2
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_AVX2
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_MMI
ANY21C(NV21ToRGB24Row_Any_MMI, NV21ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MSA
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MMI
ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C

// Any 2 planes of 16 bit to 1 with yuvconstants
// Source planes hold elements of type T (SBPP bytes each).  The second plane
// is read two T elements per chroma position (offset 2 * (n >> UVSHIFT),
// byte count doubled).  Scratch input: first plane at +0 (16 elements),
// second plane at +16 (32 elements); the scratch pass writes into a separate
// 64 byte output buffer.
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(T temp[16 * 3]); \
    SIMD_ALIGNED(uint8_t out[64]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body); \
    } \
    memcpy(temp, y_buf + body, tail * SBPP); \
    memcpy(temp + 16, uv_buf + 2 * (body >> UVSHIFT), \
           SS(tail, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, out, \
           SS(tail, DUVSHIFT) * BPP); \
  }

#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif

#undef ANY21CT

// Any 2 16 bit planes with parameter to 1
// src_u and src_v hold T elements (BPP bytes each); depth is passed through
// to ANY_SIMD unchanged.  The destination receives two T elements per source
// position.  Scratch layout in elements: src_u at +0, src_v at +16, output
// at +32; the memset clears the whole scratch buffer.
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
  void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
               int width) { \
    SIMD_ALIGNED(T temp[16 * 4]); \
    const int body = width & ~MASK; \
    const int tail = width & MASK; \
    memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(src_u, src_v, dst_uv, depth, body); \
    } \
    memcpy(temp, src_u + body, tail * BPP); \
    memcpy(temp + 16, src_v + body, tail * BPP); \
    ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \
    memcpy(dst_uv + body * 2, temp + 32, tail * BPP * 2); \
  }

#ifdef HAS_MERGEUVROW_16_AVX2
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#endif

#undef ANY21PT

// Any 1 to 1.
// NAMEANY accepts any width. Whole blocks of MASK + 1 pixels are handed
// straight to ANY_SIMD; the leftover pixels are staged in a scratch row whose
// source half is zero filled, converted as one full block, and only the valid
// part of the result is copied to the destination.
// The source advances SBPP bytes per (1 << UVSHIFT) pixels and the
// destination BPP bytes per pixel.
// Scratch layout: bytes [0, 128) staged source, [128, 256) staged output.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t scratch[128 * 2]); \
    const int simd_width = width & ~MASK; /* whole SIMD blocks */ \
    const int tail = width & MASK;        /* leftover pixels */ \
    const uint8_t* tail_src = src_ptr + (simd_width >> UVSHIFT) * SBPP; \
    uint8_t* tail_dst = dst_ptr + simd_width * BPP; \
    uint8_t* staged = scratch + 128; \
    memset(scratch, 0, 128); /* for YUY2 and msan */ \
    if (simd_width > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, simd_width); \
    } \
    memcpy(scratch, tail_src, SS(tail, UVSHIFT) * SBPP); \
    ANY_SIMD(scratch, staged, MASK + 1); \
    memcpy(tail_dst, staged, tail * BPP); \
  }

#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_COPYROW_NEON
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORAWROW_AVX2)
ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ABGRTOAR30ROW_AVX2)
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTORGBAROW_SSSE3)
ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3)
ANY11(ARGBToRAWRow_Any_MMI, ARGBToRAWRow_MMI, 0, 4, 3, 3)
ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3)
ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_MMI)
ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_AVX2
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_AVX2
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_NEON
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_MMI
ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_MSA
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_MMI
ANY11(ARGBToYJRow_Any_MMI, ARGBToYJRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_MSA
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_MMI
ANY11(BGRAToYRow_Any_MMI, BGRAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_MSA
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_MMI
ANY11(ABGRToYRow_Any_MMI, ABGRToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_MSA
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_MMI
ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYJROW_AVX2
ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYJROW_SSSE3
ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_NEON
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYROW_MMI
ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYJROW_AVX2
ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_SSSE3
ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_NEON
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_MMI
ANY11(RAWToYRow_Any_MMI, RAWToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_MMI
ANY11(RGB565ToYRow_Any_MMI, RGB565ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_MMI
ANY11(ARGB1555ToYRow_Any_MMI, ARGB1555ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_MMI
ANY11(ARGB4444ToYRow_Any_MMI, ARGB4444ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_NEON
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_MMI
ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_SWAPUVROW_SSSE3
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
#endif
#ifdef HAS_SWAPUVROW_AVX2
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
#endif
#ifdef HAS_SWAPUVROW_NEON
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_MMI
ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTORGBAROW_NEON
ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_MMI
ANY11(RAWToARGBRow_Any_MMI, RAWToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_MMI
ANY11(RGB565ToARGBRow_Any_MMI, RGB565ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MMI
ANY11(ARGB1555ToARGBRow_Any_MMI, ARGB1555ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MMI
ANY11(ARGB4444ToARGBRow_Any_MMI, ARGB4444ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MMI
ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MSA
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MMI
ANY11(ARGBExtractAlphaRow_Any_MMI, ARGBExtractAlphaRow_MMI, 0, 4, 1, 7)
#endif
#undef ANY11

// Any 1 to 1 blended. Destination is read, modify, write.
// Whole blocks of MASK + 1 pixels are blended in place by ANY_SIMD. For the
// leftover pixels both the source AND the current destination contents are
// staged (the kernel reads the destination), one full block is blended in the
// scratch buffer, and the valid pixels are written back.
// Scratch layout: bytes [0, 64) staged source, [64, 128) staged destination.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t buf[64 * 2]); \
    uint8_t* const blend_src = buf; \
    uint8_t* const blend_dst = buf + 64; \
    const int bulk = width & ~MASK; /* pixels handled in whole blocks */ \
    const int rest = width & MASK;  /* leftover pixels */ \
    uint8_t* const dst_rest = dst_ptr + bulk * BPP; \
    memset(buf, 0, 64 * 2); /* for msan */ \
    if (bulk > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, bulk); \
    } \
    memcpy(blend_src, src_ptr + (bulk >> UVSHIFT) * SBPP, \
           SS(rest, UVSHIFT) * SBPP); \
    memcpy(blend_dst, dst_rest, rest * BPP); \
    ANY_SIMD(blend_src, blend_dst, MASK + 1); \
    memcpy(dst_rest, blend_dst, rest * BPP); \
  }

#ifdef HAS_ARGBCOPYALPHAROW_AVX2
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_MMI
ANY11B(ARGBCopyAlphaRow_Any_MMI, ARGBCopyAlphaRow_MMI, 0, 4, 4, 1)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_MMI
ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7)
#endif
#undef ANY11B

// Any 1 to 1 with parameter.
// `param` (of type T) is forwarded unchanged to ANY_SIMD. Whole blocks of
// MASK + 1 pixels are converted directly; leftover pixels are staged through a
// scratch buffer so ANY_SIMD always sees a full block.
// SBPP / BPP are source / destination bytes per pixel.
// Scratch layout: bytes [0, 64) staged source, [64, 128) staged output.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 2]); \
    memset(temp, 0, 64); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, param, n); \
    } \
    memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
    ANY_SIMD(temp, temp + 64, param, MASK + 1); \
    memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
  }

#if defined(HAS_I400TOARGBROW_SSE2)
ANY11P(I400ToARGBRow_Any_SSE2,
       I400ToARGBRow_SSE2,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11P(I400ToARGBRow_Any_AVX2,
       I400ToARGBRow_AVX2,
       const struct YuvConstants*,
       1,
       4,
       15)
#endif
#if defined(HAS_I400TOARGBROW_NEON)
ANY11P(I400ToARGBRow_Any_NEON,
       I400ToARGBRow_NEON,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif
#if defined(HAS_I400TOARGBROW_MSA)
ANY11P(I400ToARGBRow_Any_MSA,
       I400ToARGBRow_MSA,
       const struct YuvConstants*,
       1,
       4,
       15)
#endif
#if defined(HAS_I400TOARGBROW_MMI)
ANY11P(I400ToARGBRow_Any_MMI,
       I400ToARGBRow_MMI,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif

#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
       ARGBToRGB565DitherRow_SSE2,
       const uint32_t,
       4,
       2,
       3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
       ARGBToRGB565DitherRow_AVX2,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
       ARGBToRGB565DitherRow_NEON,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA,
       ARGBToRGB565DitherRow_MSA,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
ANY11P(ARGBToRGB565DitherRow_Any_MMI,
       ARGBToRGB565DitherRow_MMI,
       const uint32_t,
       4,
       2,
       3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MMI
ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
#endif
#undef ANY11P

// Any 1 to 1 with type
// Source and destination use different element types (STYPE / DTYPE).
// SBPP / BPP are source / destination bytes per pixel; both scratch buffers
// are sized for exactly one block of MASK + 1 pixels.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
    SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
    memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, n); \
    } \
    /* Offsets are in bytes, hence the byte pointer casts. */ \
    memcpy(temp, (const uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
    ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
    memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
  }

#ifdef HAS_ARGBTOAR64ROW_SSSE3
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_ARGBTOAB64ROW_SSSE3
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_AR64TOARGBROW_SSSE3
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

// NOTE(review): the AB64ToARGB wrappers were gated on the ARGBToAR64 feature
// macro, unlike the three sibling wrappers which each use their own. The
// matching name is presumably HAS_AB64TOARGBROW_* - confirm against row.h.
// The original guard is still accepted so no existing build loses a wrapper.
#if defined(HAS_AB64TOARGBROW_SSSE3) || defined(HAS_ARGBTOAR64ROW_SSSE3)
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

#ifdef HAS_ARGBTOAR64ROW_AVX2
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_AVX2
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_AVX2
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

#if defined(HAS_AB64TOARGBROW_AVX2) || defined(HAS_ARGBTOAR64ROW_AVX2)
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

#ifdef HAS_ARGBTOAR64ROW_NEON
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_NEON
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_NEON
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

#if defined(HAS_AB64TOARGBROW_NEON) || defined(HAS_ARGBTOAR64ROW_NEON)
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

#undef ANY11T

// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
// `scale` is forwarded unchanged to ANY_SIMD. src_ptr / dst_ptr are indexed in
// elements of STYPE / DTYPE, while the tail copies are sized in bytes: SBPP
// and BPP are the byte sizes of one source / destination element.
// Each scratch buffer holds 32 elements.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
    SIMD_ALIGNED(STYPE staged_in[32]); \
    SIMD_ALIGNED(DTYPE staged_out[32]); \
    const int bulk = width & ~MASK; /* elements handled in whole blocks */ \
    const int rest = width & MASK;  /* leftover elements */ \
    memset(staged_in, 0, 32 * SBPP); /* for msan */ \
    if (bulk > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, scale, bulk); \
    } \
    memcpy(staged_in, src_ptr + bulk, rest * SBPP); \
    ANY_SIMD(staged_in, staged_out, scale, MASK + 1); \
    memcpy(dst_ptr + bulk, staged_out, rest * BPP); \
  }

#ifdef HAS_CONVERT16TO8ROW_SSSE3
ANY11C(Convert16To8Row_Any_SSSE3,
       Convert16To8Row_SSSE3,
       2,
       1,
       uint16_t,
       uint8_t,
       15)
#endif
#ifdef HAS_CONVERT16TO8ROW_AVX2
ANY11C(Convert16To8Row_Any_AVX2,
       Convert16To8Row_AVX2,
       2,
       1,
       uint16_t,
       uint8_t,
       31)
#endif
#ifdef HAS_CONVERT8TO16ROW_SSE2
ANY11C(Convert8To16Row_Any_SSE2,
       Convert8To16Row_SSE2,
       1,
       2,
       uint8_t,
       uint16_t,
       15)
#endif
#ifdef HAS_CONVERT8TO16ROW_AVX2
ANY11C(Convert8To16Row_Any_AVX2,
       Convert8To16Row_AVX2,
       1,
       2,
       uint8_t,
       uint16_t,
       31)
#endif
#ifdef HAS_MULTIPLYROW_16_AVX2
ANY11C(MultiplyRow_16_Any_AVX2,
       MultiplyRow_16_AVX2,
       2,
       2,
       uint16_t,
       uint16_t,
       31)
#endif
#ifdef HAS_MULTIPLYROW_16_NEON
ANY11C(MultiplyRow_16_Any_NEON,
       MultiplyRow_16_NEON,
       2,
       2,
       uint16_t,
       uint16_t,
       15)
#endif
#ifdef HAS_DIVIDEROW_16_AVX2
ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
#endif
#ifdef HAS_DIVIDEROW_16_NEON
ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
#endif
#undef ANY11C

// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
// `param` is forwarded unchanged to ANY_SIMD. src_ptr / dst_ptr are indexed in
// elements of ST / T, but the tail copies are sized in BYTES: SBPP and BPP
// must equal sizeof(ST) and sizeof(T), otherwise the last elements of the row
// are only partially copied.
// Each scratch buffer holds 32 elements.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
    SIMD_ALIGNED(ST temp[32]); \
    SIMD_ALIGNED(T out[32]); \
    memset(temp, 0, SBPP * 32); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, param, n); \
    } \
    memcpy(temp, src_ptr + n, r * SBPP); \
    ANY_SIMD(temp, out, param, MASK + 1); \
    memcpy(dst_ptr + n, out, r * BPP); \
  }

#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
ANY11P16(HalfFloat1Row_Any_F16C,
         HalfFloat1Row_F16C,
         uint16_t,
         uint16_t,
         2,
         2,
         15)
#endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
ANY11P16(HalfFloat1Row_Any_NEON,
         HalfFloat1Row_NEON,
         uint16_t,
         uint16_t,
         2,
         2,
         7)
#endif
#ifdef HAS_HALFFLOATROW_MSA
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
#endif
#ifdef HAS_BYTETOFLOATROW_NEON
// The destination element is a 4 byte float, so BPP is 4. It was 3, which
// copied only r * 3 bytes for r floats and truncated the end of the row.
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
#endif
#undef ANY11P16

// Any 1 to 1 with yuvconstants
// `yuvconstants` is forwarded unchanged to ANY_SIMD. The source advances SBPP
// bytes per (1 << UVSHIFT) pixels, the destination BPP bytes per pixel.
// Scratch layout: bytes [0, 128) staged source, [128, 256) staged output.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t temp[128 * 2]); \
    memset(temp, 0, 128); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
    } \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
  }
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MSA)
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MMI)
ANY11C(YUY2ToARGBRow_Any_MMI, YUY2ToARGBRow_MMI, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#endif
#undef ANY11C

// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
// Whole blocks of MASK + 1 pixels are interpolated directly from the caller's
// two rows (src_ptr and src_ptr + src_stride). For the leftover pixels both
// rows are staged 64 bytes apart in a scratch buffer and ANY_SIMD is run on
// one full block with a stride of 64.
// Scratch layout: bytes [0, 64) row 0, [64, 128) row 1, [128, 192) output.
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \
               int width, int source_y_fraction) { \
    SIMD_ALIGNED(uint8_t temp[64 * 3]); \
    memset(temp, 0, 64 * 2); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
    } \
    memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
    /* Do not read the second row when it is not blended in: it may lie */ \
    /* outside the caller's buffer. The staged row stays zero filled. */ \
    /* NOTE(review): assumes ANY_SIMD gives the second row no weight when */ \
    /* source_y_fraction is 0 - confirm against the C reference row. */ \
    if (source_y_fraction) { \
      memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \
    } \
    ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
  }

#ifdef HAS_INTERPOLATEROW_AVX2
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_MMI
ANY11I(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11I

// Any 1 to 1 mirror.
// The output order is reversed, so the LAST whole blocks of the source fill
// the START of the destination: ANY_SIMD is run on the source past the first
// `tail` pixels. Those first `tail` pixels are staged at the front of a zeroed
// scratch block; after ANY_SIMD runs on the full block, the last `tail`
// pixels of the staged output are copied to the end of the destination.
// BPP is bytes per pixel for both source and destination.
// Scratch layout: bytes [0, 64) staged source, [64, 128) staged output.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t scratch[64 * 2]); \
    uint8_t* const mirrored = scratch + 64; \
    const int bulk = width & ~MASK; /* pixels handled in whole blocks */ \
    const int tail = width & MASK;  /* leftover pixels */ \
    memset(scratch, 0, 64); /* for msan */ \
    if (bulk > 0) { \
      ANY_SIMD(src_ptr + tail * BPP, dst_ptr, bulk); \
    } \
    memcpy(scratch, src_ptr, tail * BPP); \
    ANY_SIMD(scratch, mirrored, MASK + 1); \
    memcpy(dst_ptr + bulk * BPP, mirrored + (MASK + 1 - tail) * BPP, \
           tail * BPP); \
  }

#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#endif
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#ifdef HAS_MIRRORROW_MMI
ANY11M(MirrorRow_Any_MMI, MirrorRow_MMI, 1, 7)
#endif
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_MIRRORUVROW_NEON
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
#ifdef HAS_MIRRORUVROW_MSA
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
#ifdef HAS_ARGBMIRRORROW_MMI
ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1)
#endif
#ifdef HAS_RGB24MIRRORROW_SSSE3
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_NEON
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
#endif
#undef ANY11M

// Any 1 plane. (memset)
// There is no source row: ANY_SIMD writes the value v32 (of type T) to the
// destination. Whole blocks of MASK + 1 pixels are written in place; for the
// leftover pixels one full block is written into a 64 byte scratch buffer and
// only the valid BPP-byte pixels are copied out.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
    SIMD_ALIGNED(uint8_t filled[64]); \
    const int bulk = width & ~MASK; /* pixels handled in whole blocks */ \
    const int tail = width & MASK;  /* leftover pixels */ \
    memset(filled, 0, 64); /* for msan */ \
    if (bulk > 0) { \
      ANY_SIMD(dst_ptr, v32, bulk); \
    } \
    ANY_SIMD(filled, v32, MASK + 1); \
    memcpy(dst_ptr + bulk * BPP, filled, tail * BPP); \
  }

#ifdef HAS_SETROW_X86
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MSA
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MMI
ANY1(ARGBSetRow_Any_MMI, ARGBSetRow_MMI, uint32_t, 4, 3)
#endif
#undef ANY1

// Any 1 to 2. Outputs UV planes.
// The source advances BPP bytes per (1 << UVSHIFT) pixels; each output plane
// advances one byte per (1 << DUVSHIFT) pixels. Whole blocks of MASK + 1
// pixels are converted directly; leftover pixels are staged through a scratch
// buffer so ANY_SIMD always sees a full block.
// Scratch layout: bytes [0, 128) staged source, [128, 256) U, [256, 384) V.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
               int width) { \
    SIMD_ALIGNED(uint8_t temp[128 * 3]); \
    memset(temp, 0, 128); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_u, dst_v, n); \
    } \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
    memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
    memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
  }

#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_MMI
ANY12(SplitUVRow_Any_MMI, SplitUVRow_MMI, 0, 2, 0, 7)
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_AVX2
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_MSA
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_MMI
ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7)
ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15)
ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12

// Any 1 16 bit plane with parameter to 2
// src_uv holds two elements of type T per pixel; dst_u and dst_v each receive
// one element per pixel. BPP is the size of T in bytes and `depth` is
// forwarded unchanged to ANY_SIMD.
// Scratch layout (in elements of T): [0, 32) staged src_uv, [32, 48) U,
// [48, 64) V.
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
  void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
    SIMD_ALIGNED(T temp[16 * 4]); \
    memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
    } \
    memcpy(temp, src_uv + n * 2, r * BPP * 2); \
    ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \
    memcpy(dst_u + n, temp + 32, r * BPP); \
    memcpy(dst_v + n, temp + 48, r * BPP); \
  }

#ifdef HAS_SPLITUVROW_16_AVX2
ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
#endif

#ifdef HAS_SPLITUVROW_16_NEON
ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#endif

// The macro that ends here is ANY12PT (this previously undefined ANY21CT).
#undef ANY12PT

// Any 1 to 3. Outputs RGB planes.
// ANY13 generates NAMEANY(src_ptr, dst_r, dst_g, dst_b, width), a wrapper
// that lets the kernel ANY_SIMD be used with any width: the leading
// (width & ~MASK) pixels go to ANY_SIMD directly, the trailing (width & MASK)
// pixels are staged in a scratch buffer, processed as one block of MASK + 1
// pixels, and only the valid bytes are copied to the three output planes.
// BPP:  bytes per source pixel.
// MASK: every width handed to ANY_SIMD is a multiple of MASK + 1.
// Scratch layout: source at temp[0 .. 16 * 4) (zeroed), then 16 bytes each
// for the three output planes. The source area is 16 * 4 bytes so that a
// block of 16 pixels at 4 bytes per pixel fits without running into the
// output scratch and is fully initialized for msan.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                                   \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,        \
               uint8_t* dst_b, int width) {                                   \
    SIMD_ALIGNED(uint8_t temp[16 * 7]);                                       \
    memset(temp, 0, 16 * 4); /* for msan */                                   \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n);                              \
    }                                                                         \
    memcpy(temp, src_ptr + n * BPP, r * BPP);                                 \
    ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, MASK + 1);    \
    memcpy(dst_r + n, temp + 16 * 4, r);                                      \
    memcpy(dst_g + n, temp + 16 * 5, r);                                      \
    memcpy(dst_b + n, temp + 16 * 6, r);                                      \
  }

#ifdef HAS_SPLITRGBROW_SSSE3
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_NEON
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_MMI
ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#endif
#ifdef HAS_SPLITXRGBROW_SSE2
ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_SSSE3
ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_AVX2
ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITXRGBROW_NEON
ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
#undef ANY13

// Any 1 to 4. Outputs ARGB planes.
// ANY14 generates NAMEANY(src_ptr, dst_r, dst_g, dst_b, dst_a, width), a
// wrapper that lets the kernel ANY_SIMD be used with any width: the leading
// (width & ~MASK) pixels go to ANY_SIMD directly, the trailing (width & MASK)
// pixels are staged in a scratch buffer, processed as one block of MASK + 1
// pixels, and only the valid bytes are copied to the four output planes.
// BPP:  bytes per source pixel.
// MASK: every width handed to ANY_SIMD is a multiple of MASK + 1.
// Scratch layout: source at temp[0 .. 16 * 4) (zeroed), then 16 bytes each
// for the four output planes.
#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK)                                   \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,        \
               uint8_t* dst_b, uint8_t* dst_a, int width) {                   \
    SIMD_ALIGNED(uint8_t temp[16 * 8]);                                       \
    memset(temp, 0, 16 * 4); /* for msan */                                   \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n);                       \
    }                                                                         \
    memcpy(temp, src_ptr + n * BPP, r * BPP);                                 \
    ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, temp + 16 * 7, \
             MASK + 1);                                                       \
    memcpy(dst_r + n, temp + 16 * 4, r);                                      \
    memcpy(dst_g + n, temp + 16 * 5, r);                                      \
    memcpy(dst_b + n, temp + 16 * 6, r);                                      \
    memcpy(dst_a + n, temp + 16 * 7, r);                                      \
  }

#ifdef HAS_SPLITARGBROW_SSE2
ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_SSSE3
ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_AVX2
ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITARGBROW_NEON
ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#endif
#undef ANY14

// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// ANY12S generates NAMEANY(src_ptr, src_stride, dst_u, dst_v, width).
// The leading (width & ~MASK) pixels go to ANY_SIMD directly. The trailing
// (width & MASK) pixels of both source rows are staged in scratch rows of
// 128 bytes, processed as one block of MASK + 1 pixels with a stride of 128,
// and the valid half-width result is copied to dst_u / dst_v.
// UVSHIFT: log2 of the number of pixels per source unit.
// BPP:     bytes per source unit.
// MASK:    every width handed to ANY_SIMD is a multiple of MASK + 1.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u,        \
               uint8_t* dst_v, int width) {                                   \
    SIMD_ALIGNED(uint8_t temp[128 * 4]);                                      \
    uint8_t* const row0 = temp;        /* staged tail of the first row */     \
    uint8_t* const row1 = temp + 128;  /* staged tail of the second row */    \
    uint8_t* const out_u = temp + 256; /* kernel U output */                  \
    uint8_t* const out_v = temp + 384; /* kernel V output */                  \
    const int rem = width & MASK;                                             \
    const int bulk = width & ~MASK;                                           \
    const int rem_bytes = SS(rem, UVSHIFT) * BPP;                             \
    const int rem_offset = (bulk >> UVSHIFT) * BPP;                           \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    if (bulk > 0) {                                                           \
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, bulk);                      \
    }                                                                         \
    memcpy(row0, src_ptr + rem_offset, rem_bytes);                            \
    memcpy(row1, src_ptr + src_stride + rem_offset, rem_bytes);               \
    if (UVSHIFT == 0 && (width & 1)) { /* repeat last pixel for subsample */  \
      memcpy(row0 + rem_bytes, row0 + rem_bytes - BPP, BPP);                  \
      memcpy(row1 + rem_bytes, row1 + rem_bytes - BPP, BPP);                  \
    }                                                                         \
    ANY_SIMD(row0, 128, out_u, out_v, MASK + 1);                              \
    memcpy(dst_u + (bulk >> 1), out_u, SS(rem, 1));                           \
    memcpy(dst_v + (bulk >> 1), out_v, SS(rem, 1));                           \
  }

#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_AVX2
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_NEON
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_MMI
ANY12S(ARGBToUVRow_Any_MMI, ARGBToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_MMI
ANY12S(ARGBToUVJRow_Any_MMI, ARGBToUVJRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MSA
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MMI
ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MSA
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MMI
ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MSA
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MMI
ANY12S(RGBAToUVRow_Any_MMI, RGBAToUVRow_MMI, 0, 4, 15)
#endif
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MMI
ANY12S(RGB24ToUVRow_Any_MMI, RGB24ToUVRow_MMI, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MMI
ANY12S(RAWToUVRow_Any_MMI, RAWToUVRow_MMI, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MMI
ANY12S(RGB565ToUVRow_Any_MMI, RGB565ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MMI
ANY12S(ARGB1555ToUVRow_Any_MMI, ARGB1555ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_MMI
ANY12S(ARGB4444ToUVRow_Any_MMI, ARGB4444ToUVRow_MMI, 0, 2, 15)
#endif
#ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_MMI
ANY12S(YUY2ToUVRow_Any_MMI, YUY2ToUVRow_MMI, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MMI
ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S

// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
// ANY11S generates NAMEANY(src_ptr, src_stride, dst_vu, width).
// The leading (width & ~MASK) pixels go to ANY_SIMD directly. The trailing
// (width & MASK) pixels of both source rows are staged in scratch rows of
// 128 bytes, processed as one block of MASK + 1 pixels with a stride of 128,
// and the valid result (2 bytes per pair of source pixels) is copied to
// dst_vu.
// UVSHIFT: log2 of the number of pixels per source unit.
// BPP:     bytes per source unit.
// MASK:    every width handed to ANY_SIMD is a multiple of MASK + 1.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu,       \
               int width) {                                                   \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    uint8_t* const row0 = temp;       /* staged tail of the first row */      \
    uint8_t* const row1 = temp + 128; /* staged tail of the second row */     \
    uint8_t* const out = temp + 256;  /* kernel output */                     \
    const int rem = width & MASK;                                             \
    const int bulk = width & ~MASK;                                           \
    const int rem_bytes = SS(rem, UVSHIFT) * BPP;                             \
    const int rem_offset = (bulk >> UVSHIFT) * BPP;                           \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    if (bulk > 0) {                                                           \
      ANY_SIMD(src_ptr, src_stride, dst_vu, bulk);                            \
    }                                                                         \
    memcpy(row0, src_ptr + rem_offset, rem_bytes);                            \
    memcpy(row1, src_ptr + src_stride + rem_offset, rem_bytes);               \
    if (UVSHIFT == 0 && (width & 1)) { /* repeat last pixel for subsample */  \
      memcpy(row0 + rem_bytes, row0 + rem_bytes - BPP, BPP);                  \
      memcpy(row1 + rem_bytes, row1 + rem_bytes - BPP, BPP);                  \
    }                                                                         \
    ANY_SIMD(row0, 128, out, MASK + 1);                                       \
    memcpy(dst_vu + (bulk >> 1) * 2, out, SS(rem, 1) * 2);                    \
  }

#ifdef HAS_AYUVTOVUROW_NEON
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif