1 /*
2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // memset for temp is meant to clear the source buffer (not dest) so that
23 // SIMD that reads full multiple of 16 bytes will not trigger msan errors.
24 // memset is not needed for production, as the garbage values are processed but
25 // not used, although there may be edge cases for subsampling.
26 // The size of the buffer is based on the largest read, which can be inferred
27 // by the source type (e.g. ARGB) and the mask (last parameter), or by examining
28 // the source code for how much the source pointers are advanced.
29 
// Width of a plane subsampled by |shift| bits, rounded up so that a source
// width that is not a multiple of (1 << shift) keeps its last partial sample.
#define SS(width, shift) (((width) + ((1 << (shift)) - 1)) >> (shift))
32 
// Any 4 planes to 1.
// Defines NAMEANY, which accepts any width: ANY_SIMD is run directly on the
// leading pixels that form whole groups of MASK + 1, then the leftover pixels
// are staged in a zeroed scratch buffer, ANY_SIMD is run on one full group of
// that buffer, and only the valid part of its output is copied to dst_ptr.
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               int width) {                                                  \
    /* Four 64 byte source slots followed by one 64 byte output slot. */     \
    SIMD_ALIGNED(uint8_t temp[64 * 5]);                                      \
    uint8_t* const y_tail = temp;                                            \
    uint8_t* const u_tail = temp + 64;                                       \
    uint8_t* const v_tail = temp + 128;                                      \
    uint8_t* const a_tail = temp + 192;                                      \
    uint8_t* const dst_tail = temp + 256;                                    \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    memset(temp, 0, 64 * 4); /* for msan */                                  \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, body);                   \
    }                                                                        \
    memcpy(y_tail, y_buf + body, tail);                                      \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(a_tail, a_buf + body, tail);                                      \
    ANY_SIMD(y_tail, u_tail, v_tail, a_tail, dst_tail, MASK + 1);            \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,                     \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
53 
// MergeARGBRow wrappers, one per kernel enabled at build time. Arguments are
// (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK).
#if defined(HAS_MERGEARGBROW_SSE2)
ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
#endif
#if defined(HAS_MERGEARGBROW_AVX2)
ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
#endif
#if defined(HAS_MERGEARGBROW_NEON)
ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
#endif
63 
// Any 4 planes to 1 with yuvconstants.
// Same scheme as the plain 4-plane wrapper: whole groups of MASK + 1 pixels
// go straight to ANY_SIMD, the remainder is processed through a zeroed
// scratch buffer. For odd widths the last chroma sample of the remainder is
// duplicated into the next slot; this is done for 444 as well because the arm
// implementation subsamples 444 to 422 internally.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)              \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {         \
    /* Four 64 byte source slots followed by one 64 byte output slot. */     \
    SIMD_ALIGNED(uint8_t temp[64 * 5]);                                      \
    uint8_t* const y_tail = temp;                                            \
    uint8_t* const u_tail = temp + 64;                                       \
    uint8_t* const v_tail = temp + 128;                                      \
    uint8_t* const a_tail = temp + 192;                                      \
    uint8_t* const dst_tail = temp + 256;                                    \
    const int tail = width & MASK;                                           \
    const int body = width & ~MASK;                                          \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    memset(temp, 0, 64 * 4); /* for msan */                                  \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);     \
    }                                                                        \
    memcpy(y_tail, y_buf + body, tail);                                      \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(a_tail, a_buf + body, tail);                                      \
    if (width & 1) {                                                         \
      /* Repeat the final chroma sample into the following slot. */          \
      u_tail[tail_uv] = u_tail[tail_uv - 1];                                 \
      v_tail[tail_uv] = v_tail[tail_uv - 1];                                 \
    }                                                                        \
    ANY_SIMD(y_tail, u_tail, v_tail, a_tail, dst_tail, yuvconstants,         \
             MASK + 1);                                                      \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,                     \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
91 
// I444Alpha / I422Alpha to ARGB wrappers, one per kernel enabled at build
// time. Arguments are (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK).
#if defined(HAS_I444ALPHATOARGBROW_SSSE3)
ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#if defined(HAS_I444ALPHATOARGBROW_AVX2)
ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I444ALPHATOARGBROW_NEON)
ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_NEON)
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#if defined(HAS_I444ALPHATOARGBROW_MSA)
ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_MSA)
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#if defined(HAS_I444ALPHATOARGBROW_MMI)
ANY41C(I444AlphaToARGBRow_Any_MMI, I444AlphaToARGBRow_MMI, 0, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_MMI)
ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C
123 
// Any 4 planes to 1 plane of 8 bit with yuvconstants.
// Source planes hold elements of type T, each SBPP bytes wide. Whole groups of
// MASK + 1 pixels go straight to ANY_SIMD; the remainder is staged in a zeroed
// scratch buffer (16 elements per plane) and converted into a separate 64 byte
// output buffer, from which only the valid bytes are copied to dst_ptr.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,      \
               int width) {                                                    \
    SIMD_ALIGNED(T temp[16 * 4]);                                              \
    SIMD_ALIGNED(uint8_t dst_tail[64]);                                        \
    T* const y_tail = temp;                                                    \
    T* const u_tail = temp + 16;                                               \
    T* const v_tail = temp + 32;                                               \
    T* const a_tail = temp + 48;                                               \
    const int tail = width & MASK;                                             \
    const int body = width & ~MASK;                                            \
    const int tail_uv = SS(tail, UVSHIFT);                                     \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */                    \
    if (body > 0) {                                                            \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);       \
    }                                                                          \
    memcpy(y_tail, y_buf + body, tail * SBPP);                                 \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv * SBPP);                 \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv * SBPP);                 \
    memcpy(a_tail, a_buf + body, tail * SBPP);                                 \
    ANY_SIMD(y_tail, u_tail, v_tail, a_tail, dst_tail, yuvconstants,           \
             MASK + 1);                                                        \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,                       \
           SS(tail, DUVSHIFT) * BPP);                                          \
  }
145 
// I210Alpha / I410Alpha to ARGB wrappers, one per kernel enabled at build
// time. Arguments are
// (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK).
#if defined(HAS_I210ALPHATOARGBROW_SSSE3)
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0,
        uint16_t, 2, 4, 7)
#endif

#if defined(HAS_I210ALPHATOARGBROW_AVX2)
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t,
        2, 4, 15)
#endif

#if defined(HAS_I410ALPHATOARGBROW_SSSE3)
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0,
        uint16_t, 2, 4, 7)
#endif

#if defined(HAS_I410ALPHATOARGBROW_AVX2)
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t,
        2, 4, 15)
#endif

#undef ANY41CT
191 
// Any 4 planes to 1 plane with parameter.
// Source planes hold STYPE elements of SBPP bytes; the destination holds DTYPE
// elements with BPP bytes per pixel. |depth| is forwarded to ANY_SIMD
// unchanged. Whole groups of MASK + 1 pixels go straight to ANY_SIMD; the
// remainder is staged in a zeroed scratch buffer and only its valid output
// bytes are copied to dst_ptr.
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE temp[16 * 4]);                                      \
    SIMD_ALIGNED(DTYPE dst_tail[64]);                                      \
    STYPE* const r_tail = temp;                                            \
    STYPE* const g_tail = temp + 16;                                       \
    STYPE* const b_tail = temp + 32;                                       \
    STYPE* const a_tail = temp + 48;                                       \
    const int tail = width & MASK;                                         \
    const int body = width & ~MASK;                                        \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */                \
    if (body > 0) {                                                        \
      ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, body);          \
    }                                                                      \
    memcpy(r_tail, r_buf + body, tail * SBPP);                             \
    memcpy(g_tail, g_buf + body, tail * SBPP);                             \
    memcpy(b_tail, b_buf + body, tail * SBPP);                             \
    memcpy(a_tail, a_buf + body, tail * SBPP);                             \
    ANY_SIMD(r_tail, g_tail, b_tail, a_tail, dst_tail, depth, MASK + 1);   \
    /* BPP counts bytes, so the destination is addressed bytewise. */      \
    memcpy((uint8_t*)dst_ptr + body * BPP, dst_tail, tail * BPP);          \
  }
211 
// MergeAR64Row / MergeARGB16To8Row wrappers, one per kernel enabled at build
// time. Arguments are (NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK).
#if defined(HAS_MERGEAR64ROW_AVX2)
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#if defined(HAS_MERGEAR64ROW_NEON)
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
ANY41PT(MergeARGB16To8Row_Any_AVX2, MergeARGB16To8Row_AVX2, uint16_t, 2,
        uint8_t, 4, 15)
#endif

#if defined(HAS_MERGEARGB16TO8ROW_NEON)
ANY41PT(MergeARGB16To8Row_Any_NEON, MergeARGB16To8Row_NEON, uint16_t, 2,
        uint8_t, 4, 7)
#endif

#undef ANY41PT
241 
// Any 3 planes to 1.
// Defines NAMEANY, which accepts any width: whole groups of MASK + 1 pixels go
// straight to ANY_SIMD; the remainder is staged in a zeroed scratch buffer,
// processed as one full group, and only the valid output bytes are copied out.
// UVSHIFT subsamples the u/v sources, DUVSHIFT subsamples the destination.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)      \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,          \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
    /* Three 64 byte source slots, then one 64 byte output slot. */ \
    SIMD_ALIGNED(uint8_t temp[64 * 4]);                             \
    uint8_t* const y_tail = temp;                                   \
    uint8_t* const u_tail = temp + 64;                              \
    uint8_t* const v_tail = temp + 128;                             \
    uint8_t* const dst_tail = temp + 192;                           \
    const int tail = width & MASK;                                  \
    const int body = width & ~MASK;                                 \
    const int tail_uv = SS(tail, UVSHIFT);                          \
    memset(temp, 0, 64 * 3); /* for YUY2 and msan */                \
    if (body > 0) {                                                 \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body);                 \
    }                                                               \
    memcpy(y_tail, y_buf + body, tail);                             \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv);             \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv);             \
    ANY_SIMD(y_tail, u_tail, v_tail, dst_tail, MASK + 1);           \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,            \
           SS(tail, DUVSHIFT) * BPP);                               \
  }
260 
// Merge, I422 to YUY2/UYVY and BlendPlane wrappers, one per kernel enabled at
// build time. Arguments are (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK).

// Merge functions.
#if defined(HAS_MERGERGBROW_SSSE3)
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif
#if defined(HAS_MERGERGBROW_NEON)
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif
#if defined(HAS_MERGERGBROW_MMI)
ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7)
#endif
#if defined(HAS_MERGEXRGBROW_SSE2)
ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOYUY2ROW_AVX2)
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#if defined(HAS_I422TOYUY2ROW_MMI)
ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7)
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7)
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#if defined(HAS_BLENDPLANEROW_SSSE3)
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#if defined(HAS_BLENDPLANEROW_MMI)
ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7)
#endif
#undef ANY31
316 
// Any 3 planes to 1 with yuvconstants.
// Whole groups of MASK + 1 pixels go straight to ANY_SIMD; the remainder is
// processed through a zeroed scratch buffer with 128 byte slots. For odd
// widths the last chroma sample of the remainder is duplicated into the next
// slot; this is done for 444 as well because the arm implementation
// subsamples 444 to 422 internally.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)      \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,           \
               const uint8_t* v_buf, uint8_t* dst_ptr,               \
               const struct YuvConstants* yuvconstants, int width) { \
    /* Three 128 byte source slots, then one 128 byte output slot. */ \
    SIMD_ALIGNED(uint8_t temp[128 * 4]);                             \
    uint8_t* const y_tail = temp;                                    \
    uint8_t* const u_tail = temp + 128;                              \
    uint8_t* const v_tail = temp + 256;                              \
    uint8_t* const dst_tail = temp + 384;                            \
    const int tail = width & MASK;                                   \
    const int body = width & ~MASK;                                  \
    const int tail_uv = SS(tail, UVSHIFT);                           \
    memset(temp, 0, 128 * 3); /* for YUY2 and msan */                \
    if (body > 0) {                                                  \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);    \
    }                                                                \
    memcpy(y_tail, y_buf + body, tail);                              \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv);              \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv);              \
    if (width & 1) {                                                 \
      /* Repeat the final chroma sample into the following slot. */  \
      u_tail[tail_uv] = u_tail[tail_uv - 1];                         \
      v_tail[tail_uv] = v_tail[tail_uv - 1];                         \
    }                                                                \
    ANY_SIMD(y_tail, u_tail, v_tail, dst_tail, yuvconstants,         \
             MASK + 1);                                              \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,             \
           SS(tail, DUVSHIFT) * BPP);                                \
  }
343 
// I422 / I444 to RGB-family wrappers, one per kernel enabled at build time.
// Arguments are (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK). The NEON,
// MSA and MMI groups are each gated by a single HAS_I422TOARGBROW_* symbol.
#if defined(HAS_I422TOARGBROW_SSSE3)
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I422TORGBAROW_SSSE3)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TORGB565ROW_SSSE3)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TORGB24ROW_SSSE3)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif
#if defined(HAS_I422TOAR30ROW_SSSE3)
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I422TOAR30ROW_AVX2)
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I444TOARGBROW_SSSE3)
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#if defined(HAS_I422TORGB24ROW_AVX2)
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I444TOARGBROW_AVX2)
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#endif
#if defined(HAS_I422TORGB565ROW_AVX2)
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
#if defined(HAS_I422TOARGBROW_NEON)
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGBROW_MSA)
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGBROW_MMI)
ANY31C(I444ToARGBRow_Any_MMI, I444ToARGBRow_MMI, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MMI, I422ToARGBRow_MMI, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MMI, I422ToRGB24Row_MMI, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MMI, I422ToARGB4444Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MMI, I422ToARGB1555Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MMI, I422ToRGB565Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGBARow_Any_MMI, I422ToRGBARow_MMI, 1, 0, 4, 7)
#endif
#undef ANY31C
420 
// Any 3 planes of 16 bit to 1 with yuvconstants.
// Source planes hold elements of type T, each SBPP bytes wide. Whole groups of
// MASK + 1 pixels go straight to ANY_SIMD; the remainder is staged in a zeroed
// scratch buffer (16 elements per plane) and converted into a separate 64 byte
// output buffer, from which only the valid bytes are copied to dst_ptr.
// TODO(fbarchard): consider sharing this code with ANY31C
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf,            \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) {                                               \
    SIMD_ALIGNED(T temp[16 * 3]);                                         \
    SIMD_ALIGNED(uint8_t dst_tail[64]);                                   \
    T* const y_tail = temp;                                               \
    T* const u_tail = temp + 16;                                          \
    T* const v_tail = temp + 32;                                          \
    const int tail = width & MASK;                                        \
    const int body = width & ~MASK;                                       \
    const int tail_uv = SS(tail, UVSHIFT);                                \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */               \
    if (body > 0) {                                                       \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);         \
    }                                                                     \
    memcpy(y_tail, y_buf + body, tail * SBPP);                            \
    memcpy(u_tail, u_buf + (body >> UVSHIFT), tail_uv * SBPP);            \
    memcpy(v_tail, v_buf + (body >> UVSHIFT), tail_uv * SBPP);            \
    ANY_SIMD(y_tail, u_tail, v_tail, dst_tail, yuvconstants, MASK + 1);   \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, dst_tail,                  \
           SS(tail, DUVSHIFT) * BPP);                                     \
  }
441 
// I210 / I410 / I212 to ARGB and AR30 wrappers, one per kernel enabled at
// build time. Arguments are
// (NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK).
#if defined(HAS_I210TOAR30ROW_SSSE3)
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I210TOARGBROW_SSSE3)
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I210TOARGBROW_AVX2)
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#if defined(HAS_I210TOAR30ROW_AVX2)
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#if defined(HAS_I410TOAR30ROW_SSSE3)
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I410TOARGBROW_SSSE3)
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I410TOARGBROW_AVX2)
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#if defined(HAS_I410TOAR30ROW_AVX2)
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#if defined(HAS_I210TOARGBROW_MMI)
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I212TOAR30ROW_SSSE3)
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I212TOARGBROW_SSSE3)
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#if defined(HAS_I212TOARGBROW_AVX2)
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#if defined(HAS_I212TOAR30ROW_AVX2)
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT
482 
// Any 3 planes to 1 plane with parameter.
// Source planes hold STYPE elements of SBPP bytes; the destination holds DTYPE
// elements with BPP bytes per pixel. |depth| is forwarded to ANY_SIMD
// unchanged. Whole groups of MASK + 1 pixels go straight to ANY_SIMD; the
// remainder is staged in a zeroed scratch buffer and only its valid output
// bytes are copied to dst_ptr.
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               DTYPE* dst_ptr, int depth, int width) {                     \
    SIMD_ALIGNED(STYPE temp[16 * 3]);                                      \
    SIMD_ALIGNED(DTYPE dst_tail[64]);                                      \
    STYPE* const r_tail = temp;                                            \
    STYPE* const g_tail = temp + 16;                                       \
    STYPE* const b_tail = temp + 32;                                       \
    const int tail = width & MASK;                                         \
    const int body = width & ~MASK;                                        \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                \
    if (body > 0) {                                                        \
      ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, body);                 \
    }                                                                      \
    memcpy(r_tail, r_buf + body, tail * SBPP);                             \
    memcpy(g_tail, g_buf + body, tail * SBPP);                             \
    memcpy(b_tail, b_buf + body, tail * SBPP);                             \
    ANY_SIMD(r_tail, g_tail, b_tail, dst_tail, depth, MASK + 1);           \
    /* BPP counts bytes, so the destination is addressed bytewise. */      \
    memcpy((uint8_t*)dst_ptr + body * BPP, dst_tail, tail * BPP);          \
  }
501 
// MergeXR30Row / MergeXR64Row / MergeXRGB16To8Row wrappers, one per kernel
// enabled at build time. Arguments are
// (NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK).
#if defined(HAS_MERGEXR30ROW_AVX2)
ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif

#if defined(HAS_MERGEXR30ROW_NEON)
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON, MergeXR30Row_10_NEON, uint16_t, 2, uint8_t,
        4, 3)
#endif

#if defined(HAS_MERGEXR64ROW_AVX2)
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#if defined(HAS_MERGEXR64ROW_NEON)
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
ANY31PT(MergeXRGB16To8Row_Any_AVX2, MergeXRGB16To8Row_AVX2, uint16_t, 2,
        uint8_t, 4, 15)
#endif

#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
ANY31PT(MergeXRGB16To8Row_Any_NEON, MergeXRGB16To8Row_NEON, uint16_t, 2,
        uint8_t, 4, 7)
#endif

#undef ANY31PT
546 
// Any 2 planes to 1.
// Defines NAMEANY, which accepts any width: whole groups of MASK + 1 pixels go
// straight to ANY_SIMD; the remainder is staged in a zeroed scratch buffer,
// processed as one full group, and only the valid output bytes are copied out.
// SBPP and SBPP2 are the bytes per element of the first and second source,
// UVSHIFT subsamples the second source, and BPP is the bytes per output pixel.
// The scratch buffer uses 128 byte slots because the tail call writes
// (MASK + 1) * BPP bytes: NV21ToYUV24Row_Any_AVX2 (MASK 31, BPP 3) emits 96
// bytes, which does not fit in a 64 byte output slot.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               int width) {                                                   \
    /* Two 128 byte source slots followed by one 128 byte output slot. */     \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, n);                                    \
    }                                                                         \
    memcpy(temp, y_buf + n * SBPP, r * SBPP);                                 \
    memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2,                       \
           SS(r, UVSHIFT) * SBPP2);                                           \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                         \
    memcpy(dst_ptr + n * BPP, temp + 256, r * BPP);                           \
  }
564 
// Two-source wrappers, one per kernel enabled at build time. Arguments are
// (NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK).

// Merge functions.
#if defined(HAS_MERGEUVROW_SSE2)
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#if defined(HAS_MERGEUVROW_AVX2)
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#if defined(HAS_MERGEUVROW_NEON)
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
#if defined(HAS_MERGEUVROW_MSA)
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif
#if defined(HAS_MERGEUVROW_MMI)
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
#if defined(HAS_NV21TOYUV24ROW_NEON)
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#if defined(HAS_NV21TOYUV24ROW_AVX2)
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif

// Math functions.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBADDROW_SSE2)
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBADDROW_AVX2)
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBADDROW_NEON)
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBMULTIPLYROW_MMI)
ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1)
#endif
#if defined(HAS_ARGBADDROW_MSA)
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBADDROW_MMI)
ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1)
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_MMI)
ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1)
#endif

// Sobel functions.
#if defined(HAS_SOBELROW_SSE2)
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELROW_NEON)
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#if defined(HAS_SOBELROW_MSA)
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELROW_MMI)
ANY21(SobelRow_Any_MMI, SobelRow_MMI, 0, 1, 1, 4, 7)
#endif
#if defined(HAS_SOBELTOPLANEROW_SSE2)
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#if defined(HAS_SOBELTOPLANEROW_MSA)
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#if defined(HAS_SOBELTOPLANEROW_MMI)
ANY21(SobelToPlaneRow_Any_MMI, SobelToPlaneRow_MMI, 0, 1, 1, 1, 7)
#endif
#if defined(HAS_SOBELXYROW_SSE2)
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELXYROW_NEON)
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#if defined(HAS_SOBELXYROW_MSA)
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELXYROW_MMI)
ANY21(SobelXYRow_Any_MMI, SobelXYRow_MMI, 0, 1, 1, 4, 7)
#endif
#undef ANY21
670 
// Any 2 planes to 1 with yuvconstants.
// Whole groups of MASK + 1 pixels go straight to ANY_SIMD; the remainder is
// staged in a zeroed scratch buffer with 128 byte slots, processed as one full
// group, and only the valid output bytes are copied to dst_ptr. SBPP and SBPP2
// are the bytes per element of the first and second source, UVSHIFT subsamples
// the second source, and BPP is the bytes per output pixel.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {          \
    /* Two 128 byte source slots followed by one 128 byte output slot. */     \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    uint8_t* const y_tail = temp;                                             \
    uint8_t* const uv_tail = temp + 128;                                      \
    uint8_t* const dst_tail = temp + 256;                                     \
    const int tail = width & MASK;                                            \
    const int body = width & ~MASK;                                           \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body);                   \
    }                                                                         \
    memcpy(y_tail, y_buf + body * SBPP, tail * SBPP);                         \
    memcpy(uv_tail, uv_buf + (body >> UVSHIFT) * SBPP2,                       \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(y_tail, uv_tail, dst_tail, yuvconstants, MASK + 1);              \
    memcpy(dst_ptr + body * BPP, dst_tail, tail * BPP);                       \
  }
688 
// Biplanar to RGB.
// NV12 / NV21 wrappers, one per kernel enabled at build time. Arguments are
// (NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK).
#if defined(HAS_NV12TOARGBROW_SSSE3)
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_AVX2)
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#if defined(HAS_NV12TOARGBROW_NEON)
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_MMI)
ANY21C(NV12ToARGBRow_Any_MMI, NV12ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_SSSE3)
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_AVX2)
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_MSA)
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_MMI)
ANY21C(NV21ToARGBRow_Any_MMI, NV21ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TORGB24ROW_NEON)
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#if defined(HAS_NV21TORGB24ROW_NEON)
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#if defined(HAS_NV12TORGB24ROW_SSSE3)
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#if defined(HAS_NV12TORGB24ROW_MMI)
ANY21C(NV12ToRGB24Row_Any_MMI, NV12ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#if defined(HAS_NV21TORGB24ROW_SSSE3)
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#if defined(HAS_NV12TORGB24ROW_AVX2)
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#if defined(HAS_NV21TORGB24ROW_AVX2)
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
741 #ifdef HAS_NV21TORGB24ROW_MMI
742 ANY21C(NV21ToRGB24Row_Any_MMI, NV21ToRGB24Row_MMI, 1, 1, 2, 3, 7)
743 #endif
744 #ifdef HAS_NV12TORGB565ROW_SSSE3
745 ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
746 #endif
747 #ifdef HAS_NV12TORGB565ROW_AVX2
748 ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
749 #endif
750 #ifdef HAS_NV12TORGB565ROW_NEON
751 ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
752 #endif
753 #ifdef HAS_NV12TORGB565ROW_MSA
754 ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
755 #endif
756 #ifdef HAS_NV12TORGB565ROW_MMI
757 ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
758 #endif
759 #undef ANY21C
760 
// Any 2 planes of 16 bit to 1 with yuvconstants.
// Generates NAMEANY(y_buf, uv_buf, dst_ptr, yuvconstants, width) around the
// ANY_SIMD kernel. Whole blocks of MASK + 1 pixels are converted directly;
// the leftover pixels are copied into a zeroed scratch buffer, converted as
// one full block into `out`, and only the valid bytes are copied to dst_ptr.
//   UVSHIFT  - log2 of the horizontal subsampling of uv_buf.
//   DUVSHIFT - log2 of the horizontal subsampling applied to dst_ptr.
//   T        - element type of both source planes.
//   SBPP     - bytes per source element; must equal sizeof(T) so that the
//              memset clears all of temp.
//   BPP      - bytes per destination pixel.
//   MASK     - kernel block size minus 1. temp holds 16 Y elements followed
//              by 32 interleaved UV elements and out holds 64 bytes, so
//              MASK <= 15 and (MASK + 1) * BPP <= 64.
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr,              \
               const struct YuvConstants* yuvconstants, int width) {           \
    SIMD_ALIGNED(T temp[16 * 3]);                                              \
    SIMD_ALIGNED(uint8_t out[64]);                                             \
    memset(temp, 0, 16 * 3 * SBPP); /* for msan */                             \
    int r = width & MASK;           /* leftover pixels */                      \
    int n = width & ~MASK;          /* pixels covered by whole blocks */       \
    if (n > 0) {                                                               \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                       \
    }                                                                          \
    memcpy(temp, y_buf + n, r * SBPP);                                         \
    memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1);                    \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);       \
  }
778 
// 16 bit biplanar to ARGB / AR30.
// ANY21CT arguments: wrapper name, SIMD kernel, UVSHIFT, DUVSHIFT, T, SBPP,
// BPP, MASK. The P210 wrappers pass UVSHIFT = 1 (half-width UV) and the P410
// wrappers pass UVSHIFT = 0 (full-width UV).
#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif

#undef ANY21CT
805 
// Any 2 16 bit planes with parameter to 1.
// Generates NAMEANY(src_u, src_v, dst_uv, depth, width) around the ANY_SIMD
// kernel, which writes two output elements per input pixel. Whole blocks of
// MASK + 1 pixels are processed directly; the leftover pixels go through a
// zeroed scratch buffer and only the valid output elements are copied back.
//   T    - element type of the sources and the destination.
//   BPP  - bytes per element; must equal sizeof(T) so that the memset clears
//          all of temp.
//   MASK - kernel block size minus 1; at most 15.
// Scratch layout in elements: [0,16) U input, [16,32) V input, [32,64) output.
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                     \
  void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
               int width) {                                          \
    SIMD_ALIGNED(T temp[16 * 4]);                                    \
    memset(temp, 0, 16 * 4 * BPP); /* for msan */                    \
    int r = width & MASK;          /* leftover pixels */             \
    int n = width & ~MASK;         /* pixels in whole blocks */      \
    if (n > 0) {                                                     \
      ANY_SIMD(src_u, src_v, dst_uv, depth, n);                      \
    }                                                                \
    memcpy(temp, src_u + n, r * BPP);                                \
    memcpy(temp + 16, src_v + n, r * BPP);                           \
    ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1);           \
    memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2);                  \
  }
822 
// 16 bit U and V planes merged into one interleaved UV plane.
// ANY21PT arguments: wrapper name, SIMD kernel, T, BPP, MASK.
#ifdef HAS_MERGEUVROW_16_AVX2
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#endif

// Retire the generator used by this section (ANY21CT is already undefined
// after its own instantiation list).
#undef ANY21PT
831 
// Any 1 to 1.
// Generates NAMEANY(src_ptr, dst_ptr, width) around the ANY_SIMD kernel.
// Whole blocks of MASK + 1 pixels are processed directly; the leftover
// pixels are copied into a zeroed scratch buffer, processed as one full
// block, and only the valid output bytes are copied to dst_ptr.
//   UVSHIFT - log2 of the number of pixels sharing one source sample group
//             (1 for the YUY2/UYVY wrappers, otherwise 0).
//   SBPP    - bytes per source pixel (or per sample group when UVSHIFT > 0).
//   BPP     - bytes per destination pixel.
//   MASK    - kernel block size minus 1 (block size is a power of 2).
// Scratch layout: [0,128) source, [128,256) destination, so one block of
// MASK + 1 pixels must fit in 128 bytes on each side.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {     \
    SIMD_ALIGNED(uint8_t temp[128 * 2]);                                  \
    memset(temp, 0, 128); /* for YUY2 and msan */                         \
    int r = width & MASK; /* leftover pixels */                           \
    int n = width & ~MASK; /* pixels covered by whole blocks */           \
    if (n > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, n);                                      \
    }                                                                     \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(temp, temp + 128, MASK + 1);                                 \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                       \
  }
846 
// ANY11 arguments: wrapper name, SIMD kernel, UVSHIFT, SBPP, BPP, MASK.
// Row copy.
#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_COPYROW_NEON
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif
// ARGB to packed RGB formats (x86). Note that one feature macro guards
// several related wrappers in some of the groups below.
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORAWROW_AVX2)
ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
// ARGB / ABGR to AR30.
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ABGRTOAR30ROW_AVX2)
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
// Grey and packed RGB formats to ARGB / RGBA / RGB24 (x86).
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTORGBAROW_SSSE3)
ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
// ARGB to packed RGB formats and J400 to ARGB (NEON, MSA, MMI).
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3)
ANY11(ARGBToRAWRow_Any_MMI, ARGBToRAWRow_MMI, 0, 4, 3, 3)
ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3)
ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3)
#endif
// RAW to RGB24 (NEON, MSA, MMI).
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_MMI)
ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3)
#endif
// Packed formats to a single 8 bit plane (Y / YJ).
// ANY11 arguments: wrapper name, SIMD kernel, UVSHIFT, SBPP, BPP, MASK.
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_AVX2
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_AVX2
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_NEON
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_MMI
ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_MSA
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_MMI
ANY11(ARGBToYJRow_Any_MMI, ARGBToYJRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_MSA
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_MMI
ANY11(BGRAToYRow_Any_MMI, BGRAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_MSA
// Mask 15 matches the ARGB/BGRA/RGBA MSA wrappers in this list; the previous
// mask of 7 could hand the kernel a width that is only a multiple of 8.
// NOTE(review): assumes ABGRToYRow_MSA consumes 16 pixels per iteration like
// its siblings - confirm in the MSA row source. A mask of 15 is also valid
// for a kernel that consumes 8 pixels per iteration.
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_MMI
ANY11(ABGRToYRow_Any_MMI, ABGRToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_MSA
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_MMI
ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYJROW_AVX2
ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYJROW_SSSE3
ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_NEON
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYROW_MMI
ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYJROW_AVX2
ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_SSSE3
ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_NEON
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_MMI
ANY11(RAWToYRow_Any_MMI, RAWToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_MMI
ANY11(RGB565ToYRow_Any_MMI, RGB565ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_MMI
ANY11(ARGB1555ToYRow_Any_MMI, ARGB1555ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_MMI
ANY11(ARGB4444ToYRow_Any_MMI, ARGB4444ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_NEON
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_MMI
ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
// ANY11 arguments: wrapper name, SIMD kernel, UVSHIFT, SBPP, BPP, MASK.
// Swap the two bytes of each interleaved UV pair.
#ifdef HAS_SWAPUVROW_SSSE3
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
#endif
#ifdef HAS_SWAPUVROW_AVX2
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
#endif
#ifdef HAS_SWAPUVROW_NEON
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
// Packed RGB formats to ARGB / RGBA (NEON, MSA, MMI).
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_MMI
ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTORGBAROW_NEON
ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_MMI
ANY11(RAWToARGBRow_Any_MMI, RAWToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_MMI
ANY11(RGB565ToARGBRow_Any_MMI, RGB565ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MMI
ANY11(ARGB1555ToARGBRow_Any_MMI, ARGB1555ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MMI
ANY11(ARGB4444ToARGBRow_Any_MMI, ARGB4444ToARGBRow_MMI, 0, 2, 4, 3)
#endif
// ARGB attenuate / unattenuate.
#ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MMI
ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1)
#endif
// Extract the alpha channel of ARGB into a single plane.
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MSA
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MMI
ANY11(ARGBExtractAlphaRow_Any_MMI, ARGBExtractAlphaRow_MMI, 0, 4, 1, 7)
#endif
#undef ANY11
1209 
// Any 1 to 1 blended.  Destination is read, modify, write.
// Generates NAMEANY(src_ptr, dst_ptr, width) around an ANY_SIMD kernel that
// reads the existing destination as well as the source. Whole blocks of
// MASK + 1 pixels are processed in place; for the leftover pixels both the
// source and the current destination are copied into a zeroed scratch
// buffer, processed as one full block, and the valid bytes are written back.
//   UVSHIFT - log2 of the number of pixels sharing one source sample group.
//   SBPP    - bytes per source pixel.
//   BPP     - bytes per destination pixel.
//   MASK    - kernel block size minus 1 (block size is a power of 2).
// Scratch layout: [0,64) source, [64,128) destination, so one block of
// MASK + 1 pixels must fit in 64 bytes on each side.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {     \
    SIMD_ALIGNED(uint8_t temp[64 * 2]);                                   \
    memset(temp, 0, 64 * 2); /* for msan */                               \
    int r = width & MASK;    /* leftover pixels */                        \
    int n = width & ~MASK;   /* pixels covered by whole blocks */         \
    if (n > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, n);                                      \
    }                                                                     \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    memcpy(temp + 64, dst_ptr + n * BPP, r * BPP);                        \
    ANY_SIMD(temp, temp + 64, MASK + 1);                                  \
    memcpy(dst_ptr + n * BPP, temp + 64, r * BPP);                        \
  }
1225 
// Alpha copy wrappers; the destination ARGB row is updated in place.
// ANY11B arguments: wrapper name, SIMD kernel, UVSHIFT, SBPP, BPP, MASK.
// The CopyAlpha wrappers read 4 source bytes per pixel and the CopyYToAlpha
// wrappers read 1.
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_MMI
ANY11B(ARGBCopyAlphaRow_Any_MMI, ARGBCopyAlphaRow_MMI, 0, 4, 4, 1)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_MMI
ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7)
#endif
#undef ANY11B
1245 
// Any 1 to 1 with parameter.
// Generates NAMEANY(src_ptr, dst_ptr, param, width) around the ANY_SIMD
// kernel; param is forwarded unchanged to every kernel call. Whole blocks of
// MASK + 1 pixels are processed directly; the leftover pixels go through a
// zeroed scratch buffer and only the valid output bytes are copied back.
//   T    - type of the extra parameter.
//   SBPP - bytes per source pixel.
//   BPP  - bytes per destination pixel.
//   MASK - kernel block size minus 1 (block size is a power of 2).
// Scratch layout: [0,64) source, [64,128) destination, so one block of
// MASK + 1 pixels must fit in 64 bytes on each side.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 2]);                                        \
    memset(temp, 0, 64);   /* for msan */                                      \
    int r = width & MASK;  /* leftover pixels */                               \
    int n = width & ~MASK; /* pixels covered by whole blocks */                \
    if (n > 0) {                                                               \
      ANY_SIMD(src_ptr, dst_ptr, param, n);                                    \
    }                                                                          \
    memcpy(temp, src_ptr + n * SBPP, r * SBPP);                                \
    ANY_SIMD(temp, temp + 64, param, MASK + 1);                                \
    memcpy(dst_ptr + n * BPP, temp + 64, r * BPP);                             \
  }
1260 
// ANY11P arguments: wrapper name, SIMD kernel, T, SBPP, BPP, MASK.
// I400 (grey) to ARGB; the parameter is the YuvConstants pointer.
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11P(I400ToARGBRow_Any_SSE2,
       I400ToARGBRow_SSE2,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11P(I400ToARGBRow_Any_AVX2,
       I400ToARGBRow_AVX2,
       const struct YuvConstants*,
       1,
       4,
       15)
#endif
#if defined(HAS_I400TOARGBROW_NEON)
ANY11P(I400ToARGBRow_Any_NEON,
       I400ToARGBRow_NEON,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif
#if defined(HAS_I400TOARGBROW_MSA)
ANY11P(I400ToARGBRow_Any_MSA,
       I400ToARGBRow_MSA,
       const struct YuvConstants*,
       1,
       4,
       15)
#endif
#if defined(HAS_I400TOARGBROW_MMI)
ANY11P(I400ToARGBRow_Any_MMI,
       I400ToARGBRow_MMI,
       const struct YuvConstants*,
       1,
       4,
       7)
#endif

// ARGB to RGB565 with dither; the parameter is a 32 bit dither value.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
       ARGBToRGB565DitherRow_SSE2,
       const uint32_t,
       4,
       2,
       3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
       ARGBToRGB565DitherRow_AVX2,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
       ARGBToRGB565DitherRow_NEON,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA,
       ARGBToRGB565DitherRow_MSA,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
ANY11P(ARGBToRGB565DitherRow_Any_MMI,
       ARGBToRGB565DitherRow_MMI,
       const uint32_t,
       4,
       2,
       3)
#endif
// ARGB channel shuffle; the parameter is a pointer to the shuffle table.
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MMI
ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
#endif
#undef ANY11P
1359 
// Any 1 to 1 with type.
// Generates NAMEANY(src_ptr, dst_ptr, width) for kernels whose source and
// destination element types differ. Offsets are computed in bytes, so SBPP
// and BPP are bytes per pixel regardless of STYPE / DTYPE. Whole blocks of
// MASK + 1 pixels are processed directly; the leftover pixels go through
// scratch buffers sized for exactly one block.
//   SBPP  - bytes per source pixel.
//   BPP   - bytes per destination pixel.
//   STYPE - source element type.
//   DTYPE - destination element type.
//   MASK  - kernel block size minus 1 (block size is a power of 2).
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)  \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]);                \
    SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]);                  \
    memset(temp, 0, (MASK + 1) * SBPP); /* for msan */            \
    int r = width & MASK;               /* leftover pixels */     \
    int n = width & ~MASK;              /* whole-block pixels */  \
    if (n > 0) {                                                  \
      ANY_SIMD(src_ptr, dst_ptr, n);                              \
    }                                                             \
    memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP);       \
    ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1);                \
    memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP);          \
  }
1375 
// 8 bit ARGB <-> 16 bit AR64 / AB64.
// ANY11T arguments: wrapper name, SIMD kernel, SBPP, BPP, STYPE, DTYPE, MASK.
// Each wrapper is guarded by the feature macro named after its own kernel.
// NOTE(review): the AB64ToARGB wrappers were previously guarded by
// HAS_ARGBTOAR64ROW_*; confirm that row.h defines HAS_AB64TOARGBROW_* for
// SSSE3, AVX2 and NEON.
#ifdef HAS_ARGBTOAR64ROW_SSSE3
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_ARGBTOAB64ROW_SSSE3
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_AR64TOARGBROW_SSSE3
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

#ifdef HAS_AB64TOARGBROW_SSSE3
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

#ifdef HAS_ARGBTOAR64ROW_AVX2
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_AVX2
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_AVX2
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

#ifdef HAS_AB64TOARGBROW_AVX2
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

#ifdef HAS_ARGBTOAR64ROW_NEON
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_NEON
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_NEON
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

#ifdef HAS_AB64TOARGBROW_NEON
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

#undef ANY11T
1425 
// Any 1 to 1 with an integer scale parameter and typed elements.
// Generates NAMEANY(src_ptr, dst_ptr, scale, width) around the ANY_SIMD
// kernel; scale is forwarded unchanged to every kernel call. Pointer offsets
// are in elements, while SBPP and BPP are byte sizes used for memset/memcpy.
//   SBPP  - bytes per source element; must equal sizeof(STYPE) so that the
//           memset clears all of temp.
//   BPP   - bytes per destination element (sizeof(DTYPE)).
//   STYPE - source element type.
//   DTYPE - destination element type.
//   MASK  - kernel block size minus 1; at most 31 because the scratch
//           buffers hold 32 elements each.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)             \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
    SIMD_ALIGNED(STYPE temp[32]);                                            \
    SIMD_ALIGNED(DTYPE out[32]);                                             \
    memset(temp, 0, 32 * SBPP); /* for msan */                               \
    int r = width & MASK;       /* leftover elements */                      \
    int n = width & ~MASK;      /* elements covered by whole blocks */       \
    if (n > 0) {                                                             \
      ANY_SIMD(src_ptr, dst_ptr, scale, n);                                  \
    }                                                                        \
    memcpy(temp, src_ptr + n, r * SBPP);                                     \
    ANY_SIMD(temp, out, scale, MASK + 1);                                    \
    memcpy(dst_ptr + n, out, r * BPP);                                       \
  }
1441 
// Bit depth conversion and 16 bit multiply / divide by a scale.
// ANY11C arguments: wrapper name, SIMD kernel, SBPP, BPP, STYPE, DTYPE, MASK.
#ifdef HAS_CONVERT16TO8ROW_SSSE3
ANY11C(Convert16To8Row_Any_SSSE3,
       Convert16To8Row_SSSE3,
       2,
       1,
       uint16_t,
       uint8_t,
       15)
#endif
#ifdef HAS_CONVERT16TO8ROW_AVX2
ANY11C(Convert16To8Row_Any_AVX2,
       Convert16To8Row_AVX2,
       2,
       1,
       uint16_t,
       uint8_t,
       31)
#endif
#ifdef HAS_CONVERT8TO16ROW_SSE2
ANY11C(Convert8To16Row_Any_SSE2,
       Convert8To16Row_SSE2,
       1,
       2,
       uint8_t,
       uint16_t,
       15)
#endif
#ifdef HAS_CONVERT8TO16ROW_AVX2
ANY11C(Convert8To16Row_Any_AVX2,
       Convert8To16Row_AVX2,
       1,
       2,
       uint8_t,
       uint16_t,
       31)
#endif
#ifdef HAS_MULTIPLYROW_16_AVX2
ANY11C(MultiplyRow_16_Any_AVX2,
       MultiplyRow_16_AVX2,
       2,
       2,
       uint16_t,
       uint16_t,
       31)
#endif
#ifdef HAS_MULTIPLYROW_16_NEON
ANY11C(MultiplyRow_16_Any_NEON,
       MultiplyRow_16_NEON,
       2,
       2,
       uint16_t,
       uint16_t,
       15)
#endif
#ifdef HAS_DIVIDEROW_16_AVX2
ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
#endif
#ifdef HAS_DIVIDEROW_16_NEON
ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
#endif
#undef ANY11C
1503 
// Any 1 to 1 with a float parameter.  Source (ST) and destination (T) sample
// types may differ, e.g. uint16_t to uint16_t or uint8_t to float.
// The tail is staged and copied by element size (sizeof(ST) / sizeof(T)), so
// the result cannot be truncated by a mismatched SBPP / BPP argument.  Those
// two macro parameters are kept only so existing instantiations still compile.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK)             \
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
    SIMD_ALIGNED(ST temp[32]);                                          \
    SIMD_ALIGNED(T out[32]);                                            \
    memset(temp, 0, sizeof(temp)); /* for msan */                       \
    int r = width & MASK;                                               \
    int n = width & ~MASK;                                              \
    if (n > 0) {                                                        \
      ANY_SIMD(src_ptr, dst_ptr, param, n);                             \
    }                                                                   \
    memcpy(temp, src_ptr + n, r * sizeof(ST));                          \
    ANY_SIMD(temp, out, param, MASK + 1);                               \
    memcpy(dst_ptr + n, out, r * sizeof(T));                            \
  }
1519 
// Any-width float-parameter rows.  Arguments after the row names are: source
// type, destination type, source bytes per sample, destination bytes per
// sample, width mask.
#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
ANY11P16(HalfFloat1Row_Any_F16C,
         HalfFloat1Row_F16C,
         uint16_t,
         uint16_t,
         2,
         2,
         15)
#endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
ANY11P16(HalfFloat1Row_Any_NEON,
         HalfFloat1Row_NEON,
         uint16_t,
         uint16_t,
         2,
         2,
         7)
#endif
#ifdef HAS_HALFFLOATROW_MSA
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
#endif
#ifdef HAS_BYTETOFLOATROW_NEON
// The destination samples are float, so the tail copy needs 4 bytes each.
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
#endif
#undef ANY11P16
1553 
// Any 1 to 1 with yuvconstants.
// UVSHIFT is the source subsampling shift, SBPP the bytes per source unit
// (one unit covers 1 << UVSHIFT pixels) and BPP the bytes per output pixel.
// The 256 byte scratch buffer holds the staged source tail in its first half
// and the converted tail in its second half.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr,                  \
               const struct YuvConstants* yuvconstants, int width) {      \
    SIMD_ALIGNED(uint8_t temp[128 * 2]);                                  \
    uint8_t* const tail_src = temp;                                       \
    uint8_t* const tail_dst = temp + 128;                                 \
    const int simd_width = width & ~MASK;                                 \
    const int tail_width = width & MASK;                                  \
    memset(tail_src, 0, 128); /* for YUY2 and msan */                     \
    if (simd_width > 0) {                                                 \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, simd_width);               \
    }                                                                     \
    memcpy(tail_src, src_ptr + (simd_width >> UVSHIFT) * SBPP,            \
           SS(tail_width, UVSHIFT) * SBPP);                               \
    ANY_SIMD(tail_src, tail_dst, yuvconstants, MASK + 1);                 \
    memcpy(dst_ptr + simd_width * BPP, tail_dst, tail_width * BPP);       \
  }
// YUY2 / UYVY to ARGB for any width.  Arguments after the row names are the
// source subsampling shift, bytes per source unit, bytes per output pixel and
// the width mask.
#ifdef HAS_YUY2TOARGBROW_SSSE3
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#ifdef HAS_YUY2TOARGBROW_AVX2
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#ifdef HAS_YUY2TOARGBROW_NEON
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#ifdef HAS_YUY2TOARGBROW_MSA
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
#ifdef HAS_YUY2TOARGBROW_MMI
ANY11C(YUY2ToARGBRow_Any_MMI, YUY2ToARGBRow_MMI, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#endif
#undef ANY11C
1590 
// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
// The tail of the second row (src_ptr + src_stride) is staged only when
// source_y_fraction is non-zero.  A zero fraction is taken to mean "first row
// only", and in that case the second row may lie outside the caller's buffer
// (e.g. on the last image row), so it is left zeroed instead of being read.
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                             \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \
               int width, int source_y_fraction) {                             \
    SIMD_ALIGNED(uint8_t temp[64 * 3]);                                        \
    memset(temp, 0, 64 * 2); /* for msan */                                    \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction);            \
    }                                                                          \
    memcpy(temp, src_ptr + n * SBPP, r * SBPP);                                \
    if (source_y_fraction) { /* row 1 is unused when the fraction is 0 */      \
      memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP);            \
    }                                                                          \
    ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction);               \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                            \
  }
1607 
// Any-width InterpolateRow variants: 1 byte per source and destination
// element, with the width mask of each SIMD implementation.
#if defined(HAS_INTERPOLATEROW_AVX2)
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
ANY11I(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11I
1624 
// Any 1 to 1 mirror.
// The SIMD-sized body is read from behind the first (width & MASK) source
// pixels and written to the start of the destination.  Those leading source
// pixels are run through the scratch buffer as one full SIMD step; their
// results sit at the end of that step's output and are appended to the
// destination after the body.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                              \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {     \
    SIMD_ALIGNED(uint8_t temp[64 * 2]);                                   \
    uint8_t* const staged = temp;                                         \
    uint8_t* const mirrored = temp + 64;                                  \
    const int tail_width = width & MASK;                                  \
    const int simd_width = width & ~MASK;                                 \
    memset(staged, 0, 64); /* for msan */                                 \
    if (simd_width > 0) {                                                 \
      ANY_SIMD(src_ptr + tail_width * BPP, dst_ptr, simd_width);          \
    }                                                                     \
    memcpy(staged, src_ptr, tail_width * BPP);                            \
    ANY_SIMD(staged, mirrored, MASK + 1);                                 \
    memcpy(dst_ptr + simd_width * BPP,                                    \
           mirrored + (MASK + 1 - tail_width) * BPP, tail_width * BPP);   \
  }
1639 
// Any-width mirror rows.  Arguments after the row names are the bytes per
// pixel and the width mask.
#if defined(HAS_MIRRORROW_AVX2)
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#if defined(HAS_MIRRORROW_SSSE3)
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#if defined(HAS_MIRRORROW_NEON)
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#endif
#if defined(HAS_MIRRORROW_MSA)
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#if defined(HAS_MIRRORROW_MMI)
ANY11M(MirrorRow_Any_MMI, MirrorRow_MMI, 1, 7)
#endif
#if defined(HAS_MIRRORUVROW_AVX2)
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#if defined(HAS_MIRRORUVROW_NEON)
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
#if defined(HAS_MIRRORUVROW_MSA)
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
#if defined(HAS_ARGBMIRRORROW_MMI)
ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1)
#endif
#if defined(HAS_RGB24MIRRORROW_SSSE3)
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#if defined(HAS_RGB24MIRRORROW_NEON)
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
#endif
#undef ANY11M
1689 
// Any 1 plane. (memset)
// Forwards the value v32 to the SIMD row function for the SIMD-sized body,
// then produces one more full SIMD step in scratch and copies only the
// remaining (width & MASK) elements of BPP bytes each to the destination.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)                   \
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) {            \
    SIMD_ALIGNED(uint8_t temp[64]);                             \
    const int simd_width = width & ~MASK;                       \
    const int tail_width = width & MASK;                        \
    memset(temp, 0, 64); /* for msan */                         \
    if (simd_width > 0) {                                       \
      ANY_SIMD(dst_ptr, v32, simd_width);                       \
    }                                                           \
    /* Fill one SIMD step in scratch; keep only the tail. */    \
    ANY_SIMD(temp, v32, MASK + 1);                              \
    memcpy(dst_ptr + simd_width * BPP, temp, tail_width * BPP); \
  }
1703 
// Any-width set rows.  Arguments after the row names are the value type, the
// bytes per element and the width mask.
#if defined(HAS_SETROW_X86)
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#if defined(HAS_SETROW_NEON)
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#if defined(HAS_ARGBSETROW_NEON)
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#if defined(HAS_ARGBSETROW_MSA)
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#if defined(HAS_ARGBSETROW_MMI)
ANY1(ARGBSetRow_Any_MMI, ARGBSetRow_MMI, uint32_t, 4, 3)
#endif
#undef ANY1
1720 
// Any 1 to 2.  Outputs UV planes.
// UVSHIFT / DUVSHIFT are the subsampling shifts of the source and of the
// destination planes; BPP is the bytes per source unit.  Scratch is three
// 128 byte rows: staged source tail, U output and V output.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v,  \
               int width) {                                             \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                \
    uint8_t* const tail_src = temp;                                     \
    uint8_t* const tail_u = temp + 128;                                 \
    uint8_t* const tail_v = temp + 256;                                 \
    const int simd_width = width & ~MASK;                               \
    const int tail_width = width & MASK;                                \
    const int dst_offset = simd_width >> DUVSHIFT;                      \
    const int dst_bytes = SS(tail_width, DUVSHIFT);                     \
    memset(tail_src, 0, 128); /* for msan */                            \
    if (simd_width > 0) {                                               \
      ANY_SIMD(src_ptr, dst_u, dst_v, simd_width);                      \
    }                                                                   \
    memcpy(tail_src, src_ptr + (simd_width >> UVSHIFT) * BPP,           \
           SS(tail_width, UVSHIFT) * BPP);                              \
    ANY_SIMD(tail_src, tail_u, tail_v, MASK + 1);                       \
    memcpy(dst_u + dst_offset, tail_u, dst_bytes);                      \
    memcpy(dst_v + dst_offset, tail_v, dst_bytes);                      \
  }
1737 
// Any-width 1 to 2 plane rows.  Arguments after the row names are the source
// subsampling shift, bytes per source unit, destination subsampling shift and
// the width mask.
#if defined(HAS_SPLITUVROW_SSE2)
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#if defined(HAS_SPLITUVROW_AVX2)
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#if defined(HAS_SPLITUVROW_NEON)
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#if defined(HAS_SPLITUVROW_MSA)
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
#if defined(HAS_SPLITUVROW_MMI)
ANY12(SplitUVRow_Any_MMI, SplitUVRow_MMI, 0, 2, 0, 7)
#endif
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#if defined(HAS_YUY2TOUV422ROW_AVX2)
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#if defined(HAS_YUY2TOUV422ROW_SSE2)
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
// NOTE(review): in the NEON, MSA and MMI groups below the ARGBToUV444 row is
// enabled by the YUY2ToUV422 feature macro rather than one of its own;
// presumably both are always defined together -- confirm before splitting.
#if defined(HAS_YUY2TOUV422ROW_NEON)
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#if defined(HAS_YUY2TOUV422ROW_MSA)
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
#if defined(HAS_YUY2TOUV422ROW_MMI)
ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7)
ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15)
ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12
1780 
// Any 1 interleaved plane of type T to 2 planes, with a depth parameter.
// BPP is sizeof(T) in bytes.  Scratch is 64 elements: 32 for the staged
// interleaved tail, then 16 each for the two output rows.
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                            \
  void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
    SIMD_ALIGNED(T temp[16 * 4]);                                           \
    T* const tail_uv = temp;                                                \
    T* const tail_u = temp + 32;                                            \
    T* const tail_v = temp + 48;                                            \
    const int simd_width = width & ~MASK;                                   \
    const int tail_width = width & MASK;                                    \
    memset(temp, 0, 16 * 4 * BPP); /* for msan */                           \
    if (simd_width > 0) {                                                   \
      ANY_SIMD(src_uv, dst_u, dst_v, depth, simd_width);                    \
    }                                                                       \
    memcpy(tail_uv, src_uv + simd_width * 2, tail_width * BPP * 2);         \
    ANY_SIMD(tail_uv, tail_u, tail_v, depth, MASK + 1);                     \
    memcpy(dst_u + simd_width, tail_u, tail_width * BPP);                   \
    memcpy(dst_v + simd_width, tail_v, tail_width * BPP);                   \
  }
1796 
// Any-width 16 bit UV split rows: sample type, bytes per sample, width mask.
#ifdef HAS_SPLITUVROW_16_AVX2
ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
#endif

#ifdef HAS_SPLITUVROW_16_NEON
ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#endif

// Retire the helper this family actually defined, so it does not leak into
// the rest of the translation unit.
#undef ANY12PT
1806 
// Any 1 to 3.  Outputs RGB planes.
// Scratch layout: 64 bytes of staged source (up to 16 pixels of up to 4 bytes
// each, so both the 3 byte and the 4 byte per pixel instantiations fit),
// followed by three 16 byte output rows.  The staged source must not overlap
// the output rows, otherwise the SIMD function could read source bytes it has
// already overwritten with output.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                                \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,     \
               uint8_t* dst_b, int width) {                                \
    SIMD_ALIGNED(uint8_t temp[16 * 7]);                                    \
    memset(temp, 0, 16 * 4); /* for msan */                                \
    int r = width & MASK;                                                  \
    int n = width & ~MASK;                                                 \
    if (n > 0) {                                                           \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n);                           \
    }                                                                      \
    memcpy(temp, src_ptr + n * BPP, r * BPP);                              \
    ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, MASK + 1); \
    memcpy(dst_r + n, temp + 16 * 4, r);                                   \
    memcpy(dst_g + n, temp + 16 * 5, r);                                   \
    memcpy(dst_b + n, temp + 16 * 6, r);                                   \
  }
1824 
// Any-width 1 to 3 plane split rows: bytes per source pixel, width mask.
#ifdef HAS_SPLITRGBROW_SSSE3
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_NEON
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_MMI
ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#endif
#ifdef HAS_SPLITXRGBROW_SSE2
ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_SSSE3
ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITXRGBROW_AVX2
ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITXRGBROW_NEON
ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
// Retire the helper once its instantiations are done, like the other
// families in this file.
#undef ANY13
1846 
// Any 1 to 4.  Outputs ARGB planes.
// Scratch is eight 16 byte rows: the first four hold the staged source tail
// and the last four receive the R, G, B and A outputs of the tail.
#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK)                                    \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,         \
               uint8_t* dst_b, uint8_t* dst_a, int width) {                    \
    SIMD_ALIGNED(uint8_t temp[16 * 8]);                                        \
    uint8_t* const tail_r = temp + 16 * 4;                                     \
    uint8_t* const tail_g = temp + 16 * 5;                                     \
    uint8_t* const tail_b = temp + 16 * 6;                                     \
    uint8_t* const tail_a = temp + 16 * 7;                                     \
    const int simd_width = width & ~MASK;                                      \
    const int tail_width = width & MASK;                                       \
    memset(temp, 0, 16 * 4); /* for msan */                                    \
    if (simd_width > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, simd_width);               \
    }                                                                          \
    memcpy(temp, src_ptr + simd_width * BPP, tail_width * BPP);                \
    ANY_SIMD(temp, tail_r, tail_g, tail_b, tail_a, MASK + 1);                  \
    memcpy(dst_r + simd_width, tail_r, tail_width);                            \
    memcpy(dst_g + simd_width, tail_g, tail_width);                            \
    memcpy(dst_b + simd_width, tail_b, tail_width);                            \
    memcpy(dst_a + simd_width, tail_a, tail_width);                            \
  }
1866 
// Any-width 1 to 4 plane split rows: bytes per source pixel, width mask.
#ifdef HAS_SPLITARGBROW_SSE2
ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_SSSE3
ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7)
#endif
#ifdef HAS_SPLITARGBROW_AVX2
ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15)
#endif
#ifdef HAS_SPLITARGBROW_NEON
ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#endif
// Retire the helper once its instantiations are done, like the other
// families in this file.
#undef ANY14
1879 
// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
// Scratch is four 128 byte rows: staged row 0, staged row 1, U output and V
// output.  A 128 byte row allows for 32 avx ARGB pixels.
// For an odd width with an unsubsampled source (UVSHIFT == 0) the last pixel
// of each staged row is duplicated so the final output pair is complete.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u,       \
               uint8_t* dst_v, int width) {                                  \
    SIMD_ALIGNED(uint8_t temp[128 * 4]);                                     \
    uint8_t* const row0 = temp;                                              \
    uint8_t* const row1 = temp + 128;                                        \
    uint8_t* const tail_u = temp + 256;                                      \
    uint8_t* const tail_v = temp + 384;                                      \
    const int simd_width = width & ~MASK;                                    \
    const int tail_width = width & MASK;                                     \
    const int src_offset = (simd_width >> UVSHIFT) * BPP;                    \
    const int tail_bytes = SS(tail_width, UVSHIFT) * BPP;                    \
    memset(temp, 0, 128 * 2); /* for msan */                                 \
    if (simd_width > 0) {                                                    \
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, simd_width);               \
    }                                                                        \
    memcpy(row0, src_ptr + src_offset, tail_bytes);                          \
    memcpy(row1, src_ptr + src_stride + src_offset, tail_bytes);             \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);               \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);               \
    }                                                                        \
    ANY_SIMD(row0, 128, tail_u, tail_v, MASK + 1);                           \
    memcpy(dst_u + (simd_width >> 1), tail_u, SS(tail_width, 1));            \
    memcpy(dst_v + (simd_width >> 1), tail_v, SS(tail_width, 1));            \
  }
1905 
// Any-width 2 row to UV rows.  Arguments after the row names are the source
// subsampling shift, bytes per source unit and the width mask.
#if defined(HAS_ARGBTOUVROW_AVX2)
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ABGRTOUVROW_AVX2)
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#if defined(HAS_YUY2TOUVROW_AVX2)
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#if defined(HAS_YUY2TOUVROW_SSE2)
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
ANY12S(ARGBToUVRow_Any_MMI, ARGBToUVRow_MMI, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVJROW_MSA)
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_MMI)
ANY12S(ARGBToUVJRow_Any_MMI, ARGBToUVJRow_MMI, 0, 4, 15)
#endif
#if defined(HAS_BGRATOUVROW_NEON)
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_BGRATOUVROW_MSA)
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_BGRATOUVROW_MMI)
ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_NEON)
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_MSA)
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_MMI)
ANY12S(RGBAToUVRow_Any_MMI, RGBAToUVRow_MMI, 0, 4, 15)
#endif
#if defined(HAS_RGB24TOUVROW_NEON)
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVROW_MSA)
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVROW_MMI)
ANY12S(RGB24ToUVRow_Any_MMI, RGB24ToUVRow_MMI, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_NEON)
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_MSA)
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_MMI)
ANY12S(RAWToUVRow_Any_MMI, RAWToUVRow_MMI, 0, 3, 15)
#endif
#if defined(HAS_RGB565TOUVROW_NEON)
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#if defined(HAS_RGB565TOUVROW_MSA)
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#if defined(HAS_RGB565TOUVROW_MMI)
ANY12S(RGB565ToUVRow_Any_MMI, RGB565ToUVRow_MMI, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_NEON)
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_MSA)
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_MMI)
ANY12S(ARGB1555ToUVRow_Any_MMI, ARGB1555ToUVRow_MMI, 0, 2, 15)
#endif
#if defined(HAS_ARGB4444TOUVROW_NEON)
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
#if defined(HAS_ARGB4444TOUVROW_MMI)
ANY12S(ARGB4444ToUVRow_Any_MMI, ARGB4444ToUVRow_MMI, 0, 2, 15)
#endif
#if defined(HAS_YUY2TOUVROW_NEON)
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#if defined(HAS_UYVYTOUVROW_NEON)
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#if defined(HAS_YUY2TOUVROW_MSA)
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#if defined(HAS_YUY2TOUVROW_MMI)
ANY12S(YUY2ToUVRow_Any_MMI, YUY2ToUVRow_MMI, 1, 4, 15)
#endif
#if defined(HAS_UYVYTOUVROW_MSA)
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
#if defined(HAS_UYVYTOUVROW_MMI)
ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S
2036 
// Any 1 to 1 with source stride (2 rows of source).  Outputs UV plane.
// Scratch is three 128 byte rows: staged row 0, staged row 1 and the
// interleaved output.  A 128 byte row allows for 32 avx ARGB pixels.
// For an odd width with an unsubsampled source (UVSHIFT == 0) the last pixel
// of each staged row is duplicated so the final output pair is complete.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu,      \
               int width) {                                                  \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                     \
    uint8_t* const row0 = temp;                                              \
    uint8_t* const row1 = temp + 128;                                        \
    uint8_t* const tail_vu = temp + 256;                                     \
    const int simd_width = width & ~MASK;                                    \
    const int tail_width = width & MASK;                                     \
    const int src_offset = (simd_width >> UVSHIFT) * BPP;                    \
    const int tail_bytes = SS(tail_width, UVSHIFT) * BPP;                    \
    memset(temp, 0, 128 * 2); /* for msan */                                 \
    if (simd_width > 0) {                                                    \
      ANY_SIMD(src_ptr, src_stride, dst_vu, simd_width);                     \
    }                                                                        \
    memcpy(row0, src_ptr + src_offset, tail_bytes);                          \
    memcpy(row1, src_ptr + src_stride + src_offset, tail_bytes);             \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);               \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);               \
    }                                                                        \
    ANY_SIMD(row0, 128, tail_vu, MASK + 1);                                  \
    memcpy(dst_vu + (simd_width >> 1) * 2, tail_vu, SS(tail_width, 1) * 2);  \
  }
2061 
// Any-width AYUV to interleaved UV / VU rows (NEON): unsubsampled source,
// 4 bytes per source pixel, width mask 15.  Both rows are enabled by the same
// feature macro.
#if defined(HAS_AYUVTOVUROW_NEON)
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
2067 
2068 #ifdef __cplusplus
2069 }  // extern "C"
2070 }  // namespace libyuv
2071 #endif
2072