1 /*
2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // memset for temp is meant to clear the source buffer (not dest) so that
23 // SIMD that reads full multiple of 16 bytes will not trigger msan errors.
24 // memset is not needed for production, as the garbage values are processed but
25 // not used, although there may be edge cases for subsampling.
26 // The size of the buffer is based on the largest read, which can be inferred
27 // by the source type (e.g. ARGB) and the mask (last parameter), or by examining
28 // the source code for how much the source pointers are advanced.
29 
// Size of a subsampled run: width divided by (1 << shift), rounded up, so a
// trailing partial group (e.g. the last pixel of an odd width) is still counted.
#define SS(width, shift) (((width) + ((1 << (shift)) - 1)) >> (shift))
32 
// Any 4 planes to 1 with yuvconstants.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width:
//  - the first (width & ~MASK) pixels are handed to ANY_SIMD directly;
//  - the last (width & MASK) pixels are copied into a zero-padded scratch
//    buffer, processed as one block of MASK + 1 pixels, and only the valid
//    output bytes are copied to the destination.
// Scratch layout, 64 bytes per region: Y | U | V | A | output.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                    \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr,  \
               const struct YuvConstants* yuvconstants, int width) {          \
    const int tail = width & MASK;  /* pixels left over after full blocks */  \
    const int body = width & ~MASK; /* pixels covered by full blocks */       \
    const int body_uv = body >> UVSHIFT;                                      \
    const int tail_uv = SS(tail, UVSHIFT);                                    \
    SIMD_ALIGNED(uint8_t scratch[64 * 5]);                                    \
    uint8_t* const tmp_y = scratch;                                           \
    uint8_t* const tmp_u = scratch + 64;                                      \
    uint8_t* const tmp_v = scratch + 128;                                     \
    uint8_t* const tmp_a = scratch + 192;                                     \
    uint8_t* const tmp_out = scratch + 256;                                   \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);      \
    }                                                                         \
    memset(scratch, 0, 64 * 4); /* source regions only, for msan */           \
    memcpy(tmp_y, y_buf + body, tail);                                        \
    memcpy(tmp_u, u_buf + body_uv, tail_uv);                                  \
    memcpy(tmp_v, v_buf + body_uv, tail_uv);                                  \
    memcpy(tmp_a, a_buf + body, tail);                                        \
    ANY_SIMD(tmp_y, tmp_u, tmp_v, tmp_a, tmp_out, yuvconstants, MASK + 1);    \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tmp_out,                       \
           SS(tail, DUVSHIFT) * BPP);                                         \
  }
54 
// ANY41C wrappers for the I422AlphaToARGBRow kernels.
// Trailing arguments are UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif  // HAS_I422ALPHATOARGBROW_SSSE3
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif  // HAS_I422ALPHATOARGBROW_AVX2
#if defined(HAS_I422ALPHATOARGBROW_NEON)
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif  // HAS_I422ALPHATOARGBROW_NEON
#if defined(HAS_I422ALPHATOARGBROW_MSA)
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif  // HAS_I422ALPHATOARGBROW_MSA
#undef ANY41C
68 
// Any 3 planes to 1.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place; the remainder is staged
// in a zero-padded scratch buffer, processed as one full block, and only the
// valid output bytes are copied out.
// Scratch layout, 64 bytes per region: plane 0 | plane 1 | plane 2 | output.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) {          \
    const int tail = width & MASK;  /* pixels left over after full blocks */ \
    const int body = width & ~MASK; /* pixels covered by full blocks */      \
    const int body_uv = body >> UVSHIFT;                                     \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    SIMD_ALIGNED(uint8_t scratch[64 * 4]);                                   \
    uint8_t* const tmp_y = scratch;                                          \
    uint8_t* const tmp_u = scratch + 64;                                     \
    uint8_t* const tmp_v = scratch + 128;                                    \
    uint8_t* const tmp_out = scratch + 192;                                  \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body);                          \
    }                                                                        \
    memset(scratch, 0, 64 * 3); /* source regions; for YUY2 and msan */      \
    memcpy(tmp_y, y_buf + body, tail);                                       \
    memcpy(tmp_u, u_buf + body_uv, tail_uv);                                 \
    memcpy(tmp_v, v_buf + body_uv, tail_uv);                                 \
    ANY_SIMD(tmp_y, tmp_u, tmp_v, tmp_out, MASK + 1);                        \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tmp_out,                      \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
87 
// ANY31 wrappers: merge, I422 to packed YUV, and plane blend kernels.
// Trailing arguments are UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_MERGERGBROW_SSSE3)
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif  // HAS_MERGERGBROW_SSSE3
#if defined(HAS_MERGERGBROW_NEON)
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif  // HAS_MERGERGBROW_NEON
#if defined(HAS_I422TOYUY2ROW_SSE2)
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif  // HAS_I422TOYUY2ROW_SSE2
#if defined(HAS_I422TOYUY2ROW_AVX2)
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif  // HAS_I422TOYUY2ROW_AVX2
#if defined(HAS_I422TOYUY2ROW_NEON)
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif  // HAS_I422TOYUY2ROW_NEON
#if defined(HAS_I422TOYUY2ROW_MSA)
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif  // HAS_I422TOYUY2ROW_MSA
#if defined(HAS_I422TOUYVYROW_NEON)
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif  // HAS_I422TOUYVYROW_NEON
#if defined(HAS_I422TOUYVYROW_MSA)
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif  // HAS_I422TOUYVYROW_MSA
#if defined(HAS_BLENDPLANEROW_AVX2)
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif  // HAS_BLENDPLANEROW_AVX2
#if defined(HAS_BLENDPLANEROW_SSSE3)
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif  // HAS_BLENDPLANEROW_SSSE3
#undef ANY31
122 
// Any 3 planes to 1 with yuvconstants.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place; the remainder is staged
// in a zero-padded scratch buffer, processed as one full block, and only the
// valid output bytes are copied out.
// For odd widths the last U and V samples are duplicated into the next slot.
// This is done for 444 as well, because the arm implementation subsamples
// 444 to 422 internally.
// Scratch layout, 128 bytes per region: Y | U | V | output.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)              \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, uint8_t* dst_ptr,                       \
               const struct YuvConstants* yuvconstants, int width) {         \
    const int tail = width & MASK;  /* pixels left over after full blocks */ \
    const int body = width & ~MASK; /* pixels covered by full blocks */      \
    const int body_uv = body >> UVSHIFT;                                     \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    SIMD_ALIGNED(uint8_t scratch[128 * 4]);                                  \
    uint8_t* const tmp_y = scratch;                                          \
    uint8_t* const tmp_u = scratch + 128;                                    \
    uint8_t* const tmp_v = scratch + 256;                                    \
    uint8_t* const tmp_out = scratch + 384;                                  \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);            \
    }                                                                        \
    memset(scratch, 0, 128 * 3); /* source regions; for YUY2 and msan */     \
    memcpy(tmp_y, y_buf + body, tail);                                       \
    memcpy(tmp_u, u_buf + body_uv, tail_uv);                                 \
    memcpy(tmp_v, v_buf + body_uv, tail_uv);                                 \
    if (width & 1) {                                                         \
      /* odd width: repeat the final chroma sample once more */              \
      tmp_u[tail_uv] = tmp_u[tail_uv - 1];                                   \
      tmp_v[tail_uv] = tmp_v[tail_uv - 1];                                   \
    }                                                                        \
    ANY_SIMD(tmp_y, tmp_u, tmp_v, tmp_out, yuvconstants, MASK + 1);          \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tmp_out,                      \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
149 
// ANY31C wrappers: planar YUV (I422 / I444) to packed RGB kernels.
// Trailing arguments are UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_I422TOARGBROW_SSSE3)
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif  // HAS_I422TOARGBROW_SSSE3
#if defined(HAS_I422TOAR30ROW_SSSE3)
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif  // HAS_I422TOAR30ROW_SSSE3
#if defined(HAS_I422TOAR30ROW_AVX2)
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif  // HAS_I422TOAR30ROW_AVX2
#if defined(HAS_I444TOARGBROW_SSSE3)
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif  // HAS_I444TOARGBROW_SSSE3
#if defined(HAS_I422TORGB24ROW_AVX2)
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif  // HAS_I422TORGB24ROW_AVX2
#if defined(HAS_I422TOARGBROW_AVX2)
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif  // HAS_I422TOARGBROW_AVX2
#if defined(HAS_I422TORGBAROW_AVX2)
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif  // HAS_I422TORGBAROW_AVX2
#if defined(HAS_I444TOARGBROW_AVX2)
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif  // HAS_I444TOARGBROW_AVX2
#if defined(HAS_I422TOARGB4444ROW_AVX2)
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif  // HAS_I422TOARGB4444ROW_AVX2
#if defined(HAS_I422TOARGB1555ROW_AVX2)
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#endif  // HAS_I422TOARGB1555ROW_AVX2
#if defined(HAS_I422TORGB565ROW_AVX2)
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif  // HAS_I422TORGB565ROW_AVX2
#if defined(HAS_I422TOARGBROW_NEON)
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif  // HAS_I422TOARGBROW_NEON
#if defined(HAS_I422TOARGBROW_MSA)
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif  // HAS_I422TOARGBROW_MSA
#undef ANY31C
207 
// Any 3 planes of 16 bit to 1 with yuvconstants.
// Same scheme as the 8-bit wrappers, but source samples have type T (SBPP
// bytes each). The remainder is staged in a zeroed buffer of three regions of
// 16 samples (Y | U | V), and the block result is written to a separate
// 64-byte output buffer before the valid bytes are copied out.
// TODO(fbarchard): consider sharing this code with ANY31C
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)     \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf,                \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,     \
               int width) {                                                   \
    const int tail = width & MASK;  /* pixels left over after full blocks */  \
    const int body = width & ~MASK; /* pixels covered by full blocks */       \
    const int body_uv = body >> UVSHIFT;                                      \
    const int tail_uv = SS(tail, UVSHIFT);                                    \
    SIMD_ALIGNED(T scratch[16 * 3]);                                          \
    SIMD_ALIGNED(uint8_t packed[64]);                                         \
    T* const tmp_y = scratch;                                                 \
    T* const tmp_u = scratch + 16;                                            \
    T* const tmp_v = scratch + 32;                                            \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);             \
    }                                                                         \
    memset(scratch, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                \
    memcpy(tmp_y, y_buf + body, tail * SBPP);                                 \
    memcpy(tmp_u, u_buf + body_uv, tail_uv * SBPP);                           \
    memcpy(tmp_v, v_buf + body_uv, tail_uv * SBPP);                           \
    ANY_SIMD(tmp_y, tmp_u, tmp_v, packed, yuvconstants, MASK + 1);            \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, packed,                        \
           SS(tail, DUVSHIFT) * BPP);                                         \
  }
228 
// ANY31CT wrappers for the I210 (16-bit sample) to AR30 / ARGB kernels.
// Trailing arguments are UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK.
#if defined(HAS_I210TOAR30ROW_SSSE3)
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif  // HAS_I210TOAR30ROW_SSSE3
#if defined(HAS_I210TOARGBROW_SSSE3)
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif  // HAS_I210TOARGBROW_SSSE3
#if defined(HAS_I210TOARGBROW_AVX2)
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif  // HAS_I210TOARGBROW_AVX2
#if defined(HAS_I210TOAR30ROW_AVX2)
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif  // HAS_I210TOAR30ROW_AVX2
#undef ANY31CT
242 
// Any 2 planes to 1.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place; the remainder is staged
// in a zero-padded scratch buffer, processed as one full block, and only the
// valid output bytes are copied out. SBPP and SBPP2 are the bytes per pixel
// of the first and second source, BPP of the destination.
// Scratch layout, 64 bytes per region: source 0 | source 1 | output.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               int width) {                                                   \
    const int tail = width & MASK;  /* pixels left over after full blocks */  \
    const int body = width & ~MASK; /* pixels covered by full blocks */       \
    SIMD_ALIGNED(uint8_t scratch[64 * 3]);                                    \
    uint8_t* const tmp_y = scratch;                                           \
    uint8_t* const tmp_uv = scratch + 64;                                     \
    uint8_t* const tmp_out = scratch + 128;                                   \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, body);                                 \
    }                                                                         \
    memset(scratch, 0, 64 * 2); /* source regions only, for msan */           \
    memcpy(tmp_y, y_buf + body * SBPP, tail * SBPP);                          \
    memcpy(tmp_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                        \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(tmp_y, tmp_uv, tmp_out, MASK + 1);                               \
    memcpy(dst_ptr + body * BPP, tmp_out, tail * BPP);                        \
  }
260 
// ANY21 wrappers. Trailing arguments are UVSHIFT, SBPP, SBPP2, BPP, MASK.

// Merge functions.
#if defined(HAS_MERGEUVROW_SSE2)
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif  // HAS_MERGEUVROW_SSE2
#if defined(HAS_MERGEUVROW_AVX2)
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif  // HAS_MERGEUVROW_AVX2
#if defined(HAS_MERGEUVROW_NEON)
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif  // HAS_MERGEUVROW_NEON
#if defined(HAS_MERGEUVROW_MSA)
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif  // HAS_MERGEUVROW_MSA

// Math functions.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif  // HAS_ARGBMULTIPLYROW_SSE2
#if defined(HAS_ARGBADDROW_SSE2)
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif  // HAS_ARGBADDROW_SSE2
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif  // HAS_ARGBSUBTRACTROW_SSE2
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBMULTIPLYROW_AVX2
#if defined(HAS_ARGBADDROW_AVX2)
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBADDROW_AVX2
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBSUBTRACTROW_AVX2
#if defined(HAS_ARGBMULTIPLYROW_NEON)
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBMULTIPLYROW_NEON
#if defined(HAS_ARGBADDROW_NEON)
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBADDROW_NEON
#if defined(HAS_ARGBSUBTRACTROW_NEON)
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBSUBTRACTROW_NEON
#if defined(HAS_ARGBMULTIPLYROW_MSA)
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif  // HAS_ARGBMULTIPLYROW_MSA
#if defined(HAS_ARGBADDROW_MSA)
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBADDROW_MSA
#if defined(HAS_ARGBSUBTRACTROW_MSA)
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif  // HAS_ARGBSUBTRACTROW_MSA
#if defined(HAS_SOBELROW_SSE2)
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif  // HAS_SOBELROW_SSE2
#if defined(HAS_SOBELROW_NEON)
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif  // HAS_SOBELROW_NEON
#if defined(HAS_SOBELROW_MSA)
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif  // HAS_SOBELROW_MSA
#if defined(HAS_SOBELTOPLANEROW_SSE2)
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif  // HAS_SOBELTOPLANEROW_SSE2
#if defined(HAS_SOBELTOPLANEROW_NEON)
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif  // HAS_SOBELTOPLANEROW_NEON
#if defined(HAS_SOBELTOPLANEROW_MSA)
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif  // HAS_SOBELTOPLANEROW_MSA
#if defined(HAS_SOBELXYROW_SSE2)
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif  // HAS_SOBELXYROW_SSE2
#if defined(HAS_SOBELXYROW_NEON)
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif  // HAS_SOBELXYROW_NEON
#if defined(HAS_SOBELXYROW_MSA)
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif  // HAS_SOBELXYROW_MSA
#undef ANY21
340 
// Any 2 planes to 1 with yuvconstants.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place; the remainder is staged
// in a zero-padded scratch buffer, processed as one full block, and only the
// valid output bytes are copied out. SBPP and SBPP2 are the bytes per pixel
// of the first and second source, BPP of the destination.
// Scratch layout, 128 bytes per region: source 0 | source 1 | output.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {          \
    const int tail = width & MASK;  /* pixels left over after full blocks */  \
    const int body = width & ~MASK; /* pixels covered by full blocks */       \
    SIMD_ALIGNED(uint8_t scratch[128 * 3]);                                   \
    uint8_t* const tmp_y = scratch;                                           \
    uint8_t* const tmp_uv = scratch + 128;                                    \
    uint8_t* const tmp_out = scratch + 256;                                   \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body);                   \
    }                                                                         \
    memset(scratch, 0, 128 * 2); /* source regions only, for msan */          \
    memcpy(tmp_y, y_buf + body * SBPP, tail * SBPP);                          \
    memcpy(tmp_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                        \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(tmp_y, tmp_uv, tmp_out, yuvconstants, MASK + 1);                 \
    memcpy(dst_ptr + body * BPP, tmp_out, tail * BPP);                        \
  }
358 
// Biplanar to RGB.
// ANY21C wrappers; trailing arguments are UVSHIFT, SBPP, SBPP2, BPP, MASK.
#if defined(HAS_NV12TOARGBROW_SSSE3)
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif  // HAS_NV12TOARGBROW_SSSE3
#if defined(HAS_NV12TOARGBROW_AVX2)
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif  // HAS_NV12TOARGBROW_AVX2
#if defined(HAS_NV12TOARGBROW_NEON)
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif  // HAS_NV12TOARGBROW_NEON
#if defined(HAS_NV12TOARGBROW_MSA)
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif  // HAS_NV12TOARGBROW_MSA
#if defined(HAS_NV21TOARGBROW_SSSE3)
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif  // HAS_NV21TOARGBROW_SSSE3
#if defined(HAS_NV21TOARGBROW_AVX2)
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif  // HAS_NV21TOARGBROW_AVX2
#if defined(HAS_NV21TOARGBROW_NEON)
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif  // HAS_NV21TOARGBROW_NEON
#if defined(HAS_NV21TOARGBROW_MSA)
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif  // HAS_NV21TOARGBROW_MSA
#if defined(HAS_NV12TORGB24ROW_NEON)
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif  // HAS_NV12TORGB24ROW_NEON
#if defined(HAS_NV21TORGB24ROW_NEON)
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif  // HAS_NV21TORGB24ROW_NEON
#if defined(HAS_NV12TORGB24ROW_SSSE3)
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif  // HAS_NV12TORGB24ROW_SSSE3
#if defined(HAS_NV21TORGB24ROW_SSSE3)
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif  // HAS_NV21TORGB24ROW_SSSE3
#if defined(HAS_NV12TORGB24ROW_AVX2)
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif  // HAS_NV12TORGB24ROW_AVX2
#if defined(HAS_NV21TORGB24ROW_AVX2)
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif  // HAS_NV21TORGB24ROW_AVX2
#if defined(HAS_NV12TORGB565ROW_SSSE3)
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif  // HAS_NV12TORGB565ROW_SSSE3
#if defined(HAS_NV12TORGB565ROW_AVX2)
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif  // HAS_NV12TORGB565ROW_AVX2
#if defined(HAS_NV12TORGB565ROW_NEON)
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif  // HAS_NV12TORGB565ROW_NEON
#if defined(HAS_NV12TORGB565ROW_MSA)
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif  // HAS_NV12TORGB565ROW_MSA
#undef ANY21C
415 
// Any 1 to 1.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place; the remainder is staged
// in a zero-padded scratch buffer, processed as one full block, and only the
// valid output bytes are copied out. SBPP / BPP are source / destination
// bytes per pixel; UVSHIFT > 0 means one source unit covers several pixels.
// Scratch layout, 128 bytes per region: source | output.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                   \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {        \
    const int tail = width & MASK;  /* pixels left over after full blocks */ \
    const int body = width & ~MASK; /* pixels covered by full blocks */      \
    SIMD_ALIGNED(uint8_t scratch[128 * 2]);                                  \
    uint8_t* const tmp_src = scratch;                                        \
    uint8_t* const tmp_dst = scratch + 128;                                  \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, body);                                      \
    }                                                                        \
    memset(tmp_src, 0, 128); /* for YUY2 and msan */                         \
    memcpy(tmp_src, src_ptr + (body >> UVSHIFT) * SBPP,                      \
           SS(tail, UVSHIFT) * SBPP);                                        \
    ANY_SIMD(tmp_src, tmp_dst, MASK + 1);                                    \
    memcpy(dst_ptr + body * BPP, tmp_dst, tail * BPP);                       \
  }
430 
// ANY11 wrappers: row copy and packed RGB format conversions.
// Trailing arguments are UVSHIFT, SBPP, BPP, MASK.
#if defined(HAS_COPYROW_AVX)
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif  // HAS_COPYROW_AVX
#if defined(HAS_COPYROW_SSE2)
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif  // HAS_COPYROW_SSE2
#if defined(HAS_COPYROW_NEON)
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif  // HAS_COPYROW_NEON
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif  // HAS_ARGBTORGB24ROW_SSSE3
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
#endif  // HAS_ARGBTORGB24ROW_AVX2
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
#endif  // HAS_ARGBTORGB24ROW_AVX512VBMI
#if defined(HAS_ARGBTORAWROW_AVX2)
ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
#endif  // HAS_ARGBTORAWROW_AVX2
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif  // HAS_ARGBTORGB565ROW_AVX2
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif  // HAS_ARGBTOARGB4444ROW_AVX2
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
#endif  // HAS_ABGRTOAR30ROW_SSSE3
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif  // HAS_ARGBTOAR30ROW_SSSE3
#if defined(HAS_ABGRTOAR30ROW_AVX2)
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#endif  // HAS_ABGRTOAR30ROW_AVX2
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif  // HAS_ARGBTOAR30ROW_AVX2
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif  // HAS_J400TOARGBROW_SSE2
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif  // HAS_J400TOARGBROW_AVX2
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif  // HAS_I400TOARGBROW_SSE2
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif  // HAS_I400TOARGBROW_AVX2
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif  // HAS_RGB24TOARGBROW_SSSE3
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif  // HAS_RAWTORGB24ROW_SSSE3
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif  // HAS_RGB565TOARGBROW_AVX2
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif  // HAS_ARGB1555TOARGBROW_AVX2
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif  // HAS_ARGB4444TOARGBROW_AVX2
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
#endif  // HAS_ARGBTORGB24ROW_NEON
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
#endif  // HAS_ARGBTORGB24ROW_MSA
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif  // HAS_RAWTORGB24ROW_NEON
#if defined(HAS_RAWTORGB24ROW_MSA)
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
#endif  // HAS_RAWTORGB24ROW_MSA
// ANY11 wrappers: packed RGB / packed YUV to Y plane kernels.
// Trailing arguments are UVSHIFT, SBPP, BPP, MASK.
#if defined(HAS_ARGBTOYROW_AVX2)
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif  // HAS_ARGBTOYROW_AVX2
#if defined(HAS_ARGBTOYJROW_AVX2)
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif  // HAS_ARGBTOYJROW_AVX2
#if defined(HAS_UYVYTOYROW_AVX2)
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif  // HAS_UYVYTOYROW_AVX2
#if defined(HAS_YUY2TOYROW_AVX2)
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif  // HAS_YUY2TOYROW_AVX2
#if defined(HAS_ARGBTOYROW_SSSE3)
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif  // HAS_ARGBTOYROW_SSSE3
#if defined(HAS_BGRATOYROW_SSSE3)
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif  // HAS_BGRATOYROW_SSSE3
#if defined(HAS_ARGBTOYJROW_SSSE3)
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif  // HAS_ARGBTOYJROW_SSSE3
#if defined(HAS_ARGBTOYROW_NEON)
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#endif  // HAS_ARGBTOYROW_NEON
#if defined(HAS_ARGBTOYROW_MSA)
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif  // HAS_ARGBTOYROW_MSA
#if defined(HAS_ARGBTOYJROW_NEON)
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif  // HAS_ARGBTOYJROW_NEON
#if defined(HAS_ARGBTOYJROW_MSA)
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif  // HAS_ARGBTOYJROW_MSA
#if defined(HAS_BGRATOYROW_NEON)
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif  // HAS_BGRATOYROW_NEON
#if defined(HAS_BGRATOYROW_MSA)
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif  // HAS_BGRATOYROW_MSA
#if defined(HAS_ABGRTOYROW_NEON)
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif  // HAS_ABGRTOYROW_NEON
#if defined(HAS_ABGRTOYROW_MSA)
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#endif  // HAS_ABGRTOYROW_MSA
#if defined(HAS_RGBATOYROW_NEON)
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif  // HAS_RGBATOYROW_NEON
#if defined(HAS_RGBATOYROW_MSA)
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif  // HAS_RGBATOYROW_MSA
#if defined(HAS_RGB24TOYROW_NEON)
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif  // HAS_RGB24TOYROW_NEON
#if defined(HAS_RGB24TOYROW_MSA)
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif  // HAS_RGB24TOYROW_MSA
#if defined(HAS_RAWTOYROW_NEON)
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif  // HAS_RAWTOYROW_NEON
#if defined(HAS_RAWTOYROW_MSA)
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif  // HAS_RAWTOYROW_MSA
#if defined(HAS_RGB565TOYROW_NEON)
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif  // HAS_RGB565TOYROW_NEON
#if defined(HAS_RGB565TOYROW_MSA)
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif  // HAS_RGB565TOYROW_MSA
#if defined(HAS_ARGB1555TOYROW_NEON)
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif  // HAS_ARGB1555TOYROW_NEON
#if defined(HAS_ARGB1555TOYROW_MSA)
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif  // HAS_ARGB1555TOYROW_MSA
#if defined(HAS_ARGB4444TOYROW_NEON)
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif  // HAS_ARGB4444TOYROW_NEON
#if defined(HAS_YUY2TOYROW_NEON)
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif  // HAS_YUY2TOYROW_NEON
#if defined(HAS_UYVYTOYROW_NEON)
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#endif  // HAS_UYVYTOYROW_NEON
#if defined(HAS_YUY2TOYROW_MSA)
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif  // HAS_YUY2TOYROW_MSA
#if defined(HAS_UYVYTOYROW_MSA)
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif  // HAS_UYVYTOYROW_MSA
// ANY11 wrappers: packed RGB formats to ARGB (NEON and MSA kernels).
// Trailing arguments are UVSHIFT, SBPP, BPP, MASK.
#if defined(HAS_RGB24TOARGBROW_NEON)
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif  // HAS_RGB24TOARGBROW_NEON
#if defined(HAS_RGB24TOARGBROW_MSA)
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif  // HAS_RGB24TOARGBROW_MSA
#if defined(HAS_RAWTOARGBROW_NEON)
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif  // HAS_RAWTOARGBROW_NEON
#if defined(HAS_RAWTOARGBROW_MSA)
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif  // HAS_RAWTOARGBROW_MSA
#if defined(HAS_RGB565TOARGBROW_NEON)
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif  // HAS_RGB565TOARGBROW_NEON
#if defined(HAS_RGB565TOARGBROW_MSA)
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif  // HAS_RGB565TOARGBROW_MSA
#if defined(HAS_ARGB1555TOARGBROW_NEON)
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif  // HAS_ARGB1555TOARGBROW_NEON
#if defined(HAS_ARGB1555TOARGBROW_MSA)
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif  // HAS_ARGB1555TOARGBROW_MSA
#if defined(HAS_ARGB4444TOARGBROW_NEON)
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif  // HAS_ARGB4444TOARGBROW_NEON
#if defined(HAS_ARGB4444TOARGBROW_MSA)
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif  // HAS_ARGB4444TOARGBROW_MSA
// ANY11 wrappers: ARGB attenuate / unattenuate and alpha extraction kernels.
// Trailing arguments are UVSHIFT, SBPP, BPP, MASK.
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif  // HAS_ARGBATTENUATEROW_SSSE3
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif  // HAS_ARGBUNATTENUATEROW_SSE2
#if defined(HAS_ARGBATTENUATEROW_AVX2)
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif  // HAS_ARGBATTENUATEROW_AVX2
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif  // HAS_ARGBUNATTENUATEROW_AVX2
#if defined(HAS_ARGBATTENUATEROW_NEON)
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif  // HAS_ARGBATTENUATEROW_NEON
#if defined(HAS_ARGBATTENUATEROW_MSA)
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif  // HAS_ARGBATTENUATEROW_MSA
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif  // HAS_ARGBEXTRACTALPHAROW_SSE2
#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
#endif  // HAS_ARGBEXTRACTALPHAROW_AVX2
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif  // HAS_ARGBEXTRACTALPHAROW_NEON
#if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
#endif  // HAS_ARGBEXTRACTALPHAROW_MSA
#undef ANY11
685 
// Any 1 to 1 blended.  Destination is read, modify, write.
// Defines NAMEANY, a wrapper around ANY_SIMD that accepts any width. Full
// blocks of MASK + 1 pixels are processed in place. For the remainder, both
// the source pixels and the current destination pixels are copied into a
// zeroed scratch buffer, the kernel updates the scratch destination as one
// full block, and only the valid bytes are written back.
// Scratch layout, 64 bytes per region: source | destination.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                  \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {        \
    const int tail = width & MASK;  /* pixels left over after full blocks */ \
    const int body = width & ~MASK; /* pixels covered by full blocks */      \
    uint8_t* const dst_tail = dst_ptr + body * BPP;                          \
    SIMD_ALIGNED(uint8_t scratch[64 * 2]);                                   \
    uint8_t* const tmp_src = scratch;                                        \
    uint8_t* const tmp_dst = scratch + 64;                                   \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, body);                                      \
    }                                                                        \
    memset(scratch, 0, 64 * 2); /* both regions are read; for msan */        \
    memcpy(tmp_src, src_ptr + (body >> UVSHIFT) * SBPP,                      \
           SS(tail, UVSHIFT) * SBPP);                                        \
    memcpy(tmp_dst, dst_tail, tail * BPP);                                   \
    ANY_SIMD(tmp_src, tmp_dst, MASK + 1);                                    \
    memcpy(dst_tail, tmp_dst, tail * BPP);                                   \
  }
701 
// ANY11B instantiations.  Per the macro above, the numeric arguments are
// UVSHIFT, SBPP (source bytes per pixel), BPP (destination bytes per pixel)
// and MASK (the width is split into a multiple of MASK + 1 plus a remainder).
// The ARGBCopyAlpha rows use 4 source bytes per pixel, the ARGBCopyYToAlpha
// rows use 1; both use 4 destination bytes per pixel.
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
// ANY11B is not used past this point.
#undef ANY11B
715 
// Any 1 to 1 with one extra parameter of type T that is forwarded unchanged
// to the SIMD row.  The leftover (width & MASK) pixels are staged in the
// first 64 bytes of the scratch buffer, converted as a full MASK + 1 pixel
// group into the second 64 bytes, and only the leftover pixels are copied out.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
    SIMD_ALIGNED(uint8_t scratch[64 * 2]);                                     \
    uint8_t* const out_stage = scratch + 64;                                   \
    const int body = width & ~MASK;                                            \
    const int tail = width & MASK;                                             \
    memset(scratch, 0, 64); /* for msan */                                     \
    if (body > 0) {                                                            \
      ANY_SIMD(src_ptr, dst_ptr, param, body);                                 \
    }                                                                          \
    memcpy(scratch, src_ptr + body * SBPP, tail * SBPP);                       \
    ANY_SIMD(scratch, out_stage, param, MASK + 1);                             \
    memcpy(dst_ptr + body * BPP, out_stage, tail * BPP);                       \
  }
730 
// ANY11P instantiations: (name, SIMD row, parameter type T, SBPP, BPP, MASK).
// The dither rows use 4 source bytes and 2 destination bytes per pixel and
// take their extra parameter by value as a const uint32_t.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
       ARGBToRGB565DitherRow_SSE2,
       const uint32_t,
       4,
       2,
       3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
       ARGBToRGB565DitherRow_AVX2,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
       ARGBToRGB565DitherRow_NEON,
       const uint32_t,
       4,
       2,
       7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA,
       ARGBToRGB565DitherRow_MSA,
       const uint32_t,
       4,
       2,
       7)
#endif
// The shuffle rows use 4 bytes per pixel on both sides; their extra parameter
// is a const uint8_t* that the wrapper forwards to the SIMD row untouched.
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
// ANY11P is not used past this point.
#undef ANY11P
776 
// Any 1 to 1 with an int scale parameter and typed elements.  src_ptr and
// dst_ptr advance in whole STYPE / DTYPE elements, while SBPP and BPP are the
// byte counts per source / destination element used for the staging copies.
// The staging arrays hold 32 elements each, so MASK + 1 must not exceed 32.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)             \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
    SIMD_ALIGNED(STYPE in_stage[32]);                                        \
    SIMD_ALIGNED(DTYPE out_stage[32]);                                       \
    const int body = width & ~MASK;                                          \
    const int tail = width & MASK;                                           \
    memset(in_stage, 0, 32 * SBPP); /* for msan */                           \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, scale, body);                               \
    }                                                                        \
    memcpy(in_stage, src_ptr + body, tail * SBPP);                           \
    ANY_SIMD(in_stage, out_stage, scale, MASK + 1);                          \
    memcpy(dst_ptr + body, out_stage, tail * BPP);                           \
  }
792 
// ANY11C instantiations: (name, SIMD row, SBPP, BPP, STYPE, DTYPE, MASK).
// SBPP / BPP match the byte size of STYPE / DTYPE: 2 and 1 for the
// uint16_t -> uint8_t rows, 1 and 2 for the uint8_t -> uint16_t rows.
#ifdef HAS_CONVERT16TO8ROW_SSSE3
ANY11C(Convert16To8Row_Any_SSSE3,
       Convert16To8Row_SSSE3,
       2,
       1,
       uint16_t,
       uint8_t,
       15)
#endif
#ifdef HAS_CONVERT16TO8ROW_AVX2
ANY11C(Convert16To8Row_Any_AVX2,
       Convert16To8Row_AVX2,
       2,
       1,
       uint16_t,
       uint8_t,
       31)
#endif
#ifdef HAS_CONVERT8TO16ROW_SSE2
ANY11C(Convert8To16Row_Any_SSE2,
       Convert8To16Row_SSE2,
       1,
       2,
       uint8_t,
       uint16_t,
       15)
#endif
#ifdef HAS_CONVERT8TO16ROW_AVX2
ANY11C(Convert8To16Row_Any_AVX2,
       Convert8To16Row_AVX2,
       1,
       2,
       uint8_t,
       uint16_t,
       31)
#endif
// Release the name: ANY11C is defined again further down with a different
// parameter list.
#undef ANY11C
830 
// Any 1 to 1 with a float parameter and typed (non-byte) elements.
// src_ptr and dst_ptr advance in whole ST / T elements, so the leftover
// elements are copied using sizeof(ST) / sizeof(T).  The SBPP / BPP arguments
// are kept for call-site compatibility but no longer drive the copy sizes;
// this keeps the copy-out complete even when an instantiation passes a BPP
// that differs from sizeof(T) (e.g. float output).
// The staging arrays hold 32 elements each, so MASK + 1 must not exceed 32.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK)             \
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
    SIMD_ALIGNED(ST temp[32]);                                          \
    SIMD_ALIGNED(T out[32]);                                            \
    memset(temp, 0, sizeof(temp)); /* for msan */                       \
    int r = width & MASK;                                               \
    int n = width & ~MASK;                                              \
    if (n > 0) {                                                        \
      ANY_SIMD(src_ptr, dst_ptr, param, n);                             \
    }                                                                   \
    /* Copy whole elements: src_ptr/dst_ptr advance in ST/T units. */   \
    memcpy(temp, src_ptr + n, r * sizeof(ST));                          \
    ANY_SIMD(temp, out, param, MASK + 1);                               \
    memcpy(dst_ptr + n, out, r * sizeof(T));                            \
  }
846 
// ANY11P16 instantiations: (name, SIMD row, ST, T, SBPP, BPP, MASK).
// The half-float rows below take uint16_t input and produce uint16_t output,
// so both byte counts are 2.
#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
ANY11P16(HalfFloat1Row_Any_F16C,
         HalfFloat1Row_F16C,
         uint16_t,
         uint16_t,
         2,
         2,
         15)
#endif
// Both NEON half-float wrappers are emitted under the single
// HAS_HALFFLOATROW_NEON guard.
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
ANY11P16(HalfFloat1Row_Any_NEON,
         HalfFloat1Row_NEON,
         uint16_t,
         uint16_t,
         2,
         2,
         7)
#endif
#ifdef HAS_HALFFLOATROW_MSA
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
#endif
#ifdef HAS_BYTETOFLOATROW_NEON
// Output elements are float, so the destination byte count per element is
// sizeof(float) == 4 (a value of 3 would truncate the copied remainder).
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
#endif
#undef ANY11P16
880 
// Any 1 to 1 with yuvconstants.  The source may be subsampled: its byte
// offset advances SBPP bytes per (1 << UVSHIFT) pixels, and the leftover
// source byte count is rounded up with SS().  The scratch buffer holds 128
// bytes of staged source followed by 128 bytes of converted output.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)           \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr,              \
               const struct YuvConstants* yuvconstants, int width) {  \
    SIMD_ALIGNED(uint8_t scratch[128 * 2]);                           \
    uint8_t* const out_stage = scratch + 128;                         \
    const int body = width & ~MASK;                                   \
    const int tail = width & MASK;                                    \
    memset(scratch, 0, 128); /* for YUY2 and msan */                  \
    if (body > 0) {                                                   \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, body);                 \
    }                                                                 \
    memcpy(scratch, src_ptr + (body >> UVSHIFT) * SBPP,               \
           SS(tail, UVSHIFT) * SBPP);                                 \
    ANY_SIMD(scratch, out_stage, yuvconstants, MASK + 1);             \
    memcpy(dst_ptr + body * BPP, out_stage, tail * BPP);              \
  }
// Instantiations of the yuvconstants flavour of ANY11C.  All of them pass
// UVSHIFT = 1, SBPP = 4 and BPP = 4: the source offset advances 4 bytes per
// 2 pixels while the destination advances 4 bytes per pixel.
// Each HAS_YUY2TOARGBROW_* guard also covers the matching UYVY wrapper.
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MSA)
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
// ANY11C is not used past this point.
#undef ANY11C
913 
// Any 1 to 1 interpolate.  The SIMD row reads two source rows, the second one
// located src_stride_ptr bytes after the first.  For the leftover pixels both
// rows are staged 64 bytes apart in the scratch buffer (so the SIMD row is
// called with a stride of 64) and the result lands in the third 64-byte slot.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                           \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr,                     \
               ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
    SIMD_ALIGNED(uint8_t scratch[64 * 3]);                                   \
    uint8_t* const row0 = scratch;                                           \
    uint8_t* const row1 = scratch + 64;                                      \
    uint8_t* const blended = scratch + 128;                                  \
    const int body = width & ~MASK;                                          \
    const int tail = width & MASK;                                           \
    memset(scratch, 0, 64 * 2); /* for msan */                               \
    if (body > 0) {                                                          \
      ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, body, source_y_fraction);   \
    }                                                                        \
    memcpy(row0, src_ptr + body * SBPP, tail * SBPP);                        \
    memcpy(row1, src_ptr + src_stride_ptr + body * SBPP, tail * SBPP);       \
    ANY_SIMD(blended, row0, 64, MASK + 1, source_y_fraction);                \
    memcpy(dst_ptr + body * BPP, blended, tail * BPP);                       \
  }
930 
// ANY11T instantiations: (name, SIMD row, SBPP, BPP, MASK).  Every variant
// uses 1 byte per pixel on both sides; MASK + 1 is 32 for AVX2 and MSA and
// 16 for SSSE3 and NEON.
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
// ANY11T is not used past this point.
#undef ANY11T
944 
// Any 1 to 1 mirror.  The bulk call starts `tail` pixels into the source and
// writes from the start of the destination.  The first `tail` source pixels
// are then staged and run through the SIMD row as a full MASK + 1 pixel
// group; the wanted pixels are taken from the last `tail` slots of that
// group's output and copied to the end of the destination.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t scratch[64 * 2]);                            \
    uint8_t* const flipped = scratch + 64;                            \
    const int body = width & ~MASK;                                   \
    const int tail = width & MASK;                                    \
    memset(scratch, 0, 64); /* for msan */                            \
    if (body > 0) {                                                   \
      ANY_SIMD(src_ptr + tail * BPP, dst_ptr, body);                  \
    }                                                                 \
    memcpy(scratch, src_ptr, tail * BPP);                             \
    ANY_SIMD(scratch, flipped, MASK + 1);                             \
    memcpy(dst_ptr + body * BPP, flipped + (MASK + 1 - tail) * BPP,   \
           tail * BPP);                                               \
  }
959 
// ANY11M instantiations: (name, SIMD row, BPP, MASK).
// The MirrorRow variants use 1 byte per pixel.
#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#endif
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
// The ARGBMirrorRow variants use 4 bytes per pixel.
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
// ANY11M is not used past this point.
#undef ANY11M
985 
// Any 1 plane (memset style): there is no source, only a fill value.  The
// leftover pixels are produced by filling a full MASK + 1 pixel group in a
// local buffer and copying just the leftover part to the destination.  The
// buffer is write-only for the SIMD row, so it is not cleared first.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)        \
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
    SIMD_ALIGNED(uint8_t fill[64]);                  \
    const int body = width & ~MASK;                  \
    const int tail = width & MASK;                   \
    if (body > 0) {                                  \
      ANY_SIMD(dst_ptr, v32, body);                  \
    }                                                \
    ANY_SIMD(fill, v32, MASK + 1);                   \
    memcpy(dst_ptr + body * BPP, fill, tail * BPP);  \
  }
998 
// ANY1 instantiations: (name, SIMD row, value type T, BPP, MASK).
// The SetRow variants take a uint8_t value with 1 byte per pixel; the
// ARGBSetRow variants take a uint32_t value with 4 bytes per pixel.
#ifdef HAS_SETROW_X86
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MSA
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
// ANY1 is not used past this point.
#undef ANY1
1012 
// Any 1 to 2.  Outputs U and V planes.  The source byte offset advances BPP
// bytes per (1 << UVSHIFT) pixels, and each output plane advances one byte
// per (1 << DUVSHIFT) pixels; leftover counts are rounded up with SS().
// Scratch layout: 128 bytes of staged source, 128 bytes of U, 128 bytes of V.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v,  \
               int width) {                                             \
    SIMD_ALIGNED(uint8_t scratch[128 * 3]);                             \
    uint8_t* const u_stage = scratch + 128;                             \
    uint8_t* const v_stage = scratch + 256;                             \
    const int body = width & ~MASK;                                     \
    const int tail = width & MASK;                                      \
    memset(scratch, 0, 128); /* for msan */                             \
    if (body > 0) {                                                     \
      ANY_SIMD(src_ptr, dst_u, dst_v, body);                            \
    }                                                                   \
    memcpy(scratch, src_ptr + (body >> UVSHIFT) * BPP,                  \
           SS(tail, UVSHIFT) * BPP);                                    \
    ANY_SIMD(scratch, u_stage, v_stage, MASK + 1);                      \
    memcpy(dst_u + (body >> DUVSHIFT), u_stage, SS(tail, DUVSHIFT));    \
    memcpy(dst_v + (body >> DUVSHIFT), v_stage, SS(tail, DUVSHIFT));    \
  }
1029 
// ANY12 instantiations: (name, SIMD row, UVSHIFT, BPP, DUVSHIFT, MASK).
// SplitUV: 2 source bytes per pixel, one U and one V byte out per pixel.
#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
// ARGBToUV444: 4 source bytes per pixel, no subsampling on either side.
#ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
// YUY2/UYVY to UV422: 4 source bytes per 2 pixels (UVSHIFT = 1) and one U and
// one V byte per 2 pixels (DUVSHIFT = 1).
#ifdef HAS_YUY2TOUV422ROW_AVX2
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
// NOTE(review): in the NEON and MSA groups below the ARGBToUV444Row wrapper
// is emitted under the HAS_YUY2TOUV422ROW_* guard rather than a
// HAS_ARGBTOUV444ROW_* guard like the SSSE3 one above -- confirm this is
// intentional.
#ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_MSA
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
// ANY12 is not used past this point.
#undef ANY12
1064 
// Any 1 to 3.  Outputs R, G and B planes, one byte per pixel each.
// Scratch layout in 16-byte units: the first three units stage the leftover
// source (BPP bytes per pixel), units 3, 4 and 5 receive R, G and B.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                             \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g,  \
               uint8_t* dst_b, int width) {                             \
    SIMD_ALIGNED(uint8_t scratch[16 * 6]);                              \
    uint8_t* const r_stage = scratch + 16 * 3;                          \
    uint8_t* const g_stage = scratch + 16 * 4;                          \
    uint8_t* const b_stage = scratch + 16 * 5;                          \
    const int body = width & ~MASK;                                     \
    const int tail = width & MASK;                                      \
    memset(scratch, 0, 16 * 3); /* for msan */                          \
    if (body > 0) {                                                     \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, body);                     \
    }                                                                   \
    memcpy(scratch, src_ptr + body * BPP, tail * BPP);                  \
    ANY_SIMD(scratch, r_stage, g_stage, b_stage, MASK + 1);             \
    memcpy(dst_r + body, r_stage, tail);                                \
    memcpy(dst_g + body, g_stage, tail);                                \
    memcpy(dst_b + body, b_stage, tail);                                \
  }
1082 
// ANY13 instantiations: (name, SIMD row, BPP, MASK).  Both variants use 3
// source bytes per pixel with MASK + 1 = 16.
#ifdef HAS_SPLITRGBROW_SSSE3
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#endif
#ifdef HAS_SPLITRGBROW_NEON
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif
// Release the helper macro once its instantiations are done, like the other
// ANY* helpers in this file.
#undef ANY13
1089 
// Any 1 to 2 with source stride (2 rows of source).  Outputs U and V planes,
// one byte each per 2 pixels of width.
// Scratch layout in 128-byte units: source row 0, source row 1, U, V.  A
// 128 byte row allows for 32 avx ARGB pixels.  For the leftover pixels the
// SIMD row is called with a stride of 128 to step between the staged rows.
// When the width is odd and the source is not subsampled (UVSHIFT == 0), the
// last staged pixel of each row is duplicated before the SIMD call.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u,   \
               uint8_t* dst_v, int width) {                                  \
    SIMD_ALIGNED(uint8_t scratch[128 * 4]);                                  \
    uint8_t* const row0 = scratch;                                           \
    uint8_t* const row1 = scratch + 128;                                     \
    uint8_t* const u_stage = scratch + 256;                                  \
    uint8_t* const v_stage = scratch + 384;                                  \
    const int body = width & ~MASK;                                          \
    const int tail = width & MASK;                                           \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                          \
    const int tail_offset = (body >> UVSHIFT) * BPP;                         \
    memset(scratch, 0, 128 * 2); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, body);                 \
    }                                                                        \
    memcpy(row0, src_ptr + tail_offset, tail_bytes);                         \
    memcpy(row1, src_ptr + src_stride_ptr + tail_offset, tail_bytes);        \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);               \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);               \
    }                                                                        \
    ANY_SIMD(row0, 128, u_stage, v_stage, MASK + 1);                         \
    memcpy(dst_u + (body >> 1), u_stage, SS(tail, 1));                       \
    memcpy(dst_v + (body >> 1), v_stage, SS(tail, 1));                       \
  }
1115 
// ANY12S instantiations: (name, SIMD row, UVSHIFT, BPP, MASK).
// UVSHIFT = 0 entries address the source at BPP bytes per pixel (4, 3 or 2
// below); UVSHIFT = 1 entries (YUY2 / UYVY) address it at 4 bytes per 2
// pixels.
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
// The single HAS_ARGBTOUVROW_SSSE3 guard covers all five SSSE3 wrappers below.
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_AVX2
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
// NEON and MSA wrappers for sources addressed at 4 bytes per pixel.
#ifdef HAS_ARGBTOUVROW_NEON
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MSA
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MSA
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MSA
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31)
#endif
// Sources addressed at 3 bytes per pixel.
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
// Sources addressed at 2 bytes per pixel.
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
// NEON and MSA wrappers for sources addressed at 4 bytes per 2 pixels.
#ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
// ANY12S is not used past this point.
#undef ANY12S
1207 
1208 #ifdef __cplusplus
1209 }  // extern "C"
1210 }  // namespace libyuv
1211 #endif
1212