1 /*
2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
// Size of a subsampled plane: width / 2^shift, rounded up so that a width
// that is not a multiple of the subsampling factor still gets its last sample.
#define SS(width, shift) (((width) + ((1 << (shift)) - 1)) >> (shift))
24 
// Any 4 planes to 1 with yuvconstants.
// NAMEANY accepts any width.  ANY_SIMD converts the leading (width & ~MASK)
// pixels straight from the caller's buffers; the trailing (width & MASK)
// pixels are copied into a scratch buffer, converted there with a width of
// MASK + 1, and only the valid part of that result is copied to dst_ptr.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)            \
  void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
               const uint8* a_buf, uint8* dst_ptr,                         \
               const struct YuvConstants* yuvconstants, int width) {       \
    /* 64-byte slots: Y, U, V and A inputs, then the converted output. */  \
    SIMD_ALIGNED(uint8 scratch[64 * 5]);                                   \
    uint8* const tail_y = scratch;                                         \
    uint8* const tail_u = scratch + 64;                                    \
    uint8* const tail_v = scratch + 128;                                   \
    uint8* const tail_a = scratch + 192;                                   \
    uint8* const tail_dst = scratch + 256;                                 \
    const int tail = width & MASK;                                         \
    const int body = width & ~MASK;                                        \
    const int body_uv = body >> UVSHIFT;                                   \
    const int tail_uv = SS(tail, UVSHIFT);                                 \
    memset(scratch, 0, 64 * 4); /* for msan */                             \
    if (body > 0) {                                                        \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);   \
    }                                                                      \
    memcpy(tail_y, y_buf + body, tail);                                    \
    memcpy(tail_u, u_buf + body_uv, tail_uv);                              \
    memcpy(tail_v, v_buf + body_uv, tail_uv);                              \
    memcpy(tail_a, a_buf + body, tail);                                    \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_a, tail_dst, yuvconstants,       \
             MASK + 1);                                                    \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tail_dst,                   \
           SS(tail, DUVSHIFT) * BPP);                                      \
  }
46 
// ANY41C instantiations: NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I422ALPHATOARGBROW_NEON)
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#if defined(HAS_I422ALPHATOARGBROW_MSA)
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#undef ANY41C
60 
// Any 3 planes to 1.
// ANY_SIMD handles the leading (width & ~MASK) pixels in place; the trailing
// (width & MASK) pixels go through a zeroed scratch buffer that is converted
// with a width of MASK + 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)             \
  void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
               uint8* dst_ptr, int width) {                                \
    /* 64-byte slots: Y, U and V inputs, then the converted output. */     \
    SIMD_ALIGNED(uint8 scratch[64 * 4]);                                   \
    uint8* const tail_y = scratch;                                         \
    uint8* const tail_u = scratch + 64;                                    \
    uint8* const tail_v = scratch + 128;                                   \
    uint8* const tail_dst = scratch + 192;                                 \
    const int tail = width & MASK;                                         \
    const int body = width & ~MASK;                                        \
    const int body_uv = body >> UVSHIFT;                                   \
    const int tail_uv = SS(tail, UVSHIFT);                                 \
    memset(scratch, 0, 64 * 3); /* for YUY2 and msan */                    \
    if (body > 0) {                                                        \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body);                        \
    }                                                                      \
    memcpy(tail_y, y_buf + body, tail);                                    \
    memcpy(tail_u, u_buf + body_uv, tail_uv);                              \
    memcpy(tail_v, v_buf + body_uv, tail_uv);                              \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_dst, MASK + 1);                  \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tail_dst,                   \
           SS(tail, DUVSHIFT) * BPP);                                      \
  }
// ANY31 instantiations: NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_I422TOYUY2ROW_SSE2)
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOYUY2ROW_NEON)
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#if defined(HAS_I422TOUYVYROW_NEON)
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#if defined(HAS_BLENDPLANEROW_SSSE3)
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#undef ANY31
102 
// Any 3 planes to 1 with yuvconstants.
// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
// ANY_SIMD handles the leading (width & ~MASK) pixels in place; the trailing
// (width & MASK) pixels go through a zeroed scratch buffer that is converted
// with a width of MASK + 1.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)            \
  void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
               uint8* dst_ptr, const struct YuvConstants* yuvconstants,    \
               int width) {                                                \
    /* 64-byte slots: Y, U and V inputs, then the converted output. */     \
    SIMD_ALIGNED(uint8 scratch[64 * 4]);                                   \
    uint8* const tail_y = scratch;                                         \
    uint8* const tail_u = scratch + 64;                                    \
    uint8* const tail_v = scratch + 128;                                   \
    uint8* const tail_dst = scratch + 192;                                 \
    const int tail = width & MASK;                                         \
    const int body = width & ~MASK;                                        \
    const int body_uv = body >> UVSHIFT;                                   \
    const int tail_uv = SS(tail, UVSHIFT);                                 \
    memset(scratch, 0, 64 * 3); /* for YUY2 and msan */                    \
    if (body > 0) {                                                        \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);          \
    }                                                                      \
    memcpy(tail_y, y_buf + body, tail);                                    \
    memcpy(tail_u, u_buf + body_uv, tail_uv);                              \
    memcpy(tail_v, v_buf + body_uv, tail_uv);                              \
    if (width & 1) {                                                       \
      /* Odd width: repeat the last U and V sample into the next slot. */  \
      tail_u[tail_uv] = tail_u[tail_uv - 1];                               \
      tail_v[tail_uv] = tail_v[tail_uv - 1];                               \
    }                                                                      \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_dst, yuvconstants, MASK + 1);    \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, tail_dst,                   \
           SS(tail, DUVSHIFT) * BPP);                                      \
  }
128 
// ANY31C instantiations: NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK.
#if defined(HAS_I422TOARGBROW_SSSE3)
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#if defined(HAS_I444TOARGBROW_SSSE3)
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
#endif  // HAS_I444TOARGBROW_SSSE3
#if defined(HAS_I422TORGB24ROW_AVX2)
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#if defined(HAS_I444TOARGBROW_AVX2)
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#if defined(HAS_I422TOARGB4444ROW_AVX2)
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGB1555ROW_AVX2)
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TORGB565ROW_AVX2)
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGBROW_NEON)
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
#if defined(HAS_I422TOARGBROW_MSA)
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#undef ANY31C
186 
// Any 2 planes to 1.
// SBPP and SBPP2 are the bytes per pixel of the first and second source,
// BPP the bytes per pixel of the destination.  ANY_SIMD handles the leading
// (width & ~MASK) pixels in place; the trailing (width & MASK) pixels go
// through a scratch buffer that is converted with a width of MASK + 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)       \
  void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
               int width) {                                             \
    /* 64-byte slots: first source, second source, then the output. */  \
    SIMD_ALIGNED(uint8 scratch[64 * 3]);                                \
    uint8* const tail_y = scratch;                                      \
    uint8* const tail_uv = scratch + 64;                                \
    uint8* const tail_dst = scratch + 128;                              \
    const int tail = width & MASK;                                      \
    const int body = width & ~MASK;                                     \
    memset(scratch, 0, 64 * 2); /* for msan */                          \
    if (body > 0) {                                                     \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, body);                           \
    }                                                                   \
    memcpy(tail_y, y_buf + body * SBPP, tail * SBPP);                   \
    memcpy(tail_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                 \
           SS(tail, UVSHIFT) * SBPP2);                                  \
    ANY_SIMD(tail_y, tail_uv, tail_dst, MASK + 1);                      \
    memcpy(dst_ptr + body * BPP, tail_dst, tail * BPP);                 \
  }
204 
// ANY21 instantiations:
// NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK.

// Merge functions.
#if defined(HAS_MERGEUVROW_SSE2)
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#if defined(HAS_MERGEUVROW_AVX2)
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#if defined(HAS_MERGEUVROW_NEON)
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif

// Math functions.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBADDROW_SSE2)
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBADDROW_AVX2)
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBADDROW_NEON)
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
#if defined(HAS_ARGBADDROW_MSA)
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
#if defined(HAS_SOBELROW_SSE2)
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELROW_NEON)
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#if defined(HAS_SOBELROW_MSA)
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELTOPLANEROW_SSE2)
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#if defined(HAS_SOBELTOPLANEROW_MSA)
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#if defined(HAS_SOBELXYROW_SSE2)
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#if defined(HAS_SOBELXYROW_NEON)
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#if defined(HAS_SOBELXYROW_MSA)
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#undef ANY21
281 
// Any 2 planes to 1 with yuvconstants.
// SBPP and SBPP2 are the bytes per pixel of the first and second source,
// BPP the bytes per pixel of the destination.  ANY_SIMD handles the leading
// (width & ~MASK) pixels in place; the trailing (width & MASK) pixels go
// through a scratch buffer that is converted with a width of MASK + 1.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)      \
  void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {    \
    /* 64-byte slots: first source, second source, then the output. */  \
    SIMD_ALIGNED(uint8 scratch[64 * 3]);                                \
    uint8* const tail_y = scratch;                                      \
    uint8* const tail_uv = scratch + 64;                                \
    uint8* const tail_dst = scratch + 128;                              \
    const int tail = width & MASK;                                      \
    const int body = width & ~MASK;                                     \
    memset(scratch, 0, 64 * 2); /* for msan */                          \
    if (body > 0) {                                                     \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body);             \
    }                                                                   \
    memcpy(tail_y, y_buf + body * SBPP, tail * SBPP);                   \
    memcpy(tail_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                 \
           SS(tail, UVSHIFT) * SBPP2);                                  \
    ANY_SIMD(tail_y, tail_uv, tail_dst, yuvconstants, MASK + 1);        \
    memcpy(dst_ptr + body * BPP, tail_dst, tail * BPP);                 \
  }
299 
// ANY21C instantiations:
// NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK.

// Biplanar to RGB.
#if defined(HAS_NV12TOARGBROW_SSSE3)
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_AVX2)
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#if defined(HAS_NV12TOARGBROW_NEON)
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_SSSE3)
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_AVX2)
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV21TOARGBROW_MSA)
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#if defined(HAS_NV12TORGB565ROW_SSSE3)
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#if defined(HAS_NV12TORGB565ROW_NEON)
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#if defined(HAS_NV12TORGB565ROW_MSA)
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
341 
// Any 1 to 1.
// SBPP and BPP are the source and destination bytes per pixel.  ANY_SIMD
// handles the leading (width & ~MASK) pixels in place; the trailing
// (width & MASK) pixels go through a scratch buffer that is converted with
// a width of MASK + 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {         \
    /* First 128 bytes stage the source, last 128 receive the output. */  \
    SIMD_ALIGNED(uint8 scratch[128 * 2]);                                 \
    uint8* const tail_src = scratch;                                      \
    uint8* const tail_dst = scratch + 128;                                \
    const int tail = width & MASK;                                        \
    const int body = width & ~MASK;                                       \
    memset(tail_src, 0, 128); /* for YUY2 and msan */                     \
    if (body > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_ptr, body);                                   \
    }                                                                     \
    memcpy(tail_src, src_ptr + (body >> UVSHIFT) * SBPP,                  \
           SS(tail, UVSHIFT) * SBPP);                                     \
    ANY_SIMD(tail_src, tail_dst, MASK + 1);                               \
    memcpy(dst_ptr + body * BPP, tail_dst, tail * BPP);                   \
  }
356 
// ANY11 instantiations: NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK.
// ANY11 applies MASK as a bit mask (width & MASK and width & ~MASK), so every
// MASK below must be one less than a power of two.
#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_COPYROW_NEON
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_AVX2
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_AVX2
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_NEON
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_MSA
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_MSA
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_MSA
// MASK matches the other MSA ARGB/BGRA/RGBA ToY rows; 15 is valid whether the
// kernel steps by 8 or by 16 pixels.
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_MSA
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_NEON
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
// MASK is 31, not 32: with 32 only bit 5 of width would be split off, so the
// remainder would be either 0 or 32 instead of width modulo 32.
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
#undef ANY11
611 
// Any 1 to 1 blended.  Destination is read, modify, write.
// The existing destination pixels of the tail are staged alongside the
// source so that ANY_SIMD can update them in the scratch buffer before they
// are copied back.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {         \
    /* First 128 bytes hold the source, last 128 the destination. */      \
    SIMD_ALIGNED(uint8 scratch[128 * 2]);                                 \
    uint8* const tail_src = scratch;                                      \
    uint8* const tail_dst = scratch + 128;                                \
    const int tail = width & MASK;                                        \
    const int body = width & ~MASK;                                       \
    uint8* const dst_tail = dst_ptr + body * BPP;                         \
    memset(scratch, 0, 128 * 2); /* for YUY2 and msan */                  \
    if (body > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_ptr, body);                                   \
    }                                                                     \
    memcpy(tail_src, src_ptr + (body >> UVSHIFT) * SBPP,                  \
           SS(tail, UVSHIFT) * SBPP);                                     \
    memcpy(tail_dst, dst_tail, tail * BPP);                               \
    ANY_SIMD(tail_src, tail_dst, MASK + 1);                               \
    memcpy(dst_tail, tail_dst, tail * BPP);                               \
  }
627 
// ANY11B instantiations: NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK.
#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
#undef ANY11B
641 
// Any 1 to 1 with parameter.
// T is the type of the extra argument, which is forwarded unchanged to both
// ANY_SIMD calls.  SBPP and BPP are source and destination bytes per pixel.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                         \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, T shuffler, int width) { \
    /* First 64 bytes stage the source, last 64 receive the output. */        \
    SIMD_ALIGNED(uint8 scratch[64 * 2]);                                      \
    uint8* const tail_src = scratch;                                          \
    uint8* const tail_dst = scratch + 64;                                     \
    const int tail = width & MASK;                                            \
    const int body = width & ~MASK;                                           \
    memset(tail_src, 0, 64); /* for msan */                                   \
    if (body > 0) {                                                           \
      ANY_SIMD(src_ptr, dst_ptr, shuffler, body);                             \
    }                                                                         \
    memcpy(tail_src, src_ptr + body * SBPP, tail * SBPP);                     \
    ANY_SIMD(tail_src, tail_dst, shuffler, MASK + 1);                         \
    memcpy(dst_ptr + body * BPP, tail_dst, tail * BPP);                       \
  }
656 
// ANY11P instantiations: NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
       const uint32, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
       const uint32, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
       const uint32, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA, ARGBToRGB565DitherRow_MSA,
       const uint32, 4, 2, 7)
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
#endif
#if defined(HAS_ARGBSHUFFLEROW_MSA)
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7)
#endif
#undef ANY11P
705 
// Any 1 to 1 with parameter and shorts.  SBPP and BPP measure in shorts
// (uint16 elements), not bytes.
//
// Generates NAMEANY(src_ptr, dst_ptr, shuffler, width), which accepts any
// width: the leading (width & ~MASK) samples are handed directly to ANY_SIMD,
// and the remaining (width & MASK) samples are staged through a zero-padded
// scratch buffer so that ANY_SIMD is only ever asked for MASK + 1 samples.
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                     \
  void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T shuffler,         \
               int width) {                                                 \
    /* temp[0..63] is the staged input, temp[64..127] the staged output. */ \
    SIMD_ALIGNED(uint16 temp[64 * 2]);                                      \
    memset(temp, 0, 64 * sizeof(uint16)); /* for msan */                    \
    int r = width & MASK;                                                   \
    int n = width & ~MASK;                                                  \
    if (n > 0) {                                                            \
      ANY_SIMD(src_ptr, dst_ptr, shuffler, n);                              \
    }                                                                       \
    /* Pointer offsets count shorts but memcpy counts bytes, so the */      \
    /* remainder length has to be scaled by sizeof(uint16).         */      \
    memcpy(temp, src_ptr + n * SBPP, r * SBPP * sizeof(uint16));            \
    ANY_SIMD(temp, temp + 64, shuffler, MASK + 1);                          \
    memcpy(dst_ptr + n * BPP, temp + 64, r * BPP * sizeof(uint16));         \
  }
721 
// Any-width wrappers for the half-float row kernels.  The scalar parameter is
// a float, SBPP and BPP are both 1 short, and the mask is 7 or 15 depending
// on the kernel.
#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 1, 1, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15)
ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, float, 1, 1, 15)
#endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 1, 1, 7)
ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 1, 1, 7)
#endif
// ANY11P16 has no further users below.
#undef ANY11P16
737 
// Any 1 to 1 with yuvconstants.
// Generates NAMEANY(src_ptr, dst_ptr, yuvconstants, width) for any width.
// The first (width & ~MASK) pixels go straight through ANY_SIMD; the last
// (width & MASK) pixels are copied into a zeroed scratch row, converted as a
// full group of MASK + 1 pixels, and only the valid bytes are copied out.
// UVSHIFT describes source subsampling: the source offset is
// (pixels >> UVSHIFT) * SBPP and the tail length is rounded up with SS().
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)              \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                     \
               const struct YuvConstants* yuvconstants, int width) {     \
    const int tail = width & MASK;                                       \
    const int body = width - tail;                                       \
    SIMD_ALIGNED(uint8 temp[128 * 2]);                                   \
    uint8* const edge_in = temp;                                         \
    uint8* const edge_out = temp + 128;                                  \
    /* Zero the input half: needed for YUY2 and keeps msan quiet. */     \
    memset(edge_in, 0, 128);                                             \
    if (body > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, body);                    \
    }                                                                    \
    memcpy(edge_in, src_ptr + (body >> UVSHIFT) * SBPP,                  \
           SS(tail, UVSHIFT) * SBPP);                                    \
    ANY_SIMD(edge_in, edge_out, yuvconstants, MASK + 1);                 \
    memcpy(dst_ptr + body * BPP, edge_out, tail * BPP);                  \
  }
// Any-width wrappers for the YUY2/UYVY to ARGB kernels.  All use UVSHIFT 1,
// SBPP 4 and BPP 4; only the width mask differs per instruction set.
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MSA)
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
// ANY11C has no further users below.
#undef ANY11C
770 
// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
// Generates NAMEANY(dst_ptr, src_ptr, src_stride_ptr, width,
// source_y_fraction) for any width.  The leftover (width & MASK) pixels of
// both source rows are staged 64 bytes apart in a scratch buffer, so the
// kernel is re-run on MASK + 1 pixels with a stride of 64.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                             \
  void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, \
               int width, int source_y_fraction) {                             \
    const int tail = width & MASK;                                             \
    const int body = width - tail;                                             \
    SIMD_ALIGNED(uint8 temp[64 * 3]);                                          \
    uint8* const edge_row0 = temp;                                             \
    uint8* const edge_row1 = temp + 64;                                        \
    uint8* const edge_out = temp + 128;                                        \
    /* Clear both staged source rows (for msan). */                            \
    memset(temp, 0, 64 * 2);                                                   \
    if (body > 0) {                                                            \
      ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, body, source_y_fraction);     \
    }                                                                          \
    memcpy(edge_row0, src_ptr + body * SBPP, tail * SBPP);                     \
    memcpy(edge_row1, src_ptr + src_stride_ptr + body * SBPP, tail * SBPP);    \
    ANY_SIMD(edge_out, edge_row0, 64, MASK + 1, source_y_fraction);            \
    memcpy(dst_ptr + body * BPP, edge_out, tail * BPP);                        \
  }
787 
// Any-width wrappers for the InterpolateRow kernels.  SBPP and BPP are both
// 1; the width mask is chosen per instruction set.
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_DSPR2
ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
#endif
// ANY11T has no further users below.
#undef ANY11T
801 
// Any 1 to 1 mirror.
// Generates NAMEANY(src_ptr, dst_ptr, width) for any width.  The kernel is
// first run on the last (width & ~MASK) source pixels, writing to the start
// of the destination.  The first (width & MASK) source pixels are then
// staged in a zeroed scratch row and run as a full group of MASK + 1 pixels;
// the valid result is taken from the end of that output group.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                         \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {    \
    const int tail = width & MASK;                                   \
    const int body = width - tail;                                   \
    SIMD_ALIGNED(uint8 temp[64 * 2]);                                \
    uint8* const edge_in = temp;                                     \
    uint8* const edge_out = temp + 64;                               \
    memset(edge_in, 0, 64); /* for msan */                           \
    if (body > 0) {                                                  \
      ANY_SIMD(src_ptr + tail * BPP, dst_ptr, body);                 \
    }                                                                \
    memcpy(edge_in, src_ptr, tail * BPP);                            \
    ANY_SIMD(edge_in, edge_out, MASK + 1);                           \
    /* Skip the leading padding pixels of the staged output group. */ \
    memcpy(dst_ptr + body * BPP, edge_out + (MASK + 1 - tail) * BPP, \
           tail * BPP);                                              \
  }
816 
// Any-width wrappers for the mirror kernels.  MirrorRow uses BPP 1 and
// ARGBMirrorRow uses BPP 4; the last argument is the width mask.
#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#endif
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
// ANY11M has no further users below.
#undef ANY11M
842 
// Any 1 plane. (memset)
// Generates NAMEANY(dst_ptr, v32, width) for any width.  The first
// (width & ~MASK) pixels are filled in place; a full group of MASK + 1
// pixels is then filled into a scratch buffer and the leftover
// (width & MASK) pixels are copied from it.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)               \
  void NAMEANY(uint8* dst_ptr, T v32, int width) {          \
    const int tail = width & MASK;                          \
    const int body = width - tail;                          \
    /* No clearing needed: the kernel only writes to it. */ \
    SIMD_ALIGNED(uint8 temp[64]);                           \
    if (body > 0) {                                         \
      ANY_SIMD(dst_ptr, v32, body);                         \
    }                                                       \
    ANY_SIMD(temp, v32, MASK + 1);                          \
    memcpy(dst_ptr + body * BPP, temp, tail * BPP);         \
  }
855 
// Any-width wrappers for the fill kernels.  SetRow takes a uint8 value with
// BPP 1; ARGBSetRow takes a uint32 value with BPP 4.
#ifdef HAS_SETROW_X86
ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
#endif
// ANY1 has no further users below.
#undef ANY1
866 
// Any 1 to 2.  Outputs UV planes.
// Generates NAMEANY(src_ptr, dst_u, dst_v, width) for any width.  The first
// (width & ~MASK) pixels go straight through ANY_SIMD.  The leftover pixels
// are staged in a zeroed 128 byte scratch row, run as a group of MASK + 1
// pixels into two further 128 byte scratch rows, and only the valid U and V
// bytes are copied out.  UVSHIFT scales the source offset and DUVSHIFT the
// destination offset; tail lengths are rounded up with SS().
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)                \
  void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) { \
    const int tail = width & MASK;                                            \
    const int body = width - tail;                                            \
    SIMD_ALIGNED(uint8 temp[128 * 3]);                                        \
    uint8* const edge_in = temp;                                              \
    uint8* const edge_u = temp + 128;                                         \
    uint8* const edge_v = temp + 256;                                         \
    memset(edge_in, 0, 128); /* for msan */                                   \
    if (body > 0) {                                                           \
      ANY_SIMD(src_ptr, dst_u, dst_v, body);                                  \
    }                                                                         \
    memcpy(edge_in, src_ptr + (body >> UVSHIFT) * BPP,                        \
           SS(tail, UVSHIFT) * BPP);                                          \
    ANY_SIMD(edge_in, edge_u, edge_v, MASK + 1);                              \
    memcpy(dst_u + (body >> DUVSHIFT), edge_u, SS(tail, DUVSHIFT));           \
    memcpy(dst_v + (body >> DUVSHIFT), edge_v, SS(tail, DUVSHIFT));           \
  }
882 
// Any-width wrappers built from ANY12.  Arguments after the two names are
// UVSHIFT, BPP, DUVSHIFT and the width mask.
#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_DSPR2
ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_AVX2
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
// NOTE(review): the ARGBToUV444Row NEON and MSA wrappers below are guarded by
// the HAS_YUY2TOUV422ROW_* macros rather than a HAS_ARGBTOUV444ROW_* macro as
// the SSSE3 one is - confirm this is intentional.
#ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_MSA
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
// ANY12 has no further users below.
#undef ANY12
917 
// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// Generates NAMEANY(src_ptr, src_stride_ptr, dst_u, dst_v, width) for any
// width.  The leftover (width & MASK) pixels of both source rows are staged
// 128 bytes apart in a zeroed scratch buffer and run as a full group of
// MASK + 1 pixels with a stride of 128.  Each output plane receives
// SS(leftover, 1) bytes at offset (processed pixels >> 1).
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                      \
  void NAMEANY(const uint8* src_ptr, int src_stride_ptr, uint8* dst_u,     \
               uint8* dst_v, int width) {                                  \
    const int tail = width & MASK;                                         \
    const int body = width - tail;                                         \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                        \
    const uint8* const tail_src = src_ptr + (body >> UVSHIFT) * BPP;       \
    SIMD_ALIGNED(uint8 temp[128 * 4]);                                     \
    uint8* const edge_row0 = temp;                                         \
    uint8* const edge_row1 = temp + 128;                                   \
    uint8* const edge_u = temp + 256;                                      \
    uint8* const edge_v = temp + 384;                                      \
    /* Clear both staged source rows (for msan). */                        \
    memset(temp, 0, 128 * 2);                                              \
    if (body > 0) {                                                        \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, body);               \
    }                                                                      \
    memcpy(edge_row0, tail_src, tail_bytes);                               \
    memcpy(edge_row1, tail_src + src_stride_ptr, tail_bytes);              \
    /* Odd width, unsubsampled source: repeat the last pixel of each */    \
    /* row so the final output sample has a complete pair to read.   */    \
    if (UVSHIFT == 0 && (width & 1)) {                                     \
      memcpy(edge_row0 + tail_bytes, edge_row0 + tail_bytes - BPP, BPP);   \
      memcpy(edge_row1 + tail_bytes, edge_row1 + tail_bytes - BPP, BPP);   \
    }                                                                      \
    ANY_SIMD(edge_row0, 128, edge_u, edge_v, MASK + 1);                    \
    memcpy(dst_u + (body >> 1), edge_u, SS(tail, 1));                      \
    memcpy(dst_v + (body >> 1), edge_v, SS(tail, 1));                      \
  }
943 
// Any-width two-row UV wrappers built from ANY12S.  Arguments after the two
// names are UVSHIFT, BPP and the width mask.  This group covers the 4 byte
// formats (UVSHIFT 0, BPP 4) and the x86 YUY2/UYVY kernels (UVSHIFT 1).
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_AVX2
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_NEON
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_MSA
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_MSA
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31)
#endif
#ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_MSA
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31)
#endif
// ANY12S wrappers for the 3 byte (RGB24, RAW) and 2 byte (RGB565, ARGB1555,
// ARGB4444) source formats, followed by the NEON YUY2/UYVY kernels.
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
// ANY12S wrappers for the DSPR2 4 byte formats and the MSA YUY2/UYVY kernels.
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
// ANY12S has no further users below.
#undef ANY12S
1047 
1048 #ifdef __cplusplus
1049 }  // extern "C"
1050 }  // namespace libyuv
1051 #endif
1052