1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/planar_functions.h"
12
13 #include <assert.h>
14 #include <string.h> // for memset()
15
16 #include "libyuv/cpu_id.h"
17 #ifdef HAVE_JPEG
18 #include "libyuv/mjpeg_decoder.h"
19 #endif
20 #include "libyuv/row.h"
21 #include "libyuv/scale_row.h" // for ScaleRowDown2
22
23 #ifdef __cplusplus
24 namespace libyuv {
25 extern "C" {
26 #endif
27
28 // Copy a plane of data
29 LIBYUV_API
CopyPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)30 void CopyPlane(const uint8_t* src_y,
31 int src_stride_y,
32 uint8_t* dst_y,
33 int dst_stride_y,
34 int width,
35 int height) {
36 int y;
37 void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
38 // Negative height means invert the image.
39 if (height < 0) {
40 height = -height;
41 dst_y = dst_y + (height - 1) * dst_stride_y;
42 dst_stride_y = -dst_stride_y;
43 }
44 // Coalesce rows.
45 if (src_stride_y == width && dst_stride_y == width) {
46 width *= height;
47 height = 1;
48 src_stride_y = dst_stride_y = 0;
49 }
50 // Nothing to do.
51 if (src_y == dst_y && src_stride_y == dst_stride_y) {
52 return;
53 }
54
55 #if defined(HAS_COPYROW_SSE2)
56 if (TestCpuFlag(kCpuHasSSE2)) {
57 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
58 }
59 #endif
60 #if defined(HAS_COPYROW_AVX)
61 if (TestCpuFlag(kCpuHasAVX)) {
62 CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
63 }
64 #endif
65 #if defined(HAS_COPYROW_ERMS)
66 if (TestCpuFlag(kCpuHasERMS)) {
67 CopyRow = CopyRow_ERMS;
68 }
69 #endif
70 #if defined(HAS_COPYROW_NEON)
71 if (TestCpuFlag(kCpuHasNEON)) {
72 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
73 }
74 #endif
75
76 // Copy plane
77 for (y = 0; y < height; ++y) {
78 CopyRow(src_y, dst_y, width);
79 src_y += src_stride_y;
80 dst_y += dst_stride_y;
81 }
82 }
83
84 // TODO(fbarchard): Consider support for negative height.
85 // TODO(fbarchard): Consider stride measured in bytes.
86 LIBYUV_API
CopyPlane_16(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int width,int height)87 void CopyPlane_16(const uint16_t* src_y,
88 int src_stride_y,
89 uint16_t* dst_y,
90 int dst_stride_y,
91 int width,
92 int height) {
93 int y;
94 void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C;
95 // Coalesce rows.
96 if (src_stride_y == width && dst_stride_y == width) {
97 width *= height;
98 height = 1;
99 src_stride_y = dst_stride_y = 0;
100 }
101 #if defined(HAS_COPYROW_16_SSE2)
102 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
103 CopyRow = CopyRow_16_SSE2;
104 }
105 #endif
106 #if defined(HAS_COPYROW_16_ERMS)
107 if (TestCpuFlag(kCpuHasERMS)) {
108 CopyRow = CopyRow_16_ERMS;
109 }
110 #endif
111 #if defined(HAS_COPYROW_16_NEON)
112 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
113 CopyRow = CopyRow_16_NEON;
114 }
115 #endif
116
117 // Copy plane
118 for (y = 0; y < height; ++y) {
119 CopyRow(src_y, dst_y, width);
120 src_y += src_stride_y;
121 dst_y += dst_stride_y;
122 }
123 }
124
125 // Convert a plane of 16 bit data to 8 bit
126 LIBYUV_API
Convert16To8Plane(const uint16_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int scale,int width,int height)127 void Convert16To8Plane(const uint16_t* src_y,
128 int src_stride_y,
129 uint8_t* dst_y,
130 int dst_stride_y,
131 int scale, // 16384 for 10 bits
132 int width,
133 int height) {
134 int y;
135 void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
136 int width) = Convert16To8Row_C;
137
138 // Negative height means invert the image.
139 if (height < 0) {
140 height = -height;
141 dst_y = dst_y + (height - 1) * dst_stride_y;
142 dst_stride_y = -dst_stride_y;
143 }
144 // Coalesce rows.
145 if (src_stride_y == width && dst_stride_y == width) {
146 width *= height;
147 height = 1;
148 src_stride_y = dst_stride_y = 0;
149 }
150 #if defined(HAS_CONVERT16TO8ROW_SSSE3)
151 if (TestCpuFlag(kCpuHasSSSE3)) {
152 Convert16To8Row = Convert16To8Row_Any_SSSE3;
153 if (IS_ALIGNED(width, 16)) {
154 Convert16To8Row = Convert16To8Row_SSSE3;
155 }
156 }
157 #endif
158 #if defined(HAS_CONVERT16TO8ROW_AVX2)
159 if (TestCpuFlag(kCpuHasAVX2)) {
160 Convert16To8Row = Convert16To8Row_Any_AVX2;
161 if (IS_ALIGNED(width, 32)) {
162 Convert16To8Row = Convert16To8Row_AVX2;
163 }
164 }
165 #endif
166
167 // Convert plane
168 for (y = 0; y < height; ++y) {
169 Convert16To8Row(src_y, dst_y, scale, width);
170 src_y += src_stride_y;
171 dst_y += dst_stride_y;
172 }
173 }
174
175 // Convert a plane of 8 bit data to 16 bit
176 LIBYUV_API
Convert8To16Plane(const uint8_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int scale,int width,int height)177 void Convert8To16Plane(const uint8_t* src_y,
178 int src_stride_y,
179 uint16_t* dst_y,
180 int dst_stride_y,
181 int scale, // 16384 for 10 bits
182 int width,
183 int height) {
184 int y;
185 void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
186 int width) = Convert8To16Row_C;
187
188 // Negative height means invert the image.
189 if (height < 0) {
190 height = -height;
191 dst_y = dst_y + (height - 1) * dst_stride_y;
192 dst_stride_y = -dst_stride_y;
193 }
194 // Coalesce rows.
195 if (src_stride_y == width && dst_stride_y == width) {
196 width *= height;
197 height = 1;
198 src_stride_y = dst_stride_y = 0;
199 }
200 #if defined(HAS_CONVERT8TO16ROW_SSE2)
201 if (TestCpuFlag(kCpuHasSSE2)) {
202 Convert8To16Row = Convert8To16Row_Any_SSE2;
203 if (IS_ALIGNED(width, 16)) {
204 Convert8To16Row = Convert8To16Row_SSE2;
205 }
206 }
207 #endif
208 #if defined(HAS_CONVERT8TO16ROW_AVX2)
209 if (TestCpuFlag(kCpuHasAVX2)) {
210 Convert8To16Row = Convert8To16Row_Any_AVX2;
211 if (IS_ALIGNED(width, 32)) {
212 Convert8To16Row = Convert8To16Row_AVX2;
213 }
214 }
215 #endif
216
217 // Convert plane
218 for (y = 0; y < height; ++y) {
219 Convert8To16Row(src_y, dst_y, scale, width);
220 src_y += src_stride_y;
221 dst_y += dst_stride_y;
222 }
223 }
224
225 // Copy I422.
226 LIBYUV_API
I422Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)227 int I422Copy(const uint8_t* src_y,
228 int src_stride_y,
229 const uint8_t* src_u,
230 int src_stride_u,
231 const uint8_t* src_v,
232 int src_stride_v,
233 uint8_t* dst_y,
234 int dst_stride_y,
235 uint8_t* dst_u,
236 int dst_stride_u,
237 uint8_t* dst_v,
238 int dst_stride_v,
239 int width,
240 int height) {
241 int halfwidth = (width + 1) >> 1;
242 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
243 return -1;
244 }
245 // Negative height means invert the image.
246 if (height < 0) {
247 height = -height;
248 src_y = src_y + (height - 1) * src_stride_y;
249 src_u = src_u + (height - 1) * src_stride_u;
250 src_v = src_v + (height - 1) * src_stride_v;
251 src_stride_y = -src_stride_y;
252 src_stride_u = -src_stride_u;
253 src_stride_v = -src_stride_v;
254 }
255
256 if (dst_y) {
257 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
258 }
259 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
260 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
261 return 0;
262 }
263
264 // Copy I444.
265 LIBYUV_API
I444Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)266 int I444Copy(const uint8_t* src_y,
267 int src_stride_y,
268 const uint8_t* src_u,
269 int src_stride_u,
270 const uint8_t* src_v,
271 int src_stride_v,
272 uint8_t* dst_y,
273 int dst_stride_y,
274 uint8_t* dst_u,
275 int dst_stride_u,
276 uint8_t* dst_v,
277 int dst_stride_v,
278 int width,
279 int height) {
280 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
281 return -1;
282 }
283 // Negative height means invert the image.
284 if (height < 0) {
285 height = -height;
286 src_y = src_y + (height - 1) * src_stride_y;
287 src_u = src_u + (height - 1) * src_stride_u;
288 src_v = src_v + (height - 1) * src_stride_v;
289 src_stride_y = -src_stride_y;
290 src_stride_u = -src_stride_u;
291 src_stride_v = -src_stride_v;
292 }
293
294 if (dst_y) {
295 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
296 }
297 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
298 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
299 return 0;
300 }
301
302 // Copy I400.
303 LIBYUV_API
I400ToI400(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)304 int I400ToI400(const uint8_t* src_y,
305 int src_stride_y,
306 uint8_t* dst_y,
307 int dst_stride_y,
308 int width,
309 int height) {
310 if (!src_y || !dst_y || width <= 0 || height == 0) {
311 return -1;
312 }
313 // Negative height means invert the image.
314 if (height < 0) {
315 height = -height;
316 src_y = src_y + (height - 1) * src_stride_y;
317 src_stride_y = -src_stride_y;
318 }
319 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
320 return 0;
321 }
322
323 // Convert I420 to I400.
324 LIBYUV_API
I420ToI400(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,int width,int height)325 int I420ToI400(const uint8_t* src_y,
326 int src_stride_y,
327 const uint8_t* src_u,
328 int src_stride_u,
329 const uint8_t* src_v,
330 int src_stride_v,
331 uint8_t* dst_y,
332 int dst_stride_y,
333 int width,
334 int height) {
335 (void)src_u;
336 (void)src_stride_u;
337 (void)src_v;
338 (void)src_stride_v;
339 if (!src_y || !dst_y || width <= 0 || height == 0) {
340 return -1;
341 }
342 // Negative height means invert the image.
343 if (height < 0) {
344 height = -height;
345 src_y = src_y + (height - 1) * src_stride_y;
346 src_stride_y = -src_stride_y;
347 }
348
349 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
350 return 0;
351 }
352
353 // Copy NV12. Supports inverting.
NV12Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int width,int height)354 int NV12Copy(const uint8_t* src_y,
355 int src_stride_y,
356 const uint8_t* src_uv,
357 int src_stride_uv,
358 uint8_t* dst_y,
359 int dst_stride_y,
360 uint8_t* dst_uv,
361 int dst_stride_uv,
362 int width,
363 int height) {
364 if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
365 return -1;
366 }
367
368 int halfwidth = (width + 1) >> 1;
369 int halfheight = (height + 1) >> 1;
370 // Negative height means invert the image.
371 if (height < 0) {
372 height = -height;
373 halfheight = (height + 1) >> 1;
374 src_y = src_y + (height - 1) * src_stride_y;
375 src_uv = src_uv + (halfheight - 1) * src_stride_uv;
376 src_stride_y = -src_stride_y;
377 src_stride_uv = -src_stride_uv;
378 }
379 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
380 CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth * 2,
381 halfheight);
382 return 0;
383 }
384
385 // Copy NV21. Supports inverting.
// Copy NV21. Supports inverting.
// NV21 differs from NV12 only in the byte order of the interleaved chroma
// plane; a straight copy preserves whatever order is present, so this
// forwards directly to NV12Copy.
int NV21Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_vu,
             int src_stride_vu,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_vu,
             int dst_stride_vu,
             int width,
             int height) {
  return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
                  dst_stride_y, dst_vu, dst_stride_vu, width, height);
}
399
400 // Support function for NV12 etc UV channels.
401 // Width and height are plane sizes (typically half pixel width).
402 LIBYUV_API
// De-interleave a packed UV plane into separate U and V planes.
// width/height are the chroma plane dimensions (typically half the pixel
// width for 4:2:0/4:2:2 formats). Negative height inverts the output.
void SplitUVPlane(const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int width,
                  int height) {
  int y;
  // Row worker; portable C fallback, replaced below by a runtime-selected
  // SIMD variant when available.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows: if all three planes are contiguous (source is 2 bytes
  // per pixel pair), process the whole image as one long row.
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
  // Runtime CPU dispatch: later blocks deliberately override earlier ones.
  // The _Any_ variants handle widths not a multiple of the lane count.
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitUVRow = SplitUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow = SplitUVRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Copy a row of UV.
    SplitUVRow(src_uv, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}
478
479 LIBYUV_API
// Interleave separate U and V planes into a packed UV plane.
// width/height are the chroma plane dimensions. Negative height inverts
// the output.
void MergeUVPlane(const uint8_t* src_u,
                  int src_stride_u,
                  const uint8_t* src_v,
                  int src_stride_v,
                  uint8_t* dst_uv,
                  int dst_stride_uv,
                  int width,
                  int height) {
  int y;
  // Row worker; portable C fallback, replaced below by a runtime-selected
  // SIMD variant when available.
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows: if all three planes are contiguous (destination is
  // 2 bytes per pixel pair), process the whole image as one long row.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
  // Runtime CPU dispatch: later blocks deliberately override earlier ones.
#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow = MergeUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow = MergeUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MergeUVRow = MergeUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow = MergeUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    MergeUVRow = MergeUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      MergeUVRow = MergeUVRow_MMI;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MergeUVRow = MergeUVRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow(src_u, src_v, dst_uv, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}
553
554 // Support function for P010 etc UV channels.
555 // Width and height are plane sizes (typically half pixel width).
556 LIBYUV_API
SplitUVPlane_16(const uint16_t * src_uv,int src_stride_uv,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int width,int height,int depth)557 void SplitUVPlane_16(const uint16_t* src_uv,
558 int src_stride_uv,
559 uint16_t* dst_u,
560 int dst_stride_u,
561 uint16_t* dst_v,
562 int dst_stride_v,
563 int width,
564 int height,
565 int depth) {
566 int y;
567 void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u,
568 uint16_t* dst_v, int depth, int width) =
569 SplitUVRow_16_C;
570 // Negative height means invert the image.
571 if (height < 0) {
572 height = -height;
573 dst_u = dst_u + (height - 1) * dst_stride_u;
574 dst_v = dst_v + (height - 1) * dst_stride_v;
575 dst_stride_u = -dst_stride_u;
576 dst_stride_v = -dst_stride_v;
577 }
578 // Coalesce rows.
579 if (src_stride_uv == width * 2 && dst_stride_u == width &&
580 dst_stride_v == width) {
581 width *= height;
582 height = 1;
583 src_stride_uv = dst_stride_u = dst_stride_v = 0;
584 }
585 #if defined(HAS_SPLITUVROW_16_AVX2)
586 if (TestCpuFlag(kCpuHasAVX2)) {
587 SplitUVRow_16 = SplitUVRow_16_Any_AVX2;
588 if (IS_ALIGNED(width, 16)) {
589 SplitUVRow_16 = SplitUVRow_16_AVX2;
590 }
591 }
592 #endif
593 #if defined(HAS_SPLITUVROW_16_NEON)
594 if (TestCpuFlag(kCpuHasNEON)) {
595 SplitUVRow_16 = SplitUVRow_16_Any_NEON;
596 if (IS_ALIGNED(width, 8)) {
597 SplitUVRow_16 = SplitUVRow_16_NEON;
598 }
599 }
600 #endif
601
602 for (y = 0; y < height; ++y) {
603 // Copy a row of UV.
604 SplitUVRow_16(src_uv, dst_u, dst_v, depth, width);
605 dst_u += dst_stride_u;
606 dst_v += dst_stride_v;
607 src_uv += src_stride_uv;
608 }
609 }
610
611 LIBYUV_API
MergeUVPlane_16(const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,uint16_t * dst_uv,int dst_stride_uv,int width,int height,int depth)612 void MergeUVPlane_16(const uint16_t* src_u,
613 int src_stride_u,
614 const uint16_t* src_v,
615 int src_stride_v,
616 uint16_t* dst_uv,
617 int dst_stride_uv,
618 int width,
619 int height,
620 int depth) {
621 int y;
622 void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v,
623 uint16_t* dst_uv, int depth, int width) =
624 MergeUVRow_16_C;
625 assert(depth >= 8);
626 assert(depth <= 16);
627 // Negative height means invert the image.
628 if (height < 0) {
629 height = -height;
630 dst_uv = dst_uv + (height - 1) * dst_stride_uv;
631 dst_stride_uv = -dst_stride_uv;
632 }
633 // Coalesce rows.
634 if (src_stride_u == width && src_stride_v == width &&
635 dst_stride_uv == width * 2) {
636 width *= height;
637 height = 1;
638 src_stride_u = src_stride_v = dst_stride_uv = 0;
639 }
640 #if defined(HAS_MERGEUVROW_16_AVX2)
641 if (TestCpuFlag(kCpuHasAVX2)) {
642 MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
643 if (IS_ALIGNED(width, 16)) {
644 MergeUVRow_16 = MergeUVRow_16_AVX2;
645 }
646 }
647 #endif
648 #if defined(HAS_MERGEUVROW_16_NEON)
649 if (TestCpuFlag(kCpuHasNEON)) {
650 MergeUVRow_16 = MergeUVRow_16_Any_NEON;
651 if (IS_ALIGNED(width, 8)) {
652 MergeUVRow_16 = MergeUVRow_16_NEON;
653 }
654 }
655 #endif
656
657 for (y = 0; y < height; ++y) {
658 // Merge a row of U and V into a row of UV.
659 MergeUVRow_16(src_u, src_v, dst_uv, depth, width);
660 src_u += src_stride_u;
661 src_v += src_stride_v;
662 dst_uv += dst_stride_uv;
663 }
664 }
665
666 // Convert plane from lsb to msb
667 LIBYUV_API
ConvertToMSBPlane_16(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int width,int height,int depth)668 void ConvertToMSBPlane_16(const uint16_t* src_y,
669 int src_stride_y,
670 uint16_t* dst_y,
671 int dst_stride_y,
672 int width,
673 int height,
674 int depth) {
675 int y;
676 int scale = 1 << (16 - depth);
677 void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale,
678 int width) = MultiplyRow_16_C;
679 // Negative height means invert the image.
680 if (height < 0) {
681 height = -height;
682 dst_y = dst_y + (height - 1) * dst_stride_y;
683 dst_stride_y = -dst_stride_y;
684 }
685 // Coalesce rows.
686 if (src_stride_y == width && dst_stride_y == width) {
687 width *= height;
688 height = 1;
689 src_stride_y = dst_stride_y = 0;
690 }
691
692 #if defined(HAS_MULTIPLYROW_16_AVX2)
693 if (TestCpuFlag(kCpuHasAVX2)) {
694 MultiplyRow_16 = MultiplyRow_16_Any_AVX2;
695 if (IS_ALIGNED(width, 32)) {
696 MultiplyRow_16 = MultiplyRow_16_AVX2;
697 }
698 }
699 #endif
700 #if defined(HAS_MULTIPLYROW_16_NEON)
701 if (TestCpuFlag(kCpuHasNEON)) {
702 MultiplyRow_16 = MultiplyRow_16_Any_NEON;
703 if (IS_ALIGNED(width, 16)) {
704 MultiplyRow_16 = MultiplyRow_16_NEON;
705 }
706 }
707 #endif
708
709 for (y = 0; y < height; ++y) {
710 MultiplyRow_16(src_y, dst_y, scale, width);
711 src_y += src_stride_y;
712 dst_y += dst_stride_y;
713 }
714 }
715
716 // Convert plane from msb to lsb
717 LIBYUV_API
ConvertToLSBPlane_16(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int width,int height,int depth)718 void ConvertToLSBPlane_16(const uint16_t* src_y,
719 int src_stride_y,
720 uint16_t* dst_y,
721 int dst_stride_y,
722 int width,
723 int height,
724 int depth) {
725 int y;
726 int scale = 1 << depth;
727 void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
728 int width) = DivideRow_16_C;
729 // Negative height means invert the image.
730 if (height < 0) {
731 height = -height;
732 dst_y = dst_y + (height - 1) * dst_stride_y;
733 dst_stride_y = -dst_stride_y;
734 }
735 // Coalesce rows.
736 if (src_stride_y == width && dst_stride_y == width) {
737 width *= height;
738 height = 1;
739 src_stride_y = dst_stride_y = 0;
740 }
741
742 #if defined(HAS_DIVIDEROW_16_AVX2)
743 if (TestCpuFlag(kCpuHasAVX2)) {
744 DivideRow = DivideRow_16_Any_AVX2;
745 if (IS_ALIGNED(width, 32)) {
746 DivideRow = DivideRow_16_AVX2;
747 }
748 }
749 #endif
750 #if defined(HAS_DIVIDEROW_16_NEON)
751 if (TestCpuFlag(kCpuHasNEON)) {
752 DivideRow = DivideRow_16_Any_NEON;
753 if (IS_ALIGNED(width, 16)) {
754 DivideRow = DivideRow_16_NEON;
755 }
756 }
757 #endif
758
759 for (y = 0; y < height; ++y) {
760 DivideRow(src_y, dst_y, scale, width);
761 src_y += src_stride_y;
762 dst_y += dst_stride_y;
763 }
764 }
765
766 // Swap U and V channels in interleaved UV plane.
767 LIBYUV_API
SwapUVPlane(const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_vu,int dst_stride_vu,int width,int height)768 void SwapUVPlane(const uint8_t* src_uv,
769 int src_stride_uv,
770 uint8_t* dst_vu,
771 int dst_stride_vu,
772 int width,
773 int height) {
774 int y;
775 void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
776 SwapUVRow_C;
777 // Negative height means invert the image.
778 if (height < 0) {
779 height = -height;
780 src_uv = src_uv + (height - 1) * src_stride_uv;
781 src_stride_uv = -src_stride_uv;
782 }
783 // Coalesce rows.
784 if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
785 width *= height;
786 height = 1;
787 src_stride_uv = dst_stride_vu = 0;
788 }
789
790 #if defined(HAS_SWAPUVROW_SSSE3)
791 if (TestCpuFlag(kCpuHasSSSE3)) {
792 SwapUVRow = SwapUVRow_Any_SSSE3;
793 if (IS_ALIGNED(width, 16)) {
794 SwapUVRow = SwapUVRow_SSSE3;
795 }
796 }
797 #endif
798 #if defined(HAS_SWAPUVROW_AVX2)
799 if (TestCpuFlag(kCpuHasAVX2)) {
800 SwapUVRow = SwapUVRow_Any_AVX2;
801 if (IS_ALIGNED(width, 32)) {
802 SwapUVRow = SwapUVRow_AVX2;
803 }
804 }
805 #endif
806 #if defined(HAS_SWAPUVROW_NEON)
807 if (TestCpuFlag(kCpuHasNEON)) {
808 SwapUVRow = SwapUVRow_Any_NEON;
809 if (IS_ALIGNED(width, 16)) {
810 SwapUVRow = SwapUVRow_NEON;
811 }
812 }
813 #endif
814
815 for (y = 0; y < height; ++y) {
816 SwapUVRow(src_uv, dst_vu, width);
817 src_uv += src_stride_uv;
818 dst_vu += dst_stride_vu;
819 }
820 }
821
822 // Convert NV21 to NV12.
823 LIBYUV_API
NV21ToNV12(const uint8_t * src_y,int src_stride_y,const uint8_t * src_vu,int src_stride_vu,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int width,int height)824 int NV21ToNV12(const uint8_t* src_y,
825 int src_stride_y,
826 const uint8_t* src_vu,
827 int src_stride_vu,
828 uint8_t* dst_y,
829 int dst_stride_y,
830 uint8_t* dst_uv,
831 int dst_stride_uv,
832 int width,
833 int height) {
834 int halfwidth = (width + 1) >> 1;
835 int halfheight = (height + 1) >> 1;
836 if (!src_vu || !dst_uv || width <= 0 || height == 0) {
837 return -1;
838 }
839 if (dst_y) {
840 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
841 }
842
843 // Negative height means invert the image.
844 if (height < 0) {
845 height = -height;
846 halfheight = (height + 1) >> 1;
847 src_vu = src_vu + (halfheight - 1) * src_stride_vu;
848 src_stride_vu = -src_stride_vu;
849 }
850
851 SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
852 halfheight);
853 return 0;
854 }
855
856 // Support function for NV12 etc RGB channels.
857 // Width and height are plane sizes (typically half pixel width).
858 LIBYUV_API
// De-interleave packed 24-bit RGB into separate R, G and B planes.
// Negative height inverts the output.
void SplitRGBPlane(const uint8_t* src_rgb,
                   int src_stride_rgb,
                   uint8_t* dst_r,
                   int dst_stride_r,
                   uint8_t* dst_g,
                   int dst_stride_g,
                   uint8_t* dst_b,
                   int dst_stride_b,
                   int width,
                   int height) {
  int y;
  // Row worker; portable C fallback, replaced below by a runtime-selected
  // SIMD variant when available.
  void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                      uint8_t* dst_b, int width) = SplitRGBRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
  }
  // Coalesce rows: if all four planes are contiguous (source is 3 bytes
  // per pixel), process the whole image as one long row.
  if (src_stride_rgb == width * 3 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }
  // Runtime CPU dispatch: later blocks deliberately override earlier ones.
#if defined(HAS_SPLITRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitRGBRow = SplitRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitRGBRow = SplitRGBRow_Any_MMI;
    if (IS_ALIGNED(width, 4)) {
      SplitRGBRow = SplitRGBRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitRGBRow = SplitRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Copy a row of RGB.
    SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_rgb += src_stride_rgb;
  }
}
923
924 LIBYUV_API
MergeRGBPlane(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,uint8_t * dst_rgb,int dst_stride_rgb,int width,int height)925 void MergeRGBPlane(const uint8_t* src_r,
926 int src_stride_r,
927 const uint8_t* src_g,
928 int src_stride_g,
929 const uint8_t* src_b,
930 int src_stride_b,
931 uint8_t* dst_rgb,
932 int dst_stride_rgb,
933 int width,
934 int height) {
935 int y;
936 void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
937 const uint8_t* src_b, uint8_t* dst_rgb, int width) =
938 MergeRGBRow_C;
939 // Coalesce rows.
940 // Negative height means invert the image.
941 if (height < 0) {
942 height = -height;
943 dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
944 dst_stride_rgb = -dst_stride_rgb;
945 }
946 // Coalesce rows.
947 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
948 dst_stride_rgb == width * 3) {
949 width *= height;
950 height = 1;
951 src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
952 }
953 #if defined(HAS_MERGERGBROW_SSSE3)
954 if (TestCpuFlag(kCpuHasSSSE3)) {
955 MergeRGBRow = MergeRGBRow_Any_SSSE3;
956 if (IS_ALIGNED(width, 16)) {
957 MergeRGBRow = MergeRGBRow_SSSE3;
958 }
959 }
960 #endif
961 #if defined(HAS_MERGERGBROW_NEON)
962 if (TestCpuFlag(kCpuHasNEON)) {
963 MergeRGBRow = MergeRGBRow_Any_NEON;
964 if (IS_ALIGNED(width, 16)) {
965 MergeRGBRow = MergeRGBRow_NEON;
966 }
967 }
968 #endif
969 #if defined(HAS_MERGERGBROW_MMI)
970 if (TestCpuFlag(kCpuHasMMI)) {
971 MergeRGBRow = MergeRGBRow_Any_MMI;
972 if (IS_ALIGNED(width, 8)) {
973 MergeRGBRow = MergeRGBRow_MMI;
974 }
975 }
976 #endif
977
978 for (y = 0; y < height; ++y) {
979 // Merge a row of U and V into a row of RGB.
980 MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
981 src_r += src_stride_r;
982 src_g += src_stride_g;
983 src_b += src_stride_b;
984 dst_rgb += dst_stride_rgb;
985 }
986 }
987
988 LIBYUV_NOINLINE
// De-interleave packed ARGB into separate R, G, B and A planes.
// Internal helper; asserts height > 0 — presumably the public entry point
// (SplitARGBPlane, not fully visible here) resolves negative height before
// calling. NOTE(review): confirm against the caller.
void SplitARGBPlaneAlpha(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_r,
                         int dst_stride_r,
                         uint8_t* dst_g,
                         int dst_stride_g,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         int width,
                         int height) {
  int y;
  // Row worker; portable C fallback, replaced below by a runtime-selected
  // SIMD variant when available.
  void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, uint8_t* dst_a, int width) =
      SplitARGBRow_C;

  assert(height > 0);

  // Coalesce rows: if all five planes are contiguous (source is 4 bytes
  // per pixel), process the whole image as one long row.
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
        dst_stride_a = 0;
  }

  // Runtime CPU dispatch: later blocks deliberately override earlier ones.
#if defined(HAS_SPLITARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitARGBRow = SplitARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitARGBRow = SplitARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitARGBRow = SplitARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitARGBRow = SplitARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    dst_a += dst_stride_a;
    src_argb += src_stride_argb;
  }
}
1058
1059 LIBYUV_NOINLINE
// De-interleave packed ARGB into separate R, G and B planes, ignoring the
// alpha channel. Internal helper; asserts height > 0 — presumably the
// public entry point resolves negative height first. NOTE(review): confirm
// against the caller.
void SplitARGBPlaneOpaque(const uint8_t* src_argb,
                          int src_stride_argb,
                          uint8_t* dst_r,
                          int dst_stride_r,
                          uint8_t* dst_g,
                          int dst_stride_g,
                          uint8_t* dst_b,
                          int dst_stride_b,
                          int width,
                          int height) {
  int y;
  // Row worker; portable C fallback, replaced below when SIMD is available.
  void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, int width) = SplitXRGBRow_C;
  assert(height > 0);

  // Coalesce rows: if all four planes are contiguous (source is 4 bytes
  // per pixel), process the whole image as one long row.
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }

  // Runtime CPU dispatch: later blocks deliberately override earlier ones.
#if defined(HAS_SPLITXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitXRGBRow = SplitXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitXRGBRow = SplitXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_argb += src_stride_argb;
  }
}
1123
1124 LIBYUV_API
SplitARGBPlane(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_r,int dst_stride_r,uint8_t * dst_g,int dst_stride_g,uint8_t * dst_b,int dst_stride_b,uint8_t * dst_a,int dst_stride_a,int width,int height)1125 void SplitARGBPlane(const uint8_t* src_argb,
1126 int src_stride_argb,
1127 uint8_t* dst_r,
1128 int dst_stride_r,
1129 uint8_t* dst_g,
1130 int dst_stride_g,
1131 uint8_t* dst_b,
1132 int dst_stride_b,
1133 uint8_t* dst_a,
1134 int dst_stride_a,
1135 int width,
1136 int height) {
1137 // Negative height means invert the image.
1138 if (height < 0) {
1139 height = -height;
1140 dst_r = dst_r + (height - 1) * dst_stride_r;
1141 dst_g = dst_g + (height - 1) * dst_stride_g;
1142 dst_b = dst_b + (height - 1) * dst_stride_b;
1143 dst_a = dst_a + (height - 1) * dst_stride_a;
1144 dst_stride_r = -dst_stride_r;
1145 dst_stride_g = -dst_stride_g;
1146 dst_stride_b = -dst_stride_b;
1147 dst_stride_a = -dst_stride_a;
1148 }
1149
1150 if (dst_a == NULL) {
1151 SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
1152 dst_stride_g, dst_b, dst_stride_b, width, height);
1153 } else {
1154 SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
1155 dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
1156 width, height);
1157 }
1158 }
1159
1160 LIBYUV_NOINLINE
MergeARGBPlaneAlpha(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,const uint8_t * src_a,int src_stride_a,uint8_t * dst_argb,int dst_stride_argb,int width,int height)1161 void MergeARGBPlaneAlpha(const uint8_t* src_r,
1162 int src_stride_r,
1163 const uint8_t* src_g,
1164 int src_stride_g,
1165 const uint8_t* src_b,
1166 int src_stride_b,
1167 const uint8_t* src_a,
1168 int src_stride_a,
1169 uint8_t* dst_argb,
1170 int dst_stride_argb,
1171 int width,
1172 int height) {
1173 int y;
1174 void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
1175 const uint8_t* src_b, const uint8_t* src_a,
1176 uint8_t* dst_argb, int width) = MergeARGBRow_C;
1177
1178 assert(height > 0);
1179
1180 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1181 src_stride_a == width && dst_stride_argb == width * 4) {
1182 width *= height;
1183 height = 1;
1184 src_stride_r = src_stride_g = src_stride_b = src_stride_a =
1185 dst_stride_argb = 0;
1186 }
1187 #if defined(HAS_MERGEARGBROW_SSE2)
1188 if (TestCpuFlag(kCpuHasSSE2)) {
1189 MergeARGBRow = MergeARGBRow_Any_SSE2;
1190 if (IS_ALIGNED(width, 8)) {
1191 MergeARGBRow = MergeARGBRow_SSE2;
1192 }
1193 }
1194 #endif
1195 #if defined(HAS_MERGEARGBROW_AVX2)
1196 if (TestCpuFlag(kCpuHasAVX2)) {
1197 MergeARGBRow = MergeARGBRow_Any_AVX2;
1198 if (IS_ALIGNED(width, 16)) {
1199 MergeARGBRow = MergeARGBRow_AVX2;
1200 }
1201 }
1202 #endif
1203 #if defined(HAS_MERGEARGBROW_NEON)
1204 if (TestCpuFlag(kCpuHasNEON)) {
1205 MergeARGBRow = MergeARGBRow_Any_NEON;
1206 if (IS_ALIGNED(width, 16)) {
1207 MergeARGBRow = MergeARGBRow_NEON;
1208 }
1209 }
1210 #endif
1211
1212 for (y = 0; y < height; ++y) {
1213 MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
1214 src_r += src_stride_r;
1215 src_g += src_stride_g;
1216 src_b += src_stride_b;
1217 src_a += src_stride_a;
1218 dst_argb += dst_stride_argb;
1219 }
1220 }
1221
1222 LIBYUV_NOINLINE
MergeARGBPlaneOpaque(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,uint8_t * dst_argb,int dst_stride_argb,int width,int height)1223 void MergeARGBPlaneOpaque(const uint8_t* src_r,
1224 int src_stride_r,
1225 const uint8_t* src_g,
1226 int src_stride_g,
1227 const uint8_t* src_b,
1228 int src_stride_b,
1229 uint8_t* dst_argb,
1230 int dst_stride_argb,
1231 int width,
1232 int height) {
1233 int y;
1234 void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
1235 const uint8_t* src_b, uint8_t* dst_argb, int width) =
1236 MergeXRGBRow_C;
1237
1238 assert(height > 0);
1239
1240 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1241 dst_stride_argb == width * 4) {
1242 width *= height;
1243 height = 1;
1244 src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
1245 }
1246 #if defined(HAS_MERGEXRGBROW_SSE2)
1247 if (TestCpuFlag(kCpuHasSSE2)) {
1248 MergeXRGBRow = MergeXRGBRow_Any_SSE2;
1249 if (IS_ALIGNED(width, 8)) {
1250 MergeXRGBRow = MergeXRGBRow_SSE2;
1251 }
1252 }
1253 #endif
1254 #if defined(HAS_MERGEXRGBROW_AVX2)
1255 if (TestCpuFlag(kCpuHasAVX2)) {
1256 MergeXRGBRow = MergeXRGBRow_Any_AVX2;
1257 if (IS_ALIGNED(width, 16)) {
1258 MergeXRGBRow = MergeXRGBRow_AVX2;
1259 }
1260 }
1261 #endif
1262 #if defined(HAS_MERGEXRGBROW_NEON)
1263 if (TestCpuFlag(kCpuHasNEON)) {
1264 MergeXRGBRow = MergeXRGBRow_Any_NEON;
1265 if (IS_ALIGNED(width, 16)) {
1266 MergeXRGBRow = MergeXRGBRow_NEON;
1267 }
1268 }
1269 #endif
1270
1271 for (y = 0; y < height; ++y) {
1272 MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
1273 src_r += src_stride_r;
1274 src_g += src_stride_g;
1275 src_b += src_stride_b;
1276 dst_argb += dst_stride_argb;
1277 }
1278 }
1279
1280 LIBYUV_API
MergeARGBPlane(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,const uint8_t * src_a,int src_stride_a,uint8_t * dst_argb,int dst_stride_argb,int width,int height)1281 void MergeARGBPlane(const uint8_t* src_r,
1282 int src_stride_r,
1283 const uint8_t* src_g,
1284 int src_stride_g,
1285 const uint8_t* src_b,
1286 int src_stride_b,
1287 const uint8_t* src_a,
1288 int src_stride_a,
1289 uint8_t* dst_argb,
1290 int dst_stride_argb,
1291 int width,
1292 int height) {
1293 // Negative height means invert the image.
1294 if (height < 0) {
1295 height = -height;
1296 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1297 dst_stride_argb = -dst_stride_argb;
1298 }
1299
1300 if (src_a == NULL) {
1301 MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
1302 src_stride_b, dst_argb, dst_stride_argb, width,
1303 height);
1304 } else {
1305 MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
1306 src_stride_b, src_a, src_stride_a, dst_argb,
1307 dst_stride_argb, width, height);
1308 }
1309 }
1310
1311 // TODO(yuan): Support 2 bit alpha channel.
1312 LIBYUV_API
MergeXR30Plane(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,uint8_t * dst_ar30,int dst_stride_ar30,int width,int height,int depth)1313 void MergeXR30Plane(const uint16_t* src_r,
1314 int src_stride_r,
1315 const uint16_t* src_g,
1316 int src_stride_g,
1317 const uint16_t* src_b,
1318 int src_stride_b,
1319 uint8_t* dst_ar30,
1320 int dst_stride_ar30,
1321 int width,
1322 int height,
1323 int depth) {
1324 int y;
1325 void (*MergeXR30Row)(const uint16_t* src_r, const uint16_t* src_g,
1326 const uint16_t* src_b, uint8_t* dst_ar30, int depth,
1327 int width) = MergeXR30Row_C;
1328
1329 // Negative height means invert the image.
1330 if (height < 0) {
1331 height = -height;
1332 dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
1333 dst_stride_ar30 = -dst_stride_ar30;
1334 }
1335 // Coalesce rows.
1336 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1337 dst_stride_ar30 == width * 4) {
1338 width *= height;
1339 height = 1;
1340 src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0;
1341 }
1342 #if defined(HAS_MERGEXR30ROW_AVX2)
1343 if (TestCpuFlag(kCpuHasAVX2)) {
1344 MergeXR30Row = MergeXR30Row_Any_AVX2;
1345 if (IS_ALIGNED(width, 16)) {
1346 MergeXR30Row = MergeXR30Row_AVX2;
1347 }
1348 }
1349 #endif
1350 #if defined(HAS_MERGEXR30ROW_NEON)
1351 if (TestCpuFlag(kCpuHasNEON)) {
1352 if (depth == 10) {
1353 MergeXR30Row = MergeXR30Row_10_Any_NEON;
1354 if (IS_ALIGNED(width, 8)) {
1355 MergeXR30Row = MergeXR30Row_10_NEON;
1356 }
1357 } else {
1358 MergeXR30Row = MergeXR30Row_Any_NEON;
1359 if (IS_ALIGNED(width, 8)) {
1360 MergeXR30Row = MergeXR30Row_NEON;
1361 }
1362 }
1363 }
1364 #endif
1365
1366 for (y = 0; y < height; ++y) {
1367 MergeXR30Row(src_r, src_g, src_b, dst_ar30, depth, width);
1368 src_r += src_stride_r;
1369 src_g += src_stride_g;
1370 src_b += src_stride_b;
1371 dst_ar30 += dst_stride_ar30;
1372 }
1373 }
1374
1375 LIBYUV_NOINLINE
MergeAR64PlaneAlpha(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,const uint16_t * src_a,int src_stride_a,uint16_t * dst_ar64,int dst_stride_ar64,int width,int height,int depth)1376 static void MergeAR64PlaneAlpha(const uint16_t* src_r,
1377 int src_stride_r,
1378 const uint16_t* src_g,
1379 int src_stride_g,
1380 const uint16_t* src_b,
1381 int src_stride_b,
1382 const uint16_t* src_a,
1383 int src_stride_a,
1384 uint16_t* dst_ar64,
1385 int dst_stride_ar64,
1386 int width,
1387 int height,
1388 int depth) {
1389 int y;
1390 void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
1391 const uint16_t* src_b, const uint16_t* src_a,
1392 uint16_t* dst_argb, int depth, int width) =
1393 MergeAR64Row_C;
1394
1395 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1396 src_stride_a == width && dst_stride_ar64 == width * 4) {
1397 width *= height;
1398 height = 1;
1399 src_stride_r = src_stride_g = src_stride_b = src_stride_a =
1400 dst_stride_ar64 = 0;
1401 }
1402 #if defined(HAS_MERGEAR64ROW_AVX2)
1403 if (TestCpuFlag(kCpuHasAVX2)) {
1404 MergeAR64Row = MergeAR64Row_Any_AVX2;
1405 if (IS_ALIGNED(width, 16)) {
1406 MergeAR64Row = MergeAR64Row_AVX2;
1407 }
1408 }
1409 #endif
1410 #if defined(HAS_MERGEAR64ROW_NEON)
1411 if (TestCpuFlag(kCpuHasNEON)) {
1412 MergeAR64Row = MergeAR64Row_Any_NEON;
1413 if (IS_ALIGNED(width, 8)) {
1414 MergeAR64Row = MergeAR64Row_NEON;
1415 }
1416 }
1417 #endif
1418
1419 for (y = 0; y < height; ++y) {
1420 MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
1421 src_r += src_stride_r;
1422 src_g += src_stride_g;
1423 src_b += src_stride_b;
1424 src_a += src_stride_a;
1425 dst_ar64 += dst_stride_ar64;
1426 }
1427 }
1428
1429 LIBYUV_NOINLINE
MergeAR64PlaneOpaque(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,uint16_t * dst_ar64,int dst_stride_ar64,int width,int height,int depth)1430 static void MergeAR64PlaneOpaque(const uint16_t* src_r,
1431 int src_stride_r,
1432 const uint16_t* src_g,
1433 int src_stride_g,
1434 const uint16_t* src_b,
1435 int src_stride_b,
1436 uint16_t* dst_ar64,
1437 int dst_stride_ar64,
1438 int width,
1439 int height,
1440 int depth) {
1441 int y;
1442 void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
1443 const uint16_t* src_b, uint16_t* dst_argb, int depth,
1444 int width) = MergeXR64Row_C;
1445
1446 // Coalesce rows.
1447 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1448 dst_stride_ar64 == width * 4) {
1449 width *= height;
1450 height = 1;
1451 src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
1452 }
1453 #if defined(HAS_MERGEXR64ROW_AVX2)
1454 if (TestCpuFlag(kCpuHasAVX2)) {
1455 MergeXR64Row = MergeXR64Row_Any_AVX2;
1456 if (IS_ALIGNED(width, 16)) {
1457 MergeXR64Row = MergeXR64Row_AVX2;
1458 }
1459 }
1460 #endif
1461 #if defined(HAS_MERGEXR64ROW_NEON)
1462 if (TestCpuFlag(kCpuHasNEON)) {
1463 MergeXR64Row = MergeXR64Row_Any_NEON;
1464 if (IS_ALIGNED(width, 8)) {
1465 MergeXR64Row = MergeXR64Row_NEON;
1466 }
1467 }
1468 #endif
1469
1470 for (y = 0; y < height; ++y) {
1471 MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
1472 src_r += src_stride_r;
1473 src_g += src_stride_g;
1474 src_b += src_stride_b;
1475 dst_ar64 += dst_stride_ar64;
1476 }
1477 }
1478
1479 LIBYUV_API
MergeAR64Plane(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,const uint16_t * src_a,int src_stride_a,uint16_t * dst_ar64,int dst_stride_ar64,int width,int height,int depth)1480 void MergeAR64Plane(const uint16_t* src_r,
1481 int src_stride_r,
1482 const uint16_t* src_g,
1483 int src_stride_g,
1484 const uint16_t* src_b,
1485 int src_stride_b,
1486 const uint16_t* src_a,
1487 int src_stride_a,
1488 uint16_t* dst_ar64,
1489 int dst_stride_ar64,
1490 int width,
1491 int height,
1492 int depth) {
1493 // Negative height means invert the image.
1494 if (height < 0) {
1495 height = -height;
1496 dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64;
1497 dst_stride_ar64 = -dst_stride_ar64;
1498 }
1499
1500 if (src_a == NULL) {
1501 MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
1502 src_stride_b, dst_ar64, dst_stride_ar64, width, height,
1503 depth);
1504 } else {
1505 MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
1506 src_stride_b, src_a, src_stride_a, dst_ar64,
1507 dst_stride_ar64, width, height, depth);
1508 }
1509 }
1510
1511 LIBYUV_NOINLINE
MergeARGB16To8PlaneAlpha(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,const uint16_t * src_a,int src_stride_a,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int depth)1512 static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
1513 int src_stride_r,
1514 const uint16_t* src_g,
1515 int src_stride_g,
1516 const uint16_t* src_b,
1517 int src_stride_b,
1518 const uint16_t* src_a,
1519 int src_stride_a,
1520 uint8_t* dst_argb,
1521 int dst_stride_argb,
1522 int width,
1523 int height,
1524 int depth) {
1525 int y;
1526 void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
1527 const uint16_t* src_b, const uint16_t* src_a,
1528 uint8_t* dst_argb, int depth, int width) =
1529 MergeARGB16To8Row_C;
1530
1531 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1532 src_stride_a == width && dst_stride_argb == width * 4) {
1533 width *= height;
1534 height = 1;
1535 src_stride_r = src_stride_g = src_stride_b = src_stride_a =
1536 dst_stride_argb = 0;
1537 }
1538 #if defined(HAS_MERGEARGB16TO8ROW_AVX2)
1539 if (TestCpuFlag(kCpuHasAVX2)) {
1540 MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
1541 if (IS_ALIGNED(width, 16)) {
1542 MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
1543 }
1544 }
1545 #endif
1546 #if defined(HAS_MERGEARGB16TO8ROW_NEON)
1547 if (TestCpuFlag(kCpuHasNEON)) {
1548 MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
1549 if (IS_ALIGNED(width, 8)) {
1550 MergeARGB16To8Row = MergeARGB16To8Row_NEON;
1551 }
1552 }
1553 #endif
1554
1555 for (y = 0; y < height; ++y) {
1556 MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
1557 src_r += src_stride_r;
1558 src_g += src_stride_g;
1559 src_b += src_stride_b;
1560 src_a += src_stride_a;
1561 dst_argb += dst_stride_argb;
1562 }
1563 }
1564
1565 LIBYUV_NOINLINE
MergeARGB16To8PlaneOpaque(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int depth)1566 static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
1567 int src_stride_r,
1568 const uint16_t* src_g,
1569 int src_stride_g,
1570 const uint16_t* src_b,
1571 int src_stride_b,
1572 uint8_t* dst_argb,
1573 int dst_stride_argb,
1574 int width,
1575 int height,
1576 int depth) {
1577 int y;
1578 void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
1579 const uint16_t* src_b, uint8_t* dst_argb, int depth,
1580 int width) = MergeXRGB16To8Row_C;
1581
1582 // Coalesce rows.
1583 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1584 dst_stride_argb == width * 4) {
1585 width *= height;
1586 height = 1;
1587 src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
1588 }
1589 #if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
1590 if (TestCpuFlag(kCpuHasAVX2)) {
1591 MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
1592 if (IS_ALIGNED(width, 16)) {
1593 MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
1594 }
1595 }
1596 #endif
1597 #if defined(HAS_MERGEXRGB16TO8ROW_NEON)
1598 if (TestCpuFlag(kCpuHasNEON)) {
1599 MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
1600 if (IS_ALIGNED(width, 8)) {
1601 MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
1602 }
1603 }
1604 #endif
1605
1606 for (y = 0; y < height; ++y) {
1607 MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
1608 src_r += src_stride_r;
1609 src_g += src_stride_g;
1610 src_b += src_stride_b;
1611 dst_argb += dst_stride_argb;
1612 }
1613 }
1614
1615 LIBYUV_API
MergeARGB16To8Plane(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,const uint16_t * src_a,int src_stride_a,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int depth)1616 void MergeARGB16To8Plane(const uint16_t* src_r,
1617 int src_stride_r,
1618 const uint16_t* src_g,
1619 int src_stride_g,
1620 const uint16_t* src_b,
1621 int src_stride_b,
1622 const uint16_t* src_a,
1623 int src_stride_a,
1624 uint8_t* dst_argb,
1625 int dst_stride_argb,
1626 int width,
1627 int height,
1628 int depth) {
1629 // Negative height means invert the image.
1630 if (height < 0) {
1631 height = -height;
1632 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1633 dst_stride_argb = -dst_stride_argb;
1634 }
1635
1636 if (src_a == NULL) {
1637 MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
1638 src_stride_b, dst_argb, dst_stride_argb, width,
1639 height, depth);
1640 } else {
1641 MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
1642 src_stride_b, src_a, src_stride_a, dst_argb,
1643 dst_stride_argb, width, height, depth);
1644 }
1645 }
1646
1647 // Convert YUY2 to I422.
1648 LIBYUV_API
YUY2ToI422(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)1649 int YUY2ToI422(const uint8_t* src_yuy2,
1650 int src_stride_yuy2,
1651 uint8_t* dst_y,
1652 int dst_stride_y,
1653 uint8_t* dst_u,
1654 int dst_stride_u,
1655 uint8_t* dst_v,
1656 int dst_stride_v,
1657 int width,
1658 int height) {
1659 int y;
1660 void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
1661 uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
1662 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
1663 YUY2ToYRow_C;
1664 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
1665 return -1;
1666 }
1667 // Negative height means invert the image.
1668 if (height < 0) {
1669 height = -height;
1670 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
1671 src_stride_yuy2 = -src_stride_yuy2;
1672 }
1673 // Coalesce rows.
1674 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
1675 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
1676 width * height <= 32768) {
1677 width *= height;
1678 height = 1;
1679 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
1680 }
1681 #if defined(HAS_YUY2TOYROW_SSE2)
1682 if (TestCpuFlag(kCpuHasSSE2)) {
1683 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
1684 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
1685 if (IS_ALIGNED(width, 16)) {
1686 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
1687 YUY2ToYRow = YUY2ToYRow_SSE2;
1688 }
1689 }
1690 #endif
1691 #if defined(HAS_YUY2TOYROW_AVX2)
1692 if (TestCpuFlag(kCpuHasAVX2)) {
1693 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
1694 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
1695 if (IS_ALIGNED(width, 32)) {
1696 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
1697 YUY2ToYRow = YUY2ToYRow_AVX2;
1698 }
1699 }
1700 #endif
1701 #if defined(HAS_YUY2TOYROW_NEON)
1702 if (TestCpuFlag(kCpuHasNEON)) {
1703 YUY2ToYRow = YUY2ToYRow_Any_NEON;
1704 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
1705 if (IS_ALIGNED(width, 16)) {
1706 YUY2ToYRow = YUY2ToYRow_NEON;
1707 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
1708 }
1709 }
1710 #endif
1711 #if defined(HAS_YUY2TOYROW_MMI) && defined(HAS_YUY2TOUV422ROW_MMI)
1712 if (TestCpuFlag(kCpuHasMMI)) {
1713 YUY2ToYRow = YUY2ToYRow_Any_MMI;
1714 YUY2ToUV422Row = YUY2ToUV422Row_Any_MMI;
1715 if (IS_ALIGNED(width, 8)) {
1716 YUY2ToYRow = YUY2ToYRow_MMI;
1717 YUY2ToUV422Row = YUY2ToUV422Row_MMI;
1718 }
1719 }
1720 #endif
1721 #if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
1722 if (TestCpuFlag(kCpuHasMSA)) {
1723 YUY2ToYRow = YUY2ToYRow_Any_MSA;
1724 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
1725 if (IS_ALIGNED(width, 32)) {
1726 YUY2ToYRow = YUY2ToYRow_MSA;
1727 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
1728 }
1729 }
1730 #endif
1731
1732 for (y = 0; y < height; ++y) {
1733 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
1734 YUY2ToYRow(src_yuy2, dst_y, width);
1735 src_yuy2 += src_stride_yuy2;
1736 dst_y += dst_stride_y;
1737 dst_u += dst_stride_u;
1738 dst_v += dst_stride_v;
1739 }
1740 return 0;
1741 }
1742
1743 // Convert UYVY to I422.
1744 LIBYUV_API
UYVYToI422(const uint8_t * src_uyvy,int src_stride_uyvy,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)1745 int UYVYToI422(const uint8_t* src_uyvy,
1746 int src_stride_uyvy,
1747 uint8_t* dst_y,
1748 int dst_stride_y,
1749 uint8_t* dst_u,
1750 int dst_stride_u,
1751 uint8_t* dst_v,
1752 int dst_stride_v,
1753 int width,
1754 int height) {
1755 int y;
1756 void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
1757 uint8_t* dst_v, int width) = UYVYToUV422Row_C;
1758 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
1759 UYVYToYRow_C;
1760 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
1761 return -1;
1762 }
1763 // Negative height means invert the image.
1764 if (height < 0) {
1765 height = -height;
1766 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
1767 src_stride_uyvy = -src_stride_uyvy;
1768 }
1769 // Coalesce rows.
1770 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
1771 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
1772 width * height <= 32768) {
1773 width *= height;
1774 height = 1;
1775 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
1776 }
1777 #if defined(HAS_UYVYTOYROW_SSE2)
1778 if (TestCpuFlag(kCpuHasSSE2)) {
1779 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
1780 UYVYToYRow = UYVYToYRow_Any_SSE2;
1781 if (IS_ALIGNED(width, 16)) {
1782 UYVYToUV422Row = UYVYToUV422Row_SSE2;
1783 UYVYToYRow = UYVYToYRow_SSE2;
1784 }
1785 }
1786 #endif
1787 #if defined(HAS_UYVYTOYROW_AVX2)
1788 if (TestCpuFlag(kCpuHasAVX2)) {
1789 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
1790 UYVYToYRow = UYVYToYRow_Any_AVX2;
1791 if (IS_ALIGNED(width, 32)) {
1792 UYVYToUV422Row = UYVYToUV422Row_AVX2;
1793 UYVYToYRow = UYVYToYRow_AVX2;
1794 }
1795 }
1796 #endif
1797 #if defined(HAS_UYVYTOYROW_NEON)
1798 if (TestCpuFlag(kCpuHasNEON)) {
1799 UYVYToYRow = UYVYToYRow_Any_NEON;
1800 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
1801 if (IS_ALIGNED(width, 16)) {
1802 UYVYToYRow = UYVYToYRow_NEON;
1803 UYVYToUV422Row = UYVYToUV422Row_NEON;
1804 }
1805 }
1806 #endif
1807 #if defined(HAS_UYVYTOYROW_MMI) && defined(HAS_UYVYTOUV422ROW_MMI)
1808 if (TestCpuFlag(kCpuHasMMI)) {
1809 UYVYToYRow = UYVYToYRow_Any_MMI;
1810 UYVYToUV422Row = UYVYToUV422Row_Any_MMI;
1811 if (IS_ALIGNED(width, 16)) {
1812 UYVYToYRow = UYVYToYRow_MMI;
1813 UYVYToUV422Row = UYVYToUV422Row_MMI;
1814 }
1815 }
1816 #endif
1817 #if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA)
1818 if (TestCpuFlag(kCpuHasMSA)) {
1819 UYVYToYRow = UYVYToYRow_Any_MSA;
1820 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
1821 if (IS_ALIGNED(width, 32)) {
1822 UYVYToYRow = UYVYToYRow_MSA;
1823 UYVYToUV422Row = UYVYToUV422Row_MSA;
1824 }
1825 }
1826 #endif
1827
1828 for (y = 0; y < height; ++y) {
1829 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
1830 UYVYToYRow(src_uyvy, dst_y, width);
1831 src_uyvy += src_stride_uyvy;
1832 dst_y += dst_stride_y;
1833 dst_u += dst_stride_u;
1834 dst_v += dst_stride_v;
1835 }
1836 return 0;
1837 }
1838
1839 // Convert YUY2 to Y.
1840 LIBYUV_API
YUY2ToY(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,int width,int height)1841 int YUY2ToY(const uint8_t* src_yuy2,
1842 int src_stride_yuy2,
1843 uint8_t* dst_y,
1844 int dst_stride_y,
1845 int width,
1846 int height) {
1847 int y;
1848 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
1849 YUY2ToYRow_C;
1850 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
1851 return -1;
1852 }
1853 // Negative height means invert the image.
1854 if (height < 0) {
1855 height = -height;
1856 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
1857 src_stride_yuy2 = -src_stride_yuy2;
1858 }
1859 // Coalesce rows.
1860 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
1861 width *= height;
1862 height = 1;
1863 src_stride_yuy2 = dst_stride_y = 0;
1864 }
1865 #if defined(HAS_YUY2TOYROW_SSE2)
1866 if (TestCpuFlag(kCpuHasSSE2)) {
1867 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
1868 if (IS_ALIGNED(width, 16)) {
1869 YUY2ToYRow = YUY2ToYRow_SSE2;
1870 }
1871 }
1872 #endif
1873 #if defined(HAS_YUY2TOYROW_AVX2)
1874 if (TestCpuFlag(kCpuHasAVX2)) {
1875 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
1876 if (IS_ALIGNED(width, 32)) {
1877 YUY2ToYRow = YUY2ToYRow_AVX2;
1878 }
1879 }
1880 #endif
1881 #if defined(HAS_YUY2TOYROW_NEON)
1882 if (TestCpuFlag(kCpuHasNEON)) {
1883 YUY2ToYRow = YUY2ToYRow_Any_NEON;
1884 if (IS_ALIGNED(width, 16)) {
1885 YUY2ToYRow = YUY2ToYRow_NEON;
1886 }
1887 }
1888 #endif
1889 #if defined(HAS_YUY2TOYROW_MMI)
1890 if (TestCpuFlag(kCpuHasMMI)) {
1891 YUY2ToYRow = YUY2ToYRow_Any_MMI;
1892 if (IS_ALIGNED(width, 8)) {
1893 YUY2ToYRow = YUY2ToYRow_MMI;
1894 }
1895 }
1896 #endif
1897 #if defined(HAS_YUY2TOYROW_MSA)
1898 if (TestCpuFlag(kCpuHasMSA)) {
1899 YUY2ToYRow = YUY2ToYRow_Any_MSA;
1900 if (IS_ALIGNED(width, 32)) {
1901 YUY2ToYRow = YUY2ToYRow_MSA;
1902 }
1903 }
1904 #endif
1905
1906 for (y = 0; y < height; ++y) {
1907 YUY2ToYRow(src_yuy2, dst_y, width);
1908 src_yuy2 += src_stride_yuy2;
1909 dst_y += dst_stride_y;
1910 }
1911 return 0;
1912 }
1913
1914 // Mirror a plane of data.
1915 // See Also I400Mirror
1916 LIBYUV_API
MirrorPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)1917 void MirrorPlane(const uint8_t* src_y,
1918 int src_stride_y,
1919 uint8_t* dst_y,
1920 int dst_stride_y,
1921 int width,
1922 int height) {
1923 int y;
1924 void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
1925 // Negative height means invert the image.
1926 if (height < 0) {
1927 height = -height;
1928 src_y = src_y + (height - 1) * src_stride_y;
1929 src_stride_y = -src_stride_y;
1930 }
1931 #if defined(HAS_MIRRORROW_NEON)
1932 if (TestCpuFlag(kCpuHasNEON)) {
1933 MirrorRow = MirrorRow_Any_NEON;
1934 if (IS_ALIGNED(width, 32)) {
1935 MirrorRow = MirrorRow_NEON;
1936 }
1937 }
1938 #endif
1939 #if defined(HAS_MIRRORROW_SSSE3)
1940 if (TestCpuFlag(kCpuHasSSSE3)) {
1941 MirrorRow = MirrorRow_Any_SSSE3;
1942 if (IS_ALIGNED(width, 16)) {
1943 MirrorRow = MirrorRow_SSSE3;
1944 }
1945 }
1946 #endif
1947 #if defined(HAS_MIRRORROW_AVX2)
1948 if (TestCpuFlag(kCpuHasAVX2)) {
1949 MirrorRow = MirrorRow_Any_AVX2;
1950 if (IS_ALIGNED(width, 32)) {
1951 MirrorRow = MirrorRow_AVX2;
1952 }
1953 }
1954 #endif
1955 #if defined(HAS_MIRRORROW_MMI)
1956 if (TestCpuFlag(kCpuHasMMI)) {
1957 MirrorRow = MirrorRow_Any_MMI;
1958 if (IS_ALIGNED(width, 8)) {
1959 MirrorRow = MirrorRow_MMI;
1960 }
1961 }
1962 #endif
1963 #if defined(HAS_MIRRORROW_MSA)
1964 if (TestCpuFlag(kCpuHasMSA)) {
1965 MirrorRow = MirrorRow_Any_MSA;
1966 if (IS_ALIGNED(width, 64)) {
1967 MirrorRow = MirrorRow_MSA;
1968 }
1969 }
1970 #endif
1971
1972 // Mirror plane
1973 for (y = 0; y < height; ++y) {
1974 MirrorRow(src_y, dst_y, width);
1975 src_y += src_stride_y;
1976 dst_y += dst_stride_y;
1977 }
1978 }
1979
1980 // Mirror a plane of UV data.
1981 LIBYUV_API
MirrorUVPlane(const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_uv,int dst_stride_uv,int width,int height)1982 void MirrorUVPlane(const uint8_t* src_uv,
1983 int src_stride_uv,
1984 uint8_t* dst_uv,
1985 int dst_stride_uv,
1986 int width,
1987 int height) {
1988 int y;
1989 void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) =
1990 MirrorUVRow_C;
1991 // Negative height means invert the image.
1992 if (height < 0) {
1993 height = -height;
1994 src_uv = src_uv + (height - 1) * src_stride_uv;
1995 src_stride_uv = -src_stride_uv;
1996 }
1997 #if defined(HAS_MIRRORUVROW_NEON)
1998 if (TestCpuFlag(kCpuHasNEON)) {
1999 MirrorUVRow = MirrorUVRow_Any_NEON;
2000 if (IS_ALIGNED(width, 32)) {
2001 MirrorUVRow = MirrorUVRow_NEON;
2002 }
2003 }
2004 #endif
2005 #if defined(HAS_MIRRORUVROW_SSSE3)
2006 if (TestCpuFlag(kCpuHasSSSE3)) {
2007 MirrorUVRow = MirrorUVRow_Any_SSSE3;
2008 if (IS_ALIGNED(width, 8)) {
2009 MirrorUVRow = MirrorUVRow_SSSE3;
2010 }
2011 }
2012 #endif
2013 #if defined(HAS_MIRRORUVROW_AVX2)
2014 if (TestCpuFlag(kCpuHasAVX2)) {
2015 MirrorUVRow = MirrorUVRow_Any_AVX2;
2016 if (IS_ALIGNED(width, 16)) {
2017 MirrorUVRow = MirrorUVRow_AVX2;
2018 }
2019 }
2020 #endif
2021 #if defined(HAS_MIRRORUVROW_MSA)
2022 if (TestCpuFlag(kCpuHasMSA)) {
2023 MirrorUVRow = MirrorUVRow_Any_MSA;
2024 if (IS_ALIGNED(width, 8)) {
2025 MirrorUVRow = MirrorUVRow_MSA;
2026 }
2027 }
2028 #endif
2029
2030 // MirrorUV plane
2031 for (y = 0; y < height; ++y) {
2032 MirrorUVRow(src_uv, dst_uv, width);
2033 src_uv += src_stride_uv;
2034 dst_uv += dst_stride_uv;
2035 }
2036 }
2037
2038 // Mirror I400 with optional flipping
2039 LIBYUV_API
I400Mirror(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)2040 int I400Mirror(const uint8_t* src_y,
2041 int src_stride_y,
2042 uint8_t* dst_y,
2043 int dst_stride_y,
2044 int width,
2045 int height) {
2046 if (!src_y || !dst_y || width <= 0 || height == 0) {
2047 return -1;
2048 }
2049 // Negative height means invert the image.
2050 if (height < 0) {
2051 height = -height;
2052 src_y = src_y + (height - 1) * src_stride_y;
2053 src_stride_y = -src_stride_y;
2054 }
2055
2056 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2057 return 0;
2058 }
2059
2060 // Mirror I420 with optional flipping
2061 LIBYUV_API
I420Mirror(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)2062 int I420Mirror(const uint8_t* src_y,
2063 int src_stride_y,
2064 const uint8_t* src_u,
2065 int src_stride_u,
2066 const uint8_t* src_v,
2067 int src_stride_v,
2068 uint8_t* dst_y,
2069 int dst_stride_y,
2070 uint8_t* dst_u,
2071 int dst_stride_u,
2072 uint8_t* dst_v,
2073 int dst_stride_v,
2074 int width,
2075 int height) {
2076 int halfwidth = (width + 1) >> 1;
2077 int halfheight = (height + 1) >> 1;
2078 if (!src_y || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
2079 height == 0) {
2080 return -1;
2081 }
2082 // Negative height means invert the image.
2083 if (height < 0) {
2084 height = -height;
2085 halfheight = (height + 1) >> 1;
2086 src_y = src_y + (height - 1) * src_stride_y;
2087 src_u = src_u + (halfheight - 1) * src_stride_u;
2088 src_v = src_v + (halfheight - 1) * src_stride_v;
2089 src_stride_y = -src_stride_y;
2090 src_stride_u = -src_stride_u;
2091 src_stride_v = -src_stride_v;
2092 }
2093
2094 if (dst_y) {
2095 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2096 }
2097 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
2098 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
2099 return 0;
2100 }
2101
2102 // NV12 mirror.
2103 LIBYUV_API
NV12Mirror(const uint8_t * src_y,int src_stride_y,const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int width,int height)2104 int NV12Mirror(const uint8_t* src_y,
2105 int src_stride_y,
2106 const uint8_t* src_uv,
2107 int src_stride_uv,
2108 uint8_t* dst_y,
2109 int dst_stride_y,
2110 uint8_t* dst_uv,
2111 int dst_stride_uv,
2112 int width,
2113 int height) {
2114 int halfwidth = (width + 1) >> 1;
2115 int halfheight = (height + 1) >> 1;
2116 if (!src_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
2117 return -1;
2118 }
2119 // Negative height means invert the image.
2120 if (height < 0) {
2121 height = -height;
2122 halfheight = (height + 1) >> 1;
2123 src_y = src_y + (height - 1) * src_stride_y;
2124 src_uv = src_uv + (halfheight - 1) * src_stride_uv;
2125 src_stride_y = -src_stride_y;
2126 src_stride_uv = -src_stride_uv;
2127 }
2128
2129 if (dst_y) {
2130 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2131 }
2132 MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth,
2133 halfheight);
2134 return 0;
2135 }
2136
2137 // ARGB mirror.
2138 LIBYUV_API
ARGBMirror(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2139 int ARGBMirror(const uint8_t* src_argb,
2140 int src_stride_argb,
2141 uint8_t* dst_argb,
2142 int dst_stride_argb,
2143 int width,
2144 int height) {
2145 int y;
2146 void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2147 ARGBMirrorRow_C;
2148 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2149 return -1;
2150 }
2151 // Negative height means invert the image.
2152 if (height < 0) {
2153 height = -height;
2154 src_argb = src_argb + (height - 1) * src_stride_argb;
2155 src_stride_argb = -src_stride_argb;
2156 }
2157 #if defined(HAS_ARGBMIRRORROW_NEON)
2158 if (TestCpuFlag(kCpuHasNEON)) {
2159 ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
2160 if (IS_ALIGNED(width, 8)) {
2161 ARGBMirrorRow = ARGBMirrorRow_NEON;
2162 }
2163 }
2164 #endif
2165 #if defined(HAS_ARGBMIRRORROW_SSE2)
2166 if (TestCpuFlag(kCpuHasSSE2)) {
2167 ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
2168 if (IS_ALIGNED(width, 4)) {
2169 ARGBMirrorRow = ARGBMirrorRow_SSE2;
2170 }
2171 }
2172 #endif
2173 #if defined(HAS_ARGBMIRRORROW_AVX2)
2174 if (TestCpuFlag(kCpuHasAVX2)) {
2175 ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
2176 if (IS_ALIGNED(width, 8)) {
2177 ARGBMirrorRow = ARGBMirrorRow_AVX2;
2178 }
2179 }
2180 #endif
2181 #if defined(HAS_ARGBMIRRORROW_MMI)
2182 if (TestCpuFlag(kCpuHasMMI)) {
2183 ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
2184 if (IS_ALIGNED(width, 2)) {
2185 ARGBMirrorRow = ARGBMirrorRow_MMI;
2186 }
2187 }
2188 #endif
2189 #if defined(HAS_ARGBMIRRORROW_MSA)
2190 if (TestCpuFlag(kCpuHasMSA)) {
2191 ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
2192 if (IS_ALIGNED(width, 16)) {
2193 ARGBMirrorRow = ARGBMirrorRow_MSA;
2194 }
2195 }
2196 #endif
2197
2198 // Mirror plane
2199 for (y = 0; y < height; ++y) {
2200 ARGBMirrorRow(src_argb, dst_argb, width);
2201 src_argb += src_stride_argb;
2202 dst_argb += dst_stride_argb;
2203 }
2204 return 0;
2205 }
2206
2207 // RGB24 mirror.
2208 LIBYUV_API
RGB24Mirror(const uint8_t * src_rgb24,int src_stride_rgb24,uint8_t * dst_rgb24,int dst_stride_rgb24,int width,int height)2209 int RGB24Mirror(const uint8_t* src_rgb24,
2210 int src_stride_rgb24,
2211 uint8_t* dst_rgb24,
2212 int dst_stride_rgb24,
2213 int width,
2214 int height) {
2215 int y;
2216 void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2217 RGB24MirrorRow_C;
2218 if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
2219 return -1;
2220 }
2221 // Negative height means invert the image.
2222 if (height < 0) {
2223 height = -height;
2224 src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
2225 src_stride_rgb24 = -src_stride_rgb24;
2226 }
2227 #if defined(HAS_RGB24MIRRORROW_NEON)
2228 if (TestCpuFlag(kCpuHasNEON)) {
2229 RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
2230 if (IS_ALIGNED(width, 16)) {
2231 RGB24MirrorRow = RGB24MirrorRow_NEON;
2232 }
2233 }
2234 #endif
2235 #if defined(HAS_RGB24MIRRORROW_SSSE3)
2236 if (TestCpuFlag(kCpuHasSSSE3)) {
2237 RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
2238 if (IS_ALIGNED(width, 16)) {
2239 RGB24MirrorRow = RGB24MirrorRow_SSSE3;
2240 }
2241 }
2242 #endif
2243
2244 // Mirror plane
2245 for (y = 0; y < height; ++y) {
2246 RGB24MirrorRow(src_rgb24, dst_rgb24, width);
2247 src_rgb24 += src_stride_rgb24;
2248 dst_rgb24 += dst_stride_rgb24;
2249 }
2250 return 0;
2251 }
2252
2253 // Get a blender that optimized for the CPU and pixel count.
2254 // As there are 6 blenders to choose from, the caller should try to use
2255 // the same blend function for all pixels if possible.
2256 LIBYUV_API
GetARGBBlend()2257 ARGBBlendRow GetARGBBlend() {
2258 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2259 uint8_t* dst_argb, int width) = ARGBBlendRow_C;
2260 #if defined(HAS_ARGBBLENDROW_SSSE3)
2261 if (TestCpuFlag(kCpuHasSSSE3)) {
2262 ARGBBlendRow = ARGBBlendRow_SSSE3;
2263 return ARGBBlendRow;
2264 }
2265 #endif
2266 #if defined(HAS_ARGBBLENDROW_NEON)
2267 if (TestCpuFlag(kCpuHasNEON)) {
2268 ARGBBlendRow = ARGBBlendRow_NEON;
2269 }
2270 #endif
2271 #if defined(HAS_ARGBBLENDROW_MMI)
2272 if (TestCpuFlag(kCpuHasMMI)) {
2273 ARGBBlendRow = ARGBBlendRow_MMI;
2274 }
2275 #endif
2276 #if defined(HAS_ARGBBLENDROW_MSA)
2277 if (TestCpuFlag(kCpuHasMSA)) {
2278 ARGBBlendRow = ARGBBlendRow_MSA;
2279 }
2280 #endif
2281 return ARGBBlendRow;
2282 }
2283
2284 // Alpha Blend 2 ARGB images and store to destination.
2285 LIBYUV_API
ARGBBlend(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2286 int ARGBBlend(const uint8_t* src_argb0,
2287 int src_stride_argb0,
2288 const uint8_t* src_argb1,
2289 int src_stride_argb1,
2290 uint8_t* dst_argb,
2291 int dst_stride_argb,
2292 int width,
2293 int height) {
2294 int y;
2295 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2296 uint8_t* dst_argb, int width) = GetARGBBlend();
2297 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2298 return -1;
2299 }
2300 // Negative height means invert the image.
2301 if (height < 0) {
2302 height = -height;
2303 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2304 dst_stride_argb = -dst_stride_argb;
2305 }
2306 // Coalesce rows.
2307 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2308 dst_stride_argb == width * 4) {
2309 width *= height;
2310 height = 1;
2311 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2312 }
2313
2314 for (y = 0; y < height; ++y) {
2315 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
2316 src_argb0 += src_stride_argb0;
2317 src_argb1 += src_stride_argb1;
2318 dst_argb += dst_stride_argb;
2319 }
2320 return 0;
2321 }
2322
2323 // Alpha Blend plane and store to destination.
2324 LIBYUV_API
BlendPlane(const uint8_t * src_y0,int src_stride_y0,const uint8_t * src_y1,int src_stride_y1,const uint8_t * alpha,int alpha_stride,uint8_t * dst_y,int dst_stride_y,int width,int height)2325 int BlendPlane(const uint8_t* src_y0,
2326 int src_stride_y0,
2327 const uint8_t* src_y1,
2328 int src_stride_y1,
2329 const uint8_t* alpha,
2330 int alpha_stride,
2331 uint8_t* dst_y,
2332 int dst_stride_y,
2333 int width,
2334 int height) {
2335 int y;
2336 void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2337 const uint8_t* alpha, uint8_t* dst, int width) =
2338 BlendPlaneRow_C;
2339 if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
2340 return -1;
2341 }
2342 // Negative height means invert the image.
2343 if (height < 0) {
2344 height = -height;
2345 dst_y = dst_y + (height - 1) * dst_stride_y;
2346 dst_stride_y = -dst_stride_y;
2347 }
2348
2349 // Coalesce rows for Y plane.
2350 if (src_stride_y0 == width && src_stride_y1 == width &&
2351 alpha_stride == width && dst_stride_y == width) {
2352 width *= height;
2353 height = 1;
2354 src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
2355 }
2356
2357 #if defined(HAS_BLENDPLANEROW_SSSE3)
2358 if (TestCpuFlag(kCpuHasSSSE3)) {
2359 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2360 if (IS_ALIGNED(width, 8)) {
2361 BlendPlaneRow = BlendPlaneRow_SSSE3;
2362 }
2363 }
2364 #endif
2365 #if defined(HAS_BLENDPLANEROW_AVX2)
2366 if (TestCpuFlag(kCpuHasAVX2)) {
2367 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2368 if (IS_ALIGNED(width, 32)) {
2369 BlendPlaneRow = BlendPlaneRow_AVX2;
2370 }
2371 }
2372 #endif
2373 #if defined(HAS_BLENDPLANEROW_MMI)
2374 if (TestCpuFlag(kCpuHasMMI)) {
2375 BlendPlaneRow = BlendPlaneRow_Any_MMI;
2376 if (IS_ALIGNED(width, 8)) {
2377 BlendPlaneRow = BlendPlaneRow_MMI;
2378 }
2379 }
2380 #endif
2381
2382 for (y = 0; y < height; ++y) {
2383 BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
2384 src_y0 += src_stride_y0;
2385 src_y1 += src_stride_y1;
2386 alpha += alpha_stride;
2387 dst_y += dst_stride_y;
2388 }
2389 return 0;
2390 }
2391
#define MAXTWIDTH 2048
// Alpha Blend YUV images and store to destination.
// Blends (src_y0, src_u0, src_v0) over (src_y1, src_u1, src_v1) into the
// dst planes, weighted per-pixel by the full-resolution alpha plane. The
// alpha plane is box-subsampled 2x2 on the fly to blend the half-resolution
// U and V planes. Negative height flips the output vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I420Blend(const uint8_t* src_y0,
              int src_stride_y0,
              const uint8_t* src_u0,
              int src_stride_u0,
              const uint8_t* src_v0,
              int src_stride_v0,
              const uint8_t* src_y1,
              int src_stride_y1,
              const uint8_t* src_u1,
              int src_stride_u1,
              const uint8_t* src_v1,
              int src_stride_v1,
              const uint8_t* alpha,
              int alpha_stride,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int y;
  // Half width/height for UV.
  int halfwidth = (width + 1) >> 1;
  // Row blender for the U/V planes, selected below by CPU capability and
  // halfwidth alignment.
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
                        const uint8_t* alpha, uint8_t* dst, int width) =
      BlendPlaneRow_C;
  // 2x2 box subsampler used to shrink two alpha rows to one half-width row.
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Blend Y plane.
  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
             dst_y, dst_stride_y, width, height);

#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    BlendPlaneRow = BlendPlaneRow_Any_MMI;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_MMI;
    }
  }
#endif
  // Odd widths need the _Odd_ subsampler variants, which handle the last
  // source column specially.
  if (!IS_ALIGNED(width, 2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
  }
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
      if (IS_ALIGNED(halfwidth, 32)) {
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_MMI;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_MMI;
      if (IS_ALIGNED(halfwidth, 8)) {
        ScaleRowDown2 = ScaleRowDown2Box_MMI;
      }
    }
  }
#endif

  // Row buffer for intermediate alpha pixels.
  align_buffer_64(halfalpha, halfwidth);
  // Two alpha rows are consumed per UV row (4:2:0 subsampling).
  for (y = 0; y < height; y += 2) {
    // last row of odd height image use 1 row of alpha instead of 2.
    if (y == (height - 1)) {
      alpha_stride = 0;
    }
    // Subsample 2 rows of UV to half width and half height.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
    alpha += alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
    src_u1 += src_stride_u1;
    dst_u += dst_stride_u;
    src_v0 += src_stride_v0;
    src_v1 += src_stride_v1;
    dst_v += dst_stride_v;
  }
  free_aligned_buffer_64(halfalpha);
  return 0;
}
2535
2536 // Multiply 2 ARGB images and store to destination.
2537 LIBYUV_API
ARGBMultiply(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2538 int ARGBMultiply(const uint8_t* src_argb0,
2539 int src_stride_argb0,
2540 const uint8_t* src_argb1,
2541 int src_stride_argb1,
2542 uint8_t* dst_argb,
2543 int dst_stride_argb,
2544 int width,
2545 int height) {
2546 int y;
2547 void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
2548 uint8_t* dst, int width) = ARGBMultiplyRow_C;
2549 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2550 return -1;
2551 }
2552 // Negative height means invert the image.
2553 if (height < 0) {
2554 height = -height;
2555 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2556 dst_stride_argb = -dst_stride_argb;
2557 }
2558 // Coalesce rows.
2559 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2560 dst_stride_argb == width * 4) {
2561 width *= height;
2562 height = 1;
2563 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2564 }
2565 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
2566 if (TestCpuFlag(kCpuHasSSE2)) {
2567 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
2568 if (IS_ALIGNED(width, 4)) {
2569 ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
2570 }
2571 }
2572 #endif
2573 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
2574 if (TestCpuFlag(kCpuHasAVX2)) {
2575 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
2576 if (IS_ALIGNED(width, 8)) {
2577 ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
2578 }
2579 }
2580 #endif
2581 #if defined(HAS_ARGBMULTIPLYROW_NEON)
2582 if (TestCpuFlag(kCpuHasNEON)) {
2583 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
2584 if (IS_ALIGNED(width, 8)) {
2585 ARGBMultiplyRow = ARGBMultiplyRow_NEON;
2586 }
2587 }
2588 #endif
2589 #if defined(HAS_ARGBMULTIPLYROW_MMI)
2590 if (TestCpuFlag(kCpuHasMMI)) {
2591 ARGBMultiplyRow = ARGBMultiplyRow_Any_MMI;
2592 if (IS_ALIGNED(width, 2)) {
2593 ARGBMultiplyRow = ARGBMultiplyRow_MMI;
2594 }
2595 }
2596 #endif
2597 #if defined(HAS_ARGBMULTIPLYROW_MSA)
2598 if (TestCpuFlag(kCpuHasMSA)) {
2599 ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
2600 if (IS_ALIGNED(width, 4)) {
2601 ARGBMultiplyRow = ARGBMultiplyRow_MSA;
2602 }
2603 }
2604 #endif
2605
2606 // Multiply plane
2607 for (y = 0; y < height; ++y) {
2608 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
2609 src_argb0 += src_stride_argb0;
2610 src_argb1 += src_stride_argb1;
2611 dst_argb += dst_stride_argb;
2612 }
2613 return 0;
2614 }
2615
2616 // Add 2 ARGB images and store to destination.
2617 LIBYUV_API
ARGBAdd(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2618 int ARGBAdd(const uint8_t* src_argb0,
2619 int src_stride_argb0,
2620 const uint8_t* src_argb1,
2621 int src_stride_argb1,
2622 uint8_t* dst_argb,
2623 int dst_stride_argb,
2624 int width,
2625 int height) {
2626 int y;
2627 void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
2628 int width) = ARGBAddRow_C;
2629 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2630 return -1;
2631 }
2632 // Negative height means invert the image.
2633 if (height < 0) {
2634 height = -height;
2635 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2636 dst_stride_argb = -dst_stride_argb;
2637 }
2638 // Coalesce rows.
2639 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2640 dst_stride_argb == width * 4) {
2641 width *= height;
2642 height = 1;
2643 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2644 }
2645 #if defined(HAS_ARGBADDROW_SSE2)
2646 if (TestCpuFlag(kCpuHasSSE2)) {
2647 ARGBAddRow = ARGBAddRow_SSE2;
2648 }
2649 #endif
2650 #if defined(HAS_ARGBADDROW_SSE2)
2651 if (TestCpuFlag(kCpuHasSSE2)) {
2652 ARGBAddRow = ARGBAddRow_Any_SSE2;
2653 if (IS_ALIGNED(width, 4)) {
2654 ARGBAddRow = ARGBAddRow_SSE2;
2655 }
2656 }
2657 #endif
2658 #if defined(HAS_ARGBADDROW_AVX2)
2659 if (TestCpuFlag(kCpuHasAVX2)) {
2660 ARGBAddRow = ARGBAddRow_Any_AVX2;
2661 if (IS_ALIGNED(width, 8)) {
2662 ARGBAddRow = ARGBAddRow_AVX2;
2663 }
2664 }
2665 #endif
2666 #if defined(HAS_ARGBADDROW_NEON)
2667 if (TestCpuFlag(kCpuHasNEON)) {
2668 ARGBAddRow = ARGBAddRow_Any_NEON;
2669 if (IS_ALIGNED(width, 8)) {
2670 ARGBAddRow = ARGBAddRow_NEON;
2671 }
2672 }
2673 #endif
2674 #if defined(HAS_ARGBADDROW_MMI)
2675 if (TestCpuFlag(kCpuHasMMI)) {
2676 ARGBAddRow = ARGBAddRow_Any_MMI;
2677 if (IS_ALIGNED(width, 2)) {
2678 ARGBAddRow = ARGBAddRow_MMI;
2679 }
2680 }
2681 #endif
2682 #if defined(HAS_ARGBADDROW_MSA)
2683 if (TestCpuFlag(kCpuHasMSA)) {
2684 ARGBAddRow = ARGBAddRow_Any_MSA;
2685 if (IS_ALIGNED(width, 8)) {
2686 ARGBAddRow = ARGBAddRow_MSA;
2687 }
2688 }
2689 #endif
2690
2691 // Add plane
2692 for (y = 0; y < height; ++y) {
2693 ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
2694 src_argb0 += src_stride_argb0;
2695 src_argb1 += src_stride_argb1;
2696 dst_argb += dst_stride_argb;
2697 }
2698 return 0;
2699 }
2700
2701 // Subtract 2 ARGB images and store to destination.
2702 LIBYUV_API
ARGBSubtract(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2703 int ARGBSubtract(const uint8_t* src_argb0,
2704 int src_stride_argb0,
2705 const uint8_t* src_argb1,
2706 int src_stride_argb1,
2707 uint8_t* dst_argb,
2708 int dst_stride_argb,
2709 int width,
2710 int height) {
2711 int y;
2712 void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
2713 uint8_t* dst, int width) = ARGBSubtractRow_C;
2714 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2715 return -1;
2716 }
2717 // Negative height means invert the image.
2718 if (height < 0) {
2719 height = -height;
2720 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2721 dst_stride_argb = -dst_stride_argb;
2722 }
2723 // Coalesce rows.
2724 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2725 dst_stride_argb == width * 4) {
2726 width *= height;
2727 height = 1;
2728 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2729 }
2730 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
2731 if (TestCpuFlag(kCpuHasSSE2)) {
2732 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
2733 if (IS_ALIGNED(width, 4)) {
2734 ARGBSubtractRow = ARGBSubtractRow_SSE2;
2735 }
2736 }
2737 #endif
2738 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
2739 if (TestCpuFlag(kCpuHasAVX2)) {
2740 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
2741 if (IS_ALIGNED(width, 8)) {
2742 ARGBSubtractRow = ARGBSubtractRow_AVX2;
2743 }
2744 }
2745 #endif
2746 #if defined(HAS_ARGBSUBTRACTROW_NEON)
2747 if (TestCpuFlag(kCpuHasNEON)) {
2748 ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
2749 if (IS_ALIGNED(width, 8)) {
2750 ARGBSubtractRow = ARGBSubtractRow_NEON;
2751 }
2752 }
2753 #endif
2754 #if defined(HAS_ARGBSUBTRACTROW_MMI)
2755 if (TestCpuFlag(kCpuHasMMI)) {
2756 ARGBSubtractRow = ARGBSubtractRow_Any_MMI;
2757 if (IS_ALIGNED(width, 2)) {
2758 ARGBSubtractRow = ARGBSubtractRow_MMI;
2759 }
2760 }
2761 #endif
2762 #if defined(HAS_ARGBSUBTRACTROW_MSA)
2763 if (TestCpuFlag(kCpuHasMSA)) {
2764 ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
2765 if (IS_ALIGNED(width, 8)) {
2766 ARGBSubtractRow = ARGBSubtractRow_MSA;
2767 }
2768 }
2769 #endif
2770
2771 // Subtract plane
2772 for (y = 0; y < height; ++y) {
2773 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
2774 src_argb0 += src_stride_argb0;
2775 src_argb1 += src_stride_argb1;
2776 dst_argb += dst_stride_argb;
2777 }
2778 return 0;
2779 }
2780
2781 // Convert RAW to RGB24.
2782 LIBYUV_API
RAWToRGB24(const uint8_t * src_raw,int src_stride_raw,uint8_t * dst_rgb24,int dst_stride_rgb24,int width,int height)2783 int RAWToRGB24(const uint8_t* src_raw,
2784 int src_stride_raw,
2785 uint8_t* dst_rgb24,
2786 int dst_stride_rgb24,
2787 int width,
2788 int height) {
2789 int y;
2790 void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
2791 RAWToRGB24Row_C;
2792 if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
2793 return -1;
2794 }
2795 // Negative height means invert the image.
2796 if (height < 0) {
2797 height = -height;
2798 src_raw = src_raw + (height - 1) * src_stride_raw;
2799 src_stride_raw = -src_stride_raw;
2800 }
2801 // Coalesce rows.
2802 if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
2803 width *= height;
2804 height = 1;
2805 src_stride_raw = dst_stride_rgb24 = 0;
2806 }
2807 #if defined(HAS_RAWTORGB24ROW_SSSE3)
2808 if (TestCpuFlag(kCpuHasSSSE3)) {
2809 RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
2810 if (IS_ALIGNED(width, 8)) {
2811 RAWToRGB24Row = RAWToRGB24Row_SSSE3;
2812 }
2813 }
2814 #endif
2815 #if defined(HAS_RAWTORGB24ROW_NEON)
2816 if (TestCpuFlag(kCpuHasNEON)) {
2817 RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
2818 if (IS_ALIGNED(width, 8)) {
2819 RAWToRGB24Row = RAWToRGB24Row_NEON;
2820 }
2821 }
2822 #endif
2823 #if defined(HAS_RAWTORGB24ROW_MMI)
2824 if (TestCpuFlag(kCpuHasMMI)) {
2825 RAWToRGB24Row = RAWToRGB24Row_Any_MMI;
2826 if (IS_ALIGNED(width, 4)) {
2827 RAWToRGB24Row = RAWToRGB24Row_MMI;
2828 }
2829 }
2830 #endif
2831 #if defined(HAS_RAWTORGB24ROW_MSA)
2832 if (TestCpuFlag(kCpuHasMSA)) {
2833 RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
2834 if (IS_ALIGNED(width, 16)) {
2835 RAWToRGB24Row = RAWToRGB24Row_MSA;
2836 }
2837 }
2838 #endif
2839
2840 for (y = 0; y < height; ++y) {
2841 RAWToRGB24Row(src_raw, dst_rgb24, width);
2842 src_raw += src_stride_raw;
2843 dst_rgb24 += dst_stride_rgb24;
2844 }
2845 return 0;
2846 }
2847
2848 LIBYUV_API
SetPlane(uint8_t * dst_y,int dst_stride_y,int width,int height,uint32_t value)2849 void SetPlane(uint8_t* dst_y,
2850 int dst_stride_y,
2851 int width,
2852 int height,
2853 uint32_t value) {
2854 int y;
2855 void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
2856 if (height < 0) {
2857 height = -height;
2858 dst_y = dst_y + (height - 1) * dst_stride_y;
2859 dst_stride_y = -dst_stride_y;
2860 }
2861 // Coalesce rows.
2862 if (dst_stride_y == width) {
2863 width *= height;
2864 height = 1;
2865 dst_stride_y = 0;
2866 }
2867 #if defined(HAS_SETROW_NEON)
2868 if (TestCpuFlag(kCpuHasNEON)) {
2869 SetRow = SetRow_Any_NEON;
2870 if (IS_ALIGNED(width, 16)) {
2871 SetRow = SetRow_NEON;
2872 }
2873 }
2874 #endif
2875 #if defined(HAS_SETROW_X86)
2876 if (TestCpuFlag(kCpuHasX86)) {
2877 SetRow = SetRow_Any_X86;
2878 if (IS_ALIGNED(width, 4)) {
2879 SetRow = SetRow_X86;
2880 }
2881 }
2882 #endif
2883 #if defined(HAS_SETROW_ERMS)
2884 if (TestCpuFlag(kCpuHasERMS)) {
2885 SetRow = SetRow_ERMS;
2886 }
2887 #endif
2888 #if defined(HAS_SETROW_MSA)
2889 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
2890 SetRow = SetRow_MSA;
2891 }
2892 #endif
2893
2894 // Set plane
2895 for (y = 0; y < height; ++y) {
2896 SetRow(dst_y, value, width);
2897 dst_y += dst_stride_y;
2898 }
2899 }
2900
2901 // Draw a rectangle into I420
2902 LIBYUV_API
I420Rect(uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int x,int y,int width,int height,int value_y,int value_u,int value_v)2903 int I420Rect(uint8_t* dst_y,
2904 int dst_stride_y,
2905 uint8_t* dst_u,
2906 int dst_stride_u,
2907 uint8_t* dst_v,
2908 int dst_stride_v,
2909 int x,
2910 int y,
2911 int width,
2912 int height,
2913 int value_y,
2914 int value_u,
2915 int value_v) {
2916 int halfwidth = (width + 1) >> 1;
2917 int halfheight = (height + 1) >> 1;
2918 uint8_t* start_y = dst_y + y * dst_stride_y + x;
2919 uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
2920 uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
2921 if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
2922 y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
2923 value_v < 0 || value_v > 255) {
2924 return -1;
2925 }
2926
2927 SetPlane(start_y, dst_stride_y, width, height, value_y);
2928 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
2929 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
2930 return 0;
2931 }
2932
2933 // Draw a rectangle into ARGB
2934 LIBYUV_API
ARGBRect(uint8_t * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height,uint32_t value)2935 int ARGBRect(uint8_t* dst_argb,
2936 int dst_stride_argb,
2937 int dst_x,
2938 int dst_y,
2939 int width,
2940 int height,
2941 uint32_t value) {
2942 int y;
2943 void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
2944 ARGBSetRow_C;
2945 if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
2946 return -1;
2947 }
2948 if (height < 0) {
2949 height = -height;
2950 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2951 dst_stride_argb = -dst_stride_argb;
2952 }
2953 dst_argb += dst_y * dst_stride_argb + dst_x * 4;
2954 // Coalesce rows.
2955 if (dst_stride_argb == width * 4) {
2956 width *= height;
2957 height = 1;
2958 dst_stride_argb = 0;
2959 }
2960
2961 #if defined(HAS_ARGBSETROW_NEON)
2962 if (TestCpuFlag(kCpuHasNEON)) {
2963 ARGBSetRow = ARGBSetRow_Any_NEON;
2964 if (IS_ALIGNED(width, 4)) {
2965 ARGBSetRow = ARGBSetRow_NEON;
2966 }
2967 }
2968 #endif
2969 #if defined(HAS_ARGBSETROW_X86)
2970 if (TestCpuFlag(kCpuHasX86)) {
2971 ARGBSetRow = ARGBSetRow_X86;
2972 }
2973 #endif
2974 #if defined(HAS_ARGBSETROW_MMI)
2975 if (TestCpuFlag(kCpuHasMMI)) {
2976 ARGBSetRow = ARGBSetRow_Any_MMI;
2977 if (IS_ALIGNED(width, 4)) {
2978 ARGBSetRow = ARGBSetRow_MMI;
2979 }
2980 }
2981 #endif
2982 #if defined(HAS_ARGBSETROW_MSA)
2983 if (TestCpuFlag(kCpuHasMSA)) {
2984 ARGBSetRow = ARGBSetRow_Any_MSA;
2985 if (IS_ALIGNED(width, 4)) {
2986 ARGBSetRow = ARGBSetRow_MSA;
2987 }
2988 }
2989 #endif
2990
2991 // Set plane
2992 for (y = 0; y < height; ++y) {
2993 ARGBSetRow(dst_argb, value, width);
2994 dst_argb += dst_stride_argb;
2995 }
2996 return 0;
2997 }
2998
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
3011
3012 LIBYUV_API
ARGBAttenuate(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3013 int ARGBAttenuate(const uint8_t* src_argb,
3014 int src_stride_argb,
3015 uint8_t* dst_argb,
3016 int dst_stride_argb,
3017 int width,
3018 int height) {
3019 int y;
3020 void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3021 int width) = ARGBAttenuateRow_C;
3022 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3023 return -1;
3024 }
3025 if (height < 0) {
3026 height = -height;
3027 src_argb = src_argb + (height - 1) * src_stride_argb;
3028 src_stride_argb = -src_stride_argb;
3029 }
3030 // Coalesce rows.
3031 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3032 width *= height;
3033 height = 1;
3034 src_stride_argb = dst_stride_argb = 0;
3035 }
3036 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
3037 if (TestCpuFlag(kCpuHasSSSE3)) {
3038 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
3039 if (IS_ALIGNED(width, 4)) {
3040 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
3041 }
3042 }
3043 #endif
3044 #if defined(HAS_ARGBATTENUATEROW_AVX2)
3045 if (TestCpuFlag(kCpuHasAVX2)) {
3046 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
3047 if (IS_ALIGNED(width, 8)) {
3048 ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
3049 }
3050 }
3051 #endif
3052 #if defined(HAS_ARGBATTENUATEROW_NEON)
3053 if (TestCpuFlag(kCpuHasNEON)) {
3054 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
3055 if (IS_ALIGNED(width, 8)) {
3056 ARGBAttenuateRow = ARGBAttenuateRow_NEON;
3057 }
3058 }
3059 #endif
3060 #if defined(HAS_ARGBATTENUATEROW_MMI)
3061 if (TestCpuFlag(kCpuHasMMI)) {
3062 ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
3063 if (IS_ALIGNED(width, 2)) {
3064 ARGBAttenuateRow = ARGBAttenuateRow_MMI;
3065 }
3066 }
3067 #endif
3068 #if defined(HAS_ARGBATTENUATEROW_MSA)
3069 if (TestCpuFlag(kCpuHasMSA)) {
3070 ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
3071 if (IS_ALIGNED(width, 8)) {
3072 ARGBAttenuateRow = ARGBAttenuateRow_MSA;
3073 }
3074 }
3075 #endif
3076
3077 for (y = 0; y < height; ++y) {
3078 ARGBAttenuateRow(src_argb, dst_argb, width);
3079 src_argb += src_stride_argb;
3080 dst_argb += dst_stride_argb;
3081 }
3082 return 0;
3083 }
3084
// Convert preattenuated ARGB to unattenuated ARGB.
3086 LIBYUV_API
ARGBUnattenuate(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3087 int ARGBUnattenuate(const uint8_t* src_argb,
3088 int src_stride_argb,
3089 uint8_t* dst_argb,
3090 int dst_stride_argb,
3091 int width,
3092 int height) {
3093 int y;
3094 void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3095 int width) = ARGBUnattenuateRow_C;
3096 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3097 return -1;
3098 }
3099 if (height < 0) {
3100 height = -height;
3101 src_argb = src_argb + (height - 1) * src_stride_argb;
3102 src_stride_argb = -src_stride_argb;
3103 }
3104 // Coalesce rows.
3105 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3106 width *= height;
3107 height = 1;
3108 src_stride_argb = dst_stride_argb = 0;
3109 }
3110 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
3111 if (TestCpuFlag(kCpuHasSSE2)) {
3112 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
3113 if (IS_ALIGNED(width, 4)) {
3114 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
3115 }
3116 }
3117 #endif
3118 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
3119 if (TestCpuFlag(kCpuHasAVX2)) {
3120 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
3121 if (IS_ALIGNED(width, 8)) {
3122 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
3123 }
3124 }
3125 #endif
3126 // TODO(fbarchard): Neon version.
3127
3128 for (y = 0; y < height; ++y) {
3129 ARGBUnattenuateRow(src_argb, dst_argb, width);
3130 src_argb += src_stride_argb;
3131 dst_argb += dst_stride_argb;
3132 }
3133 return 0;
3134 }
3135
// Convert ARGB to Grayed ARGB, writing to a separate destination.
// Alpha is carried through by the row functions; see ARGBGrayRow_C.
LIBYUV_API
int ARGBGrayTo(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  // Row function; upgraded to a SIMD variant below when width alignment
  // and CPU features allow.
  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous buffers are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
    ARGBGrayRow = ARGBGrayRow_MMI;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
3189
// Make a rectangle of ARGB gray scale, in place.
// The rect is (dst_x, dst_y) .. (dst_x + width, dst_y + height).
LIBYUV_API
int ARGBGray(uint8_t* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height) {
  int y;
  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBGrayRow_C;
  // NOTE(review): pointer arithmetic happens before the null/range check
  // below; harmless in practice but technically UB on a null dst_argb.
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows: only possible when the rect spans full rows
  // (stride equals rect width in bytes).
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
    ARGBGrayRow = ARGBGrayRow_MMI;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  // In-place: source and destination are the same row.
  for (y = 0; y < height; ++y) {
    ARGBGrayRow(dst, dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}
3238
// Make a rectangle of ARGB Sepia tone, in place.
// The rect is (dst_x, dst_y) .. (dst_x + width, dst_y + height).
LIBYUV_API
int ARGBSepia(uint8_t* dst_argb,
              int dst_stride_argb,
              int dst_x,
              int dst_y,
              int width,
              int height) {
  int y;
  void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
  // NOTE(review): computed before validation; technically UB on null input.
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows: only possible when the rect spans full rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSEPIAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBSEPIAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_NEON;
  }
#endif
#if defined(HAS_ARGBSEPIAROW_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
    ARGBSepiaRow = ARGBSepiaRow_MMI;
  }
#endif
#if defined(HAS_ARGBSEPIAROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBSepiaRow(dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}
3286
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb is 16 signed 6-bit fixed point coefficients (see the /2
// scaling and the 64 == 1.0 constant in RGBColorMatrix below).
LIBYUV_API
int ARGBColorMatrix(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    const int8_t* matrix_argb,
                    int width,
                    int height) {
  int y;
  void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                             const int8_t* matrix_argb, int width) =
      ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous buffers are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_MMI;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
3342
3343 // Apply a 4x3 matrix to each ARGB pixel.
3344 // Deprecated.
3345 LIBYUV_API
RGBColorMatrix(uint8_t * dst_argb,int dst_stride_argb,const int8_t * matrix_rgb,int dst_x,int dst_y,int width,int height)3346 int RGBColorMatrix(uint8_t* dst_argb,
3347 int dst_stride_argb,
3348 const int8_t* matrix_rgb,
3349 int dst_x,
3350 int dst_y,
3351 int width,
3352 int height) {
3353 SIMD_ALIGNED(int8_t matrix_argb[16]);
3354 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3355 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
3356 dst_y < 0) {
3357 return -1;
3358 }
3359
3360 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
3361 matrix_argb[0] = matrix_rgb[0] / 2;
3362 matrix_argb[1] = matrix_rgb[1] / 2;
3363 matrix_argb[2] = matrix_rgb[2] / 2;
3364 matrix_argb[3] = matrix_rgb[3] / 2;
3365 matrix_argb[4] = matrix_rgb[4] / 2;
3366 matrix_argb[5] = matrix_rgb[5] / 2;
3367 matrix_argb[6] = matrix_rgb[6] / 2;
3368 matrix_argb[7] = matrix_rgb[7] / 2;
3369 matrix_argb[8] = matrix_rgb[8] / 2;
3370 matrix_argb[9] = matrix_rgb[9] / 2;
3371 matrix_argb[10] = matrix_rgb[10] / 2;
3372 matrix_argb[11] = matrix_rgb[11] / 2;
3373 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
3374 matrix_argb[15] = 64; // 1.0
3375
3376 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
3377 dst_stride_argb, &matrix_argb[0], width, height);
3378 }
3379
3380 // Apply a color table each ARGB pixel.
3381 // Table contains 256 ARGB values.
3382 LIBYUV_API
ARGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)3383 int ARGBColorTable(uint8_t* dst_argb,
3384 int dst_stride_argb,
3385 const uint8_t* table_argb,
3386 int dst_x,
3387 int dst_y,
3388 int width,
3389 int height) {
3390 int y;
3391 void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
3392 int width) = ARGBColorTableRow_C;
3393 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3394 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3395 dst_y < 0) {
3396 return -1;
3397 }
3398 // Coalesce rows.
3399 if (dst_stride_argb == width * 4) {
3400 width *= height;
3401 height = 1;
3402 dst_stride_argb = 0;
3403 }
3404 #if defined(HAS_ARGBCOLORTABLEROW_X86)
3405 if (TestCpuFlag(kCpuHasX86)) {
3406 ARGBColorTableRow = ARGBColorTableRow_X86;
3407 }
3408 #endif
3409 for (y = 0; y < height; ++y) {
3410 ARGBColorTableRow(dst, table_argb, width);
3411 dst += dst_stride_argb;
3412 }
3413 return 0;
3414 }
3415
3416 // Apply a color table each ARGB pixel but preserve destination alpha.
3417 // Table contains 256 ARGB values.
3418 LIBYUV_API
RGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)3419 int RGBColorTable(uint8_t* dst_argb,
3420 int dst_stride_argb,
3421 const uint8_t* table_argb,
3422 int dst_x,
3423 int dst_y,
3424 int width,
3425 int height) {
3426 int y;
3427 void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
3428 int width) = RGBColorTableRow_C;
3429 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3430 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3431 dst_y < 0) {
3432 return -1;
3433 }
3434 // Coalesce rows.
3435 if (dst_stride_argb == width * 4) {
3436 width *= height;
3437 height = 1;
3438 dst_stride_argb = 0;
3439 }
3440 #if defined(HAS_RGBCOLORTABLEROW_X86)
3441 if (TestCpuFlag(kCpuHasX86)) {
3442 RGBColorTableRow = RGBColorTableRow_X86;
3443 }
3444 #endif
3445 for (y = 0; y < height; ++y) {
3446 RGBColorTableRow(dst, table_argb, width);
3447 dst += dst_stride_argb;
3448 }
3449 return 0;
3450 }
3451
// ARGBQuantize is used to posterize art.
// e.g. rgb / qvalue * qvalue + qvalue / 2
// But the low levels implement efficiently with 3 parameters, and could be
// used for other high level operations.
// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
// where scale is 1 / interval_size as a fixed point value.
// The divide is replaced with a multiply by reciprocal fixed point multiply.
// Caveat - although SSE2 saturates, the C function does not and should be used
// with care if doing anything but quantization.
LIBYUV_API
int ARGBQuantize(uint8_t* dst_argb,
                 int dst_stride_argb,
                 int scale,
                 int interval_size,
                 int interval_offset,
                 int dst_x,
                 int dst_y,
                 int width,
                 int height) {
  int y;
  void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
                          int interval_offset, int width) = ARGBQuantizeRow_C;
  // NOTE(review): computed before validation; technically UB on null input.
  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
      interval_size < 1 || interval_size > 255) {
    return -1;
  }
  // Coalesce rows: only possible when the rect spans full rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
  }
#endif
#if defined(HAS_ARGBQUANTIZEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
  }
#endif
#if defined(HAS_ARGBQUANTIZEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBQuantizeRow = ARGBQuantizeRow_MSA;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
    dst += dst_stride_argb;
  }
  return 0;
}
3506
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
// dst_cumsum holds 4 int32 per pixel (one per ARGB channel);
// dst_stride32_cumsum is in int32 units, not bytes.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8_t* src_argb,
                             int src_stride_argb,
                             int32_t* dst_cumsum,
                             int dst_stride32_cumsum,
                             int width,
                             int height) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
                                  const int32_t* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  // The first row's "previous" sums are the zeroed first output row.
  int32_t* previous_cumsum = dst_cumsum;
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
  }
#endif

  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  for (y = 0; y < height; ++y) {
    // Each output row accumulates onto the row above it.
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
3544
// Blur ARGB image using a box filter of the given radius, via a running
// cumulative-sum (summed area) table.
// Caller should allocate CumulativeSum table of width * height * 16 bytes
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
LIBYUV_API
int ARGBBlur(const uint8_t* src_argb,
             int src_stride_argb,
             uint8_t* dst_argb,
             int dst_stride_argb,
             int32_t* dst_cumsum,
             int dst_stride32_cumsum,
             int width,
             int height,
             int radius) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
                                  const int32_t* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(
      const int32_t* topleft, const int32_t* botleft, int width, int area,
      uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
  int32_t* cumsum_bot_row;       // Bottom edge of the sliding sum window.
  int32_t* max_cumsum_bot_row;   // One past the end of the circular buffer.
  int32_t* cumsum_top_row;       // Top edge of the sliding sum window.

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp radius to image dimensions; reject degenerate radii.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
                           dst_stride32_cumsum, width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Clamp the vertical window to the image edges.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;  // Window width in int32 units (4 per pixel).
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: the box grows as it moves away from the left edge.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                                &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full-size box for the interior run of pixels.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                              &dst_argb[x * 4], n);

    // Right clipped: the box shrinks as it approaches the right edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
                                area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
3662
3663 // Multiply ARGB image by a specified ARGB value.
3664 LIBYUV_API
ARGBShade(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height,uint32_t value)3665 int ARGBShade(const uint8_t* src_argb,
3666 int src_stride_argb,
3667 uint8_t* dst_argb,
3668 int dst_stride_argb,
3669 int width,
3670 int height,
3671 uint32_t value) {
3672 int y;
3673 void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
3674 uint32_t value) = ARGBShadeRow_C;
3675 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
3676 return -1;
3677 }
3678 if (height < 0) {
3679 height = -height;
3680 src_argb = src_argb + (height - 1) * src_stride_argb;
3681 src_stride_argb = -src_stride_argb;
3682 }
3683 // Coalesce rows.
3684 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3685 width *= height;
3686 height = 1;
3687 src_stride_argb = dst_stride_argb = 0;
3688 }
3689 #if defined(HAS_ARGBSHADEROW_SSE2)
3690 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
3691 ARGBShadeRow = ARGBShadeRow_SSE2;
3692 }
3693 #endif
3694 #if defined(HAS_ARGBSHADEROW_NEON)
3695 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3696 ARGBShadeRow = ARGBShadeRow_NEON;
3697 }
3698 #endif
3699 #if defined(HAS_ARGBSHADEROW_MMI)
3700 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
3701 ARGBShadeRow = ARGBShadeRow_MMI;
3702 }
3703 #endif
3704 #if defined(HAS_ARGBSHADEROW_MSA)
3705 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
3706 ARGBShadeRow = ARGBShadeRow_MSA;
3707 }
3708 #endif
3709
3710 for (y = 0; y < height; ++y) {
3711 ARGBShadeRow(src_argb, dst_argb, width, value);
3712 src_argb += src_stride_argb;
3713 dst_argb += dst_stride_argb;
3714 }
3715 return 0;
3716 }
3717
// Interpolate 2 planes by specified amount (0 to 255).
// 0 == all src0, 255 == all src1; the per-row functions blend as
// dst = src0 + (src1 - src0) * interpolation / 256 (see InterpolateRow_C).
LIBYUV_API
int InterpolatePlane(const uint8_t* src0,
                     int src_stride0,
                     const uint8_t* src1,
                     int src_stride1,
                     uint8_t* dst,
                     int dst_stride,
                     int width,
                     int height,
                     int interpolation) {
  int y;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (flip the destination).
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;
    dst_stride = -dst_stride;
  }
  // Coalesce rows: contiguous buffers are processed as one long row.
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride0 = src_stride1 = dst_stride = 0;
  }
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // The row function takes src1 as a pointer offset from src0.
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
    src0 += src_stride0;
    src1 += src_stride1;
    dst += dst_stride;
  }
  return 0;
}
3797
3798 // Interpolate 2 ARGB images by specified amount (0 to 255).
3799 LIBYUV_API
ARGBInterpolate(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int interpolation)3800 int ARGBInterpolate(const uint8_t* src_argb0,
3801 int src_stride_argb0,
3802 const uint8_t* src_argb1,
3803 int src_stride_argb1,
3804 uint8_t* dst_argb,
3805 int dst_stride_argb,
3806 int width,
3807 int height,
3808 int interpolation) {
3809 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
3810 src_stride_argb1, dst_argb, dst_stride_argb,
3811 width * 4, height, interpolation);
3812 }
3813
3814 // Interpolate 2 YUV images by specified amount (0 to 255).
3815 LIBYUV_API
I420Interpolate(const uint8_t * src0_y,int src0_stride_y,const uint8_t * src0_u,int src0_stride_u,const uint8_t * src0_v,int src0_stride_v,const uint8_t * src1_y,int src1_stride_y,const uint8_t * src1_u,int src1_stride_u,const uint8_t * src1_v,int src1_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height,int interpolation)3816 int I420Interpolate(const uint8_t* src0_y,
3817 int src0_stride_y,
3818 const uint8_t* src0_u,
3819 int src0_stride_u,
3820 const uint8_t* src0_v,
3821 int src0_stride_v,
3822 const uint8_t* src1_y,
3823 int src1_stride_y,
3824 const uint8_t* src1_u,
3825 int src1_stride_u,
3826 const uint8_t* src1_v,
3827 int src1_stride_v,
3828 uint8_t* dst_y,
3829 int dst_stride_y,
3830 uint8_t* dst_u,
3831 int dst_stride_u,
3832 uint8_t* dst_v,
3833 int dst_stride_v,
3834 int width,
3835 int height,
3836 int interpolation) {
3837 int halfwidth = (width + 1) >> 1;
3838 int halfheight = (height + 1) >> 1;
3839 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
3840 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
3841 return -1;
3842 }
3843 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
3844 dst_stride_y, width, height, interpolation);
3845 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
3846 dst_stride_u, halfwidth, halfheight, interpolation);
3847 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
3848 dst_stride_v, halfwidth, halfheight, interpolation);
3849 return 0;
3850 }
3851
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is a 4 byte mask selecting the source byte for each output byte
// of every 4 byte pixel (see ARGBShuffleRow_C).
LIBYUV_API
int ARGBShuffle(const uint8_t* src_bgra,
                int src_stride_bgra,
                uint8_t* dst_argb,
                int dst_stride_argb,
                const uint8_t* shuffler,
                int width,
                int height) {
  int y;
  void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
                         const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows: contiguous buffers are processed as one long row.
  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBShuffleRow = ARGBShuffleRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
3927
// Shuffle AR64 channel order. e.g. AR64 to AB64.
// Strides are in uint16_t units, matching the pointer types.
// NOTE(review): each AR64 pixel is 8 bytes, so the byte-oriented ARGB
// shuffle rows are reused with width * 2 "pixels"; presumably the caller's
// shuffler is expressed so the desired swap stays within each 4 byte
// group — verify against callers.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
                int src_stride_ar64,
                uint16_t* dst_ar64,
                int dst_stride_ar64,
                const uint8_t* shuffler,
                int width,
                int height) {
  int y;
  void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
                         const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
  if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
    src_stride_ar64 = -src_stride_ar64;
  }
  // Coalesce rows (strides are 4 uint16_t per pixel).
  if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_ar64 = dst_stride_ar64 = 0;
  }
  // Assembly versions can be reused if it's implemented with shuffle.
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      AR64ShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      AR64ShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      AR64ShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    AR64ShuffleRow = ARGBShuffleRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      AR64ShuffleRow = ARGBShuffleRow_MMI;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Treat each 8 byte AR64 pixel as two 4 byte units: width * 2.
    AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
                   width * 2);
    src_ar64 += src_stride_ar64;
    dst_ar64 += dst_stride_ar64;
  }
  return 0;
}
3997
// Gauss blur a float plane using Gaussian 5x5 filter with
// coefficients of 1, 4, 6, 4, 1.
// Each destination pixel is a blur of the 5x5
// pixels from the source.
// Source edges are clamped.
// Edge is 2 pixels on each side, and interior is multiple of 4.
LIBYUV_API
int GaussPlane_F32(const float* src,
                   int src_stride,
                   float* dst,
                   int dst_stride,
                   int width,
                   int height) {
  int y;
  // Vertical pass: blends 5 source rows into one intermediate row.
  void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2,
                       const float* src3, const float* src4, float* dst,
                       int width) = GaussCol_F32_C;
  // Horizontal pass: blends 5 adjacent floats of the intermediate row.
  void (*GaussRow_F32)(const float* src, float* dst, int width) =
      GaussRow_F32_C;
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

#if defined(HAS_GAUSSCOL_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussCol_F32 = GaussCol_F32_NEON;
  }
#endif
#if defined(HAS_GAUSSROW_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussRow_F32 = GaussRow_F32_NEON;
  }
#endif
  {
    // 2 pixels on each side, but aligned out to 16 bytes.
    align_buffer_64(rowbuf, (4 + width + 4) * 4);
    // Zero the 16-byte pads so edge reads are defined before extrusion.
    memset(rowbuf, 0, 16);
    memset(rowbuf + (4 + width) * 4, 0, 16);
    float* row = (float*)(rowbuf + 16);
    // Sliding window of 5 source rows; the top rows start clamped to the
    // first source row, the bottom rows clamp at the last.
    const float* src0 = src;
    const float* src1 = src;
    const float* src2 = src;
    const float* src3 = src2 + ((height > 1) ? src_stride : 0);
    const float* src4 = src3 + ((height > 2) ? src_stride : 0);

    for (y = 0; y < height; ++y) {
      GaussCol_F32(src0, src1, src2, src3, src4, row, width);

      // Extrude edge by 2 floats
      row[-2] = row[-1] = row[0];
      row[width + 1] = row[width] = row[width - 1];

      GaussRow_F32(row - 2, dst, width);

      // Slide the 5-row window down one row; src4 clamps at the last row.
      src0 = src1;
      src1 = src2;
      src2 = src3;
      src3 = src4;
      if ((y + 2) < (height - 1)) {
        src4 += src_stride;
      }
      dst += dst_stride;
    }
    free_aligned_buffer_64(rowbuf);
  }
  return 0;
}
4071
// Sobel ARGB effect.
// Shared worker for the Sobel family: converts each ARGB row to a single
// channel via ARGBToYJRow, computes horizontal (SobelX) and vertical
// (SobelY) gradient rows over a sliding 3-row window, then lets the
// caller-supplied SobelRow combine the two gradients into the destination.
// Returns 0 on success, -1 on invalid arguments.
static int ARGBSobelize(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8_t* src_sobelx,
                                         const uint8_t* src_sobely,
                                         uint8_t* dst,
                                         int width)) {
  int y;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    uint8_t* dst_sobely, int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBToYJRow = ARGBToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow = ARGBToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_MSA;
    }
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SobelYRow = SobelYRow_MMI;
  }
#endif
#if defined(HAS_SOBELYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelYRow = SobelYRow_MSA;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SobelXRow = SobelXRow_MMI;
  }
#endif
#if defined(HAS_SOBELXROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelXRow = SobelXRow_MSA;
  }
#endif
  {
    // 3 rows with edges before/after.
    // kRowSize rounds a row (plus kEdge) up to a multiple of 32 bytes.
    const int kRowSize = (width + kEdge + 31) & ~31;
    // Layout: [sobelx row][sobely row][kEdge pad + 3 gray rows + kEdge pad].
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8_t* row_sobelx = rows;
    uint8_t* row_sobely = rows + kRowSize;
    uint8_t* row_y = rows + kRowSize * 2;

    // Convert first row.
    uint8_t* row_y0 = row_y + kEdge;
    uint8_t* row_y1 = row_y0 + kRowSize;
    uint8_t* row_y2 = row_y1 + kRowSize;
    // Prime the window: rows 0 and 1 both hold the first image row, which
    // clamps the top edge.
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G.
      // On the last iteration src_argb is not advanced, so the final image
      // row is reused, clamping the bottom edge.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      // The gradient kernels read from one pixel left of each row start
      // (the extruded edge pixel written above).
      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8_t* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
4230
4231 // Sobel ARGB effect.
4232 LIBYUV_API
ARGBSobel(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)4233 int ARGBSobel(const uint8_t* src_argb,
4234 int src_stride_argb,
4235 uint8_t* dst_argb,
4236 int dst_stride_argb,
4237 int width,
4238 int height) {
4239 void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4240 uint8_t* dst_argb, int width) = SobelRow_C;
4241 #if defined(HAS_SOBELROW_SSE2)
4242 if (TestCpuFlag(kCpuHasSSE2)) {
4243 SobelRow = SobelRow_Any_SSE2;
4244 if (IS_ALIGNED(width, 16)) {
4245 SobelRow = SobelRow_SSE2;
4246 }
4247 }
4248 #endif
4249 #if defined(HAS_SOBELROW_NEON)
4250 if (TestCpuFlag(kCpuHasNEON)) {
4251 SobelRow = SobelRow_Any_NEON;
4252 if (IS_ALIGNED(width, 8)) {
4253 SobelRow = SobelRow_NEON;
4254 }
4255 }
4256 #endif
4257 #if defined(HAS_SOBELROW_MMI)
4258 if (TestCpuFlag(kCpuHasMMI)) {
4259 SobelRow = SobelRow_Any_MMI;
4260 if (IS_ALIGNED(width, 8)) {
4261 SobelRow = SobelRow_MMI;
4262 }
4263 }
4264 #endif
4265 #if defined(HAS_SOBELROW_MSA)
4266 if (TestCpuFlag(kCpuHasMSA)) {
4267 SobelRow = SobelRow_Any_MSA;
4268 if (IS_ALIGNED(width, 16)) {
4269 SobelRow = SobelRow_MSA;
4270 }
4271 }
4272 #endif
4273 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
4274 width, height, SobelRow);
4275 }
4276
4277 // Sobel ARGB effect with planar output.
4278 LIBYUV_API
ARGBSobelToPlane(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_y,int dst_stride_y,int width,int height)4279 int ARGBSobelToPlane(const uint8_t* src_argb,
4280 int src_stride_argb,
4281 uint8_t* dst_y,
4282 int dst_stride_y,
4283 int width,
4284 int height) {
4285 void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4286 uint8_t* dst_, int width) = SobelToPlaneRow_C;
4287 #if defined(HAS_SOBELTOPLANEROW_SSE2)
4288 if (TestCpuFlag(kCpuHasSSE2)) {
4289 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
4290 if (IS_ALIGNED(width, 16)) {
4291 SobelToPlaneRow = SobelToPlaneRow_SSE2;
4292 }
4293 }
4294 #endif
4295 #if defined(HAS_SOBELTOPLANEROW_NEON)
4296 if (TestCpuFlag(kCpuHasNEON)) {
4297 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
4298 if (IS_ALIGNED(width, 16)) {
4299 SobelToPlaneRow = SobelToPlaneRow_NEON;
4300 }
4301 }
4302 #endif
4303 #if defined(HAS_SOBELTOPLANEROW_MMI)
4304 if (TestCpuFlag(kCpuHasMMI)) {
4305 SobelToPlaneRow = SobelToPlaneRow_Any_MMI;
4306 if (IS_ALIGNED(width, 8)) {
4307 SobelToPlaneRow = SobelToPlaneRow_MMI;
4308 }
4309 }
4310 #endif
4311 #if defined(HAS_SOBELTOPLANEROW_MSA)
4312 if (TestCpuFlag(kCpuHasMSA)) {
4313 SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
4314 if (IS_ALIGNED(width, 32)) {
4315 SobelToPlaneRow = SobelToPlaneRow_MSA;
4316 }
4317 }
4318 #endif
4319 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
4320 height, SobelToPlaneRow);
4321 }
4322
4323 // SobelXY ARGB effect.
4324 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
4325 LIBYUV_API
ARGBSobelXY(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)4326 int ARGBSobelXY(const uint8_t* src_argb,
4327 int src_stride_argb,
4328 uint8_t* dst_argb,
4329 int dst_stride_argb,
4330 int width,
4331 int height) {
4332 void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4333 uint8_t* dst_argb, int width) = SobelXYRow_C;
4334 #if defined(HAS_SOBELXYROW_SSE2)
4335 if (TestCpuFlag(kCpuHasSSE2)) {
4336 SobelXYRow = SobelXYRow_Any_SSE2;
4337 if (IS_ALIGNED(width, 16)) {
4338 SobelXYRow = SobelXYRow_SSE2;
4339 }
4340 }
4341 #endif
4342 #if defined(HAS_SOBELXYROW_NEON)
4343 if (TestCpuFlag(kCpuHasNEON)) {
4344 SobelXYRow = SobelXYRow_Any_NEON;
4345 if (IS_ALIGNED(width, 8)) {
4346 SobelXYRow = SobelXYRow_NEON;
4347 }
4348 }
4349 #endif
4350 #if defined(HAS_SOBELXYROW_MMI)
4351 if (TestCpuFlag(kCpuHasMMI)) {
4352 SobelXYRow = SobelXYRow_Any_MMI;
4353 if (IS_ALIGNED(width, 8)) {
4354 SobelXYRow = SobelXYRow_MMI;
4355 }
4356 }
4357 #endif
4358 #if defined(HAS_SOBELXYROW_MSA)
4359 if (TestCpuFlag(kCpuHasMSA)) {
4360 SobelXYRow = SobelXYRow_Any_MSA;
4361 if (IS_ALIGNED(width, 16)) {
4362 SobelXYRow = SobelXYRow_MSA;
4363 }
4364 }
4365 #endif
4366 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
4367 width, height, SobelXYRow);
4368 }
4369
4370 // Apply a 4x4 polynomial to each ARGB pixel.
4371 LIBYUV_API
ARGBPolynomial(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const float * poly,int width,int height)4372 int ARGBPolynomial(const uint8_t* src_argb,
4373 int src_stride_argb,
4374 uint8_t* dst_argb,
4375 int dst_stride_argb,
4376 const float* poly,
4377 int width,
4378 int height) {
4379 int y;
4380 void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
4381 const float* poly, int width) = ARGBPolynomialRow_C;
4382 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
4383 return -1;
4384 }
4385 // Negative height means invert the image.
4386 if (height < 0) {
4387 height = -height;
4388 src_argb = src_argb + (height - 1) * src_stride_argb;
4389 src_stride_argb = -src_stride_argb;
4390 }
4391 // Coalesce rows.
4392 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4393 width *= height;
4394 height = 1;
4395 src_stride_argb = dst_stride_argb = 0;
4396 }
4397 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
4398 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
4399 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
4400 }
4401 #endif
4402 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
4403 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
4404 IS_ALIGNED(width, 2)) {
4405 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
4406 }
4407 #endif
4408
4409 for (y = 0; y < height; ++y) {
4410 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
4411 src_argb += src_stride_argb;
4412 dst_argb += dst_stride_argb;
4413 }
4414 return 0;
4415 }
4416
4417 // Convert plane of 16 bit shorts to half floats.
4418 // Source values are multiplied by scale before storing as half float.
4419 LIBYUV_API
HalfFloatPlane(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,float scale,int width,int height)4420 int HalfFloatPlane(const uint16_t* src_y,
4421 int src_stride_y,
4422 uint16_t* dst_y,
4423 int dst_stride_y,
4424 float scale,
4425 int width,
4426 int height) {
4427 int y;
4428 void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
4429 int width) = HalfFloatRow_C;
4430 if (!src_y || !dst_y || width <= 0 || height == 0) {
4431 return -1;
4432 }
4433 src_stride_y >>= 1;
4434 dst_stride_y >>= 1;
4435 // Negative height means invert the image.
4436 if (height < 0) {
4437 height = -height;
4438 src_y = src_y + (height - 1) * src_stride_y;
4439 src_stride_y = -src_stride_y;
4440 }
4441 // Coalesce rows.
4442 if (src_stride_y == width && dst_stride_y == width) {
4443 width *= height;
4444 height = 1;
4445 src_stride_y = dst_stride_y = 0;
4446 }
4447 #if defined(HAS_HALFFLOATROW_SSE2)
4448 if (TestCpuFlag(kCpuHasSSE2)) {
4449 HalfFloatRow = HalfFloatRow_Any_SSE2;
4450 if (IS_ALIGNED(width, 8)) {
4451 HalfFloatRow = HalfFloatRow_SSE2;
4452 }
4453 }
4454 #endif
4455 #if defined(HAS_HALFFLOATROW_AVX2)
4456 if (TestCpuFlag(kCpuHasAVX2)) {
4457 HalfFloatRow = HalfFloatRow_Any_AVX2;
4458 if (IS_ALIGNED(width, 16)) {
4459 HalfFloatRow = HalfFloatRow_AVX2;
4460 }
4461 }
4462 #endif
4463 #if defined(HAS_HALFFLOATROW_F16C)
4464 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
4465 HalfFloatRow =
4466 (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
4467 if (IS_ALIGNED(width, 16)) {
4468 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
4469 }
4470 }
4471 #endif
4472 #if defined(HAS_HALFFLOATROW_NEON)
4473 if (TestCpuFlag(kCpuHasNEON)) {
4474 HalfFloatRow =
4475 (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
4476 if (IS_ALIGNED(width, 8)) {
4477 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
4478 }
4479 }
4480 #endif
4481 #if defined(HAS_HALFFLOATROW_MSA)
4482 if (TestCpuFlag(kCpuHasMSA)) {
4483 HalfFloatRow = HalfFloatRow_Any_MSA;
4484 if (IS_ALIGNED(width, 32)) {
4485 HalfFloatRow = HalfFloatRow_MSA;
4486 }
4487 }
4488 #endif
4489
4490 for (y = 0; y < height; ++y) {
4491 HalfFloatRow(src_y, dst_y, scale, width);
4492 src_y += src_stride_y;
4493 dst_y += dst_stride_y;
4494 }
4495 return 0;
4496 }
4497
4498 // Convert a buffer of bytes to floats, scale the values and store as floats.
4499 LIBYUV_API
ByteToFloat(const uint8_t * src_y,float * dst_y,float scale,int width)4500 int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
4501 void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
4502 int width) = ByteToFloatRow_C;
4503 if (!src_y || !dst_y || width <= 0) {
4504 return -1;
4505 }
4506 #if defined(HAS_BYTETOFLOATROW_NEON)
4507 if (TestCpuFlag(kCpuHasNEON)) {
4508 ByteToFloatRow = ByteToFloatRow_Any_NEON;
4509 if (IS_ALIGNED(width, 8)) {
4510 ByteToFloatRow = ByteToFloatRow_NEON;
4511 }
4512 }
4513 #endif
4514
4515 ByteToFloatRow(src_y, dst_y, scale, width);
4516 return 0;
4517 }
4518
4519 // Apply a lumacolortable to each ARGB pixel.
4520 LIBYUV_API
ARGBLumaColorTable(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const uint8_t * luma,int width,int height)4521 int ARGBLumaColorTable(const uint8_t* src_argb,
4522 int src_stride_argb,
4523 uint8_t* dst_argb,
4524 int dst_stride_argb,
4525 const uint8_t* luma,
4526 int width,
4527 int height) {
4528 int y;
4529 void (*ARGBLumaColorTableRow)(
4530 const uint8_t* src_argb, uint8_t* dst_argb, int width,
4531 const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
4532 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
4533 return -1;
4534 }
4535 // Negative height means invert the image.
4536 if (height < 0) {
4537 height = -height;
4538 src_argb = src_argb + (height - 1) * src_stride_argb;
4539 src_stride_argb = -src_stride_argb;
4540 }
4541 // Coalesce rows.
4542 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4543 width *= height;
4544 height = 1;
4545 src_stride_argb = dst_stride_argb = 0;
4546 }
4547 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
4548 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
4549 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
4550 }
4551 #endif
4552
4553 for (y = 0; y < height; ++y) {
4554 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
4555 src_argb += src_stride_argb;
4556 dst_argb += dst_stride_argb;
4557 }
4558 return 0;
4559 }
4560
4561 // Copy Alpha from one ARGB image to another.
4562 LIBYUV_API
ARGBCopyAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)4563 int ARGBCopyAlpha(const uint8_t* src_argb,
4564 int src_stride_argb,
4565 uint8_t* dst_argb,
4566 int dst_stride_argb,
4567 int width,
4568 int height) {
4569 int y;
4570 void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
4571 int width) = ARGBCopyAlphaRow_C;
4572 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4573 return -1;
4574 }
4575 // Negative height means invert the image.
4576 if (height < 0) {
4577 height = -height;
4578 src_argb = src_argb + (height - 1) * src_stride_argb;
4579 src_stride_argb = -src_stride_argb;
4580 }
4581 // Coalesce rows.
4582 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4583 width *= height;
4584 height = 1;
4585 src_stride_argb = dst_stride_argb = 0;
4586 }
4587 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
4588 if (TestCpuFlag(kCpuHasSSE2)) {
4589 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
4590 if (IS_ALIGNED(width, 8)) {
4591 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
4592 }
4593 }
4594 #endif
4595 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
4596 if (TestCpuFlag(kCpuHasAVX2)) {
4597 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
4598 if (IS_ALIGNED(width, 16)) {
4599 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
4600 }
4601 }
4602 #endif
4603 #if defined(HAS_ARGBCOPYALPHAROW_MMI)
4604 if (TestCpuFlag(kCpuHasMMI)) {
4605 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_MMI;
4606 if (IS_ALIGNED(width, 2)) {
4607 ARGBCopyAlphaRow = ARGBCopyAlphaRow_MMI;
4608 }
4609 }
4610 #endif
4611
4612 for (y = 0; y < height; ++y) {
4613 ARGBCopyAlphaRow(src_argb, dst_argb, width);
4614 src_argb += src_stride_argb;
4615 dst_argb += dst_stride_argb;
4616 }
4617 return 0;
4618 }
4619
4620 // Extract just the alpha channel from ARGB.
4621 LIBYUV_API
ARGBExtractAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_a,int dst_stride_a,int width,int height)4622 int ARGBExtractAlpha(const uint8_t* src_argb,
4623 int src_stride_argb,
4624 uint8_t* dst_a,
4625 int dst_stride_a,
4626 int width,
4627 int height) {
4628 if (!src_argb || !dst_a || width <= 0 || height == 0) {
4629 return -1;
4630 }
4631 // Negative height means invert the image.
4632 if (height < 0) {
4633 height = -height;
4634 src_argb += (height - 1) * src_stride_argb;
4635 src_stride_argb = -src_stride_argb;
4636 }
4637 // Coalesce rows.
4638 if (src_stride_argb == width * 4 && dst_stride_a == width) {
4639 width *= height;
4640 height = 1;
4641 src_stride_argb = dst_stride_a = 0;
4642 }
4643 void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
4644 int width) = ARGBExtractAlphaRow_C;
4645 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
4646 if (TestCpuFlag(kCpuHasSSE2)) {
4647 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
4648 : ARGBExtractAlphaRow_Any_SSE2;
4649 }
4650 #endif
4651 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
4652 if (TestCpuFlag(kCpuHasAVX2)) {
4653 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
4654 : ARGBExtractAlphaRow_Any_AVX2;
4655 }
4656 #endif
4657 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
4658 if (TestCpuFlag(kCpuHasNEON)) {
4659 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
4660 : ARGBExtractAlphaRow_Any_NEON;
4661 }
4662 #endif
4663 #if defined(HAS_ARGBEXTRACTALPHAROW_MMI)
4664 if (TestCpuFlag(kCpuHasMMI)) {
4665 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_MMI
4666 : ARGBExtractAlphaRow_Any_MMI;
4667 }
4668 #endif
4669 #if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
4670 if (TestCpuFlag(kCpuHasMSA)) {
4671 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
4672 : ARGBExtractAlphaRow_Any_MSA;
4673 }
4674 #endif
4675
4676 for (int y = 0; y < height; ++y) {
4677 ARGBExtractAlphaRow(src_argb, dst_a, width);
4678 src_argb += src_stride_argb;
4679 dst_a += dst_stride_a;
4680 }
4681 return 0;
4682 }
4683
4684 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
4685 LIBYUV_API
ARGBCopyYToAlpha(const uint8_t * src_y,int src_stride_y,uint8_t * dst_argb,int dst_stride_argb,int width,int height)4686 int ARGBCopyYToAlpha(const uint8_t* src_y,
4687 int src_stride_y,
4688 uint8_t* dst_argb,
4689 int dst_stride_argb,
4690 int width,
4691 int height) {
4692 int y;
4693 void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
4694 int width) = ARGBCopyYToAlphaRow_C;
4695 if (!src_y || !dst_argb || width <= 0 || height == 0) {
4696 return -1;
4697 }
4698 // Negative height means invert the image.
4699 if (height < 0) {
4700 height = -height;
4701 src_y = src_y + (height - 1) * src_stride_y;
4702 src_stride_y = -src_stride_y;
4703 }
4704 // Coalesce rows.
4705 if (src_stride_y == width && dst_stride_argb == width * 4) {
4706 width *= height;
4707 height = 1;
4708 src_stride_y = dst_stride_argb = 0;
4709 }
4710 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
4711 if (TestCpuFlag(kCpuHasSSE2)) {
4712 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
4713 if (IS_ALIGNED(width, 8)) {
4714 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
4715 }
4716 }
4717 #endif
4718 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
4719 if (TestCpuFlag(kCpuHasAVX2)) {
4720 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
4721 if (IS_ALIGNED(width, 16)) {
4722 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
4723 }
4724 }
4725 #endif
4726 #if defined(HAS_ARGBCOPYYTOALPHAROW_MMI)
4727 if (TestCpuFlag(kCpuHasMMI)) {
4728 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_MMI;
4729 if (IS_ALIGNED(width, 8)) {
4730 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_MMI;
4731 }
4732 }
4733 #endif
4734
4735 for (y = 0; y < height; ++y) {
4736 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
4737 src_y += src_stride_y;
4738 dst_argb += dst_stride_argb;
4739 }
4740 return 0;
4741 }
4742
// TODO(fbarchard): Consider if width is even Y channel can be split
// directly. A SplitUVRow_Odd function could copy the remaining chroma.

// Convert packed YUY2 (Y0 U Y1 V) to NV12 (Y plane + interleaved UV plane
// at half vertical resolution).  For each pair of source rows, the
// interleaved chroma of the two rows is averaged into one UV row.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
               int src_stride_yuy2,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved byte pairs: even bytes to the first output, odd
  // bytes to the second.  For YUY2 that separates Y from packed UV.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows; a source_y_fraction of 128 (out of 256) gives an
  // equal-weight average of src_ptr and src_ptr + src_stride.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitUVRow = SplitUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow = SplitUVRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Layout: [Y scratch at rows][UV of row 0 at rows + awidth]
    //         [UV of row 1 at rows + awidth * 2].
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows (fraction 128 = halfway) into dst_uv.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    // Odd height: last row's UV is written directly with no averaging.
    if (height & 1) {
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
4877
// Convert packed UYVY (U Y0 V Y1) to NV12 (Y plane + interleaved UV plane
// at half vertical resolution).  Mirror of YUY2ToNV12 with the byte roles
// swapped: in UYVY the even bytes are chroma and the odd bytes are luma.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved byte pairs: even bytes to the first output, odd
  // bytes to the second.  For UYVY the even bytes are packed UV.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows; a source_y_fraction of 128 (out of 256) gives an
  // equal-weight average of src_ptr and src_ptr + src_stride.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitUVRow = SplitUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow = SplitUVRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Layout: [Y scratch at rows][UV of row 0 at rows + awidth]
    //         [UV of row 1 at rows + awidth * 2].
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows (fraction 128 = halfway) into dst_uv.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    // Odd height: last row's UV is written directly with no averaging.
    if (height & 1) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
5009
5010 // width and height are src size allowing odd size handling.
5011 LIBYUV_API
HalfMergeUVPlane(const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_uv,int dst_stride_uv,int width,int height)5012 void HalfMergeUVPlane(const uint8_t* src_u,
5013 int src_stride_u,
5014 const uint8_t* src_v,
5015 int src_stride_v,
5016 uint8_t* dst_uv,
5017 int dst_stride_uv,
5018 int width,
5019 int height) {
5020 int y;
5021 void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
5022 const uint8_t* src_v, int src_stride_v,
5023 uint8_t* dst_uv, int width) = HalfMergeUVRow_C;
5024
5025 // Negative height means invert the image.
5026 if (height < 0) {
5027 height = -height;
5028 src_u = src_u + (height - 1) * src_stride_u;
5029 src_v = src_v + (height - 1) * src_stride_v;
5030 src_stride_u = -src_stride_u;
5031 src_stride_v = -src_stride_v;
5032 }
5033 #if defined(HAS_HALFMERGEUVROW_NEON)
5034 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
5035 HalfMergeUVRow = HalfMergeUVRow_NEON;
5036 }
5037 #endif
5038 #if defined(HAS_HALFMERGEUVROW_SSSE3)
5039 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
5040 HalfMergeUVRow = HalfMergeUVRow_SSSE3;
5041 }
5042 #endif
5043 #if defined(HAS_HALFMERGEUVROW_AVX2)
5044 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
5045 HalfMergeUVRow = HalfMergeUVRow_AVX2;
5046 }
5047 #endif
5048 for (y = 0; y < height - 1; y += 2) {
5049 // Merge a row of U and V into a row of UV.
5050 HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
5051 src_u += src_stride_u * 2;
5052 src_v += src_stride_v * 2;
5053 dst_uv += dst_stride_uv;
5054 }
5055 if (height & 1) {
5056 HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
5057 }
5058 }
5059
5060 #ifdef __cplusplus
5061 } // extern "C"
5062 } // namespace libyuv
5063 #endif
5064