1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/planar_functions.h"
12
13 #include <string.h> // for memset()
14
15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h"
18 #endif
19 #include "libyuv/row.h"
20 #include "libyuv/scale_row.h" // for ScaleRowDown2
21
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26
27 // Copy a plane of data
28 LIBYUV_API
CopyPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)29 void CopyPlane(const uint8_t* src_y,
30 int src_stride_y,
31 uint8_t* dst_y,
32 int dst_stride_y,
33 int width,
34 int height) {
35 int y;
36 void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
37 // Negative height means invert the image.
38 if (height < 0) {
39 height = -height;
40 dst_y = dst_y + (height - 1) * dst_stride_y;
41 dst_stride_y = -dst_stride_y;
42 }
43 // Coalesce rows.
44 if (src_stride_y == width && dst_stride_y == width) {
45 width *= height;
46 height = 1;
47 src_stride_y = dst_stride_y = 0;
48 }
49 // Nothing to do.
50 if (src_y == dst_y && src_stride_y == dst_stride_y) {
51 return;
52 }
53
54 #if defined(HAS_COPYROW_SSE2)
55 if (TestCpuFlag(kCpuHasSSE2)) {
56 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
57 }
58 #endif
59 #if defined(HAS_COPYROW_AVX)
60 if (TestCpuFlag(kCpuHasAVX)) {
61 CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
62 }
63 #endif
64 #if defined(HAS_COPYROW_ERMS)
65 if (TestCpuFlag(kCpuHasERMS)) {
66 CopyRow = CopyRow_ERMS;
67 }
68 #endif
69 #if defined(HAS_COPYROW_NEON)
70 if (TestCpuFlag(kCpuHasNEON)) {
71 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
72 }
73 #endif
74
75 // Copy plane
76 for (y = 0; y < height; ++y) {
77 CopyRow(src_y, dst_y, width);
78 src_y += src_stride_y;
79 dst_y += dst_stride_y;
80 }
81 }
82
83 // TODO(fbarchard): Consider support for negative height.
84 // TODO(fbarchard): Consider stride measured in bytes.
85 LIBYUV_API
CopyPlane_16(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int width,int height)86 void CopyPlane_16(const uint16_t* src_y,
87 int src_stride_y,
88 uint16_t* dst_y,
89 int dst_stride_y,
90 int width,
91 int height) {
92 int y;
93 void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C;
94 // Coalesce rows.
95 if (src_stride_y == width && dst_stride_y == width) {
96 width *= height;
97 height = 1;
98 src_stride_y = dst_stride_y = 0;
99 }
100 #if defined(HAS_COPYROW_16_SSE2)
101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
102 CopyRow = CopyRow_16_SSE2;
103 }
104 #endif
105 #if defined(HAS_COPYROW_16_ERMS)
106 if (TestCpuFlag(kCpuHasERMS)) {
107 CopyRow = CopyRow_16_ERMS;
108 }
109 #endif
110 #if defined(HAS_COPYROW_16_NEON)
111 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
112 CopyRow = CopyRow_16_NEON;
113 }
114 #endif
115
116 // Copy plane
117 for (y = 0; y < height; ++y) {
118 CopyRow(src_y, dst_y, width);
119 src_y += src_stride_y;
120 dst_y += dst_stride_y;
121 }
122 }
123
124 // Convert a plane of 16 bit data to 8 bit
125 LIBYUV_API
Convert16To8Plane(const uint16_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int scale,int width,int height)126 void Convert16To8Plane(const uint16_t* src_y,
127 int src_stride_y,
128 uint8_t* dst_y,
129 int dst_stride_y,
130 int scale, // 16384 for 10 bits
131 int width,
132 int height) {
133 int y;
134 void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
135 int width) = Convert16To8Row_C;
136
137 // Negative height means invert the image.
138 if (height < 0) {
139 height = -height;
140 dst_y = dst_y + (height - 1) * dst_stride_y;
141 dst_stride_y = -dst_stride_y;
142 }
143 // Coalesce rows.
144 if (src_stride_y == width && dst_stride_y == width) {
145 width *= height;
146 height = 1;
147 src_stride_y = dst_stride_y = 0;
148 }
149 #if defined(HAS_CONVERT16TO8ROW_SSSE3)
150 if (TestCpuFlag(kCpuHasSSSE3)) {
151 Convert16To8Row = Convert16To8Row_Any_SSSE3;
152 if (IS_ALIGNED(width, 16)) {
153 Convert16To8Row = Convert16To8Row_SSSE3;
154 }
155 }
156 #endif
157 #if defined(HAS_CONVERT16TO8ROW_AVX2)
158 if (TestCpuFlag(kCpuHasAVX2)) {
159 Convert16To8Row = Convert16To8Row_Any_AVX2;
160 if (IS_ALIGNED(width, 32)) {
161 Convert16To8Row = Convert16To8Row_AVX2;
162 }
163 }
164 #endif
165
166 // Convert plane
167 for (y = 0; y < height; ++y) {
168 Convert16To8Row(src_y, dst_y, scale, width);
169 src_y += src_stride_y;
170 dst_y += dst_stride_y;
171 }
172 }
173
174 // Convert a plane of 8 bit data to 16 bit
175 LIBYUV_API
Convert8To16Plane(const uint8_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int scale,int width,int height)176 void Convert8To16Plane(const uint8_t* src_y,
177 int src_stride_y,
178 uint16_t* dst_y,
179 int dst_stride_y,
180 int scale, // 16384 for 10 bits
181 int width,
182 int height) {
183 int y;
184 void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
185 int width) = Convert8To16Row_C;
186
187 // Negative height means invert the image.
188 if (height < 0) {
189 height = -height;
190 dst_y = dst_y + (height - 1) * dst_stride_y;
191 dst_stride_y = -dst_stride_y;
192 }
193 // Coalesce rows.
194 if (src_stride_y == width && dst_stride_y == width) {
195 width *= height;
196 height = 1;
197 src_stride_y = dst_stride_y = 0;
198 }
199 #if defined(HAS_CONVERT8TO16ROW_SSE2)
200 if (TestCpuFlag(kCpuHasSSE2)) {
201 Convert8To16Row = Convert8To16Row_Any_SSE2;
202 if (IS_ALIGNED(width, 16)) {
203 Convert8To16Row = Convert8To16Row_SSE2;
204 }
205 }
206 #endif
207 #if defined(HAS_CONVERT8TO16ROW_AVX2)
208 if (TestCpuFlag(kCpuHasAVX2)) {
209 Convert8To16Row = Convert8To16Row_Any_AVX2;
210 if (IS_ALIGNED(width, 32)) {
211 Convert8To16Row = Convert8To16Row_AVX2;
212 }
213 }
214 #endif
215
216 // Convert plane
217 for (y = 0; y < height; ++y) {
218 Convert8To16Row(src_y, dst_y, scale, width);
219 src_y += src_stride_y;
220 dst_y += dst_stride_y;
221 }
222 }
223
224 // Copy I422.
225 LIBYUV_API
I422Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)226 int I422Copy(const uint8_t* src_y,
227 int src_stride_y,
228 const uint8_t* src_u,
229 int src_stride_u,
230 const uint8_t* src_v,
231 int src_stride_v,
232 uint8_t* dst_y,
233 int dst_stride_y,
234 uint8_t* dst_u,
235 int dst_stride_u,
236 uint8_t* dst_v,
237 int dst_stride_v,
238 int width,
239 int height) {
240 int halfwidth = (width + 1) >> 1;
241 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
242 return -1;
243 }
244 // Negative height means invert the image.
245 if (height < 0) {
246 height = -height;
247 src_y = src_y + (height - 1) * src_stride_y;
248 src_u = src_u + (height - 1) * src_stride_u;
249 src_v = src_v + (height - 1) * src_stride_v;
250 src_stride_y = -src_stride_y;
251 src_stride_u = -src_stride_u;
252 src_stride_v = -src_stride_v;
253 }
254
255 if (dst_y) {
256 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
257 }
258 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
259 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
260 return 0;
261 }
262
263 // Copy I444.
264 LIBYUV_API
I444Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)265 int I444Copy(const uint8_t* src_y,
266 int src_stride_y,
267 const uint8_t* src_u,
268 int src_stride_u,
269 const uint8_t* src_v,
270 int src_stride_v,
271 uint8_t* dst_y,
272 int dst_stride_y,
273 uint8_t* dst_u,
274 int dst_stride_u,
275 uint8_t* dst_v,
276 int dst_stride_v,
277 int width,
278 int height) {
279 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
280 return -1;
281 }
282 // Negative height means invert the image.
283 if (height < 0) {
284 height = -height;
285 src_y = src_y + (height - 1) * src_stride_y;
286 src_u = src_u + (height - 1) * src_stride_u;
287 src_v = src_v + (height - 1) * src_stride_v;
288 src_stride_y = -src_stride_y;
289 src_stride_u = -src_stride_u;
290 src_stride_v = -src_stride_v;
291 }
292
293 if (dst_y) {
294 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
295 }
296 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
297 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
298 return 0;
299 }
300
301 // Copy I400.
302 LIBYUV_API
I400ToI400(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)303 int I400ToI400(const uint8_t* src_y,
304 int src_stride_y,
305 uint8_t* dst_y,
306 int dst_stride_y,
307 int width,
308 int height) {
309 if (!src_y || !dst_y || width <= 0 || height == 0) {
310 return -1;
311 }
312 // Negative height means invert the image.
313 if (height < 0) {
314 height = -height;
315 src_y = src_y + (height - 1) * src_stride_y;
316 src_stride_y = -src_stride_y;
317 }
318 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
319 return 0;
320 }
321
322 // Convert I420 to I400.
323 LIBYUV_API
I420ToI400(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,int width,int height)324 int I420ToI400(const uint8_t* src_y,
325 int src_stride_y,
326 const uint8_t* src_u,
327 int src_stride_u,
328 const uint8_t* src_v,
329 int src_stride_v,
330 uint8_t* dst_y,
331 int dst_stride_y,
332 int width,
333 int height) {
334 (void)src_u;
335 (void)src_stride_u;
336 (void)src_v;
337 (void)src_stride_v;
338 if (!src_y || !dst_y || width <= 0 || height == 0) {
339 return -1;
340 }
341 // Negative height means invert the image.
342 if (height < 0) {
343 height = -height;
344 src_y = src_y + (height - 1) * src_stride_y;
345 src_stride_y = -src_stride_y;
346 }
347
348 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
349 return 0;
350 }
351
352 // Support function for NV12 etc UV channels.
353 // Width and height are plane sizes (typically half pixel width).
354 LIBYUV_API
SplitUVPlane(const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)355 void SplitUVPlane(const uint8_t* src_uv,
356 int src_stride_uv,
357 uint8_t* dst_u,
358 int dst_stride_u,
359 uint8_t* dst_v,
360 int dst_stride_v,
361 int width,
362 int height) {
363 int y;
364 void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
365 int width) = SplitUVRow_C;
366 // Negative height means invert the image.
367 if (height < 0) {
368 height = -height;
369 dst_u = dst_u + (height - 1) * dst_stride_u;
370 dst_v = dst_v + (height - 1) * dst_stride_v;
371 dst_stride_u = -dst_stride_u;
372 dst_stride_v = -dst_stride_v;
373 }
374 // Coalesce rows.
375 if (src_stride_uv == width * 2 && dst_stride_u == width &&
376 dst_stride_v == width) {
377 width *= height;
378 height = 1;
379 src_stride_uv = dst_stride_u = dst_stride_v = 0;
380 }
381 #if defined(HAS_SPLITUVROW_SSE2)
382 if (TestCpuFlag(kCpuHasSSE2)) {
383 SplitUVRow = SplitUVRow_Any_SSE2;
384 if (IS_ALIGNED(width, 16)) {
385 SplitUVRow = SplitUVRow_SSE2;
386 }
387 }
388 #endif
389 #if defined(HAS_SPLITUVROW_AVX2)
390 if (TestCpuFlag(kCpuHasAVX2)) {
391 SplitUVRow = SplitUVRow_Any_AVX2;
392 if (IS_ALIGNED(width, 32)) {
393 SplitUVRow = SplitUVRow_AVX2;
394 }
395 }
396 #endif
397 #if defined(HAS_SPLITUVROW_NEON)
398 if (TestCpuFlag(kCpuHasNEON)) {
399 SplitUVRow = SplitUVRow_Any_NEON;
400 if (IS_ALIGNED(width, 16)) {
401 SplitUVRow = SplitUVRow_NEON;
402 }
403 }
404 #endif
405 #if defined(HAS_SPLITUVROW_MMI)
406 if (TestCpuFlag(kCpuHasMMI)) {
407 SplitUVRow = SplitUVRow_Any_MMI;
408 if (IS_ALIGNED(width, 8)) {
409 SplitUVRow = SplitUVRow_MMI;
410 }
411 }
412 #endif
413 #if defined(HAS_SPLITUVROW_MSA)
414 if (TestCpuFlag(kCpuHasMSA)) {
415 SplitUVRow = SplitUVRow_Any_MSA;
416 if (IS_ALIGNED(width, 32)) {
417 SplitUVRow = SplitUVRow_MSA;
418 }
419 }
420 #endif
421
422 for (y = 0; y < height; ++y) {
423 // Copy a row of UV.
424 SplitUVRow(src_uv, dst_u, dst_v, width);
425 dst_u += dst_stride_u;
426 dst_v += dst_stride_v;
427 src_uv += src_stride_uv;
428 }
429 }
430
431 LIBYUV_API
MergeUVPlane(const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_uv,int dst_stride_uv,int width,int height)432 void MergeUVPlane(const uint8_t* src_u,
433 int src_stride_u,
434 const uint8_t* src_v,
435 int src_stride_v,
436 uint8_t* dst_uv,
437 int dst_stride_uv,
438 int width,
439 int height) {
440 int y;
441 void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
442 uint8_t* dst_uv, int width) = MergeUVRow_C;
443 // Negative height means invert the image.
444 if (height < 0) {
445 height = -height;
446 dst_uv = dst_uv + (height - 1) * dst_stride_uv;
447 dst_stride_uv = -dst_stride_uv;
448 }
449 // Coalesce rows.
450 if (src_stride_u == width && src_stride_v == width &&
451 dst_stride_uv == width * 2) {
452 width *= height;
453 height = 1;
454 src_stride_u = src_stride_v = dst_stride_uv = 0;
455 }
456 #if defined(HAS_MERGEUVROW_SSE2)
457 if (TestCpuFlag(kCpuHasSSE2)) {
458 MergeUVRow = MergeUVRow_Any_SSE2;
459 if (IS_ALIGNED(width, 16)) {
460 MergeUVRow = MergeUVRow_SSE2;
461 }
462 }
463 #endif
464 #if defined(HAS_MERGEUVROW_AVX2)
465 if (TestCpuFlag(kCpuHasAVX2)) {
466 MergeUVRow = MergeUVRow_Any_AVX2;
467 if (IS_ALIGNED(width, 32)) {
468 MergeUVRow = MergeUVRow_AVX2;
469 }
470 }
471 #endif
472 #if defined(HAS_MERGEUVROW_NEON)
473 if (TestCpuFlag(kCpuHasNEON)) {
474 MergeUVRow = MergeUVRow_Any_NEON;
475 if (IS_ALIGNED(width, 16)) {
476 MergeUVRow = MergeUVRow_NEON;
477 }
478 }
479 #endif
480 #if defined(HAS_MERGEUVROW_MMI)
481 if (TestCpuFlag(kCpuHasMMI)) {
482 MergeUVRow = MergeUVRow_Any_MMI;
483 if (IS_ALIGNED(width, 8)) {
484 MergeUVRow = MergeUVRow_MMI;
485 }
486 }
487 #endif
488 #if defined(HAS_MERGEUVROW_MSA)
489 if (TestCpuFlag(kCpuHasMSA)) {
490 MergeUVRow = MergeUVRow_Any_MSA;
491 if (IS_ALIGNED(width, 16)) {
492 MergeUVRow = MergeUVRow_MSA;
493 }
494 }
495 #endif
496
497 for (y = 0; y < height; ++y) {
498 // Merge a row of U and V into a row of UV.
499 MergeUVRow(src_u, src_v, dst_uv, width);
500 src_u += src_stride_u;
501 src_v += src_stride_v;
502 dst_uv += dst_stride_uv;
503 }
504 }
505
506 // Swap U and V channels in interleaved UV plane.
507 LIBYUV_API
SwapUVPlane(const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_vu,int dst_stride_vu,int width,int height)508 void SwapUVPlane(const uint8_t* src_uv,
509 int src_stride_uv,
510 uint8_t* dst_vu,
511 int dst_stride_vu,
512 int width,
513 int height) {
514 int y;
515 void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
516 SwapUVRow_C;
517 // Negative height means invert the image.
518 if (height < 0) {
519 height = -height;
520 src_uv = src_uv + (height - 1) * src_stride_uv;
521 src_stride_uv = -src_stride_uv;
522 }
523 // Coalesce rows.
524 if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
525 width *= height;
526 height = 1;
527 src_stride_uv = dst_stride_vu = 0;
528 }
529
530 #if defined(HAS_SWAPUVROW_SSSE3)
531 if (TestCpuFlag(kCpuHasSSSE3)) {
532 SwapUVRow = SwapUVRow_Any_SSSE3;
533 if (IS_ALIGNED(width, 16)) {
534 SwapUVRow = SwapUVRow_SSSE3;
535 }
536 }
537 #endif
538 #if defined(HAS_SWAPUVROW_AVX2)
539 if (TestCpuFlag(kCpuHasAVX2)) {
540 SwapUVRow = SwapUVRow_Any_AVX2;
541 if (IS_ALIGNED(width, 32)) {
542 SwapUVRow = SwapUVRow_AVX2;
543 }
544 }
545 #endif
546 #if defined(HAS_SWAPUVROW_NEON)
547 if (TestCpuFlag(kCpuHasNEON)) {
548 SwapUVRow = SwapUVRow_Any_NEON;
549 if (IS_ALIGNED(width, 16)) {
550 SwapUVRow = SwapUVRow_NEON;
551 }
552 }
553 #endif
554
555 for (y = 0; y < height; ++y) {
556 SwapUVRow(src_uv, dst_vu, width);
557 src_uv += src_stride_uv;
558 dst_vu += dst_stride_vu;
559 }
560 }
561
562 // Convert NV21 to NV12.
563 LIBYUV_API
NV21ToNV12(const uint8_t * src_y,int src_stride_y,const uint8_t * src_vu,int src_stride_vu,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int width,int height)564 int NV21ToNV12(const uint8_t* src_y,
565 int src_stride_y,
566 const uint8_t* src_vu,
567 int src_stride_vu,
568 uint8_t* dst_y,
569 int dst_stride_y,
570 uint8_t* dst_uv,
571 int dst_stride_uv,
572 int width,
573 int height) {
574 int halfwidth = (width + 1) >> 1;
575 int halfheight = (height + 1) >> 1;
576 if (!src_vu || !dst_uv || width <= 0 || height == 0) {
577 return -1;
578 }
579 if (dst_y) {
580 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
581 }
582
583 // Negative height means invert the image.
584 if (height < 0) {
585 height = -height;
586 halfheight = (height + 1) >> 1;
587 src_vu = src_vu + (halfheight - 1) * src_stride_vu;
588 src_stride_vu = -src_stride_vu;
589 }
590
591 SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
592 halfheight);
593 return 0;
594 }
595
596 // Support function for NV12 etc RGB channels.
597 // Width and height are plane sizes (typically half pixel width).
598 LIBYUV_API
SplitRGBPlane(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_r,int dst_stride_r,uint8_t * dst_g,int dst_stride_g,uint8_t * dst_b,int dst_stride_b,int width,int height)599 void SplitRGBPlane(const uint8_t* src_rgb,
600 int src_stride_rgb,
601 uint8_t* dst_r,
602 int dst_stride_r,
603 uint8_t* dst_g,
604 int dst_stride_g,
605 uint8_t* dst_b,
606 int dst_stride_b,
607 int width,
608 int height) {
609 int y;
610 void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
611 uint8_t* dst_b, int width) = SplitRGBRow_C;
612 // Negative height means invert the image.
613 if (height < 0) {
614 height = -height;
615 dst_r = dst_r + (height - 1) * dst_stride_r;
616 dst_g = dst_g + (height - 1) * dst_stride_g;
617 dst_b = dst_b + (height - 1) * dst_stride_b;
618 dst_stride_r = -dst_stride_r;
619 dst_stride_g = -dst_stride_g;
620 dst_stride_b = -dst_stride_b;
621 }
622 // Coalesce rows.
623 if (src_stride_rgb == width * 3 && dst_stride_r == width &&
624 dst_stride_g == width && dst_stride_b == width) {
625 width *= height;
626 height = 1;
627 src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
628 }
629 #if defined(HAS_SPLITRGBROW_SSSE3)
630 if (TestCpuFlag(kCpuHasSSSE3)) {
631 SplitRGBRow = SplitRGBRow_Any_SSSE3;
632 if (IS_ALIGNED(width, 16)) {
633 SplitRGBRow = SplitRGBRow_SSSE3;
634 }
635 }
636 #endif
637 #if defined(HAS_SPLITRGBROW_MMI)
638 if (TestCpuFlag(kCpuHasMMI)) {
639 SplitRGBRow = SplitRGBRow_Any_MMI;
640 if (IS_ALIGNED(width, 4)) {
641 SplitRGBRow = SplitRGBRow_MMI;
642 }
643 }
644 #endif
645 #if defined(HAS_SPLITRGBROW_NEON)
646 if (TestCpuFlag(kCpuHasNEON)) {
647 SplitRGBRow = SplitRGBRow_Any_NEON;
648 if (IS_ALIGNED(width, 16)) {
649 SplitRGBRow = SplitRGBRow_NEON;
650 }
651 }
652 #endif
653
654 for (y = 0; y < height; ++y) {
655 // Copy a row of RGB.
656 SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
657 dst_r += dst_stride_r;
658 dst_g += dst_stride_g;
659 dst_b += dst_stride_b;
660 src_rgb += src_stride_rgb;
661 }
662 }
663
664 LIBYUV_API
MergeRGBPlane(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,uint8_t * dst_rgb,int dst_stride_rgb,int width,int height)665 void MergeRGBPlane(const uint8_t* src_r,
666 int src_stride_r,
667 const uint8_t* src_g,
668 int src_stride_g,
669 const uint8_t* src_b,
670 int src_stride_b,
671 uint8_t* dst_rgb,
672 int dst_stride_rgb,
673 int width,
674 int height) {
675 int y;
676 void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
677 const uint8_t* src_b, uint8_t* dst_rgb, int width) =
678 MergeRGBRow_C;
679 // Coalesce rows.
680 // Negative height means invert the image.
681 if (height < 0) {
682 height = -height;
683 dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
684 dst_stride_rgb = -dst_stride_rgb;
685 }
686 // Coalesce rows.
687 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
688 dst_stride_rgb == width * 3) {
689 width *= height;
690 height = 1;
691 src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
692 }
693 #if defined(HAS_MERGERGBROW_SSSE3)
694 if (TestCpuFlag(kCpuHasSSSE3)) {
695 MergeRGBRow = MergeRGBRow_Any_SSSE3;
696 if (IS_ALIGNED(width, 16)) {
697 MergeRGBRow = MergeRGBRow_SSSE3;
698 }
699 }
700 #endif
701 #if defined(HAS_MERGERGBROW_NEON)
702 if (TestCpuFlag(kCpuHasNEON)) {
703 MergeRGBRow = MergeRGBRow_Any_NEON;
704 if (IS_ALIGNED(width, 16)) {
705 MergeRGBRow = MergeRGBRow_NEON;
706 }
707 }
708 #endif
709 #if defined(HAS_MERGERGBROW_MMI)
710 if (TestCpuFlag(kCpuHasMMI)) {
711 MergeRGBRow = MergeRGBRow_Any_MMI;
712 if (IS_ALIGNED(width, 8)) {
713 MergeRGBRow = MergeRGBRow_MMI;
714 }
715 }
716 #endif
717
718 for (y = 0; y < height; ++y) {
719 // Merge a row of U and V into a row of RGB.
720 MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
721 src_r += src_stride_r;
722 src_g += src_stride_g;
723 src_b += src_stride_b;
724 dst_rgb += dst_stride_rgb;
725 }
726 }
727
728 // Convert YUY2 to I422.
729 LIBYUV_API
YUY2ToI422(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)730 int YUY2ToI422(const uint8_t* src_yuy2,
731 int src_stride_yuy2,
732 uint8_t* dst_y,
733 int dst_stride_y,
734 uint8_t* dst_u,
735 int dst_stride_u,
736 uint8_t* dst_v,
737 int dst_stride_v,
738 int width,
739 int height) {
740 int y;
741 void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
742 uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
743 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
744 YUY2ToYRow_C;
745 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
746 return -1;
747 }
748 // Negative height means invert the image.
749 if (height < 0) {
750 height = -height;
751 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
752 src_stride_yuy2 = -src_stride_yuy2;
753 }
754 // Coalesce rows.
755 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
756 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
757 width * height <= 32768) {
758 width *= height;
759 height = 1;
760 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
761 }
762 #if defined(HAS_YUY2TOYROW_SSE2)
763 if (TestCpuFlag(kCpuHasSSE2)) {
764 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
765 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
766 if (IS_ALIGNED(width, 16)) {
767 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
768 YUY2ToYRow = YUY2ToYRow_SSE2;
769 }
770 }
771 #endif
772 #if defined(HAS_YUY2TOYROW_AVX2)
773 if (TestCpuFlag(kCpuHasAVX2)) {
774 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
775 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
776 if (IS_ALIGNED(width, 32)) {
777 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
778 YUY2ToYRow = YUY2ToYRow_AVX2;
779 }
780 }
781 #endif
782 #if defined(HAS_YUY2TOYROW_NEON)
783 if (TestCpuFlag(kCpuHasNEON)) {
784 YUY2ToYRow = YUY2ToYRow_Any_NEON;
785 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
786 if (IS_ALIGNED(width, 16)) {
787 YUY2ToYRow = YUY2ToYRow_NEON;
788 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
789 }
790 }
791 #endif
792 #if defined(HAS_YUY2TOYROW_MMI) && defined(HAS_YUY2TOUV422ROW_MMI)
793 if (TestCpuFlag(kCpuHasMMI)) {
794 YUY2ToYRow = YUY2ToYRow_Any_MMI;
795 YUY2ToUV422Row = YUY2ToUV422Row_Any_MMI;
796 if (IS_ALIGNED(width, 8)) {
797 YUY2ToYRow = YUY2ToYRow_MMI;
798 YUY2ToUV422Row = YUY2ToUV422Row_MMI;
799 }
800 }
801 #endif
802 #if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
803 if (TestCpuFlag(kCpuHasMSA)) {
804 YUY2ToYRow = YUY2ToYRow_Any_MSA;
805 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
806 if (IS_ALIGNED(width, 32)) {
807 YUY2ToYRow = YUY2ToYRow_MSA;
808 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
809 }
810 }
811 #endif
812
813 for (y = 0; y < height; ++y) {
814 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
815 YUY2ToYRow(src_yuy2, dst_y, width);
816 src_yuy2 += src_stride_yuy2;
817 dst_y += dst_stride_y;
818 dst_u += dst_stride_u;
819 dst_v += dst_stride_v;
820 }
821 return 0;
822 }
823
824 // Convert UYVY to I422.
825 LIBYUV_API
UYVYToI422(const uint8_t * src_uyvy,int src_stride_uyvy,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)826 int UYVYToI422(const uint8_t* src_uyvy,
827 int src_stride_uyvy,
828 uint8_t* dst_y,
829 int dst_stride_y,
830 uint8_t* dst_u,
831 int dst_stride_u,
832 uint8_t* dst_v,
833 int dst_stride_v,
834 int width,
835 int height) {
836 int y;
837 void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
838 uint8_t* dst_v, int width) = UYVYToUV422Row_C;
839 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
840 UYVYToYRow_C;
841 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
842 return -1;
843 }
844 // Negative height means invert the image.
845 if (height < 0) {
846 height = -height;
847 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
848 src_stride_uyvy = -src_stride_uyvy;
849 }
850 // Coalesce rows.
851 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
852 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
853 width * height <= 32768) {
854 width *= height;
855 height = 1;
856 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
857 }
858 #if defined(HAS_UYVYTOYROW_SSE2)
859 if (TestCpuFlag(kCpuHasSSE2)) {
860 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
861 UYVYToYRow = UYVYToYRow_Any_SSE2;
862 if (IS_ALIGNED(width, 16)) {
863 UYVYToUV422Row = UYVYToUV422Row_SSE2;
864 UYVYToYRow = UYVYToYRow_SSE2;
865 }
866 }
867 #endif
868 #if defined(HAS_UYVYTOYROW_AVX2)
869 if (TestCpuFlag(kCpuHasAVX2)) {
870 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
871 UYVYToYRow = UYVYToYRow_Any_AVX2;
872 if (IS_ALIGNED(width, 32)) {
873 UYVYToUV422Row = UYVYToUV422Row_AVX2;
874 UYVYToYRow = UYVYToYRow_AVX2;
875 }
876 }
877 #endif
878 #if defined(HAS_UYVYTOYROW_NEON)
879 if (TestCpuFlag(kCpuHasNEON)) {
880 UYVYToYRow = UYVYToYRow_Any_NEON;
881 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
882 if (IS_ALIGNED(width, 16)) {
883 UYVYToYRow = UYVYToYRow_NEON;
884 UYVYToUV422Row = UYVYToUV422Row_NEON;
885 }
886 }
887 #endif
888 #if defined(HAS_UYVYTOYROW_MMI) && defined(HAS_UYVYTOUV422ROW_MMI)
889 if (TestCpuFlag(kCpuHasMMI)) {
890 UYVYToYRow = UYVYToYRow_Any_MMI;
891 UYVYToUV422Row = UYVYToUV422Row_Any_MMI;
892 if (IS_ALIGNED(width, 16)) {
893 UYVYToYRow = UYVYToYRow_MMI;
894 UYVYToUV422Row = UYVYToUV422Row_MMI;
895 }
896 }
897 #endif
898 #if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA)
899 if (TestCpuFlag(kCpuHasMSA)) {
900 UYVYToYRow = UYVYToYRow_Any_MSA;
901 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
902 if (IS_ALIGNED(width, 32)) {
903 UYVYToYRow = UYVYToYRow_MSA;
904 UYVYToUV422Row = UYVYToUV422Row_MSA;
905 }
906 }
907 #endif
908
909 for (y = 0; y < height; ++y) {
910 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
911 UYVYToYRow(src_uyvy, dst_y, width);
912 src_uyvy += src_stride_uyvy;
913 dst_y += dst_stride_y;
914 dst_u += dst_stride_u;
915 dst_v += dst_stride_v;
916 }
917 return 0;
918 }
919
920 // Convert YUY2 to Y.
921 LIBYUV_API
YUY2ToY(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,int width,int height)922 int YUY2ToY(const uint8_t* src_yuy2,
923 int src_stride_yuy2,
924 uint8_t* dst_y,
925 int dst_stride_y,
926 int width,
927 int height) {
928 int y;
929 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
930 YUY2ToYRow_C;
931 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
932 return -1;
933 }
934 // Negative height means invert the image.
935 if (height < 0) {
936 height = -height;
937 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
938 src_stride_yuy2 = -src_stride_yuy2;
939 }
940 // Coalesce rows.
941 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
942 width *= height;
943 height = 1;
944 src_stride_yuy2 = dst_stride_y = 0;
945 }
946 #if defined(HAS_YUY2TOYROW_SSE2)
947 if (TestCpuFlag(kCpuHasSSE2)) {
948 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
949 if (IS_ALIGNED(width, 16)) {
950 YUY2ToYRow = YUY2ToYRow_SSE2;
951 }
952 }
953 #endif
954 #if defined(HAS_YUY2TOYROW_AVX2)
955 if (TestCpuFlag(kCpuHasAVX2)) {
956 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
957 if (IS_ALIGNED(width, 32)) {
958 YUY2ToYRow = YUY2ToYRow_AVX2;
959 }
960 }
961 #endif
962 #if defined(HAS_YUY2TOYROW_NEON)
963 if (TestCpuFlag(kCpuHasNEON)) {
964 YUY2ToYRow = YUY2ToYRow_Any_NEON;
965 if (IS_ALIGNED(width, 16)) {
966 YUY2ToYRow = YUY2ToYRow_NEON;
967 }
968 }
969 #endif
970 #if defined(HAS_YUY2TOYROW_MMI)
971 if (TestCpuFlag(kCpuHasMMI)) {
972 YUY2ToYRow = YUY2ToYRow_Any_MMI;
973 if (IS_ALIGNED(width, 8)) {
974 YUY2ToYRow = YUY2ToYRow_MMI;
975 }
976 }
977 #endif
978 #if defined(HAS_YUY2TOYROW_MSA)
979 if (TestCpuFlag(kCpuHasMSA)) {
980 YUY2ToYRow = YUY2ToYRow_Any_MSA;
981 if (IS_ALIGNED(width, 32)) {
982 YUY2ToYRow = YUY2ToYRow_MSA;
983 }
984 }
985 #endif
986
987 for (y = 0; y < height; ++y) {
988 YUY2ToYRow(src_yuy2, dst_y, width);
989 src_yuy2 += src_stride_yuy2;
990 dst_y += dst_stride_y;
991 }
992 return 0;
993 }
994
995 // Mirror a plane of data.
996 // See Also I400Mirror
997 LIBYUV_API
MirrorPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)998 void MirrorPlane(const uint8_t* src_y,
999 int src_stride_y,
1000 uint8_t* dst_y,
1001 int dst_stride_y,
1002 int width,
1003 int height) {
1004 int y;
1005 void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
1006 // Negative height means invert the image.
1007 if (height < 0) {
1008 height = -height;
1009 src_y = src_y + (height - 1) * src_stride_y;
1010 src_stride_y = -src_stride_y;
1011 }
1012 #if defined(HAS_MIRRORROW_NEON)
1013 if (TestCpuFlag(kCpuHasNEON)) {
1014 MirrorRow = MirrorRow_Any_NEON;
1015 if (IS_ALIGNED(width, 32)) {
1016 MirrorRow = MirrorRow_NEON;
1017 }
1018 }
1019 #endif
1020 #if defined(HAS_MIRRORROW_SSSE3)
1021 if (TestCpuFlag(kCpuHasSSSE3)) {
1022 MirrorRow = MirrorRow_Any_SSSE3;
1023 if (IS_ALIGNED(width, 16)) {
1024 MirrorRow = MirrorRow_SSSE3;
1025 }
1026 }
1027 #endif
1028 #if defined(HAS_MIRRORROW_AVX2)
1029 if (TestCpuFlag(kCpuHasAVX2)) {
1030 MirrorRow = MirrorRow_Any_AVX2;
1031 if (IS_ALIGNED(width, 32)) {
1032 MirrorRow = MirrorRow_AVX2;
1033 }
1034 }
1035 #endif
1036 #if defined(HAS_MIRRORROW_MMI)
1037 if (TestCpuFlag(kCpuHasMMI)) {
1038 MirrorRow = MirrorRow_Any_MMI;
1039 if (IS_ALIGNED(width, 8)) {
1040 MirrorRow = MirrorRow_MMI;
1041 }
1042 }
1043 #endif
1044 #if defined(HAS_MIRRORROW_MSA)
1045 if (TestCpuFlag(kCpuHasMSA)) {
1046 MirrorRow = MirrorRow_Any_MSA;
1047 if (IS_ALIGNED(width, 64)) {
1048 MirrorRow = MirrorRow_MSA;
1049 }
1050 }
1051 #endif
1052
1053 // Mirror plane
1054 for (y = 0; y < height; ++y) {
1055 MirrorRow(src_y, dst_y, width);
1056 src_y += src_stride_y;
1057 dst_y += dst_stride_y;
1058 }
1059 }
1060
1061 // Mirror a plane of UV data.
1062 LIBYUV_API
// Mirror a plane of interleaved UV data horizontally; a negative height
// additionally flips the image vertically. width is measured in UV pairs
// (2 bytes each) — see NV12Mirror, which passes halfwidth.
void MirrorUVPlane(const uint8_t* src_uv,
                   int src_stride_uv,
                   uint8_t* dst_uv,
                   int dst_stride_uv,
                   int width,
                   int height) {
  int y;
  // Row worker; starts as the portable C version and is upgraded below to
  // the best SIMD variant this CPU supports.
  void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) =
      MirrorUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uv = src_uv + (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
// The _Any_ variants handle widths that are not a multiple of the SIMD
// vector width; the aligned variants are picked when width permits.
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MirrorUVRow = MirrorUVRow_Any_NEON;
    if (IS_ALIGNED(width, 32)) {
      MirrorUVRow = MirrorUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MirrorUVRow = MirrorUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      MirrorUVRow = MirrorUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_MIRRORUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MirrorUVRow = MirrorUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MirrorUVRow = MirrorUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MIRRORUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MirrorUVRow = MirrorUVRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      MirrorUVRow = MirrorUVRow_MSA;
    }
  }
#endif

  // MirrorUV plane
  for (y = 0; y < height; ++y) {
    MirrorUVRow(src_uv, dst_uv, width);
    src_uv += src_stride_uv;
    dst_uv += dst_stride_uv;
  }
}
1118
1119 // Mirror I400 with optional flipping
1120 LIBYUV_API
// Mirror an I400 (single 8-bit plane) image horizontally; a negative
// height also flips it vertically. Returns 0 on success, -1 on bad args.
int I400Mirror(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  // Reject NULL planes, non-positive width and zero height.
  if (width <= 0 || height == 0 || !src_y || !dst_y) {
    return -1;
  }
  // A negative height requests a vertical flip: start reading at the last
  // source row and walk upwards via a negated stride.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
1140
1141 // Mirror I420 with optional flipping
1142 LIBYUV_API
// Mirror an I420 image horizontally; a negative height also flips it
// vertically. dst_y may be NULL to skip the luma plane. Returns 0 on
// success, -1 on bad args.
int I420Mirror(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  // Chroma planes are half size, rounded up.
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (width <= 0 || height == 0 || !src_y || !src_u || !src_v || !dst_u ||
      !dst_v) {
    return -1;
  }
  // A negative height selects a vertical flip: read each source plane
  // from its last row upwards via negated strides.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_u += (halfheight - 1) * src_stride_u;
    src_v += (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}
1182
1183 // NV12 mirror.
1184 LIBYUV_API
// Mirror an NV12 image horizontally; a negative height also flips it
// vertically. dst_y may be NULL to skip the luma plane. Returns 0 on
// success, -1 on bad args.
int NV12Mirror(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  // Interleaved UV plane is half size in each dimension, rounded up.
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (width <= 0 || height == 0 || !src_y || !src_uv || !dst_uv) {
    return -1;
  }
  // A negative height selects a vertical flip: read each source plane
  // from its last row upwards via negated strides.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_uv += (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth,
                halfheight);
  return 0;
}
1217
1218 // ARGB mirror.
1219 LIBYUV_API
// Mirror an ARGB image horizontally; a negative height also flips it
// vertically. width/height are in pixels (4 bytes each). Returns 0 on
// success, -1 on bad args.
int ARGBMirror(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  // Row worker; the portable C version is replaced below by the best SIMD
  // variant available on this CPU.
  void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
// The _Any_ variants handle widths that are not a multiple of the SIMD
// vector width; aligned variants are picked when width permits.
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBMirrorRow = ARGBMirrorRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_MSA;
    }
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1287
1288 // RGB24 mirror.
1289 LIBYUV_API
// Mirror an RGB24 image (3 bytes per pixel) horizontally; a negative
// height also flips it vertically. Returns 0 on success, -1 on bad args.
int RGB24Mirror(const uint8_t* src_rgb24,
                int src_stride_rgb24,
                uint8_t* dst_rgb24,
                int dst_stride_rgb24,
                int width,
                int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
      RGB24MirrorRow_C;
  if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
    src_stride_rgb24 = -src_stride_rgb24;
  }
// The _Any_ variants handle widths that are not a multiple of 16.
#if defined(HAS_RGB24MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      RGB24MirrorRow = RGB24MirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_RGB24MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RGB24MirrorRow = RGB24MirrorRow_SSSE3;
    }
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    RGB24MirrorRow(src_rgb24, dst_rgb24, width);
    src_rgb24 += src_stride_rgb24;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}
1333
1334 // Get a blender that optimized for the CPU and pixel count.
1335 // As there are 6 blenders to choose from, the caller should try to use
1336 // the same blend function for all pixels if possible.
1337 LIBYUV_API
// Select the best ARGB blend row function for this CPU.
// Note the asymmetric dispatch: the SSSE3 path returns immediately and so
// takes absolute priority, while NEON/MMI/MSA fall through, so the last
// matching flag among them wins.
ARGBBlendRow GetARGBBlend() {
  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
                       uint8_t* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBBlendRow = ARGBBlendRow_SSSE3;
    return ARGBBlendRow;
  }
#endif
#if defined(HAS_ARGBBLENDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBBlendRow = ARGBBlendRow_NEON;
  }
#endif
#if defined(HAS_ARGBBLENDROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBBlendRow = ARGBBlendRow_MMI;
  }
#endif
#if defined(HAS_ARGBBLENDROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBBlendRow = ARGBBlendRow_MSA;
  }
#endif
  return ARGBBlendRow;
}
1364
1365 // Alpha Blend 2 ARGB images and store to destination.
1366 LIBYUV_API
// Alpha-blend two ARGB images per pixel, using the row blender chosen by
// GetARGBBlend(), and store to destination. A negative height inverts the
// destination. Returns 0 on success, -1 on bad args.
int ARGBBlend(const uint8_t* src_argb0,
              int src_stride_argb0,
              const uint8_t* src_argb1,
              int src_stride_argb1,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int width,
              int height) {
  int y;
  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
                       uint8_t* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: when every stride equals the row width in bytes, the
  // whole image is contiguous and can be processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1403
1404 // Alpha Blend plane and store to destination.
1405 LIBYUV_API
// Blend two 8-bit planes using a per-pixel 8-bit alpha plane and store the
// result to dst_y. A negative height inverts the destination. Returns 0 on
// success, -1 on bad args.
int BlendPlane(const uint8_t* src_y0,
               int src_stride_y0,
               const uint8_t* src_y1,
               int src_stride_y1,
               const uint8_t* alpha,
               int alpha_stride,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
                        const uint8_t* alpha, uint8_t* dst, int width) =
      BlendPlaneRow_C;
  if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Coalesce rows for Y plane: if all four strides equal width, the planes
  // are contiguous and can be processed as a single long row.
  if (src_stride_y0 == width && src_stride_y1 == width &&
      alpha_stride == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
  }

// The _Any_ variants handle widths that are not a multiple of the SIMD
// vector width.
#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    BlendPlaneRow = BlendPlaneRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      BlendPlaneRow = BlendPlaneRow_MMI;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
    src_y0 += src_stride_y0;
    src_y1 += src_stride_y1;
    alpha += alpha_stride;
    dst_y += dst_stride_y;
  }
  return 0;
}
1472
1473 #define MAXTWIDTH 2048
1474 // Alpha Blend YUV images and store to destination.
1475 LIBYUV_API
// Alpha-blend two I420 images using a full-resolution 8-bit alpha plane.
// The Y planes are blended at full resolution via BlendPlane; for U and V
// the alpha plane is 2x2 box-subsampled, one row pair at a time into the
// temporary halfalpha buffer, then used to blend the half-size chroma rows.
// NOTE(review): for a negative height only dst_y is inverted below; the
// U/V destination planes are still written top-down — confirm intended.
int I420Blend(const uint8_t* src_y0,
              int src_stride_y0,
              const uint8_t* src_u0,
              int src_stride_u0,
              const uint8_t* src_v0,
              int src_stride_v0,
              const uint8_t* src_y1,
              int src_stride_y1,
              const uint8_t* src_u1,
              int src_stride_u1,
              const uint8_t* src_v1,
              int src_stride_v1,
              const uint8_t* alpha,
              int alpha_stride,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int y;
  // Half width/height for UV.
  int halfwidth = (width + 1) >> 1;
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
                        const uint8_t* alpha, uint8_t* dst, int width) =
      BlendPlaneRow_C;
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Blend Y plane.
  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
             dst_y, dst_stride_y, width, height);

// Chroma blenders are dispatched on halfwidth, the width of the U/V rows.
#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    BlendPlaneRow = BlendPlaneRow_Any_MMI;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_MMI;
    }
  }
#endif
  // Odd widths need the Odd variant, which handles the trailing column.
  if (!IS_ALIGNED(width, 2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
  }
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
      if (IS_ALIGNED(halfwidth, 32)) {
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_MMI;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_MMI;
      if (IS_ALIGNED(halfwidth, 8)) {
        ScaleRowDown2 = ScaleRowDown2Box_MMI;
      }
    }
  }
#endif

  // Row buffer for intermediate alpha pixels.
  align_buffer_64(halfalpha, halfwidth);
  for (y = 0; y < height; y += 2) {
    // last row of odd height image use 1 row of alpha instead of 2.
    if (y == (height - 1)) {
      alpha_stride = 0;
    }
    // Subsample 2 rows of UV to half width and half height.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
    alpha += alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
    src_u1 += src_stride_u1;
    dst_u += dst_stride_u;
    src_v0 += src_stride_v0;
    src_v1 += src_stride_v1;
    dst_v += dst_stride_v;
  }
  free_aligned_buffer_64(halfalpha);
  return 0;
}
1616
1617 // Multiply 2 ARGB images and store to destination.
1618 LIBYUV_API
// Multiply two ARGB images per channel and store to destination. A
// negative height inverts the destination. Returns 0 on success, -1 on
// bad args.
int ARGBMultiply(const uint8_t* src_argb0,
                 int src_stride_argb0,
                 const uint8_t* src_argb1,
                 int src_stride_argb1,
                 uint8_t* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
                          uint8_t* dst, int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBMultiplyRow = ARGBMultiplyRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_MSA;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1696
1697 // Add 2 ARGB images and store to destination.
1698 LIBYUV_API
// Add two ARGB images per channel and store to destination. A negative
// height inverts the destination. Returns 0 on success, -1 on bad args.
int ARGBAdd(const uint8_t* src_argb0,
            int src_stride_argb0,
            const uint8_t* src_argb1,
            int src_stride_argb1,
            uint8_t* dst_argb,
            int dst_stride_argb,
            int width,
            int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
// Visual C (non-clang) builds use the full SSE2 row unconditionally; all
// other SSE2 builds take the _Any_ variant unless width is a multiple of 4.
#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAddRow = ARGBAddRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAddRow = ARGBAddRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAddRow = ARGBAddRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_MSA;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1781
1782 // Subtract 2 ARGB images and store to destination.
1783 LIBYUV_API
// Subtract the second ARGB image from the first per channel and store to
// destination. A negative height inverts the destination. Returns 0 on
// success, -1 on bad args.
int ARGBSubtract(const uint8_t* src_argb0,
                 int src_stride_argb0,
                 const uint8_t* src_argb1,
                 int src_stride_argb1,
                 uint8_t* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
                          uint8_t* dst, int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBSubtractRow = ARGBSubtractRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_MSA;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1861
1862 // Convert RAW to RGB24.
1863 LIBYUV_API
// Convert RAW to RGB24 (both 3 bytes per pixel). A negative height inverts
// the source. Returns 0 on success, -1 on bad args.
int RAWToRGB24(const uint8_t* src_raw,
               int src_stride_raw,
               uint8_t* dst_rgb24,
               int dst_stride_rgb24,
               int width,
               int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
      RAWToRGB24Row_C;
  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // Coalesce rows: contiguous 3-byte-per-pixel images are processed as one
  // long row.
  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
    width *= height;
    height = 1;
    src_stride_raw = dst_stride_rgb24 = 0;
  }
#if defined(HAS_RAWTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_NEON;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_MMI;
    if (IS_ALIGNED(width, 4)) {
      RAWToRGB24Row = RAWToRGB24Row_MMI;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToRGB24Row = RAWToRGB24Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    RAWToRGB24Row(src_raw, dst_rgb24, width);
    src_raw += src_stride_raw;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}
1928
1929 LIBYUV_API
// Fill a plane with a constant byte value. Although value is declared
// uint32_t, the row functions take a uint8_t, so only the low 8 bits are
// used. A negative height inverts the fill direction (same bytes written).
void SetPlane(uint8_t* dst_y,
              int dst_stride_y,
              int width,
              int height,
              uint32_t value) {
  int y;
  // Row worker; upgraded below to a SIMD/string-op variant when available.
  void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: a contiguous plane is filled as one long row.
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SetRow = SetRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SetRow = SetRow_NEON;
    }
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRow = SetRow_Any_X86;
    if (IS_ALIGNED(width, 4)) {
      SetRow = SetRow_X86;
    }
  }
#endif
#if defined(HAS_SETROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    SetRow = SetRow_ERMS;
  }
#endif
#if defined(HAS_SETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
    SetRow = SetRow_MSA;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, value, width);
    dst_y += dst_stride_y;
  }
}
1981
1982 // Draw a rectangle into I420
1983 LIBYUV_API
// Draw a rectangle of solid YUV color into an I420 image.
// (x, y) is the top-left corner in full-resolution pixel coordinates; a
// negative height means the fill proceeds upwards (handled by SetPlane).
// Returns 0 on success, -1 on invalid arguments.
int I420Rect(uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_u,
             int dst_stride_u,
             uint8_t* dst_v,
             int dst_stride_v,
             int x,
             int y,
             int width,
             int height,
             int value_y,
             int value_u,
             int value_v) {
  int halfwidth = (width + 1) >> 1;
  // Round the chroma height away from zero so odd heights (of either sign)
  // cover the full half-resolution plane. The former (height + 1) >> 1
  // dropped a chroma row for negative odd heights and right-shifted a
  // negative value, which is implementation-defined in C.
  int halfheight =
      (height >= 0) ? ((height + 1) >> 1) : -((-height + 1) >> 1);
  uint8_t* start_y;
  uint8_t* start_u;
  uint8_t* start_v;
  // Validate before performing pointer arithmetic on the plane pointers.
  if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
      y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
      value_v < 0 || value_v > 255) {
    return -1;
  }
  start_y = dst_y + y * dst_stride_y + x;
  start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
  start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);

  SetPlane(start_y, dst_stride_y, width, height, value_y);
  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
  return 0;
}
2013
2014 // Draw a rectangle into ARGB
2015 LIBYUV_API
// Draw a rectangle of a solid packed 32-bit ARGB value into an ARGB image.
// (dst_x, dst_y) is the top-left corner; a negative height inverts the
// fill direction. Returns 0 on success, -1 on bad args.
int ARGBRect(uint8_t* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height,
             uint32_t value) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
      ARGBSetRow_C;
  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Offset is applied after the flip, so a negated stride addresses the
  // rectangle correctly when drawing bottom-up.
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows: a contiguous plane is filled as one long row.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }

#if defined(HAS_ARGBSETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSetRow = ARGBSetRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRow = ARGBSetRow_X86;
  }
#endif
#if defined(HAS_ARGBSETROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBSetRow = ARGBSetRow_Any_MMI;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSetRow = ARGBSetRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_MSA;
    }
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    ARGBSetRow(dst_argb, value, width);
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2079
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is the output pixel
//   f is the foreground pixel
//   b is the background pixel
//   a is the alpha value from the foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is the foreground pixel premultiplied by alpha
2092
2093 LIBYUV_API
// Premultiply each ARGB pixel by its alpha (see the formulas in the
// comment block above). A negative height inverts the source. Returns 0
// on success, -1 on bad args.
int ARGBAttenuate(const uint8_t* src_argb,
                  int src_stride_argb,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  // Row worker; upgraded below to a SIMD variant when available.
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2165
2166 // Convert preattentuated ARGB to unattenuated ARGB.
2167 LIBYUV_API
ARGBUnattenuate(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2168 int ARGBUnattenuate(const uint8_t* src_argb,
2169 int src_stride_argb,
2170 uint8_t* dst_argb,
2171 int dst_stride_argb,
2172 int width,
2173 int height) {
2174 int y;
2175 void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
2176 int width) = ARGBUnattenuateRow_C;
2177 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2178 return -1;
2179 }
2180 if (height < 0) {
2181 height = -height;
2182 src_argb = src_argb + (height - 1) * src_stride_argb;
2183 src_stride_argb = -src_stride_argb;
2184 }
2185 // Coalesce rows.
2186 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2187 width *= height;
2188 height = 1;
2189 src_stride_argb = dst_stride_argb = 0;
2190 }
2191 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
2192 if (TestCpuFlag(kCpuHasSSE2)) {
2193 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
2194 if (IS_ALIGNED(width, 4)) {
2195 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
2196 }
2197 }
2198 #endif
2199 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
2200 if (TestCpuFlag(kCpuHasAVX2)) {
2201 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
2202 if (IS_ALIGNED(width, 8)) {
2203 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
2204 }
2205 }
2206 #endif
2207 // TODO(fbarchard): Neon version.
2208
2209 for (y = 0; y < height; ++y) {
2210 ARGBUnattenuateRow(src_argb, dst_argb, width);
2211 src_argb += src_stride_argb;
2212 dst_argb += dst_stride_argb;
2213 }
2214 return 0;
2215 }
2216
2217 // Convert ARGB to Grayed ARGB.
2218 LIBYUV_API
ARGBGrayTo(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)2219 int ARGBGrayTo(const uint8_t* src_argb,
2220 int src_stride_argb,
2221 uint8_t* dst_argb,
2222 int dst_stride_argb,
2223 int width,
2224 int height) {
2225 int y;
2226 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
2227 ARGBGrayRow_C;
2228 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2229 return -1;
2230 }
2231 if (height < 0) {
2232 height = -height;
2233 src_argb = src_argb + (height - 1) * src_stride_argb;
2234 src_stride_argb = -src_stride_argb;
2235 }
2236 // Coalesce rows.
2237 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2238 width *= height;
2239 height = 1;
2240 src_stride_argb = dst_stride_argb = 0;
2241 }
2242 #if defined(HAS_ARGBGRAYROW_SSSE3)
2243 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2244 ARGBGrayRow = ARGBGrayRow_SSSE3;
2245 }
2246 #endif
2247 #if defined(HAS_ARGBGRAYROW_NEON)
2248 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2249 ARGBGrayRow = ARGBGrayRow_NEON;
2250 }
2251 #endif
2252 #if defined(HAS_ARGBGRAYROW_MMI)
2253 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
2254 ARGBGrayRow = ARGBGrayRow_MMI;
2255 }
2256 #endif
2257 #if defined(HAS_ARGBGRAYROW_MSA)
2258 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2259 ARGBGrayRow = ARGBGrayRow_MSA;
2260 }
2261 #endif
2262
2263 for (y = 0; y < height; ++y) {
2264 ARGBGrayRow(src_argb, dst_argb, width);
2265 src_argb += src_stride_argb;
2266 dst_argb += dst_stride_argb;
2267 }
2268 return 0;
2269 }
2270
2271 // Make a rectangle of ARGB gray scale.
2272 LIBYUV_API
ARGBGray(uint8_t * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)2273 int ARGBGray(uint8_t* dst_argb,
2274 int dst_stride_argb,
2275 int dst_x,
2276 int dst_y,
2277 int width,
2278 int height) {
2279 int y;
2280 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
2281 ARGBGrayRow_C;
2282 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2283 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
2284 return -1;
2285 }
2286 // Coalesce rows.
2287 if (dst_stride_argb == width * 4) {
2288 width *= height;
2289 height = 1;
2290 dst_stride_argb = 0;
2291 }
2292 #if defined(HAS_ARGBGRAYROW_SSSE3)
2293 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2294 ARGBGrayRow = ARGBGrayRow_SSSE3;
2295 }
2296 #endif
2297 #if defined(HAS_ARGBGRAYROW_NEON)
2298 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2299 ARGBGrayRow = ARGBGrayRow_NEON;
2300 }
2301 #endif
2302 #if defined(HAS_ARGBGRAYROW_MMI)
2303 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
2304 ARGBGrayRow = ARGBGrayRow_MMI;
2305 }
2306 #endif
2307 #if defined(HAS_ARGBGRAYROW_MSA)
2308 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2309 ARGBGrayRow = ARGBGrayRow_MSA;
2310 }
2311 #endif
2312
2313 for (y = 0; y < height; ++y) {
2314 ARGBGrayRow(dst, dst, width);
2315 dst += dst_stride_argb;
2316 }
2317 return 0;
2318 }
2319
2320 // Make a rectangle of ARGB Sepia tone.
2321 LIBYUV_API
ARGBSepia(uint8_t * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)2322 int ARGBSepia(uint8_t* dst_argb,
2323 int dst_stride_argb,
2324 int dst_x,
2325 int dst_y,
2326 int width,
2327 int height) {
2328 int y;
2329 void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
2330 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2331 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
2332 return -1;
2333 }
2334 // Coalesce rows.
2335 if (dst_stride_argb == width * 4) {
2336 width *= height;
2337 height = 1;
2338 dst_stride_argb = 0;
2339 }
2340 #if defined(HAS_ARGBSEPIAROW_SSSE3)
2341 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2342 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
2343 }
2344 #endif
2345 #if defined(HAS_ARGBSEPIAROW_NEON)
2346 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2347 ARGBSepiaRow = ARGBSepiaRow_NEON;
2348 }
2349 #endif
2350 #if defined(HAS_ARGBSEPIAROW_MMI)
2351 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
2352 ARGBSepiaRow = ARGBSepiaRow_MMI;
2353 }
2354 #endif
2355 #if defined(HAS_ARGBSEPIAROW_MSA)
2356 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2357 ARGBSepiaRow = ARGBSepiaRow_MSA;
2358 }
2359 #endif
2360
2361 for (y = 0; y < height; ++y) {
2362 ARGBSepiaRow(dst, width);
2363 dst += dst_stride_argb;
2364 }
2365 return 0;
2366 }
2367
2368 // Apply a 4x4 matrix to each ARGB pixel.
2369 // Note: Normally for shading, but can be used to swizzle or invert.
2370 LIBYUV_API
ARGBColorMatrix(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const int8_t * matrix_argb,int width,int height)2371 int ARGBColorMatrix(const uint8_t* src_argb,
2372 int src_stride_argb,
2373 uint8_t* dst_argb,
2374 int dst_stride_argb,
2375 const int8_t* matrix_argb,
2376 int width,
2377 int height) {
2378 int y;
2379 void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
2380 const int8_t* matrix_argb, int width) =
2381 ARGBColorMatrixRow_C;
2382 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
2383 return -1;
2384 }
2385 if (height < 0) {
2386 height = -height;
2387 src_argb = src_argb + (height - 1) * src_stride_argb;
2388 src_stride_argb = -src_stride_argb;
2389 }
2390 // Coalesce rows.
2391 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2392 width *= height;
2393 height = 1;
2394 src_stride_argb = dst_stride_argb = 0;
2395 }
2396 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
2397 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2398 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
2399 }
2400 #endif
2401 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
2402 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2403 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
2404 }
2405 #endif
2406 #if defined(HAS_ARGBCOLORMATRIXROW_MMI)
2407 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
2408 ARGBColorMatrixRow = ARGBColorMatrixRow_MMI;
2409 }
2410 #endif
2411 #if defined(HAS_ARGBCOLORMATRIXROW_MSA)
2412 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2413 ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
2414 }
2415 #endif
2416 for (y = 0; y < height; ++y) {
2417 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
2418 src_argb += src_stride_argb;
2419 dst_argb += dst_stride_argb;
2420 }
2421 return 0;
2422 }
2423
2424 // Apply a 4x3 matrix to each ARGB pixel.
2425 // Deprecated.
2426 LIBYUV_API
RGBColorMatrix(uint8_t * dst_argb,int dst_stride_argb,const int8_t * matrix_rgb,int dst_x,int dst_y,int width,int height)2427 int RGBColorMatrix(uint8_t* dst_argb,
2428 int dst_stride_argb,
2429 const int8_t* matrix_rgb,
2430 int dst_x,
2431 int dst_y,
2432 int width,
2433 int height) {
2434 SIMD_ALIGNED(int8_t matrix_argb[16]);
2435 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2436 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
2437 dst_y < 0) {
2438 return -1;
2439 }
2440
2441 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
2442 matrix_argb[0] = matrix_rgb[0] / 2;
2443 matrix_argb[1] = matrix_rgb[1] / 2;
2444 matrix_argb[2] = matrix_rgb[2] / 2;
2445 matrix_argb[3] = matrix_rgb[3] / 2;
2446 matrix_argb[4] = matrix_rgb[4] / 2;
2447 matrix_argb[5] = matrix_rgb[5] / 2;
2448 matrix_argb[6] = matrix_rgb[6] / 2;
2449 matrix_argb[7] = matrix_rgb[7] / 2;
2450 matrix_argb[8] = matrix_rgb[8] / 2;
2451 matrix_argb[9] = matrix_rgb[9] / 2;
2452 matrix_argb[10] = matrix_rgb[10] / 2;
2453 matrix_argb[11] = matrix_rgb[11] / 2;
2454 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
2455 matrix_argb[15] = 64; // 1.0
2456
2457 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
2458 dst_stride_argb, &matrix_argb[0], width, height);
2459 }
2460
2461 // Apply a color table each ARGB pixel.
2462 // Table contains 256 ARGB values.
2463 LIBYUV_API
ARGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)2464 int ARGBColorTable(uint8_t* dst_argb,
2465 int dst_stride_argb,
2466 const uint8_t* table_argb,
2467 int dst_x,
2468 int dst_y,
2469 int width,
2470 int height) {
2471 int y;
2472 void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
2473 int width) = ARGBColorTableRow_C;
2474 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2475 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2476 dst_y < 0) {
2477 return -1;
2478 }
2479 // Coalesce rows.
2480 if (dst_stride_argb == width * 4) {
2481 width *= height;
2482 height = 1;
2483 dst_stride_argb = 0;
2484 }
2485 #if defined(HAS_ARGBCOLORTABLEROW_X86)
2486 if (TestCpuFlag(kCpuHasX86)) {
2487 ARGBColorTableRow = ARGBColorTableRow_X86;
2488 }
2489 #endif
2490 for (y = 0; y < height; ++y) {
2491 ARGBColorTableRow(dst, table_argb, width);
2492 dst += dst_stride_argb;
2493 }
2494 return 0;
2495 }
2496
2497 // Apply a color table each ARGB pixel but preserve destination alpha.
2498 // Table contains 256 ARGB values.
2499 LIBYUV_API
RGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)2500 int RGBColorTable(uint8_t* dst_argb,
2501 int dst_stride_argb,
2502 const uint8_t* table_argb,
2503 int dst_x,
2504 int dst_y,
2505 int width,
2506 int height) {
2507 int y;
2508 void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
2509 int width) = RGBColorTableRow_C;
2510 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2511 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2512 dst_y < 0) {
2513 return -1;
2514 }
2515 // Coalesce rows.
2516 if (dst_stride_argb == width * 4) {
2517 width *= height;
2518 height = 1;
2519 dst_stride_argb = 0;
2520 }
2521 #if defined(HAS_RGBCOLORTABLEROW_X86)
2522 if (TestCpuFlag(kCpuHasX86)) {
2523 RGBColorTableRow = RGBColorTableRow_X86;
2524 }
2525 #endif
2526 for (y = 0; y < height; ++y) {
2527 RGBColorTableRow(dst, table_argb, width);
2528 dst += dst_stride_argb;
2529 }
2530 return 0;
2531 }
2532
2533 // ARGBQuantize is used to posterize art.
2534 // e.g. rgb / qvalue * qvalue + qvalue / 2
2535 // But the low levels implement efficiently with 3 parameters, and could be
2536 // used for other high level operations.
2537 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
2538 // where scale is 1 / interval_size as a fixed point value.
2539 // The divide is replaces with a multiply by reciprocal fixed point multiply.
2540 // Caveat - although SSE2 saturates, the C function does not and should be used
2541 // with care if doing anything but quantization.
2542 LIBYUV_API
ARGBQuantize(uint8_t * dst_argb,int dst_stride_argb,int scale,int interval_size,int interval_offset,int dst_x,int dst_y,int width,int height)2543 int ARGBQuantize(uint8_t* dst_argb,
2544 int dst_stride_argb,
2545 int scale,
2546 int interval_size,
2547 int interval_offset,
2548 int dst_x,
2549 int dst_y,
2550 int width,
2551 int height) {
2552 int y;
2553 void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
2554 int interval_offset, int width) = ARGBQuantizeRow_C;
2555 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2556 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
2557 interval_size < 1 || interval_size > 255) {
2558 return -1;
2559 }
2560 // Coalesce rows.
2561 if (dst_stride_argb == width * 4) {
2562 width *= height;
2563 height = 1;
2564 dst_stride_argb = 0;
2565 }
2566 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
2567 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
2568 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
2569 }
2570 #endif
2571 #if defined(HAS_ARGBQUANTIZEROW_NEON)
2572 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2573 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
2574 }
2575 #endif
2576 #if defined(HAS_ARGBQUANTIZEROW_MSA)
2577 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2578 ARGBQuantizeRow = ARGBQuantizeRow_MSA;
2579 }
2580 #endif
2581 for (y = 0; y < height; ++y) {
2582 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
2583 dst += dst_stride_argb;
2584 }
2585 return 0;
2586 }
2587
2588 // Computes table of cumulative sum for image where the value is the sum
2589 // of all values above and to the left of the entry. Used by ARGBBlur.
2590 LIBYUV_API
ARGBComputeCumulativeSum(const uint8_t * src_argb,int src_stride_argb,int32_t * dst_cumsum,int dst_stride32_cumsum,int width,int height)2591 int ARGBComputeCumulativeSum(const uint8_t* src_argb,
2592 int src_stride_argb,
2593 int32_t* dst_cumsum,
2594 int dst_stride32_cumsum,
2595 int width,
2596 int height) {
2597 int y;
2598 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
2599 const int32_t* previous_cumsum, int width) =
2600 ComputeCumulativeSumRow_C;
2601 int32_t* previous_cumsum = dst_cumsum;
2602 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
2603 return -1;
2604 }
2605 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
2606 if (TestCpuFlag(kCpuHasSSE2)) {
2607 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
2608 }
2609 #endif
2610 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
2611 if (TestCpuFlag(kCpuHasMMI)) {
2612 ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
2613 }
2614 #endif
2615
2616 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
2617 for (y = 0; y < height; ++y) {
2618 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
2619 previous_cumsum = dst_cumsum;
2620 dst_cumsum += dst_stride32_cumsum;
2621 src_argb += src_stride_argb;
2622 }
2623 return 0;
2624 }
2625
// Blur ARGB image using a box filter of the given radius, via a cumulative
// (summed-area) table so cost is independent of radius.
// Caller should allocate CumulativeSum table of width * height * 16 bytes
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
// Returns 0 on success, -1 on invalid arguments or non-positive radius.
LIBYUV_API
int ARGBBlur(const uint8_t* src_argb,
             int src_stride_argb,
             uint8_t* dst_argb,
             int dst_stride_argb,
             int32_t* dst_cumsum,
             int dst_stride32_cumsum,
             int width,
             int height,
             int radius) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
                                  const int32_t* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(
      const int32_t* topleft, const int32_t* botleft, int width, int area,
      uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
  // Pointers into the circular cumulative-sum buffer: bot is the newest row,
  // top the oldest still needed, max marks one past the end for wrap-around.
  int32_t* cumsum_bot_row;
  int32_t* max_cumsum_bot_row;
  int32_t* cumsum_top_row;

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp radius to the image dimensions; too-small images cannot be blurred.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
                           dst_stride32_cumsum, width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Clamp the box's top and bottom rows to the image; area tracks the
    // number of summed pixels so edge boxes average over fewer samples.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;  // Box width in int32 lanes (4 per pixel).
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: box grows one pixel wider per output pixel.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                                &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full-width box, handled in one call of n pixels.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                              &dst_argb[x * 4], n);

    // Right clipped: box shrinks one pixel per output pixel.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
                                area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2743
2744 // Multiply ARGB image by a specified ARGB value.
2745 LIBYUV_API
ARGBShade(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height,uint32_t value)2746 int ARGBShade(const uint8_t* src_argb,
2747 int src_stride_argb,
2748 uint8_t* dst_argb,
2749 int dst_stride_argb,
2750 int width,
2751 int height,
2752 uint32_t value) {
2753 int y;
2754 void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
2755 uint32_t value) = ARGBShadeRow_C;
2756 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
2757 return -1;
2758 }
2759 if (height < 0) {
2760 height = -height;
2761 src_argb = src_argb + (height - 1) * src_stride_argb;
2762 src_stride_argb = -src_stride_argb;
2763 }
2764 // Coalesce rows.
2765 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2766 width *= height;
2767 height = 1;
2768 src_stride_argb = dst_stride_argb = 0;
2769 }
2770 #if defined(HAS_ARGBSHADEROW_SSE2)
2771 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
2772 ARGBShadeRow = ARGBShadeRow_SSE2;
2773 }
2774 #endif
2775 #if defined(HAS_ARGBSHADEROW_NEON)
2776 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2777 ARGBShadeRow = ARGBShadeRow_NEON;
2778 }
2779 #endif
2780 #if defined(HAS_ARGBSHADEROW_MMI)
2781 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
2782 ARGBShadeRow = ARGBShadeRow_MMI;
2783 }
2784 #endif
2785 #if defined(HAS_ARGBSHADEROW_MSA)
2786 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
2787 ARGBShadeRow = ARGBShadeRow_MSA;
2788 }
2789 #endif
2790
2791 for (y = 0; y < height; ++y) {
2792 ARGBShadeRow(src_argb, dst_argb, width, value);
2793 src_argb += src_stride_argb;
2794 dst_argb += dst_stride_argb;
2795 }
2796 return 0;
2797 }
2798
2799 // Interpolate 2 planes by specified amount (0 to 255).
2800 LIBYUV_API
InterpolatePlane(const uint8_t * src0,int src_stride0,const uint8_t * src1,int src_stride1,uint8_t * dst,int dst_stride,int width,int height,int interpolation)2801 int InterpolatePlane(const uint8_t* src0,
2802 int src_stride0,
2803 const uint8_t* src1,
2804 int src_stride1,
2805 uint8_t* dst,
2806 int dst_stride,
2807 int width,
2808 int height,
2809 int interpolation) {
2810 int y;
2811 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
2812 ptrdiff_t src_stride, int dst_width,
2813 int source_y_fraction) = InterpolateRow_C;
2814 if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
2815 return -1;
2816 }
2817 // Negative height means invert the image.
2818 if (height < 0) {
2819 height = -height;
2820 dst = dst + (height - 1) * dst_stride;
2821 dst_stride = -dst_stride;
2822 }
2823 // Coalesce rows.
2824 if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
2825 width *= height;
2826 height = 1;
2827 src_stride0 = src_stride1 = dst_stride = 0;
2828 }
2829 #if defined(HAS_INTERPOLATEROW_SSSE3)
2830 if (TestCpuFlag(kCpuHasSSSE3)) {
2831 InterpolateRow = InterpolateRow_Any_SSSE3;
2832 if (IS_ALIGNED(width, 16)) {
2833 InterpolateRow = InterpolateRow_SSSE3;
2834 }
2835 }
2836 #endif
2837 #if defined(HAS_INTERPOLATEROW_AVX2)
2838 if (TestCpuFlag(kCpuHasAVX2)) {
2839 InterpolateRow = InterpolateRow_Any_AVX2;
2840 if (IS_ALIGNED(width, 32)) {
2841 InterpolateRow = InterpolateRow_AVX2;
2842 }
2843 }
2844 #endif
2845 #if defined(HAS_INTERPOLATEROW_NEON)
2846 if (TestCpuFlag(kCpuHasNEON)) {
2847 InterpolateRow = InterpolateRow_Any_NEON;
2848 if (IS_ALIGNED(width, 16)) {
2849 InterpolateRow = InterpolateRow_NEON;
2850 }
2851 }
2852 #endif
2853 #if defined(HAS_INTERPOLATEROW_MMI)
2854 if (TestCpuFlag(kCpuHasMMI)) {
2855 InterpolateRow = InterpolateRow_Any_MMI;
2856 if (IS_ALIGNED(width, 8)) {
2857 InterpolateRow = InterpolateRow_MMI;
2858 }
2859 }
2860 #endif
2861 #if defined(HAS_INTERPOLATEROW_MSA)
2862 if (TestCpuFlag(kCpuHasMSA)) {
2863 InterpolateRow = InterpolateRow_Any_MSA;
2864 if (IS_ALIGNED(width, 32)) {
2865 InterpolateRow = InterpolateRow_MSA;
2866 }
2867 }
2868 #endif
2869
2870 for (y = 0; y < height; ++y) {
2871 InterpolateRow(dst, src0, src1 - src0, width, interpolation);
2872 src0 += src_stride0;
2873 src1 += src_stride1;
2874 dst += dst_stride;
2875 }
2876 return 0;
2877 }
2878
2879 // Interpolate 2 ARGB images by specified amount (0 to 255).
2880 LIBYUV_API
ARGBInterpolate(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int interpolation)2881 int ARGBInterpolate(const uint8_t* src_argb0,
2882 int src_stride_argb0,
2883 const uint8_t* src_argb1,
2884 int src_stride_argb1,
2885 uint8_t* dst_argb,
2886 int dst_stride_argb,
2887 int width,
2888 int height,
2889 int interpolation) {
2890 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
2891 src_stride_argb1, dst_argb, dst_stride_argb,
2892 width * 4, height, interpolation);
2893 }
2894
2895 // Interpolate 2 YUV images by specified amount (0 to 255).
2896 LIBYUV_API
I420Interpolate(const uint8_t * src0_y,int src0_stride_y,const uint8_t * src0_u,int src0_stride_u,const uint8_t * src0_v,int src0_stride_v,const uint8_t * src1_y,int src1_stride_y,const uint8_t * src1_u,int src1_stride_u,const uint8_t * src1_v,int src1_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height,int interpolation)2897 int I420Interpolate(const uint8_t* src0_y,
2898 int src0_stride_y,
2899 const uint8_t* src0_u,
2900 int src0_stride_u,
2901 const uint8_t* src0_v,
2902 int src0_stride_v,
2903 const uint8_t* src1_y,
2904 int src1_stride_y,
2905 const uint8_t* src1_u,
2906 int src1_stride_u,
2907 const uint8_t* src1_v,
2908 int src1_stride_v,
2909 uint8_t* dst_y,
2910 int dst_stride_y,
2911 uint8_t* dst_u,
2912 int dst_stride_u,
2913 uint8_t* dst_v,
2914 int dst_stride_v,
2915 int width,
2916 int height,
2917 int interpolation) {
2918 int halfwidth = (width + 1) >> 1;
2919 int halfheight = (height + 1) >> 1;
2920 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
2921 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2922 return -1;
2923 }
2924 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
2925 dst_stride_y, width, height, interpolation);
2926 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
2927 dst_stride_u, halfwidth, halfheight, interpolation);
2928 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
2929 dst_stride_v, halfwidth, halfheight, interpolation);
2930 return 0;
2931 }
2932
2933 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
2934 LIBYUV_API
ARGBShuffle(const uint8_t * src_bgra,int src_stride_bgra,uint8_t * dst_argb,int dst_stride_argb,const uint8_t * shuffler,int width,int height)2935 int ARGBShuffle(const uint8_t* src_bgra,
2936 int src_stride_bgra,
2937 uint8_t* dst_argb,
2938 int dst_stride_argb,
2939 const uint8_t* shuffler,
2940 int width,
2941 int height) {
2942 int y;
2943 void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
2944 const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
2945 if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
2946 return -1;
2947 }
2948 // Negative height means invert the image.
2949 if (height < 0) {
2950 height = -height;
2951 src_bgra = src_bgra + (height - 1) * src_stride_bgra;
2952 src_stride_bgra = -src_stride_bgra;
2953 }
2954 // Coalesce rows.
2955 if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
2956 width *= height;
2957 height = 1;
2958 src_stride_bgra = dst_stride_argb = 0;
2959 }
2960 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
2961 if (TestCpuFlag(kCpuHasSSSE3)) {
2962 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
2963 if (IS_ALIGNED(width, 8)) {
2964 ARGBShuffleRow = ARGBShuffleRow_SSSE3;
2965 }
2966 }
2967 #endif
2968 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
2969 if (TestCpuFlag(kCpuHasAVX2)) {
2970 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
2971 if (IS_ALIGNED(width, 16)) {
2972 ARGBShuffleRow = ARGBShuffleRow_AVX2;
2973 }
2974 }
2975 #endif
2976 #if defined(HAS_ARGBSHUFFLEROW_NEON)
2977 if (TestCpuFlag(kCpuHasNEON)) {
2978 ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
2979 if (IS_ALIGNED(width, 4)) {
2980 ARGBShuffleRow = ARGBShuffleRow_NEON;
2981 }
2982 }
2983 #endif
2984 #if defined(HAS_ARGBSHUFFLEROW_MMI)
2985 if (TestCpuFlag(kCpuHasMMI)) {
2986 ARGBShuffleRow = ARGBShuffleRow_Any_MMI;
2987 if (IS_ALIGNED(width, 2)) {
2988 ARGBShuffleRow = ARGBShuffleRow_MMI;
2989 }
2990 }
2991 #endif
2992 #if defined(HAS_ARGBSHUFFLEROW_MSA)
2993 if (TestCpuFlag(kCpuHasMSA)) {
2994 ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
2995 if (IS_ALIGNED(width, 8)) {
2996 ARGBShuffleRow = ARGBShuffleRow_MSA;
2997 }
2998 }
2999 #endif
3000
3001 for (y = 0; y < height; ++y) {
3002 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
3003 src_bgra += src_stride_bgra;
3004 dst_argb += dst_stride_argb;
3005 }
3006 return 0;
3007 }
3008
// Gauss blur a float plane using Gaussian 5x5 filter with
// coefficients of 1, 4, 6, 4, 1.
// Each destination pixel is a blur of the 5x5
// pixels from the source.
// Source edges are clamped.
// Edge is 2 pixels on each side, and interior is multiple of 4.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int GaussPlane_F32(const float* src,
                   int src_stride,
                   float* dst,
                   int dst_stride,
                   int width,
                   int height) {
  int y;
  void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2,
                       const float* src3, const float* src4, float* dst,
                       int width) = GaussCol_F32_C;
  void (*GaussRow_F32)(const float* src, float* dst, int width) =
      GaussRow_F32_C;
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

#if defined(HAS_GAUSSCOL_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussCol_F32 = GaussCol_F32_NEON;
  }
#endif
#if defined(HAS_GAUSSROW_F32_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    GaussRow_F32 = GaussRow_F32_NEON;
  }
#endif
  {
    // Scratch row for the vertical pass:
    // 2 pixels on each side, but aligned out to 16 bytes.
    align_buffer_64(rowbuf, (4 + width + 4) * 4);
    memset(rowbuf, 0, 16);
    memset(rowbuf + (4 + width) * 4, 0, 16);
    // row points 16 bytes (4 floats) into the buffer so row[-2]..row[-1]
    // are valid for the left edge extrusion below.
    float* row = (float*)(rowbuf + 16);
    // 5-row sliding window; rows above the image are clamped to row 0.
    const float* src0 = src;
    const float* src1 = src;
    const float* src2 = src;
    const float* src3 = src2 + ((height > 1) ? src_stride : 0);
    const float* src4 = src3 + ((height > 2) ? src_stride : 0);

    for (y = 0; y < height; ++y) {
      // Vertical pass: blur the 5 source rows into one intermediate row.
      GaussCol_F32(src0, src1, src2, src3, src4, row, width);

      // Extrude edge by 2 floats
      row[-2] = row[-1] = row[0];
      row[width + 1] = row[width] = row[width - 1];

      // Horizontal pass over the extruded intermediate row.
      GaussRow_F32(row - 2, dst, width);

      // Slide the window down one row; src4 stays clamped at the last row.
      src0 = src1;
      src1 = src2;
      src2 = src3;
      src3 = src4;
      if ((y + 2) < (height - 1)) {
        src4 += src_stride;
      }
      dst += dst_stride;
    }
    free_aligned_buffer_64(rowbuf);
  }
  return 0;
}
3082
// Sobel ARGB effect.
// Shared driver for the Sobel variants: converts each ARGB row to luma,
// runs Sobel X and Y gradient filters over a 3-row window, then calls the
// caller-supplied SobelRow to pack the gradients into the destination.
static int ARGBSobelize(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8_t* src_sobelx,
                                         const uint8_t* src_sobely,
                                         uint8_t* dst,
                                         int width)) {
  int y;
  // ARGB -> luma conversion for one row.
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
      ARGBToYJRow_C;
  // Vertical gradient from the rows above and below.
  void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    uint8_t* dst_sobely, int width) = SobelYRow_C;
  // Horizontal gradient from three consecutive rows.
  void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBToYJRow = ARGBToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow = ARGBToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_MSA;
    }
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SobelYRow = SobelYRow_MMI;
  }
#endif
#if defined(HAS_SOBELYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelYRow = SobelYRow_MSA;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SobelXRow = SobelXRow_MMI;
  }
#endif
#if defined(HAS_SOBELXROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelXRow = SobelXRow_MSA;
  }
#endif
  {
    // 3 rows with edges before/after.
    // Layout: sobelx row | sobely row | kEdge pad | 3 luma rows | kEdge pad.
    const int kRowSize = (width + kEdge + 31) & ~31;
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8_t* row_sobelx = rows;
    uint8_t* row_sobely = rows + kRowSize;
    uint8_t* row_y = rows + kRowSize * 2;

    // Convert first row.
    // The first source row is converted twice so row_y0 (above) and row_y1
    // (current) both start clamped to the top edge.
    uint8_t* row_y0 = row_y + kEdge;
    uint8_t* row_y1 = row_y0 + kRowSize;
    uint8_t* row_y2 = row_y1 + kRowSize;
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];  // Extrude left edge by one pixel.
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G.
      // On the last row, src_argb is not advanced, clamping to the bottom.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8_t* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3241
3242 // Sobel ARGB effect.
3243 LIBYUV_API
ARGBSobel(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3244 int ARGBSobel(const uint8_t* src_argb,
3245 int src_stride_argb,
3246 uint8_t* dst_argb,
3247 int dst_stride_argb,
3248 int width,
3249 int height) {
3250 void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
3251 uint8_t* dst_argb, int width) = SobelRow_C;
3252 #if defined(HAS_SOBELROW_SSE2)
3253 if (TestCpuFlag(kCpuHasSSE2)) {
3254 SobelRow = SobelRow_Any_SSE2;
3255 if (IS_ALIGNED(width, 16)) {
3256 SobelRow = SobelRow_SSE2;
3257 }
3258 }
3259 #endif
3260 #if defined(HAS_SOBELROW_NEON)
3261 if (TestCpuFlag(kCpuHasNEON)) {
3262 SobelRow = SobelRow_Any_NEON;
3263 if (IS_ALIGNED(width, 8)) {
3264 SobelRow = SobelRow_NEON;
3265 }
3266 }
3267 #endif
3268 #if defined(HAS_SOBELROW_MMI)
3269 if (TestCpuFlag(kCpuHasMMI)) {
3270 SobelRow = SobelRow_Any_MMI;
3271 if (IS_ALIGNED(width, 8)) {
3272 SobelRow = SobelRow_MMI;
3273 }
3274 }
3275 #endif
3276 #if defined(HAS_SOBELROW_MSA)
3277 if (TestCpuFlag(kCpuHasMSA)) {
3278 SobelRow = SobelRow_Any_MSA;
3279 if (IS_ALIGNED(width, 16)) {
3280 SobelRow = SobelRow_MSA;
3281 }
3282 }
3283 #endif
3284 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
3285 width, height, SobelRow);
3286 }
3287
3288 // Sobel ARGB effect with planar output.
3289 LIBYUV_API
ARGBSobelToPlane(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_y,int dst_stride_y,int width,int height)3290 int ARGBSobelToPlane(const uint8_t* src_argb,
3291 int src_stride_argb,
3292 uint8_t* dst_y,
3293 int dst_stride_y,
3294 int width,
3295 int height) {
3296 void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
3297 uint8_t* dst_, int width) = SobelToPlaneRow_C;
3298 #if defined(HAS_SOBELTOPLANEROW_SSE2)
3299 if (TestCpuFlag(kCpuHasSSE2)) {
3300 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
3301 if (IS_ALIGNED(width, 16)) {
3302 SobelToPlaneRow = SobelToPlaneRow_SSE2;
3303 }
3304 }
3305 #endif
3306 #if defined(HAS_SOBELTOPLANEROW_NEON)
3307 if (TestCpuFlag(kCpuHasNEON)) {
3308 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
3309 if (IS_ALIGNED(width, 16)) {
3310 SobelToPlaneRow = SobelToPlaneRow_NEON;
3311 }
3312 }
3313 #endif
3314 #if defined(HAS_SOBELTOPLANEROW_MMI)
3315 if (TestCpuFlag(kCpuHasMMI)) {
3316 SobelToPlaneRow = SobelToPlaneRow_Any_MMI;
3317 if (IS_ALIGNED(width, 8)) {
3318 SobelToPlaneRow = SobelToPlaneRow_MMI;
3319 }
3320 }
3321 #endif
3322 #if defined(HAS_SOBELTOPLANEROW_MSA)
3323 if (TestCpuFlag(kCpuHasMSA)) {
3324 SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
3325 if (IS_ALIGNED(width, 32)) {
3326 SobelToPlaneRow = SobelToPlaneRow_MSA;
3327 }
3328 }
3329 #endif
3330 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
3331 height, SobelToPlaneRow);
3332 }
3333
3334 // SobelXY ARGB effect.
3335 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
3336 LIBYUV_API
ARGBSobelXY(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3337 int ARGBSobelXY(const uint8_t* src_argb,
3338 int src_stride_argb,
3339 uint8_t* dst_argb,
3340 int dst_stride_argb,
3341 int width,
3342 int height) {
3343 void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
3344 uint8_t* dst_argb, int width) = SobelXYRow_C;
3345 #if defined(HAS_SOBELXYROW_SSE2)
3346 if (TestCpuFlag(kCpuHasSSE2)) {
3347 SobelXYRow = SobelXYRow_Any_SSE2;
3348 if (IS_ALIGNED(width, 16)) {
3349 SobelXYRow = SobelXYRow_SSE2;
3350 }
3351 }
3352 #endif
3353 #if defined(HAS_SOBELXYROW_NEON)
3354 if (TestCpuFlag(kCpuHasNEON)) {
3355 SobelXYRow = SobelXYRow_Any_NEON;
3356 if (IS_ALIGNED(width, 8)) {
3357 SobelXYRow = SobelXYRow_NEON;
3358 }
3359 }
3360 #endif
3361 #if defined(HAS_SOBELXYROW_MMI)
3362 if (TestCpuFlag(kCpuHasMMI)) {
3363 SobelXYRow = SobelXYRow_Any_MMI;
3364 if (IS_ALIGNED(width, 8)) {
3365 SobelXYRow = SobelXYRow_MMI;
3366 }
3367 }
3368 #endif
3369 #if defined(HAS_SOBELXYROW_MSA)
3370 if (TestCpuFlag(kCpuHasMSA)) {
3371 SobelXYRow = SobelXYRow_Any_MSA;
3372 if (IS_ALIGNED(width, 16)) {
3373 SobelXYRow = SobelXYRow_MSA;
3374 }
3375 }
3376 #endif
3377 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
3378 width, height, SobelXYRow);
3379 }
3380
3381 // Apply a 4x4 polynomial to each ARGB pixel.
3382 LIBYUV_API
ARGBPolynomial(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const float * poly,int width,int height)3383 int ARGBPolynomial(const uint8_t* src_argb,
3384 int src_stride_argb,
3385 uint8_t* dst_argb,
3386 int dst_stride_argb,
3387 const float* poly,
3388 int width,
3389 int height) {
3390 int y;
3391 void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3392 const float* poly, int width) = ARGBPolynomialRow_C;
3393 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
3394 return -1;
3395 }
3396 // Negative height means invert the image.
3397 if (height < 0) {
3398 height = -height;
3399 src_argb = src_argb + (height - 1) * src_stride_argb;
3400 src_stride_argb = -src_stride_argb;
3401 }
3402 // Coalesce rows.
3403 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3404 width *= height;
3405 height = 1;
3406 src_stride_argb = dst_stride_argb = 0;
3407 }
3408 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
3409 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
3410 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
3411 }
3412 #endif
3413 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
3414 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
3415 IS_ALIGNED(width, 2)) {
3416 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
3417 }
3418 #endif
3419
3420 for (y = 0; y < height; ++y) {
3421 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
3422 src_argb += src_stride_argb;
3423 dst_argb += dst_stride_argb;
3424 }
3425 return 0;
3426 }
3427
3428 // Convert plane of 16 bit shorts to half floats.
3429 // Source values are multiplied by scale before storing as half float.
3430 LIBYUV_API
HalfFloatPlane(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,float scale,int width,int height)3431 int HalfFloatPlane(const uint16_t* src_y,
3432 int src_stride_y,
3433 uint16_t* dst_y,
3434 int dst_stride_y,
3435 float scale,
3436 int width,
3437 int height) {
3438 int y;
3439 void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
3440 int width) = HalfFloatRow_C;
3441 if (!src_y || !dst_y || width <= 0 || height == 0) {
3442 return -1;
3443 }
3444 src_stride_y >>= 1;
3445 dst_stride_y >>= 1;
3446 // Negative height means invert the image.
3447 if (height < 0) {
3448 height = -height;
3449 src_y = src_y + (height - 1) * src_stride_y;
3450 src_stride_y = -src_stride_y;
3451 }
3452 // Coalesce rows.
3453 if (src_stride_y == width && dst_stride_y == width) {
3454 width *= height;
3455 height = 1;
3456 src_stride_y = dst_stride_y = 0;
3457 }
3458 #if defined(HAS_HALFFLOATROW_SSE2)
3459 if (TestCpuFlag(kCpuHasSSE2)) {
3460 HalfFloatRow = HalfFloatRow_Any_SSE2;
3461 if (IS_ALIGNED(width, 8)) {
3462 HalfFloatRow = HalfFloatRow_SSE2;
3463 }
3464 }
3465 #endif
3466 #if defined(HAS_HALFFLOATROW_AVX2)
3467 if (TestCpuFlag(kCpuHasAVX2)) {
3468 HalfFloatRow = HalfFloatRow_Any_AVX2;
3469 if (IS_ALIGNED(width, 16)) {
3470 HalfFloatRow = HalfFloatRow_AVX2;
3471 }
3472 }
3473 #endif
3474 #if defined(HAS_HALFFLOATROW_F16C)
3475 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
3476 HalfFloatRow =
3477 (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
3478 if (IS_ALIGNED(width, 16)) {
3479 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
3480 }
3481 }
3482 #endif
3483 #if defined(HAS_HALFFLOATROW_NEON)
3484 if (TestCpuFlag(kCpuHasNEON)) {
3485 HalfFloatRow =
3486 (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
3487 if (IS_ALIGNED(width, 8)) {
3488 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
3489 }
3490 }
3491 #endif
3492 #if defined(HAS_HALFFLOATROW_MSA)
3493 if (TestCpuFlag(kCpuHasMSA)) {
3494 HalfFloatRow = HalfFloatRow_Any_MSA;
3495 if (IS_ALIGNED(width, 32)) {
3496 HalfFloatRow = HalfFloatRow_MSA;
3497 }
3498 }
3499 #endif
3500
3501 for (y = 0; y < height; ++y) {
3502 HalfFloatRow(src_y, dst_y, scale, width);
3503 src_y += src_stride_y;
3504 dst_y += dst_stride_y;
3505 }
3506 return 0;
3507 }
3508
3509 // Convert a buffer of bytes to floats, scale the values and store as floats.
3510 LIBYUV_API
ByteToFloat(const uint8_t * src_y,float * dst_y,float scale,int width)3511 int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
3512 void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
3513 int width) = ByteToFloatRow_C;
3514 if (!src_y || !dst_y || width <= 0) {
3515 return -1;
3516 }
3517 #if defined(HAS_BYTETOFLOATROW_NEON)
3518 if (TestCpuFlag(kCpuHasNEON)) {
3519 ByteToFloatRow = ByteToFloatRow_Any_NEON;
3520 if (IS_ALIGNED(width, 8)) {
3521 ByteToFloatRow = ByteToFloatRow_NEON;
3522 }
3523 }
3524 #endif
3525
3526 ByteToFloatRow(src_y, dst_y, scale, width);
3527 return 0;
3528 }
3529
3530 // Apply a lumacolortable to each ARGB pixel.
3531 LIBYUV_API
ARGBLumaColorTable(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const uint8_t * luma,int width,int height)3532 int ARGBLumaColorTable(const uint8_t* src_argb,
3533 int src_stride_argb,
3534 uint8_t* dst_argb,
3535 int dst_stride_argb,
3536 const uint8_t* luma,
3537 int width,
3538 int height) {
3539 int y;
3540 void (*ARGBLumaColorTableRow)(
3541 const uint8_t* src_argb, uint8_t* dst_argb, int width,
3542 const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
3543 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
3544 return -1;
3545 }
3546 // Negative height means invert the image.
3547 if (height < 0) {
3548 height = -height;
3549 src_argb = src_argb + (height - 1) * src_stride_argb;
3550 src_stride_argb = -src_stride_argb;
3551 }
3552 // Coalesce rows.
3553 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3554 width *= height;
3555 height = 1;
3556 src_stride_argb = dst_stride_argb = 0;
3557 }
3558 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
3559 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
3560 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
3561 }
3562 #endif
3563
3564 for (y = 0; y < height; ++y) {
3565 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
3566 src_argb += src_stride_argb;
3567 dst_argb += dst_stride_argb;
3568 }
3569 return 0;
3570 }
3571
3572 // Copy Alpha from one ARGB image to another.
3573 LIBYUV_API
ARGBCopyAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3574 int ARGBCopyAlpha(const uint8_t* src_argb,
3575 int src_stride_argb,
3576 uint8_t* dst_argb,
3577 int dst_stride_argb,
3578 int width,
3579 int height) {
3580 int y;
3581 void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3582 int width) = ARGBCopyAlphaRow_C;
3583 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3584 return -1;
3585 }
3586 // Negative height means invert the image.
3587 if (height < 0) {
3588 height = -height;
3589 src_argb = src_argb + (height - 1) * src_stride_argb;
3590 src_stride_argb = -src_stride_argb;
3591 }
3592 // Coalesce rows.
3593 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3594 width *= height;
3595 height = 1;
3596 src_stride_argb = dst_stride_argb = 0;
3597 }
3598 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
3599 if (TestCpuFlag(kCpuHasSSE2)) {
3600 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
3601 if (IS_ALIGNED(width, 8)) {
3602 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
3603 }
3604 }
3605 #endif
3606 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
3607 if (TestCpuFlag(kCpuHasAVX2)) {
3608 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
3609 if (IS_ALIGNED(width, 16)) {
3610 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
3611 }
3612 }
3613 #endif
3614 #if defined(HAS_ARGBCOPYALPHAROW_MMI)
3615 if (TestCpuFlag(kCpuHasMMI)) {
3616 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_MMI;
3617 if (IS_ALIGNED(width, 2)) {
3618 ARGBCopyAlphaRow = ARGBCopyAlphaRow_MMI;
3619 }
3620 }
3621 #endif
3622
3623 for (y = 0; y < height; ++y) {
3624 ARGBCopyAlphaRow(src_argb, dst_argb, width);
3625 src_argb += src_stride_argb;
3626 dst_argb += dst_stride_argb;
3627 }
3628 return 0;
3629 }
3630
3631 // Extract just the alpha channel from ARGB.
3632 LIBYUV_API
ARGBExtractAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_a,int dst_stride_a,int width,int height)3633 int ARGBExtractAlpha(const uint8_t* src_argb,
3634 int src_stride_argb,
3635 uint8_t* dst_a,
3636 int dst_stride_a,
3637 int width,
3638 int height) {
3639 if (!src_argb || !dst_a || width <= 0 || height == 0) {
3640 return -1;
3641 }
3642 // Negative height means invert the image.
3643 if (height < 0) {
3644 height = -height;
3645 src_argb += (height - 1) * src_stride_argb;
3646 src_stride_argb = -src_stride_argb;
3647 }
3648 // Coalesce rows.
3649 if (src_stride_argb == width * 4 && dst_stride_a == width) {
3650 width *= height;
3651 height = 1;
3652 src_stride_argb = dst_stride_a = 0;
3653 }
3654 void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
3655 int width) = ARGBExtractAlphaRow_C;
3656 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
3657 if (TestCpuFlag(kCpuHasSSE2)) {
3658 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
3659 : ARGBExtractAlphaRow_Any_SSE2;
3660 }
3661 #endif
3662 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
3663 if (TestCpuFlag(kCpuHasAVX2)) {
3664 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
3665 : ARGBExtractAlphaRow_Any_AVX2;
3666 }
3667 #endif
3668 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
3669 if (TestCpuFlag(kCpuHasNEON)) {
3670 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
3671 : ARGBExtractAlphaRow_Any_NEON;
3672 }
3673 #endif
3674 #if defined(HAS_ARGBEXTRACTALPHAROW_MMI)
3675 if (TestCpuFlag(kCpuHasMMI)) {
3676 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_MMI
3677 : ARGBExtractAlphaRow_Any_MMI;
3678 }
3679 #endif
3680 #if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
3681 if (TestCpuFlag(kCpuHasMSA)) {
3682 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
3683 : ARGBExtractAlphaRow_Any_MSA;
3684 }
3685 #endif
3686
3687 for (int y = 0; y < height; ++y) {
3688 ARGBExtractAlphaRow(src_argb, dst_a, width);
3689 src_argb += src_stride_argb;
3690 dst_a += dst_stride_a;
3691 }
3692 return 0;
3693 }
3694
3695 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
3696 LIBYUV_API
ARGBCopyYToAlpha(const uint8_t * src_y,int src_stride_y,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3697 int ARGBCopyYToAlpha(const uint8_t* src_y,
3698 int src_stride_y,
3699 uint8_t* dst_argb,
3700 int dst_stride_argb,
3701 int width,
3702 int height) {
3703 int y;
3704 void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
3705 int width) = ARGBCopyYToAlphaRow_C;
3706 if (!src_y || !dst_argb || width <= 0 || height == 0) {
3707 return -1;
3708 }
3709 // Negative height means invert the image.
3710 if (height < 0) {
3711 height = -height;
3712 src_y = src_y + (height - 1) * src_stride_y;
3713 src_stride_y = -src_stride_y;
3714 }
3715 // Coalesce rows.
3716 if (src_stride_y == width && dst_stride_argb == width * 4) {
3717 width *= height;
3718 height = 1;
3719 src_stride_y = dst_stride_argb = 0;
3720 }
3721 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
3722 if (TestCpuFlag(kCpuHasSSE2)) {
3723 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
3724 if (IS_ALIGNED(width, 8)) {
3725 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
3726 }
3727 }
3728 #endif
3729 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
3730 if (TestCpuFlag(kCpuHasAVX2)) {
3731 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
3732 if (IS_ALIGNED(width, 16)) {
3733 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
3734 }
3735 }
3736 #endif
3737 #if defined(HAS_ARGBCOPYYTOALPHAROW_MMI)
3738 if (TestCpuFlag(kCpuHasMMI)) {
3739 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_MMI;
3740 if (IS_ALIGNED(width, 8)) {
3741 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_MMI;
3742 }
3743 }
3744 #endif
3745
3746 for (y = 0; y < height; ++y) {
3747 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
3748 src_y += src_stride_y;
3749 dst_argb += dst_stride_argb;
3750 }
3751 return 0;
3752 }
3753
3754 // TODO(fbarchard): Consider if width is even Y channel can be split
3755 // directly. A SplitUVRow_Odd function could copy the remaining chroma.
3756
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
               int src_stride_yuy2,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved byte pairs into two planes. For YUY2 (Y0 U Y1 V ...)
  // the first byte of each pair goes to the Y plane and the second byte to
  // the interleaved UV plane (see the argument order in the calls below).
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows; called below with source_y_fraction 128, which blends
  // the two UV rows evenly for the 2x vertical chroma subsampling of NV12.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitUVRow = SplitUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow = SplitUVRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Buffer layout: [Y scratch row][UV row 0][UV row 1], each awidth bytes.
    align_buffer_64(rows, awidth * 3);

    // Process two source rows per iteration: each produces one Y row, and
    // their chroma is averaged into one UV row.
    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: its UV is written directly, with no blending.
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3888
LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved byte pairs into two planes. For UYVY (U Y0 V Y1 ...)
  // the first byte of each pair is chroma and the second is luma, so the
  // destination arguments below are swapped relative to YUY2ToNV12.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows; called below with source_y_fraction 128, which blends
  // the two UV rows evenly for the 2x vertical chroma subsampling of NV12.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    SplitUVRow = SplitUVRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow = SplitUVRow_MMI;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Buffer layout: [Y scratch row][UV row 0][UV row 1], each awidth bytes.
    align_buffer_64(rows, awidth * 3);

    // Process two source rows per iteration: each produces one Y row, and
    // their chroma is averaged into one UV row.
    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: its UV is written directly, with no blending.
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
4020
4021 // width and height are src size allowing odd size handling.
4022 LIBYUV_API
HalfMergeUVPlane(const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_uv,int dst_stride_uv,int width,int height)4023 void HalfMergeUVPlane(const uint8_t* src_u,
4024 int src_stride_u,
4025 const uint8_t* src_v,
4026 int src_stride_v,
4027 uint8_t* dst_uv,
4028 int dst_stride_uv,
4029 int width,
4030 int height) {
4031 int y;
4032 void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
4033 const uint8_t* src_v, int src_stride_v,
4034 uint8_t* dst_uv, int width) = HalfMergeUVRow_C;
4035
4036 // Negative height means invert the image.
4037 if (height < 0) {
4038 height = -height;
4039 src_u = src_u + (height - 1) * src_stride_u;
4040 src_v = src_v + (height - 1) * src_stride_v;
4041 src_stride_u = -src_stride_u;
4042 src_stride_v = -src_stride_v;
4043 }
4044 #if defined(HAS_HALFMERGEUVROW_NEON)
4045 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
4046 HalfMergeUVRow = HalfMergeUVRow_NEON;
4047 }
4048 #endif
4049 #if defined(HAS_HALFMERGEUVROW_SSSE3)
4050 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
4051 HalfMergeUVRow = HalfMergeUVRow_SSSE3;
4052 }
4053 #endif
4054 #if defined(HAS_HALFMERGEUVROW_AVX2)
4055 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
4056 HalfMergeUVRow = HalfMergeUVRow_AVX2;
4057 }
4058 #endif
4059 for (y = 0; y < height - 1; y += 2) {
4060 // Merge a row of U and V into a row of UV.
4061 HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
4062 src_u += src_stride_u * 2;
4063 src_v += src_stride_v * 2;
4064 dst_uv += dst_stride_uv;
4065 }
4066 if (height & 1) {
4067 HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
4068 }
4069 }
4070
4071 #ifdef __cplusplus
4072 } // extern "C"
4073 } // namespace libyuv
4074 #endif
4075