/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "libyuv/planar_functions.h"
12
13 #include <string.h> // for memset()
14
15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h"
18 #endif
19 #include "libyuv/row.h"
20 #include "libyuv/scale_row.h" // for ScaleRowDown2
21
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26
27 // Copy a plane of data
28 LIBYUV_API
CopyPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)29 void CopyPlane(const uint8_t* src_y,
30 int src_stride_y,
31 uint8_t* dst_y,
32 int dst_stride_y,
33 int width,
34 int height) {
35 int y;
36 void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
37 // Negative height means invert the image.
38 if (height < 0) {
39 height = -height;
40 dst_y = dst_y + (height - 1) * dst_stride_y;
41 dst_stride_y = -dst_stride_y;
42 }
43 // Coalesce rows.
44 if (src_stride_y == width && dst_stride_y == width) {
45 width *= height;
46 height = 1;
47 src_stride_y = dst_stride_y = 0;
48 }
49 // Nothing to do.
50 if (src_y == dst_y && src_stride_y == dst_stride_y) {
51 return;
52 }
53
54 #if defined(HAS_COPYROW_SSE2)
55 if (TestCpuFlag(kCpuHasSSE2)) {
56 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
57 }
58 #endif
59 #if defined(HAS_COPYROW_AVX)
60 if (TestCpuFlag(kCpuHasAVX)) {
61 CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
62 }
63 #endif
64 #if defined(HAS_COPYROW_ERMS)
65 if (TestCpuFlag(kCpuHasERMS)) {
66 CopyRow = CopyRow_ERMS;
67 }
68 #endif
69 #if defined(HAS_COPYROW_NEON)
70 if (TestCpuFlag(kCpuHasNEON)) {
71 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
72 }
73 #endif
74
75 // Copy plane
76 for (y = 0; y < height; ++y) {
77 CopyRow(src_y, dst_y, width);
78 src_y += src_stride_y;
79 dst_y += dst_stride_y;
80 }
81 }
82
83 // TODO(fbarchard): Consider support for negative height.
84 // TODO(fbarchard): Consider stride measured in bytes.
85 LIBYUV_API
CopyPlane_16(const uint16_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int width,int height)86 void CopyPlane_16(const uint16_t* src_y,
87 int src_stride_y,
88 uint16_t* dst_y,
89 int dst_stride_y,
90 int width,
91 int height) {
92 int y;
93 void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C;
94 // Coalesce rows.
95 if (src_stride_y == width && dst_stride_y == width) {
96 width *= height;
97 height = 1;
98 src_stride_y = dst_stride_y = 0;
99 }
100 #if defined(HAS_COPYROW_16_SSE2)
101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
102 CopyRow = CopyRow_16_SSE2;
103 }
104 #endif
105 #if defined(HAS_COPYROW_16_ERMS)
106 if (TestCpuFlag(kCpuHasERMS)) {
107 CopyRow = CopyRow_16_ERMS;
108 }
109 #endif
110 #if defined(HAS_COPYROW_16_NEON)
111 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
112 CopyRow = CopyRow_16_NEON;
113 }
114 #endif
115
116 // Copy plane
117 for (y = 0; y < height; ++y) {
118 CopyRow(src_y, dst_y, width);
119 src_y += src_stride_y;
120 dst_y += dst_stride_y;
121 }
122 }
123
124 // Convert a plane of 16 bit data to 8 bit
125 LIBYUV_API
Convert16To8Plane(const uint16_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int scale,int width,int height)126 void Convert16To8Plane(const uint16_t* src_y,
127 int src_stride_y,
128 uint8_t* dst_y,
129 int dst_stride_y,
130 int scale, // 16384 for 10 bits
131 int width,
132 int height) {
133 int y;
134 void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
135 int width) = Convert16To8Row_C;
136
137 // Negative height means invert the image.
138 if (height < 0) {
139 height = -height;
140 dst_y = dst_y + (height - 1) * dst_stride_y;
141 dst_stride_y = -dst_stride_y;
142 }
143 // Coalesce rows.
144 if (src_stride_y == width && dst_stride_y == width) {
145 width *= height;
146 height = 1;
147 src_stride_y = dst_stride_y = 0;
148 }
149 #if defined(HAS_CONVERT16TO8ROW_SSSE3)
150 if (TestCpuFlag(kCpuHasSSSE3)) {
151 Convert16To8Row = Convert16To8Row_Any_SSSE3;
152 if (IS_ALIGNED(width, 16)) {
153 Convert16To8Row = Convert16To8Row_SSSE3;
154 }
155 }
156 #endif
157 #if defined(HAS_CONVERT16TO8ROW_AVX2)
158 if (TestCpuFlag(kCpuHasAVX2)) {
159 Convert16To8Row = Convert16To8Row_Any_AVX2;
160 if (IS_ALIGNED(width, 32)) {
161 Convert16To8Row = Convert16To8Row_AVX2;
162 }
163 }
164 #endif
165
166 // Convert plane
167 for (y = 0; y < height; ++y) {
168 Convert16To8Row(src_y, dst_y, scale, width);
169 src_y += src_stride_y;
170 dst_y += dst_stride_y;
171 }
172 }
173
174 // Convert a plane of 8 bit data to 16 bit
175 LIBYUV_API
Convert8To16Plane(const uint8_t * src_y,int src_stride_y,uint16_t * dst_y,int dst_stride_y,int scale,int width,int height)176 void Convert8To16Plane(const uint8_t* src_y,
177 int src_stride_y,
178 uint16_t* dst_y,
179 int dst_stride_y,
180 int scale, // 16384 for 10 bits
181 int width,
182 int height) {
183 int y;
184 void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
185 int width) = Convert8To16Row_C;
186
187 // Negative height means invert the image.
188 if (height < 0) {
189 height = -height;
190 dst_y = dst_y + (height - 1) * dst_stride_y;
191 dst_stride_y = -dst_stride_y;
192 }
193 // Coalesce rows.
194 if (src_stride_y == width && dst_stride_y == width) {
195 width *= height;
196 height = 1;
197 src_stride_y = dst_stride_y = 0;
198 }
199 #if defined(HAS_CONVERT8TO16ROW_SSE2)
200 if (TestCpuFlag(kCpuHasSSE2)) {
201 Convert8To16Row = Convert8To16Row_Any_SSE2;
202 if (IS_ALIGNED(width, 16)) {
203 Convert8To16Row = Convert8To16Row_SSE2;
204 }
205 }
206 #endif
207 #if defined(HAS_CONVERT8TO16ROW_AVX2)
208 if (TestCpuFlag(kCpuHasAVX2)) {
209 Convert8To16Row = Convert8To16Row_Any_AVX2;
210 if (IS_ALIGNED(width, 32)) {
211 Convert8To16Row = Convert8To16Row_AVX2;
212 }
213 }
214 #endif
215
216 // Convert plane
217 for (y = 0; y < height; ++y) {
218 Convert8To16Row(src_y, dst_y, scale, width);
219 src_y += src_stride_y;
220 dst_y += dst_stride_y;
221 }
222 }
223
224 // Copy I422.
225 LIBYUV_API
I422Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)226 int I422Copy(const uint8_t* src_y,
227 int src_stride_y,
228 const uint8_t* src_u,
229 int src_stride_u,
230 const uint8_t* src_v,
231 int src_stride_v,
232 uint8_t* dst_y,
233 int dst_stride_y,
234 uint8_t* dst_u,
235 int dst_stride_u,
236 uint8_t* dst_v,
237 int dst_stride_v,
238 int width,
239 int height) {
240 int halfwidth = (width + 1) >> 1;
241 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
242 return -1;
243 }
244 // Negative height means invert the image.
245 if (height < 0) {
246 height = -height;
247 src_y = src_y + (height - 1) * src_stride_y;
248 src_u = src_u + (height - 1) * src_stride_u;
249 src_v = src_v + (height - 1) * src_stride_v;
250 src_stride_y = -src_stride_y;
251 src_stride_u = -src_stride_u;
252 src_stride_v = -src_stride_v;
253 }
254
255 if (dst_y) {
256 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
257 }
258 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
259 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
260 return 0;
261 }
262
263 // Copy I444.
264 LIBYUV_API
I444Copy(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)265 int I444Copy(const uint8_t* src_y,
266 int src_stride_y,
267 const uint8_t* src_u,
268 int src_stride_u,
269 const uint8_t* src_v,
270 int src_stride_v,
271 uint8_t* dst_y,
272 int dst_stride_y,
273 uint8_t* dst_u,
274 int dst_stride_u,
275 uint8_t* dst_v,
276 int dst_stride_v,
277 int width,
278 int height) {
279 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
280 return -1;
281 }
282 // Negative height means invert the image.
283 if (height < 0) {
284 height = -height;
285 src_y = src_y + (height - 1) * src_stride_y;
286 src_u = src_u + (height - 1) * src_stride_u;
287 src_v = src_v + (height - 1) * src_stride_v;
288 src_stride_y = -src_stride_y;
289 src_stride_u = -src_stride_u;
290 src_stride_v = -src_stride_v;
291 }
292
293 if (dst_y) {
294 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
295 }
296 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
297 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
298 return 0;
299 }
300
301 // Copy I400.
302 LIBYUV_API
I400ToI400(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)303 int I400ToI400(const uint8_t* src_y,
304 int src_stride_y,
305 uint8_t* dst_y,
306 int dst_stride_y,
307 int width,
308 int height) {
309 if (!src_y || !dst_y || width <= 0 || height == 0) {
310 return -1;
311 }
312 // Negative height means invert the image.
313 if (height < 0) {
314 height = -height;
315 src_y = src_y + (height - 1) * src_stride_y;
316 src_stride_y = -src_stride_y;
317 }
318 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
319 return 0;
320 }
321
322 // Convert I420 to I400.
323 LIBYUV_API
I420ToI400(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,int width,int height)324 int I420ToI400(const uint8_t* src_y,
325 int src_stride_y,
326 const uint8_t* src_u,
327 int src_stride_u,
328 const uint8_t* src_v,
329 int src_stride_v,
330 uint8_t* dst_y,
331 int dst_stride_y,
332 int width,
333 int height) {
334 (void)src_u;
335 (void)src_stride_u;
336 (void)src_v;
337 (void)src_stride_v;
338 if (!src_y || !dst_y || width <= 0 || height == 0) {
339 return -1;
340 }
341 // Negative height means invert the image.
342 if (height < 0) {
343 height = -height;
344 src_y = src_y + (height - 1) * src_stride_y;
345 src_stride_y = -src_stride_y;
346 }
347
348 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
349 return 0;
350 }
351
352 // Support function for NV12 etc UV channels.
353 // Width and height are plane sizes (typically half pixel width).
354 LIBYUV_API
SplitUVPlane(const uint8_t * src_uv,int src_stride_uv,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)355 void SplitUVPlane(const uint8_t* src_uv,
356 int src_stride_uv,
357 uint8_t* dst_u,
358 int dst_stride_u,
359 uint8_t* dst_v,
360 int dst_stride_v,
361 int width,
362 int height) {
363 int y;
364 void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
365 int width) = SplitUVRow_C;
366 // Negative height means invert the image.
367 if (height < 0) {
368 height = -height;
369 dst_u = dst_u + (height - 1) * dst_stride_u;
370 dst_v = dst_v + (height - 1) * dst_stride_v;
371 dst_stride_u = -dst_stride_u;
372 dst_stride_v = -dst_stride_v;
373 }
374 // Coalesce rows.
375 if (src_stride_uv == width * 2 && dst_stride_u == width &&
376 dst_stride_v == width) {
377 width *= height;
378 height = 1;
379 src_stride_uv = dst_stride_u = dst_stride_v = 0;
380 }
381 #if defined(HAS_SPLITUVROW_SSE2)
382 if (TestCpuFlag(kCpuHasSSE2)) {
383 SplitUVRow = SplitUVRow_Any_SSE2;
384 if (IS_ALIGNED(width, 16)) {
385 SplitUVRow = SplitUVRow_SSE2;
386 }
387 }
388 #endif
389 #if defined(HAS_SPLITUVROW_AVX2)
390 if (TestCpuFlag(kCpuHasAVX2)) {
391 SplitUVRow = SplitUVRow_Any_AVX2;
392 if (IS_ALIGNED(width, 32)) {
393 SplitUVRow = SplitUVRow_AVX2;
394 }
395 }
396 #endif
397 #if defined(HAS_SPLITUVROW_NEON)
398 if (TestCpuFlag(kCpuHasNEON)) {
399 SplitUVRow = SplitUVRow_Any_NEON;
400 if (IS_ALIGNED(width, 16)) {
401 SplitUVRow = SplitUVRow_NEON;
402 }
403 }
404 #endif
405 #if defined(HAS_SPLITUVROW_MSA)
406 if (TestCpuFlag(kCpuHasMSA)) {
407 SplitUVRow = SplitUVRow_Any_MSA;
408 if (IS_ALIGNED(width, 32)) {
409 SplitUVRow = SplitUVRow_MSA;
410 }
411 }
412 #endif
413
414 for (y = 0; y < height; ++y) {
415 // Copy a row of UV.
416 SplitUVRow(src_uv, dst_u, dst_v, width);
417 dst_u += dst_stride_u;
418 dst_v += dst_stride_v;
419 src_uv += src_stride_uv;
420 }
421 }
422
423 LIBYUV_API
MergeUVPlane(const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_uv,int dst_stride_uv,int width,int height)424 void MergeUVPlane(const uint8_t* src_u,
425 int src_stride_u,
426 const uint8_t* src_v,
427 int src_stride_v,
428 uint8_t* dst_uv,
429 int dst_stride_uv,
430 int width,
431 int height) {
432 int y;
433 void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
434 uint8_t* dst_uv, int width) = MergeUVRow_C;
435 // Coalesce rows.
436 // Negative height means invert the image.
437 if (height < 0) {
438 height = -height;
439 dst_uv = dst_uv + (height - 1) * dst_stride_uv;
440 dst_stride_uv = -dst_stride_uv;
441 }
442 // Coalesce rows.
443 if (src_stride_u == width && src_stride_v == width &&
444 dst_stride_uv == width * 2) {
445 width *= height;
446 height = 1;
447 src_stride_u = src_stride_v = dst_stride_uv = 0;
448 }
449 #if defined(HAS_MERGEUVROW_SSE2)
450 if (TestCpuFlag(kCpuHasSSE2)) {
451 MergeUVRow = MergeUVRow_Any_SSE2;
452 if (IS_ALIGNED(width, 16)) {
453 MergeUVRow = MergeUVRow_SSE2;
454 }
455 }
456 #endif
457 #if defined(HAS_MERGEUVROW_AVX2)
458 if (TestCpuFlag(kCpuHasAVX2)) {
459 MergeUVRow = MergeUVRow_Any_AVX2;
460 if (IS_ALIGNED(width, 32)) {
461 MergeUVRow = MergeUVRow_AVX2;
462 }
463 }
464 #endif
465 #if defined(HAS_MERGEUVROW_NEON)
466 if (TestCpuFlag(kCpuHasNEON)) {
467 MergeUVRow = MergeUVRow_Any_NEON;
468 if (IS_ALIGNED(width, 16)) {
469 MergeUVRow = MergeUVRow_NEON;
470 }
471 }
472 #endif
473 #if defined(HAS_MERGEUVROW_MSA)
474 if (TestCpuFlag(kCpuHasMSA)) {
475 MergeUVRow = MergeUVRow_Any_MSA;
476 if (IS_ALIGNED(width, 16)) {
477 MergeUVRow = MergeUVRow_MSA;
478 }
479 }
480 #endif
481
482 for (y = 0; y < height; ++y) {
483 // Merge a row of U and V into a row of UV.
484 MergeUVRow(src_u, src_v, dst_uv, width);
485 src_u += src_stride_u;
486 src_v += src_stride_v;
487 dst_uv += dst_stride_uv;
488 }
489 }
490
491 // Support function for NV12 etc RGB channels.
492 // Width and height are plane sizes (typically half pixel width).
493 LIBYUV_API
SplitRGBPlane(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_r,int dst_stride_r,uint8_t * dst_g,int dst_stride_g,uint8_t * dst_b,int dst_stride_b,int width,int height)494 void SplitRGBPlane(const uint8_t* src_rgb,
495 int src_stride_rgb,
496 uint8_t* dst_r,
497 int dst_stride_r,
498 uint8_t* dst_g,
499 int dst_stride_g,
500 uint8_t* dst_b,
501 int dst_stride_b,
502 int width,
503 int height) {
504 int y;
505 void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
506 uint8_t* dst_b, int width) = SplitRGBRow_C;
507 // Negative height means invert the image.
508 if (height < 0) {
509 height = -height;
510 dst_r = dst_r + (height - 1) * dst_stride_r;
511 dst_g = dst_g + (height - 1) * dst_stride_g;
512 dst_b = dst_b + (height - 1) * dst_stride_b;
513 dst_stride_r = -dst_stride_r;
514 dst_stride_g = -dst_stride_g;
515 dst_stride_b = -dst_stride_b;
516 }
517 // Coalesce rows.
518 if (src_stride_rgb == width * 3 && dst_stride_r == width &&
519 dst_stride_g == width && dst_stride_b == width) {
520 width *= height;
521 height = 1;
522 src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
523 }
524 #if defined(HAS_SPLITRGBROW_SSSE3)
525 if (TestCpuFlag(kCpuHasSSSE3)) {
526 SplitRGBRow = SplitRGBRow_Any_SSSE3;
527 if (IS_ALIGNED(width, 16)) {
528 SplitRGBRow = SplitRGBRow_SSSE3;
529 }
530 }
531 #endif
532 #if defined(HAS_SPLITRGBROW_NEON)
533 if (TestCpuFlag(kCpuHasNEON)) {
534 SplitRGBRow = SplitRGBRow_Any_NEON;
535 if (IS_ALIGNED(width, 16)) {
536 SplitRGBRow = SplitRGBRow_NEON;
537 }
538 }
539 #endif
540
541 for (y = 0; y < height; ++y) {
542 // Copy a row of RGB.
543 SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
544 dst_r += dst_stride_r;
545 dst_g += dst_stride_g;
546 dst_b += dst_stride_b;
547 src_rgb += src_stride_rgb;
548 }
549 }
550
551 LIBYUV_API
MergeRGBPlane(const uint8_t * src_r,int src_stride_r,const uint8_t * src_g,int src_stride_g,const uint8_t * src_b,int src_stride_b,uint8_t * dst_rgb,int dst_stride_rgb,int width,int height)552 void MergeRGBPlane(const uint8_t* src_r,
553 int src_stride_r,
554 const uint8_t* src_g,
555 int src_stride_g,
556 const uint8_t* src_b,
557 int src_stride_b,
558 uint8_t* dst_rgb,
559 int dst_stride_rgb,
560 int width,
561 int height) {
562 int y;
563 void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
564 const uint8_t* src_b, uint8_t* dst_rgb, int width) =
565 MergeRGBRow_C;
566 // Coalesce rows.
567 // Negative height means invert the image.
568 if (height < 0) {
569 height = -height;
570 dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
571 dst_stride_rgb = -dst_stride_rgb;
572 }
573 // Coalesce rows.
574 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
575 dst_stride_rgb == width * 3) {
576 width *= height;
577 height = 1;
578 src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
579 }
580 #if defined(HAS_MERGERGBROW_SSSE3)
581 if (TestCpuFlag(kCpuHasSSSE3)) {
582 MergeRGBRow = MergeRGBRow_Any_SSSE3;
583 if (IS_ALIGNED(width, 16)) {
584 MergeRGBRow = MergeRGBRow_SSSE3;
585 }
586 }
587 #endif
588 #if defined(HAS_MERGERGBROW_NEON)
589 if (TestCpuFlag(kCpuHasNEON)) {
590 MergeRGBRow = MergeRGBRow_Any_NEON;
591 if (IS_ALIGNED(width, 16)) {
592 MergeRGBRow = MergeRGBRow_NEON;
593 }
594 }
595 #endif
596
597 for (y = 0; y < height; ++y) {
598 // Merge a row of U and V into a row of RGB.
599 MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
600 src_r += src_stride_r;
601 src_g += src_stride_g;
602 src_b += src_stride_b;
603 dst_rgb += dst_stride_rgb;
604 }
605 }
606
607 // Mirror a plane of data.
MirrorPlane(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)608 void MirrorPlane(const uint8_t* src_y,
609 int src_stride_y,
610 uint8_t* dst_y,
611 int dst_stride_y,
612 int width,
613 int height) {
614 int y;
615 void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
616 // Negative height means invert the image.
617 if (height < 0) {
618 height = -height;
619 src_y = src_y + (height - 1) * src_stride_y;
620 src_stride_y = -src_stride_y;
621 }
622 #if defined(HAS_MIRRORROW_NEON)
623 if (TestCpuFlag(kCpuHasNEON)) {
624 MirrorRow = MirrorRow_Any_NEON;
625 if (IS_ALIGNED(width, 16)) {
626 MirrorRow = MirrorRow_NEON;
627 }
628 }
629 #endif
630 #if defined(HAS_MIRRORROW_SSSE3)
631 if (TestCpuFlag(kCpuHasSSSE3)) {
632 MirrorRow = MirrorRow_Any_SSSE3;
633 if (IS_ALIGNED(width, 16)) {
634 MirrorRow = MirrorRow_SSSE3;
635 }
636 }
637 #endif
638 #if defined(HAS_MIRRORROW_AVX2)
639 if (TestCpuFlag(kCpuHasAVX2)) {
640 MirrorRow = MirrorRow_Any_AVX2;
641 if (IS_ALIGNED(width, 32)) {
642 MirrorRow = MirrorRow_AVX2;
643 }
644 }
645 #endif
646 #if defined(HAS_MIRRORROW_MSA)
647 if (TestCpuFlag(kCpuHasMSA)) {
648 MirrorRow = MirrorRow_Any_MSA;
649 if (IS_ALIGNED(width, 64)) {
650 MirrorRow = MirrorRow_MSA;
651 }
652 }
653 #endif
654
655 // Mirror plane
656 for (y = 0; y < height; ++y) {
657 MirrorRow(src_y, dst_y, width);
658 src_y += src_stride_y;
659 dst_y += dst_stride_y;
660 }
661 }
662
663 // Convert YUY2 to I422.
664 LIBYUV_API
YUY2ToI422(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)665 int YUY2ToI422(const uint8_t* src_yuy2,
666 int src_stride_yuy2,
667 uint8_t* dst_y,
668 int dst_stride_y,
669 uint8_t* dst_u,
670 int dst_stride_u,
671 uint8_t* dst_v,
672 int dst_stride_v,
673 int width,
674 int height) {
675 int y;
676 void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
677 uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
678 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
679 YUY2ToYRow_C;
680 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
681 return -1;
682 }
683 // Negative height means invert the image.
684 if (height < 0) {
685 height = -height;
686 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
687 src_stride_yuy2 = -src_stride_yuy2;
688 }
689 // Coalesce rows.
690 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
691 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
692 width * height <= 32768) {
693 width *= height;
694 height = 1;
695 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
696 }
697 #if defined(HAS_YUY2TOYROW_SSE2)
698 if (TestCpuFlag(kCpuHasSSE2)) {
699 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
700 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
701 if (IS_ALIGNED(width, 16)) {
702 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
703 YUY2ToYRow = YUY2ToYRow_SSE2;
704 }
705 }
706 #endif
707 #if defined(HAS_YUY2TOYROW_AVX2)
708 if (TestCpuFlag(kCpuHasAVX2)) {
709 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
710 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
711 if (IS_ALIGNED(width, 32)) {
712 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
713 YUY2ToYRow = YUY2ToYRow_AVX2;
714 }
715 }
716 #endif
717 #if defined(HAS_YUY2TOYROW_NEON)
718 if (TestCpuFlag(kCpuHasNEON)) {
719 YUY2ToYRow = YUY2ToYRow_Any_NEON;
720 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
721 if (IS_ALIGNED(width, 16)) {
722 YUY2ToYRow = YUY2ToYRow_NEON;
723 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
724 }
725 }
726 #endif
727 #if defined(HAS_YUY2TOYROW_MSA)
728 if (TestCpuFlag(kCpuHasMSA)) {
729 YUY2ToYRow = YUY2ToYRow_Any_MSA;
730 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
731 if (IS_ALIGNED(width, 32)) {
732 YUY2ToYRow = YUY2ToYRow_MSA;
733 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
734 }
735 }
736 #endif
737
738 for (y = 0; y < height; ++y) {
739 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
740 YUY2ToYRow(src_yuy2, dst_y, width);
741 src_yuy2 += src_stride_yuy2;
742 dst_y += dst_stride_y;
743 dst_u += dst_stride_u;
744 dst_v += dst_stride_v;
745 }
746 return 0;
747 }
748
749 // Convert UYVY to I422.
750 LIBYUV_API
UYVYToI422(const uint8_t * src_uyvy,int src_stride_uyvy,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)751 int UYVYToI422(const uint8_t* src_uyvy,
752 int src_stride_uyvy,
753 uint8_t* dst_y,
754 int dst_stride_y,
755 uint8_t* dst_u,
756 int dst_stride_u,
757 uint8_t* dst_v,
758 int dst_stride_v,
759 int width,
760 int height) {
761 int y;
762 void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
763 uint8_t* dst_v, int width) = UYVYToUV422Row_C;
764 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
765 UYVYToYRow_C;
766 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
767 return -1;
768 }
769 // Negative height means invert the image.
770 if (height < 0) {
771 height = -height;
772 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
773 src_stride_uyvy = -src_stride_uyvy;
774 }
775 // Coalesce rows.
776 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
777 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
778 width * height <= 32768) {
779 width *= height;
780 height = 1;
781 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
782 }
783 #if defined(HAS_UYVYTOYROW_SSE2)
784 if (TestCpuFlag(kCpuHasSSE2)) {
785 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
786 UYVYToYRow = UYVYToYRow_Any_SSE2;
787 if (IS_ALIGNED(width, 16)) {
788 UYVYToUV422Row = UYVYToUV422Row_SSE2;
789 UYVYToYRow = UYVYToYRow_SSE2;
790 }
791 }
792 #endif
793 #if defined(HAS_UYVYTOYROW_AVX2)
794 if (TestCpuFlag(kCpuHasAVX2)) {
795 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
796 UYVYToYRow = UYVYToYRow_Any_AVX2;
797 if (IS_ALIGNED(width, 32)) {
798 UYVYToUV422Row = UYVYToUV422Row_AVX2;
799 UYVYToYRow = UYVYToYRow_AVX2;
800 }
801 }
802 #endif
803 #if defined(HAS_UYVYTOYROW_NEON)
804 if (TestCpuFlag(kCpuHasNEON)) {
805 UYVYToYRow = UYVYToYRow_Any_NEON;
806 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
807 if (IS_ALIGNED(width, 16)) {
808 UYVYToYRow = UYVYToYRow_NEON;
809 UYVYToUV422Row = UYVYToUV422Row_NEON;
810 }
811 }
812 #endif
813 #if defined(HAS_UYVYTOYROW_MSA)
814 if (TestCpuFlag(kCpuHasMSA)) {
815 UYVYToYRow = UYVYToYRow_Any_MSA;
816 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
817 if (IS_ALIGNED(width, 32)) {
818 UYVYToYRow = UYVYToYRow_MSA;
819 UYVYToUV422Row = UYVYToUV422Row_MSA;
820 }
821 }
822 #endif
823
824 for (y = 0; y < height; ++y) {
825 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
826 UYVYToYRow(src_uyvy, dst_y, width);
827 src_uyvy += src_stride_uyvy;
828 dst_y += dst_stride_y;
829 dst_u += dst_stride_u;
830 dst_v += dst_stride_v;
831 }
832 return 0;
833 }
834
835 // Convert YUY2 to Y.
836 LIBYUV_API
YUY2ToY(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,int width,int height)837 int YUY2ToY(const uint8_t* src_yuy2,
838 int src_stride_yuy2,
839 uint8_t* dst_y,
840 int dst_stride_y,
841 int width,
842 int height) {
843 int y;
844 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
845 YUY2ToYRow_C;
846 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
847 return -1;
848 }
849 // Negative height means invert the image.
850 if (height < 0) {
851 height = -height;
852 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
853 src_stride_yuy2 = -src_stride_yuy2;
854 }
855 // Coalesce rows.
856 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
857 width *= height;
858 height = 1;
859 src_stride_yuy2 = dst_stride_y = 0;
860 }
861 #if defined(HAS_YUY2TOYROW_SSE2)
862 if (TestCpuFlag(kCpuHasSSE2)) {
863 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
864 if (IS_ALIGNED(width, 16)) {
865 YUY2ToYRow = YUY2ToYRow_SSE2;
866 }
867 }
868 #endif
869 #if defined(HAS_YUY2TOYROW_AVX2)
870 if (TestCpuFlag(kCpuHasAVX2)) {
871 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
872 if (IS_ALIGNED(width, 32)) {
873 YUY2ToYRow = YUY2ToYRow_AVX2;
874 }
875 }
876 #endif
877 #if defined(HAS_YUY2TOYROW_NEON)
878 if (TestCpuFlag(kCpuHasNEON)) {
879 YUY2ToYRow = YUY2ToYRow_Any_NEON;
880 if (IS_ALIGNED(width, 16)) {
881 YUY2ToYRow = YUY2ToYRow_NEON;
882 }
883 }
884 #endif
885 #if defined(HAS_YUY2TOYROW_MSA)
886 if (TestCpuFlag(kCpuHasMSA)) {
887 YUY2ToYRow = YUY2ToYRow_Any_MSA;
888 if (IS_ALIGNED(width, 32)) {
889 YUY2ToYRow = YUY2ToYRow_MSA;
890 }
891 }
892 #endif
893
894 for (y = 0; y < height; ++y) {
895 YUY2ToYRow(src_yuy2, dst_y, width);
896 src_yuy2 += src_stride_yuy2;
897 dst_y += dst_stride_y;
898 }
899 return 0;
900 }
901
902 // Mirror I400 with optional flipping
903 LIBYUV_API
I400Mirror(const uint8_t * src_y,int src_stride_y,uint8_t * dst_y,int dst_stride_y,int width,int height)904 int I400Mirror(const uint8_t* src_y,
905 int src_stride_y,
906 uint8_t* dst_y,
907 int dst_stride_y,
908 int width,
909 int height) {
910 if (!src_y || !dst_y || width <= 0 || height == 0) {
911 return -1;
912 }
913 // Negative height means invert the image.
914 if (height < 0) {
915 height = -height;
916 src_y = src_y + (height - 1) * src_stride_y;
917 src_stride_y = -src_stride_y;
918 }
919
920 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
921 return 0;
922 }
923
924 // Mirror I420 with optional flipping
925 LIBYUV_API
I420Mirror(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)926 int I420Mirror(const uint8_t* src_y,
927 int src_stride_y,
928 const uint8_t* src_u,
929 int src_stride_u,
930 const uint8_t* src_v,
931 int src_stride_v,
932 uint8_t* dst_y,
933 int dst_stride_y,
934 uint8_t* dst_u,
935 int dst_stride_u,
936 uint8_t* dst_v,
937 int dst_stride_v,
938 int width,
939 int height) {
940 int halfwidth = (width + 1) >> 1;
941 int halfheight = (height + 1) >> 1;
942 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
943 height == 0) {
944 return -1;
945 }
946 // Negative height means invert the image.
947 if (height < 0) {
948 height = -height;
949 halfheight = (height + 1) >> 1;
950 src_y = src_y + (height - 1) * src_stride_y;
951 src_u = src_u + (halfheight - 1) * src_stride_u;
952 src_v = src_v + (halfheight - 1) * src_stride_v;
953 src_stride_y = -src_stride_y;
954 src_stride_u = -src_stride_u;
955 src_stride_v = -src_stride_v;
956 }
957
958 if (dst_y) {
959 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
960 }
961 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
962 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
963 return 0;
964 }
965
966 // ARGB mirror.
967 LIBYUV_API
ARGBMirror(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)968 int ARGBMirror(const uint8_t* src_argb,
969 int src_stride_argb,
970 uint8_t* dst_argb,
971 int dst_stride_argb,
972 int width,
973 int height) {
974 int y;
975 void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
976 ARGBMirrorRow_C;
977 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
978 return -1;
979 }
980 // Negative height means invert the image.
981 if (height < 0) {
982 height = -height;
983 src_argb = src_argb + (height - 1) * src_stride_argb;
984 src_stride_argb = -src_stride_argb;
985 }
986 #if defined(HAS_ARGBMIRRORROW_NEON)
987 if (TestCpuFlag(kCpuHasNEON)) {
988 ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
989 if (IS_ALIGNED(width, 4)) {
990 ARGBMirrorRow = ARGBMirrorRow_NEON;
991 }
992 }
993 #endif
994 #if defined(HAS_ARGBMIRRORROW_SSE2)
995 if (TestCpuFlag(kCpuHasSSE2)) {
996 ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
997 if (IS_ALIGNED(width, 4)) {
998 ARGBMirrorRow = ARGBMirrorRow_SSE2;
999 }
1000 }
1001 #endif
1002 #if defined(HAS_ARGBMIRRORROW_AVX2)
1003 if (TestCpuFlag(kCpuHasAVX2)) {
1004 ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
1005 if (IS_ALIGNED(width, 8)) {
1006 ARGBMirrorRow = ARGBMirrorRow_AVX2;
1007 }
1008 }
1009 #endif
1010 #if defined(HAS_ARGBMIRRORROW_MSA)
1011 if (TestCpuFlag(kCpuHasMSA)) {
1012 ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
1013 if (IS_ALIGNED(width, 16)) {
1014 ARGBMirrorRow = ARGBMirrorRow_MSA;
1015 }
1016 }
1017 #endif
1018
1019 // Mirror plane
1020 for (y = 0; y < height; ++y) {
1021 ARGBMirrorRow(src_argb, dst_argb, width);
1022 src_argb += src_stride_argb;
1023 dst_argb += dst_stride_argb;
1024 }
1025 return 0;
1026 }
1027
1028 // Get a blender that optimized for the CPU and pixel count.
1029 // As there are 6 blenders to choose from, the caller should try to use
1030 // the same blend function for all pixels if possible.
1031 LIBYUV_API
GetARGBBlend()1032 ARGBBlendRow GetARGBBlend() {
1033 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
1034 uint8_t* dst_argb, int width) = ARGBBlendRow_C;
1035 #if defined(HAS_ARGBBLENDROW_SSSE3)
1036 if (TestCpuFlag(kCpuHasSSSE3)) {
1037 ARGBBlendRow = ARGBBlendRow_SSSE3;
1038 return ARGBBlendRow;
1039 }
1040 #endif
1041 #if defined(HAS_ARGBBLENDROW_NEON)
1042 if (TestCpuFlag(kCpuHasNEON)) {
1043 ARGBBlendRow = ARGBBlendRow_NEON;
1044 }
1045 #endif
1046 #if defined(HAS_ARGBBLENDROW_MSA)
1047 if (TestCpuFlag(kCpuHasMSA)) {
1048 ARGBBlendRow = ARGBBlendRow_MSA;
1049 }
1050 #endif
1051 return ARGBBlendRow;
1052 }
1053
// Alpha Blend 2 ARGB images and store to destination.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBBlend(const uint8_t* src_argb0,
              int src_stride_argb0,
              const uint8_t* src_argb1,
              int src_stride_argb1,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int width,
              int height) {
  int y;
  // Row kernel chosen once for the whole image (see GetARGBBlend).
  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
                       uint8_t* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows. When all strides equal the exact row byte width
  // (width * 4 for ARGB), blend the whole image as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1092
// Alpha Blend plane and store to destination.
// Blends src_y0 and src_y1 using the per-pixel values in alpha and writes
// the result to dst_y. Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int BlendPlane(const uint8_t* src_y0,
               int src_stride_y0,
               const uint8_t* src_y1,
               int src_stride_y1,
               const uint8_t* alpha,
               int alpha_stride,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
                        const uint8_t* alpha, uint8_t* dst, int width) =
      BlendPlaneRow_C;
  if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Coalesce rows for Y plane: when every stride equals width, the image
  // is contiguous and can be processed as a single long row.
  if (src_stride_y0 == width && src_stride_y1 == width &&
      alpha_stride == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
  }

  // Later #if blocks override earlier choices; the Any_ variant is used
  // unless the width meets the alignment for the full-width kernel.
#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
    src_y0 += src_stride_y0;
    src_y1 += src_stride_y1;
    alpha += alpha_stride;
    dst_y += dst_stride_y;
  }
  return 0;
}
1153
// NOTE(review): MAXTWIDTH is not referenced within I420Blend; presumably
// used by other conversions in this file — verify before removing.
#define MAXTWIDTH 2048
// Alpha Blend YUV images and store to destination.
// The full-resolution alpha plane is box-filtered down 2x2 per UV row pair
// so it can drive the half-resolution U and V blends.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int I420Blend(const uint8_t* src_y0,
              int src_stride_y0,
              const uint8_t* src_u0,
              int src_stride_u0,
              const uint8_t* src_v0,
              int src_stride_v0,
              const uint8_t* src_y1,
              int src_stride_y1,
              const uint8_t* src_u1,
              int src_stride_u1,
              const uint8_t* src_v1,
              int src_stride_v1,
              const uint8_t* alpha,
              int alpha_stride,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int y;
  // Half width/height for UV.
  int halfwidth = (width + 1) >> 1;
  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
                        const uint8_t* alpha, uint8_t* dst, int width) =
      BlendPlaneRow_C;
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  // NOTE(review): only dst_y is repositioned here; dst_u/dst_v are written
  // top-down regardless — confirm this asymmetry is intended.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Blend Y plane at full resolution.
  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
             dst_y, dst_stride_y, width, height);

  // UV kernels are selected by halfwidth alignment since they operate on
  // the half-resolution planes. Later #if blocks take precedence.
#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
  // Odd widths need the _Odd_ downscaler, which handles the last column.
  if (!IS_ALIGNED(width, 2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
  }
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
      if (IS_ALIGNED(halfwidth, 32)) {
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
      }
    }
  }
#endif

  // Row buffer for intermediate alpha pixels.
  align_buffer_64(halfalpha, halfwidth);
  for (y = 0; y < height; y += 2) {
    // last row of odd height image use 1 row of alpha instead of 2:
    // a zero stride makes the box filter read the same row twice.
    if (y == (height - 1)) {
      alpha_stride = 0;
    }
    // Subsample 2 rows of UV to half width and half height.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
    alpha += alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
    src_u1 += src_stride_u1;
    dst_u += dst_stride_u;
    src_v0 += src_stride_v0;
    src_v1 += src_stride_v1;
    dst_v += dst_stride_v;
  }
  free_aligned_buffer_64(halfalpha);
  return 0;
}
1278
// Multiply 2 ARGB images and store to destination.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBMultiply(const uint8_t* src_argb0,
                 int src_stride_argb0,
                 const uint8_t* src_argb1,
                 int src_stride_argb1,
                 uint8_t* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
                          uint8_t* dst, int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_MSA;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1350
// Add 2 ARGB images and store to destination.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBAdd(const uint8_t* src_argb0,
            int src_stride_argb0,
            const uint8_t* src_argb1,
            int src_stride_argb1,
            uint8_t* dst_argb,
            int dst_stride_argb,
            int width,
            int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // Under Visual C (non-clang) the SSE2 kernel is used for every width;
  // other compilers route through the Any_ variant unless width % 4 == 0.
#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAddRow = ARGBAddRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_MSA;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1427
// Subtract 2 ARGB images (src_argb0 - src_argb1) and store to destination.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBSubtract(const uint8_t* src_argb0,
                 int src_stride_argb0,
                 const uint8_t* src_argb1,
                 int src_stride_argb1,
                 uint8_t* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
                          uint8_t* dst, int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_MSA;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert I422 to RGBA with matrix.
// Shared worker for I422ToRGBA / I422ToBGRA: yuvconstants selects the
// color conversion matrix. Returns 0 on success, -1 for invalid parameters.
static int I422ToRGBAMatrix(const uint8_t* src_y,
                            int src_stride_y,
                            const uint8_t* src_u,
                            int src_stride_u,
                            const uint8_t* src_v,
                            int src_stride_v,
                            uint8_t* dst_rgba,
                            int dst_stride_rgba,
                            const struct YuvConstants* yuvconstants,
                            int width,
                            int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
                        const uint8_t* v_buf, uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToRGBARow = I422ToRGBARow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToRGBARow = I422ToRGBARow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_MSA;
    }
  }
#endif

  // I422 is subsampled horizontally only, so all three planes advance
  // one row per output row.
  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
1567
1568 // Convert I422 to RGBA.
1569 LIBYUV_API
I422ToRGBA(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_rgba,int dst_stride_rgba,int width,int height)1570 int I422ToRGBA(const uint8_t* src_y,
1571 int src_stride_y,
1572 const uint8_t* src_u,
1573 int src_stride_u,
1574 const uint8_t* src_v,
1575 int src_stride_v,
1576 uint8_t* dst_rgba,
1577 int dst_stride_rgba,
1578 int width,
1579 int height) {
1580 return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
1581 src_stride_v, dst_rgba, dst_stride_rgba,
1582 &kYuvI601Constants, width, height);
1583 }
1584
1585 // Convert I422 to BGRA.
1586 LIBYUV_API
I422ToBGRA(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint8_t * dst_bgra,int dst_stride_bgra,int width,int height)1587 int I422ToBGRA(const uint8_t* src_y,
1588 int src_stride_y,
1589 const uint8_t* src_u,
1590 int src_stride_u,
1591 const uint8_t* src_v,
1592 int src_stride_v,
1593 uint8_t* dst_bgra,
1594 int dst_stride_bgra,
1595 int width,
1596 int height) {
1597 return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
1598 src_stride_v, // Swap U and V
1599 src_u, src_stride_u, dst_bgra, dst_stride_bgra,
1600 &kYvuI601Constants, // Use Yvu matrix
1601 width, height);
1602 }
1603
// Convert NV12 to RGB565 using the kYuvI601Constants matrix.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int NV12ToRGB565(const uint8_t* src_y,
                 int src_stride_y,
                 const uint8_t* src_uv,
                 int src_stride_uv,
                 uint8_t* dst_rgb565,
                 int dst_stride_rgb565,
                 int width,
                 int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*NV12ToRGB565Row)(
      const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // NV12's interleaved UV plane is subsampled 2x vertically, so it only
    // advances after every second Y row.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
1670
// Convert RAW to RGB24.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int RAWToRGB24(const uint8_t* src_raw,
               int src_stride_raw,
               uint8_t* dst_rgb24,
               int dst_stride_rgb24,
               int width,
               int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
      RAWToRGB24Row_C;
  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: read source bottom-up.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 3).
  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
    width *= height;
    height = 1;
    src_stride_raw = dst_stride_rgb24 = 0;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_RAWTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_NEON;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToRGB24Row = RAWToRGB24Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    RAWToRGB24Row(src_raw, dst_rgb24, width);
    src_raw += src_stride_raw;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}
1729
// Fill a plane of data with a constant value.
// Note: the row kernels take a uint8_t value, so only the low byte of
// 'value' is written. No parameter validation; callers pass valid buffers.
LIBYUV_API
void SetPlane(uint8_t* dst_y,
              int dst_stride_y,
              int width,
              int height,
              uint32_t value) {
  int y;
  void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows when the plane is contiguous (stride == width).
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SetRow = SetRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SetRow = SetRow_NEON;
    }
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRow = SetRow_Any_X86;
    if (IS_ALIGNED(width, 4)) {
      SetRow = SetRow_X86;
    }
  }
#endif
#if defined(HAS_SETROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    SetRow = SetRow_ERMS;
  }
#endif
#if defined(HAS_SETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
    SetRow = SetRow_MSA;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, value, width);
    dst_y += dst_stride_y;
  }
}
1782
1783 // Draw a rectangle into I420
1784 LIBYUV_API
I420Rect(uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int x,int y,int width,int height,int value_y,int value_u,int value_v)1785 int I420Rect(uint8_t* dst_y,
1786 int dst_stride_y,
1787 uint8_t* dst_u,
1788 int dst_stride_u,
1789 uint8_t* dst_v,
1790 int dst_stride_v,
1791 int x,
1792 int y,
1793 int width,
1794 int height,
1795 int value_y,
1796 int value_u,
1797 int value_v) {
1798 int halfwidth = (width + 1) >> 1;
1799 int halfheight = (height + 1) >> 1;
1800 uint8_t* start_y = dst_y + y * dst_stride_y + x;
1801 uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1802 uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1803 if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
1804 y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
1805 value_v < 0 || value_v > 255) {
1806 return -1;
1807 }
1808
1809 SetPlane(start_y, dst_stride_y, width, height, value_y);
1810 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1811 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1812 return 0;
1813 }
1814
// Draw a rectangle into ARGB.
// 'value' is a full 32-bit ARGB pixel. Returns 0 on success, -1 for
// invalid parameters.
LIBYUV_API
int ARGBRect(uint8_t* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height,
             uint32_t value) {
  int y;
  void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
      ARGBSetRow_C;
  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Offset to the rectangle's top-left pixel (4 bytes per ARGB pixel).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows when the stride equals the exact row byte width.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }

  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_ARGBSETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSetRow = ARGBSetRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRow = ARGBSetRow_X86;
  }
#endif
#if defined(HAS_ARGBSETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSetRow = ARGBSetRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_MSA;
    }
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    ARGBSetRow(dst_argb, value, width);
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1872
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
1885
// Premultiply each ARGB pixel's color channels by its alpha.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBAttenuate(const uint8_t* src_argb,
                  int src_stride_argb,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: read source bottom-up.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // Later #if blocks take precedence over earlier choices.
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1950
// Convert preattenuated (premultiplied) ARGB to unattenuated ARGB.
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBUnattenuate(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  int y;
  // Portable C kernel by default; replaced below when SIMD is available.
  void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: read source bottom-up.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
  // TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2001
// Convert ARGB to Grayed ARGB (grayscale written into a separate buffer).
// Returns 0 on success, -1 for invalid parameters.
LIBYUV_API
int ARGBGrayTo(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: read source bottom-up.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows when strides equal the exact row byte width (width * 4).
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // There are no Any_ variants for gray: SIMD is used only when width is a
  // multiple of 8; otherwise the C kernel handles the whole row.
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2050
2051 // Make a rectangle of ARGB gray scale.
2052 LIBYUV_API
ARGBGray(uint8_t * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)2053 int ARGBGray(uint8_t* dst_argb,
2054 int dst_stride_argb,
2055 int dst_x,
2056 int dst_y,
2057 int width,
2058 int height) {
2059 int y;
2060 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
2061 ARGBGrayRow_C;
2062 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2063 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
2064 return -1;
2065 }
2066 // Coalesce rows.
2067 if (dst_stride_argb == width * 4) {
2068 width *= height;
2069 height = 1;
2070 dst_stride_argb = 0;
2071 }
2072 #if defined(HAS_ARGBGRAYROW_SSSE3)
2073 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2074 ARGBGrayRow = ARGBGrayRow_SSSE3;
2075 }
2076 #endif
2077 #if defined(HAS_ARGBGRAYROW_NEON)
2078 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2079 ARGBGrayRow = ARGBGrayRow_NEON;
2080 }
2081 #endif
2082 #if defined(HAS_ARGBGRAYROW_MSA)
2083 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2084 ARGBGrayRow = ARGBGrayRow_MSA;
2085 }
2086 #endif
2087
2088 for (y = 0; y < height; ++y) {
2089 ARGBGrayRow(dst, dst, width);
2090 dst += dst_stride_argb;
2091 }
2092 return 0;
2093 }
2094
2095 // Make a rectangle of ARGB Sepia tone.
2096 LIBYUV_API
ARGBSepia(uint8_t * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)2097 int ARGBSepia(uint8_t* dst_argb,
2098 int dst_stride_argb,
2099 int dst_x,
2100 int dst_y,
2101 int width,
2102 int height) {
2103 int y;
2104 void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
2105 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2106 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
2107 return -1;
2108 }
2109 // Coalesce rows.
2110 if (dst_stride_argb == width * 4) {
2111 width *= height;
2112 height = 1;
2113 dst_stride_argb = 0;
2114 }
2115 #if defined(HAS_ARGBSEPIAROW_SSSE3)
2116 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
2117 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
2118 }
2119 #endif
2120 #if defined(HAS_ARGBSEPIAROW_NEON)
2121 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2122 ARGBSepiaRow = ARGBSepiaRow_NEON;
2123 }
2124 #endif
2125 #if defined(HAS_ARGBSEPIAROW_MSA)
2126 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2127 ARGBSepiaRow = ARGBSepiaRow_MSA;
2128 }
2129 #endif
2130
2131 for (y = 0; y < height; ++y) {
2132 ARGBSepiaRow(dst, width);
2133 dst += dst_stride_argb;
2134 }
2135 return 0;
2136 }
2137
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb points to 16 signed 6-bit-style coefficients consumed by the
// ARGBColorMatrixRow_* workers. Negative height flips the source vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorMatrix(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    const int8_t* matrix_argb,
                    int width,
                    int height) {
  int y;
  // Row worker; SIMD variants substituted below when supported and aligned.
  void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                             const int8_t* matrix_argb, int width) =
      ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2188
2189 // Apply a 4x3 matrix to each ARGB pixel.
2190 // Deprecated.
2191 LIBYUV_API
RGBColorMatrix(uint8_t * dst_argb,int dst_stride_argb,const int8_t * matrix_rgb,int dst_x,int dst_y,int width,int height)2192 int RGBColorMatrix(uint8_t* dst_argb,
2193 int dst_stride_argb,
2194 const int8_t* matrix_rgb,
2195 int dst_x,
2196 int dst_y,
2197 int width,
2198 int height) {
2199 SIMD_ALIGNED(int8_t matrix_argb[16]);
2200 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2201 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
2202 dst_y < 0) {
2203 return -1;
2204 }
2205
2206 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
2207 matrix_argb[0] = matrix_rgb[0] / 2;
2208 matrix_argb[1] = matrix_rgb[1] / 2;
2209 matrix_argb[2] = matrix_rgb[2] / 2;
2210 matrix_argb[3] = matrix_rgb[3] / 2;
2211 matrix_argb[4] = matrix_rgb[4] / 2;
2212 matrix_argb[5] = matrix_rgb[5] / 2;
2213 matrix_argb[6] = matrix_rgb[6] / 2;
2214 matrix_argb[7] = matrix_rgb[7] / 2;
2215 matrix_argb[8] = matrix_rgb[8] / 2;
2216 matrix_argb[9] = matrix_rgb[9] / 2;
2217 matrix_argb[10] = matrix_rgb[10] / 2;
2218 matrix_argb[11] = matrix_rgb[11] / 2;
2219 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
2220 matrix_argb[15] = 64; // 1.0
2221
2222 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
2223 dst_stride_argb, &matrix_argb[0], width, height);
2224 }
2225
2226 // Apply a color table each ARGB pixel.
2227 // Table contains 256 ARGB values.
2228 LIBYUV_API
ARGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)2229 int ARGBColorTable(uint8_t* dst_argb,
2230 int dst_stride_argb,
2231 const uint8_t* table_argb,
2232 int dst_x,
2233 int dst_y,
2234 int width,
2235 int height) {
2236 int y;
2237 void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
2238 int width) = ARGBColorTableRow_C;
2239 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2240 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2241 dst_y < 0) {
2242 return -1;
2243 }
2244 // Coalesce rows.
2245 if (dst_stride_argb == width * 4) {
2246 width *= height;
2247 height = 1;
2248 dst_stride_argb = 0;
2249 }
2250 #if defined(HAS_ARGBCOLORTABLEROW_X86)
2251 if (TestCpuFlag(kCpuHasX86)) {
2252 ARGBColorTableRow = ARGBColorTableRow_X86;
2253 }
2254 #endif
2255 for (y = 0; y < height; ++y) {
2256 ARGBColorTableRow(dst, table_argb, width);
2257 dst += dst_stride_argb;
2258 }
2259 return 0;
2260 }
2261
2262 // Apply a color table each ARGB pixel but preserve destination alpha.
2263 // Table contains 256 ARGB values.
2264 LIBYUV_API
RGBColorTable(uint8_t * dst_argb,int dst_stride_argb,const uint8_t * table_argb,int dst_x,int dst_y,int width,int height)2265 int RGBColorTable(uint8_t* dst_argb,
2266 int dst_stride_argb,
2267 const uint8_t* table_argb,
2268 int dst_x,
2269 int dst_y,
2270 int width,
2271 int height) {
2272 int y;
2273 void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
2274 int width) = RGBColorTableRow_C;
2275 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2276 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2277 dst_y < 0) {
2278 return -1;
2279 }
2280 // Coalesce rows.
2281 if (dst_stride_argb == width * 4) {
2282 width *= height;
2283 height = 1;
2284 dst_stride_argb = 0;
2285 }
2286 #if defined(HAS_RGBCOLORTABLEROW_X86)
2287 if (TestCpuFlag(kCpuHasX86)) {
2288 RGBColorTableRow = RGBColorTableRow_X86;
2289 }
2290 #endif
2291 for (y = 0; y < height; ++y) {
2292 RGBColorTableRow(dst, table_argb, width);
2293 dst += dst_stride_argb;
2294 }
2295 return 0;
2296 }
2297
2298 // ARGBQuantize is used to posterize art.
2299 // e.g. rgb / qvalue * qvalue + qvalue / 2
2300 // But the low levels implement efficiently with 3 parameters, and could be
2301 // used for other high level operations.
2302 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
2303 // where scale is 1 / interval_size as a fixed point value.
2304 // The divide is replaces with a multiply by reciprocal fixed point multiply.
2305 // Caveat - although SSE2 saturates, the C function does not and should be used
2306 // with care if doing anything but quantization.
2307 LIBYUV_API
ARGBQuantize(uint8_t * dst_argb,int dst_stride_argb,int scale,int interval_size,int interval_offset,int dst_x,int dst_y,int width,int height)2308 int ARGBQuantize(uint8_t* dst_argb,
2309 int dst_stride_argb,
2310 int scale,
2311 int interval_size,
2312 int interval_offset,
2313 int dst_x,
2314 int dst_y,
2315 int width,
2316 int height) {
2317 int y;
2318 void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
2319 int interval_offset, int width) = ARGBQuantizeRow_C;
2320 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2321 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
2322 interval_size < 1 || interval_size > 255) {
2323 return -1;
2324 }
2325 // Coalesce rows.
2326 if (dst_stride_argb == width * 4) {
2327 width *= height;
2328 height = 1;
2329 dst_stride_argb = 0;
2330 }
2331 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
2332 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
2333 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
2334 }
2335 #endif
2336 #if defined(HAS_ARGBQUANTIZEROW_NEON)
2337 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2338 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
2339 }
2340 #endif
2341 #if defined(HAS_ARGBQUANTIZEROW_MSA)
2342 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
2343 ARGBQuantizeRow = ARGBQuantizeRow_MSA;
2344 }
2345 #endif
2346 for (y = 0; y < height; ++y) {
2347 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
2348 dst += dst_stride_argb;
2349 }
2350 return 0;
2351 }
2352
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
// dst_cumsum holds 4 int32 values per pixel (one per channel), with
// dst_stride32_cumsum counted in int32 units, not bytes.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8_t* src_argb,
                             int src_stride_argb,
                             int32_t* dst_cumsum,
                             int dst_stride32_cumsum,
                             int width,
                             int height) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
                                  const int32_t* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  // For the first row, "previous" points at the destination row itself,
  // which is zeroed by the memset below, so row 0 sums from zero.
  int32_t* previous_cumsum = dst_cumsum;
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  for (y = 0; y < height; ++y) {
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
2384
2385 // Blur ARGB image.
2386 // Caller should allocate CumulativeSum table of width * height * 16 bytes
2387 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
2388 // as the buffer is treated as circular.
2389 LIBYUV_API
ARGBBlur(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int32_t * dst_cumsum,int dst_stride32_cumsum,int width,int height,int radius)2390 int ARGBBlur(const uint8_t* src_argb,
2391 int src_stride_argb,
2392 uint8_t* dst_argb,
2393 int dst_stride_argb,
2394 int32_t* dst_cumsum,
2395 int dst_stride32_cumsum,
2396 int width,
2397 int height,
2398 int radius) {
2399 int y;
2400 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
2401 const int32_t* previous_cumsum, int width) =
2402 ComputeCumulativeSumRow_C;
2403 void (*CumulativeSumToAverageRow)(
2404 const int32_t* topleft, const int32_t* botleft, int width, int area,
2405 uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
2406 int32_t* cumsum_bot_row;
2407 int32_t* max_cumsum_bot_row;
2408 int32_t* cumsum_top_row;
2409
2410 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2411 return -1;
2412 }
2413 if (height < 0) {
2414 height = -height;
2415 src_argb = src_argb + (height - 1) * src_stride_argb;
2416 src_stride_argb = -src_stride_argb;
2417 }
2418 if (radius > height) {
2419 radius = height;
2420 }
2421 if (radius > (width / 2 - 1)) {
2422 radius = width / 2 - 1;
2423 }
2424 if (radius <= 0) {
2425 return -1;
2426 }
2427 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
2428 if (TestCpuFlag(kCpuHasSSE2)) {
2429 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
2430 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
2431 }
2432 #endif
2433 // Compute enough CumulativeSum for first row to be blurred. After this
2434 // one row of CumulativeSum is updated at a time.
2435 ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
2436 dst_stride32_cumsum, width, radius);
2437
2438 src_argb = src_argb + radius * src_stride_argb;
2439 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
2440
2441 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
2442 cumsum_top_row = &dst_cumsum[0];
2443
2444 for (y = 0; y < height; ++y) {
2445 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
2446 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
2447 int area = radius * (bot_y - top_y);
2448 int boxwidth = radius * 4;
2449 int x;
2450 int n;
2451
2452 // Increment cumsum_top_row pointer with circular buffer wrap around.
2453 if (top_y) {
2454 cumsum_top_row += dst_stride32_cumsum;
2455 if (cumsum_top_row >= max_cumsum_bot_row) {
2456 cumsum_top_row = dst_cumsum;
2457 }
2458 }
2459 // Increment cumsum_bot_row pointer with circular buffer wrap around and
2460 // then fill in a row of CumulativeSum.
2461 if ((y + radius) < height) {
2462 const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
2463 cumsum_bot_row += dst_stride32_cumsum;
2464 if (cumsum_bot_row >= max_cumsum_bot_row) {
2465 cumsum_bot_row = dst_cumsum;
2466 }
2467 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
2468 width);
2469 src_argb += src_stride_argb;
2470 }
2471
2472 // Left clipped.
2473 for (x = 0; x < radius + 1; ++x) {
2474 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
2475 &dst_argb[x * 4], 1);
2476 area += (bot_y - top_y);
2477 boxwidth += 4;
2478 }
2479
2480 // Middle unclipped.
2481 n = (width - 1) - radius - x + 1;
2482 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
2483 &dst_argb[x * 4], n);
2484
2485 // Right clipped.
2486 for (x += n; x <= width - 1; ++x) {
2487 area -= (bot_y - top_y);
2488 boxwidth -= 4;
2489 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
2490 cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
2491 area, &dst_argb[x * 4], 1);
2492 }
2493 dst_argb += dst_stride_argb;
2494 }
2495 return 0;
2496 }
2497
// Multiply ARGB image by a specified ARGB value.
// value packs 4 bytes (one multiplier per channel); value of 0 is rejected.
// Negative height flips the source vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBShade(const uint8_t* src_argb,
              int src_stride_argb,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int width,
              int height,
              uint32_t value) {
  int y;
  // Row worker; SIMD variants substituted below when supported and aligned.
  void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
                       uint32_t value) = ARGBShadeRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHADEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    ARGBShadeRow = ARGBShadeRow_SSE2;
  }
#endif
#if defined(HAS_ARGBSHADEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBShadeRow = ARGBShadeRow_NEON;
  }
#endif
#if defined(HAS_ARGBSHADEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
    ARGBShadeRow = ARGBShadeRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShadeRow(src_argb, dst_argb, width, value);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2547
// Interpolate 2 planes by specified amount (0 to 255).
// interpolation selects the blend fraction toward src1 (passed to the row
// workers as source_y_fraction). Negative height flips the destination.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int InterpolatePlane(const uint8_t* src0,
                     int src_stride0,
                     const uint8_t* src1,
                     int src_stride1,
                     uint8_t* dst,
                     int dst_stride,
                     int width,
                     int height,
                     int interpolation) {
  int y;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;
    dst_stride = -dst_stride;
  }
  // Coalesce rows: contiguous planes are processed as one long row.
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride0 = src_stride1 = dst_stride = 0;
  }
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // InterpolateRow blends src_ptr with src_ptr + src_stride; passing
    // src1 - src0 as the "stride" makes it blend src0 with src1.
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
    src0 += src_stride0;
    src1 += src_stride1;
    dst += dst_stride;
  }
  return 0;
}
2619
2620 // Interpolate 2 ARGB images by specified amount (0 to 255).
2621 LIBYUV_API
ARGBInterpolate(const uint8_t * src_argb0,int src_stride_argb0,const uint8_t * src_argb1,int src_stride_argb1,uint8_t * dst_argb,int dst_stride_argb,int width,int height,int interpolation)2622 int ARGBInterpolate(const uint8_t* src_argb0,
2623 int src_stride_argb0,
2624 const uint8_t* src_argb1,
2625 int src_stride_argb1,
2626 uint8_t* dst_argb,
2627 int dst_stride_argb,
2628 int width,
2629 int height,
2630 int interpolation) {
2631 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
2632 src_stride_argb1, dst_argb, dst_stride_argb,
2633 width * 4, height, interpolation);
2634 }
2635
2636 // Interpolate 2 YUV images by specified amount (0 to 255).
2637 LIBYUV_API
I420Interpolate(const uint8_t * src0_y,int src0_stride_y,const uint8_t * src0_u,int src0_stride_u,const uint8_t * src0_v,int src0_stride_v,const uint8_t * src1_y,int src1_stride_y,const uint8_t * src1_u,int src1_stride_u,const uint8_t * src1_v,int src1_stride_v,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height,int interpolation)2638 int I420Interpolate(const uint8_t* src0_y,
2639 int src0_stride_y,
2640 const uint8_t* src0_u,
2641 int src0_stride_u,
2642 const uint8_t* src0_v,
2643 int src0_stride_v,
2644 const uint8_t* src1_y,
2645 int src1_stride_y,
2646 const uint8_t* src1_u,
2647 int src1_stride_u,
2648 const uint8_t* src1_v,
2649 int src1_stride_v,
2650 uint8_t* dst_y,
2651 int dst_stride_y,
2652 uint8_t* dst_u,
2653 int dst_stride_u,
2654 uint8_t* dst_v,
2655 int dst_stride_v,
2656 int width,
2657 int height,
2658 int interpolation) {
2659 int halfwidth = (width + 1) >> 1;
2660 int halfheight = (height + 1) >> 1;
2661 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
2662 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2663 return -1;
2664 }
2665 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
2666 dst_stride_y, width, height, interpolation);
2667 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
2668 dst_stride_u, halfwidth, halfheight, interpolation);
2669 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
2670 dst_stride_v, halfwidth, halfheight, interpolation);
2671 return 0;
2672 }
2673
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler points to the 4-byte channel permutation consumed by the
// ARGBShuffleRow_* workers. Negative height flips the source vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBShuffle(const uint8_t* src_bgra,
                int src_stride_bgra,
                uint8_t* dst_argb,
                int dst_stride_argb,
                const uint8_t* shuffler,
                int width,
                int height) {
  int y;
  // Row worker; "Any" variants handle unaligned tail widths.
  void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
                         const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2741
// Sobel ARGB effect.
// Shared implementation for the ARGBSobel* variants: converts ARGB to a
// gray plane one row at a time (3 rows live in a circular queue), runs the
// Sobel X/Y 3x3 kernels over those rows, then lets the caller-supplied
// SobelRow combine the X/Y gradients into the destination. Edge rows and
// columns are extruded (replicated) so the 3x3 window never reads outside
// the image. Returns 0 on success, -1 on invalid arguments.
static int ARGBSobelize(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8_t* src_sobelx,
                                         const uint8_t* src_sobely,
                                         uint8_t* dst,
                                         int width)) {
  int y;
  // ARGB -> gray (YJ) row conversion; SIMD variants substituted below.
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    uint8_t* dst_sobely, int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
                    const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow = ARGBToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_MSA;
    }
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelYRow = SobelYRow_MSA;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelXRow = SobelXRow_MSA;
  }
#endif
  {
    // 3 rows with edges before/after.
    // Layout of the scratch buffer:
    //   [sobelx row][sobely row][kEdge pad][3 gray rows with kEdge slack]
    const int kRowSize = (width + kEdge + 31) & ~31;
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8_t* row_sobelx = rows;
    uint8_t* row_sobely = rows + kRowSize;
    uint8_t* row_y = rows + kRowSize * 2;

    // Convert first row. row_y0/1/2 are offset by kEdge so index -1 (left
    // edge extrusion) stays inside the allocation.
    uint8_t* row_y0 = row_y + kEdge;
    uint8_t* row_y1 = row_y0 + kRowSize;
    uint8_t* row_y2 = row_y1 + kRowSize;
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    // Top edge: row_y1 duplicates the first image row.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G. On the last row, src_argb is not
      // advanced, so the bottom edge is extruded by reusing the final row.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8_t* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
2882
// Sobel ARGB effect.
// Selects the best available SobelRow (which writes the combined gradient
// into all of B, G and R with opaque alpha per the row implementations) and
// delegates to ARGBSobelize. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBSobel(const uint8_t* src_argb,
              int src_stride_argb,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int width,
              int height) {
  void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                   uint8_t* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelRow = SobelRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelRow = SobelRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelRow = SobelRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SobelRow = SobelRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelRow = SobelRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      SobelRow = SobelRow_MSA;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height, SobelRow);
}
2920
// Sobel ARGB effect with planar output.
// Like ARGBSobel but writes a single-byte-per-pixel gradient plane (dst_y)
// via the SobelToPlaneRow workers. Returns 0 on success, -1 on invalid
// arguments.
LIBYUV_API
int ARGBSobelToPlane(const uint8_t* src_argb,
                     int src_stride_argb,
                     uint8_t* dst_y,
                     int dst_stride_y,
                     int width,
                     int height) {
  void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                          uint8_t* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelToPlaneRow = SobelToPlaneRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SobelToPlaneRow = SobelToPlaneRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELTOPLANEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SobelToPlaneRow = SobelToPlaneRow_MSA;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
                      height, SobelToPlaneRow);
}
2958
// SobelXY ARGB effect.
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBSobelXY(const uint8_t* src_argb,
                int src_stride_argb,
                uint8_t* dst_argb,
                int dst_stride_argb,
                int width,
                int height) {
  void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
                     uint8_t* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXYRow = SobelXYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXYRow = SobelXYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SobelXYRow = SobelXYRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelXYRow = SobelXYRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_MSA;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height, SobelXYRow);
}
2997
// Apply a 4x4 polynomial to each ARGB pixel.
// poly points to per-channel polynomial coefficients consumed by the
// ARGBPolynomialRow_* workers. Negative height flips the source vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBPolynomial(const uint8_t* src_argb,
                   int src_stride_argb,
                   uint8_t* dst_argb,
                   int dst_stride_argb,
                   const float* poly,
                   int width,
                   int height) {
  int y;
  void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                            const float* poly, int width) = ARGBPolynomialRow_C;
  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
  }
#endif
#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
  // AVX2 path also requires FMA3 support.
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
      IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
3044
// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
LIBYUV_API
int HalfFloatPlane(const uint16_t* src_y,
                   int src_stride_y,
                   uint16_t* dst_y,
                   int dst_stride_y,
                   float scale,
                   int width,
                   int height) {
  int y;
  void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
                       int width) = HalfFloatRow_C;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Strides are given in bytes; halve them to step in uint16_t elements.
  src_stride_y >>= 1;
  dst_stride_y >>= 1;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows: contiguous planes are processed as one long row.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Pick a row function. Later checks override earlier ones, so the last
  // supported instruction set below wins.
#if defined(HAS_HALFFLOATROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    HalfFloatRow = HalfFloatRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = HalfFloatRow_SSE2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HalfFloatRow = HalfFloatRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = HalfFloatRow_AVX2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_F16C)
  // scale == 1.0f selects a variant that skips the multiply.
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    HalfFloatRow = HalfFloatRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      HalfFloatRow = HalfFloatRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    HalfFloatRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
  return 0;
}
3125
3126 // Convert a buffer of bytes to floats, scale the values and store as floats.
3127 LIBYUV_API
ByteToFloat(const uint8_t * src_y,float * dst_y,float scale,int width)3128 int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
3129 void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
3130 int width) = ByteToFloatRow_C;
3131 if (!src_y || !dst_y || width <= 0) {
3132 return -1;
3133 }
3134 #if defined(HAS_BYTETOFLOATROW_NEON)
3135 if (TestCpuFlag(kCpuHasNEON)) {
3136 ByteToFloatRow = ByteToFloatRow_Any_NEON;
3137 if (IS_ALIGNED(width, 8)) {
3138 ByteToFloatRow = ByteToFloatRow_NEON;
3139 }
3140 }
3141 #endif
3142
3143 ByteToFloatRow(src_y, dst_y, scale, width);
3144 return 0;
3145 }
3146
3147 // Apply a lumacolortable to each ARGB pixel.
3148 LIBYUV_API
ARGBLumaColorTable(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,const uint8_t * luma,int width,int height)3149 int ARGBLumaColorTable(const uint8_t* src_argb,
3150 int src_stride_argb,
3151 uint8_t* dst_argb,
3152 int dst_stride_argb,
3153 const uint8_t* luma,
3154 int width,
3155 int height) {
3156 int y;
3157 void (*ARGBLumaColorTableRow)(
3158 const uint8_t* src_argb, uint8_t* dst_argb, int width,
3159 const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
3160 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
3161 return -1;
3162 }
3163 // Negative height means invert the image.
3164 if (height < 0) {
3165 height = -height;
3166 src_argb = src_argb + (height - 1) * src_stride_argb;
3167 src_stride_argb = -src_stride_argb;
3168 }
3169 // Coalesce rows.
3170 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3171 width *= height;
3172 height = 1;
3173 src_stride_argb = dst_stride_argb = 0;
3174 }
3175 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
3176 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
3177 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
3178 }
3179 #endif
3180
3181 for (y = 0; y < height; ++y) {
3182 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
3183 src_argb += src_stride_argb;
3184 dst_argb += dst_stride_argb;
3185 }
3186 return 0;
3187 }
3188
3189 // Copy Alpha from one ARGB image to another.
3190 LIBYUV_API
ARGBCopyAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3191 int ARGBCopyAlpha(const uint8_t* src_argb,
3192 int src_stride_argb,
3193 uint8_t* dst_argb,
3194 int dst_stride_argb,
3195 int width,
3196 int height) {
3197 int y;
3198 void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3199 int width) = ARGBCopyAlphaRow_C;
3200 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3201 return -1;
3202 }
3203 // Negative height means invert the image.
3204 if (height < 0) {
3205 height = -height;
3206 src_argb = src_argb + (height - 1) * src_stride_argb;
3207 src_stride_argb = -src_stride_argb;
3208 }
3209 // Coalesce rows.
3210 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3211 width *= height;
3212 height = 1;
3213 src_stride_argb = dst_stride_argb = 0;
3214 }
3215 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
3216 if (TestCpuFlag(kCpuHasSSE2)) {
3217 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
3218 if (IS_ALIGNED(width, 8)) {
3219 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
3220 }
3221 }
3222 #endif
3223 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
3224 if (TestCpuFlag(kCpuHasAVX2)) {
3225 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
3226 if (IS_ALIGNED(width, 16)) {
3227 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
3228 }
3229 }
3230 #endif
3231
3232 for (y = 0; y < height; ++y) {
3233 ARGBCopyAlphaRow(src_argb, dst_argb, width);
3234 src_argb += src_stride_argb;
3235 dst_argb += dst_stride_argb;
3236 }
3237 return 0;
3238 }
3239
3240 // Extract just the alpha channel from ARGB.
3241 LIBYUV_API
ARGBExtractAlpha(const uint8_t * src_argb,int src_stride_argb,uint8_t * dst_a,int dst_stride_a,int width,int height)3242 int ARGBExtractAlpha(const uint8_t* src_argb,
3243 int src_stride_argb,
3244 uint8_t* dst_a,
3245 int dst_stride_a,
3246 int width,
3247 int height) {
3248 if (!src_argb || !dst_a || width <= 0 || height == 0) {
3249 return -1;
3250 }
3251 // Negative height means invert the image.
3252 if (height < 0) {
3253 height = -height;
3254 src_argb += (height - 1) * src_stride_argb;
3255 src_stride_argb = -src_stride_argb;
3256 }
3257 // Coalesce rows.
3258 if (src_stride_argb == width * 4 && dst_stride_a == width) {
3259 width *= height;
3260 height = 1;
3261 src_stride_argb = dst_stride_a = 0;
3262 }
3263 void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
3264 int width) = ARGBExtractAlphaRow_C;
3265 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
3266 if (TestCpuFlag(kCpuHasSSE2)) {
3267 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
3268 : ARGBExtractAlphaRow_Any_SSE2;
3269 }
3270 #endif
3271 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
3272 if (TestCpuFlag(kCpuHasAVX2)) {
3273 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
3274 : ARGBExtractAlphaRow_Any_AVX2;
3275 }
3276 #endif
3277 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
3278 if (TestCpuFlag(kCpuHasNEON)) {
3279 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
3280 : ARGBExtractAlphaRow_Any_NEON;
3281 }
3282 #endif
3283 #if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
3284 if (TestCpuFlag(kCpuHasMSA)) {
3285 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
3286 : ARGBExtractAlphaRow_Any_MSA;
3287 }
3288 #endif
3289
3290 for (int y = 0; y < height; ++y) {
3291 ARGBExtractAlphaRow(src_argb, dst_a, width);
3292 src_argb += src_stride_argb;
3293 dst_a += dst_stride_a;
3294 }
3295 return 0;
3296 }
3297
3298 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
3299 LIBYUV_API
ARGBCopyYToAlpha(const uint8_t * src_y,int src_stride_y,uint8_t * dst_argb,int dst_stride_argb,int width,int height)3300 int ARGBCopyYToAlpha(const uint8_t* src_y,
3301 int src_stride_y,
3302 uint8_t* dst_argb,
3303 int dst_stride_argb,
3304 int width,
3305 int height) {
3306 int y;
3307 void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
3308 int width) = ARGBCopyYToAlphaRow_C;
3309 if (!src_y || !dst_argb || width <= 0 || height == 0) {
3310 return -1;
3311 }
3312 // Negative height means invert the image.
3313 if (height < 0) {
3314 height = -height;
3315 src_y = src_y + (height - 1) * src_stride_y;
3316 src_stride_y = -src_stride_y;
3317 }
3318 // Coalesce rows.
3319 if (src_stride_y == width && dst_stride_argb == width * 4) {
3320 width *= height;
3321 height = 1;
3322 src_stride_y = dst_stride_argb = 0;
3323 }
3324 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
3325 if (TestCpuFlag(kCpuHasSSE2)) {
3326 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
3327 if (IS_ALIGNED(width, 8)) {
3328 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
3329 }
3330 }
3331 #endif
3332 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
3333 if (TestCpuFlag(kCpuHasAVX2)) {
3334 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
3335 if (IS_ALIGNED(width, 16)) {
3336 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
3337 }
3338 }
3339 #endif
3340
3341 for (y = 0; y < height; ++y) {
3342 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
3343 src_y += src_stride_y;
3344 dst_argb += dst_stride_argb;
3345 }
3346 return 0;
3347 }
3348
// TODO(fbarchard): Consider if width is even Y channel can be split
// directly. A SplitUVRow_Odd function could copy the remaining chroma.

// Convert YUY2 (packed Y0 U Y1 V) to NV12 (Y plane plus interleaved UV
// plane), averaging each pair of source rows to downsample chroma
// vertically.
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
               int src_stride_yuy2,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // SplitUVRow de-interleaves byte pairs: even bytes to dst_u, odd bytes to
  // dst_v. Applied to YUY2, that separates luma (even bytes) from the
  // interleaved UV chroma (odd bytes).
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // InterpolateRow with fraction 128 blends two rows 50/50; used below to
  // average consecutive chroma rows.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    // awidth is the padded row width (even); rows[] scratch layout is:
    // [0, awidth)          Y bytes of the row just split
    // [awidth, 2*awidth)   UV bytes of the first row of the pair
    // [2*awidth, 3*awidth) UV bytes of the second row of the pair
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows into one NV12 chroma row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: its chroma is written to dst_uv unaveraged.
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3467
// Convert UYVY (packed U Y0 V Y1) to NV12 (Y plane plus interleaved UV
// plane), averaging each pair of source rows to downsample chroma
// vertically. Mirrors YUY2ToNV12 with the luma/chroma byte roles swapped.
LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // SplitUVRow de-interleaves byte pairs: even bytes to dst_u, odd bytes to
  // dst_v. In UYVY the even bytes are the interleaved UV chroma and the odd
  // bytes are luma, hence the swapped destination arguments below.
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  // InterpolateRow with fraction 128 blends two rows 50/50; used below to
  // average consecutive chroma rows.
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    // awidth is the padded row width (even); rows[] scratch layout is:
    // [0, awidth)          Y bytes of the row just split
    // [awidth, 2*awidth)   UV bytes of the first row of the pair
    // [2*awidth, 3*awidth) UV bytes of the second row of the pair
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows into one NV12 chroma row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: its chroma is written to dst_uv unaveraged.
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3583
3584 #ifdef __cplusplus
3585 } // extern "C"
3586 } // namespace libyuv
3587 #endif
3588