1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * Copyright 2016 Mozilla Foundation
4 *
5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree.
10 */
11
12 #include "libyuv/scale.h"
13
14 #include <assert.h>
15 #include <string.h>
16
17 #include "libyuv/cpu_id.h"
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 #include "libyuv/video_common.h"
21
22 #include "mozilla/gfx/Types.h"
23
24 #ifdef __cplusplus
25 namespace libyuv {
26 extern "C" {
27 #endif
28
29 // YUV to RGB conversion and scaling functions were implemented by referencing
30 // scale_argb.cc
31 //
// libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not
// complete yet. The functions in this file are based on it.
// First, ScaleYUVToARGBBilinearUp() was implemented by modifying libyuv's
// version. Then all the other functions were implemented in the same way.
36 //
// The relationships between the functions in yuv_convert.cpp and those in
// scale_argb.cc are as follows:
39 // - ScaleYUVToARGBDown2() <-- ScaleARGBDown2()
40 // - ScaleYUVToARGBDownEven() <-- ScaleARGBDownEven()
41 // - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown()
42 // - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv
43 // - ScaleYUVToARGBSimple() <-- ScaleARGBSimple()
44 // - ScaleYUVToARGB() <-- ScaleARGB() // Removed some function calls for simplicity.
45 // - YUVToARGBScale() <-- ARGBScale()
46 //
// The calls to, and the selection of, InterpolateRow() and
// ScaleARGBFilterCols() were kept as close to the originals as possible.
49 //
// The following changes were made to each scaling function.
51 //
52 // -[1] Allocate YUV conversion buffer and use it as source buffer of scaling.
53 // Its usage is borrowed from the libyuv's ScaleYUVToARGBBilinearUp().
// -[2] Conversion from YUV to RGB is abstracted by YUVBuferIter.
// This allows multiple YUV color formats to be handled.
// -[3] Modified the scaling functions so that they handle the YUV conversion
// buffer and use YUVBuferIter.
58 // -[4] Color conversion function selections in YUVBuferIter were borrowed from
59 // I444ToARGBMatrix(), I422ToARGBMatrix() and I420ToARGBMatrix()
60
61 typedef mozilla::gfx::YUVColorSpace YUVColorSpace;
62
63 struct YUVBuferIter {
64 int src_width;
65 int src_height;
66 int src_stride_y;
67 int src_stride_u;
68 int src_stride_v;
69 const uint8* src_y;
70 const uint8* src_u;
71 const uint8* src_v;
72
73 uint32 src_fourcc;
74 const struct YuvConstants* yuvconstants;
75 int y_index;
76 const uint8* src_row_y;
77 const uint8* src_row_u;
78 const uint8* src_row_v;
79
80 void (*YUVToARGBRow)(const uint8* y_buf,
81 const uint8* u_buf,
82 const uint8* v_buf,
83 uint8* rgb_buf,
84 const struct YuvConstants* yuvconstants,
85 int width);
86 void (*MoveTo)(YUVBuferIter& iter, int y_index);
87 void (*MoveToNextRow)(YUVBuferIter& iter);
88 };
89
YUVBuferIter_InitI422(YUVBuferIter & iter)90 void YUVBuferIter_InitI422(YUVBuferIter& iter) {
91 iter.YUVToARGBRow = I422ToARGBRow_C;
92 #if defined(HAS_I422TOARGBROW_SSSE3)
93 if (TestCpuFlag(kCpuHasSSSE3)) {
94 iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3;
95 if (IS_ALIGNED(iter.src_width, 8)) {
96 iter.YUVToARGBRow = I422ToARGBRow_SSSE3;
97 }
98 }
99 #endif
100 #if defined(HAS_I422TOARGBROW_AVX2)
101 if (TestCpuFlag(kCpuHasAVX2)) {
102 iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2;
103 if (IS_ALIGNED(iter.src_width, 16)) {
104 iter.YUVToARGBRow = I422ToARGBRow_AVX2;
105 }
106 }
107 #endif
108 #if defined(HAS_I422TOARGBROW_NEON)
109 if (TestCpuFlag(kCpuHasNEON)) {
110 iter.YUVToARGBRow = I422ToARGBRow_Any_NEON;
111 if (IS_ALIGNED(iter.src_width, 8)) {
112 iter.YUVToARGBRow = I422ToARGBRow_NEON;
113 }
114 }
115 #endif
116 #if defined(HAS_I422TOARGBROW_DSPR2)
117 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) &&
118 IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) &&
119 IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) &&
120 IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) {
121 // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)
122 iter.YUVToARGBRow = I422ToARGBRow_DSPR2;
123 }
124 #endif
125 }
126
127 void YUVBuferIter_InitI444(YUVBuferIter& iter) {
128 iter.YUVToARGBRow = I444ToARGBRow_C;
129 #if defined(HAS_I444TOARGBROW_SSSE3)
130 if (TestCpuFlag(kCpuHasSSSE3)) {
131 iter.YUVToARGBRow = I444ToARGBRow_Any_SSSE3;
132 if (IS_ALIGNED(iter.src_width, 8)) {
133 iter.YUVToARGBRow = I444ToARGBRow_SSSE3;
134 }
135 }
136 #endif
137 #if defined(HAS_I444TOARGBROW_AVX2)
138 if (TestCpuFlag(kCpuHasAVX2)) {
139 iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2;
140 if (IS_ALIGNED(iter.src_width, 16)) {
141 iter.YUVToARGBRow = I444ToARGBRow_AVX2;
142 }
143 }
144 #endif
145 #if defined(HAS_I444TOARGBROW_NEON)
146 if (TestCpuFlag(kCpuHasNEON)) {
147 iter.YUVToARGBRow = I444ToARGBRow_Any_NEON;
148 if (IS_ALIGNED(iter.src_width, 8)) {
149 iter.YUVToARGBRow = I444ToARGBRow_NEON;
150 }
151 }
152 #endif
153 }
154
155
156 static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) {
157 iter.y_index = y_index;
158 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
159 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
160 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
161 }
162
163 static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) {
164 iter.src_row_y += iter.src_stride_y;
165 iter.src_row_u += iter.src_stride_u;
166 iter.src_row_v += iter.src_stride_v;
167 iter.y_index++;
168 }
169
170 static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) {
171 iter.y_index = y_index;
172 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
173 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
174 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
175 }
176
177 static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) {
178 iter.src_row_y += iter.src_stride_y;
179 iter.src_row_u += iter.src_stride_u;
180 iter.src_row_v += iter.src_stride_v;
181 iter.y_index++;
182 }
183
184 static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) {
185 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
186 int uv_y_index = y_index >> kYShift;
187
188 iter.y_index = y_index;
189 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
190 iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u;
191 iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v;
192 }
193
194 static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) {
195 iter.src_row_y += iter.src_stride_y;
196 if (iter.y_index & 1) {
197 iter.src_row_u += iter.src_stride_u;
198 iter.src_row_v += iter.src_stride_v;
199 }
200 iter.y_index++;
201 }
202
203 static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8* argb_row) {
204 iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width);
205 }
206
207 void YUVBuferIter_Init(YUVBuferIter& iter, uint32 src_fourcc, YUVColorSpace yuv_color_space) {
208 iter.src_fourcc = src_fourcc;
209 iter.y_index = 0;
210 iter.src_row_y = iter.src_y;
211 iter.src_row_u = iter.src_u;
212 iter.src_row_v = iter.src_v;
213 switch (yuv_color_space) {
214 case YUVColorSpace::BT2020:
215 iter.yuvconstants = &kYuv2020Constants;
216 break;
217 case YUVColorSpace::BT709:
218 iter.yuvconstants = &kYuvH709Constants;
219 break;
220 default:
221 iter.yuvconstants = &kYuvI601Constants;
222 }
223
224 if (src_fourcc == FOURCC_I444) {
225 YUVBuferIter_InitI444(iter);
226 iter.MoveTo = YUVBuferIter_MoveToForI444;
227 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444;
228 } else if(src_fourcc == FOURCC_I422){
229 YUVBuferIter_InitI422(iter);
230 iter.MoveTo = YUVBuferIter_MoveToForI422;
231 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422;
232 } else {
233 assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420
234 YUVBuferIter_InitI422(iter);
235 iter.MoveTo = YUVBuferIter_MoveToForI420;
236 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420;
237 }
238 }
239
240 // ScaleARGB ARGB, 1/2
241 // This is an optimized version for scaling down a ARGB to 1/2 of
242 // its original size.
243 static void ScaleYUVToARGBDown2(int src_width, int src_height,
244 int dst_width, int dst_height,
245 int src_stride_y,
246 int src_stride_u,
247 int src_stride_v,
248 int dst_stride_argb,
249 const uint8* src_y,
250 const uint8* src_u,
251 const uint8* src_v,
252 uint8* dst_argb,
253 int x, int dx, int y, int dy,
254 enum FilterMode filtering,
255 uint32 src_fourcc,
256 YUVColorSpace yuv_color_space) {
257 int j;
258
259 // Allocate 2 rows of ARGB for source conversion.
260 const int kRowSize = (src_width * 4 + 15) & ~15;
261 align_buffer_64(argb_cnv_row, kRowSize * 2);
262 uint8* argb_cnv_rowptr = argb_cnv_row;
263 int argb_cnv_rowstride = kRowSize;
264
265 YUVBuferIter iter;
266 iter.src_width = src_width;
267 iter.src_height = src_height;
268 iter.src_stride_y = src_stride_y;
269 iter.src_stride_u = src_stride_u;
270 iter.src_stride_v = src_stride_v;
271 iter.src_y = src_y;
272 iter.src_u = src_u;
273 iter.src_v = src_v;
274 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
275
276 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
277 uint8* dst_argb, int dst_width) =
278 filtering == kFilterNone ? ScaleARGBRowDown2_C :
279 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
280 ScaleARGBRowDown2Box_C);
281 assert(dx == 65536 * 2); // Test scale factor of 2.
282 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
283 // Advance to odd row, even column.
284 int yi = y >> 16;
285 iter.MoveTo(iter, yi);
286 ptrdiff_t x_offset;
287 if (filtering == kFilterBilinear) {
288 x_offset = (x >> 16) * 4;
289 } else {
290 x_offset = ((x >> 16) - 1) * 4;
291 }
292 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
293 if (TestCpuFlag(kCpuHasSSE2)) {
294 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
295 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
296 ScaleARGBRowDown2Box_Any_SSE2);
297 if (IS_ALIGNED(dst_width, 4)) {
298 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
299 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
300 ScaleARGBRowDown2Box_SSE2);
301 }
302 }
303
304 #endif
305 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
306 if (TestCpuFlag(kCpuHasNEON)) {
307 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
308 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
309 ScaleARGBRowDown2Box_Any_NEON);
310 if (IS_ALIGNED(dst_width, 8)) {
311 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
312 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
313 ScaleARGBRowDown2Box_NEON);
314 }
315 }
316 #endif
317
318 const int dyi = dy >> 16;
319 int lastyi = yi;
320 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
321 // Prepare next row if necessary
322 if (filtering != kFilterLinear) {
323 if ((yi + dyi) < (src_height - 1)) {
324 iter.MoveTo(iter, yi + dyi);
325 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
326 } else {
327 argb_cnv_rowstride = 0;
328 }
329 }
330
331 if (filtering == kFilterLinear) {
332 argb_cnv_rowstride = 0;
333 }
334 const int max_yi = src_height - 1;
335 const int max_yi_minus_dyi = max_yi - dyi;
336 for (j = 0; j < dst_height; ++j) {
337 if (yi != lastyi) {
338 if (yi > max_yi) {
339 yi = max_yi;
340 }
341 if (yi != lastyi) {
342 if (filtering == kFilterLinear) {
343 iter.MoveTo(iter, yi);
344 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
345 lastyi = yi;
346 } else {
347 // Prepare current row
348 if (yi == iter.y_index) {
349 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
350 argb_cnv_rowstride = - argb_cnv_rowstride;
351 } else {
352 iter.MoveTo(iter, yi);
353 argb_cnv_rowptr = argb_cnv_row;
354 argb_cnv_rowstride = kRowSize;
355 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
356 }
357 // Prepare next row if necessary
358 if (iter.y_index < max_yi) {
359 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
360 iter.MoveTo(iter, next_yi);
361 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
362 } else {
363 argb_cnv_rowstride = 0;
364 }
365 lastyi = yi;
366 }
367 }
368 }
369 ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width);
370 dst_argb += dst_stride_argb;
371 yi += dyi;
372 }
373
374 free_aligned_buffer_64(argb_cnv_row);
375 }
376
377 // ScaleARGB ARGB Even
378 // This is an optimized version for scaling down a ARGB to even
379 // multiple of its original size.
380 static void ScaleYUVToARGBDownEven(int src_width, int src_height,
381 int dst_width, int dst_height,
382 int src_stride_y,
383 int src_stride_u,
384 int src_stride_v,
385 int dst_stride_argb,
386 const uint8* src_y,
387 const uint8* src_u,
388 const uint8* src_v,
389 uint8* dst_argb,
390 int x, int dx, int y, int dy,
391 enum FilterMode filtering,
392 uint32 src_fourcc,
393 YUVColorSpace yuv_color_space) {
394 int j;
395 // Allocate 2 rows of ARGB for source conversion.
396 const int kRowSize = (src_width * 4 + 15) & ~15;
397 align_buffer_64(argb_cnv_row, kRowSize * 2);
398 uint8* argb_cnv_rowptr = argb_cnv_row;
399 int argb_cnv_rowstride = kRowSize;
400
401 int col_step = dx >> 16;
402 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
403 int src_step, uint8* dst_argb, int dst_width) =
404 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
405 assert(IS_ALIGNED(src_width, 2));
406 assert(IS_ALIGNED(src_height, 2));
407 int yi = y >> 16;
408 const ptrdiff_t x_offset = (x >> 16) * 4;
409
410 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
411 if (TestCpuFlag(kCpuHasSSE2)) {
412 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
413 ScaleARGBRowDownEven_Any_SSE2;
414 if (IS_ALIGNED(dst_width, 4)) {
415 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
416 ScaleARGBRowDownEven_SSE2;
417 }
418 }
419 #endif
420 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
421 if (TestCpuFlag(kCpuHasNEON)) {
422 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
423 ScaleARGBRowDownEven_Any_NEON;
424 if (IS_ALIGNED(dst_width, 4)) {
425 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
426 ScaleARGBRowDownEven_NEON;
427 }
428 }
429 #endif
430
431 YUVBuferIter iter;
432 iter.src_width = src_width;
433 iter.src_height = src_height;
434 iter.src_stride_y = src_stride_y;
435 iter.src_stride_u = src_stride_u;
436 iter.src_stride_v = src_stride_v;
437 iter.src_y = src_y;
438 iter.src_u = src_u;
439 iter.src_v = src_v;
440 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
441
442 const int dyi = dy >> 16;
443 int lastyi = yi;
444 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
445 // Prepare next row if necessary
446 if (filtering != kFilterLinear) {
447 if ((yi + dyi) < (src_height - 1)) {
448 iter.MoveTo(iter, yi + dyi);
449 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
450 } else {
451 argb_cnv_rowstride = 0;
452 }
453 }
454
455 if (filtering == kFilterLinear) {
456 argb_cnv_rowstride = 0;
457 }
458 const int max_yi = src_height - 1;
459 const int max_yi_minus_dyi = max_yi - dyi;
460 for (j = 0; j < dst_height; ++j) {
461 if (yi != lastyi) {
462 if (yi > max_yi) {
463 yi = max_yi;
464 }
465 if (yi != lastyi) {
466 if (filtering == kFilterLinear) {
467 iter.MoveTo(iter, yi);
468 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
469 lastyi = yi;
470 } else {
471 // Prepare current row
472 if (yi == iter.y_index) {
473 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
474 argb_cnv_rowstride = - argb_cnv_rowstride;
475 } else {
476 iter.MoveTo(iter, yi);
477 argb_cnv_rowptr = argb_cnv_row;
478 argb_cnv_rowstride = kRowSize;
479 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
480 }
481 // Prepare next row if necessary
482 if (iter.y_index < max_yi) {
483 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
484 iter.MoveTo(iter, next_yi);
485 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
486 } else {
487 argb_cnv_rowstride = 0;
488 }
489 lastyi = yi;
490 }
491 }
492 }
493 ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width);
494 dst_argb += dst_stride_argb;
495 yi += dyi;
496 }
497 free_aligned_buffer_64(argb_cnv_row);
498 }
499
500 // Scale YUV to ARGB down with bilinear interpolation.
501 static void ScaleYUVToARGBBilinearDown(int src_width, int src_height,
502 int dst_width, int dst_height,
503 int src_stride_y,
504 int src_stride_u,
505 int src_stride_v,
506 int dst_stride_argb,
507 const uint8* src_y,
508 const uint8* src_u,
509 const uint8* src_v,
510 uint8* dst_argb,
511 int x, int dx, int y, int dy,
512 enum FilterMode filtering,
513 uint32 src_fourcc,
514 YUVColorSpace yuv_color_space) {
515 int j;
516 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
517 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
518 InterpolateRow_C;
519 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
520 int dst_width, int x, int dx) =
521 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
522 int64 xlast = x + (int64)(dst_width - 1) * dx;
523 int64 xl = (dx >= 0) ? x : xlast;
524 int64 xr = (dx >= 0) ? xlast : x;
525 int clip_src_width;
526 xl = (xl >> 16) & ~3; // Left edge aligned.
527 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
528 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
529 if (xr > src_width) {
530 xr = src_width;
531 }
532 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
533 const ptrdiff_t xl_offset = xl * 4;
534 x -= (int)(xl << 16);
535
536 // Allocate 2 row of ARGB for source conversion.
537 const int kRowSize = (src_width * 4 + 15) & ~15;
538 align_buffer_64(argb_cnv_row, kRowSize * 2);
539 uint8* argb_cnv_rowptr = argb_cnv_row;
540 int argb_cnv_rowstride = kRowSize;
541
542 #if defined(HAS_INTERPOLATEROW_SSSE3)
543 if (TestCpuFlag(kCpuHasSSSE3)) {
544 InterpolateRow = InterpolateRow_Any_SSSE3;
545 if (IS_ALIGNED(clip_src_width, 16)) {
546 InterpolateRow = InterpolateRow_SSSE3;
547 }
548 }
549 #endif
550 #if defined(HAS_INTERPOLATEROW_AVX2)
551 if (TestCpuFlag(kCpuHasAVX2)) {
552 InterpolateRow = InterpolateRow_Any_AVX2;
553 if (IS_ALIGNED(clip_src_width, 32)) {
554 InterpolateRow = InterpolateRow_AVX2;
555 }
556 }
557 #endif
558 #if defined(HAS_INTERPOLATEROW_NEON)
559 if (TestCpuFlag(kCpuHasNEON)) {
560 InterpolateRow = InterpolateRow_Any_NEON;
561 if (IS_ALIGNED(clip_src_width, 16)) {
562 InterpolateRow = InterpolateRow_NEON;
563 }
564 }
565 #endif
566 #if defined(HAS_INTERPOLATEROW_DSPR2)
567 if (TestCpuFlag(kCpuHasDSPR2) &&
568 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) {
569 InterpolateRow = InterpolateRow_Any_DSPR2;
570 if (IS_ALIGNED(clip_src_width, 4)) {
571 InterpolateRow = InterpolateRow_DSPR2;
572 }
573 }
574 #endif
575 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
576 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
577 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
578 }
579 #endif
580 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
581 if (TestCpuFlag(kCpuHasNEON)) {
582 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
583 if (IS_ALIGNED(dst_width, 4)) {
584 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
585 }
586 }
587 #endif
588
589 int yi = y >> 16;
590
591 YUVBuferIter iter;
592 iter.src_width = src_width;
593 iter.src_height = src_height;
594 iter.src_stride_y = src_stride_y;
595 iter.src_stride_u = src_stride_u;
596 iter.src_stride_v = src_stride_v;
597 iter.src_y = src_y;
598 iter.src_u = src_u;
599 iter.src_v = src_v;
600 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
601 iter.MoveTo(iter, yi);
602
603 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
604 // Allocate a row of ARGB.
605 align_buffer_64(row, clip_src_width * 4);
606
607 int lastyi = yi;
608 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
609 // Prepare next row if necessary
610 if (filtering != kFilterLinear) {
611 if ((yi + 1) < src_height) {
612 iter.MoveToNextRow(iter);
613 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
614 } else {
615 argb_cnv_rowstride = 0;
616 }
617 }
618
619 const int max_y = (src_height - 1) << 16;
620 const int max_yi = src_height - 1;
621 for (j = 0; j < dst_height; ++j) {
622 yi = y >> 16;
623 if (yi != lastyi) {
624 if (y > max_y) {
625 y = max_y;
626 yi = y >> 16;
627 }
628 if (yi != lastyi) {
629 if (filtering == kFilterLinear) {
630 iter.MoveTo(iter, yi);
631 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
632 lastyi = yi;
633 } else {
634 // Prepare current row
635 if (yi == iter.y_index) {
636 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
637 argb_cnv_rowstride = - argb_cnv_rowstride;
638 } else {
639 iter.MoveTo(iter, yi);
640 argb_cnv_rowptr = argb_cnv_row;
641 argb_cnv_rowstride = kRowSize;
642 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
643 }
644 // Prepare next row if necessary
645 if (iter.y_index < max_yi) {
646 iter.MoveToNextRow(iter);
647 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
648 } else {
649 argb_cnv_rowstride = 0;
650 }
651 lastyi = yi;
652 }
653 }
654 }
655 if (filtering == kFilterLinear) {
656 ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx);
657 } else {
658 int yf = (y >> 8) & 255;
659 InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf);
660 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
661 }
662 dst_argb += dst_stride_argb;
663 y += dy;
664 }
665 free_aligned_buffer_64(row);
666 free_aligned_buffer_64(argb_cnv_row);
667 }
668
669 // Scale YUV to ARGB up with bilinear interpolation.
670 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
671 int dst_width, int dst_height,
672 int src_stride_y,
673 int src_stride_u,
674 int src_stride_v,
675 int dst_stride_argb,
676 const uint8* src_y,
677 const uint8* src_u,
678 const uint8* src_v,
679 uint8* dst_argb,
680 int x, int dx, int y, int dy,
681 enum FilterMode filtering,
682 uint32 src_fourcc,
683 YUVColorSpace yuv_color_space) {
684 int j;
685 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
686 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
687 InterpolateRow_C;
688 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
689 int dst_width, int x, int dx) =
690 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
691 const int max_y = (src_height - 1) << 16;
692
693 // Allocate 1 row of ARGB for source conversion.
694 align_buffer_64(argb_cnv_row, src_width * 4);
695
696 #if defined(HAS_INTERPOLATEROW_SSSE3)
697 if (TestCpuFlag(kCpuHasSSSE3)) {
698 InterpolateRow = InterpolateRow_Any_SSSE3;
699 if (IS_ALIGNED(dst_width, 4)) {
700 InterpolateRow = InterpolateRow_SSSE3;
701 }
702 }
703 #endif
704 #if defined(HAS_INTERPOLATEROW_AVX2)
705 if (TestCpuFlag(kCpuHasAVX2)) {
706 InterpolateRow = InterpolateRow_Any_AVX2;
707 if (IS_ALIGNED(dst_width, 8)) {
708 InterpolateRow = InterpolateRow_AVX2;
709 }
710 }
711 #endif
712 #if defined(HAS_INTERPOLATEROW_NEON)
713 if (TestCpuFlag(kCpuHasNEON)) {
714 InterpolateRow = InterpolateRow_Any_NEON;
715 if (IS_ALIGNED(dst_width, 4)) {
716 InterpolateRow = InterpolateRow_NEON;
717 }
718 }
719 #endif
720 #if defined(HAS_INTERPOLATEROW_DSPR2)
721 if (TestCpuFlag(kCpuHasDSPR2) &&
722 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
723 InterpolateRow = InterpolateRow_DSPR2;
724 }
725 #endif
726 if (src_width >= 32768) {
727 ScaleARGBFilterCols = filtering ?
728 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
729 }
730 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
731 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
732 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
733 }
734 #endif
735 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
736 if (filtering && TestCpuFlag(kCpuHasNEON)) {
737 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
738 if (IS_ALIGNED(dst_width, 4)) {
739 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
740 }
741 }
742 #endif
743 #if defined(HAS_SCALEARGBCOLS_SSE2)
744 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
745 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
746 }
747 #endif
748 #if defined(HAS_SCALEARGBCOLS_NEON)
749 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
750 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
751 if (IS_ALIGNED(dst_width, 8)) {
752 ScaleARGBFilterCols = ScaleARGBCols_NEON;
753 }
754 }
755 #endif
756 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
757 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
758 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
759 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
760 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
761 }
762 #endif
763 }
764
765 if (y > max_y) {
766 y = max_y;
767 }
768
769 int yi = y >> 16;
770
771 YUVBuferIter iter;
772 iter.src_width = src_width;
773 iter.src_height = src_height;
774 iter.src_stride_y = src_stride_y;
775 iter.src_stride_u = src_stride_u;
776 iter.src_stride_v = src_stride_v;
777 iter.src_y = src_y;
778 iter.src_u = src_u;
779 iter.src_v = src_v;
780 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
781 iter.MoveTo(iter, yi);
782
783 // Allocate 2 rows of ARGB.
784 const int kRowSize = (dst_width * 4 + 15) & ~15;
785 align_buffer_64(row, kRowSize * 2);
786
787 uint8* rowptr = row;
788 int rowstride = kRowSize;
789 int lastyi = yi;
790
791 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
792 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
793
794 if (filtering == kFilterLinear) {
795 rowstride = 0;
796 }
797 // Prepare next row if necessary
798 if (filtering != kFilterLinear) {
799 if ((yi + 1) < src_height) {
800 iter.MoveToNextRow(iter);
801 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
802 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
803 }else {
804 rowstride = 0;
805 }
806 }
807
808 const int max_yi = src_height - 1;
809 for (j = 0; j < dst_height; ++j) {
810 yi = y >> 16;
811 if (yi != lastyi) {
812 if (y > max_y) {
813 y = max_y;
814 yi = y >> 16;
815 }
816 if (yi != lastyi) {
817 if (filtering == kFilterLinear) {
818 iter.MoveToNextRow(iter);
819 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
820 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
821 } else {
822 // Prepare next row if necessary
823 if (yi < max_yi) {
824 iter.MoveToNextRow(iter);
825 rowptr += rowstride;
826 rowstride = -rowstride;
827 // TODO(fbarchard): Convert the clipped region of row.
828 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
829 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
830 } else {
831 rowstride = 0;
832 }
833 }
834 lastyi = yi;
835 }
836 }
837 if (filtering == kFilterLinear) {
838 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
839 } else {
840 int yf = (y >> 8) & 255;
841 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
842 }
843 dst_argb += dst_stride_argb;
844 y += dy;
845 }
846 free_aligned_buffer_64(row);
847 free_aligned_buffer_64(argb_cnv_row);
848 }
849
850 // Scale ARGB to/from any dimensions, without interpolation.
851 // Fixed point math is used for performance: The upper 16 bits
852 // of x and dx is the integer part of the source position and
853 // the lower 16 bits are the fixed decimal part.
854
855 static void ScaleYUVToARGBSimple(int src_width, int src_height,
856 int dst_width, int dst_height,
857 int src_stride_y,
858 int src_stride_u,
859 int src_stride_v,
860 int dst_stride_argb,
861 const uint8* src_y,
862 const uint8* src_u,
863 const uint8* src_v,
864 uint8* dst_argb,
865 int x, int dx, int y, int dy,
866 uint32 src_fourcc,
867 YUVColorSpace yuv_color_space) {
868 int j;
869 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
870 int dst_width, int x, int dx) =
871 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
872
873 // Allocate 1 row of ARGB for source conversion.
874 align_buffer_64(argb_cnv_row, src_width * 4);
875
876 #if defined(HAS_SCALEARGBCOLS_SSE2)
877 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
878 ScaleARGBCols = ScaleARGBCols_SSE2;
879 }
880 #endif
881 #if defined(HAS_SCALEARGBCOLS_NEON)
882 if (TestCpuFlag(kCpuHasNEON)) {
883 ScaleARGBCols = ScaleARGBCols_Any_NEON;
884 if (IS_ALIGNED(dst_width, 8)) {
885 ScaleARGBCols = ScaleARGBCols_NEON;
886 }
887 }
888 #endif
889 if (src_width * 2 == dst_width && x < 0x8000) {
890 ScaleARGBCols = ScaleARGBColsUp2_C;
891 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
892 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
893 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
894 }
895 #endif
896 }
897
898 int yi = y >> 16;
899
900 YUVBuferIter iter;
901 iter.src_width = src_width;
902 iter.src_height = src_height;
903 iter.src_stride_y = src_stride_y;
904 iter.src_stride_u = src_stride_u;
905 iter.src_stride_v = src_stride_v;
906 iter.src_y = src_y;
907 iter.src_u = src_u;
908 iter.src_v = src_v;
909 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
910 iter.MoveTo(iter, yi);
911
912 int lasty = yi;
913 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
914
915 for (j = 0; j < dst_height; ++j) {
916 yi = y >> 16;
917 if (yi != lasty) {
918 iter.MoveTo(iter, yi);
919 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
920 lasty = yi;
921 }
922 ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx);
923 dst_argb += dst_stride_argb;
924 y += dy;
925 }
926 free_aligned_buffer_64(argb_cnv_row);
927 }
928
929 static void YUVToARGBCopy(const uint8* src_y, int src_stride_y,
930 const uint8* src_u, int src_stride_u,
931 const uint8* src_v, int src_stride_v,
932 int src_width, int src_height,
933 uint8* dst_argb, int dst_stride_argb,
934 int dst_width, int dst_height,
935 uint32 src_fourcc,
936 YUVColorSpace yuv_color_space)
937 {
938 YUVBuferIter iter;
939 iter.src_width = src_width;
940 iter.src_height = src_height;
941 iter.src_stride_y = src_stride_y;
942 iter.src_stride_u = src_stride_u;
943 iter.src_stride_v = src_stride_v;
944 iter.src_y = src_y;
945 iter.src_u = src_u;
946 iter.src_v = src_v;
947 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
948
949 for (int j = 0; j < dst_height; ++j) {
950 YUVBuferIter_ConvertToARGBRow(iter, dst_argb);
951 iter.MoveToNextRow(iter);
952 dst_argb += dst_stride_argb;
953 }
954 }
955
956 static void ScaleYUVToARGB(const uint8* src_y, int src_stride_y,
957 const uint8* src_u, int src_stride_u,
958 const uint8* src_v, int src_stride_v,
959 int src_width, int src_height,
960 uint8* dst_argb, int dst_stride_argb,
961 int dst_width, int dst_height,
962 enum FilterMode filtering,
963 uint32 src_fourcc,
964 YUVColorSpace yuv_color_space)
965 {
966 // Initial source x/y coordinate and step values as 16.16 fixed point.
967 int x = 0;
968 int y = 0;
969 int dx = 0;
970 int dy = 0;
971 // ARGB does not support box filter yet, but allow the user to pass it.
972 // Simplify filtering when possible.
973 filtering = ScaleFilterReduce(src_width, src_height,
974 dst_width, dst_height,
975 filtering);
976 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
977 &x, &y, &dx, &dy);
978
979 // Special case for integer step values.
980 if (((dx | dy) & 0xffff) == 0) {
981 if (!dx || !dy) { // 1 pixel wide and/or tall.
982 filtering = kFilterNone;
983 } else {
984 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
985 if (!(dx & 0x10000) && !(dy & 0x10000)) {
986 if (dx == 0x20000) {
987 // Optimized 1/2 downsample.
988 ScaleYUVToARGBDown2(src_width, src_height,
989 dst_width, dst_height,
990 src_stride_y,
991 src_stride_u,
992 src_stride_v,
993 dst_stride_argb,
994 src_y,
995 src_u,
996 src_v,
997 dst_argb,
998 x, dx, y, dy,
999 filtering,
1000 src_fourcc,
1001 yuv_color_space);
1002 return;
1003 }
1004 ScaleYUVToARGBDownEven(src_width, src_height,
1005 dst_width, dst_height,
1006 src_stride_y,
1007 src_stride_u,
1008 src_stride_v,
1009 dst_stride_argb,
1010 src_y,
1011 src_u,
1012 src_v,
1013 dst_argb,
1014 x, dx, y, dy,
1015 filtering,
1016 src_fourcc,
1017 yuv_color_space);
1018 return;
1019 }
1020 // Optimized odd scale down. ie 3, 5, 7, 9x.
1021 if ((dx & 0x10000) && (dy & 0x10000)) {
1022 filtering = kFilterNone;
1023 if (dx == 0x10000 && dy == 0x10000) {
1024 // Straight conversion and copy.
1025 YUVToARGBCopy(src_y, src_stride_y,
1026 src_u, src_stride_u,
1027 src_v, src_stride_v,
1028 src_width, src_height,
1029 dst_argb, dst_stride_argb,
1030 dst_width, dst_height,
1031 src_fourcc,
1032 yuv_color_space);
1033 return;
1034 }
1035 }
1036 }
1037 }
1038 if (filtering && dy < 65536) {
1039 ScaleYUVToARGBBilinearUp(src_width, src_height,
1040 dst_width, dst_height,
1041 src_stride_y,
1042 src_stride_u,
1043 src_stride_v,
1044 dst_stride_argb,
1045 src_y,
1046 src_u,
1047 src_v,
1048 dst_argb,
1049 x, dx, y, dy,
1050 filtering,
1051 src_fourcc,
1052 yuv_color_space);
1053 return;
1054 }
1055 if (filtering) {
1056 ScaleYUVToARGBBilinearDown(src_width, src_height,
1057 dst_width, dst_height,
1058 src_stride_y,
1059 src_stride_u,
1060 src_stride_v,
1061 dst_stride_argb,
1062 src_y,
1063 src_u,
1064 src_v,
1065 dst_argb,
1066 x, dx, y, dy,
1067 filtering,
1068 src_fourcc,
1069 yuv_color_space);
1070 return;
1071 }
1072 ScaleYUVToARGBSimple(src_width, src_height,
1073 dst_width, dst_height,
1074 src_stride_y,
1075 src_stride_u,
1076 src_stride_v,
1077 dst_stride_argb,
1078 src_y,
1079 src_u,
1080 src_v,
1081 dst_argb,
1082 x, dx, y, dy,
1083 src_fourcc,
1084 yuv_color_space);
1085 }
1086
1087 bool IsConvertSupported(uint32 src_fourcc)
1088 {
1089 if (src_fourcc == FOURCC_I444 ||
1090 src_fourcc == FOURCC_I422 ||
1091 src_fourcc == FOURCC_I420) {
1092 return true;
1093 }
1094 return false;
1095 }
1096
1097 LIBYUV_API
1098 int YUVToARGBScale(const uint8* src_y, int src_stride_y,
1099 const uint8* src_u, int src_stride_u,
1100 const uint8* src_v, int src_stride_v,
1101 uint32 src_fourcc,
1102 YUVColorSpace yuv_color_space,
1103 int src_width, int src_height,
1104 uint8* dst_argb, int dst_stride_argb,
1105 int dst_width, int dst_height,
1106 enum FilterMode filtering)
1107 {
1108 if (!src_y || !src_u || !src_v ||
1109 src_width == 0 || src_height == 0 ||
1110 !dst_argb || dst_width <= 0 || dst_height <= 0) {
1111 return -1;
1112 }
1113 if (!IsConvertSupported(src_fourcc)) {
1114 return -1;
1115 }
1116 ScaleYUVToARGB(src_y, src_stride_y,
1117 src_u, src_stride_u,
1118 src_v, src_stride_v,
1119 src_width, src_height,
1120 dst_argb, dst_stride_argb,
1121 dst_width, dst_height,
1122 filtering,
1123 src_fourcc,
1124 yuv_color_space);
1125 return 0;
1126 }
1127
1128 #ifdef __cplusplus
1129 } // extern "C"
1130 } // namespace libyuv
1131 #endif
1132