1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when it is zero or positive, otherwise
// its negation.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width,
34 int src_height,
35 int dst_width,
36 int dst_height,
37 int src_stride,
38 int dst_stride,
39 const uint8_t* src_argb,
40 uint8_t* dst_argb,
41 int x,
42 int dx,
43 int y,
44 int dy,
45 enum FilterMode filtering) {
46 int j;
47 int row_stride = src_stride * (dy >> 16);
48 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
49 uint8_t* dst_argb, int dst_width) =
50 filtering == kFilterNone
51 ? ScaleARGBRowDown2_C
52 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
53 : ScaleARGBRowDown2Box_C);
54 (void)src_width;
55 (void)src_height;
56 (void)dx;
57 assert(dx == 65536 * 2); // Test scale factor of 2.
58 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
59 // Advance to odd row, even column.
60 if (filtering == kFilterBilinear) {
61 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
62 } else {
63 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
64 }
65
66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
67 if (TestCpuFlag(kCpuHasSSE2)) {
68 ScaleARGBRowDown2 =
69 filtering == kFilterNone
70 ? ScaleARGBRowDown2_Any_SSE2
71 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
72 : ScaleARGBRowDown2Box_Any_SSE2);
73 if (IS_ALIGNED(dst_width, 4)) {
74 ScaleARGBRowDown2 =
75 filtering == kFilterNone
76 ? ScaleARGBRowDown2_SSE2
77 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
78 : ScaleARGBRowDown2Box_SSE2);
79 }
80 }
81 #endif
82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
83 if (TestCpuFlag(kCpuHasNEON)) {
84 ScaleARGBRowDown2 =
85 filtering == kFilterNone
86 ? ScaleARGBRowDown2_Any_NEON
87 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
88 : ScaleARGBRowDown2Box_Any_NEON);
89 if (IS_ALIGNED(dst_width, 8)) {
90 ScaleARGBRowDown2 =
91 filtering == kFilterNone
92 ? ScaleARGBRowDown2_NEON
93 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
94 : ScaleARGBRowDown2Box_NEON);
95 }
96 }
97 #endif
98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
99 if (TestCpuFlag(kCpuHasMSA)) {
100 ScaleARGBRowDown2 =
101 filtering == kFilterNone
102 ? ScaleARGBRowDown2_Any_MSA
103 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
104 : ScaleARGBRowDown2Box_Any_MSA);
105 if (IS_ALIGNED(dst_width, 4)) {
106 ScaleARGBRowDown2 =
107 filtering == kFilterNone
108 ? ScaleARGBRowDown2_MSA
109 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
110 : ScaleARGBRowDown2Box_MSA);
111 }
112 }
113 #endif
114 #if defined(HAS_SCALEARGBROWDOWN2_MMI)
115 if (TestCpuFlag(kCpuHasMMI)) {
116 ScaleARGBRowDown2 =
117 filtering == kFilterNone
118 ? ScaleARGBRowDown2_Any_MMI
119 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
120 : ScaleARGBRowDown2Box_Any_MMI);
121 if (IS_ALIGNED(dst_width, 2)) {
122 ScaleARGBRowDown2 =
123 filtering == kFilterNone
124 ? ScaleARGBRowDown2_MMI
125 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
126 : ScaleARGBRowDown2Box_MMI);
127 }
128 }
129 #endif
130
131 if (filtering == kFilterLinear) {
132 src_stride = 0;
133 }
134 for (j = 0; j < dst_height; ++j) {
135 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
136 src_argb += row_stride;
137 dst_argb += dst_stride;
138 }
139 }
140
141 // ScaleARGB ARGB, 1/4
142 // This is an optimized version for scaling down a ARGB to 1/4 of
143 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)144 static void ScaleARGBDown4Box(int src_width,
145 int src_height,
146 int dst_width,
147 int dst_height,
148 int src_stride,
149 int dst_stride,
150 const uint8_t* src_argb,
151 uint8_t* dst_argb,
152 int x,
153 int dx,
154 int y,
155 int dy) {
156 int j;
157 // Allocate 2 rows of ARGB.
158 const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
159 align_buffer_64(row, kRowSize * 2);
160 int row_stride = src_stride * (dy >> 16);
161 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
162 uint8_t* dst_argb, int dst_width) =
163 ScaleARGBRowDown2Box_C;
164 // Advance to odd row, even column.
165 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
166 (void)src_width;
167 (void)src_height;
168 (void)dx;
169 assert(dx == 65536 * 4); // Test scale factor of 4.
170 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
171 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
172 if (TestCpuFlag(kCpuHasSSE2)) {
173 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
174 if (IS_ALIGNED(dst_width, 4)) {
175 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
176 }
177 }
178 #endif
179 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
180 if (TestCpuFlag(kCpuHasNEON)) {
181 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
182 if (IS_ALIGNED(dst_width, 8)) {
183 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
184 }
185 }
186 #endif
187
188 for (j = 0; j < dst_height; ++j) {
189 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
190 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
191 dst_width * 2);
192 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
193 src_argb += row_stride;
194 dst_argb += dst_stride;
195 }
196 free_aligned_buffer_64(row);
197 }
198
199 // ScaleARGB ARGB Even
200 // This is an optimized version for scaling down a ARGB to even
201 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)202 static void ScaleARGBDownEven(int src_width,
203 int src_height,
204 int dst_width,
205 int dst_height,
206 int src_stride,
207 int dst_stride,
208 const uint8_t* src_argb,
209 uint8_t* dst_argb,
210 int x,
211 int dx,
212 int y,
213 int dy,
214 enum FilterMode filtering) {
215 int j;
216 int col_step = dx >> 16;
217 int row_stride = (dy >> 16) * src_stride;
218 void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
219 int src_step, uint8_t* dst_argb, int dst_width) =
220 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
221 (void)src_width;
222 (void)src_height;
223 assert(IS_ALIGNED(src_width, 2));
224 assert(IS_ALIGNED(src_height, 2));
225 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
226 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
227 if (TestCpuFlag(kCpuHasSSE2)) {
228 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
229 : ScaleARGBRowDownEven_Any_SSE2;
230 if (IS_ALIGNED(dst_width, 4)) {
231 ScaleARGBRowDownEven =
232 filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
233 }
234 }
235 #endif
236 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
237 if (TestCpuFlag(kCpuHasNEON)) {
238 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
239 : ScaleARGBRowDownEven_Any_NEON;
240 if (IS_ALIGNED(dst_width, 4)) {
241 ScaleARGBRowDownEven =
242 filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
243 }
244 }
245 #endif
246 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
247 if (TestCpuFlag(kCpuHasMSA)) {
248 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
249 : ScaleARGBRowDownEven_Any_MSA;
250 if (IS_ALIGNED(dst_width, 4)) {
251 ScaleARGBRowDownEven =
252 filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
253 }
254 }
255 #endif
256 #if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
257 if (TestCpuFlag(kCpuHasMMI)) {
258 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
259 : ScaleARGBRowDownEven_Any_MMI;
260 if (IS_ALIGNED(dst_width, 2)) {
261 ScaleARGBRowDownEven =
262 filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
263 }
264 }
265 #endif
266
267 if (filtering == kFilterLinear) {
268 src_stride = 0;
269 }
270 for (j = 0; j < dst_height; ++j) {
271 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
272 src_argb += row_stride;
273 dst_argb += dst_stride;
274 }
275 }
276
277 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)278 static void ScaleARGBBilinearDown(int src_width,
279 int src_height,
280 int dst_width,
281 int dst_height,
282 int src_stride,
283 int dst_stride,
284 const uint8_t* src_argb,
285 uint8_t* dst_argb,
286 int x,
287 int dx,
288 int y,
289 int dy,
290 enum FilterMode filtering) {
291 int j;
292 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
293 ptrdiff_t src_stride, int dst_width,
294 int source_y_fraction) = InterpolateRow_C;
295 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
296 int dst_width, int x, int dx) =
297 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
298 int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
299 int64_t xl = (dx >= 0) ? x : xlast;
300 int64_t xr = (dx >= 0) ? xlast : x;
301 int clip_src_width;
302 xl = (xl >> 16) & ~3; // Left edge aligned.
303 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
304 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
305 if (xr > src_width) {
306 xr = src_width;
307 }
308 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
309 src_argb += xl * 4;
310 x -= (int)(xl << 16);
311 #if defined(HAS_INTERPOLATEROW_SSSE3)
312 if (TestCpuFlag(kCpuHasSSSE3)) {
313 InterpolateRow = InterpolateRow_Any_SSSE3;
314 if (IS_ALIGNED(clip_src_width, 16)) {
315 InterpolateRow = InterpolateRow_SSSE3;
316 }
317 }
318 #endif
319 #if defined(HAS_INTERPOLATEROW_AVX2)
320 if (TestCpuFlag(kCpuHasAVX2)) {
321 InterpolateRow = InterpolateRow_Any_AVX2;
322 if (IS_ALIGNED(clip_src_width, 32)) {
323 InterpolateRow = InterpolateRow_AVX2;
324 }
325 }
326 #endif
327 #if defined(HAS_INTERPOLATEROW_NEON)
328 if (TestCpuFlag(kCpuHasNEON)) {
329 InterpolateRow = InterpolateRow_Any_NEON;
330 if (IS_ALIGNED(clip_src_width, 16)) {
331 InterpolateRow = InterpolateRow_NEON;
332 }
333 }
334 #endif
335 #if defined(HAS_INTERPOLATEROW_MSA)
336 if (TestCpuFlag(kCpuHasMSA)) {
337 InterpolateRow = InterpolateRow_Any_MSA;
338 if (IS_ALIGNED(clip_src_width, 32)) {
339 InterpolateRow = InterpolateRow_MSA;
340 }
341 }
342 #endif
343 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
344 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
345 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
346 }
347 #endif
348 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
349 if (TestCpuFlag(kCpuHasNEON)) {
350 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
351 if (IS_ALIGNED(dst_width, 4)) {
352 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
353 }
354 }
355 #endif
356 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
357 if (TestCpuFlag(kCpuHasMSA)) {
358 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
359 if (IS_ALIGNED(dst_width, 8)) {
360 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
361 }
362 }
363 #endif
364 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
365 // Allocate a row of ARGB.
366 {
367 align_buffer_64(row, clip_src_width * 4);
368
369 const int max_y = (src_height - 1) << 16;
370 if (y > max_y) {
371 y = max_y;
372 }
373 for (j = 0; j < dst_height; ++j) {
374 int yi = y >> 16;
375 const uint8_t* src = src_argb + yi * src_stride;
376 if (filtering == kFilterLinear) {
377 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
378 } else {
379 int yf = (y >> 8) & 255;
380 InterpolateRow(row, src, src_stride, clip_src_width, yf);
381 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
382 }
383 dst_argb += dst_stride;
384 y += dy;
385 if (y > max_y) {
386 y = max_y;
387 }
388 }
389 free_aligned_buffer_64(row);
390 }
391 }
392
393 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)394 static void ScaleARGBBilinearUp(int src_width,
395 int src_height,
396 int dst_width,
397 int dst_height,
398 int src_stride,
399 int dst_stride,
400 const uint8_t* src_argb,
401 uint8_t* dst_argb,
402 int x,
403 int dx,
404 int y,
405 int dy,
406 enum FilterMode filtering) {
407 int j;
408 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
409 ptrdiff_t src_stride, int dst_width,
410 int source_y_fraction) = InterpolateRow_C;
411 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
412 int dst_width, int x, int dx) =
413 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
414 const int max_y = (src_height - 1) << 16;
415 #if defined(HAS_INTERPOLATEROW_SSSE3)
416 if (TestCpuFlag(kCpuHasSSSE3)) {
417 InterpolateRow = InterpolateRow_Any_SSSE3;
418 if (IS_ALIGNED(dst_width, 4)) {
419 InterpolateRow = InterpolateRow_SSSE3;
420 }
421 }
422 #endif
423 #if defined(HAS_INTERPOLATEROW_AVX2)
424 if (TestCpuFlag(kCpuHasAVX2)) {
425 InterpolateRow = InterpolateRow_Any_AVX2;
426 if (IS_ALIGNED(dst_width, 8)) {
427 InterpolateRow = InterpolateRow_AVX2;
428 }
429 }
430 #endif
431 #if defined(HAS_INTERPOLATEROW_NEON)
432 if (TestCpuFlag(kCpuHasNEON)) {
433 InterpolateRow = InterpolateRow_Any_NEON;
434 if (IS_ALIGNED(dst_width, 4)) {
435 InterpolateRow = InterpolateRow_NEON;
436 }
437 }
438 #endif
439 #if defined(HAS_INTERPOLATEROW_MSA)
440 if (TestCpuFlag(kCpuHasMSA)) {
441 InterpolateRow = InterpolateRow_Any_MSA;
442 if (IS_ALIGNED(dst_width, 8)) {
443 InterpolateRow = InterpolateRow_MSA;
444 }
445 }
446 #endif
447 #if defined(HAS_INTERPOLATEROW_MMI)
448 if (TestCpuFlag(kCpuHasMMI)) {
449 InterpolateRow = InterpolateRow_Any_MMI;
450 if (IS_ALIGNED(dst_width, 2)) {
451 InterpolateRow = InterpolateRow_MMI;
452 }
453 }
454 #endif
455 if (src_width >= 32768) {
456 ScaleARGBFilterCols =
457 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
458 }
459 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
460 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
461 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
462 }
463 #endif
464 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
465 if (filtering && TestCpuFlag(kCpuHasNEON)) {
466 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
467 if (IS_ALIGNED(dst_width, 4)) {
468 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
469 }
470 }
471 #endif
472 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
473 if (filtering && TestCpuFlag(kCpuHasMSA)) {
474 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
475 if (IS_ALIGNED(dst_width, 8)) {
476 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
477 }
478 }
479 #endif
480 #if defined(HAS_SCALEARGBCOLS_SSE2)
481 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
482 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
483 }
484 #endif
485 #if defined(HAS_SCALEARGBCOLS_NEON)
486 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
487 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
488 if (IS_ALIGNED(dst_width, 8)) {
489 ScaleARGBFilterCols = ScaleARGBCols_NEON;
490 }
491 }
492 #endif
493 #if defined(HAS_SCALEARGBCOLS_MSA)
494 if (!filtering && TestCpuFlag(kCpuHasMSA)) {
495 ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
496 if (IS_ALIGNED(dst_width, 4)) {
497 ScaleARGBFilterCols = ScaleARGBCols_MSA;
498 }
499 }
500 #endif
501 #if defined(HAS_SCALEARGBCOLS_MMI)
502 if (!filtering && TestCpuFlag(kCpuHasMMI)) {
503 ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
504 if (IS_ALIGNED(dst_width, 1)) {
505 ScaleARGBFilterCols = ScaleARGBCols_MMI;
506 }
507 }
508 #endif
509 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
510 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
511 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
512 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
513 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
514 }
515 #endif
516 #if defined(HAS_SCALEARGBCOLSUP2_MMI)
517 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
518 ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
519 }
520 #endif
521 }
522
523 if (y > max_y) {
524 y = max_y;
525 }
526
527 {
528 int yi = y >> 16;
529 const uint8_t* src = src_argb + yi * src_stride;
530
531 // Allocate 2 rows of ARGB.
532 const int kRowSize = (dst_width * 4 + 31) & ~31;
533 align_buffer_64(row, kRowSize * 2);
534
535 uint8_t* rowptr = row;
536 int rowstride = kRowSize;
537 int lasty = yi;
538
539 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
540 if (src_height > 1) {
541 src += src_stride;
542 }
543 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
544 src += src_stride;
545
546 for (j = 0; j < dst_height; ++j) {
547 yi = y >> 16;
548 if (yi != lasty) {
549 if (y > max_y) {
550 y = max_y;
551 yi = y >> 16;
552 src = src_argb + yi * src_stride;
553 }
554 if (yi != lasty) {
555 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
556 rowptr += rowstride;
557 rowstride = -rowstride;
558 lasty = yi;
559 src += src_stride;
560 }
561 }
562 if (filtering == kFilterLinear) {
563 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
564 } else {
565 int yf = (y >> 8) & 255;
566 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
567 }
568 dst_argb += dst_stride;
569 y += dy;
570 }
571 free_aligned_buffer_64(row);
572 }
573 }
574
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
//
// Each source row needed is first converted to ARGB with I422ToARGBRow into
// a one row buffer, then scaled horizontally into a 2 row scratch buffer, and
// output rows are produced from the scratch rows by InterpolateRow (same
// scheme as ScaleARGBBilinearUp). The U and V row pointers advance on every
// second Y row.
// Only compiled when YUVSCALEUP is defined.
static void ScaleYUVToARGBBilinearUp(int src_width,
                                     int src_height,
                                     int dst_width,
                                     int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8_t* src_y,
                                     const uint8_t* src_u,
                                     const uint8_t* src_v,
                                     uint8_t* dst_argb,
                                     int x,
                                     int dx,
                                     int y,
                                     int dy,
                                     enum FilterMode filtering) {
  int j;
  void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
                        const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
      I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToARGBRow = I422ToARGBRow_Any_MSA;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_MSA;
    }
  }
#endif

  void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols =
        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
  if (!filtering && TestCpuFlag(kCpuHasMMI)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleARGBFilterCols = ScaleARGBCols_MMI;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  // Row offsets are computed in ptrdiff_t to avoid int overflow.
  const uint8_t* src_row_y = src_y + yi * (ptrdiff_t)src_stride_y;
  const uint8_t* src_row_u = src_u + uv_yi * (ptrdiff_t)src_stride_u;
  const uint8_t* src_row_v = src_v + uv_yi * (ptrdiff_t)src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  uint8_t* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Convert the first 2 rows of YUV to ARGB before scaling them. The column
  // scaler reads 4 byte ARGB pixels, so it must not be pointed at the Y plane.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        // Clamp to the last source row and re-derive the plane pointers.
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * (ptrdiff_t)src_stride_y;
        src_row_u = src_u + uv_yi * (ptrdiff_t)src_stride_u;
        src_row_v = src_v + uv_yi * (ptrdiff_t)src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        // Swap which scratch row is current.
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;  // Top 8 bits of the fractional part of y.
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  // The conversion buffer is named argb_row; release it under that name.
  free_aligned_buffer_64(argb_row);
}
#endif
816
817 // Scale ARGB to/from any dimensions, without interpolation.
818 // Fixed point math is used for performance: The upper 16 bits
819 // of x and dx is the integer part of the source position and
820 // the lower 16 bits are the fixed decimal part.
821
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)822 static void ScaleARGBSimple(int src_width,
823 int src_height,
824 int dst_width,
825 int dst_height,
826 int src_stride,
827 int dst_stride,
828 const uint8_t* src_argb,
829 uint8_t* dst_argb,
830 int x,
831 int dx,
832 int y,
833 int dy) {
834 int j;
835 void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
836 int dst_width, int x, int dx) =
837 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
838 (void)src_height;
839 #if defined(HAS_SCALEARGBCOLS_SSE2)
840 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
841 ScaleARGBCols = ScaleARGBCols_SSE2;
842 }
843 #endif
844 #if defined(HAS_SCALEARGBCOLS_NEON)
845 if (TestCpuFlag(kCpuHasNEON)) {
846 ScaleARGBCols = ScaleARGBCols_Any_NEON;
847 if (IS_ALIGNED(dst_width, 8)) {
848 ScaleARGBCols = ScaleARGBCols_NEON;
849 }
850 }
851 #endif
852 #if defined(HAS_SCALEARGBCOLS_MSA)
853 if (TestCpuFlag(kCpuHasMSA)) {
854 ScaleARGBCols = ScaleARGBCols_Any_MSA;
855 if (IS_ALIGNED(dst_width, 4)) {
856 ScaleARGBCols = ScaleARGBCols_MSA;
857 }
858 }
859 #endif
860 #if defined(HAS_SCALEARGBCOLS_MMI)
861 if (TestCpuFlag(kCpuHasMMI)) {
862 ScaleARGBCols = ScaleARGBCols_Any_MMI;
863 if (IS_ALIGNED(dst_width, 1)) {
864 ScaleARGBCols = ScaleARGBCols_MMI;
865 }
866 }
867 #endif
868 if (src_width * 2 == dst_width && x < 0x8000) {
869 ScaleARGBCols = ScaleARGBColsUp2_C;
870 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
871 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
872 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
873 }
874 #endif
875 #if defined(HAS_SCALEARGBCOLSUP2_MMI)
876 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
877 ScaleARGBCols = ScaleARGBColsUp2_MMI;
878 }
879 #endif
880 }
881
882 for (j = 0; j < dst_height; ++j) {
883 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
884 dx);
885 dst_argb += dst_stride;
886 y += dy;
887 }
888 }
889
890 // ScaleARGB a ARGB.
891 // This function in turn calls a scaling function
892 // suitable for handling the desired resolutions.
ScaleARGB(const uint8_t * src,int src_stride,int src_width,int src_height,uint8_t * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)893 static void ScaleARGB(const uint8_t* src,
894 int src_stride,
895 int src_width,
896 int src_height,
897 uint8_t* dst,
898 int dst_stride,
899 int dst_width,
900 int dst_height,
901 int clip_x,
902 int clip_y,
903 int clip_width,
904 int clip_height,
905 enum FilterMode filtering) {
906 // Initial source x/y coordinate and step values as 16.16 fixed point.
907 int x = 0;
908 int y = 0;
909 int dx = 0;
910 int dy = 0;
911 // ARGB does not support box filter yet, but allow the user to pass it.
912 // Simplify filtering when possible.
913 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
914 filtering);
915
916 // Negative src_height means invert the image.
917 if (src_height < 0) {
918 src_height = -src_height;
919 src = src + (src_height - 1) * src_stride;
920 src_stride = -src_stride;
921 }
922 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
923 &dx, &dy);
924 src_width = Abs(src_width);
925 if (clip_x) {
926 int64_t clipf = (int64_t)(clip_x)*dx;
927 x += (clipf & 0xffff);
928 src += (clipf >> 16) * 4;
929 dst += clip_x * 4;
930 }
931 if (clip_y) {
932 int64_t clipf = (int64_t)(clip_y)*dy;
933 y += (clipf & 0xffff);
934 src += (clipf >> 16) * src_stride;
935 dst += clip_y * dst_stride;
936 }
937
938 // Special case for integer step values.
939 if (((dx | dy) & 0xffff) == 0) {
940 if (!dx || !dy) { // 1 pixel wide and/or tall.
941 filtering = kFilterNone;
942 } else {
943 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
944 if (!(dx & 0x10000) && !(dy & 0x10000)) {
945 if (dx == 0x20000) {
946 // Optimized 1/2 downsample.
947 ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
948 src_stride, dst_stride, src, dst, x, dx, y, dy,
949 filtering);
950 return;
951 }
952 if (dx == 0x40000 && filtering == kFilterBox) {
953 // Optimized 1/4 box downsample.
954 ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
955 src_stride, dst_stride, src, dst, x, dx, y, dy);
956 return;
957 }
958 ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
959 src_stride, dst_stride, src, dst, x, dx, y, dy,
960 filtering);
961 return;
962 }
963 // Optimized odd scale down. ie 3, 5, 7, 9x.
964 if ((dx & 0x10000) && (dy & 0x10000)) {
965 filtering = kFilterNone;
966 if (dx == 0x10000 && dy == 0x10000) {
967 // Straight copy.
968 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
969 dst, dst_stride, clip_width, clip_height);
970 return;
971 }
972 }
973 }
974 }
975 if (dx == 0x10000 && (x & 0xffff) == 0) {
976 // Arbitrary scale vertically, but unscaled vertically.
977 ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
978 dst_stride, src, dst, x, y, dy, 4, filtering);
979 return;
980 }
981 if (filtering && dy < 65536) {
982 ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
983 src_stride, dst_stride, src, dst, x, dx, y, dy,
984 filtering);
985 return;
986 }
987 if (filtering) {
988 ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
989 src_stride, dst_stride, src, dst, x, dx, y, dy,
990 filtering);
991 return;
992 }
993 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
994 dst_stride, src, dst, x, dx, y, dy);
995 }
996
997 LIBYUV_API
ARGBScaleClip(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)998 int ARGBScaleClip(const uint8_t* src_argb,
999 int src_stride_argb,
1000 int src_width,
1001 int src_height,
1002 uint8_t* dst_argb,
1003 int dst_stride_argb,
1004 int dst_width,
1005 int dst_height,
1006 int clip_x,
1007 int clip_y,
1008 int clip_width,
1009 int clip_height,
1010 enum FilterMode filtering) {
1011 if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
1012 dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
1013 clip_width > 32768 || clip_height > 32768 ||
1014 (clip_x + clip_width) > dst_width ||
1015 (clip_y + clip_height) > dst_height) {
1016 return -1;
1017 }
1018 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
1019 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
1020 clip_height, filtering);
1021 return 0;
1022 }
1023
1024 // Scale an ARGB image.
1025 LIBYUV_API
ARGBScale(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)1026 int ARGBScale(const uint8_t* src_argb,
1027 int src_stride_argb,
1028 int src_width,
1029 int src_height,
1030 uint8_t* dst_argb,
1031 int dst_stride_argb,
1032 int dst_width,
1033 int dst_height,
1034 enum FilterMode filtering) {
1035 if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
1036 src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
1037 return -1;
1038 }
1039 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
1040 dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
1041 filtering);
1042 return 0;
1043 }
1044
1045 // Scale with YUV conversion to ARGB and clipping.
1046 LIBYUV_API
YUVToARGBScaleClip(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint32_t src_fourcc,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,uint32_t dst_fourcc,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)1047 int YUVToARGBScaleClip(const uint8_t* src_y,
1048 int src_stride_y,
1049 const uint8_t* src_u,
1050 int src_stride_u,
1051 const uint8_t* src_v,
1052 int src_stride_v,
1053 uint32_t src_fourcc,
1054 int src_width,
1055 int src_height,
1056 uint8_t* dst_argb,
1057 int dst_stride_argb,
1058 uint32_t dst_fourcc,
1059 int dst_width,
1060 int dst_height,
1061 int clip_x,
1062 int clip_y,
1063 int clip_width,
1064 int clip_height,
1065 enum FilterMode filtering) {
1066 uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
1067 int r;
1068 (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
1069 (void)dst_fourcc;
1070 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
1071 argb_buffer, src_width * 4, src_width, src_height);
1072
1073 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
1074 dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
1075 clip_width, clip_height, filtering);
1076 free(argb_buffer);
1077 return r;
1078 }
1079
1080 #ifdef __cplusplus
1081 } // extern "C"
1082 } // namespace libyuv
1083 #endif
1084