1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
#include "libyuv/scale.h"

#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width,
34 int src_height,
35 int dst_width,
36 int dst_height,
37 int src_stride,
38 int dst_stride,
39 const uint8_t* src_argb,
40 uint8_t* dst_argb,
41 int x,
42 int dx,
43 int y,
44 int dy,
45 enum FilterMode filtering) {
46 int j;
47 int row_stride = src_stride * (dy >> 16);
48 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
49 uint8_t* dst_argb, int dst_width) =
50 filtering == kFilterNone
51 ? ScaleARGBRowDown2_C
52 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
53 : ScaleARGBRowDown2Box_C);
54 (void)src_width;
55 (void)src_height;
56 (void)dx;
57 assert(dx == 65536 * 2); // Test scale factor of 2.
58 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
59 // Advance to odd row, even column.
60 if (filtering == kFilterBilinear) {
61 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
62 } else {
63 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
64 }
65
66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
67 if (TestCpuFlag(kCpuHasSSE2)) {
68 ScaleARGBRowDown2 =
69 filtering == kFilterNone
70 ? ScaleARGBRowDown2_Any_SSE2
71 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
72 : ScaleARGBRowDown2Box_Any_SSE2);
73 if (IS_ALIGNED(dst_width, 4)) {
74 ScaleARGBRowDown2 =
75 filtering == kFilterNone
76 ? ScaleARGBRowDown2_SSE2
77 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
78 : ScaleARGBRowDown2Box_SSE2);
79 }
80 }
81 #endif
82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
83 if (TestCpuFlag(kCpuHasNEON)) {
84 ScaleARGBRowDown2 =
85 filtering == kFilterNone
86 ? ScaleARGBRowDown2_Any_NEON
87 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
88 : ScaleARGBRowDown2Box_Any_NEON);
89 if (IS_ALIGNED(dst_width, 8)) {
90 ScaleARGBRowDown2 =
91 filtering == kFilterNone
92 ? ScaleARGBRowDown2_NEON
93 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
94 : ScaleARGBRowDown2Box_NEON);
95 }
96 }
97 #endif
98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
99 if (TestCpuFlag(kCpuHasMSA)) {
100 ScaleARGBRowDown2 =
101 filtering == kFilterNone
102 ? ScaleARGBRowDown2_Any_MSA
103 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
104 : ScaleARGBRowDown2Box_Any_MSA);
105 if (IS_ALIGNED(dst_width, 4)) {
106 ScaleARGBRowDown2 =
107 filtering == kFilterNone
108 ? ScaleARGBRowDown2_MSA
109 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
110 : ScaleARGBRowDown2Box_MSA);
111 }
112 }
113 #endif
114 #if defined(HAS_SCALEARGBROWDOWN2_MMI)
115 if (TestCpuFlag(kCpuHasMMI)) {
116 ScaleARGBRowDown2 =
117 filtering == kFilterNone
118 ? ScaleARGBRowDown2_Any_MMI
119 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
120 : ScaleARGBRowDown2Box_Any_MMI);
121 if (IS_ALIGNED(dst_width, 2)) {
122 ScaleARGBRowDown2 =
123 filtering == kFilterNone
124 ? ScaleARGBRowDown2_MMI
125 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
126 : ScaleARGBRowDown2Box_MMI);
127 }
128 }
129 #endif
130
131 if (filtering == kFilterLinear) {
132 src_stride = 0;
133 }
134 for (j = 0; j < dst_height; ++j) {
135 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
136 src_argb += row_stride;
137 dst_argb += dst_stride;
138 }
139 }
140
141 // ScaleARGB ARGB, 1/4
142 // This is an optimized version for scaling down a ARGB to 1/4 of
143 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)144 static void ScaleARGBDown4Box(int src_width,
145 int src_height,
146 int dst_width,
147 int dst_height,
148 int src_stride,
149 int dst_stride,
150 const uint8_t* src_argb,
151 uint8_t* dst_argb,
152 int x,
153 int dx,
154 int y,
155 int dy) {
156 int j;
157 // Allocate 2 rows of ARGB.
158 const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
159 align_buffer_64(row, kRowSize * 2);
160 int row_stride = src_stride * (dy >> 16);
161 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
162 uint8_t* dst_argb, int dst_width) =
163 ScaleARGBRowDown2Box_C;
164 // Advance to odd row, even column.
165 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
166 (void)src_width;
167 (void)src_height;
168 (void)dx;
169 assert(dx == 65536 * 4); // Test scale factor of 4.
170 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
171 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
172 if (TestCpuFlag(kCpuHasSSE2)) {
173 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
174 if (IS_ALIGNED(dst_width, 4)) {
175 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
176 }
177 }
178 #endif
179 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
180 if (TestCpuFlag(kCpuHasNEON)) {
181 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
182 if (IS_ALIGNED(dst_width, 8)) {
183 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
184 }
185 }
186 #endif
187
188 for (j = 0; j < dst_height; ++j) {
189 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
190 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
191 dst_width * 2);
192 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
193 src_argb += row_stride;
194 dst_argb += dst_stride;
195 }
196 free_aligned_buffer_64(row);
197 }
198
199 // ScaleARGB ARGB Even
200 // This is an optimized version for scaling down a ARGB to even
201 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)202 static void ScaleARGBDownEven(int src_width,
203 int src_height,
204 int dst_width,
205 int dst_height,
206 int src_stride,
207 int dst_stride,
208 const uint8_t* src_argb,
209 uint8_t* dst_argb,
210 int x,
211 int dx,
212 int y,
213 int dy,
214 enum FilterMode filtering) {
215 int j;
216 int col_step = dx >> 16;
217 int row_stride = (dy >> 16) * src_stride;
218 void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
219 int src_step, uint8_t* dst_argb, int dst_width) =
220 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
221 (void)src_width;
222 (void)src_height;
223 assert(IS_ALIGNED(src_width, 2));
224 assert(IS_ALIGNED(src_height, 2));
225 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
226 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
227 if (TestCpuFlag(kCpuHasSSE2)) {
228 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
229 : ScaleARGBRowDownEven_Any_SSE2;
230 if (IS_ALIGNED(dst_width, 4)) {
231 ScaleARGBRowDownEven =
232 filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
233 }
234 }
235 #endif
236 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
237 if (TestCpuFlag(kCpuHasNEON)) {
238 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
239 : ScaleARGBRowDownEven_Any_NEON;
240 if (IS_ALIGNED(dst_width, 4)) {
241 ScaleARGBRowDownEven =
242 filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
243 }
244 }
245 #endif
246 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
247 if (TestCpuFlag(kCpuHasMSA)) {
248 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
249 : ScaleARGBRowDownEven_Any_MSA;
250 if (IS_ALIGNED(dst_width, 4)) {
251 ScaleARGBRowDownEven =
252 filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
253 }
254 }
255 #endif
256 #if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
257 if (TestCpuFlag(kCpuHasMMI)) {
258 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
259 : ScaleARGBRowDownEven_Any_MMI;
260 if (IS_ALIGNED(dst_width, 2)) {
261 ScaleARGBRowDownEven =
262 filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
263 }
264 }
265 #endif
266
267 if (filtering == kFilterLinear) {
268 src_stride = 0;
269 }
270 for (j = 0; j < dst_height; ++j) {
271 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
272 src_argb += row_stride;
273 dst_argb += dst_stride;
274 }
275 }
276
277 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)278 static void ScaleARGBBilinearDown(int src_width,
279 int src_height,
280 int dst_width,
281 int dst_height,
282 int src_stride,
283 int dst_stride,
284 const uint8_t* src_argb,
285 uint8_t* dst_argb,
286 int x,
287 int dx,
288 int y,
289 int dy,
290 enum FilterMode filtering) {
291 int j;
292 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
293 ptrdiff_t src_stride, int dst_width,
294 int source_y_fraction) = InterpolateRow_C;
295 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
296 int dst_width, int x, int dx) =
297 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
298 int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
299 int64_t xl = (dx >= 0) ? x : xlast;
300 int64_t xr = (dx >= 0) ? xlast : x;
301 int clip_src_width;
302 xl = (xl >> 16) & ~3; // Left edge aligned.
303 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
304 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
305 if (xr > src_width) {
306 xr = src_width;
307 }
308 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
309 src_argb += xl * 4;
310 x -= (int)(xl << 16);
311 #if defined(HAS_INTERPOLATEROW_SSSE3)
312 if (TestCpuFlag(kCpuHasSSSE3)) {
313 InterpolateRow = InterpolateRow_Any_SSSE3;
314 if (IS_ALIGNED(clip_src_width, 16)) {
315 InterpolateRow = InterpolateRow_SSSE3;
316 }
317 }
318 #endif
319 #if defined(HAS_INTERPOLATEROW_AVX2)
320 if (TestCpuFlag(kCpuHasAVX2)) {
321 InterpolateRow = InterpolateRow_Any_AVX2;
322 if (IS_ALIGNED(clip_src_width, 32)) {
323 InterpolateRow = InterpolateRow_AVX2;
324 }
325 }
326 #endif
327 #if defined(HAS_INTERPOLATEROW_NEON)
328 if (TestCpuFlag(kCpuHasNEON)) {
329 InterpolateRow = InterpolateRow_Any_NEON;
330 if (IS_ALIGNED(clip_src_width, 16)) {
331 InterpolateRow = InterpolateRow_NEON;
332 }
333 }
334 #endif
335 #if defined(HAS_INTERPOLATEROW_MSA)
336 if (TestCpuFlag(kCpuHasMSA)) {
337 InterpolateRow = InterpolateRow_Any_MSA;
338 if (IS_ALIGNED(clip_src_width, 32)) {
339 InterpolateRow = InterpolateRow_MSA;
340 }
341 }
342 #endif
343 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
344 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
345 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
346 }
347 #endif
348 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
349 if (TestCpuFlag(kCpuHasNEON)) {
350 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
351 if (IS_ALIGNED(dst_width, 4)) {
352 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
353 }
354 }
355 #endif
356 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
357 if (TestCpuFlag(kCpuHasMSA)) {
358 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
359 if (IS_ALIGNED(dst_width, 8)) {
360 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
361 }
362 }
363 #endif
364 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
365 // Allocate a row of ARGB.
366 {
367 align_buffer_64(row, clip_src_width * 4);
368
369 const int max_y = (src_height - 1) << 16;
370 if (y > max_y) {
371 y = max_y;
372 }
373 for (j = 0; j < dst_height; ++j) {
374 int yi = y >> 16;
375 const uint8_t* src = src_argb + yi * src_stride;
376 if (filtering == kFilterLinear) {
377 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
378 } else {
379 int yf = (y >> 8) & 255;
380 InterpolateRow(row, src, src_stride, clip_src_width, yf);
381 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
382 }
383 dst_argb += dst_stride;
384 y += dy;
385 if (y > max_y) {
386 y = max_y;
387 }
388 }
389 free_aligned_buffer_64(row);
390 }
391 }
392
393 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)394 static void ScaleARGBBilinearUp(int src_width,
395 int src_height,
396 int dst_width,
397 int dst_height,
398 int src_stride,
399 int dst_stride,
400 const uint8_t* src_argb,
401 uint8_t* dst_argb,
402 int x,
403 int dx,
404 int y,
405 int dy,
406 enum FilterMode filtering) {
407 int j;
408 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
409 ptrdiff_t src_stride, int dst_width,
410 int source_y_fraction) = InterpolateRow_C;
411 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
412 int dst_width, int x, int dx) =
413 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
414 const int max_y = (src_height - 1) << 16;
415 #if defined(HAS_INTERPOLATEROW_SSSE3)
416 if (TestCpuFlag(kCpuHasSSSE3)) {
417 InterpolateRow = InterpolateRow_Any_SSSE3;
418 if (IS_ALIGNED(dst_width, 4)) {
419 InterpolateRow = InterpolateRow_SSSE3;
420 }
421 }
422 #endif
423 #if defined(HAS_INTERPOLATEROW_AVX2)
424 if (TestCpuFlag(kCpuHasAVX2)) {
425 InterpolateRow = InterpolateRow_Any_AVX2;
426 if (IS_ALIGNED(dst_width, 8)) {
427 InterpolateRow = InterpolateRow_AVX2;
428 }
429 }
430 #endif
431 #if defined(HAS_INTERPOLATEROW_NEON)
432 if (TestCpuFlag(kCpuHasNEON)) {
433 InterpolateRow = InterpolateRow_Any_NEON;
434 if (IS_ALIGNED(dst_width, 4)) {
435 InterpolateRow = InterpolateRow_NEON;
436 }
437 }
438 #endif
439 #if defined(HAS_INTERPOLATEROW_MSA)
440 if (TestCpuFlag(kCpuHasMSA)) {
441 InterpolateRow = InterpolateRow_Any_MSA;
442 if (IS_ALIGNED(dst_width, 8)) {
443 InterpolateRow = InterpolateRow_MSA;
444 }
445 }
446 #endif
447 #if defined(HAS_INTERPOLATEROW_MMI)
448 if (TestCpuFlag(kCpuHasMMI)) {
449 InterpolateRow = InterpolateRow_Any_MMI;
450 if (IS_ALIGNED(dst_width, 2)) {
451 InterpolateRow = InterpolateRow_MMI;
452 }
453 }
454 #endif
455 if (src_width >= 32768) {
456 ScaleARGBFilterCols =
457 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
458 }
459 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
460 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
461 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
462 }
463 #endif
464 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
465 if (filtering && TestCpuFlag(kCpuHasNEON)) {
466 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
467 if (IS_ALIGNED(dst_width, 4)) {
468 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
469 }
470 }
471 #endif
472 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
473 if (filtering && TestCpuFlag(kCpuHasMSA)) {
474 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
475 if (IS_ALIGNED(dst_width, 8)) {
476 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
477 }
478 }
479 #endif
480 #if defined(HAS_SCALEARGBCOLS_SSE2)
481 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
482 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
483 }
484 #endif
485 #if defined(HAS_SCALEARGBCOLS_NEON)
486 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
487 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
488 if (IS_ALIGNED(dst_width, 8)) {
489 ScaleARGBFilterCols = ScaleARGBCols_NEON;
490 }
491 }
492 #endif
493 #if defined(HAS_SCALEARGBCOLS_MSA)
494 if (!filtering && TestCpuFlag(kCpuHasMSA)) {
495 ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
496 if (IS_ALIGNED(dst_width, 4)) {
497 ScaleARGBFilterCols = ScaleARGBCols_MSA;
498 }
499 }
500 #endif
501 #if defined(HAS_SCALEARGBCOLS_MMI)
502 if (!filtering && TestCpuFlag(kCpuHasMMI)) {
503 ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
504 if (IS_ALIGNED(dst_width, 1)) {
505 ScaleARGBFilterCols = ScaleARGBCols_MMI;
506 }
507 }
508 #endif
509 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
510 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
511 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
512 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
513 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
514 }
515 #endif
516 #if defined(HAS_SCALEARGBCOLSUP2_MMI)
517 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
518 ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
519 }
520 #endif
521 }
522
523 if (y > max_y) {
524 y = max_y;
525 }
526
527 {
528 int yi = y >> 16;
529 const uint8_t* src = src_argb + yi * src_stride;
530
531 // Allocate 2 rows of ARGB.
532 const int kRowSize = (dst_width * 4 + 31) & ~31;
533 align_buffer_64(row, kRowSize * 2);
534
535 uint8_t* rowptr = row;
536 int rowstride = kRowSize;
537 int lasty = yi;
538
539 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
540 if (src_height > 1) {
541 src += src_stride;
542 }
543 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
544 src += src_stride;
545
546 for (j = 0; j < dst_height; ++j) {
547 yi = y >> 16;
548 if (yi != lasty) {
549 if (y > max_y) {
550 y = max_y;
551 yi = y >> 16;
552 src = src_argb + yi * src_stride;
553 }
554 if (yi != lasty) {
555 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
556 rowptr += rowstride;
557 rowstride = -rowstride;
558 lasty = yi;
559 src += src_stride;
560 }
561 }
562 if (filtering == kFilterLinear) {
563 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
564 } else {
565 int yf = (y >> 8) & 255;
566 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
567 }
568 dst_argb += dst_stride;
569 y += dy;
570 }
571 free_aligned_buffer_64(row);
572 }
573 }
574
575 #ifdef YUVSCALEUP
576 // Scale YUV to ARGB up with bilinear interpolation.
ScaleYUVToARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride_y,int src_stride_u,int src_stride_v,int dst_stride_argb,const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)577 static void ScaleYUVToARGBBilinearUp(int src_width,
578 int src_height,
579 int dst_width,
580 int dst_height,
581 int src_stride_y,
582 int src_stride_u,
583 int src_stride_v,
584 int dst_stride_argb,
585 const uint8_t* src_y,
586 const uint8_t* src_u,
587 const uint8_t* src_v,
588 uint8_t* dst_argb,
589 int x,
590 int dx,
591 int y,
592 int dy,
593 enum FilterMode filtering) {
594 int j;
595 void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
596 const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
597 I422ToARGBRow_C;
598 #if defined(HAS_I422TOARGBROW_SSSE3)
599 if (TestCpuFlag(kCpuHasSSSE3)) {
600 I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
601 if (IS_ALIGNED(src_width, 8)) {
602 I422ToARGBRow = I422ToARGBRow_SSSE3;
603 }
604 }
605 #endif
606 #if defined(HAS_I422TOARGBROW_AVX2)
607 if (TestCpuFlag(kCpuHasAVX2)) {
608 I422ToARGBRow = I422ToARGBRow_Any_AVX2;
609 if (IS_ALIGNED(src_width, 16)) {
610 I422ToARGBRow = I422ToARGBRow_AVX2;
611 }
612 }
613 #endif
614 #if defined(HAS_I422TOARGBROW_NEON)
615 if (TestCpuFlag(kCpuHasNEON)) {
616 I422ToARGBRow = I422ToARGBRow_Any_NEON;
617 if (IS_ALIGNED(src_width, 8)) {
618 I422ToARGBRow = I422ToARGBRow_NEON;
619 }
620 }
621 #endif
622 #if defined(HAS_I422TOARGBROW_MSA)
623 if (TestCpuFlag(kCpuHasMSA)) {
624 I422ToARGBRow = I422ToARGBRow_Any_MSA;
625 if (IS_ALIGNED(src_width, 8)) {
626 I422ToARGBRow = I422ToARGBRow_MSA;
627 }
628 }
629 #endif
630 #if defined(HAS_I422TOARGBROW_MMI)
631 if (TestCpuFlag(kCpuHasMMI)) {
632 I422ToARGBRow = I422ToARGBRow_Any_MMI;
633 if (IS_ALIGNED(src_width, 4)) {
634 I422ToARGBRow = I422ToARGBRow_MMI;
635 }
636 }
637 #endif
638
639 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
640 ptrdiff_t src_stride, int dst_width,
641 int source_y_fraction) = InterpolateRow_C;
642 #if defined(HAS_INTERPOLATEROW_SSSE3)
643 if (TestCpuFlag(kCpuHasSSSE3)) {
644 InterpolateRow = InterpolateRow_Any_SSSE3;
645 if (IS_ALIGNED(dst_width, 4)) {
646 InterpolateRow = InterpolateRow_SSSE3;
647 }
648 }
649 #endif
650 #if defined(HAS_INTERPOLATEROW_AVX2)
651 if (TestCpuFlag(kCpuHasAVX2)) {
652 InterpolateRow = InterpolateRow_Any_AVX2;
653 if (IS_ALIGNED(dst_width, 8)) {
654 InterpolateRow = InterpolateRow_AVX2;
655 }
656 }
657 #endif
658 #if defined(HAS_INTERPOLATEROW_NEON)
659 if (TestCpuFlag(kCpuHasNEON)) {
660 InterpolateRow = InterpolateRow_Any_NEON;
661 if (IS_ALIGNED(dst_width, 4)) {
662 InterpolateRow = InterpolateRow_NEON;
663 }
664 }
665 #endif
666 #if defined(HAS_INTERPOLATEROW_MSA)
667 if (TestCpuFlag(kCpuHasMSA)) {
668 InterpolateRow = InterpolateRow_Any_MSA;
669 if (IS_ALIGNED(dst_width, 8)) {
670 InterpolateRow = InterpolateRow_MSA;
671 }
672 }
673 #endif
674
675 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
676 int dst_width, int x, int dx) =
677 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
678 if (src_width >= 32768) {
679 ScaleARGBFilterCols =
680 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
681 }
682 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
683 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
684 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
685 }
686 #endif
687 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
688 if (filtering && TestCpuFlag(kCpuHasNEON)) {
689 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
690 if (IS_ALIGNED(dst_width, 4)) {
691 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
692 }
693 }
694 #endif
695 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
696 if (filtering && TestCpuFlag(kCpuHasMSA)) {
697 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
698 if (IS_ALIGNED(dst_width, 8)) {
699 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
700 }
701 }
702 #endif
703 #if defined(HAS_SCALEARGBCOLS_SSE2)
704 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
705 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
706 }
707 #endif
708 #if defined(HAS_SCALEARGBCOLS_NEON)
709 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
710 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
711 if (IS_ALIGNED(dst_width, 8)) {
712 ScaleARGBFilterCols = ScaleARGBCols_NEON;
713 }
714 }
715 #endif
716 #if defined(HAS_SCALEARGBCOLS_MSA)
717 if (!filtering && TestCpuFlag(kCpuHasMSA)) {
718 ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
719 if (IS_ALIGNED(dst_width, 4)) {
720 ScaleARGBFilterCols = ScaleARGBCols_MSA;
721 }
722 }
723 #endif
724 #if defined(HAS_SCALEARGBCOLS_MMI)
725 if (!filtering && TestCpuFlag(kCpuHasMMI)) {
726 ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
727 if (IS_ALIGNED(dst_width, 1)) {
728 ScaleARGBFilterCols = ScaleARGBCols_MMI;
729 }
730 }
731 #endif
732 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
733 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
734 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
735 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
736 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
737 }
738 #endif
739 #if defined(HAS_SCALEARGBCOLSUP2_MMI)
740 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
741 ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
742 }
743 #endif
744 }
745
746 const int max_y = (src_height - 1) << 16;
747 if (y > max_y) {
748 y = max_y;
749 }
750 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
751 int yi = y >> 16;
752 int uv_yi = yi >> kYShift;
753 const uint8_t* src_row_y = src_y + yi * src_stride_y;
754 const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
755 const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
756
757 // Allocate 2 rows of ARGB.
758 const int kRowSize = (dst_width * 4 + 31) & ~31;
759 align_buffer_64(row, kRowSize * 2);
760
761 // Allocate 1 row of ARGB for source conversion.
762 align_buffer_64(argb_row, src_width * 4);
763
764 uint8_t* rowptr = row;
765 int rowstride = kRowSize;
766 int lasty = yi;
767
768 // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
769 ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
770 if (src_height > 1) {
771 src_row_y += src_stride_y;
772 if (yi & 1) {
773 src_row_u += src_stride_u;
774 src_row_v += src_stride_v;
775 }
776 }
777 ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
778 if (src_height > 2) {
779 src_row_y += src_stride_y;
780 if (!(yi & 1)) {
781 src_row_u += src_stride_u;
782 src_row_v += src_stride_v;
783 }
784 }
785
786 for (j = 0; j < dst_height; ++j) {
787 yi = y >> 16;
788 if (yi != lasty) {
789 if (y > max_y) {
790 y = max_y;
791 yi = y >> 16;
792 uv_yi = yi >> kYShift;
793 src_row_y = src_y + yi * src_stride_y;
794 src_row_u = src_u + uv_yi * src_stride_u;
795 src_row_v = src_v + uv_yi * src_stride_v;
796 }
797 if (yi != lasty) {
798 // TODO(fbarchard): Convert the clipped region of row.
799 I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
800 ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
801 rowptr += rowstride;
802 rowstride = -rowstride;
803 lasty = yi;
804 src_row_y += src_stride_y;
805 if (yi & 1) {
806 src_row_u += src_stride_u;
807 src_row_v += src_stride_v;
808 }
809 }
810 }
811 if (filtering == kFilterLinear) {
812 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
813 } else {
814 int yf = (y >> 8) & 255;
815 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
816 }
817 dst_argb += dst_stride_argb;
818 y += dy;
819 }
820 free_aligned_buffer_64(row);
821 free_aligned_buffer_64(row_argb);
822 }
823 #endif
824
825 // Scale ARGB to/from any dimensions, without interpolation.
826 // Fixed point math is used for performance: The upper 16 bits
827 // of x and dx is the integer part of the source position and
828 // the lower 16 bits are the fixed decimal part.
829
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)830 static void ScaleARGBSimple(int src_width,
831 int src_height,
832 int dst_width,
833 int dst_height,
834 int src_stride,
835 int dst_stride,
836 const uint8_t* src_argb,
837 uint8_t* dst_argb,
838 int x,
839 int dx,
840 int y,
841 int dy) {
842 int j;
843 void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
844 int dst_width, int x, int dx) =
845 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
846 (void)src_height;
847 #if defined(HAS_SCALEARGBCOLS_SSE2)
848 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
849 ScaleARGBCols = ScaleARGBCols_SSE2;
850 }
851 #endif
852 #if defined(HAS_SCALEARGBCOLS_NEON)
853 if (TestCpuFlag(kCpuHasNEON)) {
854 ScaleARGBCols = ScaleARGBCols_Any_NEON;
855 if (IS_ALIGNED(dst_width, 8)) {
856 ScaleARGBCols = ScaleARGBCols_NEON;
857 }
858 }
859 #endif
860 #if defined(HAS_SCALEARGBCOLS_MSA)
861 if (TestCpuFlag(kCpuHasMSA)) {
862 ScaleARGBCols = ScaleARGBCols_Any_MSA;
863 if (IS_ALIGNED(dst_width, 4)) {
864 ScaleARGBCols = ScaleARGBCols_MSA;
865 }
866 }
867 #endif
868 #if defined(HAS_SCALEARGBCOLS_MMI)
869 if (TestCpuFlag(kCpuHasMMI)) {
870 ScaleARGBCols = ScaleARGBCols_Any_MMI;
871 if (IS_ALIGNED(dst_width, 1)) {
872 ScaleARGBCols = ScaleARGBCols_MMI;
873 }
874 }
875 #endif
876 if (src_width * 2 == dst_width && x < 0x8000) {
877 ScaleARGBCols = ScaleARGBColsUp2_C;
878 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
879 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
880 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
881 }
882 #endif
883 #if defined(HAS_SCALEARGBCOLSUP2_MMI)
884 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
885 ScaleARGBCols = ScaleARGBColsUp2_MMI;
886 }
887 #endif
888 }
889
890 for (j = 0; j < dst_height; ++j) {
891 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
892 dx);
893 dst_argb += dst_stride;
894 y += dy;
895 }
896 }
897
898 // ScaleARGB a ARGB.
899 // This function in turn calls a scaling function
900 // suitable for handling the desired resolutions.
ScaleARGB(const uint8_t * src,int src_stride,int src_width,int src_height,uint8_t * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)901 static void ScaleARGB(const uint8_t* src,
902 int src_stride,
903 int src_width,
904 int src_height,
905 uint8_t* dst,
906 int dst_stride,
907 int dst_width,
908 int dst_height,
909 int clip_x,
910 int clip_y,
911 int clip_width,
912 int clip_height,
913 enum FilterMode filtering) {
914 // Initial source x/y coordinate and step values as 16.16 fixed point.
915 int x = 0;
916 int y = 0;
917 int dx = 0;
918 int dy = 0;
919 // ARGB does not support box filter yet, but allow the user to pass it.
920 // Simplify filtering when possible.
921 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
922 filtering);
923
924 // Negative src_height means invert the image.
925 if (src_height < 0) {
926 src_height = -src_height;
927 src = src + (src_height - 1) * src_stride;
928 src_stride = -src_stride;
929 }
930 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
931 &dx, &dy);
932 src_width = Abs(src_width);
933 if (clip_x) {
934 int64_t clipf = (int64_t)(clip_x)*dx;
935 x += (clipf & 0xffff);
936 src += (clipf >> 16) * 4;
937 dst += clip_x * 4;
938 }
939 if (clip_y) {
940 int64_t clipf = (int64_t)(clip_y)*dy;
941 y += (clipf & 0xffff);
942 src += (clipf >> 16) * src_stride;
943 dst += clip_y * dst_stride;
944 }
945
946 // Special case for integer step values.
947 if (((dx | dy) & 0xffff) == 0) {
948 if (!dx || !dy) { // 1 pixel wide and/or tall.
949 filtering = kFilterNone;
950 } else {
951 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
952 if (!(dx & 0x10000) && !(dy & 0x10000)) {
953 if (dx == 0x20000) {
954 // Optimized 1/2 downsample.
955 ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
956 src_stride, dst_stride, src, dst, x, dx, y, dy,
957 filtering);
958 return;
959 }
960 if (dx == 0x40000 && filtering == kFilterBox) {
961 // Optimized 1/4 box downsample.
962 ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
963 src_stride, dst_stride, src, dst, x, dx, y, dy);
964 return;
965 }
966 ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
967 src_stride, dst_stride, src, dst, x, dx, y, dy,
968 filtering);
969 return;
970 }
971 // Optimized odd scale down. ie 3, 5, 7, 9x.
972 if ((dx & 0x10000) && (dy & 0x10000)) {
973 filtering = kFilterNone;
974 if (dx == 0x10000 && dy == 0x10000) {
975 // Straight copy.
976 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
977 dst, dst_stride, clip_width, clip_height);
978 return;
979 }
980 }
981 }
982 }
983 if (dx == 0x10000 && (x & 0xffff) == 0) {
984 // Arbitrary scale vertically, but unscaled vertically.
985 ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
986 dst_stride, src, dst, x, y, dy, 4, filtering);
987 return;
988 }
989 if (filtering && dy < 65536) {
990 ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
991 src_stride, dst_stride, src, dst, x, dx, y, dy,
992 filtering);
993 return;
994 }
995 if (filtering) {
996 ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
997 src_stride, dst_stride, src, dst, x, dx, y, dy,
998 filtering);
999 return;
1000 }
1001 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
1002 dst_stride, src, dst, x, dx, y, dy);
1003 }
1004
1005 LIBYUV_API
ARGBScaleClip(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)1006 int ARGBScaleClip(const uint8_t* src_argb,
1007 int src_stride_argb,
1008 int src_width,
1009 int src_height,
1010 uint8_t* dst_argb,
1011 int dst_stride_argb,
1012 int dst_width,
1013 int dst_height,
1014 int clip_x,
1015 int clip_y,
1016 int clip_width,
1017 int clip_height,
1018 enum FilterMode filtering) {
1019 if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
1020 dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
1021 clip_width > 32768 || clip_height > 32768 ||
1022 (clip_x + clip_width) > dst_width ||
1023 (clip_y + clip_height) > dst_height) {
1024 return -1;
1025 }
1026 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
1027 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
1028 clip_height, filtering);
1029 return 0;
1030 }
1031
1032 // Scale an ARGB image.
1033 LIBYUV_API
ARGBScale(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)1034 int ARGBScale(const uint8_t* src_argb,
1035 int src_stride_argb,
1036 int src_width,
1037 int src_height,
1038 uint8_t* dst_argb,
1039 int dst_stride_argb,
1040 int dst_width,
1041 int dst_height,
1042 enum FilterMode filtering) {
1043 if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
1044 src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
1045 return -1;
1046 }
1047 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
1048 dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
1049 filtering);
1050 return 0;
1051 }
1052
1053 // Scale with YUV conversion to ARGB and clipping.
1054 LIBYUV_API
YUVToARGBScaleClip(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint32_t src_fourcc,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,uint32_t dst_fourcc,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)1055 int YUVToARGBScaleClip(const uint8_t* src_y,
1056 int src_stride_y,
1057 const uint8_t* src_u,
1058 int src_stride_u,
1059 const uint8_t* src_v,
1060 int src_stride_v,
1061 uint32_t src_fourcc,
1062 int src_width,
1063 int src_height,
1064 uint8_t* dst_argb,
1065 int dst_stride_argb,
1066 uint32_t dst_fourcc,
1067 int dst_width,
1068 int dst_height,
1069 int clip_x,
1070 int clip_y,
1071 int clip_width,
1072 int clip_height,
1073 enum FilterMode filtering) {
1074 uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
1075 int r;
1076 (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
1077 (void)dst_fourcc;
1078 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
1079 argb_buffer, src_width * 4, src_width, src_height);
1080
1081 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
1082 dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
1083 clip_width, clip_height, filtering);
1084 free(argb_buffer);
1085 return r;
1086 }
1087
1088 #ifdef __cplusplus
1089 } // extern "C"
1090 } // namespace libyuv
1091 #endif
1092