1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 #include "libyuv/scale_uv.h" // For UVScale
21
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26
// Returns the absolute value of v.  (INT_MIN maps to itself; callers only
// pass image dimensions, which are well inside int range.)
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
30
31 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
32
33 // Scale plane, 1/2
34 // This is an optimized version for scaling down a plane to 1/2 of
35 // its original size.
36
// Scales a plane to exactly half its width and height.
// filtering selects the row worker: kFilterNone point-samples (odd rows),
// kFilterLinear averages horizontally only, any other mode does a full
// 2x2 box average.  src_width/src_height are unused (the 1/2 ratio is
// implied by the destination size).
static void ScalePlaneDown2(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Row worker: starts as portable C, upgraded below to the best SIMD
  // variant the CPU supports.  *_Any_* variants tolerate widths that are
  // not a multiple of the SIMD lane count.
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
                                        : ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;  // Two source rows consumed per dest row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
                                          : ScaleRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_NEON
                             : ScaleRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
                                          : ScaleRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 =
          filtering == kFilterNone
              ? ScaleRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
                                            : ScaleRowDown2Box_SSSE3);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_AVX2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
                                          : ScaleRowDown2Box_Any_AVX2);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_AVX2
                             : ScaleRowDown2Box_AVX2);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_MMI
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
                                          : ScaleRowDown2Box_Any_MMI);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_MMI
                             : ScaleRowDown2Box_MMI);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
                                          : ScaleRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_MSA
                             : ScaleRowDown2Box_MSA);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear mode blends within a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
148
// 16-bit variant of ScalePlaneDown2: halves width and height of a
// uint16_t plane.  Fewer SIMD variants exist than for 8-bit, so the
// alignment check is fused into the CPU-flag test.
static void ScalePlaneDown2_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Row worker, chosen by filter mode; upgraded to SIMD below when the CPU
  // supports it and the width is suitably aligned.
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint16_t* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_16_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
                                        : ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Two source rows consumed per dest row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    // NEON offers only point-sample and box variants; any filtering uses box.
    ScaleRowDown2 =
        filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_16_SSE2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
                                          : ScaleRowDown2Box_16_SSE2);
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_MMI
                    : (filtering == kFilterLinear
                           ? ScaleRowDown2Linear_16_MMI
                           : ScaleRowDown2Box_16_MMI);
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear mode blends within a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
207
208 // Scale plane, 1/4
209 // This is an optimized version for scaling down a plane to 1/4 of
210 // its original size.
211
// Scales a plane to exactly a quarter of its width and height.
// Any filtering mode other than kFilterNone uses a 4x4 box average; with
// kFilterNone the center row (row 2 of each 4-row group) is point sampled.
static void ScalePlaneDown4(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Row worker; upgraded to SIMD below.  *_Any_* variants handle unaligned
  // widths.
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Four source rows consumed per dest row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear mode blends within a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
287
// 16-bit variant of ScalePlaneDown4: quarters width and height of a
// uint16_t plane.  Alignment is required for the SIMD paths (no Any_
// variants here).
static void ScalePlaneDown4_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Row worker: box average when filtering, point sample otherwise.
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint16_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Four source rows consumed per dest row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_MMI)
  if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear mode blends within a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
335
336 // Scale plane down, 3/4
// Scales a plane to 3/4 of its width and height.
// Two row workers are used: _0 blends a row with the next one at one weight,
// _1 at another; with kFilterNone both point-sample.  dst_width must be a
// multiple of 3.
static void ScalePlaneDown34(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  // Linear mode filters within one row only, so the vertical stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    // MMI has only the unfiltered variant.
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_MMI;
      ScaleRowDown34_1 = ScaleRowDown34_Any_MMI;
      if (dst_width % 24 == 0) {
        ScaleRowDown34_0 = ScaleRowDown34_MMI;
        ScaleRowDown34_1 = ScaleRowDown34_MMI;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
      ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
    }
    if (dst_width % 48 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_MSA;
        ScaleRowDown34_1 = ScaleRowDown34_MSA;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
      }
    }
  }
#endif

  // Each iteration consumes 4 source rows and produces 3 dest rows.  The
  // third output reuses ScaleRowDown34_0 on rows 3 and 2 by passing a
  // negative stride (mirrored blend).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
457
// 16-bit variant of ScalePlaneDown34: scales a uint16_t plane to 3/4 size.
// dst_width must be a multiple of 3; SIMD paths require a multiple of 24.
static void ScalePlaneDown34_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  // Linear mode filters within one row only, so the vertical stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif

  // Each iteration consumes 4 source rows and produces 3 dest rows; the
  // third output reuses ScaleRowDown34_0 with a negative stride (mirrored
  // blend of rows 3 and 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
528
529 // Scale plane, 3/8
530 // This is an optimized version for scaling down a plane to 3/8
531 // of its original size.
532 //
// Uses a box filter arranged like this
534 // aaabbbcc -> abc
535 // aaabbbcc def
536 // aaabbbcc ghi
537 // dddeeeff
538 // dddeeeff
539 // dddeeeff
540 // ggghhhii
541 // ggghhhii
542 // Boxes are 3x3, 2x3, 3x2 and 2x2
543
// Scales a plane to 3/8 of its width and height (see the box diagram above).
// _3 sums 3 source rows into one dest row, _2 sums 2; with kFilterNone both
// point-sample.  dst_width must be a multiple of 3.
static void ScalePlaneDown38(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  // Linear mode filters within one row only, so the vertical stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }

#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
    }
    // Note: alignment requirement differs between the point-sample (12) and
    // box (6) SSSE3 variants.
    if (dst_width % 12 == 0 && !filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
    if (dst_width % 6 == 0 && filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
      ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_MSA;
        ScaleRowDown38_2 = ScaleRowDown38_MSA;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
      }
    }
  }
#endif

  // Each iteration consumes 8 source rows (3 + 3 + 2) and produces 3 dest
  // rows, matching the box layout documented above.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
652
// 16-bit variant of ScalePlaneDown38: scales a uint16_t plane to 3/8 size.
// dst_width must be a multiple of 3.
static void ScalePlaneDown38_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  // Linear mode filters within one row only, so the vertical stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#endif

  // Each iteration consumes 8 source rows (3 + 3 + 2) and produces 3 dest
  // rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
723
724 #define MIN1(x) ((x) < 1 ? 1 : (x))
725
// Returns the sum of the first iboxwidth 16-bit values at src_ptr.
// Used by the box filter to total a horizontal run of column sums.
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  int i;
  uint32_t total = 0u;
  assert(iboxwidth > 0);
  for (i = 0; i < iboxwidth; ++i) {
    total += src_ptr[i];
  }
  return total;
}
735
// Returns the sum of the first iboxwidth 32-bit values at src_ptr
// (modulo 2^32).  16-bit-plane counterpart of SumPixels.
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  int i;
  uint32_t total = 0u;
  assert(iboxwidth > 0);
  for (i = 0; i < iboxwidth; ++i) {
    total += src_ptr[i];
  }
  return total;
}
745
// Horizontal pass of the box filter for fractional ratios: x/dx are 16.16
// fixed point, so box widths alternate between minboxwidth and
// minboxwidth + 1 pixels.  Two reciprocal scale factors are precomputed and
// selected per output pixel by actual box width.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  int i;
  int scaletbl[2];
  int minboxwidth = dx >> 16;
  int boxwidth;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;  // Left edge of this box in whole source pixels.
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);  // Columns covered by this box.
    *dst_ptr++ =
        SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
        16;
  }
}
767
// 16-bit variant of ScaleAddCols2_C: sums are uint32_t, output is uint16_t.
// x/dx are 16.16 fixed point; box widths alternate between minboxwidth and
// minboxwidth + 1.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int i;
  int scaletbl[2];
  int minboxwidth = dx >> 16;
  int boxwidth;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;  // Left edge of this box in whole source pixels.
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);  // Columns covered by this box.
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
                     scaletbl[boxwidth - minboxwidth] >>
                 16;
  }
}
789
// Horizontal pass for a 1:1 width ratio: each output pixel is one column
// sum divided by boxheight, done as a 16.16 fixed point multiply.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int scaleval = 65536 / boxheight;
  const uint16_t* col = src_ptr + (x >> 16);
  int i;
  (void)dx;
  for (i = 0; i < dst_width; ++i) {
    dst_ptr[i] = (uint8_t)(col[i] * scaleval >> 16);
  }
}
804
// Horizontal pass for integer downscale ratios: a fixed-width box of column
// sums is averaged per output pixel.  x/dx are 16.16 fixed point.
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  int ix = x >> 16;  // Convert start coordinate to whole pixels.
  int i;
  for (i = 0; i < dst_width; ++i) {
    dst_ptr[i] = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) * scaleval >> 16);
    ix += boxwidth;
  }
}
820
// 16-bit variant of ScaleAddCols1_C: fixed-width box average of uint32_t
// column sums for integer downscale ratios.
// Fix: x arrives as a 16.16 fixed point coordinate (see ScaleAddCols1_C and
// ScaleAddCols2_16_C, which both shift it), but was previously used directly
// as an element index — reading far out of bounds for any nonzero start x.
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  x >>= 16;  // Convert 16.16 fixed point start coordinate to whole pixels.
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
835
836 // Scale plane down to any dimensions, with interpolation.
837 // (boxfilter).
838 //
839 // Same method as SimpleScale, which is fixed point, outputting
840 // one pixel of destination using fixed point (16.16) to step
841 // through source, sampling a box of pixel with simple
842 // averaging.
// Scales a plane down to arbitrary dimensions with a box filter: for each
// dest row, boxheight source rows are accumulated into a uint16_t row of
// column sums (ScaleAddRow), then a horizontal pass (ScaleAddCols*) averages
// runs of those sums into dest pixels.
static void ScalePlaneBox(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);  // Clamp for the last box.
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);  // Negative width means mirrored source.
  {
    // Allocate a row buffer of uint16_t.
    align_buffer_64(row16, src_width * 2);
    // Horizontal pass: fractional ratio needs two box widths (Cols2),
    // integer ratio one (Cols1), 1:1 just divides (Cols0).
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
    // Vertical accumulator; upgraded to SIMD below when available.
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
                        int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      ScaleAddRow = ScaleAddRow_Any_SSE2;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_SSE2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      ScaleAddRow = ScaleAddRow_Any_AVX2;
      if (IS_ALIGNED(src_width, 32)) {
        ScaleAddRow = ScaleAddRow_AVX2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleAddRow = ScaleAddRow_Any_NEON;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_NEON;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_MMI)
    if (TestCpuFlag(kCpuHasMMI)) {
      ScaleAddRow = ScaleAddRow_Any_MMI;
      if (IS_ALIGNED(src_width, 8)) {
        ScaleAddRow = ScaleAddRow_MMI;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_MSA)
    if (TestCpuFlag(kCpuHasMSA)) {
      ScaleAddRow = ScaleAddRow_Any_MSA;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_MSA;
      }
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;  // Top source row of this box.
      const uint8_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Last box may be shorter than dy.
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row16, 0, src_width * 2);  // Reset column sums.
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
931
// 16-bit variant of ScalePlaneBox: accumulates uint16_t source rows into a
// uint32_t row of column sums, then averages them horizontally per dest
// pixel.  No Cols0 shortcut exists for the 16-bit path.
static void ScalePlaneBox_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16_t* src_ptr,
                             uint16_t* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);  // Clamp for the last box.
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);  // Negative width means mirrored source.
  {
    // Allocate a row buffer of uint32_t.
    align_buffer_64(row32, src_width * 4);
    // Horizontal pass: fractional ratio needs two box widths, integer one.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint32_t* src_ptr, uint16_t* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
    void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
                        int src_width) = ScaleAddRow_16_C;

#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif

#if defined(HAS_SCALEADDROW_16_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
      ScaleAddRow = ScaleAddRow_16_MMI;
    }
#endif
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;  // Top source row of this box.
      const uint16_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Last box may be shorter than dy.
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row32, 0, src_width * 4);  // Reset column sums.
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint32_t*)(row32), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
990
991 // Scale plane down with bilinear interpolation.
// Scale a plane down with bilinear interpolation: vertically interpolate a
// full-width source row into a temp buffer, then filter columns into dst.
void ScalePlaneBilinearDown(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width);

  // Highest source y (16.16) that still has a row below it to blend with.
  const int max_y = (src_height - 1) << 16;
  int j;
  // 64 bit column stepper is needed once x/dx exceed 16.16 int range.
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Pick the fastest row interpolator the CPU supports; the _Any_ variants
  // handle unaligned widths, the plain variants require aligned widths.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  // SIMD column filters only support 16.16 coordinates (src_width < 32768).
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_MSA;
    }
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // Integer part: top source row for this output row.
    const uint8_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend two source rows by the fractional part of y, then filter cols.
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;  // Clamp so the last row never reads past the plane.
    }
  }
  free_aligned_buffer_64(row);
}
1106
// 16 bit version of ScalePlaneBilinearDown: vertically interpolate a source
// row into a temp buffer, then filter columns into the destination row.
void ScalePlaneBilinearDown_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (2 bytes per 16 bit pixel).
  align_buffer_64(row, src_width * 2);

  // Highest source y (16.16) that still has a row below it to blend with.
  const int max_y = (src_height - 1) << 16;
  int j;
  // 64 bit column stepper is needed once x/dx exceed 16.16 int range.
  void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Pick the fastest 16 bit row interpolator the CPU supports.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif

  // SIMD column filter only supports 16.16 coordinates (src_width < 32768).
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // Integer part: top source row for this output row.
    const uint16_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend two source rows by the fractional part of y, then filter cols.
      int yf = (y >> 8) & 255;
      InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;  // Clamp so the last row never reads past the plane.
    }
  }
  free_aligned_buffer_64(row);
}
1197
1198 // Scale up down with bilinear interpolation.
// Scale a plane up with bilinear interpolation. Columns are filtered first
// into two dst-width row buffers; those two rows are then blended
// vertically, reusing each filtered row while the integer source row is
// unchanged (ping-pong between the two buffers).
void ScalePlaneBilinearUp(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Highest source y (16.16) that still has a row below it to blend with.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  // Without filtering, plain nearest column sampling is used.
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Row interpolation runs at dst_width here (rows are already scaled).
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif

  // 64 bit column stepper is needed once x/dx exceed 16.16 int range.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_MSA;
    }
  }
#endif
  // Special case: exact 2x horizontal upsample starting at the first pixel.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
#if defined(HAS_SCALECOLS_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_MMI;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers; width rounded up to a multiple of 32 bytes.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);

    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row currently held in the second buffer.

    // Prime both row buffers with the first two column-filtered source rows.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Advanced to a new source row: clamp y, then refill one buffer and
        // flip the ping-pong direction so the other buffer becomes "top".
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: copy the filtered row without vertical blending.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1338
// 16 bit version of ScalePlaneBilinearUp: columns filtered into two
// dst-width row buffers that ping-pong while rows are blended vertically.
void ScalePlaneBilinearUp_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16_t* src_ptr,
                             uint16_t* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Highest source y (16.16) that still has a row below it to blend with.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  // Without filtering, plain nearest column sampling is used.
  void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Row interpolation runs at dst_width here (rows are already scaled).
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif

  // 64 bit column stepper is needed once x/dx exceed 16.16 int range.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Special case: exact 2x horizontal upsample starting at the first pixel.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
#if defined(HAS_SCALECOLS_16_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_MMI;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16_t* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers; kRowSize counts 16 bit pixels, hence * 4 bytes.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 4);

    uint16_t* rowptr = (uint16_t*)row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row currently held in the second buffer.

    // Prime both row buffers with the first two column-filtered source rows.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Advanced to a new source row: clamp y, then refill one buffer and
        // flip the ping-pong direction so the other buffer becomes "top".
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: copy the filtered row without vertical blending.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1470
1471 // Scale Plane to/from any dimensions, without interpolation.
1472 // Fixed point math is used for performance: The upper 16 bits
1473 // of x and dx is the integer part of the source position and
1474 // the lower 16 bits are the fixed decimal part.
1475
// Point-sample a plane to arbitrary dimensions (no interpolation), using
// 16.16 fixed point source coordinates.
static void ScalePlaneSimple(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr) {
  // Source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  int row;
  void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
                    int x, int dx) = ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Special case: exact 2x horizontal upsample starting at the first pixel.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
#if defined(HAS_SCALECOLS_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_MMI;
    }
#endif
  }

  // Sample one source row per destination row.
  for (row = 0; row < dst_height; ++row) {
    const uint8_t* src_row = src_ptr + (y >> 16) * src_stride;
    ScaleCols(dst_ptr, src_row, dst_width, x, dx);
    y += dy;
    dst_ptr += dst_stride;
  }
}
1516
// 16 bit version of ScalePlaneSimple: point-sample a plane to arbitrary
// dimensions using 16.16 fixed point source coordinates.
static void ScalePlaneSimple_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr) {
  // Source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  int row;
  void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
                    int x, int dx) = ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);

  // Special case: exact 2x horizontal upsample starting at the first pixel.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_16_SSE2;
    }
#endif
#if defined(HAS_SCALECOLS_16_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_16_MMI;
    }
#endif
  }

  // Sample one source row per destination row.
  for (row = 0; row < dst_height; ++row) {
    const uint16_t* src_row = src_ptr + (y >> 16) * src_stride;
    ScaleCols(dst_ptr, src_row, dst_width, x, dx);
    y += dy;
    dst_ptr += dst_stride;
  }
}
1557
1558 // Scale a plane.
1559 // This function dispatches to a specialized scaler based on scale factor.
1560
LIBYUV_API
// Scale a plane to any dimensions. Dispatches to a specialized scaler for
// common ratios (1/2, 1/4, 3/4, 3/8), else falls back to box, bilinear
// up/down, or point sampling depending on filtering and direction.
void ScalePlane(const uint8_t* src,
                int src_stride,
                int src_width,
                int src_height,
                uint8_t* dst,
                int dst_stride,
                int dst_width,
                int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, 0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter when shrinking height by more than half.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
                  dst_stride, src, dst);
    return;
  }
  // Bilinear up when the height grows, bilinear down otherwise.
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: point sample.
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
                   dst_stride, src, dst);
}
1643
LIBYUV_API
// 16 bit version of ScalePlane: same dispatch to specialized scalers for
// common ratios, else box, bilinear up/down, or point sampling.
void ScalePlane_16(const uint16_t* src,
                   int src_stride,
                   int src_width,
                   int src_height,
                   uint16_t* dst,
                   int dst_stride,
                   int dst_width,
                   int dst_height,
                   enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                          dst_stride, src, dst, 0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter when shrinking height by more than half.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
                     dst_stride, src, dst);
    return;
  }
  // Bilinear up when the height grows, bilinear down otherwise.
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: point sample.
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst);
}
1726
1727 // Scale an I420 image.
1728 // This function in turn calls a scaling function for each plane.
1729
1730 LIBYUV_API
I420Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1731 int I420Scale(const uint8_t* src_y,
1732 int src_stride_y,
1733 const uint8_t* src_u,
1734 int src_stride_u,
1735 const uint8_t* src_v,
1736 int src_stride_v,
1737 int src_width,
1738 int src_height,
1739 uint8_t* dst_y,
1740 int dst_stride_y,
1741 uint8_t* dst_u,
1742 int dst_stride_u,
1743 uint8_t* dst_v,
1744 int dst_stride_v,
1745 int dst_width,
1746 int dst_height,
1747 enum FilterMode filtering) {
1748 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1749 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1750 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1751 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1752 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1753 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1754 dst_width <= 0 || dst_height <= 0) {
1755 return -1;
1756 }
1757
1758 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1759 dst_width, dst_height, filtering);
1760 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1761 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1762 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1763 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1764 return 0;
1765 }
1766
1767 LIBYUV_API
I420Scale_16(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1768 int I420Scale_16(const uint16_t* src_y,
1769 int src_stride_y,
1770 const uint16_t* src_u,
1771 int src_stride_u,
1772 const uint16_t* src_v,
1773 int src_stride_v,
1774 int src_width,
1775 int src_height,
1776 uint16_t* dst_y,
1777 int dst_stride_y,
1778 uint16_t* dst_u,
1779 int dst_stride_u,
1780 uint16_t* dst_v,
1781 int dst_stride_v,
1782 int dst_width,
1783 int dst_height,
1784 enum FilterMode filtering) {
1785 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1786 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1787 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1788 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1789 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1790 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1791 dst_width <= 0 || dst_height <= 0) {
1792 return -1;
1793 }
1794
1795 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1796 dst_width, dst_height, filtering);
1797 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1798 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1799 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1800 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1801 return 0;
1802 }
1803
1804 // Scale an I444 image.
1805 // This function in turn calls a scaling function for each plane.
1806
1807 LIBYUV_API
I444Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1808 int I444Scale(const uint8_t* src_y,
1809 int src_stride_y,
1810 const uint8_t* src_u,
1811 int src_stride_u,
1812 const uint8_t* src_v,
1813 int src_stride_v,
1814 int src_width,
1815 int src_height,
1816 uint8_t* dst_y,
1817 int dst_stride_y,
1818 uint8_t* dst_u,
1819 int dst_stride_u,
1820 uint8_t* dst_v,
1821 int dst_stride_v,
1822 int dst_width,
1823 int dst_height,
1824 enum FilterMode filtering) {
1825 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1826 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1827 dst_width <= 0 || dst_height <= 0) {
1828 return -1;
1829 }
1830
1831 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1832 dst_width, dst_height, filtering);
1833 ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
1834 dst_width, dst_height, filtering);
1835 ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
1836 dst_width, dst_height, filtering);
1837 return 0;
1838 }
1839
1840 LIBYUV_API
I444Scale_16(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1841 int I444Scale_16(const uint16_t* src_y,
1842 int src_stride_y,
1843 const uint16_t* src_u,
1844 int src_stride_u,
1845 const uint16_t* src_v,
1846 int src_stride_v,
1847 int src_width,
1848 int src_height,
1849 uint16_t* dst_y,
1850 int dst_stride_y,
1851 uint16_t* dst_u,
1852 int dst_stride_u,
1853 uint16_t* dst_v,
1854 int dst_stride_v,
1855 int dst_width,
1856 int dst_height,
1857 enum FilterMode filtering) {
1858 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1859 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1860 dst_width <= 0 || dst_height <= 0) {
1861 return -1;
1862 }
1863
1864 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1865 dst_width, dst_height, filtering);
1866 ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
1867 dst_width, dst_height, filtering);
1868 ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
1869 dst_width, dst_height, filtering);
1870 return 0;
1871 }
1872
1873 // Scale an NV12 image.
1874 // This function in turn calls a scaling function for each plane.
1875
1876 LIBYUV_API
NV12Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_uv,int src_stride_uv,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int dst_width,int dst_height,enum FilterMode filtering)1877 int NV12Scale(const uint8_t* src_y,
1878 int src_stride_y,
1879 const uint8_t* src_uv,
1880 int src_stride_uv,
1881 int src_width,
1882 int src_height,
1883 uint8_t* dst_y,
1884 int dst_stride_y,
1885 uint8_t* dst_uv,
1886 int dst_stride_uv,
1887 int dst_width,
1888 int dst_height,
1889 enum FilterMode filtering) {
1890 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1891 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1892 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1893 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1894 if (!src_y || !src_uv || src_width == 0 || src_height == 0 ||
1895 src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
1896 dst_width <= 0 || dst_height <= 0) {
1897 return -1;
1898 }
1899
1900 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1901 dst_width, dst_height, filtering);
1902 UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
1903 dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
1904 return 0;
1905 }
1906
1907 // Deprecated api
1908 LIBYUV_API
Scale(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,int src_stride_y,int src_stride_u,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,uint8_t * dst_u,uint8_t * dst_v,int dst_stride_y,int dst_stride_u,int dst_stride_v,int dst_width,int dst_height,LIBYUV_BOOL interpolate)1909 int Scale(const uint8_t* src_y,
1910 const uint8_t* src_u,
1911 const uint8_t* src_v,
1912 int src_stride_y,
1913 int src_stride_u,
1914 int src_stride_v,
1915 int src_width,
1916 int src_height,
1917 uint8_t* dst_y,
1918 uint8_t* dst_u,
1919 uint8_t* dst_v,
1920 int dst_stride_y,
1921 int dst_stride_u,
1922 int dst_stride_v,
1923 int dst_width,
1924 int dst_height,
1925 LIBYUV_BOOL interpolate) {
1926 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
1927 src_stride_v, src_width, src_height, dst_y, dst_stride_y,
1928 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
1929 dst_height, interpolate ? kFilterBox : kFilterNone);
1930 }
1931
1932 #ifdef __cplusplus
1933 } // extern "C"
1934 } // namespace libyuv
1935 #endif
1936