1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the absolute value of v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
// Round-toward-negative-infinity subsample of v by 2^s with rounding add a.
// Arguments and the full expansion are parenthesized so the macro is safe
// inside larger expressions (an unparenthesized ternary binds incorrectly
// in contexts like `2 * SUBSAMPLE(...)`).
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
31
32 // Scale plane, 1/2
33 // This is an optimized version for scaling down a plane to 1/2 of
34 // its original size.
35
ScalePlaneDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr,enum FilterMode filtering)36 static void ScalePlaneDown2(int src_width, int src_height,
37 int dst_width, int dst_height,
38 int src_stride, int dst_stride,
39 const uint8* src_ptr, uint8* dst_ptr,
40 enum FilterMode filtering) {
41 int y;
42 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
43 uint8* dst_ptr, int dst_width) =
44 filtering == kFilterNone ? ScaleRowDown2_C :
45 (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
46 int row_stride = src_stride << 1;
47 if (!filtering) {
48 src_ptr += src_stride; // Point to odd rows.
49 src_stride = 0;
50 }
51
52 #if defined(HAS_SCALEROWDOWN2_NEON)
53 if (TestCpuFlag(kCpuHasNEON)) {
54 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
55 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
56 ScaleRowDown2Box_Any_NEON);
57 if (IS_ALIGNED(dst_width, 16)) {
58 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
59 (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
60 ScaleRowDown2Box_NEON);
61 }
62 }
63 #endif
64 #if defined(HAS_SCALEROWDOWN2_SSE2)
65 if (TestCpuFlag(kCpuHasSSE2)) {
66 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
67 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
68 ScaleRowDown2Box_Any_SSE2);
69 if (IS_ALIGNED(dst_width, 16)) {
70 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
71 (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
72 ScaleRowDown2Box_SSE2);
73 }
74 }
75 #endif
76 #if defined(HAS_SCALEROWDOWN2_AVX2)
77 if (TestCpuFlag(kCpuHasAVX2)) {
78 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
79 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
80 ScaleRowDown2Box_Any_AVX2);
81 if (IS_ALIGNED(dst_width, 32)) {
82 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
83 (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
84 ScaleRowDown2Box_AVX2);
85 }
86 }
87 #endif
88 #if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
89 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
90 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
91 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
92 ScaleRowDown2 = filtering ?
93 ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
94 }
95 #endif
96
97 if (filtering == kFilterLinear) {
98 src_stride = 0;
99 }
100 // TODO(fbarchard): Loop through source height to allow odd height.
101 for (y = 0; y < dst_height; ++y) {
102 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
103 src_ptr += row_stride;
104 dst_ptr += dst_stride;
105 }
106 }
107
// Scale a 16-bit plane to 1/2 of its original size.
// Mirrors ScalePlaneDown2 but operates on uint16 samples; the SIMD dispatch
// is narrower (no Any/AVX2 variants exist for 16 bit).
static void ScalePlaneDown2_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Select the C row kernel: point sample, horizontal linear, or 2x2 box.
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_16_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
      ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Consume two source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    // NEON has no linear kernel; any filtering maps to the box kernel.
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
        ScaleRowDown2_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
        ScaleRowDown2Box_16_SSE2);
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering is horizontal only.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
157
158 // Scale plane, 1/4
159 // This is an optimized version for scaling down a plane to 1/4 of
160 // its original size.
161
// Scale an 8-bit plane to 1/4 of its original size.
// kFilterNone point-samples row 2 of every 4-row group; any filtering uses
// the 4x4 box average kernel. SIMD kernels replace the C kernel when the
// CPU supports them and dst_width has the required alignment.
static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Consume four source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering is horizontal only.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
221
// Scale a 16-bit plane to 1/4 of its original size.
// Mirrors ScalePlaneDown4 but operates on uint16 samples.
static void ScalePlaneDown4_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Any filtering (linear or box) maps to the 4x4 box kernel.
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Consume four source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
        ScaleRowDown4_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
        ScaleRowDown4_16_SSE2;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering is horizontal only.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
266
267 // Scale plane down, 3/4
268
// Scale an 8-bit plane down by 3/4: each group of 4 source rows produces 3
// output rows. _0 and _1 are the two vertical filter phases; the third
// output row of each group reuses the _0 kernel on the following source row
// with a negated stride, mirroring the filter direction.
static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering is horizontal only, so kernels get a zero row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
    }
    if (dst_width % 24 == 0) {
      // Full-width kernels need dst_width to be a multiple of 24.
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  // Main loop: 4 source rows in, 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Third row: apply the _0 kernel to the next source row with a negated
    // stride so it filters back toward the current row.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
365
// Scale a 16-bit plane down by 3/4.
// Mirrors ScalePlaneDown34 but operates on uint16 samples; SIMD kernels
// have no Any variants, so they require dst_width % 24 == 0.
static void ScalePlaneDown34_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering is horizontal only, so kernels get a zero row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // Main loop: 4 source rows in, 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Third row: _0 kernel on the next source row with a negated stride.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
444
445
446 // Scale plane, 3/8
447 // This is an optimized version for scaling down a plane to 3/8
448 // of its original size.
449 //
// Uses box filters arranged like this:
451 // aaabbbcc -> abc
452 // aaabbbcc def
453 // aaabbbcc ghi
454 // dddeeeff
455 // dddeeeff
456 // dddeeeff
457 // ggghhhii
458 // ggghhhii
459 // Boxes are 3x3, 2x3, 3x2 and 2x2
460
// Scale an 8-bit plane down by 3/8: each group of 8 source rows produces 3
// output rows (covering 3, 3 and 2 source rows respectively, per the box
// diagram above). The _3 kernel spans 3 source rows, the _2 kernel spans 2.
static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering is horizontal only, so kernels get a zero row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }

#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
    }
    // Unfiltered and box SSSE3 kernels have different width requirements.
    if (dst_width % 12 == 0 && !filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
    if (dst_width % 6 == 0 && filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  // Main loop: 8 source rows in (3 + 3 + 2), 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
556
// Scale a 16-bit plane down by 3/8.
// Mirrors ScalePlaneDown38 but operates on uint16 samples.
static void ScalePlaneDown38_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering is horizontal only, so kernels get a zero row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  // NOTE(review): SSSE3 16-bit requires dst_width % 24 while NEON uses
  // % 12 -- presumably intentional for kernel widths, but worth confirming.
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // Main loop: 8 source rows in (3 + 3 + 2), 3 destination rows out.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
634
635 #define MIN1(x) ((x) < 1 ? 1 : (x))
636
SumPixels(int iboxwidth,const uint16 * src_ptr)637 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
638 uint32 sum = 0u;
639 int x;
640 assert(iboxwidth > 0);
641 for (x = 0; x < iboxwidth; ++x) {
642 sum += src_ptr[x];
643 }
644 return sum;
645 }
646
// Sum iboxwidth consecutive 32-bit values starting at src_ptr.
static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
  uint32 total = 0u;
  int i = 0;
  assert(iboxwidth > 0);
  while (i < iboxwidth) {
    total += src_ptr[i];
    ++i;
  }
  return total;
}
656
ScaleAddCols2_C(int dst_width,int boxheight,int x,int dx,const uint16 * src_ptr,uint8 * dst_ptr)657 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
658 const uint16* src_ptr, uint8* dst_ptr) {
659 int i;
660 int scaletbl[2];
661 int minboxwidth = dx >> 16;
662 int* scaleptr = scaletbl - minboxwidth;
663 int boxwidth;
664 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
665 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
666 for (i = 0; i < dst_width; ++i) {
667 int ix = x >> 16;
668 x += dx;
669 boxwidth = MIN1((x >> 16) - ix);
670 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
671 }
672 }
673
ScaleAddCols2_16_C(int dst_width,int boxheight,int x,int dx,const uint32 * src_ptr,uint16 * dst_ptr)674 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
675 const uint32* src_ptr, uint16* dst_ptr) {
676 int i;
677 int scaletbl[2];
678 int minboxwidth = dx >> 16;
679 int* scaleptr = scaletbl - minboxwidth;
680 int boxwidth;
681 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
682 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
683 for (i = 0; i < dst_width; ++i) {
684 int ix = x >> 16;
685 x += dx;
686 boxwidth = MIN1((x >> 16) - ix);
687 *dst_ptr++ =
688 SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
689 }
690 }
691
ScaleAddCols0_C(int dst_width,int boxheight,int x,int,const uint16 * src_ptr,uint8 * dst_ptr)692 static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
693 const uint16* src_ptr, uint8* dst_ptr) {
694 int scaleval = 65536 / boxheight;
695 int i;
696 src_ptr += (x >> 16);
697 for (i = 0; i < dst_width; ++i) {
698 *dst_ptr++ = src_ptr[i] * scaleval >> 16;
699 }
700 }
701
ScaleAddCols1_C(int dst_width,int boxheight,int x,int dx,const uint16 * src_ptr,uint8 * dst_ptr)702 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
703 const uint16* src_ptr, uint8* dst_ptr) {
704 int boxwidth = MIN1(dx >> 16);
705 int scaleval = 65536 / (boxwidth * boxheight);
706 int i;
707 x >>= 16;
708 for (i = 0; i < dst_width; ++i) {
709 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
710 x += boxwidth;
711 }
712 }
713
ScaleAddCols1_16_C(int dst_width,int boxheight,int x,int dx,const uint32 * src_ptr,uint16 * dst_ptr)714 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
715 const uint32* src_ptr, uint16* dst_ptr) {
716 int boxwidth = MIN1(dx >> 16);
717 int scaleval = 65536 / (boxwidth * boxheight);
718 int i;
719 for (i = 0; i < dst_width; ++i) {
720 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
721 x += boxwidth;
722 }
723 }
724
725 // Scale plane down to any dimensions, with interpolation.
726 // (boxfilter).
727 //
728 // Same method as SimpleScale, which is fixed point, outputting
729 // one pixel of destination using fixed point (16.16) to step
730 // through source, sampling a box of pixel with simple
731 // averaging.
// Scale an 8-bit plane down to any dimensions with a box filter.
// Steps through the source in 16.16 fixed point. For each output row,
// boxheight source rows are summed into a uint16 accumulator row, then
// ScaleAddCols averages horizontal boxes of those sums into output pixels.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    // Column reducer: fractional step, integral step > 1, or exact 1:1.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C:
        ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
    void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
        ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      ScaleAddRow = ScaleAddRow_Any_SSE2;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_SSE2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      ScaleAddRow = ScaleAddRow_Any_AVX2;
      if (IS_ALIGNED(src_width, 32)) {
        ScaleAddRow = ScaleAddRow_AVX2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleAddRow = ScaleAddRow_Any_NEON;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_NEON;
      }
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp so the last box does not read past the plane.
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row16, 0, src_width * 2);
      // Accumulate boxheight rows of 8-bit pixels into 16-bit column sums.
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint16 *)(row16), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
800
// Scale a 16-bit plane down to any dimensions with a box filter.
// Mirrors ScalePlaneBox: sums boxheight rows into uint32 column sums, then
// averages horizontal boxes into 16-bit output. There is no dedicated 1:1
// column reducer for 16 bit; ScaleAddCols1_16_C handles integral steps.
static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
        ScaleAddRow_16_C;

#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp so the last box does not read past the plane.
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row32, 0, src_width * 4);
      // Accumulate boxheight rows of 16-bit pixels into 32-bit column sums.
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint32 *)(row32), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
850
851 // Scale plane down with bilinear interpolation.
// Scale plane down with bilinear interpolation.
// For each output row: vertically interpolate between the two nearest
// source rows into a temporary row (skipped for kFilterLinear), then
// horizontally filter that row into the destination with ScaleFilterCols.
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // NOTE(review): allocated before src_width = Abs(src_width) below;
  // assumes callers never pass a negative src_width here -- confirm.
  align_buffer_64(row, src_width);

  const int max_y = (src_height - 1) << 16;
  int j;
  // 64-bit column stepper is needed when src_width is too large for the
  // 16.16 fixed point math in the 32-bit variant.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  // Clamp the initial coordinate to the last valid source row.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal only: filter the single source row straight to output.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;  // Vertical fraction in 1/256 units.
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
955
// 16 bit (uint16 sample) version of ScalePlaneBilinearDown: vertically
// interpolate the two bracketing source rows into a row buffer, then
// filter columns into dst.  kFilterLinear skips the vertical pass.
void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (2 bytes per 16 bit pixel).
  align_buffer_64(row, src_width * 2);

  // Clamp limit so y never indexes past the last source row.
  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  // Negative (mirrored) width has been folded into x/dx by ScaleSlope.
  src_width = Abs(src_width);

  // CPU dispatch for the vertical interpolator; _Any_ variants take any
  // width, exact variants need the stated src_width alignment.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // integer part = source row index
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample directly from the source row.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;  // vertical fraction in 1/256ths
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
1051
// Scale plane up with bilinear interpolation.
// Columns are scaled first into two ping-pong row buffers; each destination
// row is then interpolated vertically between those buffers, so every source
// row is column-scaled at most once even when it feeds many output rows.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so y never indexes past the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Without filtering, columns are point sampled (ScaleCols_C).
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  // Negative (mirrored) width has been folded into x/dx by ScaleSlope.
  src_width = Abs(src_width);

  // CPU dispatch; the interpolator runs on already column-scaled rows, so
  // alignment is checked against dst_width here (unlike the down-scaler).
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // 64 bit column scaler avoids 32 bit overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  // Special case: exact 2x horizontal point upsample with no initial phase.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    const int kRowSize = (dst_width + 31) & ~31;  // row width rounded to 32
    align_buffer_64(row, kRowSize * 2);

    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;

    // Prime both buffers with the first two column-scaled source rows.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp at the bottom, scale the new
        // row into the older buffer, and flip the ping-pong direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: emit the current row buffer unchanged.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // vertical fraction in 1/256ths
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1191
// 16 bit (uint16 sample) version of ScalePlaneBilinearUp: column-scale into
// two ping-pong row buffers, then interpolate vertically between them.
void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so y never indexes past the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  // Without filtering, columns are point sampled (ScaleCols_16_C).
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  // Negative (mirrored) width has been folded into x/dx by ScaleSlope.
  src_width = Abs(src_width);

  // CPU dispatch; alignment checked against dst_width since the
  // interpolator runs on already column-scaled rows.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

  // 64 bit column scaler avoids 32 bit overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Special case: exact 2x horizontal point upsample with no initial phase.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers (kRowSize pixels each, 2 bytes per pixel).
    const int kRowSize = (dst_width + 31) & ~31;  // row width rounded to 32
    align_buffer_64(row, kRowSize * 4);

    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;  // stride in pixels, not bytes
    int lasty = yi;

    // Prime both buffers with the first two column-scaled source rows.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp at the bottom, scale the new
        // row into the older buffer, and flip the ping-pong direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: emit the current row buffer unchanged.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // vertical fraction in 1/256ths
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1322
1323 // Scale Plane to/from any dimensions, without interpolation.
1324 // Fixed point math is used for performance: The upper 16 bits
1325 // of x and dx is the integer part of the source position and
1326 // the lower 16 bits are the fixed decimal part.
1327
ScalePlaneSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_ptr,uint8 * dst_ptr)1328 static void ScalePlaneSimple(int src_width, int src_height,
1329 int dst_width, int dst_height,
1330 int src_stride, int dst_stride,
1331 const uint8* src_ptr, uint8* dst_ptr) {
1332 int i;
1333 void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
1334 int dst_width, int x, int dx) = ScaleCols_C;
1335 // Initial source x/y coordinate and step values as 16.16 fixed point.
1336 int x = 0;
1337 int y = 0;
1338 int dx = 0;
1339 int dy = 0;
1340 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
1341 &x, &y, &dx, &dy);
1342 src_width = Abs(src_width);
1343
1344 if (src_width * 2 == dst_width && x < 0x8000) {
1345 ScaleCols = ScaleColsUp2_C;
1346 #if defined(HAS_SCALECOLS_SSE2)
1347 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1348 ScaleCols = ScaleColsUp2_SSE2;
1349 }
1350 #endif
1351 }
1352
1353 for (i = 0; i < dst_height; ++i) {
1354 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1355 dst_ptr += dst_stride;
1356 y += dy;
1357 }
1358 }
1359
// 16 bit (uint16 sample) version of ScalePlaneSimple: point-sampled scale
// to/from any dimensions using 16.16 fixed point source positions.
static void ScalePlaneSimple_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr) {
  int row;
  void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) = ScaleCols_16_C;
  // Starting 16.16 source position and per-pixel / per-row steps.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Special case: exact 2x horizontal upsample with no initial phase.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  for (row = 0; row < dst_height; ++row) {
    const uint16* src_row = src_ptr + (y >> 16) * src_stride;
    ScaleCols(dst_ptr, src_row, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}
1392
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor:
// straight copy, vertical-only, optimized 3/4 / 1/2 / 3/8 / 1/4 down-scalers,
// box filter for >2x down, bilinear up/down, or point sampling.  Branch
// order matters: earlier, more specialized cases win.

LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height, filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  // kFilterBox is routed past this branch so it can reach the box scaler
  // further down.
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter for down-scales of more than 2x vertically.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: arbitrary point-sampled scale.
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
1478
1479 LIBYUV_API
ScalePlane_16(const uint16 * src,int src_stride,int src_width,int src_height,uint16 * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)1480 void ScalePlane_16(const uint16* src, int src_stride,
1481 int src_width, int src_height,
1482 uint16* dst, int dst_stride,
1483 int dst_width, int dst_height,
1484 enum FilterMode filtering) {
1485 // Simplify filtering when possible.
1486 filtering = ScaleFilterReduce(src_width, src_height,
1487 dst_width, dst_height, filtering);
1488
1489 // Negative height means invert the image.
1490 if (src_height < 0) {
1491 src_height = -src_height;
1492 src = src + (src_height - 1) * src_stride;
1493 src_stride = -src_stride;
1494 }
1495
1496 // Use specialized scales to improve performance for common resolutions.
1497 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1498 if (dst_width == src_width && dst_height == src_height) {
1499 // Straight copy.
1500 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1501 return;
1502 }
1503 if (dst_width == src_width) {
1504 int dy = FixedDiv(src_height, dst_height);
1505 // Arbitrary scale vertically, but unscaled vertically.
1506 ScalePlaneVertical_16(src_height,
1507 dst_width, dst_height,
1508 src_stride, dst_stride, src, dst,
1509 0, 0, dy, 1, filtering);
1510 return;
1511 }
1512 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1513 // Scale down.
1514 if (4 * dst_width == 3 * src_width &&
1515 4 * dst_height == 3 * src_height) {
1516 // optimized, 3/4
1517 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1518 src_stride, dst_stride, src, dst, filtering);
1519 return;
1520 }
1521 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1522 // optimized, 1/2
1523 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1524 src_stride, dst_stride, src, dst, filtering);
1525 return;
1526 }
1527 // 3/8 rounded up for odd sized chroma height.
1528 if (8 * dst_width == 3 * src_width &&
1529 dst_height == ((src_height * 3 + 7) / 8)) {
1530 // optimized, 3/8
1531 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1532 src_stride, dst_stride, src, dst, filtering);
1533 return;
1534 }
1535 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1536 filtering != kFilterBilinear) {
1537 // optimized, 1/4
1538 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1539 src_stride, dst_stride, src, dst, filtering);
1540 return;
1541 }
1542 }
1543 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1544 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
1545 src_stride, dst_stride, src, dst);
1546 return;
1547 }
1548 if (filtering && dst_height > src_height) {
1549 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1550 src_stride, dst_stride, src, dst, filtering);
1551 return;
1552 }
1553 if (filtering) {
1554 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1555 src_stride, dst_stride, src, dst, filtering);
1556 return;
1557 }
1558 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
1559 src_stride, dst_stride, src, dst);
1560 }
1561
1562 // Scale an I420 image.
1563 // This function in turn calls a scaling function for each plane.
1564
1565 LIBYUV_API
I420Scale(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,int src_width,int src_height,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1566 int I420Scale(const uint8* src_y, int src_stride_y,
1567 const uint8* src_u, int src_stride_u,
1568 const uint8* src_v, int src_stride_v,
1569 int src_width, int src_height,
1570 uint8* dst_y, int dst_stride_y,
1571 uint8* dst_u, int dst_stride_u,
1572 uint8* dst_v, int dst_stride_v,
1573 int dst_width, int dst_height,
1574 enum FilterMode filtering) {
1575 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1576 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1577 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1578 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1579 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1580 src_width > 32768 || src_height > 32768 ||
1581 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1582 return -1;
1583 }
1584
1585 ScalePlane(src_y, src_stride_y, src_width, src_height,
1586 dst_y, dst_stride_y, dst_width, dst_height,
1587 filtering);
1588 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
1589 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1590 filtering);
1591 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
1592 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1593 filtering);
1594 return 0;
1595 }
1596
1597 LIBYUV_API
I420Scale_16(const uint16 * src_y,int src_stride_y,const uint16 * src_u,int src_stride_u,const uint16 * src_v,int src_stride_v,int src_width,int src_height,uint16 * dst_y,int dst_stride_y,uint16 * dst_u,int dst_stride_u,uint16 * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)1598 int I420Scale_16(const uint16* src_y, int src_stride_y,
1599 const uint16* src_u, int src_stride_u,
1600 const uint16* src_v, int src_stride_v,
1601 int src_width, int src_height,
1602 uint16* dst_y, int dst_stride_y,
1603 uint16* dst_u, int dst_stride_u,
1604 uint16* dst_v, int dst_stride_v,
1605 int dst_width, int dst_height,
1606 enum FilterMode filtering) {
1607 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1608 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1609 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1610 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1611 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1612 src_width > 32768 || src_height > 32768 ||
1613 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1614 return -1;
1615 }
1616
1617 ScalePlane_16(src_y, src_stride_y, src_width, src_height,
1618 dst_y, dst_stride_y, dst_width, dst_height,
1619 filtering);
1620 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
1621 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1622 filtering);
1623 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
1624 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1625 filtering);
1626 return 0;
1627 }
1628
1629 // Deprecated api
1630 LIBYUV_API
Scale(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,int src_stride_y,int src_stride_u,int src_stride_v,int src_width,int src_height,uint8 * dst_y,uint8 * dst_u,uint8 * dst_v,int dst_stride_y,int dst_stride_u,int dst_stride_v,int dst_width,int dst_height,LIBYUV_BOOL interpolate)1631 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
1632 int src_stride_y, int src_stride_u, int src_stride_v,
1633 int src_width, int src_height,
1634 uint8* dst_y, uint8* dst_u, uint8* dst_v,
1635 int dst_stride_y, int dst_stride_u, int dst_stride_v,
1636 int dst_width, int dst_height,
1637 LIBYUV_BOOL interpolate) {
1638 return I420Scale(src_y, src_stride_y,
1639 src_u, src_stride_u,
1640 src_v, src_stride_v,
1641 src_width, src_height,
1642 dst_y, dst_stride_y,
1643 dst_u, dst_stride_u,
1644 dst_v, dst_stride_v,
1645 dst_width, dst_height,
1646 interpolate ? kFilterBox : kFilterNone);
1647 }
1648
1649 // Deprecated api
1650 LIBYUV_API
ScaleOffset(const uint8 * src,int src_width,int src_height,uint8 * dst,int dst_width,int dst_height,int dst_yoffset,LIBYUV_BOOL interpolate)1651 int ScaleOffset(const uint8* src, int src_width, int src_height,
1652 uint8* dst, int dst_width, int dst_height, int dst_yoffset,
1653 LIBYUV_BOOL interpolate) {
1654 // Chroma requires offset to multiple of 2.
1655 int dst_yoffset_even = dst_yoffset & ~1;
1656 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1657 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1658 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1659 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1660 int aheight = dst_height - dst_yoffset_even * 2; // actual output height
1661 const uint8* src_y = src;
1662 const uint8* src_u = src + src_width * src_height;
1663 const uint8* src_v = src + src_width * src_height +
1664 src_halfwidth * src_halfheight;
1665 uint8* dst_y = dst + dst_yoffset_even * dst_width;
1666 uint8* dst_u = dst + dst_width * dst_height +
1667 (dst_yoffset_even >> 1) * dst_halfwidth;
1668 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
1669 (dst_yoffset_even >> 1) * dst_halfwidth;
1670 if (!src || src_width <= 0 || src_height <= 0 ||
1671 !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
1672 dst_yoffset_even >= dst_height) {
1673 return -1;
1674 }
1675 return I420Scale(src_y, src_width,
1676 src_u, src_halfwidth,
1677 src_v, src_halfwidth,
1678 src_width, src_height,
1679 dst_y, dst_width,
1680 dst_u, dst_halfwidth,
1681 dst_v, dst_halfwidth,
1682 dst_width, aheight,
1683 interpolate ? kFilterBox : kFilterNone);
1684 }
1685
1686 #ifdef __cplusplus
1687 } // extern "C"
1688 } // namespace libyuv
1689 #endif
1690