1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the absolute value of |v|.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Point-samples a row down by 2: each output sample is the second sample of
// the corresponding source pair. |src_stride| is not used.
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int remaining;
  // Two output samples per iteration.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}
44
// 16-bit variant: point-samples a row down by 2, taking the second sample of
// each source pair. |src_stride| is not used.
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst, int dst_width) {
  int remaining;
  // Two output samples per iteration.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}
58
ScaleRowDown2Linear_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)59 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
60 uint8* dst, int dst_width) {
61 const uint8* s = src_ptr;
62 int x;
63 for (x = 0; x < dst_width - 1; x += 2) {
64 dst[0] = (s[0] + s[1] + 1) >> 1;
65 dst[1] = (s[2] + s[3] + 1) >> 1;
66 dst += 2;
67 s += 4;
68 }
69 if (dst_width & 1) {
70 dst[0] = (s[0] + s[1] + 1) >> 1;
71 }
72 }
73
ScaleRowDown2Linear_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)74 void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
75 uint16* dst, int dst_width) {
76 const uint16* s = src_ptr;
77 int x;
78 for (x = 0; x < dst_width - 1; x += 2) {
79 dst[0] = (s[0] + s[1] + 1) >> 1;
80 dst[1] = (s[2] + s[3] + 1) >> 1;
81 dst += 2;
82 s += 4;
83 }
84 if (dst_width & 1) {
85 dst[0] = (s[0] + s[1] + 1) >> 1;
86 }
87 }
88
ScaleRowDown2Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)89 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
90 uint8* dst, int dst_width) {
91 const uint8* s = src_ptr;
92 const uint8* t = src_ptr + src_stride;
93 int x;
94 for (x = 0; x < dst_width - 1; x += 2) {
95 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
96 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
97 dst += 2;
98 s += 4;
99 t += 4;
100 }
101 if (dst_width & 1) {
102 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
103 }
104 }
105
ScaleRowDown2Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)106 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
107 uint16* dst, int dst_width) {
108 const uint16* s = src_ptr;
109 const uint16* t = src_ptr + src_stride;
110 int x;
111 for (x = 0; x < dst_width - 1; x += 2) {
112 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
113 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
114 dst += 2;
115 s += 4;
116 t += 4;
117 }
118 if (dst_width & 1) {
119 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
120 }
121 }
122
// Point-samples a row down by 4: each output sample is the third sample
// (index 2) of the corresponding group of four. |src_stride| is not used.
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int remaining;
  // Two output samples (eight source samples) per iteration.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}
136
// 16-bit variant: point-samples a row down by 4, taking the third sample
// (index 2) of each group of four. |src_stride| is not used.
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst, int dst_width) {
  int remaining;
  // Two output samples (eight source samples) per iteration.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}
150
ScaleRowDown4Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)151 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
152 uint8* dst, int dst_width) {
153 intptr_t stride = src_stride;
154 int x;
155 for (x = 0; x < dst_width - 1; x += 2) {
156 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
157 src_ptr[stride + 0] + src_ptr[stride + 1] +
158 src_ptr[stride + 2] + src_ptr[stride + 3] +
159 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
160 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
161 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
162 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
163 8) >> 4;
164 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
165 src_ptr[stride + 4] + src_ptr[stride + 5] +
166 src_ptr[stride + 6] + src_ptr[stride + 7] +
167 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
168 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
169 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
170 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
171 8) >> 4;
172 dst += 2;
173 src_ptr += 8;
174 }
175 if (dst_width & 1) {
176 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
177 src_ptr[stride + 0] + src_ptr[stride + 1] +
178 src_ptr[stride + 2] + src_ptr[stride + 3] +
179 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
180 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
181 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
182 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
183 8) >> 4;
184 }
185 }
186
ScaleRowDown4Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)187 void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
188 uint16* dst, int dst_width) {
189 intptr_t stride = src_stride;
190 int x;
191 for (x = 0; x < dst_width - 1; x += 2) {
192 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
193 src_ptr[stride + 0] + src_ptr[stride + 1] +
194 src_ptr[stride + 2] + src_ptr[stride + 3] +
195 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
196 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
197 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
198 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
199 8) >> 4;
200 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
201 src_ptr[stride + 4] + src_ptr[stride + 5] +
202 src_ptr[stride + 6] + src_ptr[stride + 7] +
203 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
204 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
205 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
206 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
207 8) >> 4;
208 dst += 2;
209 src_ptr += 8;
210 }
211 if (dst_width & 1) {
212 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
213 src_ptr[stride + 0] + src_ptr[stride + 1] +
214 src_ptr[stride + 2] + src_ptr[stride + 3] +
215 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
216 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
217 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
218 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
219 8) >> 4;
220 }
221 }
222
// Point-samples a row down to 3/4 width: from every four source samples it
// keeps samples 0, 1 and 3. |dst_width| is asserted to be a positive multiple
// of 3. |src_stride| is not used.
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
235
// 16-bit variant: point-samples a row down to 3/4 width, keeping samples 0, 1
// and 3 of every four. |dst_width| is asserted to be a positive multiple of
// 3. |src_stride| is not used.
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                         uint16* dst, int dst_width) {
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
248
249 // Filter rows 0 and 1 together, 3 : 1
ScaleRowDown34_0_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * d,int dst_width)250 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
251 uint8* d, int dst_width) {
252 const uint8* s = src_ptr;
253 const uint8* t = src_ptr + src_stride;
254 int x;
255 assert((dst_width % 3 == 0) && (dst_width > 0));
256 for (x = 0; x < dst_width; x += 3) {
257 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
258 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
259 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
260 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
261 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
262 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
263 d[0] = (a0 * 3 + b0 + 2) >> 2;
264 d[1] = (a1 * 3 + b1 + 2) >> 2;
265 d[2] = (a2 * 3 + b2 + 2) >> 2;
266 d += 3;
267 s += 4;
268 t += 4;
269 }
270 }
271
ScaleRowDown34_0_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * d,int dst_width)272 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
273 uint16* d, int dst_width) {
274 const uint16* s = src_ptr;
275 const uint16* t = src_ptr + src_stride;
276 int x;
277 assert((dst_width % 3 == 0) && (dst_width > 0));
278 for (x = 0; x < dst_width; x += 3) {
279 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
280 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
281 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
282 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
283 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
284 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
285 d[0] = (a0 * 3 + b0 + 2) >> 2;
286 d[1] = (a1 * 3 + b1 + 2) >> 2;
287 d[2] = (a2 * 3 + b2 + 2) >> 2;
288 d += 3;
289 s += 4;
290 t += 4;
291 }
292 }
293
294 // Filter rows 1 and 2 together, 1 : 1
ScaleRowDown34_1_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * d,int dst_width)295 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
296 uint8* d, int dst_width) {
297 const uint8* s = src_ptr;
298 const uint8* t = src_ptr + src_stride;
299 int x;
300 assert((dst_width % 3 == 0) && (dst_width > 0));
301 for (x = 0; x < dst_width; x += 3) {
302 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
303 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
304 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
305 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
306 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
307 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
308 d[0] = (a0 + b0 + 1) >> 1;
309 d[1] = (a1 + b1 + 1) >> 1;
310 d[2] = (a2 + b2 + 1) >> 1;
311 d += 3;
312 s += 4;
313 t += 4;
314 }
315 }
316
ScaleRowDown34_1_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * d,int dst_width)317 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
318 uint16* d, int dst_width) {
319 const uint16* s = src_ptr;
320 const uint16* t = src_ptr + src_stride;
321 int x;
322 assert((dst_width % 3 == 0) && (dst_width > 0));
323 for (x = 0; x < dst_width; x += 3) {
324 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
325 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
326 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
327 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
328 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
329 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
330 d[0] = (a0 + b0 + 1) >> 1;
331 d[1] = (a1 + b1 + 1) >> 1;
332 d[2] = (a2 + b2 + 1) >> 1;
333 d += 3;
334 s += 4;
335 t += 4;
336 }
337 }
338
339 // Scales a single row of pixels using point sampling.
ScaleCols_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
341 int dst_width, int x, int dx) {
342 int j;
343 for (j = 0; j < dst_width - 1; j += 2) {
344 dst_ptr[0] = src_ptr[x >> 16];
345 x += dx;
346 dst_ptr[1] = src_ptr[x >> 16];
347 x += dx;
348 dst_ptr += 2;
349 }
350 if (dst_width & 1) {
351 dst_ptr[0] = src_ptr[x >> 16];
352 }
353 }
354
ScaleCols_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
356 int dst_width, int x, int dx) {
357 int j;
358 for (j = 0; j < dst_width - 1; j += 2) {
359 dst_ptr[0] = src_ptr[x >> 16];
360 x += dx;
361 dst_ptr[1] = src_ptr[x >> 16];
362 x += dx;
363 dst_ptr += 2;
364 }
365 if (dst_width & 1) {
366 dst_ptr[0] = src_ptr[x >> 16];
367 }
368 }
369
370 // Scales a single row of pixels up by 2x using point sampling.
ScaleColsUp2_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
372 int dst_width, int x, int dx) {
373 int j;
374 for (j = 0; j < dst_width - 1; j += 2) {
375 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
376 src_ptr += 1;
377 dst_ptr += 2;
378 }
379 if (dst_width & 1) {
380 dst_ptr[0] = src_ptr[0];
381 }
382 }
383
ScaleColsUp2_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
385 int dst_width, int x, int dx) {
386 int j;
387 for (j = 0; j < dst_width - 1; j += 2) {
388 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
389 src_ptr += 1;
390 dst_ptr += 2;
391 }
392 if (dst_width & 1) {
393 dst_ptr[0] = src_ptr[0];
394 }
395 }
396
// Linear blend of two 8-bit samples: (1-f)a + fb, computed as a + f(b-a).
// |f| is a 16-bit fraction, so the product is shifted right by 16.
#define BLENDER(a, b, f) \
    (uint8)((int)(a) + (((int)(f) * ((int)(b) - (int)(a))) >> 16))
400
ScaleFilterCols_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)401 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
402 int dst_width, int x, int dx) {
403 int j;
404 for (j = 0; j < dst_width - 1; j += 2) {
405 int xi = x >> 16;
406 int a = src_ptr[xi];
407 int b = src_ptr[xi + 1];
408 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
409 x += dx;
410 xi = x >> 16;
411 a = src_ptr[xi];
412 b = src_ptr[xi + 1];
413 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
414 x += dx;
415 dst_ptr += 2;
416 }
417 if (dst_width & 1) {
418 int xi = x >> 16;
419 int a = src_ptr[xi];
420 int b = src_ptr[xi + 1];
421 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
422 }
423 }
424
ScaleFilterCols64_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x32,int dx)425 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
426 int dst_width, int x32, int dx) {
427 int64 x = (int64)(x32);
428 int j;
429 for (j = 0; j < dst_width - 1; j += 2) {
430 int64 xi = x >> 16;
431 int a = src_ptr[xi];
432 int b = src_ptr[xi + 1];
433 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
434 x += dx;
435 xi = x >> 16;
436 a = src_ptr[xi];
437 b = src_ptr[xi + 1];
438 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
439 x += dx;
440 dst_ptr += 2;
441 }
442 if (dst_width & 1) {
443 int64 xi = x >> 16;
444 int a = src_ptr[xi];
445 int b = src_ptr[xi + 1];
446 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
447 }
448 }
449 #undef BLENDER
450
// Linear blend of two 16-bit samples: a + f(b-a), with |f| a 16-bit fraction.
// The product f * (b - a) can reach 65535 * 65535 in magnitude, which does
// not fit in a 32-bit int, so it is evaluated in 64 bits before the shift.
#define BLENDER(a, b, f) \
    (uint16)((int)(a) + \
             (int)(((int64)(f) * ((int64)(b) - (int64)(a))) >> 16))
453
ScaleFilterCols_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)454 void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
455 int dst_width, int x, int dx) {
456 int j;
457 for (j = 0; j < dst_width - 1; j += 2) {
458 int xi = x >> 16;
459 int a = src_ptr[xi];
460 int b = src_ptr[xi + 1];
461 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
462 x += dx;
463 xi = x >> 16;
464 a = src_ptr[xi];
465 b = src_ptr[xi + 1];
466 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
467 x += dx;
468 dst_ptr += 2;
469 }
470 if (dst_width & 1) {
471 int xi = x >> 16;
472 int a = src_ptr[xi];
473 int b = src_ptr[xi + 1];
474 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
475 }
476 }
477
ScaleFilterCols64_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x32,int dx)478 void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
479 int dst_width, int x32, int dx) {
480 int64 x = (int64)(x32);
481 int j;
482 for (j = 0; j < dst_width - 1; j += 2) {
483 int64 xi = x >> 16;
484 int a = src_ptr[xi];
485 int b = src_ptr[xi + 1];
486 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
487 x += dx;
488 xi = x >> 16;
489 a = src_ptr[xi];
490 b = src_ptr[xi + 1];
491 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
492 x += dx;
493 dst_ptr += 2;
494 }
495 if (dst_width & 1) {
496 int64 xi = x >> 16;
497 int a = src_ptr[xi];
498 int b = src_ptr[xi + 1];
499 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
500 }
501 }
502 #undef BLENDER
503
// Point-samples a row down to 3/8 width: from every eight source samples it
// keeps samples 0, 3 and 6. |dst_width| is asserted to be a multiple of 3.
// |src_stride| is not used.
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
516
// 16-bit variant: point-samples a row down to 3/8 width, keeping samples 0, 3
// and 6 of every eight. |dst_width| is asserted to be a multiple of 3.
// |src_stride| is not used.
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                         uint16* dst, int dst_width) {
  int remaining;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
529
530 // 8x3 -> 3x1
ScaleRowDown38_3_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst_ptr,int dst_width)531 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
532 ptrdiff_t src_stride,
533 uint8* dst_ptr, int dst_width) {
534 intptr_t stride = src_stride;
535 int i;
536 assert((dst_width % 3 == 0) && (dst_width > 0));
537 for (i = 0; i < dst_width; i += 3) {
538 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
539 src_ptr[stride + 0] + src_ptr[stride + 1] +
540 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
541 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
542 (65536 / 9) >> 16;
543 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
544 src_ptr[stride + 3] + src_ptr[stride + 4] +
545 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
546 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
547 (65536 / 9) >> 16;
548 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
549 src_ptr[stride + 6] + src_ptr[stride + 7] +
550 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
551 (65536 / 6) >> 16;
552 src_ptr += 8;
553 dst_ptr += 3;
554 }
555 }
556
ScaleRowDown38_3_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst_ptr,int dst_width)557 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
558 ptrdiff_t src_stride,
559 uint16* dst_ptr, int dst_width) {
560 intptr_t stride = src_stride;
561 int i;
562 assert((dst_width % 3 == 0) && (dst_width > 0));
563 for (i = 0; i < dst_width; i += 3) {
564 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
565 src_ptr[stride + 0] + src_ptr[stride + 1] +
566 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
567 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
568 (65536 / 9) >> 16;
569 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
570 src_ptr[stride + 3] + src_ptr[stride + 4] +
571 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
572 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
573 (65536 / 9) >> 16;
574 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
575 src_ptr[stride + 6] + src_ptr[stride + 7] +
576 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
577 (65536 / 6) >> 16;
578 src_ptr += 8;
579 dst_ptr += 3;
580 }
581 }
582
583 // 8x2 -> 3x1
ScaleRowDown38_2_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst_ptr,int dst_width)584 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
585 uint8* dst_ptr, int dst_width) {
586 intptr_t stride = src_stride;
587 int i;
588 assert((dst_width % 3 == 0) && (dst_width > 0));
589 for (i = 0; i < dst_width; i += 3) {
590 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
591 src_ptr[stride + 0] + src_ptr[stride + 1] +
592 src_ptr[stride + 2]) * (65536 / 6) >> 16;
593 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
594 src_ptr[stride + 3] + src_ptr[stride + 4] +
595 src_ptr[stride + 5]) * (65536 / 6) >> 16;
596 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
597 src_ptr[stride + 6] + src_ptr[stride + 7]) *
598 (65536 / 4) >> 16;
599 src_ptr += 8;
600 dst_ptr += 3;
601 }
602 }
603
ScaleRowDown38_2_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst_ptr,int dst_width)604 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
605 uint16* dst_ptr, int dst_width) {
606 intptr_t stride = src_stride;
607 int i;
608 assert((dst_width % 3 == 0) && (dst_width > 0));
609 for (i = 0; i < dst_width; i += 3) {
610 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
611 src_ptr[stride + 0] + src_ptr[stride + 1] +
612 src_ptr[stride + 2]) * (65536 / 6) >> 16;
613 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
614 src_ptr[stride + 3] + src_ptr[stride + 4] +
615 src_ptr[stride + 5]) * (65536 / 6) >> 16;
616 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
617 src_ptr[stride + 6] + src_ptr[stride + 7]) *
618 (65536 / 4) >> 16;
619 src_ptr += 8;
620 dst_ptr += 3;
621 }
622 }
623
// Accumulates a row of 8-bit samples into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for each of the |src_width| samples.
// |src_width| is asserted to be positive.
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two samples per iteration.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Odd width: one trailing sample.
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
637
// Accumulates a row of 16-bit samples into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for each of the |src_width| samples.
// |src_width| is asserted to be positive.
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two samples per iteration.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Odd width: one trailing sample.
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
651
ScaleARGBRowDown2_C(const uint8 * src_argb,ptrdiff_t src_stride,uint8 * dst_argb,int dst_width)652 void ScaleARGBRowDown2_C(const uint8* src_argb,
653 ptrdiff_t src_stride,
654 uint8* dst_argb, int dst_width) {
655 const uint32* src = (const uint32*)(src_argb);
656 uint32* dst = (uint32*)(dst_argb);
657
658 int x;
659 for (x = 0; x < dst_width - 1; x += 2) {
660 dst[0] = src[1];
661 dst[1] = src[3];
662 src += 4;
663 dst += 2;
664 }
665 if (dst_width & 1) {
666 dst[0] = src[1];
667 }
668 }
669
// Scales an ARGB row down by 2 by averaging (with rounding) each horizontal
// pair of pixels, one byte channel at a time. |src_stride| is not used.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb, int dst_width) {
  int remaining;
  int c;
  for (remaining = dst_width; remaining > 0; --remaining) {
    // Channel c of the two source pixels sits at offsets c and c + 4.
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
683
// Scales ARGB down by 2 with a 2x2 box filter: each output channel is the
// rounded average of that channel over two adjacent pixels in the row at
// |src_argb| and in the row |src_stride| bytes after it.
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) {
  int remaining;
  int c;
  for (remaining = dst_width; remaining > 0; --remaining) {
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] +
                     src_argb[src_stride + c] +
                     src_argb[src_stride + c + 4] + 2) >> 2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
700
ScaleARGBRowDownEven_C(const uint8 * src_argb,ptrdiff_t src_stride,int src_stepx,uint8 * dst_argb,int dst_width)701 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
702 int src_stepx,
703 uint8* dst_argb, int dst_width) {
704 const uint32* src = (const uint32*)(src_argb);
705 uint32* dst = (uint32*)(dst_argb);
706
707 int x;
708 for (x = 0; x < dst_width - 1; x += 2) {
709 dst[0] = src[0];
710 dst[1] = src[src_stepx];
711 src += src_stepx * 2;
712 dst += 2;
713 }
714 if (dst_width & 1) {
715 dst[0] = src[0];
716 }
717 }
718
// Box-filters ARGB with a horizontal step of |src_stepx| pixels: each output
// channel is the rounded average of that channel over a 2x2 pixel block
// taken from the row at |src_argb| and the row |src_stride| bytes after it.
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width) {
  int remaining;
  int c;
  for (remaining = dst_width; remaining > 0; --remaining) {
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] +
                     src_argb[src_stride + c] +
                     src_argb[src_stride + c + 4] + 2) >> 2;
    }
    // Advance by |src_stepx| pixels of four bytes each.
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
737
738 // Scales a single row of pixels using point sampling.
ScaleARGBCols_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)739 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
740 int dst_width, int x, int dx) {
741 const uint32* src = (const uint32*)(src_argb);
742 uint32* dst = (uint32*)(dst_argb);
743 int j;
744 for (j = 0; j < dst_width - 1; j += 2) {
745 dst[0] = src[x >> 16];
746 x += dx;
747 dst[1] = src[x >> 16];
748 x += dx;
749 dst += 2;
750 }
751 if (dst_width & 1) {
752 dst[0] = src[x >> 16];
753 }
754 }
755
ScaleARGBCols64_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x32,int dx)756 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
757 int dst_width, int x32, int dx) {
758 int64 x = (int64)(x32);
759 const uint32* src = (const uint32*)(src_argb);
760 uint32* dst = (uint32*)(dst_argb);
761 int j;
762 for (j = 0; j < dst_width - 1; j += 2) {
763 dst[0] = src[x >> 16];
764 x += dx;
765 dst[1] = src[x >> 16];
766 x += dx;
767 dst += 2;
768 }
769 if (dst_width & 1) {
770 dst[0] = src[x >> 16];
771 }
772 }
773
774 // Scales a single row of pixels up by 2x using point sampling.
ScaleARGBColsUp2_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)775 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
776 int dst_width, int x, int dx) {
777 const uint32* src = (const uint32*)(src_argb);
778 uint32* dst = (uint32*)(dst_argb);
779 int j;
780 for (j = 0; j < dst_width - 1; j += 2) {
781 dst[1] = dst[0] = src[0];
782 src += 1;
783 dst += 2;
784 }
785 if (dst_width & 1) {
786 dst[0] = src[0];
787 }
788 }
789
// Mimics SSSE3 blender
// BLENDER1 blends two channel values with a 7-bit fraction |f|:
//   (a * (0x7f ^ f) + b * f) >> 7.
// BLENDERC extracts the 8-bit channel at bit offset |s| from both pixels,
// blends it and shifts the result back into place.
// BLENDER combines the four channels of a 32-bit pixel.
// Every parameter use and every expansion is parenthesized so the macros
// stay correct when given compound expressions or used inside a larger
// expression.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
    (uint32)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f) \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
797
ScaleARGBFilterCols_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)798 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
799 int dst_width, int x, int dx) {
800 const uint32* src = (const uint32*)(src_argb);
801 uint32* dst = (uint32*)(dst_argb);
802 int j;
803 for (j = 0; j < dst_width - 1; j += 2) {
804 int xi = x >> 16;
805 int xf = (x >> 9) & 0x7f;
806 uint32 a = src[xi];
807 uint32 b = src[xi + 1];
808 dst[0] = BLENDER(a, b, xf);
809 x += dx;
810 xi = x >> 16;
811 xf = (x >> 9) & 0x7f;
812 a = src[xi];
813 b = src[xi + 1];
814 dst[1] = BLENDER(a, b, xf);
815 x += dx;
816 dst += 2;
817 }
818 if (dst_width & 1) {
819 int xi = x >> 16;
820 int xf = (x >> 9) & 0x7f;
821 uint32 a = src[xi];
822 uint32 b = src[xi + 1];
823 dst[0] = BLENDER(a, b, xf);
824 }
825 }
826
ScaleARGBFilterCols64_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x32,int dx)827 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
828 int dst_width, int x32, int dx) {
829 int64 x = (int64)(x32);
830 const uint32* src = (const uint32*)(src_argb);
831 uint32* dst = (uint32*)(dst_argb);
832 int j;
833 for (j = 0; j < dst_width - 1; j += 2) {
834 int64 xi = x >> 16;
835 int xf = (x >> 9) & 0x7f;
836 uint32 a = src[xi];
837 uint32 b = src[xi + 1];
838 dst[0] = BLENDER(a, b, xf);
839 x += dx;
840 xi = x >> 16;
841 xf = (x >> 9) & 0x7f;
842 a = src[xi];
843 b = src[xi + 1];
844 dst[1] = BLENDER(a, b, xf);
845 x += dx;
846 dst += 2;
847 }
848 if (dst_width & 1) {
849 int64 xi = x >> 16;
850 int xf = (x >> 9) & 0x7f;
851 uint32 a = src[xi];
852 uint32 b = src[xi + 1];
853 dst[0] = BLENDER(a, b, xf);
854 }
855 }
856 #undef BLENDER1
857 #undef BLENDERC
858 #undef BLENDER
859
860 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)861 void ScalePlaneVertical(int src_height,
862 int dst_width, int dst_height,
863 int src_stride, int dst_stride,
864 const uint8* src_argb, uint8* dst_argb,
865 int x, int y, int dy,
866 int bpp, enum FilterMode filtering) {
867 // TODO(fbarchard): Allow higher bpp.
868 int dst_width_bytes = dst_width * bpp;
869 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
870 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
871 InterpolateRow_C;
872 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
873 int j;
874 assert(bpp >= 1 && bpp <= 4);
875 assert(src_height != 0);
876 assert(dst_width > 0);
877 assert(dst_height > 0);
878 src_argb += (x >> 16) * bpp;
879 #if defined(HAS_INTERPOLATEROW_SSE2)
880 if (TestCpuFlag(kCpuHasSSE2)) {
881 InterpolateRow = InterpolateRow_Any_SSE2;
882 if (IS_ALIGNED(dst_width_bytes, 16)) {
883 InterpolateRow = InterpolateRow_SSE2;
884 }
885 }
886 #endif
887 #if defined(HAS_INTERPOLATEROW_SSSE3)
888 if (TestCpuFlag(kCpuHasSSSE3)) {
889 InterpolateRow = InterpolateRow_Any_SSSE3;
890 if (IS_ALIGNED(dst_width_bytes, 16)) {
891 InterpolateRow = InterpolateRow_SSSE3;
892 }
893 }
894 #endif
895 #if defined(HAS_INTERPOLATEROW_AVX2)
896 if (TestCpuFlag(kCpuHasAVX2)) {
897 InterpolateRow = InterpolateRow_Any_AVX2;
898 if (IS_ALIGNED(dst_width_bytes, 32)) {
899 InterpolateRow = InterpolateRow_AVX2;
900 }
901 }
902 #endif
903 #if defined(HAS_INTERPOLATEROW_NEON)
904 if (TestCpuFlag(kCpuHasNEON)) {
905 InterpolateRow = InterpolateRow_Any_NEON;
906 if (IS_ALIGNED(dst_width_bytes, 16)) {
907 InterpolateRow = InterpolateRow_NEON;
908 }
909 }
910 #endif
911 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
912 if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
913 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
914 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
915 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
916 if (IS_ALIGNED(dst_width_bytes, 4)) {
917 InterpolateRow = InterpolateRow_MIPS_DSPR2;
918 }
919 }
920 #endif
921 for (j = 0; j < dst_height; ++j) {
922 int yi;
923 int yf;
924 if (y > max_y) {
925 y = max_y;
926 }
927 yi = y >> 16;
928 yf = filtering ? ((y >> 8) & 255) : 0;
929 InterpolateRow(dst_argb, src_argb + yi * src_stride,
930 src_stride, dst_width_bytes, yf);
931 dst_argb += dst_stride;
932 y += dy;
933 }
934 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16 * src_argb,uint16 * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)935 void ScalePlaneVertical_16(int src_height,
936 int dst_width, int dst_height,
937 int src_stride, int dst_stride,
938 const uint16* src_argb, uint16* dst_argb,
939 int x, int y, int dy,
940 int wpp, enum FilterMode filtering) {
941 // TODO(fbarchard): Allow higher wpp.
942 int dst_width_words = dst_width * wpp;
943 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
944 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
945 InterpolateRow_16_C;
946 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
947 int j;
948 assert(wpp >= 1 && wpp <= 2);
949 assert(src_height != 0);
950 assert(dst_width > 0);
951 assert(dst_height > 0);
952 src_argb += (x >> 16) * wpp;
953 #if defined(HAS_INTERPOLATEROW_16_SSE2)
954 if (TestCpuFlag(kCpuHasSSE2)) {
955 InterpolateRow = InterpolateRow_Any_16_SSE2;
956 if (IS_ALIGNED(dst_width_bytes, 16)) {
957 InterpolateRow = InterpolateRow_16_SSE2;
958 }
959 }
960 #endif
961 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
962 if (TestCpuFlag(kCpuHasSSSE3)) {
963 InterpolateRow = InterpolateRow_Any_16_SSSE3;
964 if (IS_ALIGNED(dst_width_bytes, 16)) {
965 InterpolateRow = InterpolateRow_16_SSSE3;
966 }
967 }
968 #endif
969 #if defined(HAS_INTERPOLATEROW_16_AVX2)
970 if (TestCpuFlag(kCpuHasAVX2)) {
971 InterpolateRow = InterpolateRow_Any_16_AVX2;
972 if (IS_ALIGNED(dst_width_bytes, 32)) {
973 InterpolateRow = InterpolateRow_16_AVX2;
974 }
975 }
976 #endif
977 #if defined(HAS_INTERPOLATEROW_16_NEON)
978 if (TestCpuFlag(kCpuHasNEON)) {
979 InterpolateRow = InterpolateRow_Any_16_NEON;
980 if (IS_ALIGNED(dst_width_bytes, 16)) {
981 InterpolateRow = InterpolateRow_16_NEON;
982 }
983 }
984 #endif
985 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
986 if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
987 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
988 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
989 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
990 if (IS_ALIGNED(dst_width_bytes, 4)) {
991 InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
992 }
993 }
994 #endif
995 for (j = 0; j < dst_height; ++j) {
996 int yi;
997 int yf;
998 if (y > max_y) {
999 y = max_y;
1000 }
1001 yi = y >> 16;
1002 yf = filtering ? ((y >> 8) & 255) : 0;
1003 InterpolateRow(dst_argb, src_argb + yi * src_stride,
1004 src_stride, dst_width_words, yf);
1005 dst_argb += dst_stride;
1006 y += dy;
1007 }
1008 }
1009
1010 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1011 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
1012 int dst_width, int dst_height,
1013 enum FilterMode filtering) {
1014 if (src_width < 0) {
1015 src_width = -src_width;
1016 }
1017 if (src_height < 0) {
1018 src_height = -src_height;
1019 }
1020 if (filtering == kFilterBox) {
1021 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1022 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1023 filtering = kFilterBilinear;
1024 }
1025 }
1026 if (filtering == kFilterBilinear) {
1027 if (src_height == 1) {
1028 filtering = kFilterLinear;
1029 }
1030 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1031 if (dst_height == src_height || dst_height * 3 == src_height) {
1032 filtering = kFilterLinear;
1033 }
1034 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1035 // avoid reading 2 pixels horizontally that causes memory exception.
1036 if (src_width == 1) {
1037 filtering = kFilterNone;
1038 }
1039 }
1040 if (filtering == kFilterLinear) {
1041 if (src_width == 1) {
1042 filtering = kFilterNone;
1043 }
1044 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1045 if (dst_width == src_width || dst_width * 3 == src_width) {
1046 filtering = kFilterNone;
1047 }
1048 }
1049 return filtering;
1050 }
1051
1052 // Divide num by div and return as 16.16 fixed point result.
FixedDiv_C(int num,int div)1053 int FixedDiv_C(int num, int div) {
1054 return (int)(((int64)(num) << 16) / div);
1055 }
1056
1057 // Divide num by div and return as 16.16 fixed point result.
FixedDiv1_C(int num,int div)1058 int FixedDiv1_C(int num, int div) {
1059 return (int)((((int64)(num) << 16) - 0x00010001) /
1060 (div - 1));
1061 }
1062
// Starting coordinate for a step of dx: half the step plus offset s, with the
// result mirrored about zero when dx is negative.
// Parameters and the whole expansion are parenthesized so the macro is safe
// with compound arguments and inside larger expressions.
// Note: dx is evaluated more than once; do not pass expressions with side
// effects.
#define CENTERSTART(dx, s) \
    (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
1064
1065 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1066 void ScaleSlope(int src_width, int src_height,
1067 int dst_width, int dst_height,
1068 enum FilterMode filtering,
1069 int* x, int* y, int* dx, int* dy) {
1070 assert(x != NULL);
1071 assert(y != NULL);
1072 assert(dx != NULL);
1073 assert(dy != NULL);
1074 assert(src_width != 0);
1075 assert(src_height != 0);
1076 assert(dst_width > 0);
1077 assert(dst_height > 0);
1078 // Check for 1 pixel and avoid FixedDiv overflow.
1079 if (dst_width == 1 && src_width >= 32768) {
1080 dst_width = src_width;
1081 }
1082 if (dst_height == 1 && src_height >= 32768) {
1083 dst_height = src_height;
1084 }
1085 if (filtering == kFilterBox) {
1086 // Scale step for point sampling duplicates all pixels equally.
1087 *dx = FixedDiv(Abs(src_width), dst_width);
1088 *dy = FixedDiv(src_height, dst_height);
1089 *x = 0;
1090 *y = 0;
1091 } else if (filtering == kFilterBilinear) {
1092 // Scale step for bilinear sampling renders last pixel once for upsample.
1093 if (dst_width <= Abs(src_width)) {
1094 *dx = FixedDiv(Abs(src_width), dst_width);
1095 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1096 } else if (dst_width > 1) {
1097 *dx = FixedDiv1(Abs(src_width), dst_width);
1098 *x = 0;
1099 }
1100 if (dst_height <= src_height) {
1101 *dy = FixedDiv(src_height, dst_height);
1102 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1103 } else if (dst_height > 1) {
1104 *dy = FixedDiv1(src_height, dst_height);
1105 *y = 0;
1106 }
1107 } else if (filtering == kFilterLinear) {
1108 // Scale step for bilinear sampling renders last pixel once for upsample.
1109 if (dst_width <= Abs(src_width)) {
1110 *dx = FixedDiv(Abs(src_width), dst_width);
1111 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1112 } else if (dst_width > 1) {
1113 *dx = FixedDiv1(Abs(src_width), dst_width);
1114 *x = 0;
1115 }
1116 *dy = FixedDiv(src_height, dst_height);
1117 *y = *dy >> 1;
1118 } else {
1119 // Scale step for point sampling duplicates all pixels equally.
1120 *dx = FixedDiv(Abs(src_width), dst_width);
1121 *dy = FixedDiv(src_height, dst_height);
1122 *x = CENTERSTART(*dx, 0);
1123 *y = CENTERSTART(*dy, 0);
1124 }
1125 // Negative src_width means horizontally mirror.
1126 if (src_width < 0) {
1127 *x += (dst_width - 1) * *dx;
1128 *dx = -*dx;
1129 // src_width = -src_width; // Caller must do this.
1130 }
1131 }
1132 #undef CENTERSTART
1133
1134 #ifdef __cplusplus
1135 } // extern "C"
1136 } // namespace libyuv
1137 #endif
1138