1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Halves a row by point sampling: output pixel i is source pixel 2 * i + 1.
// src_stride is not used.
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  // Produce two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
  }
  // An odd width leaves one more pixel to write.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
44
// 16-bit variant of point-sampled 1/2 scaling: output pixel i is source
// pixel 2 * i + 1. src_stride is not used.
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  // Produce two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
  }
  // An odd width leaves one more pixel to write.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
58
// Halves a row horizontally: each output pixel is the rounded average of a
// pair of adjacent source pixels. src_stride is not used.
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst, int dst_width) {
  const uint8* in = src_ptr;
  int remaining;
  (void)src_stride;
  // Two output pixels (four source pixels) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (uint8)((in[0] + in[1] + 1) >> 1);
    dst[1] = (uint8)((in[2] + in[3] + 1) >> 1);
    in += 4;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = (uint8)((in[0] + in[1] + 1) >> 1);
  }
}
73
// 16-bit variant: each output pixel is the rounded average of a pair of
// adjacent source pixels. src_stride is not used.
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                              uint16* dst, int dst_width) {
  const uint16* in = src_ptr;
  int remaining;
  (void)src_stride;
  // Two output pixels (four source pixels) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (uint16)((in[0] + in[1] + 1) >> 1);
    dst[1] = (uint16)((in[2] + in[3] + 1) >> 1);
    in += 4;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = (uint16)((in[0] + in[1] + 1) >> 1);
  }
}
88
// Halves a row with a 2x2 box filter: each output pixel is the rounded
// average of two adjacent pixels from the row at src_ptr and the two pixels
// at the same columns in the row src_stride bytes later.
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bot = src_ptr + src_stride;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (uint8)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
    dst[1] = (uint8)((top[2] + top[3] + bot[2] + bot[3] + 2) >> 2);
    top += 4;
    bot += 4;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = (uint8)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
  }
}
105
// 2x2 box filter for a row whose final output pixel covers only one source
// column: the first dst_width - 1 pixels average a 2x2 block, and the last
// pixel is the rounded average of one pixel from each of the two rows.
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst, int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bot = src_ptr + src_stride;
  const int full = dst_width - 1;  // Pixels that get a complete 2x2 box.
  int remaining;
  // Two full-box pixels per pass.
  for (remaining = full; remaining > 1; remaining -= 2) {
    dst[0] = (uint8)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
    dst[1] = (uint8)((top[2] + top[3] + bot[2] + bot[3] + 2) >> 2);
    top += 4;
    bot += 4;
    dst += 2;
  }
  // One more full-box pixel when the count of full pixels is odd.
  if (full & 1) {
    dst[0] = (uint8)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
    top += 2;
    bot += 2;
    dst += 1;
  }
  // Final pixel: vertical average of a single column.
  dst[0] = (uint8)((top[0] + bot[0] + 1) >> 1);
}
127
// 16-bit 2x2 box filter: each output pixel is the rounded average of two
// adjacent pixels from the row at src_ptr and the two pixels at the same
// columns in the row src_stride elements later.
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst, int dst_width) {
  const uint16* top = src_ptr;
  const uint16* bot = src_ptr + src_stride;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (uint16)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
    dst[1] = (uint16)((top[2] + top[3] + bot[2] + bot[3] + 2) >> 2);
    top += 4;
    bot += 4;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = (uint16)((top[0] + top[1] + bot[0] + bot[1] + 2) >> 2);
  }
}
144
// Quarter-width point sampling: output pixel i is source pixel 4 * i + 2.
// src_stride is not used.
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output pixels (eight source pixels) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
158
// 16-bit quarter-width point sampling: output pixel i is source pixel
// 4 * i + 2. src_stride is not used.
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output pixels (eight source pixels) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
172
// Quarter-width 4x4 box filter: each output pixel is the rounded average of
// a 4x4 block taken from four consecutive rows spaced src_stride bytes apart.
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width) {
  const intptr_t stride = src_stride;
  const uint8* r0 = src_ptr;
  const uint8* r1 = src_ptr + stride;
  const uint8* r2 = src_ptr + stride * 2;
  const uint8* r3 = src_ptr + stride * 3;
  int remaining;
  // Two output pixels (two 4x4 blocks) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (r0[0] + r0[1] + r0[2] + r0[3] +
              r1[0] + r1[1] + r1[2] + r1[3] +
              r2[0] + r2[1] + r2[2] + r2[3] +
              r3[0] + r3[1] + r3[2] + r3[3] + 8) >> 4;
    dst[1] = (r0[4] + r0[5] + r0[6] + r0[7] +
              r1[4] + r1[5] + r1[6] + r1[7] +
              r2[4] + r2[5] + r2[6] + r2[7] +
              r3[4] + r3[5] + r3[6] + r3[7] + 8) >> 4;
    r0 += 8;
    r1 += 8;
    r2 += 8;
    r3 += 8;
    dst += 2;
  }
  // Trailing block for odd widths.
  if (dst_width & 1) {
    dst[0] = (r0[0] + r0[1] + r0[2] + r0[3] +
              r1[0] + r1[1] + r1[2] + r1[3] +
              r2[0] + r2[1] + r2[2] + r2[3] +
              r3[0] + r3[1] + r3[2] + r3[3] + 8) >> 4;
  }
}
208
// 16-bit quarter-width 4x4 box filter: each output pixel is the rounded
// average of a 4x4 block taken from four consecutive rows spaced src_stride
// elements apart.
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst, int dst_width) {
  const intptr_t stride = src_stride;
  const uint16* r0 = src_ptr;
  const uint16* r1 = src_ptr + stride;
  const uint16* r2 = src_ptr + stride * 2;
  const uint16* r3 = src_ptr + stride * 3;
  int remaining;
  // Two output pixels (two 4x4 blocks) per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst[0] = (r0[0] + r0[1] + r0[2] + r0[3] +
              r1[0] + r1[1] + r1[2] + r1[3] +
              r2[0] + r2[1] + r2[2] + r2[3] +
              r3[0] + r3[1] + r3[2] + r3[3] + 8) >> 4;
    dst[1] = (r0[4] + r0[5] + r0[6] + r0[7] +
              r1[4] + r1[5] + r1[6] + r1[7] +
              r2[4] + r2[5] + r2[6] + r2[7] +
              r3[4] + r3[5] + r3[6] + r3[7] + 8) >> 4;
    r0 += 8;
    r1 += 8;
    r2 += 8;
    r3 += 8;
    dst += 2;
  }
  // Trailing block for odd widths.
  if (dst_width & 1) {
    dst[0] = (r0[0] + r0[1] + r0[2] + r0[3] +
              r1[0] + r1[1] + r1[2] + r1[3] +
              r2[0] + r2[1] + r2[2] + r2[3] +
              r3[0] + r3[1] + r3[2] + r3[3] + 8) >> 4;
  }
}
244
// 3/4 point sampling: from every group of four source pixels, pixels 0, 1
// and 3 are copied to three output pixels. dst_width is asserted to be a
// positive multiple of 3. src_stride is not used.
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
257
// 16-bit 3/4 point sampling: from every group of four source pixels, pixels
// 0, 1 and 3 are copied to three output pixels. dst_width is asserted to be
// a positive multiple of 3. src_stride is not used.
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                         uint16* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
270
// 3/4 scaling with filtering; blends two rows with weights 3 : 1.
// Each group of four source pixels is first filtered horizontally into three
// values per row (weights 3:1, 1:1, 1:3), then the row at src_ptr is weighted
// 3 and the row src_stride bytes later is weighted 1.
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    uint8 h_top[3];
    uint8 h_bot[3];
    int k;
    // Horizontal pass on each row.
    h_top[0] = (top[0] * 3 + top[1] * 1 + 2) >> 2;
    h_top[1] = (top[1] * 1 + top[2] * 1 + 1) >> 1;
    h_top[2] = (top[2] * 1 + top[3] * 3 + 2) >> 2;
    h_bot[0] = (bot[0] * 3 + bot[1] * 1 + 2) >> 2;
    h_bot[1] = (bot[1] * 1 + bot[2] * 1 + 1) >> 1;
    h_bot[2] = (bot[2] * 1 + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 3 : 1.
    for (k = 0; k < 3; ++k) {
      d[k] = (h_top[k] * 3 + h_bot[k] + 2) >> 2;
    }
    top += 4;
    bot += 4;
    d += 3;
  }
}
293
// 16-bit 3/4 scaling with filtering; blends two rows with weights 3 : 1.
// Each group of four source pixels is first filtered horizontally into three
// values per row (weights 3:1, 1:1, 1:3), then the row at src_ptr is weighted
// 3 and the row src_stride elements later is weighted 1.
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* d, int dst_width) {
  const uint16* top = src_ptr;
  const uint16* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    uint16 h_top[3];
    uint16 h_bot[3];
    int k;
    // Horizontal pass on each row.
    h_top[0] = (top[0] * 3 + top[1] * 1 + 2) >> 2;
    h_top[1] = (top[1] * 1 + top[2] * 1 + 1) >> 1;
    h_top[2] = (top[2] * 1 + top[3] * 3 + 2) >> 2;
    h_bot[0] = (bot[0] * 3 + bot[1] * 1 + 2) >> 2;
    h_bot[1] = (bot[1] * 1 + bot[2] * 1 + 1) >> 1;
    h_bot[2] = (bot[2] * 1 + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 3 : 1.
    for (k = 0; k < 3; ++k) {
      d[k] = (h_top[k] * 3 + h_bot[k] + 2) >> 2;
    }
    top += 4;
    bot += 4;
    d += 3;
  }
}
315
// 3/4 scaling with filtering; blends two rows with equal weights (1 : 1).
// Each group of four source pixels is first filtered horizontally into three
// values per row (weights 3:1, 1:1, 1:3), then the two rows are averaged.
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width) {
  const uint8* top = src_ptr;
  const uint8* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    uint8 h_top[3];
    uint8 h_bot[3];
    int k;
    // Horizontal pass on each row.
    h_top[0] = (top[0] * 3 + top[1] * 1 + 2) >> 2;
    h_top[1] = (top[1] * 1 + top[2] * 1 + 1) >> 1;
    h_top[2] = (top[2] * 1 + top[3] * 3 + 2) >> 2;
    h_bot[0] = (bot[0] * 3 + bot[1] * 1 + 2) >> 2;
    h_bot[1] = (bot[1] * 1 + bot[2] * 1 + 1) >> 1;
    h_bot[2] = (bot[2] * 1 + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 1 : 1.
    for (k = 0; k < 3; ++k) {
      d[k] = (h_top[k] + h_bot[k] + 1) >> 1;
    }
    top += 4;
    bot += 4;
    d += 3;
  }
}
338
// 16-bit 3/4 scaling with filtering; blends two rows with equal weights.
// Each group of four source pixels is first filtered horizontally into three
// values per row (weights 3:1, 1:1, 1:3), then the two rows are averaged.
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* d, int dst_width) {
  const uint16* top = src_ptr;
  const uint16* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    uint16 h_top[3];
    uint16 h_bot[3];
    int k;
    // Horizontal pass on each row.
    h_top[0] = (top[0] * 3 + top[1] * 1 + 2) >> 2;
    h_top[1] = (top[1] * 1 + top[2] * 1 + 1) >> 1;
    h_top[2] = (top[2] * 1 + top[3] * 3 + 2) >> 2;
    h_bot[0] = (bot[0] * 3 + bot[1] * 1 + 2) >> 2;
    h_bot[1] = (bot[1] * 1 + bot[2] * 1 + 1) >> 1;
    h_bot[2] = (bot[2] * 1 + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 1 : 1.
    for (k = 0; k < 3; ++k) {
      d[k] = (h_top[k] + h_bot[k] + 1) >> 1;
    }
    top += 4;
    bot += 4;
    d += 3;
  }
}
360
// Point-samples one row horizontally. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; the integer part
// (x >> 16) selects the source pixel.
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
                 int dst_width, int x, int dx) {
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
376
// 16-bit horizontal point sampling. x is the starting source position and dx
// the per-output step, both in 16.16 fixed point; the integer part (x >> 16)
// selects the source pixel.
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
                    int dst_width, int x, int dx) {
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
391
// Doubles a row horizontally by point sampling: every source pixel is written
// to two consecutive output pixels. x and dx are not used.
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
                    int dst_width, int x, int dx) {
  int remaining;
  (void)x;
  (void)dx;
  // One source pixel becomes two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint8 pixel = src_ptr[0];
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    src_ptr += 1;
    dst_ptr += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
405
// 16-bit horizontal 2x upsample by point sampling: every source pixel is
// written to two consecutive output pixels. x and dx are not used.
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
                       int dst_width, int x, int dx) {
  int remaining;
  (void)x;
  (void)dx;
  // One source pixel becomes two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16 pixel = src_ptr[0];
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    src_ptr += 1;
    dst_ptr += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
418
// Linear blend of a and b by fraction f: (1-f)a + fb, computed as a + f(b-a).
#if defined(__arm__) || defined(__aarch64__)
// ARM path keeps the full 16 bit fraction and rounds.
#define BLENDER(a, b, f) (uint8)((int)(a) + \
    ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \
    (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

// Scales one row horizontally with linear filtering. x and dx are 16.16 fixed
// point; x >> 16 picks the left source pixel and the low 16 bits are the
// blend fraction toward the pixel to its right.
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
                       int dst_width, int x, int dx) {
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int pos = x >> 16;
    int left = src_ptr[pos];
    int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    pos = x >> 16;
    left = src_ptr[pos];
    right = src_ptr[pos + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int pos = x >> 16;
    const int left = src_ptr[pos];
    const int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}

// Same as ScaleFilterCols_C but tracks the source position in 64 bits, so
// the running position does not overflow a 32 bit int.
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
                         int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int64 pos = x >> 16;
    int left = src_ptr[pos];
    int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    pos = x >> 16;
    left = src_ptr[pos];
    right = src_ptr[pos + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int64 pos = x >> 16;
    const int left = src_ptr[pos];
    const int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}
#undef BLENDER
478
// Same as 8 bit arm blender but return is cast to uint16.
// The product f * (b - a) can reach 65535 * 65535 for full-range 16 bit
// samples, which does not fit in a 32 bit int, so it is computed in 64 bits.
#define BLENDER(a, b, f) (uint16)((int)(a) + \
    (int)(((((int64)(f)) * ((int64)(b) - (int64)(a))) + 0x8000) >> 16))

// Scales one 16-bit row horizontally with linear filtering. x and dx are
// 16.16 fixed point; x >> 16 picks the left source pixel and the low 16 bits
// are the blend fraction toward the pixel to its right.
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
                          int dst_width, int x, int dx) {
  int j;
  // Two output pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

// Same as ScaleFilterCols_16_C but tracks the source position in 64 bits, so
// the running position does not overflow a 32 bit int.
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
                            int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  int j;
  // Two output pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER
532
// 3/8 point sampling: from every group of eight source pixels, pixels 0, 3
// and 6 are copied to three output pixels. dst_width is asserted to be a
// multiple of 3. src_stride is not used.
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
545
// 16-bit 3/8 point sampling: from every group of eight source pixels, pixels
// 0, 3 and 6 are copied to three output pixels. dst_width is asserted to be
// a multiple of 3. src_stride is not used.
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                         uint16* dst, int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
558
// 8x3 -> 3x1
// Box-filters three rows (spaced src_stride bytes apart) of eight pixels into
// three output pixels: two 3x3 boxes followed by one 2x3 box. Division is
// done by multiplying with a 16.16 reciprocal and shifting, without rounding.
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
  const intptr_t stride = src_stride;
  const uint8* r0 = src_ptr;
  const uint8* r1 = src_ptr + stride;
  const uint8* r2 = src_ptr + stride * 2;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Columns 0..2, nine samples.
    dst_ptr[0] = (r0[0] + r0[1] + r0[2] +
                  r1[0] + r1[1] + r1[2] +
                  r2[0] + r2[1] + r2[2]) * (65536 / 9) >> 16;
    // Columns 3..5, nine samples.
    dst_ptr[1] = (r0[3] + r0[4] + r0[5] +
                  r1[3] + r1[4] + r1[5] +
                  r2[3] + r2[4] + r2[5]) * (65536 / 9) >> 16;
    // Columns 6..7, six samples.
    dst_ptr[2] = (r0[6] + r0[7] +
                  r1[6] + r1[7] +
                  r2[6] + r2[7]) * (65536 / 6) >> 16;
    r0 += 8;
    r1 += 8;
    r2 += 8;
    dst_ptr += 3;
  }
}
585
// 16-bit 8x3 -> 3x1.
// Box-filters three rows (spaced src_stride elements apart) of eight pixels
// into three output pixels: two 3x3 boxes followed by one 2x3 box. Division
// is done by multiplying with a 16.16 reciprocal and shifting, without
// rounding. The reciprocals are unsigned so the product is evaluated in
// unsigned arithmetic: with full-range 16 bit samples it exceeds INT_MAX
// (e.g. 9 * 65535 * 7281) but still fits in 32 unsigned bits.
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    // Columns 0..2, nine samples.
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
        src_ptr[stride + 0] + src_ptr[stride + 1] +
        src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
        src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
        (65536u / 9u) >> 16;
    // Columns 3..5, nine samples.
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
        src_ptr[stride + 3] + src_ptr[stride + 4] +
        src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
        src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
        (65536u / 9u) >> 16;
    // Columns 6..7, six samples.
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
        src_ptr[stride + 6] + src_ptr[stride + 7] +
        src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
        (65536u / 6u) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
611
// 8x2 -> 3x1
// Box-filters two rows (spaced src_stride bytes apart) of eight pixels into
// three output pixels: two 3x2 boxes followed by one 2x2 box. Division is
// done by multiplying with a 16.16 reciprocal and shifting, without rounding.
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
  const intptr_t stride = src_stride;
  const uint8* r0 = src_ptr;
  const uint8* r1 = src_ptr + stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Columns 0..2, six samples.
    dst_ptr[0] = (r0[0] + r0[1] + r0[2] +
                  r1[0] + r1[1] + r1[2]) * (65536 / 6) >> 16;
    // Columns 3..5, six samples.
    dst_ptr[1] = (r0[3] + r0[4] + r0[5] +
                  r1[3] + r1[4] + r1[5]) * (65536 / 6) >> 16;
    // Columns 6..7, four samples.
    dst_ptr[2] = (r0[6] + r0[7] +
                  r1[6] + r1[7]) * (65536 / 4) >> 16;
    r0 += 8;
    r1 += 8;
    dst_ptr += 3;
  }
}
632
// 16-bit 8x2 -> 3x1.
// Box-filters two rows (spaced src_stride elements apart) of eight pixels
// into three output pixels: two 3x2 boxes followed by one 2x2 box. Division
// is done by multiplying with a 16.16 reciprocal and shifting, without
// rounding. The reciprocals are unsigned so the product is evaluated in
// unsigned arithmetic: with full-range 16 bit samples it exceeds INT_MAX
// (e.g. 6 * 65535 * 10922) but still fits in 32 unsigned bits.
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    // Columns 0..2, six samples.
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
        src_ptr[stride + 0] + src_ptr[stride + 1] +
        src_ptr[stride + 2]) * (65536u / 6u) >> 16;
    // Columns 3..5, six samples.
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
        src_ptr[stride + 3] + src_ptr[stride + 4] +
        src_ptr[stride + 5]) * (65536u / 6u) >> 16;
    // Columns 6..7, four samples.
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
        src_ptr[stride + 6] + src_ptr[stride + 7]) *
        (65536u / 4u) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
652
// Accumulates a row of 8-bit pixels into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for each of the src_width pixels.
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two pixels per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] = (uint16)(dst_ptr[0] + src_ptr[0]);
    dst_ptr[1] = (uint16)(dst_ptr[1] + src_ptr[1]);
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (src_width & 1) {
    *dst_ptr = (uint16)(*dst_ptr + *src_ptr);
  }
}
666
// Accumulates a row of 16-bit pixels into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for each of the src_width pixels.
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  int remaining;
  assert(src_width > 0);
  // Two pixels per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    dst_ptr[0] = dst_ptr[0] + src_ptr[0];
    dst_ptr[1] = dst_ptr[1] + src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Trailing pixel for odd widths.
  if (src_width & 1) {
    *dst_ptr = *dst_ptr + *src_ptr;
  }
}
680
// Halves an ARGB row by point sampling. Pixels are handled as 32-bit words:
// output pixel i is source pixel 2 * i + 1. src_stride is not used.
void ScaleARGBRowDown2_C(const uint8* src_argb,
                         ptrdiff_t src_stride,
                         uint8* dst_argb, int dst_width) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[1];
    out[1] = in[3];
    in += 4;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *out = in[1];
  }
}
698
// Halves an ARGB row horizontally: each output channel is the rounded average
// of the same channel in two adjacent source pixels. src_stride is not used.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb, int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    // Channels of the second pixel sit 4 bytes after those of the first.
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] =
          (src_argb[channel] + src_argb[channel + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
712
// Halves an ARGB row with a 2x2 box filter: each output channel is the
// rounded average of that channel over two adjacent pixels in the row at
// src_argb and the same two pixels in the row src_stride bytes later.
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8* below = src_argb + src_stride;
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (src_argb[channel] + src_argb[channel + 4] +
                           below[channel] + below[channel + 4] + 2) >> 2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
729
// Point-samples an ARGB row, taking one 32-bit pixel every src_stepx source
// pixels. src_stride is not used.
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
                            int src_stepx,
                            uint8* dst_argb, int dst_width) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    in += src_stepx * 2;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *out = *in;
  }
}
747
// Box-filtered ARGB downsample with an arbitrary horizontal step: each output
// pixel averages a 2x2 block of source pixels (two rows src_stride bytes
// apart), and the source advances src_stepx pixels per output pixel.
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8* below = src_argb + src_stride;
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (src_argb[channel] + src_argb[channel + 4] +
                           below[channel] + below[channel + 4] + 2) >> 2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
766
// Point-samples one ARGB row horizontally, treating pixels as 32-bit words.
// x is the starting source position and dx the per-output step, both in 16.16
// fixed point; the integer part (x >> 16) selects the source pixel.
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
                     int dst_width, int x, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *out = in[x >> 16];
  }
}
784
// Same as ScaleARGBCols_C but tracks the 16.16 source position in 64 bits, so
// the running position does not overflow a 32 bit int.
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
                       int dst_width, int x32, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int64 x = (int64)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    *out = in[x >> 16];
  }
}
802
// Doubles an ARGB row horizontally by point sampling: every 32-bit source
// pixel is written to two consecutive output pixels. x and dx are not used.
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
                        int dst_width, int x, int dx) {
  const uint32* in = (const uint32*)(src_argb);
  uint32* out = (uint32*)(dst_argb);
  int remaining;
  (void)x;
  (void)dx;
  // One source pixel becomes two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint32 pixel = in[0];
    out[0] = pixel;
    out[1] = pixel;
    in += 1;
    out += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    *out = *in;
  }
}
818
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends one 8 bit channel with a 7 bit fraction f.
// BLENDERC extracts the channel at bit offset s from a and b, blends it and
// shifts it back into place. BLENDER combines the four channels.
// Every macro argument and every expansion is parenthesised so the macros
// stay correct when used with compound expressions.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) ((uint32)( \
    BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, (f)) << (s)))
#define BLENDER(a, b, f) \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))

// Scales one ARGB row horizontally with linear filtering. x and dx are 16.16
// fixed point; x >> 16 picks the left source pixel and bits 9..15 of x form
// the 7 bit blend fraction toward the pixel to its right.
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
                           int dst_width, int x, int dx) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int j;
  // Two output pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}

// Same as ScaleARGBFilterCols_C but tracks the source position in 64 bits, so
// the running position does not overflow a 32 bit int.
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
                             int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int j;
  // Two output pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int64 xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int64 xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
889
890 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)891 void ScalePlaneVertical(int src_height,
892 int dst_width, int dst_height,
893 int src_stride, int dst_stride,
894 const uint8* src_argb, uint8* dst_argb,
895 int x, int y, int dy,
896 int bpp, enum FilterMode filtering) {
897 // TODO(fbarchard): Allow higher bpp.
898 int dst_width_bytes = dst_width * bpp;
899 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
900 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
901 InterpolateRow_C;
902 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
903 int j;
904 assert(bpp >= 1 && bpp <= 4);
905 assert(src_height != 0);
906 assert(dst_width > 0);
907 assert(dst_height > 0);
908 src_argb += (x >> 16) * bpp;
909 #if defined(HAS_INTERPOLATEROW_SSSE3)
910 if (TestCpuFlag(kCpuHasSSSE3)) {
911 InterpolateRow = InterpolateRow_Any_SSSE3;
912 if (IS_ALIGNED(dst_width_bytes, 16)) {
913 InterpolateRow = InterpolateRow_SSSE3;
914 }
915 }
916 #endif
917 #if defined(HAS_INTERPOLATEROW_AVX2)
918 if (TestCpuFlag(kCpuHasAVX2)) {
919 InterpolateRow = InterpolateRow_Any_AVX2;
920 if (IS_ALIGNED(dst_width_bytes, 32)) {
921 InterpolateRow = InterpolateRow_AVX2;
922 }
923 }
924 #endif
925 #if defined(HAS_INTERPOLATEROW_NEON)
926 if (TestCpuFlag(kCpuHasNEON)) {
927 InterpolateRow = InterpolateRow_Any_NEON;
928 if (IS_ALIGNED(dst_width_bytes, 16)) {
929 InterpolateRow = InterpolateRow_NEON;
930 }
931 }
932 #endif
933 #if defined(HAS_INTERPOLATEROW_DSPR2)
934 if (TestCpuFlag(kCpuHasDSPR2) &&
935 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
936 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
937 InterpolateRow = InterpolateRow_Any_DSPR2;
938 if (IS_ALIGNED(dst_width_bytes, 4)) {
939 InterpolateRow = InterpolateRow_DSPR2;
940 }
941 }
942 #endif
943 for (j = 0; j < dst_height; ++j) {
944 int yi;
945 int yf;
946 if (y > max_y) {
947 y = max_y;
948 }
949 yi = y >> 16;
950 yf = filtering ? ((y >> 8) & 255) : 0;
951 InterpolateRow(dst_argb, src_argb + yi * src_stride,
952 src_stride, dst_width_bytes, yf);
953 dst_argb += dst_stride;
954 y += dy;
955 }
956 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16 * src_argb,uint16 * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)957 void ScalePlaneVertical_16(int src_height,
958 int dst_width, int dst_height,
959 int src_stride, int dst_stride,
960 const uint16* src_argb, uint16* dst_argb,
961 int x, int y, int dy,
962 int wpp, enum FilterMode filtering) {
963 // TODO(fbarchard): Allow higher wpp.
964 int dst_width_words = dst_width * wpp;
965 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
966 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
967 InterpolateRow_16_C;
968 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
969 int j;
970 assert(wpp >= 1 && wpp <= 2);
971 assert(src_height != 0);
972 assert(dst_width > 0);
973 assert(dst_height > 0);
974 src_argb += (x >> 16) * wpp;
975 #if defined(HAS_INTERPOLATEROW_16_SSE2)
976 if (TestCpuFlag(kCpuHasSSE2)) {
977 InterpolateRow = InterpolateRow_Any_16_SSE2;
978 if (IS_ALIGNED(dst_width_bytes, 16)) {
979 InterpolateRow = InterpolateRow_16_SSE2;
980 }
981 }
982 #endif
983 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
984 if (TestCpuFlag(kCpuHasSSSE3)) {
985 InterpolateRow = InterpolateRow_Any_16_SSSE3;
986 if (IS_ALIGNED(dst_width_bytes, 16)) {
987 InterpolateRow = InterpolateRow_16_SSSE3;
988 }
989 }
990 #endif
991 #if defined(HAS_INTERPOLATEROW_16_AVX2)
992 if (TestCpuFlag(kCpuHasAVX2)) {
993 InterpolateRow = InterpolateRow_Any_16_AVX2;
994 if (IS_ALIGNED(dst_width_bytes, 32)) {
995 InterpolateRow = InterpolateRow_16_AVX2;
996 }
997 }
998 #endif
999 #if defined(HAS_INTERPOLATEROW_16_NEON)
1000 if (TestCpuFlag(kCpuHasNEON)) {
1001 InterpolateRow = InterpolateRow_Any_16_NEON;
1002 if (IS_ALIGNED(dst_width_bytes, 16)) {
1003 InterpolateRow = InterpolateRow_16_NEON;
1004 }
1005 }
1006 #endif
1007 #if defined(HAS_INTERPOLATEROW_16_DSPR2)
1008 if (TestCpuFlag(kCpuHasDSPR2) &&
1009 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
1010 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
1011 InterpolateRow = InterpolateRow_Any_16_DSPR2;
1012 if (IS_ALIGNED(dst_width_bytes, 4)) {
1013 InterpolateRow = InterpolateRow_16_DSPR2;
1014 }
1015 }
1016 #endif
1017 for (j = 0; j < dst_height; ++j) {
1018 int yi;
1019 int yf;
1020 if (y > max_y) {
1021 y = max_y;
1022 }
1023 yi = y >> 16;
1024 yf = filtering ? ((y >> 8) & 255) : 0;
1025 InterpolateRow(dst_argb, src_argb + yi * src_stride,
1026 src_stride, dst_width_words, yf);
1027 dst_argb += dst_stride;
1028 y += dy;
1029 }
1030 }
1031
1032 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1033 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
1034 int dst_width, int dst_height,
1035 enum FilterMode filtering) {
1036 if (src_width < 0) {
1037 src_width = -src_width;
1038 }
1039 if (src_height < 0) {
1040 src_height = -src_height;
1041 }
1042 if (filtering == kFilterBox) {
1043 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1044 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1045 filtering = kFilterBilinear;
1046 }
1047 }
1048 if (filtering == kFilterBilinear) {
1049 if (src_height == 1) {
1050 filtering = kFilterLinear;
1051 }
1052 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1053 if (dst_height == src_height || dst_height * 3 == src_height) {
1054 filtering = kFilterLinear;
1055 }
1056 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1057 // avoid reading 2 pixels horizontally that causes memory exception.
1058 if (src_width == 1) {
1059 filtering = kFilterNone;
1060 }
1061 }
1062 if (filtering == kFilterLinear) {
1063 if (src_width == 1) {
1064 filtering = kFilterNone;
1065 }
1066 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1067 if (dst_width == src_width || dst_width * 3 == src_width) {
1068 filtering = kFilterNone;
1069 }
1070 }
1071 return filtering;
1072 }
1073
1074 // Divide num by div and return as 16.16 fixed point result.
FixedDiv_C(int num,int div)1075 int FixedDiv_C(int num, int div) {
1076 return (int)(((int64)(num) << 16) / div);
1077 }
1078
1079 // Divide num by div and return as 16.16 fixed point result.
FixedDiv1_C(int num,int div)1080 int FixedDiv1_C(int num, int div) {
1081 return (int)((((int64)(num) << 16) - 0x00010001) /
1082 (div - 1));
1083 }
1084
1085 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1086
1087 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1088 void ScaleSlope(int src_width, int src_height,
1089 int dst_width, int dst_height,
1090 enum FilterMode filtering,
1091 int* x, int* y, int* dx, int* dy) {
1092 assert(x != NULL);
1093 assert(y != NULL);
1094 assert(dx != NULL);
1095 assert(dy != NULL);
1096 assert(src_width != 0);
1097 assert(src_height != 0);
1098 assert(dst_width > 0);
1099 assert(dst_height > 0);
1100 // Check for 1 pixel and avoid FixedDiv overflow.
1101 if (dst_width == 1 && src_width >= 32768) {
1102 dst_width = src_width;
1103 }
1104 if (dst_height == 1 && src_height >= 32768) {
1105 dst_height = src_height;
1106 }
1107 if (filtering == kFilterBox) {
1108 // Scale step for point sampling duplicates all pixels equally.
1109 *dx = FixedDiv(Abs(src_width), dst_width);
1110 *dy = FixedDiv(src_height, dst_height);
1111 *x = 0;
1112 *y = 0;
1113 } else if (filtering == kFilterBilinear) {
1114 // Scale step for bilinear sampling renders last pixel once for upsample.
1115 if (dst_width <= Abs(src_width)) {
1116 *dx = FixedDiv(Abs(src_width), dst_width);
1117 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1118 } else if (dst_width > 1) {
1119 *dx = FixedDiv1(Abs(src_width), dst_width);
1120 *x = 0;
1121 }
1122 if (dst_height <= src_height) {
1123 *dy = FixedDiv(src_height, dst_height);
1124 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1125 } else if (dst_height > 1) {
1126 *dy = FixedDiv1(src_height, dst_height);
1127 *y = 0;
1128 }
1129 } else if (filtering == kFilterLinear) {
1130 // Scale step for bilinear sampling renders last pixel once for upsample.
1131 if (dst_width <= Abs(src_width)) {
1132 *dx = FixedDiv(Abs(src_width), dst_width);
1133 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1134 } else if (dst_width > 1) {
1135 *dx = FixedDiv1(Abs(src_width), dst_width);
1136 *x = 0;
1137 }
1138 *dy = FixedDiv(src_height, dst_height);
1139 *y = *dy >> 1;
1140 } else {
1141 // Scale step for point sampling duplicates all pixels equally.
1142 *dx = FixedDiv(Abs(src_width), dst_width);
1143 *dy = FixedDiv(src_height, dst_height);
1144 *x = CENTERSTART(*dx, 0);
1145 *y = CENTERSTART(*dy, 0);
1146 }
1147 // Negative src_width means horizontally mirror.
1148 if (src_width < 0) {
1149 *x += (dst_width - 1) * *dx;
1150 *dx = -*dx;
1151 // src_width = -src_width; // Caller must do this.
1152 }
1153 }
1154 #undef CENTERSTART
1155
1156 #ifdef __cplusplus
1157 } // extern "C"
1158 } // namespace libyuv
1159 #endif
1160