1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Point-samples one 8-bit row to half width: every output pixel is a copy of
// the second pixel of its source pair (source indices 1, 3, 5, ...).
// src_stride is unused; only a single source row is read.
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Main loop produces two output pixels per pass.
  while (remaining > 1) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
    remaining -= 2;
  }
  // An odd width leaves one trailing output pixel.
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}
47
// 16-bit variant of the half-width point sampler: each output sample copies
// the second sample of its source pair. src_stride is unused.
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Main loop produces two output samples per pass.
  while (remaining > 1) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
    remaining -= 2;
  }
  // An odd width leaves one trailing output sample.
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}
64
// Halves one 8-bit row horizontally by averaging each pair of adjacent source
// pixels with rounding: (p0 + p1 + 1) >> 1. src_stride is unused.
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* in = src_ptr;
  int remaining = dst_width;
  (void)src_stride;
  // Two output pixels (four source pixels) per pass.
  while (remaining > 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
    dst[1] = (in[2] + in[3] + 1) >> 1;
    in += 4;
    dst += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
  }
}
82
// 16-bit variant: halves one row horizontally by averaging each pair of
// adjacent source samples with rounding. src_stride is unused.
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* in = src_ptr;
  int remaining = dst_width;
  (void)src_stride;
  // Two output samples (four source samples) per pass.
  while (remaining > 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
    dst[1] = (in[2] + in[3] + 1) >> 1;
    in += 4;
    dst += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    dst[0] = (in[0] + in[1] + 1) >> 1;
  }
}
100
// 2x2 box filter for 8-bit data: each output pixel is the rounded average of
// two adjacent pixels on the row at src_ptr and the two pixels below them on
// the row at src_ptr + src_stride.
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* upper = src_ptr;
  const uint8_t* lower = src_ptr + src_stride;
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
    dst[1] = (upper[2] + upper[3] + lower[2] + lower[3] + 2) >> 2;
    upper += 4;
    lower += 4;
    dst += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
  }
}
119
// 2x2 box filter whose final output pixel has only one source column: the
// first dst_width - 1 outputs are full 2x2 rounded averages, and the last
// output averages a single pixel from each of the two rows.
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* upper = src_ptr;
  const uint8_t* lower = src_ptr + src_stride;
  // Number of outputs that have a complete 2x2 source box.
  const int full_boxes = dst_width - 1;
  int remaining = full_boxes;
  while (remaining > 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
    dst[1] = (upper[2] + upper[3] + lower[2] + lower[3] + 2) >> 2;
    upper += 4;
    lower += 4;
    dst += 2;
    remaining -= 2;
  }
  if (full_boxes & 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
    upper += 2;
    lower += 2;
    dst += 1;
  }
  // Last output: vertical average of the single remaining column.
  dst[0] = (upper[0] + lower[0] + 1) >> 1;
}
143
// 16-bit 2x2 box filter: each output sample is the rounded average of two
// adjacent samples on the row at src_ptr and the two samples below them.
// src_stride is applied to the uint16_t pointer, i.e. it counts samples.
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* upper = src_ptr;
  const uint16_t* lower = src_ptr + src_stride;
  int remaining = dst_width;
  // Two output samples per pass.
  while (remaining > 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
    dst[1] = (upper[2] + upper[3] + lower[2] + lower[3] + 2) >> 2;
    upper += 4;
    lower += 4;
    dst += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    dst[0] = (upper[0] + upper[1] + lower[0] + lower[1] + 2) >> 2;
  }
}
162
// Point-samples one 8-bit row to quarter width: each output pixel copies the
// pixel at offset 2 within its group of four source pixels.
// src_stride is unused.
void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Two output pixels (eight source pixels) per pass.
  while (remaining > 1) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}
179
// 16-bit quarter-width point sampler: each output sample copies the sample at
// offset 2 within its group of four source samples. src_stride is unused.
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Two output samples (eight source samples) per pass.
  while (remaining > 1) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}
196
// Rounded average of the 4x4 block of 8-bit pixels whose top-left corner is at
// p; consecutive rows are `stride` elements apart.
static __inline int ScaleBox4x4Average_8(const uint8_t* p, intptr_t stride) {
  int total = 8;  // Rounding bias for the divide by 16.
  int row;
  for (row = 0; row < 4; ++row) {
    const uint8_t* r = p + stride * row;
    total += r[0] + r[1] + r[2] + r[3];
  }
  return total >> 4;
}

// 4x4 box filter for 8-bit data: each output pixel is the rounded average of
// sixteen source pixels taken from four consecutive rows starting at src_ptr.
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  intptr_t stride = src_stride;
  int remaining = dst_width;
  // Two output pixels (two 4x4 boxes) per pass.
  while (remaining > 1) {
    dst[0] = ScaleBox4x4Average_8(src_ptr, stride);
    dst[1] = ScaleBox4x4Average_8(src_ptr + 4, stride);
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    dst[0] = ScaleBox4x4Average_8(src_ptr, stride);
  }
}
234
// Rounded average of the 4x4 block of 16-bit samples whose top-left corner is
// at p; consecutive rows are `stride` elements apart.
static __inline int ScaleBox4x4Average_16(const uint16_t* p, intptr_t stride) {
  int total = 8;  // Rounding bias for the divide by 16.
  int row;
  for (row = 0; row < 4; ++row) {
    const uint16_t* r = p + stride * row;
    total += r[0] + r[1] + r[2] + r[3];
  }
  return total >> 4;
}

// 4x4 box filter for 16-bit data: each output sample is the rounded average of
// sixteen source samples taken from four consecutive rows starting at src_ptr.
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  intptr_t stride = src_stride;
  int remaining = dst_width;
  // Two output samples (two 4x4 boxes) per pass.
  while (remaining > 1) {
    dst[0] = ScaleBox4x4Average_16(src_ptr, stride);
    dst[1] = ScaleBox4x4Average_16(src_ptr + 4, stride);
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    dst[0] = ScaleBox4x4Average_16(src_ptr, stride);
  }
}
272
// Point-samples one 8-bit row to 3/4 width: from every group of four source
// pixels, pixels 0, 1 and 3 are copied. dst_width must be a positive multiple
// of 3. src_stride is unused.
void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
288
// 16-bit 3/4-width point sampler: from every group of four source samples,
// samples 0, 1 and 3 are copied. dst_width must be a positive multiple of 3.
// src_stride is unused.
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
304
// Filter rows 0 and 1 together, 3 : 1
// Each row is first reduced 4 -> 3 horizontally with weights (3,1), (1,1) and
// (1,3); the two filtered rows are then blended with weight 3 for the row at
// src_ptr and weight 1 for the row at src_ptr + src_stride.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the first row.
    const uint8_t top0 = (row0[0] * 3 + row0[1] * 1 + 2) >> 2;
    const uint8_t top1 = (row0[1] * 1 + row0[2] * 1 + 1) >> 1;
    const uint8_t top2 = (row0[2] * 1 + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the second row.
    const uint8_t bot0 = (row1[0] * 3 + row1[1] * 1 + 2) >> 2;
    const uint8_t bot1 = (row1[1] * 1 + row1[2] * 1 + 1) >> 1;
    const uint8_t bot2 = (row1[2] * 1 + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    d[0] = (top0 * 3 + bot0 + 2) >> 2;
    d[1] = (top1 * 3 + bot1 + 2) >> 2;
    d[2] = (top2 * 3 + bot2 + 2) >> 2;
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
329
// 16-bit variant of the 3/4 box filter that blends two rows 3 : 1.
// Each row is reduced 4 -> 3 horizontally with weights (3,1), (1,1) and (1,3);
// the row at src_ptr then gets weight 3 and the row at src_ptr + src_stride
// weight 1. dst_width must be a positive multiple of 3.
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the first row.
    const uint16_t top0 = (row0[0] * 3 + row0[1] * 1 + 2) >> 2;
    const uint16_t top1 = (row0[1] * 1 + row0[2] * 1 + 1) >> 1;
    const uint16_t top2 = (row0[2] * 1 + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the second row.
    const uint16_t bot0 = (row1[0] * 3 + row1[1] * 1 + 2) >> 2;
    const uint16_t bot1 = (row1[1] * 1 + row1[2] * 1 + 1) >> 1;
    const uint16_t bot2 = (row1[2] * 1 + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    d[0] = (top0 * 3 + bot0 + 2) >> 2;
    d[1] = (top1 * 3 + bot1 + 2) >> 2;
    d[2] = (top2 * 3 + bot2 + 2) >> 2;
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
353
// Filter rows 1 and 2 together, 1 : 1
// Each row is first reduced 4 -> 3 horizontally with weights (3,1), (1,1) and
// (1,3); the two filtered rows are then averaged with equal weight.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the first row.
    const uint8_t top0 = (row0[0] * 3 + row0[1] * 1 + 2) >> 2;
    const uint8_t top1 = (row0[1] * 1 + row0[2] * 1 + 1) >> 1;
    const uint8_t top2 = (row0[2] * 1 + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the second row.
    const uint8_t bot0 = (row1[0] * 3 + row1[1] * 1 + 2) >> 2;
    const uint8_t bot1 = (row1[1] * 1 + row1[2] * 1 + 1) >> 1;
    const uint8_t bot2 = (row1[2] * 1 + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    d[0] = (top0 + bot0 + 1) >> 1;
    d[1] = (top1 + bot1 + 1) >> 1;
    d[2] = (top2 + bot2 + 1) >> 1;
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
378
// 16-bit variant of the 3/4 box filter that blends two rows 1 : 1.
// Each row is reduced 4 -> 3 horizontally with weights (3,1), (1,1) and (1,3);
// the two filtered rows are then averaged with equal weight.
// dst_width must be a positive multiple of 3.
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the first row.
    const uint16_t top0 = (row0[0] * 3 + row0[1] * 1 + 2) >> 2;
    const uint16_t top1 = (row0[1] * 1 + row0[2] * 1 + 1) >> 1;
    const uint16_t top2 = (row0[2] * 1 + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the second row.
    const uint16_t bot0 = (row1[0] * 3 + row1[1] * 1 + 2) >> 2;
    const uint16_t bot1 = (row1[1] * 1 + row1[2] * 1 + 1) >> 1;
    const uint16_t bot2 = (row1[2] * 1 + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    d[0] = (top0 + bot0 + 1) >> 1;
    d[1] = (top1 + bot1 + 1) >> 1;
    d[2] = (top2 + bot2 + 1) >> 1;
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
402
// Scales a single row of pixels using point sampling.
// x is the starting source position and dx the per-output step, both in 16.16
// fixed point; each output pixel copies src_ptr[x >> 16].
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}
421
// 16-bit point-sampling column scaler. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; each output sample copies
// src_ptr[x >> 16].
void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining = dst_width;
  // Two output samples per pass.
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}
439
// Scales a single row of pixels up by 2x using point sampling.
// Every source pixel is written twice; x and dx are ignored.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining = dst_width;
  (void)x;
  (void)dx;
  // One source pixel yields two output pixels.
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[0];
    dst_ptr[1] = dst_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}
458
// 16-bit 2x point-sampling upscaler: every source sample is written twice.
// x and dx are ignored.
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining = dst_width;
  (void)x;
  (void)dx;
  // One source sample yields two output samples.
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[0];
    dst_ptr[1] = dst_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: the last source sample is written once.
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}
476
// (1-f)a + fb can be replaced with a + f(b-a)
// BLENDER(a, b, f) interpolates between a and b using the 16-bit fraction f.
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) +   \
            ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) +   \
            (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

// Scales a row of 8-bit pixels horizontally with linear filtering. x is the
// starting source position and dx the step per output pixel, both in 16.16
// fixed point. Each output blends src_ptr[x >> 16] with its right-hand
// neighbour, weighted by the low 16 bits of x.
void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    int pos = x >> 16;
    int left = src_ptr[pos];
    int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    pos = x >> 16;
    left = src_ptr[pos];
    right = src_ptr[pos + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int pos = x >> 16;
    const int left = src_ptr[pos];
    const int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}

// Same filter as ScaleFilterCols_C, but the source position is accumulated in
// 64 bits, starting from the 32-bit value x32.
void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64_t x = (int64_t)(x32);
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    int64_t pos = x >> 16;
    int left = src_ptr[pos];
    int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
    x += dx;
    pos = x >> 16;
    left = src_ptr[pos];
    right = src_ptr[pos + 1];
    dst_ptr[1] = BLENDER(left, right, x & 0xffff);
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int64_t pos = x >> 16;
    const int left = src_ptr[pos];
    const int right = src_ptr[pos + 1];
    dst_ptr[0] = BLENDER(left, right, x & 0xffff);
  }
}
#undef BLENDER
542
// Same as 8 bit arm blender but return is cast to uint16_t.
// The product f * (b - a) is evaluated in 64 bits: f can be as large as 0xffff
// and the sample difference as large as +/-65535, which does not fit in a
// 32-bit int.
#define BLENDER(a, b, f)                                                \
  (uint16_t)((int)(a) +                                                 \
             (int)((((int64_t)(f) * ((int64_t)(b) - (int64_t)(a))) +    \
                    0x8000) >>                                          \
                   16))

// Scales a row of 16-bit samples horizontally with linear filtering. x is the
// starting source position and dx the step per output sample, both in 16.16
// fixed point. Each output blends src_ptr[x >> 16] with its right-hand
// neighbour, weighted by the low 16 bits of x.
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

// Same filter as ScaleFilterCols_16_C, but the source position is accumulated
// in 64 bits, starting from the 32-bit value x32.
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64_t x = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Trailing sample for odd widths.
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER
602
// Point-samples one 8-bit row to 3/8 width: from every group of eight source
// pixels, pixels 0, 3 and 6 are copied. dst_width must be a multiple of 3.
// src_stride is unused.
void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
618
// 16-bit 3/8-width point sampler: from every group of eight source samples,
// samples 0, 3 and 6 are copied. dst_width must be a multiple of 3.
// src_stride is unused.
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
634
// 8x3 -> 3x1
// Every group of eight source columns across three rows yields three output
// pixels: two averages over 3x3 boxes and one over the remaining 2x3 box.
// Division is done by multiplying with 65536 / N and shifting right by 16.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + stride;
    const uint8_t* r2 = src_ptr + stride * 2;
    // Columns 0..2, 3..5 and 6..7 summed over the three rows.
    const int sum_a = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2] + r2[0] +
                      r2[1] + r2[2];
    const int sum_b = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5] + r2[3] +
                      r2[4] + r2[5];
    const int sum_c = r0[6] + r0[7] + r1[6] + r1[7] + r2[6] + r2[7];
    dst_ptr[0] = sum_a * (65536 / 9) >> 16;
    dst_ptr[1] = sum_b * (65536 / 9) >> 16;
    dst_ptr[2] = sum_c * (65536 / 6) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
665
// 16-bit 8x3 -> 3x1 box filter.
// Every group of eight source columns across three rows yields three output
// samples: two averages over 3x3 boxes and one over the remaining 2x3 box.
// Division is done by multiplying with 65536 / N and shifting right by 16.
// The sums and products are computed in uint32_t: with full-range 16-bit
// samples the product (up to 9 * 65535 * 7281) exceeds INT_MAX but still fits
// in 32 unsigned bits.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + stride;
    const uint16_t* r2 = src_ptr + stride * 2;
    // Columns 0..2, 3..5 and 6..7 summed over the three rows.
    const uint32_t sum_a = (uint32_t)r0[0] + r0[1] + r0[2] + r1[0] + r1[1] +
                           r1[2] + r2[0] + r2[1] + r2[2];
    const uint32_t sum_b = (uint32_t)r0[3] + r0[4] + r0[5] + r1[3] + r1[4] +
                           r1[5] + r2[3] + r2[4] + r2[5];
    const uint32_t sum_c =
        (uint32_t)r0[6] + r0[7] + r1[6] + r1[7] + r2[6] + r2[7];
    dst_ptr[0] = (uint16_t)((sum_a * (65536u / 9u)) >> 16);
    dst_ptr[1] = (uint16_t)((sum_b * (65536u / 9u)) >> 16);
    dst_ptr[2] = (uint16_t)((sum_c * (65536u / 6u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
695
// 8x2 -> 3x1
// Every group of eight source columns across two rows yields three output
// pixels: two averages over 3x2 boxes and one over the remaining 2x2 box.
// Division is done by multiplying with 65536 / N and shifting right by 16.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + stride;
    // Columns 0..2, 3..5 and 6..7 summed over the two rows.
    const int sum_a = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2];
    const int sum_b = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5];
    const int sum_c = r0[6] + r0[7] + r1[6] + r1[7];
    dst_ptr[0] = sum_a * (65536 / 6) >> 16;
    dst_ptr[1] = sum_b * (65536 / 6) >> 16;
    dst_ptr[2] = sum_c * (65536 / 4) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
721
// 16-bit 8x2 -> 3x1 box filter.
// Every group of eight source columns across two rows yields three output
// samples: two averages over 3x2 boxes and one over the remaining 2x2 box.
// Division is done by multiplying with 65536 / N and shifting right by 16.
// The sums and products are computed in uint32_t: with full-range 16-bit
// samples the product (up to 6 * 65535 * 10922) exceeds INT_MAX but still
// fits in 32 unsigned bits.
// dst_width must be a positive multiple of 3.
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + stride;
    // Columns 0..2, 3..5 and 6..7 summed over the two rows.
    const uint32_t sum_a =
        (uint32_t)r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2];
    const uint32_t sum_b =
        (uint32_t)r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5];
    const uint32_t sum_c = (uint32_t)r0[6] + r0[7] + r1[6] + r1[7];
    dst_ptr[0] = (uint16_t)((sum_a * (65536u / 6u)) >> 16);
    dst_ptr[1] = (uint16_t)((sum_b * (65536u / 6u)) >> 16);
    dst_ptr[2] = (uint16_t)((sum_c * (65536u / 4u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
746
// Accumulates one row of 8-bit pixels into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for i in [0, src_width). src_width must be > 0.
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int remaining = src_width;
  assert(src_width > 0);
  // Two pixels per pass.
  while (remaining > 1) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
760
// Accumulates one row of 16-bit samples into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for i in [0, src_width). src_width must be > 0.
void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int remaining = src_width;
  assert(src_width > 0);
  // Two samples per pass.
  while (remaining > 1) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Trailing sample for odd widths.
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
776
// Point-samples a row of 4-byte pixels to half width: each output pixel is a
// copy of the second pixel of its source pair. Pixels are moved as whole
// 32-bit words. src_stride is unused.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  (void)src_stride;
  // Two output pixels per pass.
  while (remaining > 1) {
    out[0] = in[1];
    out[1] = in[3];
    in += 4;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[1];
  }
}
795
// Halves a row of 4-byte pixels horizontally: each output channel is the
// rounded average of the matching channel in two adjacent source pixels.
// src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int c;
    // Channel c of the left pixel is at [c], of the right pixel at [c + 4].
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
811
// 2x2 box filter for 4-byte pixels: each output channel is the rounded average
// of that channel in two adjacent pixels on the row at src_argb and the two
// pixels below them on the row src_stride bytes further on.
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] +
                     src_argb[src_stride + c] + src_argb[src_stride + c + 4] +
                     2) >>
                    2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
834
// Point-samples a row of 4-byte pixels with a fixed step: output pixel i is a
// copy of source pixel i * src_stepx. Pixels are moved as whole 32-bit words.
// src_stride is unused.
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  (void)src_stride;
  // Two output pixels per pass.
  while (remaining > 1) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    in += src_stepx * 2;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
854
// Box-filtered stepped downsample for 4-byte pixels: each output channel is
// the rounded average of a 2x2 block of source pixels (two rows src_stride
// bytes apart), and the source advances src_stepx pixels per output pixel.
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] +
                     src_argb[src_stride + c] + src_argb[src_stride + c + 4] +
                     2) >>
                    2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
878
// Scales a single row of pixels using point sampling.
// Works on 4-byte pixels moved as 32-bit words. x is the starting source
// position and dx the per-output step, both in 16.16 fixed point; each output
// pixel copies source pixel x >> 16.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[x >> 16];
  }
}
899
// Point-sampling column scaler for 4-byte pixels that accumulates the source
// position in 64 bits, starting from the 32-bit value x32. Each output pixel
// copies source pixel x >> 16.
void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[x >> 16];
  }
}
920
// Scales a single row of pixels up by 2x using point sampling.
// Works on 4-byte pixels moved as 32-bit words: every source pixel is written
// twice. x and dx are ignored.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  (void)x;
  (void)dx;
  // One source pixel yields two output pixels.
  while (remaining > 1) {
    out[0] = in[0];
    out[1] = out[0];
    in += 1;
    out += 2;
    remaining -= 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
941
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends one channel with a 7-bit fraction f, BLENDERC applies it to
// the channel at bit offset s, and BLENDER combines all four channels.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  (uint32_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f)                                                 \
  (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
   BLENDERC(a, b, f, 0))

// Scales a row of 4-byte pixels horizontally with linear filtering. x is the
// starting source position and dx the step per output pixel, both in 16.16
// fixed point. Each output blends source pixel x >> 16 with its right-hand
// neighbour per channel, using the top 7 bits of the fractional part of x.
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    int pos = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32_t left = in[pos];
    uint32_t right = in[pos + 1];
    out[0] = BLENDER(left, right, frac);
    x += dx;
    pos = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = in[pos];
    right = in[pos + 1];
    out[1] = BLENDER(left, right, frac);
    x += dx;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int pos = x >> 16;
    const int frac = (x >> 9) & 0x7f;
    const uint32_t left = in[pos];
    const uint32_t right = in[pos + 1];
    out[0] = BLENDER(left, right, frac);
  }
}

// Same filter as ScaleARGBFilterCols_C, but the source position is accumulated
// in 64 bits, starting from the 32-bit value x32.
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  // Two output pixels per pass.
  while (remaining > 1) {
    int64_t pos = x >> 16;
    int frac = (x >> 9) & 0x7f;
    uint32_t left = in[pos];
    uint32_t right = in[pos + 1];
    out[0] = BLENDER(left, right, frac);
    x += dx;
    pos = x >> 16;
    frac = (x >> 9) & 0x7f;
    left = in[pos];
    right = in[pos + 1];
    out[1] = BLENDER(left, right, frac);
    x += dx;
    out += 2;
    remaining -= 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    const int64_t pos = x >> 16;
    const int frac = (x >> 9) & 0x7f;
    const uint32_t left = in[pos];
    const uint32_t right = in[pos + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER
1018
1019 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)1020 void ScalePlaneVertical(int src_height,
1021 int dst_width,
1022 int dst_height,
1023 int src_stride,
1024 int dst_stride,
1025 const uint8_t* src_argb,
1026 uint8_t* dst_argb,
1027 int x,
1028 int y,
1029 int dy,
1030 int bpp,
1031 enum FilterMode filtering) {
1032 // TODO(fbarchard): Allow higher bpp.
1033 int dst_width_bytes = dst_width * bpp;
1034 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
1035 ptrdiff_t src_stride, int dst_width,
1036 int source_y_fraction) = InterpolateRow_C;
1037 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1038 int j;
1039 assert(bpp >= 1 && bpp <= 4);
1040 assert(src_height != 0);
1041 assert(dst_width > 0);
1042 assert(dst_height > 0);
1043 src_argb += (x >> 16) * bpp;
1044 #if defined(HAS_INTERPOLATEROW_SSSE3)
1045 if (TestCpuFlag(kCpuHasSSSE3)) {
1046 InterpolateRow = InterpolateRow_Any_SSSE3;
1047 if (IS_ALIGNED(dst_width_bytes, 16)) {
1048 InterpolateRow = InterpolateRow_SSSE3;
1049 }
1050 }
1051 #endif
1052 #if defined(HAS_INTERPOLATEROW_AVX2)
1053 if (TestCpuFlag(kCpuHasAVX2)) {
1054 InterpolateRow = InterpolateRow_Any_AVX2;
1055 if (IS_ALIGNED(dst_width_bytes, 32)) {
1056 InterpolateRow = InterpolateRow_AVX2;
1057 }
1058 }
1059 #endif
1060 #if defined(HAS_INTERPOLATEROW_NEON)
1061 if (TestCpuFlag(kCpuHasNEON)) {
1062 InterpolateRow = InterpolateRow_Any_NEON;
1063 if (IS_ALIGNED(dst_width_bytes, 16)) {
1064 InterpolateRow = InterpolateRow_NEON;
1065 }
1066 }
1067 #endif
1068 #if defined(HAS_INTERPOLATEROW_MSA)
1069 if (TestCpuFlag(kCpuHasMSA)) {
1070 InterpolateRow = InterpolateRow_Any_MSA;
1071 if (IS_ALIGNED(dst_width_bytes, 32)) {
1072 InterpolateRow = InterpolateRow_MSA;
1073 }
1074 }
1075 #endif
1076 for (j = 0; j < dst_height; ++j) {
1077 int yi;
1078 int yf;
1079 if (y > max_y) {
1080 y = max_y;
1081 }
1082 yi = y >> 16;
1083 yf = filtering ? ((y >> 8) & 255) : 0;
1084 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1085 dst_width_bytes, yf);
1086 dst_argb += dst_stride;
1087 y += dy;
1088 }
1089 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_argb,uint16_t * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)1090 void ScalePlaneVertical_16(int src_height,
1091 int dst_width,
1092 int dst_height,
1093 int src_stride,
1094 int dst_stride,
1095 const uint16_t* src_argb,
1096 uint16_t* dst_argb,
1097 int x,
1098 int y,
1099 int dy,
1100 int wpp,
1101 enum FilterMode filtering) {
1102 // TODO(fbarchard): Allow higher wpp.
1103 int dst_width_words = dst_width * wpp;
1104 void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
1105 ptrdiff_t src_stride, int dst_width,
1106 int source_y_fraction) = InterpolateRow_16_C;
1107 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1108 int j;
1109 assert(wpp >= 1 && wpp <= 2);
1110 assert(src_height != 0);
1111 assert(dst_width > 0);
1112 assert(dst_height > 0);
1113 src_argb += (x >> 16) * wpp;
1114 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1115 if (TestCpuFlag(kCpuHasSSE2)) {
1116 InterpolateRow = InterpolateRow_Any_16_SSE2;
1117 if (IS_ALIGNED(dst_width_bytes, 16)) {
1118 InterpolateRow = InterpolateRow_16_SSE2;
1119 }
1120 }
1121 #endif
1122 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1123 if (TestCpuFlag(kCpuHasSSSE3)) {
1124 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1125 if (IS_ALIGNED(dst_width_bytes, 16)) {
1126 InterpolateRow = InterpolateRow_16_SSSE3;
1127 }
1128 }
1129 #endif
1130 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1131 if (TestCpuFlag(kCpuHasAVX2)) {
1132 InterpolateRow = InterpolateRow_Any_16_AVX2;
1133 if (IS_ALIGNED(dst_width_bytes, 32)) {
1134 InterpolateRow = InterpolateRow_16_AVX2;
1135 }
1136 }
1137 #endif
1138 #if defined(HAS_INTERPOLATEROW_16_NEON)
1139 if (TestCpuFlag(kCpuHasNEON)) {
1140 InterpolateRow = InterpolateRow_Any_16_NEON;
1141 if (IS_ALIGNED(dst_width_bytes, 16)) {
1142 InterpolateRow = InterpolateRow_16_NEON;
1143 }
1144 }
1145 #endif
1146 for (j = 0; j < dst_height; ++j) {
1147 int yi;
1148 int yf;
1149 if (y > max_y) {
1150 y = max_y;
1151 }
1152 yi = y >> 16;
1153 yf = filtering ? ((y >> 8) & 255) : 0;
1154 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1155 dst_width_words, yf);
1156 dst_argb += dst_stride;
1157 y += dy;
1158 }
1159 }
1160
1161 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1162 enum FilterMode ScaleFilterReduce(int src_width,
1163 int src_height,
1164 int dst_width,
1165 int dst_height,
1166 enum FilterMode filtering) {
1167 if (src_width < 0) {
1168 src_width = -src_width;
1169 }
1170 if (src_height < 0) {
1171 src_height = -src_height;
1172 }
1173 if (filtering == kFilterBox) {
1174 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1175 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1176 filtering = kFilterBilinear;
1177 }
1178 }
1179 if (filtering == kFilterBilinear) {
1180 if (src_height == 1) {
1181 filtering = kFilterLinear;
1182 }
1183 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1184 if (dst_height == src_height || dst_height * 3 == src_height) {
1185 filtering = kFilterLinear;
1186 }
1187 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1188 // avoid reading 2 pixels horizontally that causes memory exception.
1189 if (src_width == 1) {
1190 filtering = kFilterNone;
1191 }
1192 }
1193 if (filtering == kFilterLinear) {
1194 if (src_width == 1) {
1195 filtering = kFilterNone;
1196 }
1197 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1198 if (dst_width == src_width || dst_width * 3 == src_width) {
1199 filtering = kFilterNone;
1200 }
1201 }
1202 return filtering;
1203 }
1204
// Divide num by div and return as 16.16 fixed point result.
// The numerator is widened to 64 bits before scaling by 65536, and the
// quotient is truncated back to int. div must not be zero.
int FixedDiv_C(int num, int div) {
  const int64_t scaled_num = (int64_t)(num) << 16;
  const int64_t quotient = scaled_num / div;
  return (int)(quotient);
}
1209
// Divide (num in 16.16 fixed point, less 0x00010001) by (div - 1) and return
// the truncated result as int. This is roughly (num - 1) / (div - 1) in 16.16
// fixed point, nudged down by one extra unit in the last place.
// div must not be 1; ScaleSlope only calls this with a destination size > 1.
int FixedDiv1_C(int num, int div) {
  const int64_t numerator = ((int64_t)(num) << 16) - 0x00010001;
  const int denominator = div - 1;
  return (int)(numerator / denominator);
}
1214
1215 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1216
1217 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1218 void ScaleSlope(int src_width,
1219 int src_height,
1220 int dst_width,
1221 int dst_height,
1222 enum FilterMode filtering,
1223 int* x,
1224 int* y,
1225 int* dx,
1226 int* dy) {
1227 assert(x != NULL);
1228 assert(y != NULL);
1229 assert(dx != NULL);
1230 assert(dy != NULL);
1231 assert(src_width != 0);
1232 assert(src_height != 0);
1233 assert(dst_width > 0);
1234 assert(dst_height > 0);
1235 // Check for 1 pixel and avoid FixedDiv overflow.
1236 if (dst_width == 1 && src_width >= 32768) {
1237 dst_width = src_width;
1238 }
1239 if (dst_height == 1 && src_height >= 32768) {
1240 dst_height = src_height;
1241 }
1242 if (filtering == kFilterBox) {
1243 // Scale step for point sampling duplicates all pixels equally.
1244 *dx = FixedDiv(Abs(src_width), dst_width);
1245 *dy = FixedDiv(src_height, dst_height);
1246 *x = 0;
1247 *y = 0;
1248 } else if (filtering == kFilterBilinear) {
1249 // Scale step for bilinear sampling renders last pixel once for upsample.
1250 if (dst_width <= Abs(src_width)) {
1251 *dx = FixedDiv(Abs(src_width), dst_width);
1252 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1253 } else if (dst_width > 1) {
1254 *dx = FixedDiv1(Abs(src_width), dst_width);
1255 *x = 0;
1256 }
1257 if (dst_height <= src_height) {
1258 *dy = FixedDiv(src_height, dst_height);
1259 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1260 } else if (dst_height > 1) {
1261 *dy = FixedDiv1(src_height, dst_height);
1262 *y = 0;
1263 }
1264 } else if (filtering == kFilterLinear) {
1265 // Scale step for bilinear sampling renders last pixel once for upsample.
1266 if (dst_width <= Abs(src_width)) {
1267 *dx = FixedDiv(Abs(src_width), dst_width);
1268 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1269 } else if (dst_width > 1) {
1270 *dx = FixedDiv1(Abs(src_width), dst_width);
1271 *x = 0;
1272 }
1273 *dy = FixedDiv(src_height, dst_height);
1274 *y = *dy >> 1;
1275 } else {
1276 // Scale step for point sampling duplicates all pixels equally.
1277 *dx = FixedDiv(Abs(src_width), dst_width);
1278 *dy = FixedDiv(src_height, dst_height);
1279 *x = CENTERSTART(*dx, 0);
1280 *y = CENTERSTART(*dy, 0);
1281 }
1282 // Negative src_width means horizontally mirror.
1283 if (src_width < 0) {
1284 *x += (dst_width - 1) * *dx;
1285 *dx = -*dx;
1286 // src_width = -src_width; // Caller must do this.
1287 }
1288 }
1289 #undef CENTERSTART
1290
// 2x horizontal upsample of 16 bit samples with filtering.
// Blends two source rows (src_ptr and src_ptr + src_stride, stride counted in
// uint16_t elements) into a single output row. Every source column yields two
// outputs, each a 9:3:3:1 weighted average of a 2x2 neighbourhood, rounded.
// Reads one source pixel beyond the last column it steps to, e.g. 16 outputs
// read 9 pixels from each of the 2 rows.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int col = 0;  // Source column of the current 2x2 neighbourhood.
  int i;
  for (i = 0; i < dst_width - 1; i += 2, ++col) {
    const int a = row0[col];
    const int b = row0[col + 1];
    const int c = row1[col];
    const int d = row1[col + 1];
    dst[i] = (a * 9 + b * 3 + c * 3 + d + 8) >> 4;
    dst[i + 1] = (a * 3 + b * 9 + c + d * 3 + 8) >> 4;
  }
  // Odd width: emit only the first output of the next pair.
  if (dst_width & 1) {
    const int a = row0[col];
    const int b = row0[col + 1];
    const int c = row1[col];
    const int d = row1[col + 1];
    dst[i] = (a * 9 + b * 3 + c * 3 + d + 8) >> 4;
  }
}
1319
1320 #ifdef __cplusplus
1321 } // extern "C"
1322 } // namespace libyuv
1323 #endif
1324