1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Halves a row by point sampling: each output element is the second sample of
// its source pair (src_ptr[1], src_ptr[3], ...). src_stride is not used.
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two outputs per pass, consuming four source elements.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[1];
    out[1] = in[3];
    in += 4;
    out += 2;
  }
  // An odd width leaves one trailing output.
  if (dst_width & 1) {
    out[0] = in[1];
  }
}
47
// 16-bit variant of the 1/2 point sampler: output i takes source element
// 2 * i + 1. src_stride is not used.
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two outputs per pass, consuming four source elements.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[1];
    out[1] = in[3];
    in += 4;
    out += 2;
  }
  // An odd width leaves one trailing output.
  if (dst_width & 1) {
    out[0] = in[1];
  }
}
64
// Halves a row by averaging each horizontal pair of source elements with
// round-to-nearest ((a + b + 1) >> 1). src_stride is not used.
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two averaged outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = (in[0] + in[1] + 1) >> 1;
    out[1] = (in[2] + in[3] + 1) >> 1;
    in += 4;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = (in[0] + in[1] + 1) >> 1;
  }
}
82
// 16-bit variant of the 1/2 linear scaler: each output is the rounded average
// of a horizontal pair of source elements. src_stride is not used.
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two averaged outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = (in[0] + in[1] + 1) >> 1;
    out[1] = (in[2] + in[3] + 1) >> 1;
    in += 4;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = (in[0] + in[1] + 1) >> 1;
  }
}
100
// Halves a row with a 2x2 box filter: each output is the rounded mean of two
// adjacent elements in the row at src_ptr and the two below them in the row
// at src_ptr + src_stride.
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  uint8_t* out = dst;
  int remaining;
  // Two boxes per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    out[1] = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
    out += 2;
  }
  // Trailing box for odd widths.
  if (dst_width & 1) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
  }
}
119
// 2x2 box filter for rows whose last output only has one source column:
// the first dst_width - 1 outputs are full 2x2 averages, and the final output
// averages a single column (one element from each of the two rows).
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  uint8_t* out = dst;
  // Number of outputs produced from full 2x2 boxes.
  const int boxed = dst_width - 1;
  int remaining;
  for (remaining = boxed; remaining > 1; remaining -= 2) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    out[1] = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
    out += 2;
  }
  if (boxed & 1) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    top += 2;
    bot += 2;
    out += 1;
  }
  // Last output: vertical average of the one remaining column.
  out[0] = (top[0] + bot[0] + 1) >> 1;
}
143
// 16-bit variant of the 2x2 box filter. src_stride is applied to the
// uint16_t pointer, so it counts elements rather than bytes.
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bot = src_ptr + src_stride;
  uint16_t* out = dst;
  int remaining;
  // Two boxes per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
    out[1] = (top[2] + top[3] + bot[2] + bot[3] + 2) >> 2;
    top += 4;
    bot += 4;
    out += 2;
  }
  // Trailing box for odd widths.
  if (dst_width & 1) {
    out[0] = (top[0] + top[1] + bot[0] + bot[1] + 2) >> 2;
  }
}
162
// Quarter-width point sampler: output i takes source element 4 * i + 2.
// src_stride is not used.
void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two outputs per pass, consuming eight source elements.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[2];
    out[1] = in[6];
    in += 8;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = in[2];
  }
}
179
// 16-bit quarter-width point sampler: output i takes source element
// 4 * i + 2. src_stride is not used.
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  (void)src_stride;
  // Two outputs per pass, consuming eight source elements.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[2];
    out[1] = in[6];
    in += 8;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = in[2];
  }
}
196
// Quarter-width 4x4 box filter: each output is the rounded mean of a 4x4 block
// spanning four rows (src_ptr + k * src_stride, k = 0..3) and four columns.
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  int row;
  int col;
  // Two 4x4 blocks per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    // Sums start at 8 so the final >> 4 rounds to nearest.
    int left = 8;
    int right = 8;
    for (row = 0; row < 4; ++row) {
      const uint8_t* line = in + src_stride * row;
      for (col = 0; col < 4; ++col) {
        left += line[col];
        right += line[col + 4];
      }
    }
    out[0] = left >> 4;
    out[1] = right >> 4;
    in += 8;
    out += 2;
  }
  // Trailing block for odd widths.
  if (dst_width & 1) {
    int sum = 8;
    for (row = 0; row < 4; ++row) {
      const uint8_t* line = in + src_stride * row;
      for (col = 0; col < 4; ++col) {
        sum += line[col];
      }
    }
    out[0] = sum >> 4;
  }
}
234
// 16-bit quarter-width 4x4 box filter. src_stride is applied to the uint16_t
// pointer, so it counts elements rather than bytes.
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  int row;
  int col;
  // Two 4x4 blocks per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    // Sums start at 8 so the final >> 4 rounds to nearest.
    int left = 8;
    int right = 8;
    for (row = 0; row < 4; ++row) {
      const uint16_t* line = in + src_stride * row;
      for (col = 0; col < 4; ++col) {
        left += line[col];
        right += line[col + 4];
      }
    }
    out[0] = left >> 4;
    out[1] = right >> 4;
    in += 8;
    out += 2;
  }
  // Trailing block for odd widths.
  if (dst_width & 1) {
    int sum = 8;
    for (row = 0; row < 4; ++row) {
      const uint16_t* line = in + src_stride * row;
      for (col = 0; col < 4; ++col) {
        sum += line[col];
      }
    }
    out[0] = sum >> 4;
  }
}
272
// 3/4 point sampler: from every group of four source elements, elements 0, 1
// and 3 are copied to three outputs. dst_width must be a positive multiple of
// 3 (asserted). src_stride is not used.
void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[3];
    in += 4;
    out += 3;
  }
}
288
// 16-bit 3/4 point sampler: from every group of four source elements,
// elements 0, 1 and 3 are copied to three outputs. dst_width must be a
// positive multiple of 3 (asserted). src_stride is not used.
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[3];
    in += 4;
    out += 3;
  }
}
304
// 3/4 box filter blending two rows 3 : 1 (row at src_ptr weighted 3, row at
// src_ptr + src_stride weighted 1). Each row is first reduced 4 -> 3
// horizontally with weights (3,1), (1,1), (1,3). dst_width must be a positive
// multiple of 3 (asserted).
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass; intermediates are rounded and stored as 8-bit values.
    const uint8_t top0 = (top[0] * 3 + top[1] + 2) >> 2;
    const uint8_t top1 = (top[1] + top[2] + 1) >> 1;
    const uint8_t top2 = (top[2] + top[3] * 3 + 2) >> 2;
    const uint8_t bot0 = (bot[0] * 3 + bot[1] + 2) >> 2;
    const uint8_t bot1 = (bot[1] + bot[2] + 1) >> 1;
    const uint8_t bot2 = (bot[2] + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 3 : 1.
    d[0] = (top0 * 3 + bot0 + 2) >> 2;
    d[1] = (top1 * 3 + bot1 + 2) >> 2;
    d[2] = (top2 * 3 + bot2 + 2) >> 2;
    top += 4;
    bot += 4;
    d += 3;
  }
}
329
// 16-bit 3/4 box filter blending two rows 3 : 1 (row at src_ptr weighted 3,
// row at src_ptr + src_stride weighted 1). Each row is first reduced 4 -> 3
// horizontally with weights (3,1), (1,1), (1,3). dst_width must be a positive
// multiple of 3 (asserted).
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass; intermediates are rounded and stored as 16-bit values.
    const uint16_t top0 = (top[0] * 3 + top[1] + 2) >> 2;
    const uint16_t top1 = (top[1] + top[2] + 1) >> 1;
    const uint16_t top2 = (top[2] + top[3] * 3 + 2) >> 2;
    const uint16_t bot0 = (bot[0] * 3 + bot[1] + 2) >> 2;
    const uint16_t bot1 = (bot[1] + bot[2] + 1) >> 1;
    const uint16_t bot2 = (bot[2] + bot[3] * 3 + 2) >> 2;
    // Vertical pass, 3 : 1.
    d[0] = (top0 * 3 + bot0 + 2) >> 2;
    d[1] = (top1 * 3 + bot1 + 2) >> 2;
    d[2] = (top2 * 3 + bot2 + 2) >> 2;
    top += 4;
    bot += 4;
    d += 3;
  }
}
353
// 3/4 box filter blending two rows 1 : 1 (row at src_ptr and row at
// src_ptr + src_stride weighted equally). Each row is first reduced 4 -> 3
// horizontally with weights (3,1), (1,1), (1,3). dst_width must be a positive
// multiple of 3 (asserted).
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass; intermediates are rounded and stored as 8-bit values.
    const uint8_t top0 = (top[0] * 3 + top[1] + 2) >> 2;
    const uint8_t top1 = (top[1] + top[2] + 1) >> 1;
    const uint8_t top2 = (top[2] + top[3] * 3 + 2) >> 2;
    const uint8_t bot0 = (bot[0] * 3 + bot[1] + 2) >> 2;
    const uint8_t bot1 = (bot[1] + bot[2] + 1) >> 1;
    const uint8_t bot2 = (bot[2] + bot[3] * 3 + 2) >> 2;
    // Vertical pass, plain rounded average.
    d[0] = (top0 + bot0 + 1) >> 1;
    d[1] = (top1 + bot1 + 1) >> 1;
    d[2] = (top2 + bot2 + 1) >> 1;
    top += 4;
    bot += 4;
    d += 3;
  }
}
378
// 16-bit 3/4 box filter blending two rows 1 : 1 (row at src_ptr and row at
// src_ptr + src_stride weighted equally). Each row is first reduced 4 -> 3
// horizontally with weights (3,1), (1,1), (1,3). dst_width must be a positive
// multiple of 3 (asserted).
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bot = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal pass; intermediates are rounded and stored as 16-bit values.
    const uint16_t top0 = (top[0] * 3 + top[1] + 2) >> 2;
    const uint16_t top1 = (top[1] + top[2] + 1) >> 1;
    const uint16_t top2 = (top[2] + top[3] * 3 + 2) >> 2;
    const uint16_t bot0 = (bot[0] * 3 + bot[1] + 2) >> 2;
    const uint16_t bot1 = (bot[1] + bot[2] + 1) >> 1;
    const uint16_t bot2 = (bot[2] + bot[3] * 3 + 2) >> 2;
    // Vertical pass, plain rounded average.
    d[0] = (top0 + bot0 + 1) >> 1;
    d[1] = (top1 + bot1 + 1) >> 1;
    d[2] = (top2 + bot2 + 1) >> 1;
    top += 4;
    bot += 4;
    d += 3;
  }
}
402
// Point-samples one row horizontally. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; the integer part
// (position >> 16) selects the source element.
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  uint8_t* out = dst_ptr;
  int pos = x;
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = src_ptr[pos >> 16];
    pos += dx;
    out[1] = src_ptr[pos >> 16];
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = src_ptr[pos >> 16];
  }
}
421
// 16-bit horizontal point sampler. x is the starting source position and dx
// the per-output step, both in 16.16 fixed point; the integer part
// (position >> 16) selects the source element.
void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  uint16_t* out = dst_ptr;
  int pos = x;
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = src_ptr[pos >> 16];
    pos += dx;
    out[1] = src_ptr[pos >> 16];
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    out[0] = src_ptr[pos >> 16];
  }
}
439
// Doubles a row horizontally by writing every source element twice. For an
// odd dst_width the last source element is written once. x and dx are
// accepted but ignored.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst_ptr;
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = out[0];
    in += 1;
    out += 2;
  }
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
458
// 16-bit 2x horizontal upsampler: every source element is written twice. For
// an odd dst_width the last source element is written once. x and dx are
// accepted but ignored.
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst_ptr;
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = out[0];
    in += 1;
    out += 2;
  }
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
476
// 8-bit linear blend of a and b by the 16-bit fraction f, written as
// a + f * (b - a) rather than (1 - f) * a + f * b.
#if !defined(__arm__) && !defined(__aarch64__)
// Non-ARM builds (Intel per the original note): the fraction is reduced to
// 7 bits (f >> 9) and the product is rounded with 0x40 before shifting by 7.
#define BLENDER(a, b, f)   \
  (uint8_t)((int)(a) +     \
            ((((int)(b) - (int)(a)) * (int)((f) >> 9) + 0x40) >> 7))
#else
// ARM builds: the full 16-bit fraction is used, rounded with 0x8000.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)(b) - (int)(a)) * (int)(f) + 0x8000) >> 16))
#endif
486
// Horizontally scales one row with linear filtering. x and dx are 16.16
// fixed point: the integer part picks the left source element, and the low
// 16 bits are passed to BLENDER as the weight toward the element on its right.
void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  uint8_t* out = dst_ptr;
  int pos = x;
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    out[1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    int idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
  }
}
513
// Same linear column filter as ScaleFilterCols_C, but the 16.16 position is
// accumulated in a 64-bit integer (x32 is widened on entry).
void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  uint8_t* out = dst_ptr;
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int64_t idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    out[1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    int64_t idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
  }
}
541 #undef BLENDER
542
// 16-bit linear blend: a + f * (b - a) with the full 16-bit fraction f,
// rounded with 0x8000. The product is formed in 64-bit arithmetic and the
// result is cast to uint16_t.
#define BLENDER(a, b, f)                                                   \
  (uint16_t)((int)(a) +                                                    \
             (int)(((((int64_t)(b) - (int)(a)) * (int64_t)(f)) + 0x8000) >> \
                   16))
548
// 16-bit horizontal scaler with linear filtering. x and dx are 16.16 fixed
// point: the integer part picks the left source element, and the low 16 bits
// are passed to BLENDER as the weight toward the element on its right.
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  uint16_t* out = dst_ptr;
  int pos = x;
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    out[1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    int idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
  }
}
575
// Same 16-bit linear column filter as ScaleFilterCols_16_C, but the 16.16
// position is accumulated in a 64-bit integer (x32 is widened on entry).
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  uint16_t* out = dst_ptr;
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two outputs per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int64_t idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    idx = pos >> 16;
    left = src_ptr[idx];
    right = src_ptr[idx + 1];
    out[1] = BLENDER(left, right, pos & 0xffff);
    pos += dx;
    out += 2;
  }
  // Trailing output for odd widths.
  if (dst_width & 1) {
    int64_t idx = pos >> 16;
    int left = src_ptr[idx];
    int right = src_ptr[idx + 1];
    out[0] = BLENDER(left, right, pos & 0xffff);
  }
}
603 #undef BLENDER
604
// 3/8 point sampler: from every group of eight source elements, elements 0, 3
// and 6 are copied to three outputs. dst_width must be a multiple of 3
// (asserted). src_stride is not used.
void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  const uint8_t* in = src_ptr;
  uint8_t* out = dst;
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    out[0] = in[0];
    out[1] = in[3];
    out[2] = in[6];
    in += 8;
    out += 3;
  }
}
620
// 16-bit 3/8 point sampler: from every group of eight source elements,
// elements 0, 3 and 6 are copied to three outputs. dst_width must be a
// multiple of 3 (asserted). src_stride is not used.
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  const uint16_t* in = src_ptr;
  uint16_t* out = dst;
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    out[0] = in[0];
    out[1] = in[3];
    out[2] = in[6];
    in += 8;
    out += 3;
  }
}
636
// 8x3 -> 3x1 box filter. Every 8 source columns across three rows yield three
// outputs: two averages over 3x3 blocks and one over the remaining 2x3 block.
// Division is done as a multiply by 65536 / N followed by >> 16 (truncating).
// dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  const uint8_t* row2 = src_ptr + src_stride * 2;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Columns 0..2 over three rows (9 samples).
    const int sum0 = row0[0] + row0[1] + row0[2] + row1[0] + row1[1] +
                     row1[2] + row2[0] + row2[1] + row2[2];
    // Columns 3..5 over three rows (9 samples).
    const int sum1 = row0[3] + row0[4] + row0[5] + row1[3] + row1[4] +
                     row1[5] + row2[3] + row2[4] + row2[5];
    // Columns 6..7 over three rows (6 samples).
    const int sum2 =
        row0[6] + row0[7] + row1[6] + row1[7] + row2[6] + row2[7];
    dst_ptr[0] = (sum0 * (65536 / 9)) >> 16;
    dst_ptr[1] = (sum1 * (65536 / 9)) >> 16;
    dst_ptr[2] = (sum2 * (65536 / 6)) >> 16;
    row0 += 8;
    row1 += 8;
    row2 += 8;
    dst_ptr += 3;
  }
}
667
// 16-bit 8x3 -> 3x1 box filter. Every 8 source columns across three rows
// yield three outputs: two averages over 3x3 blocks and one over the
// remaining 2x3 block. Division is done as a multiply by 65536 / N followed by
// >> 16 (truncating). dst_width must be a positive multiple of 3 (asserted).
//
// The sums and products are computed in unsigned 32-bit arithmetic: with
// 16-bit samples a 9-sample sum times 7281 can exceed INT_MAX (signed
// overflow), but it always stays below 2^32.
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  const uint16_t* row2 = src_ptr + src_stride * 2;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    // Columns 0..2 over three rows (9 samples).
    const uint32_t sum0 = (uint32_t)row0[0] + row0[1] + row0[2] + row1[0] +
                          row1[1] + row1[2] + row2[0] + row2[1] + row2[2];
    // Columns 3..5 over three rows (9 samples).
    const uint32_t sum1 = (uint32_t)row0[3] + row0[4] + row0[5] + row1[3] +
                          row1[4] + row1[5] + row2[3] + row2[4] + row2[5];
    // Columns 6..7 over three rows (6 samples).
    const uint32_t sum2 = (uint32_t)row0[6] + row0[7] + row1[6] + row1[7] +
                          row2[6] + row2[7];
    dst_ptr[0] = (uint16_t)((sum0 * (65536u / 9u)) >> 16);
    dst_ptr[1] = (uint16_t)((sum1 * (65536u / 9u)) >> 16);
    dst_ptr[2] = (uint16_t)((sum2 * (65536u / 6u)) >> 16);
    row0 += 8;
    row1 += 8;
    row2 += 8;
    dst_ptr += 3;
  }
}
697
// 8x2 -> 3x1 box filter. Every 8 source columns across two rows yield three
// outputs: two averages over 3x2 blocks and one over the remaining 2x2 block.
// Division is done as a multiply by 65536 / N followed by >> 16 (truncating).
// dst_width must be a positive multiple of 3 (asserted).
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Columns 0..2 over two rows (6 samples).
    const int sum0 =
        row0[0] + row0[1] + row0[2] + row1[0] + row1[1] + row1[2];
    // Columns 3..5 over two rows (6 samples).
    const int sum1 =
        row0[3] + row0[4] + row0[5] + row1[3] + row1[4] + row1[5];
    // Columns 6..7 over two rows (4 samples).
    const int sum2 = row0[6] + row0[7] + row1[6] + row1[7];
    dst_ptr[0] = (sum0 * (65536 / 6)) >> 16;
    dst_ptr[1] = (sum1 * (65536 / 6)) >> 16;
    dst_ptr[2] = (sum2 * (65536 / 4)) >> 16;
    row0 += 8;
    row1 += 8;
    dst_ptr += 3;
  }
}
723
// 16-bit 8x2 -> 3x1 box filter. Every 8 source columns across two rows yield
// three outputs: two averages over 3x2 blocks and one over the remaining 2x2
// block. Division is done as a multiply by 65536 / N followed by >> 16
// (truncating). dst_width must be a positive multiple of 3 (asserted).
//
// The sums and products are computed in unsigned 32-bit arithmetic: with
// 16-bit samples a 6-sample sum times 10922 (or a 4-sample sum times 16384)
// can exceed INT_MAX (signed overflow), but it always stays below 2^32.
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    // Columns 0..2 over two rows (6 samples).
    const uint32_t sum0 = (uint32_t)row0[0] + row0[1] + row0[2] + row1[0] +
                          row1[1] + row1[2];
    // Columns 3..5 over two rows (6 samples).
    const uint32_t sum1 = (uint32_t)row0[3] + row0[4] + row0[5] + row1[3] +
                          row1[4] + row1[5];
    // Columns 6..7 over two rows (4 samples).
    const uint32_t sum2 = (uint32_t)row0[6] + row0[7] + row1[6] + row1[7];
    dst_ptr[0] = (uint16_t)((sum0 * (65536u / 6u)) >> 16);
    dst_ptr[1] = (uint16_t)((sum1 * (65536u / 6u)) >> 16);
    dst_ptr[2] = (uint16_t)((sum2 * (65536u / 4u)) >> 16);
    row0 += 8;
    row1 += 8;
    dst_ptr += 3;
  }
}
748
// Accumulates a row of 8-bit samples into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for src_width elements. src_width must be positive
// (asserted).
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  const uint8_t* in = src_ptr;
  uint16_t* acc = dst_ptr;
  int remaining;
  assert(src_width > 0);
  // Two elements per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    acc[0] += in[0];
    acc[1] += in[1];
    in += 2;
    acc += 2;
  }
  // Trailing element for odd widths.
  if (src_width & 1) {
    acc[0] += in[0];
  }
}
762
// Accumulates a row of 16-bit samples into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for src_width elements. src_width must be positive
// (asserted).
void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  const uint16_t* in = src_ptr;
  uint32_t* acc = dst_ptr;
  int remaining;
  assert(src_width > 0);
  // Two elements per pass.
  for (remaining = src_width; remaining > 1; remaining -= 2) {
    acc[0] += in[0];
    acc[1] += in[1];
    in += 2;
    acc += 2;
  }
  // Trailing element for odd widths.
  if (src_width & 1) {
    acc[0] += in[0];
  }
}
778
779 // ARGB scale row functions
780
// Halves a row of 4-byte pixels by point sampling: output pixel i is source
// pixel 2 * i + 1. Pixels are moved as whole 32-bit words through uint32_t
// views of the byte pointers. src_stride is not used.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass, consuming four source pixels.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[1];
    out[1] = in[3];
    in += 4;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[1];
  }
}
799
// Halves a row of 4-byte pixels by averaging each horizontal pixel pair,
// channel by channel, with round-to-nearest. src_stride is not used.
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  int ch;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    // Channel ch of the left pixel pairs with channel ch of the right pixel,
    // which sits 4 bytes further on.
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (src_argb[ch] + src_argb[ch + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
815
// Halves a row of 4-byte pixels with a 2x2 box filter: each output channel is
// the rounded mean of that channel in two adjacent pixels of the row at
// src_argb and the two pixels below them (src_stride bytes further on).
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int remaining;
  int ch;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_argb + src_stride;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] =
          (src_argb[ch] + src_argb[ch + 4] + below[ch] + below[ch + 4] + 2) >>
          2;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
838
// Point-samples a row of 4-byte pixels at a fixed pixel step: output pixel i
// is source pixel i * src_stepx. Pixels are moved as whole 32-bit words
// through uint32_t views of the byte pointers. src_stride is not used.
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    in += src_stepx * 2;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
858
// Box-filtered variant of the fixed-step ARGB sampler: each output pixel is
// the per-channel rounded mean of a 2x2 pixel block (two pixels in the row at
// src_argb, two in the row src_stride bytes below), and the source then
// advances src_stepx pixels (src_stepx * 4 bytes).
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  int ch;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_argb + src_stride;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] =
          (src_argb[ch] + src_argb[ch + 4] + below[ch] + below[ch + 4] + 2) >>
          2;
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
882
// Point-samples one row of 4-byte pixels horizontally. x is the starting
// source position and dx the per-output step, both in 16.16 fixed point; the
// integer part (position >> 16) selects the source pixel, which is copied as
// one 32-bit word.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int pos = x;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[pos >> 16];
    pos += dx;
    out[1] = in[pos >> 16];
    pos += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[pos >> 16];
  }
}
903
// Same horizontal point sampler as ScaleARGBCols_C, but the 16.16 position is
// accumulated in a 64-bit integer (x32 is widened on entry).
void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[pos >> 16];
    pos += dx;
    out[1] = in[pos >> 16];
    pos += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    out[0] = in[pos >> 16];
  }
}
924
// Doubles a row of 4-byte pixels horizontally by writing every source pixel
// twice (as 32-bit words). For an odd dst_width the last source pixel is
// written once. x and dx are accepted but ignored.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = out[0];
    in += 1;
    out += 2;
  }
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
945
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
//
// BLENDER1 blends one channel with a 7-bit fraction f (0..0x7f): a is weighted
// by (0x7f ^ f) and b by f, then the sum is shifted right by 7.
// BLENDERC extracts the 8-bit channel at bit offset s from packed pixels a and
// b, blends it and moves the result back to offset s.
// BLENDER combines the four channels of a packed 32-bit pixel.
//
// Every macro argument and every expansion is parenthesized so the macros
// stay correct when given expression arguments or used inside a larger
// expression.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  ((uint32_t)BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f)                                                  \
  (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
   BLENDERC(a, b, f, 0))
954
// Horizontally scales one row of 4-byte pixels with linear filtering. x and
// dx are 16.16 fixed point: the integer part picks the left source pixel, and
// the top 7 bits of the fraction ((position >> 9) & 0x7f) are passed to
// BLENDER as the weight toward the pixel on its right.
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int pos = x;
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32_t left = in[idx];
    uint32_t right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
    pos += dx;
    idx = pos >> 16;
    frac = (pos >> 9) & 0x7f;
    left = in[idx];
    right = in[idx + 1];
    out[1] = BLENDER(left, right, frac);
    pos += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32_t left = in[idx];
    uint32_t right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
986
// Same linear ARGB column filter as ScaleARGBFilterCols_C, but the 16.16
// position is accumulated in a 64-bit integer (x32 is widened on entry).
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    int64_t idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32_t left = in[idx];
    uint32_t right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
    pos += dx;
    idx = pos >> 16;
    frac = (pos >> 9) & 0x7f;
    left = in[idx];
    right = in[idx + 1];
    out[1] = BLENDER(left, right, frac);
    pos += dx;
    out += 2;
  }
  // Trailing pixel for odd widths.
  if (dst_width & 1) {
    int64_t idx = pos >> 16;
    int frac = (pos >> 9) & 0x7f;
    uint32_t left = in[idx];
    uint32_t right = in[idx + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
1019 #undef BLENDER1
1020 #undef BLENDERC
1021 #undef BLENDER
1022
1023 // UV scale row functions
1024 // same as ARGB but 2 channels
1025
// Halves a row of interleaved 2-byte UV pixels by point sampling: output
// pixel i is source pixel 2 * i + 1. src_stride is not used.
//
// The source advances two UV pixels (4 bytes) per output pixel. The earlier
// pair-unrolled form advanced only two pixels per two outputs, so from the
// third output onward the wrong pixels were sampled. Bytes are copied
// directly, so the buffers need no 16-bit alignment.
void ScaleUVRowDown2_C(const uint8_t* src_uv,
                       ptrdiff_t src_stride,
                       uint8_t* dst_uv,
                       int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    // Take the second UV pixel of each source pair.
    dst_uv[0] = src_uv[2];
    dst_uv[1] = src_uv[3];
    src_uv += 4;
    dst_uv += 2;
  }
}
1044
// Scales an interleaved 2-channel (UV) row down by 2 horizontally, averaging
// each pair of adjacent source pixels per channel with rounding.
//   src_uv:     source row, 2 bytes per pixel; 2 * dst_width pixels are read.
//   src_stride: unused.
//   dst_uv:     destination row; dst_width pixels are written.
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
                             int dst_width) {
  const uint8_t* in = src_uv;
  uint8_t* out = dst_uv;
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    for (channel = 0; channel < 2; ++channel) {
      // Same channel of the two neighbouring pixels sits 2 bytes apart.
      const int sum = in[channel] + in[channel + 2] + 1;
      out[channel] = (uint8_t)(sum >> 1);
    }
    in += 4;
    out += 2;
  }
}
1058
// Scales an interleaved 2-channel (UV) image down by 2 in both directions
// for one output row: each output pixel is the rounded average, per channel,
// of a 2x2 block taken from the row at src_uv and the row src_stride bytes
// after it.
//   dst_uv: destination row; dst_width pixels (2 bytes each) are written.
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          uint8_t* dst_uv,
                          int dst_width) {
  const uint8_t* upper = src_uv;
  const uint8_t* lower = src_uv + src_stride;
  uint8_t* out = dst_uv;
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    for (channel = 0; channel < 2; ++channel) {
      const int sum = upper[channel] + upper[channel + 2] + lower[channel] +
                      lower[channel + 2] + 2;
      out[channel] = (uint8_t)(sum >> 2);
    }
    upper += 4;
    lower += 4;
    out += 2;
  }
}
1075
// Point-samples an interleaved 2-channel (UV) row, copying one 16-bit pixel
// out of every src_stepx source pixels.
//   src_uv:     source row, accessed as 16-bit pixels.
//   src_stride: unused.
//   src_stepx:  distance, in pixels, between consecutive samples.
//   dst_uv:     destination row; dst_width 16-bit pixels are written.
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          int src_stepx,
                          uint8_t* dst_uv,
                          int dst_width) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int done = 0;
  (void)src_stride;
  // Two samples per pass.
  while (done < dst_width - 1) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    in += src_stepx * 2;
    out += 2;
    done += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    *out = *in;
  }
}
1095
// Box-filters an interleaved 2-channel (UV) row while stepping src_stepx
// pixels between samples: each output pixel is the rounded average, per
// channel, of the 2x2 block at the current position (this row and the row
// src_stride bytes after it).
//   dst_uv: destination row; dst_width pixels (2 bytes each) are written.
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             int src_stepx,
                             uint8_t* dst_uv,
                             int dst_width) {
  const uint8_t* in = src_uv;
  uint8_t* out = dst_uv;
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* lower = in + src_stride;
    int channel;
    for (channel = 0; channel < 2; ++channel) {
      const int sum =
          in[channel] + in[channel + 2] + lower[channel] + lower[channel + 2] + 2;
      out[channel] = (uint8_t)(sum >> 2);
    }
    in += src_stepx * 2;  // 2 bytes per UV pixel.
    out += 2;
  }
}
1113
// Scales a single row of 2-channel (UV) pixels using point sampling.
//   dst_uv:  destination row; dst_width 16-bit pixels are written.
//   src_uv:  source row, accessed as 16-bit pixels.
//   x, dx:   start position and per-pixel step in 16.16 fixed point; the
//            integer part (x >> 16) selects the source pixel.
void ScaleUVCols_C(uint8_t* dst_uv,
                   const uint8_t* src_uv,
                   int dst_width,
                   int x,
                   int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int pos = x;
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    *out++ = in[pos >> 16];
    pos += dx;
    *out++ = in[pos >> 16];
    pos += dx;
  }
  if (dst_width & 1) {
    *out = in[pos >> 16];
  }
}
1134
// Point-samples a single row of 2-channel (UV) pixels, accumulating the
// source position in a 64-bit integer.
//   dst_uv:  destination row; dst_width 16-bit pixels are written.
//   src_uv:  source row, accessed as 16-bit pixels.
//   x32, dx: start position and per-pixel step in 16.16 fixed point.
void ScaleUVCols64_C(uint8_t* dst_uv,
                     const uint8_t* src_uv,
                     int dst_width,
                     int x32,
                     int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int64_t pos = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    *out++ = in[pos >> 16];
    pos += dx;
    *out++ = in[pos >> 16];
    pos += dx;
  }
  if (dst_width & 1) {
    *out = in[pos >> 16];
  }
}
1155
// Scales a single row of 2-channel (UV) pixels up by 2x using point
// sampling: every source pixel is written twice.
//   dst_uv:  destination row; dst_width 16-bit pixels are written.
//   src_uv:  source row, accessed as 16-bit pixels.
//   x, dx:   unused; the 2x ratio is implied.
void ScaleUVColsUp2_C(uint8_t* dst_uv,
                      const uint8_t* src_uv,
                      int dst_width,
                      int x,
                      int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    const uint16_t pixel = *in++;
    out[0] = pixel;
    out[1] = pixel;
    out += 2;
  }
  // Odd width: the last source pixel is emitted only once.
  if (dst_width & 1) {
    *out = *in;
  }
}
1176
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
// Mimics SSSE3 blender
// BLENDER1 mixes two 8-bit values with a 7-bit fraction f.
// BLENDERC applies BLENDER1 to the byte found at bit offset s of a and b and
// moves the result back to that offset.
// BLENDER combines the blended high byte and low byte of a 16-bit UV pixel.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  ((uint16_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s)))
#define BLENDER(a, b, f) (BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))

// Horizontally scales one row of 2-channel (UV) pixels with filtering.
//   dst_uv:  destination row; dst_width 16-bit pixels are written.
//   src_uv:  source row, accessed as 16-bit pixels.
//   x, dx:   start position and per-pixel step in 16.16 fixed point.
// Every output blends src[i] and src[i + 1], where i is the integer part of
// the position and the weight is the top 7 bits of its fraction. src[i + 1]
// is always read.
void ScaleUVFilterCols_C(uint8_t* dst_uv,
                         const uint8_t* src_uv,
                         int dst_width,
                         int x,
                         int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int pos = x;
  int j;
  // Main loop emits two pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int lane;
    for (lane = 0; lane < 2; ++lane) {
      const int index = pos >> 16;
      const int frac = (pos >> 9) & 0x7f;
      const uint16_t left = in[index];
      const uint16_t right = in[index + 1];
      out[lane] = BLENDER(left, right, frac);
      pos += dx;
    }
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    const int index = pos >> 16;
    const int frac = (pos >> 9) & 0x7f;
    const uint16_t left = in[index];
    const uint16_t right = in[index + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
1215
// Horizontally scales one row of 2-channel (UV) pixels with filtering,
// accumulating the source position in a 64-bit integer.
//   dst_uv:  destination row; dst_width 16-bit pixels are written.
//   src_uv:  source row, accessed as 16-bit pixels.
//   x32, dx: start position and per-pixel step in 16.16 fixed point.
// Every output blends src[i] and src[i + 1] through BLENDER, where i is the
// integer part of the position and the weight is the top 7 bits of its
// fraction. src[i + 1] is always read.
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
                           const uint8_t* src_uv,
                           int dst_width,
                           int x32,
                           int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int64_t pos = (int64_t)(x32);
  int j;
  // Main loop emits two pixels per pass.
  for (j = 0; j < dst_width - 1; j += 2) {
    int lane;
    for (lane = 0; lane < 2; ++lane) {
      const int64_t index = pos >> 16;
      const int frac = (int)((pos >> 9) & 0x7f);
      const uint16_t left = in[index];
      const uint16_t right = in[index + 1];
      out[lane] = BLENDER(left, right, frac);
      pos += dx;
    }
    out += 2;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    const int64_t index = pos >> 16;
    const int frac = (int)((pos >> 9) & 0x7f);
    const uint16_t left = in[index];
    const uint16_t right = in[index + 1];
    out[0] = BLENDER(left, right, frac);
  }
}
1248 #undef BLENDER1
1249 #undef BLENDERC
1250 #undef BLENDER
1251
1252 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)1253 void ScalePlaneVertical(int src_height,
1254 int dst_width,
1255 int dst_height,
1256 int src_stride,
1257 int dst_stride,
1258 const uint8_t* src_argb,
1259 uint8_t* dst_argb,
1260 int x,
1261 int y,
1262 int dy,
1263 int bpp,
1264 enum FilterMode filtering) {
1265 // TODO(fbarchard): Allow higher bpp.
1266 int dst_width_bytes = dst_width * bpp;
1267 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
1268 ptrdiff_t src_stride, int dst_width,
1269 int source_y_fraction) = InterpolateRow_C;
1270 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1271 int j;
1272 assert(bpp >= 1 && bpp <= 4);
1273 assert(src_height != 0);
1274 assert(dst_width > 0);
1275 assert(dst_height > 0);
1276 src_argb += (x >> 16) * bpp;
1277 #if defined(HAS_INTERPOLATEROW_SSSE3)
1278 if (TestCpuFlag(kCpuHasSSSE3)) {
1279 InterpolateRow = InterpolateRow_Any_SSSE3;
1280 if (IS_ALIGNED(dst_width_bytes, 16)) {
1281 InterpolateRow = InterpolateRow_SSSE3;
1282 }
1283 }
1284 #endif
1285 #if defined(HAS_INTERPOLATEROW_AVX2)
1286 if (TestCpuFlag(kCpuHasAVX2)) {
1287 InterpolateRow = InterpolateRow_Any_AVX2;
1288 if (IS_ALIGNED(dst_width_bytes, 32)) {
1289 InterpolateRow = InterpolateRow_AVX2;
1290 }
1291 }
1292 #endif
1293 #if defined(HAS_INTERPOLATEROW_NEON)
1294 if (TestCpuFlag(kCpuHasNEON)) {
1295 InterpolateRow = InterpolateRow_Any_NEON;
1296 if (IS_ALIGNED(dst_width_bytes, 16)) {
1297 InterpolateRow = InterpolateRow_NEON;
1298 }
1299 }
1300 #endif
1301 #if defined(HAS_INTERPOLATEROW_MMI)
1302 if (TestCpuFlag(kCpuHasMMI)) {
1303 InterpolateRow = InterpolateRow_Any_MMI;
1304 if (IS_ALIGNED(dst_width_bytes, 8)) {
1305 InterpolateRow = InterpolateRow_MMI;
1306 }
1307 }
1308 #endif
1309 #if defined(HAS_INTERPOLATEROW_MSA)
1310 if (TestCpuFlag(kCpuHasMSA)) {
1311 InterpolateRow = InterpolateRow_Any_MSA;
1312 if (IS_ALIGNED(dst_width_bytes, 32)) {
1313 InterpolateRow = InterpolateRow_MSA;
1314 }
1315 }
1316 #endif
1317 for (j = 0; j < dst_height; ++j) {
1318 int yi;
1319 int yf;
1320 if (y > max_y) {
1321 y = max_y;
1322 }
1323 yi = y >> 16;
1324 yf = filtering ? ((y >> 8) & 255) : 0;
1325 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1326 dst_width_bytes, yf);
1327 dst_argb += dst_stride;
1328 y += dy;
1329 }
1330 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_argb,uint16_t * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)1331 void ScalePlaneVertical_16(int src_height,
1332 int dst_width,
1333 int dst_height,
1334 int src_stride,
1335 int dst_stride,
1336 const uint16_t* src_argb,
1337 uint16_t* dst_argb,
1338 int x,
1339 int y,
1340 int dy,
1341 int wpp,
1342 enum FilterMode filtering) {
1343 // TODO(fbarchard): Allow higher wpp.
1344 int dst_width_words = dst_width * wpp;
1345 void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
1346 ptrdiff_t src_stride, int dst_width,
1347 int source_y_fraction) = InterpolateRow_16_C;
1348 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1349 int j;
1350 assert(wpp >= 1 && wpp <= 2);
1351 assert(src_height != 0);
1352 assert(dst_width > 0);
1353 assert(dst_height > 0);
1354 src_argb += (x >> 16) * wpp;
1355 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1356 if (TestCpuFlag(kCpuHasSSE2)) {
1357 InterpolateRow = InterpolateRow_Any_16_SSE2;
1358 if (IS_ALIGNED(dst_width_bytes, 16)) {
1359 InterpolateRow = InterpolateRow_16_SSE2;
1360 }
1361 }
1362 #endif
1363 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1364 if (TestCpuFlag(kCpuHasSSSE3)) {
1365 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1366 if (IS_ALIGNED(dst_width_bytes, 16)) {
1367 InterpolateRow = InterpolateRow_16_SSSE3;
1368 }
1369 }
1370 #endif
1371 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1372 if (TestCpuFlag(kCpuHasAVX2)) {
1373 InterpolateRow = InterpolateRow_Any_16_AVX2;
1374 if (IS_ALIGNED(dst_width_bytes, 32)) {
1375 InterpolateRow = InterpolateRow_16_AVX2;
1376 }
1377 }
1378 #endif
1379 #if defined(HAS_INTERPOLATEROW_16_NEON)
1380 if (TestCpuFlag(kCpuHasNEON)) {
1381 InterpolateRow = InterpolateRow_Any_16_NEON;
1382 if (IS_ALIGNED(dst_width_bytes, 16)) {
1383 InterpolateRow = InterpolateRow_16_NEON;
1384 }
1385 }
1386 #endif
1387 for (j = 0; j < dst_height; ++j) {
1388 int yi;
1389 int yf;
1390 if (y > max_y) {
1391 y = max_y;
1392 }
1393 yi = y >> 16;
1394 yf = filtering ? ((y >> 8) & 255) : 0;
1395 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1396 dst_width_words, yf);
1397 dst_argb += dst_stride;
1398 y += dy;
1399 }
1400 }
1401
1402 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1403 enum FilterMode ScaleFilterReduce(int src_width,
1404 int src_height,
1405 int dst_width,
1406 int dst_height,
1407 enum FilterMode filtering) {
1408 if (src_width < 0) {
1409 src_width = -src_width;
1410 }
1411 if (src_height < 0) {
1412 src_height = -src_height;
1413 }
1414 if (filtering == kFilterBox) {
1415 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1416 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1417 filtering = kFilterBilinear;
1418 }
1419 }
1420 if (filtering == kFilterBilinear) {
1421 if (src_height == 1) {
1422 filtering = kFilterLinear;
1423 }
1424 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1425 if (dst_height == src_height || dst_height * 3 == src_height) {
1426 filtering = kFilterLinear;
1427 }
1428 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1429 // avoid reading 2 pixels horizontally that causes memory exception.
1430 if (src_width == 1) {
1431 filtering = kFilterNone;
1432 }
1433 }
1434 if (filtering == kFilterLinear) {
1435 if (src_width == 1) {
1436 filtering = kFilterNone;
1437 }
1438 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1439 if (dst_width == src_width || dst_width * 3 == src_width) {
1440 filtering = kFilterNone;
1441 }
1442 }
1443 return filtering;
1444 }
1445
// Divide num by div and return as 16.16 fixed point result.
// The numerator is widened to 64 bits before scaling; div must be non-zero.
int FixedDiv_C(int num, int div) {
  const int64_t scaled_num = (int64_t)(num) << 16;
  const int64_t quotient = scaled_num / div;
  return (int)quotient;
}
1450
// Returns ((num << 16) - 0x00010001) / (div - 1) as a 16.16 fixed point
// value, i.e. slightly less than (num - 1) / (div - 1).
// div must not be 1 (the divisor would be zero).
int FixedDiv1_C(int num, int div) {
  const int64_t scaled_num = ((int64_t)(num) << 16) - 0x00010001;
  const int divisor = div - 1;
  return (int)(scaled_num / divisor);
}
1455
1456 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1457
1458 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1459 void ScaleSlope(int src_width,
1460 int src_height,
1461 int dst_width,
1462 int dst_height,
1463 enum FilterMode filtering,
1464 int* x,
1465 int* y,
1466 int* dx,
1467 int* dy) {
1468 assert(x != NULL);
1469 assert(y != NULL);
1470 assert(dx != NULL);
1471 assert(dy != NULL);
1472 assert(src_width != 0);
1473 assert(src_height != 0);
1474 assert(dst_width > 0);
1475 assert(dst_height > 0);
1476 // Check for 1 pixel and avoid FixedDiv overflow.
1477 if (dst_width == 1 && src_width >= 32768) {
1478 dst_width = src_width;
1479 }
1480 if (dst_height == 1 && src_height >= 32768) {
1481 dst_height = src_height;
1482 }
1483 if (filtering == kFilterBox) {
1484 // Scale step for point sampling duplicates all pixels equally.
1485 *dx = FixedDiv(Abs(src_width), dst_width);
1486 *dy = FixedDiv(src_height, dst_height);
1487 *x = 0;
1488 *y = 0;
1489 } else if (filtering == kFilterBilinear) {
1490 // Scale step for bilinear sampling renders last pixel once for upsample.
1491 if (dst_width <= Abs(src_width)) {
1492 *dx = FixedDiv(Abs(src_width), dst_width);
1493 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1494 } else if (dst_width > 1) {
1495 *dx = FixedDiv1(Abs(src_width), dst_width);
1496 *x = 0;
1497 }
1498 if (dst_height <= src_height) {
1499 *dy = FixedDiv(src_height, dst_height);
1500 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1501 } else if (dst_height > 1) {
1502 *dy = FixedDiv1(src_height, dst_height);
1503 *y = 0;
1504 }
1505 } else if (filtering == kFilterLinear) {
1506 // Scale step for bilinear sampling renders last pixel once for upsample.
1507 if (dst_width <= Abs(src_width)) {
1508 *dx = FixedDiv(Abs(src_width), dst_width);
1509 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1510 } else if (dst_width > 1) {
1511 *dx = FixedDiv1(Abs(src_width), dst_width);
1512 *x = 0;
1513 }
1514 *dy = FixedDiv(src_height, dst_height);
1515 *y = *dy >> 1;
1516 } else {
1517 // Scale step for point sampling duplicates all pixels equally.
1518 *dx = FixedDiv(Abs(src_width), dst_width);
1519 *dy = FixedDiv(src_height, dst_height);
1520 *x = CENTERSTART(*dx, 0);
1521 *y = CENTERSTART(*dy, 0);
1522 }
1523 // Negative src_width means horizontally mirror.
1524 if (src_width < 0) {
1525 *x += (dst_width - 1) * *dx;
1526 *dx = -*dx;
1527 // src_width = -src_width; // Caller must do this.
1528 }
1529 }
1530 #undef CENTERSTART
1531
// Upsamples a 16-bit row by 2x horizontally with filtering.
// Each pair of outputs is a weighted blend (9:3:3:1 out of 16, rounded) of a
// 2x2 source neighbourhood taken from the row at src_ptr and the row
// src_stride elements after it. The pixel to the right of the current one is
// always read, so one extra source pixel per row is touched.
//   dst: destination row; dst_width 16-bit pixels are written.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* near_row = src_ptr;
  const uint16_t* far_row = src_ptr + src_stride;
  uint16_t* out = dst;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    const int n0 = near_row[0];
    const int n1 = near_row[1];
    const int f0 = far_row[0];
    const int f1 = far_row[1];
    out[0] = (uint16_t)((9 * n0 + 3 * n1 + 3 * f0 + f1 + 8) >> 4);
    out[1] = (uint16_t)((3 * n0 + 9 * n1 + f0 + 3 * f1 + 8) >> 4);
    ++near_row;
    ++far_row;
    out += 2;
  }
  // Odd width: only the left-weighted output of the final pair is written.
  if (dst_width & 1) {
    const int n0 = near_row[0];
    const int n1 = near_row[1];
    const int f0 = far_row[0];
    const int f1 = far_row[1];
    out[0] = (uint16_t)((9 * n0 + 3 * n1 + 3 * f0 + f1 + 8) >> 4);
  }
}
1560
1561 #ifdef __cplusplus
1562 } // extern "C"
1563 } // namespace libyuv
1564 #endif
1565