1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v (v itself when non-negative, otherwise -v).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Point-samples one row down to half width: output pixel i is a copy of
// source pixel 2 * i + 1. src_stride is not used.
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int pairs;
  (void)src_stride;
  // Two output pixels per iteration, consuming four source pixels.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
47
// 16-bit version of the half-width point sampler: dst[i] = src_ptr[2 * i + 1].
// src_stride is not used.
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  // Handle output pixels two at a time while at least two remain.
  while (remaining > 1) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    src_ptr += 4;
    dst += 2;
    remaining -= 2;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
64
// Halves a row horizontally by averaging each pair of adjacent source pixels
// with rounding: dst[i] = (src[2i] + src[2i + 1] + 1) >> 1.
// src_stride is not used.
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  int pairs;
  (void)src_stride;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst[0] = (uint8_t)((src_ptr[0] + src_ptr[1] + 1) >> 1);
    dst[1] = (uint8_t)((src_ptr[2] + src_ptr[3] + 1) >> 1);
    src_ptr += 4;
    dst += 2;
  }
  // Odd width: one more averaged pixel.
  if (dst_width & 1) {
    dst[0] = (uint8_t)((src_ptr[0] + src_ptr[1] + 1) >> 1);
  }
}
82
// 16-bit version of the horizontal 2:1 averaging filter:
// dst[i] = (src[2i] + src[2i + 1] + 1) >> 1. src_stride is not used.
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    dst[0] = (uint16_t)((src_ptr[0] + src_ptr[1] + 1) >> 1);
    dst[1] = (uint16_t)((src_ptr[2] + src_ptr[3] + 1) >> 1);
    src_ptr += 4;
    dst += 2;
    remaining -= 2;
  }
  // Odd width: one more averaged pixel.
  if (dst_width & 1) {
    dst[0] = (uint16_t)((src_ptr[0] + src_ptr[1] + 1) >> 1);
  }
}
100
// Halves a row using a 2x2 box filter. Each output pixel is the rounded
// average of two adjacent pixels from the row at src_ptr and the two pixels
// below them in the row at src_ptr + src_stride.
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst[0] = (uint8_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
    dst[1] = (uint8_t)((row0[2] + row0[3] + row1[2] + row1[3] + 2) >> 2);
    row0 += 4;
    row1 += 4;
    dst += 2;
  }
  // Odd width: one more 2x2 box.
  if (dst_width & 1) {
    dst[0] = (uint8_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
  }
}
119
// 2x2 box downscale for a row whose last output pixel has only one source
// column available. The first dst_width - 1 outputs are full 2x2 box
// averages; the final output averages a single pixel from each of the two
// rows (src_ptr and src_ptr + src_stride).
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  const int full_boxes = dst_width - 1;  // Outputs that get a full 2x2 box.
  int pairs;
  for (pairs = full_boxes / 2; pairs > 0; --pairs) {
    dst[0] = (uint8_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
    dst[1] = (uint8_t)((row0[2] + row0[3] + row1[2] + row1[3] + 2) >> 2);
    row0 += 4;
    row1 += 4;
    dst += 2;
  }
  if (full_boxes & 1) {
    dst[0] = (uint8_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
    row0 += 2;
    row1 += 2;
    dst += 1;
  }
  // Last output: vertical average of the one remaining column.
  dst[0] = (uint8_t)((row0[0] + row1[0] + 1) >> 1);
}
143
// 16-bit version of the 2x2 box downscale. Each output pixel is the rounded
// average of a 2x2 block taken from the rows at src_ptr and
// src_ptr + src_stride (src_stride is applied to the uint16_t pointer).
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining = dst_width;
  while (remaining > 1) {
    dst[0] = (uint16_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
    dst[1] = (uint16_t)((row0[2] + row0[3] + row1[2] + row1[3] + 2) >> 2);
    row0 += 4;
    row1 += 4;
    dst += 2;
    remaining -= 2;
  }
  // Odd width: one more 2x2 box.
  if (dst_width & 1) {
    dst[0] = (uint16_t)((row0[0] + row0[1] + row1[0] + row1[1] + 2) >> 2);
  }
}
162
// Point-samples one row down to quarter width: output pixel i is a copy of
// source pixel 4 * i + 2. src_stride is not used.
void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int pairs;
  (void)src_stride;
  // Two output pixels per iteration, consuming eight source pixels.
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
179
// 16-bit version of the quarter-width point sampler:
// dst[i] = src_ptr[4 * i + 2]. src_stride is not used.
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  while (remaining > 1) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
196
// Quarter-width downscale with a 4x4 box filter. Each output pixel is the
// rounded average ((sum + 8) >> 4) of a 4x4 block spanning four consecutive
// rows that start at src_ptr and are src_stride apart.
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + src_stride;
    const uint8_t* r2 = src_ptr + src_stride * 2;
    const uint8_t* r3 = src_ptr + src_stride * 3;
    // Left 4x4 block.
    dst[0] = (uint8_t)((r0[0] + r0[1] + r0[2] + r0[3] +  //
                        r1[0] + r1[1] + r1[2] + r1[3] +  //
                        r2[0] + r2[1] + r2[2] + r2[3] +  //
                        r3[0] + r3[1] + r3[2] + r3[3] + 8) >>
                       4);
    // Right 4x4 block.
    dst[1] = (uint8_t)((r0[4] + r0[5] + r0[6] + r0[7] +  //
                        r1[4] + r1[5] + r1[6] + r1[7] +  //
                        r2[4] + r2[5] + r2[6] + r2[7] +  //
                        r3[4] + r3[5] + r3[6] + r3[7] + 8) >>
                       4);
    src_ptr += 8;
    dst += 2;
  }
  // Odd width: one more 4x4 block.
  if (dst_width & 1) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + src_stride;
    const uint8_t* r2 = src_ptr + src_stride * 2;
    const uint8_t* r3 = src_ptr + src_stride * 3;
    dst[0] = (uint8_t)((r0[0] + r0[1] + r0[2] + r0[3] +  //
                        r1[0] + r1[1] + r1[2] + r1[3] +  //
                        r2[0] + r2[1] + r2[2] + r2[3] +  //
                        r3[0] + r3[1] + r3[2] + r3[3] + 8) >>
                       4);
  }
}
234
// 16-bit version of the 4x4 box downscale. Each output pixel is the rounded
// average ((sum + 8) >> 4) of a 4x4 block spanning four consecutive rows
// that start at src_ptr and are src_stride elements apart.
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  int remaining = dst_width;
  while (remaining > 1) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + src_stride;
    const uint16_t* r2 = src_ptr + src_stride * 2;
    const uint16_t* r3 = src_ptr + src_stride * 3;
    // Left 4x4 block.
    dst[0] = (uint16_t)((r0[0] + r0[1] + r0[2] + r0[3] +  //
                         r1[0] + r1[1] + r1[2] + r1[3] +  //
                         r2[0] + r2[1] + r2[2] + r2[3] +  //
                         r3[0] + r3[1] + r3[2] + r3[3] + 8) >>
                        4);
    // Right 4x4 block.
    dst[1] = (uint16_t)((r0[4] + r0[5] + r0[6] + r0[7] +  //
                         r1[4] + r1[5] + r1[6] + r1[7] +  //
                         r2[4] + r2[5] + r2[6] + r2[7] +  //
                         r3[4] + r3[5] + r3[6] + r3[7] + 8) >>
                        4);
    src_ptr += 8;
    dst += 2;
    remaining -= 2;
  }
  // Odd width: one more 4x4 block.
  if (dst_width & 1) {
    const uint16_t* r0 = src_ptr;
    const uint16_t* r1 = src_ptr + src_stride;
    const uint16_t* r2 = src_ptr + src_stride * 2;
    const uint16_t* r3 = src_ptr + src_stride * 3;
    dst[0] = (uint16_t)((r0[0] + r0[1] + r0[2] + r0[3] +  //
                         r1[0] + r1[1] + r1[2] + r1[3] +  //
                         r2[0] + r2[1] + r2[2] + r2[3] +  //
                         r3[0] + r3[1] + r3[2] + r3[3] + 8) >>
                        4);
  }
}
272
// 3/4 point-sampled downscale: every group of four source pixels produces
// three output pixels by copying source pixels 0, 1 and 3 of the group.
// dst_width is asserted to be a positive multiple of 3. src_stride is unused.
void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
  }
}
288
// 16-bit version of the 3/4 point sampler: each group of four source pixels
// yields source pixels 0, 1 and 3. dst_width is asserted to be a positive
// multiple of 3. src_stride is unused.
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  while (remaining > 0) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    src_ptr += 4;
    dst += 3;
    remaining -= 3;
  }
}
304
// Filter rows 0 and 1 together, 3 : 1
// Each row is first reduced 4 -> 3 horizontally with weights (3,1), (1,1)
// and (1,3); the two filtered rows are then blended 3 parts row 0 to 1 part
// row 1. dst_width is asserted to be a positive multiple of 3.
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the upper row.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the lower row.
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    d[0] = (uint8_t)((top0 * 3 + bot0 + 2) >> 2);
    d[1] = (uint8_t)((top1 * 3 + bot1 + 2) >> 2);
    d[2] = (uint8_t)((top2 * 3 + bot2 + 2) >> 2);
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
329
// 16-bit version of the 3/4 box filter that blends two rows 3 : 1.
// Each row is reduced 4 -> 3 horizontally with weights (3,1), (1,1), (1,3),
// then the result is 3 parts of the row at src_ptr to 1 part of the row at
// src_ptr + src_stride. dst_width is asserted to be a positive multiple of 3.
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining = dst_width;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  while (remaining > 0) {
    // Horizontal 4 -> 3 filter of the upper row.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the lower row.
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 3 : 1 blend.
    d[0] = (uint16_t)((top0 * 3 + bot0 + 2) >> 2);
    d[1] = (uint16_t)((top1 * 3 + bot1 + 2) >> 2);
    d[2] = (uint16_t)((top2 * 3 + bot2 + 2) >> 2);
    row0 += 4;
    row1 += 4;
    d += 3;
    remaining -= 3;
  }
}
353
// Filter rows 1 and 2 together, 1 : 1
// Each row is first reduced 4 -> 3 horizontally with weights (3,1), (1,1)
// and (1,3); the two filtered rows are then averaged with rounding.
// dst_width is asserted to be a positive multiple of 3.
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* row0 = src_ptr;
  const uint8_t* row1 = src_ptr + src_stride;
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    // Horizontal 4 -> 3 filter of the upper row.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the lower row.
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    d[0] = (uint8_t)((top0 + bot0 + 1) >> 1);
    d[1] = (uint8_t)((top1 + bot1 + 1) >> 1);
    d[2] = (uint8_t)((top2 + bot2 + 1) >> 1);
    row0 += 4;
    row1 += 4;
    d += 3;
  }
}
378
// 16-bit version of the 3/4 box filter that blends two rows 1 : 1.
// Each row is reduced 4 -> 3 horizontally with weights (3,1), (1,1), (1,3),
// then the rows at src_ptr and src_ptr + src_stride are averaged with
// rounding. dst_width is asserted to be a positive multiple of 3.
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining = dst_width;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  while (remaining > 0) {
    // Horizontal 4 -> 3 filter of the upper row.
    const int top0 = (row0[0] * 3 + row0[1] + 2) >> 2;
    const int top1 = (row0[1] + row0[2] + 1) >> 1;
    const int top2 = (row0[2] + row0[3] * 3 + 2) >> 2;
    // Horizontal 4 -> 3 filter of the lower row.
    const int bot0 = (row1[0] * 3 + row1[1] + 2) >> 2;
    const int bot1 = (row1[1] + row1[2] + 1) >> 1;
    const int bot2 = (row1[2] + row1[3] * 3 + 2) >> 2;
    // Vertical 1 : 1 blend.
    d[0] = (uint16_t)((top0 + bot0 + 1) >> 1);
    d[1] = (uint16_t)((top1 + bot1 + 1) >> 1);
    d[2] = (uint16_t)((top2 + bot2 + 1) >> 1);
    row0 += 4;
    row1 += 4;
    d += 3;
    remaining -= 3;
  }
}
402
// Sample position: (O is src sample position, X is dst sample position)
//
//      v dst_ptr at here           v stop at here
//  X O X   X O X   X O X   X O X   X O X
//    ^ src_ptr at here
//
// Doubles a row horizontally with linear interpolation. Each pair of
// adjacent source pixels (near, far) yields two outputs weighted 3:1 and
// 1:3, so dst_width / 2 iterations read dst_width / 2 + 1 source pixels.
// dst_width is asserted to be even and non-negative.
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
                          uint8_t* dst_ptr,
                          int dst_width) {
  int remaining;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (remaining = dst_width >> 1; remaining > 0; --remaining) {
    dst_ptr[0] = (uint8_t)((src_ptr[0] * 3 + src_ptr[1] * 1 + 2) >> 2);
    dst_ptr[1] = (uint8_t)((src_ptr[0] * 1 + src_ptr[1] * 3 + 2) >> 2);
    src_ptr += 1;
    dst_ptr += 2;
  }
}
419
// Sample position: (O is src sample position, X is dst sample position)
//
//    src_ptr at here
//  X v X   X   X   X   X   X   X   X   X
//    O       O       O       O       O
//  X   X   X   X   X   X   X   X   X   X
//      ^ dst_ptr at here           ^ stop at here
//  X   X   X   X   X   X   X   X   X   X
//    O       O       O       O       O
//  X   X   X   X   X   X   X   X   X   X
//
// Doubles two source rows (src_ptr and src_ptr + src_stride) into two
// destination rows (dst_ptr and dst_ptr + dst_stride) with bilinear
// interpolation. Every 2x2 group of source pixels yields a 2x2 group of
// outputs using the weights 9:3:3:1 (sum 16, rounded).
// dst_width is asserted to be even and non-negative.
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            ptrdiff_t dst_stride,
                            int dst_width) {
  const uint8_t* top = src_ptr;
  const uint8_t* bot = src_ptr + src_stride;
  uint8_t* out0 = dst_ptr;
  uint8_t* out1 = dst_ptr + dst_stride;
  int remaining;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (remaining = dst_width >> 1; remaining > 0; --remaining) {
    out0[0] = (uint8_t)(
        (top[0] * 9 + top[1] * 3 + bot[0] * 3 + bot[1] * 1 + 8) >> 4);
    out0[1] = (uint8_t)(
        (top[0] * 3 + top[1] * 9 + bot[0] * 1 + bot[1] * 3 + 8) >> 4);
    out1[0] = (uint8_t)(
        (top[0] * 3 + top[1] * 1 + bot[0] * 9 + bot[1] * 3 + 8) >> 4);
    out1[1] = (uint8_t)(
        (top[0] * 1 + top[1] * 3 + bot[0] * 3 + bot[1] * 9 + 8) >> 4);
    top += 1;
    bot += 1;
    out0 += 2;
    out1 += 2;
  }
}
453
// Only suitable for at most 14 bit range.
// 16-bit horizontal 2x linear upsample: each adjacent source pair yields two
// outputs weighted 3:1 and 1:3 with rounding. dst_width is asserted to be
// even and non-negative.
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
                             uint16_t* dst_ptr,
                             int dst_width) {
  int remaining = dst_width >> 1;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  while (remaining > 0) {
    dst_ptr[0] = (uint16_t)((src_ptr[0] * 3 + src_ptr[1] * 1 + 2) >> 2);
    dst_ptr[1] = (uint16_t)((src_ptr[0] * 1 + src_ptr[1] * 3 + 2) >> 2);
    src_ptr += 1;
    dst_ptr += 2;
    --remaining;
  }
}
466
// Only suitable for at most 12bit range.
// 16-bit 2x bilinear upsample of two source rows (src_ptr and
// src_ptr + src_stride) into two destination rows (dst_ptr and
// dst_ptr + dst_stride) using the weights 9:3:3:1 (sum 16, rounded).
// dst_width is asserted to be even and non-negative.
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               ptrdiff_t dst_stride,
                               int dst_width) {
  const uint16_t* top = src_ptr;
  const uint16_t* bot = src_ptr + src_stride;
  uint16_t* out0 = dst_ptr;
  uint16_t* out1 = dst_ptr + dst_stride;
  int remaining = dst_width >> 1;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  while (remaining > 0) {
    out0[0] = (uint16_t)(
        (top[0] * 9 + top[1] * 3 + bot[0] * 3 + bot[1] * 1 + 8) >> 4);
    out0[1] = (uint16_t)(
        (top[0] * 3 + top[1] * 9 + bot[0] * 1 + bot[1] * 3 + 8) >> 4);
    out1[0] = (uint16_t)(
        (top[0] * 3 + top[1] * 1 + bot[0] * 9 + bot[1] * 3 + 8) >> 4);
    out1[1] = (uint16_t)(
        (top[0] * 1 + top[1] * 3 + bot[0] * 3 + bot[1] * 9 + 8) >> 4);
    top += 1;
    bot += 1;
    out0 += 2;
    out1 += 2;
    --remaining;
  }
}
491
// Scales a single row of pixels using point sampling.
// x is the starting source position and dx the per-output step, both in
// 16.16 fixed point; each output copies the source pixel at index x >> 16.
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
510
// 16-bit point-sampled column scaler. x is the starting source position and
// dx the per-output step, both in 16.16 fixed point; each output copies the
// source pixel at index x >> 16.
void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int remaining = dst_width;
  while (remaining > 1) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
    remaining -= 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    *dst_ptr = src_ptr[x >> 16];
  }
}
528
// Scales a single row of pixels up by 2x using point sampling.
// Each source pixel is written twice. x and dx are accepted for signature
// compatibility but ignored.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int pairs;
  (void)x;
  (void)dx;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    const uint8_t pixel = *src_ptr;
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    dst_ptr += 2;
    src_ptr += 1;
  }
  // Odd width: the last source pixel is written only once.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
547
// 16-bit 2x point-sampled column upscale: each source pixel is written
// twice. x and dx are accepted for signature compatibility but ignored.
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int remaining = dst_width;
  (void)x;
  (void)dx;
  while (remaining > 1) {
    const uint16_t pixel = *src_ptr;
    dst_ptr[0] = pixel;
    dst_ptr[1] = pixel;
    dst_ptr += 2;
    src_ptr += 1;
    remaining -= 2;
  }
  // Odd width: the last source pixel is written only once.
  if (dst_width & 1) {
    *dst_ptr = *src_ptr;
  }
}
565
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

// Scales a row horizontally with linear filtering. x is the starting source
// position and dx the per-output step, both in 16.16 fixed point. Each
// output blends the source pixels at x >> 16 and (x >> 16) + 1 using the
// low 16 bits of x as the fraction.
void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int index = x >> 16;
    int frac = x & 0xffff;
    int left = src_ptr[index];
    int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
    x += dx;
    index = x >> 16;
    frac = x & 0xffff;
    left = src_ptr[index];
    right = src_ptr[index + 1];
    dst_ptr[1] = BLENDER(left, right, frac);
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing filtered sample.
  if (dst_width & 1) {
    const int index = x >> 16;
    const int frac = x & 0xffff;
    const int left = src_ptr[index];
    const int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
  }
}

// Same filter as ScaleFilterCols_C, but the running source position is kept
// in 64 bits so that repeatedly adding dx cannot overflow a 32-bit int.
void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64_t pos = (int64_t)(x32);
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int64_t index = pos >> 16;
    int frac = (int)(pos & 0xffff);
    int left = src_ptr[index];
    int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
    pos += dx;
    index = pos >> 16;
    frac = (int)(pos & 0xffff);
    left = src_ptr[index];
    right = src_ptr[index + 1];
    dst_ptr[1] = BLENDER(left, right, frac);
    pos += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing filtered sample.
  if (dst_width & 1) {
    const int64_t index = pos >> 16;
    const int frac = (int)(pos & 0xffff);
    const int left = src_ptr[index];
    const int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
  }
}
#undef BLENDER
631
// Same as 8 bit arm blender but return is cast to uint16_t
#define BLENDER(a, b, f)   \
  (uint16_t)(              \
      (int)(a) +           \
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))

// 16-bit horizontal linear filter. x is the starting source position and dx
// the per-output step, both in 16.16 fixed point. Each output blends the
// source pixels at x >> 16 and (x >> 16) + 1 using the low 16 bits of x as
// the fraction.
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int index = x >> 16;
    int frac = x & 0xffff;
    int left = src_ptr[index];
    int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
    x += dx;
    index = x >> 16;
    frac = x & 0xffff;
    left = src_ptr[index];
    right = src_ptr[index + 1];
    dst_ptr[1] = BLENDER(left, right, frac);
    x += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing filtered sample.
  if (dst_width & 1) {
    const int index = x >> 16;
    const int frac = x & 0xffff;
    const int left = src_ptr[index];
    const int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
  }
}

// Same filter as ScaleFilterCols_16_C, but the running source position is
// kept in 64 bits so that repeatedly adding dx cannot overflow a 32-bit int.
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64_t pos = (int64_t)(x32);
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    int64_t index = pos >> 16;
    int frac = (int)(pos & 0xffff);
    int left = src_ptr[index];
    int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
    pos += dx;
    index = pos >> 16;
    frac = (int)(pos & 0xffff);
    left = src_ptr[index];
    right = src_ptr[index + 1];
    dst_ptr[1] = BLENDER(left, right, frac);
    pos += dx;
    dst_ptr += 2;
  }
  // Odd width: one trailing filtered sample.
  if (dst_width & 1) {
    const int64_t index = pos >> 16;
    const int frac = (int)(pos & 0xffff);
    const int left = src_ptr[index];
    const int right = src_ptr[index + 1];
    dst_ptr[0] = BLENDER(left, right, frac);
  }
}
#undef BLENDER
693
// 3/8 point-sampled downscale: every group of eight source pixels produces
// three output pixels by copying source pixels 0, 3 and 6 of the group.
// dst_width is asserted to be a multiple of 3. src_stride is unused.
void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
  }
}
709
// 16-bit version of the 3/8 point sampler: each group of eight source pixels
// yields source pixels 0, 3 and 6. dst_width is asserted to be a multiple
// of 3. src_stride is unused.
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int remaining = dst_width;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  while (remaining > 0) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    src_ptr += 8;
    dst += 3;
    remaining -= 3;
  }
}
725
// 8x3 -> 3x1
// Each 8-pixel-wide, 3-row source group produces three output pixels: the
// first two average 3x3 boxes and the third averages the remaining 2x3 box.
// Division is done as a multiply by 65536 / N followed by >> 16, so the
// result is truncated rather than rounded. dst_width is asserted to be a
// positive multiple of 3.
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + src_stride;
    const uint8_t* r2 = src_ptr + src_stride * 2;
    dst_ptr[0] = (r0[0] + r0[1] + r0[2] +  //
                  r1[0] + r1[1] + r1[2] +  //
                  r2[0] + r2[1] + r2[2]) *
                     (65536 / 9) >>
                 16;
    dst_ptr[1] = (r0[3] + r0[4] + r0[5] +  //
                  r1[3] + r1[4] + r1[5] +  //
                  r2[3] + r2[4] + r2[5]) *
                     (65536 / 9) >>
                 16;
    dst_ptr[2] = (r0[6] + r0[7] +  //
                  r1[6] + r1[7] +  //
                  r2[6] + r2[7]) *
                     (65536 / 6) >>
                 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
756
// 16-bit 8x3 -> 3x1 box downscale.
// Each 8-pixel-wide, 3-row source group produces three output pixels: the
// first two average 3x3 boxes and the third averages the remaining 2x3 box.
// Division is done as a multiply by 65536 / N followed by >> 16, so the
// result is truncated rather than rounded. dst_width is asserted to be a
// positive multiple of 3.
//
// The multiply is performed in uint32_t: with full-range 16-bit input the
// product (e.g. 9 * 65535 * 7281) exceeds INT_MAX, which would be signed
// overflow, but it always fits in 32 unsigned bits.
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    uint32_t sum;
    // Left 3x3 box.
    sum = (uint32_t)(src_ptr[0] + src_ptr[1] + src_ptr[2] +
                     src_ptr[stride + 0] + src_ptr[stride + 1] +
                     src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
                     src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]);
    dst_ptr[0] = (uint16_t)((sum * (65536u / 9u)) >> 16);
    // Middle 3x3 box.
    sum = (uint32_t)(src_ptr[3] + src_ptr[4] + src_ptr[5] +
                     src_ptr[stride + 3] + src_ptr[stride + 4] +
                     src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
                     src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]);
    dst_ptr[1] = (uint16_t)((sum * (65536u / 9u)) >> 16);
    // Right 2x3 box.
    sum = (uint32_t)(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
                     src_ptr[stride + 7] + src_ptr[stride * 2 + 6] +
                     src_ptr[stride * 2 + 7]);
    dst_ptr[2] = (uint16_t)((sum * (65536u / 6u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
786
// 8x2 -> 3x1
// Each 8-pixel-wide, 2-row source group produces three output pixels: the
// first two average 3x2 boxes and the third averages the remaining 2x2 box.
// Division is done as a multiply by 65536 / N followed by >> 16, so the
// result is truncated rather than rounded. dst_width is asserted to be a
// positive multiple of 3.
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  int remaining;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    const uint8_t* r0 = src_ptr;
    const uint8_t* r1 = src_ptr + src_stride;
    dst_ptr[0] = (r0[0] + r0[1] + r0[2] +  //
                  r1[0] + r1[1] + r1[2]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[1] = (r0[3] + r0[4] + r0[5] +  //
                  r1[3] + r1[4] + r1[5]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[2] = (r0[6] + r0[7] +  //
                  r1[6] + r1[7]) *
                     (65536 / 4) >>
                 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
812
// 16-bit 8x2 -> 3x1 box downscale.
// Each 8-pixel-wide, 2-row source group produces three output pixels: the
// first two average 3x2 boxes and the third averages the remaining 2x2 box.
// Division is done as a multiply by 65536 / N followed by >> 16, so the
// result is truncated rather than rounded. dst_width is asserted to be a
// positive multiple of 3.
//
// The multiply is performed in uint32_t: with full-range 16-bit input the
// product (e.g. 6 * 65535 * 10922) exceeds INT_MAX, which would be signed
// overflow, but it always fits in 32 unsigned bits.
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    uint32_t sum;
    // Left 3x2 box.
    sum = (uint32_t)(src_ptr[0] + src_ptr[1] + src_ptr[2] +
                     src_ptr[stride + 0] + src_ptr[stride + 1] +
                     src_ptr[stride + 2]);
    dst_ptr[0] = (uint16_t)((sum * (65536u / 6u)) >> 16);
    // Middle 3x2 box.
    sum = (uint32_t)(src_ptr[3] + src_ptr[4] + src_ptr[5] +
                     src_ptr[stride + 3] + src_ptr[stride + 4] +
                     src_ptr[stride + 5]);
    dst_ptr[1] = (uint16_t)((sum * (65536u / 6u)) >> 16);
    // Right 2x2 box.
    sum = (uint32_t)(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
                     src_ptr[stride + 7]);
    dst_ptr[2] = (uint16_t)((sum * (65536u / 4u)) >> 16);
    src_ptr += 8;
    dst_ptr += 3;
  }
}
837
// Accumulates a row of 8-bit pixels into a row of 16-bit sums:
// dst_ptr[i] += src_ptr[i] for i in [0, src_width).
// src_width is asserted to be positive.
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int pairs;
  assert(src_width > 0);
  for (pairs = src_width / 2; pairs > 0; --pairs) {
    dst_ptr[0] = (uint16_t)(dst_ptr[0] + src_ptr[0]);
    dst_ptr[1] = (uint16_t)(dst_ptr[1] + src_ptr[1]);
    dst_ptr += 2;
    src_ptr += 2;
  }
  // Odd width: one trailing element.
  if (src_width & 1) {
    dst_ptr[0] = (uint16_t)(dst_ptr[0] + src_ptr[0]);
  }
}
851
// Accumulates a row of 16-bit pixels into a row of 32-bit sums:
// dst_ptr[i] += src_ptr[i] for i in [0, src_width).
// src_width is asserted to be positive.
void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int remaining = src_width;
  assert(src_width > 0);
  while (remaining > 1) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
    remaining -= 2;
  }
  // Odd width: one trailing element.
  if (src_width & 1) {
    *dst_ptr += *src_ptr;
  }
}
867
868 // ARGB scale row functions
869
// Point-samples a row of 4-byte ARGB pixels down to half width: output
// pixel i is a copy of source pixel 2 * i + 1. Pixels are moved as whole
// 32-bit words. src_stride is not used.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int pairs;
  (void)src_stride;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[1];
    out[1] = in[3];
    out += 2;
    in += 4;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *out = in[1];
  }
}
888
// Halves a row of 4-byte ARGB pixels by averaging each pair of horizontally
// adjacent pixels, channel by channel, with rounding. src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    // Byte c of the left pixel pairs with byte c + 4 of the right pixel.
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] =
          (uint8_t)((src_argb[channel] + src_argb[channel + 4] + 1) >> 1);
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
904
// Halves a row of 4-byte ARGB pixels with a 2x2 box filter. For every
// channel, the output is the rounded average of two adjacent pixels in the
// row at src_argb and the two pixels below them, src_stride bytes further on.
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] =
          (uint8_t)((src_argb[channel] + src_argb[channel + 4] +
                     src_argb[src_stride + channel] +
                     src_argb[src_stride + channel + 4] + 2) >>
                    2);
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
927
// Point-samples a row of 4-byte ARGB pixels with an integer step: output
// pixel i is a copy of source pixel i * src_stepx. Pixels are moved as whole
// 32-bit words. src_stride is not used.
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int pairs;
  (void)src_stride;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[0];
    out[1] = in[src_stepx];
    out += 2;
    in += src_stepx * 2;
  }
  // Odd width: emit the single trailing pixel.
  if (dst_width & 1) {
    *out = *in;
  }
}
947
// Downscales a row of 4-byte ARGB pixels with an integer step and a 2x2 box
// filter. For each output pixel the 2x2 block at the current position (two
// adjacent pixels in the row at src_argb and in the row src_stride bytes
// further on) is averaged per channel with rounding, then the source
// advances by src_stepx pixels.
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] =
          (uint8_t)((src_argb[channel] + src_argb[channel + 4] +
                     src_argb[src_stride + channel] +
                     src_argb[src_stride + channel + 4] + 2) >>
                    2);
    }
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}
971
// Scales a single row of pixels using point sampling.
// Operates on 4-byte ARGB pixels moved as whole 32-bit words. x is the
// starting source position and dx the per-output step, both in 16.16 fixed
// point; each output copies the source pixel at index x >> 16.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int pairs;
  for (pairs = dst_width / 2; pairs > 0; --pairs) {
    out[0] = in[x >> 16];
    x += dx;
    out[1] = in[x >> 16];
    x += dx;
    out += 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    *out = in[x >> 16];
  }
}
992
// Point-sampled ARGB column scaler that keeps the running 16.16 source
// position in 64 bits, so repeatedly adding dx cannot overflow a 32-bit int.
// Each output copies the 32-bit source pixel at index position >> 16.
void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  int64_t pos = (int64_t)(x32);
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining = dst_width;
  while (remaining > 1) {
    out[0] = in[pos >> 16];
    pos += dx;
    out[1] = in[pos >> 16];
    pos += dx;
    out += 2;
    remaining -= 2;
  }
  // Odd width: one trailing sample.
  if (dst_width & 1) {
    *out = in[pos >> 16];
  }
}
1013
// Doubles one row of 32 bit pixels horizontally by writing every source
// pixel twice. x and dx are accepted for signature compatibility and ignored.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* in = (const uint32_t*)(src_argb);
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = out[0];
    ++in;
    out += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
1034
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends two 8 bit channel values with a 7 bit fraction f.
// BLENDERC extracts the channel at bit offset s from a and b, blends it and
// shifts the result back into place. BLENDER combines all four channels of a
// 32 bit pixel. Every parameter and replacement list is parenthesized so the
// macros expand correctly for compound arguments and inside larger
// expressions.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  (uint32_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f)                                   \
  (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) |         \
   BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
1043
ScaleARGBFilterCols_C(uint8_t * dst_argb,const uint8_t * src_argb,int dst_width,int x,int dx)1044 void ScaleARGBFilterCols_C(uint8_t* dst_argb,
1045 const uint8_t* src_argb,
1046 int dst_width,
1047 int x,
1048 int dx) {
1049 const uint32_t* src = (const uint32_t*)(src_argb);
1050 uint32_t* dst = (uint32_t*)(dst_argb);
1051 int j;
1052 for (j = 0; j < dst_width - 1; j += 2) {
1053 int xi = x >> 16;
1054 int xf = (x >> 9) & 0x7f;
1055 uint32_t a = src[xi];
1056 uint32_t b = src[xi + 1];
1057 dst[0] = BLENDER(a, b, xf);
1058 x += dx;
1059 xi = x >> 16;
1060 xf = (x >> 9) & 0x7f;
1061 a = src[xi];
1062 b = src[xi + 1];
1063 dst[1] = BLENDER(a, b, xf);
1064 x += dx;
1065 dst += 2;
1066 }
1067 if (dst_width & 1) {
1068 int xi = x >> 16;
1069 int xf = (x >> 9) & 0x7f;
1070 uint32_t a = src[xi];
1071 uint32_t b = src[xi + 1];
1072 dst[0] = BLENDER(a, b, xf);
1073 }
1074 }
1075
ScaleARGBFilterCols64_C(uint8_t * dst_argb,const uint8_t * src_argb,int dst_width,int x32,int dx)1076 void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
1077 const uint8_t* src_argb,
1078 int dst_width,
1079 int x32,
1080 int dx) {
1081 int64_t x = (int64_t)(x32);
1082 const uint32_t* src = (const uint32_t*)(src_argb);
1083 uint32_t* dst = (uint32_t*)(dst_argb);
1084 int j;
1085 for (j = 0; j < dst_width - 1; j += 2) {
1086 int64_t xi = x >> 16;
1087 int xf = (x >> 9) & 0x7f;
1088 uint32_t a = src[xi];
1089 uint32_t b = src[xi + 1];
1090 dst[0] = BLENDER(a, b, xf);
1091 x += dx;
1092 xi = x >> 16;
1093 xf = (x >> 9) & 0x7f;
1094 a = src[xi];
1095 b = src[xi + 1];
1096 dst[1] = BLENDER(a, b, xf);
1097 x += dx;
1098 dst += 2;
1099 }
1100 if (dst_width & 1) {
1101 int64_t xi = x >> 16;
1102 int xf = (x >> 9) & 0x7f;
1103 uint32_t a = src[xi];
1104 uint32_t b = src[xi + 1];
1105 dst[0] = BLENDER(a, b, xf);
1106 }
1107 }
1108 #undef BLENDER1
1109 #undef BLENDERC
1110 #undef BLENDER
1111
1112 // UV scale row functions
1113 // same as ARGB but 2 channels
1114
// Point samples a row of interleaved 2 byte UV pixels down by 2x.
// Each output pixel is a copy of the second pixel of every source pair, so
// the source advances two pixels (4 bytes) per output pixel. Pixels are
// copied byte by byte, so no 16 bit alignment of the buffers is required.
// src_stride is unused.
void ScaleUVRowDown2_C(const uint8_t* src_uv,
                       ptrdiff_t src_stride,
                       uint8_t* dst_uv,
                       int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = src_uv[2];  // U of the second pixel in the pair.
    dst_uv[1] = src_uv[3];  // V of the second pixel in the pair.
    src_uv += 4;
    dst_uv += 2;
  }
}
1133
// Scales a row of interleaved 2 byte UV pixels down by 2x, averaging each
// horizontal pair of pixels per channel with rounding. src_stride is unused.
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
                             int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    int c;
    // c selects the U (0) or V (1) byte of the pixel.
    for (c = 0; c < 2; ++c) {
      dst_uv[c] = (src_uv[c] + src_uv[c + 2] + 1) >> 1;
    }
    src_uv += 4;
    dst_uv += 2;
  }
}
1147
// Scales a row of interleaved 2 byte UV pixels down by 2x, averaging each
// 2x2 block (this row and the row src_stride bytes away) per channel with
// rounding.
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          uint8_t* dst_uv,
                          int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_uv + src_stride;
    int c;
    // c selects the U (0) or V (1) byte of the pixel.
    for (c = 0; c < 2; ++c) {
      dst_uv[c] = (src_uv[c] + src_uv[c + 2] + below[c] + below[c + 2] + 2) >> 2;
    }
    src_uv += 4;
    dst_uv += 2;
  }
}
1164
// Point samples a row of 2 byte UV pixels, copying one pixel and then
// skipping ahead src_stepx pixels for the next. Pixels are moved as 16 bit
// units. src_stride is unused.
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
                          ptrdiff_t src_stride,
                          int src_stepx,
                          uint8_t* dst_uv,
                          int dst_width) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = *in;
    in += src_stepx;
    *out++ = *in;
    in += src_stepx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    *out = *in;
  }
}
1184
// Averages a 2x2 block of 2 byte UV pixels (this row and the row src_stride
// bytes away) into each output pixel with rounding, advancing src_stepx
// source pixels between output pixels.
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             int src_stepx,
                             uint8_t* dst_uv,
                             int dst_width) {
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    const uint8_t* below = src_uv + src_stride;
    int c;
    // c selects the U (0) or V (1) byte of the pixel.
    for (c = 0; c < 2; ++c) {
      dst_uv[c] = (src_uv[c] + src_uv[c + 2] + below[c] + below[c + 2] + 2) >> 2;
    }
    src_uv += src_stepx * 2;
    dst_uv += 2;
  }
}
1202
// Upsamples a row of interleaved 2 byte UV pixels by 2x horizontally.
// Every source pixel pair (current, next) produces two output pixels
// weighted 3:1 and 1:3 with rounding. dst_width must be even and
// non-negative. The pixel after the last one consumed is also read.
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            int dst_width) {
  const int src_width = dst_width >> 1;
  int i;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (i = 0; i < src_width; ++i) {
    const uint8_t* in = src_ptr + 2 * i;
    uint8_t* out = dst_ptr + 4 * i;
    int c;
    // First output pixel leans toward the current source pixel.
    for (c = 0; c < 2; ++c) {
      out[c] = (in[c] * 3 + in[c + 2] * 1 + 2) >> 2;
    }
    // Second output pixel leans toward the next source pixel.
    for (c = 0; c < 2; ++c) {
      out[c + 2] = (in[c] * 1 + in[c + 2] * 3 + 2) >> 2;
    }
  }
}
1220
// Upsamples interleaved 2 byte UV pixels by 2x in both directions.
// Reads two source rows (src_ptr and src_ptr + src_stride) and writes two
// destination rows (dst_ptr and dst_ptr + dst_stride). Each output sample
// is a 9:3:3:1 weighted blend of the four surrounding source samples with
// rounding. dst_width must be even and non-negative. The pixel after the
// last one consumed is also read from both rows.
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              ptrdiff_t dst_stride,
                              int dst_width) {
  const int src_width = dst_width >> 1;
  int i;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (i = 0; i < src_width; ++i) {
    const uint8_t* top = src_ptr + 2 * i;
    const uint8_t* bot = top + src_stride;
    uint8_t* out0 = dst_ptr + 4 * i;
    uint8_t* out1 = out0 + dst_stride;
    int c;
    // First destination row, left then right output pixel.
    for (c = 0; c < 2; ++c) {
      out0[c] =
          (top[c] * 9 + top[c + 2] * 3 + bot[c] * 3 + bot[c + 2] * 1 + 8) >> 4;
    }
    for (c = 0; c < 2; ++c) {
      out0[c + 2] =
          (top[c] * 3 + top[c + 2] * 9 + bot[c] * 1 + bot[c + 2] * 3 + 8) >> 4;
    }
    // Second destination row, left then right output pixel.
    for (c = 0; c < 2; ++c) {
      out1[c] =
          (top[c] * 3 + top[c + 2] * 1 + bot[c] * 9 + bot[c + 2] * 3 + 8) >> 4;
    }
    for (c = 0; c < 2; ++c) {
      out1[c + 2] =
          (top[c] * 1 + top[c + 2] * 3 + bot[c] * 3 + bot[c + 2] * 9 + 8) >> 4;
    }
  }
}
1260
// 16 bit per channel variant of the 2x horizontal UV upsampler.
// Every source pixel pair (current, next) produces two output pixels
// weighted 3:1 and 1:3 with rounding. dst_width must be even and
// non-negative. The pixel after the last one consumed is also read.
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               int dst_width) {
  const int src_width = dst_width >> 1;
  int i;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (i = 0; i < src_width; ++i) {
    const uint16_t* in = src_ptr + 2 * i;
    uint16_t* out = dst_ptr + 4 * i;
    int c;
    // First output pixel leans toward the current source pixel.
    for (c = 0; c < 2; ++c) {
      out[c] = (in[c] * 3 + in[c + 2] * 1 + 2) >> 2;
    }
    // Second output pixel leans toward the next source pixel.
    for (c = 0; c < 2; ++c) {
      out[c + 2] = (in[c] * 1 + in[c + 2] * 3 + 2) >> 2;
    }
  }
}
1278
// 16 bit per channel variant of the 2x bilinear UV upsampler.
// Reads two source rows (src_ptr and src_ptr + src_stride) and writes two
// destination rows (dst_ptr and dst_ptr + dst_stride); strides are applied
// to uint16_t pointers. Each output sample is a 9:3:3:1 weighted blend of
// the four surrounding source samples with rounding. dst_width must be even
// and non-negative. The pixel after the last one consumed is also read.
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint16_t* dst_ptr,
                                 ptrdiff_t dst_stride,
                                 int dst_width) {
  const int src_width = dst_width >> 1;
  int i;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (i = 0; i < src_width; ++i) {
    const uint16_t* top = src_ptr + 2 * i;
    const uint16_t* bot = top + src_stride;
    uint16_t* out0 = dst_ptr + 4 * i;
    uint16_t* out1 = out0 + dst_stride;
    int c;
    // First destination row, left then right output pixel.
    for (c = 0; c < 2; ++c) {
      out0[c] =
          (top[c] * 9 + top[c + 2] * 3 + bot[c] * 3 + bot[c + 2] * 1 + 8) >> 4;
    }
    for (c = 0; c < 2; ++c) {
      out0[c + 2] =
          (top[c] * 3 + top[c + 2] * 9 + bot[c] * 1 + bot[c + 2] * 3 + 8) >> 4;
    }
    // Second destination row, left then right output pixel.
    for (c = 0; c < 2; ++c) {
      out1[c] =
          (top[c] * 3 + top[c + 2] * 1 + bot[c] * 9 + bot[c + 2] * 3 + 8) >> 4;
    }
    for (c = 0; c < 2; ++c) {
      out1[c + 2] =
          (top[c] * 1 + top[c + 2] * 3 + bot[c] * 3 + bot[c + 2] * 9 + 8) >> 4;
    }
  }
}
1318
// Point samples one row of 2 byte UV pixels, moved as 16 bit units.
// x is the starting source position and dx the per-output step; the source
// pixel index is the position shifted right by 16.
void ScaleUVCols_C(uint8_t* dst_uv,
                   const uint8_t* src_uv,
                   int dst_width,
                   int x,
                   int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[x >> 16];
    x += dx;
    *out++ = in[x >> 16];
    x += dx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    *out = in[x >> 16];
  }
}
1339
// Point samples one row of 2 byte UV pixels, tracking the source position
// in a 64 bit accumulator that is seeded from x32 and stepped by dx.
void ScaleUVCols64_C(uint8_t* dst_uv,
                     const uint8_t* src_uv,
                     int dst_width,
                     int x32,
                     int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int64_t pos = (int64_t)(x32);
  int remaining;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *out++ = in[pos >> 16];
    pos += dx;
    *out++ = in[pos >> 16];
    pos += dx;
  }
  // Odd width: one trailing pixel.
  if (dst_width & 1) {
    *out = in[pos >> 16];
  }
}
1360
// Doubles one row of 2 byte UV pixels horizontally by writing every source
// pixel twice. x and dx are accepted for signature compatibility and ignored.
void ScaleUVColsUp2_C(uint8_t* dst_uv,
                      const uint8_t* src_uv,
                      int dst_width,
                      int x,
                      int dx) {
  const uint16_t* in = (const uint16_t*)(src_uv);
  uint16_t* out = (uint16_t*)(dst_uv);
  int remaining;
  (void)x;
  (void)dx;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    out[0] = in[0];
    out[1] = out[0];
    ++in;
    out += 2;
  }
  // Odd width: the last source pixel is written once.
  if (dst_width & 1) {
    out[0] = in[0];
  }
}
1381
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends two 8 bit channel values with a 7 bit fraction f.
// BLENDERC extracts the channel at bit offset s from a and b, blends it and
// shifts the result back into place. BLENDER combines both channels of a
// 16 bit UV pixel. Every parameter and replacement list is parenthesized so
// the macros expand correctly for compound arguments and inside larger
// expressions.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  (uint16_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
#define BLENDER(a, b, f) (BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
1388
ScaleUVFilterCols_C(uint8_t * dst_uv,const uint8_t * src_uv,int dst_width,int x,int dx)1389 void ScaleUVFilterCols_C(uint8_t* dst_uv,
1390 const uint8_t* src_uv,
1391 int dst_width,
1392 int x,
1393 int dx) {
1394 const uint16_t* src = (const uint16_t*)(src_uv);
1395 uint16_t* dst = (uint16_t*)(dst_uv);
1396 int j;
1397 for (j = 0; j < dst_width - 1; j += 2) {
1398 int xi = x >> 16;
1399 int xf = (x >> 9) & 0x7f;
1400 uint16_t a = src[xi];
1401 uint16_t b = src[xi + 1];
1402 dst[0] = BLENDER(a, b, xf);
1403 x += dx;
1404 xi = x >> 16;
1405 xf = (x >> 9) & 0x7f;
1406 a = src[xi];
1407 b = src[xi + 1];
1408 dst[1] = BLENDER(a, b, xf);
1409 x += dx;
1410 dst += 2;
1411 }
1412 if (dst_width & 1) {
1413 int xi = x >> 16;
1414 int xf = (x >> 9) & 0x7f;
1415 uint16_t a = src[xi];
1416 uint16_t b = src[xi + 1];
1417 dst[0] = BLENDER(a, b, xf);
1418 }
1419 }
1420
ScaleUVFilterCols64_C(uint8_t * dst_uv,const uint8_t * src_uv,int dst_width,int x32,int dx)1421 void ScaleUVFilterCols64_C(uint8_t* dst_uv,
1422 const uint8_t* src_uv,
1423 int dst_width,
1424 int x32,
1425 int dx) {
1426 int64_t x = (int64_t)(x32);
1427 const uint16_t* src = (const uint16_t*)(src_uv);
1428 uint16_t* dst = (uint16_t*)(dst_uv);
1429 int j;
1430 for (j = 0; j < dst_width - 1; j += 2) {
1431 int64_t xi = x >> 16;
1432 int xf = (x >> 9) & 0x7f;
1433 uint16_t a = src[xi];
1434 uint16_t b = src[xi + 1];
1435 dst[0] = BLENDER(a, b, xf);
1436 x += dx;
1437 xi = x >> 16;
1438 xf = (x >> 9) & 0x7f;
1439 a = src[xi];
1440 b = src[xi + 1];
1441 dst[1] = BLENDER(a, b, xf);
1442 x += dx;
1443 dst += 2;
1444 }
1445 if (dst_width & 1) {
1446 int64_t xi = x >> 16;
1447 int xf = (x >> 9) & 0x7f;
1448 uint16_t a = src[xi];
1449 uint16_t b = src[xi + 1];
1450 dst[0] = BLENDER(a, b, xf);
1451 }
1452 }
1453 #undef BLENDER1
1454 #undef BLENDERC
1455 #undef BLENDER
1456
1457 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)1458 void ScalePlaneVertical(int src_height,
1459 int dst_width,
1460 int dst_height,
1461 int src_stride,
1462 int dst_stride,
1463 const uint8_t* src_argb,
1464 uint8_t* dst_argb,
1465 int x,
1466 int y,
1467 int dy,
1468 int bpp,
1469 enum FilterMode filtering) {
1470 // TODO(fbarchard): Allow higher bpp.
1471 int dst_width_bytes = dst_width * bpp;
1472 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
1473 ptrdiff_t src_stride, int dst_width,
1474 int source_y_fraction) = InterpolateRow_C;
1475 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1476 int j;
1477 assert(bpp >= 1 && bpp <= 4);
1478 assert(src_height != 0);
1479 assert(dst_width > 0);
1480 assert(dst_height > 0);
1481 src_argb += (x >> 16) * bpp;
1482 #if defined(HAS_INTERPOLATEROW_SSSE3)
1483 if (TestCpuFlag(kCpuHasSSSE3)) {
1484 InterpolateRow = InterpolateRow_Any_SSSE3;
1485 if (IS_ALIGNED(dst_width_bytes, 16)) {
1486 InterpolateRow = InterpolateRow_SSSE3;
1487 }
1488 }
1489 #endif
1490 #if defined(HAS_INTERPOLATEROW_AVX2)
1491 if (TestCpuFlag(kCpuHasAVX2)) {
1492 InterpolateRow = InterpolateRow_Any_AVX2;
1493 if (IS_ALIGNED(dst_width_bytes, 32)) {
1494 InterpolateRow = InterpolateRow_AVX2;
1495 }
1496 }
1497 #endif
1498 #if defined(HAS_INTERPOLATEROW_NEON)
1499 if (TestCpuFlag(kCpuHasNEON)) {
1500 InterpolateRow = InterpolateRow_Any_NEON;
1501 if (IS_ALIGNED(dst_width_bytes, 16)) {
1502 InterpolateRow = InterpolateRow_NEON;
1503 }
1504 }
1505 #endif
1506 #if defined(HAS_INTERPOLATEROW_MMI)
1507 if (TestCpuFlag(kCpuHasMMI)) {
1508 InterpolateRow = InterpolateRow_Any_MMI;
1509 if (IS_ALIGNED(dst_width_bytes, 8)) {
1510 InterpolateRow = InterpolateRow_MMI;
1511 }
1512 }
1513 #endif
1514 #if defined(HAS_INTERPOLATEROW_MSA)
1515 if (TestCpuFlag(kCpuHasMSA)) {
1516 InterpolateRow = InterpolateRow_Any_MSA;
1517 if (IS_ALIGNED(dst_width_bytes, 32)) {
1518 InterpolateRow = InterpolateRow_MSA;
1519 }
1520 }
1521 #endif
1522 for (j = 0; j < dst_height; ++j) {
1523 int yi;
1524 int yf;
1525 if (y > max_y) {
1526 y = max_y;
1527 }
1528 yi = y >> 16;
1529 yf = filtering ? ((y >> 8) & 255) : 0;
1530 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1531 dst_width_bytes, yf);
1532 dst_argb += dst_stride;
1533 y += dy;
1534 }
1535 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_argb,uint16_t * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)1536 void ScalePlaneVertical_16(int src_height,
1537 int dst_width,
1538 int dst_height,
1539 int src_stride,
1540 int dst_stride,
1541 const uint16_t* src_argb,
1542 uint16_t* dst_argb,
1543 int x,
1544 int y,
1545 int dy,
1546 int wpp,
1547 enum FilterMode filtering) {
1548 // TODO(fbarchard): Allow higher wpp.
1549 int dst_width_words = dst_width * wpp;
1550 void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
1551 ptrdiff_t src_stride, int dst_width,
1552 int source_y_fraction) = InterpolateRow_16_C;
1553 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1554 int j;
1555 assert(wpp >= 1 && wpp <= 2);
1556 assert(src_height != 0);
1557 assert(dst_width > 0);
1558 assert(dst_height > 0);
1559 src_argb += (x >> 16) * wpp;
1560 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1561 if (TestCpuFlag(kCpuHasSSE2)) {
1562 InterpolateRow = InterpolateRow_Any_16_SSE2;
1563 if (IS_ALIGNED(dst_width_bytes, 16)) {
1564 InterpolateRow = InterpolateRow_16_SSE2;
1565 }
1566 }
1567 #endif
1568 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1569 if (TestCpuFlag(kCpuHasSSSE3)) {
1570 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1571 if (IS_ALIGNED(dst_width_bytes, 16)) {
1572 InterpolateRow = InterpolateRow_16_SSSE3;
1573 }
1574 }
1575 #endif
1576 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1577 if (TestCpuFlag(kCpuHasAVX2)) {
1578 InterpolateRow = InterpolateRow_Any_16_AVX2;
1579 if (IS_ALIGNED(dst_width_bytes, 32)) {
1580 InterpolateRow = InterpolateRow_16_AVX2;
1581 }
1582 }
1583 #endif
1584 #if defined(HAS_INTERPOLATEROW_16_NEON)
1585 if (TestCpuFlag(kCpuHasNEON)) {
1586 InterpolateRow = InterpolateRow_Any_16_NEON;
1587 if (IS_ALIGNED(dst_width_bytes, 16)) {
1588 InterpolateRow = InterpolateRow_16_NEON;
1589 }
1590 }
1591 #endif
1592 for (j = 0; j < dst_height; ++j) {
1593 int yi;
1594 int yf;
1595 if (y > max_y) {
1596 y = max_y;
1597 }
1598 yi = y >> 16;
1599 yf = filtering ? ((y >> 8) & 255) : 0;
1600 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1601 dst_width_words, yf);
1602 dst_argb += dst_stride;
1603 y += dy;
1604 }
1605 }
1606
1607 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1608 enum FilterMode ScaleFilterReduce(int src_width,
1609 int src_height,
1610 int dst_width,
1611 int dst_height,
1612 enum FilterMode filtering) {
1613 if (src_width < 0) {
1614 src_width = -src_width;
1615 }
1616 if (src_height < 0) {
1617 src_height = -src_height;
1618 }
1619 if (filtering == kFilterBox) {
1620 // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
1621 if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
1622 filtering = kFilterBilinear;
1623 }
1624 }
1625 if (filtering == kFilterBilinear) {
1626 if (src_height == 1) {
1627 filtering = kFilterLinear;
1628 }
1629 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1630 if (dst_height == src_height || dst_height * 3 == src_height) {
1631 filtering = kFilterLinear;
1632 }
1633 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1634 // avoid reading 2 pixels horizontally that causes memory exception.
1635 if (src_width == 1) {
1636 filtering = kFilterNone;
1637 }
1638 }
1639 if (filtering == kFilterLinear) {
1640 if (src_width == 1) {
1641 filtering = kFilterNone;
1642 }
1643 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1644 if (dst_width == src_width || dst_width * 3 == src_width) {
1645 filtering = kFilterNone;
1646 }
1647 }
1648 return filtering;
1649 }
1650
// Returns num / div as a 16.16 fixed point value: num is widened to 64 bits
// and shifted left by 16 before the division, then truncated back to int.
int FixedDiv_C(int num, int div) {
  const int64_t scaled_num = (int64_t)(num) << 16;
  const int64_t quotient = scaled_num / div;
  return (int)(quotient);
}
1655
// Returns ((num << 16) - 0x00010001) / (div - 1) computed in 64 bits and
// truncated to int. div must not be 1.
int FixedDiv1_C(int num, int div) {
  const int64_t numerator = ((int64_t)(num) << 16) - 0x00010001;
  const int64_t denominator = div - 1;
  return (int)(numerator / denominator);
}
1660
// Half of step dx plus offset s, with the sign of dx applied to the whole
// result. Arguments and the replacement list are fully parenthesized so the
// macro is safe inside larger expressions. dx is evaluated more than once.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
1662
1663 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1664 void ScaleSlope(int src_width,
1665 int src_height,
1666 int dst_width,
1667 int dst_height,
1668 enum FilterMode filtering,
1669 int* x,
1670 int* y,
1671 int* dx,
1672 int* dy) {
1673 assert(x != NULL);
1674 assert(y != NULL);
1675 assert(dx != NULL);
1676 assert(dy != NULL);
1677 assert(src_width != 0);
1678 assert(src_height != 0);
1679 assert(dst_width > 0);
1680 assert(dst_height > 0);
1681 // Check for 1 pixel and avoid FixedDiv overflow.
1682 if (dst_width == 1 && src_width >= 32768) {
1683 dst_width = src_width;
1684 }
1685 if (dst_height == 1 && src_height >= 32768) {
1686 dst_height = src_height;
1687 }
1688 if (filtering == kFilterBox) {
1689 // Scale step for point sampling duplicates all pixels equally.
1690 *dx = FixedDiv(Abs(src_width), dst_width);
1691 *dy = FixedDiv(src_height, dst_height);
1692 *x = 0;
1693 *y = 0;
1694 } else if (filtering == kFilterBilinear) {
1695 // Scale step for bilinear sampling renders last pixel once for upsample.
1696 if (dst_width <= Abs(src_width)) {
1697 *dx = FixedDiv(Abs(src_width), dst_width);
1698 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1699 } else if (dst_width > 1) {
1700 *dx = FixedDiv1(Abs(src_width), dst_width);
1701 *x = 0;
1702 }
1703 if (dst_height <= src_height) {
1704 *dy = FixedDiv(src_height, dst_height);
1705 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1706 } else if (dst_height > 1) {
1707 *dy = FixedDiv1(src_height, dst_height);
1708 *y = 0;
1709 }
1710 } else if (filtering == kFilterLinear) {
1711 // Scale step for bilinear sampling renders last pixel once for upsample.
1712 if (dst_width <= Abs(src_width)) {
1713 *dx = FixedDiv(Abs(src_width), dst_width);
1714 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1715 } else if (dst_width > 1) {
1716 *dx = FixedDiv1(Abs(src_width), dst_width);
1717 *x = 0;
1718 }
1719 *dy = FixedDiv(src_height, dst_height);
1720 *y = *dy >> 1;
1721 } else {
1722 // Scale step for point sampling duplicates all pixels equally.
1723 *dx = FixedDiv(Abs(src_width), dst_width);
1724 *dy = FixedDiv(src_height, dst_height);
1725 *x = CENTERSTART(*dx, 0);
1726 *y = CENTERSTART(*dy, 0);
1727 }
1728 // Negative src_width means horizontally mirror.
1729 if (src_width < 0) {
1730 *x += (dst_width - 1) * *dx;
1731 *dx = -*dx;
1732 // src_width = -src_width; // Caller must do this.
1733 }
1734 }
1735 #undef CENTERSTART
1736
// Upsamples a 16 bit row by 2x with filtering.
// Each pair of output samples is a 9:3:3:1 weighted blend of two adjacent
// samples on this row and the two at the same columns src_stride elements
// away, with rounding. One sample beyond the last consumed one is read from
// each row.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* row0 = src_ptr;
  const uint16_t* row1 = src_ptr + src_stride;
  int remaining;
  // Two output samples per source column.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    const uint16_t near0 = row0[0];
    const uint16_t far0 = row0[1];
    const uint16_t near1 = row1[0];
    const uint16_t far1 = row1[1];
    dst[0] = (near0 * 9 + far0 * 3 + near1 * 3 + far1 + 8) >> 4;
    dst[1] = (near0 * 3 + far0 * 9 + near1 + far1 * 3 + 8) >> 4;
    ++row0;
    ++row1;
    dst += 2;
  }
  // Odd width: only the first sample of the final pair is written.
  if (dst_width & 1) {
    const uint16_t near0 = row0[0];
    const uint16_t far0 = row0[1];
    const uint16_t near1 = row1[0];
    const uint16_t far1 = row1[1];
    dst[0] = (near0 * 9 + far0 * 3 + near1 * 3 + far1 + 8) >> 4;
  }
}
1765
1766 #ifdef __cplusplus
1767 } // extern "C"
1768 } // namespace libyuv
1769 #endif
1770