1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *  Copyright 2016 Mozilla Foundation
4  *
5  *  Use of this source code is governed by a BSD-style license
6  *  that can be found in the LICENSE file in the root of the source
7  *  tree. An additional intellectual property rights grant can be found
8  *  in the file PATENTS. All contributing project authors may
9  *  be found in the AUTHORS file in the root of the source tree.
10  */
11 
12 #include "libyuv/scale.h"
13 
14 #include <assert.h>
15 #include <string.h>
16 
17 #include "libyuv/cpu_id.h"
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 #include "libyuv/video_common.h"
21 
22 #include "mozilla/gfx/Types.h"
23 
24 #ifdef __cplusplus
25 namespace libyuv {
26 extern "C" {
27 #endif
28 
// The YUV to RGB conversion and scaling functions in this file were
// implemented with reference to scale_argb.cc.
//
// libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not
// complete yet. The implementations of the functions here are based on it.
// First, ScaleYUVToARGBBilinearUp() was implemented by modifying libyuv's
// version. Then all the other functions were implemented in a similar way.
36 //
// The relationships between the functions in yuv_convert.cpp and those in
// scale_argb.cc are as follows:
39 //  - ScaleYUVToARGBDown2()      <-- ScaleARGBDown2()
40 //  - ScaleYUVToARGBDownEven()   <-- ScaleARGBDownEven()
41 //  - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown()
42 //  - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv
43 //  - ScaleYUVToARGBSimple()     <-- ScaleARGBSimple()
44 //  - ScaleYUVToARGB()           <-- ScaleARGB() // Removed some function calls for simplicity.
45 //  - YUVToARGBScale()           <-- ARGBScale()
46 //
// The calls to, and the selection of, InterpolateRow() and
// ScaleARGBFilterCols() were kept as close to the originals as possible.
//
// The following changes were made to each scaling function.
51 //
// -[1] A YUV conversion buffer is allocated and used as the source buffer for
//      scaling. This usage is borrowed from libyuv's
//      ScaleYUVToARGBBilinearUp().
// -[2] The conversion from YUV to RGB is abstracted as YUVBuferIter, so that
//      multiple YUV color formats can be handled.
// -[3] The scaling functions were modified to work on the YUV conversion
//      buffer and to use YUVBuferIter.
// -[4] The selection of the color conversion functions in YUVBuferIter is
//      borrowed from I444ToARGBMatrix(), I422ToARGBMatrix() and
//      I420ToARGBMatrix().
60 
61 typedef mozilla::gfx::YUVColorSpace YUVColorSpace;
62 
63 struct YUVBuferIter {
64   int src_width;
65   int src_height;
66   int src_stride_y;
67   int src_stride_u;
68   int src_stride_v;
69   const uint8* src_y;
70   const uint8* src_u;
71   const uint8* src_v;
72 
73   uint32 src_fourcc;
74   const struct YuvConstants* yuvconstants;
75   int y_index;
76   const uint8* src_row_y;
77   const uint8* src_row_u;
78   const uint8* src_row_v;
79 
80   void (*YUVToARGBRow)(const uint8* y_buf,
81                        const uint8* u_buf,
82                        const uint8* v_buf,
83                        uint8* rgb_buf,
84                        const struct YuvConstants* yuvconstants,
85                        int width);
86   void (*MoveTo)(YUVBuferIter& iter, int y_index);
87   void (*MoveToNextRow)(YUVBuferIter& iter);
88 };
89 
YUVBuferIter_InitI422(YUVBuferIter & iter)90 void YUVBuferIter_InitI422(YUVBuferIter& iter) {
91   iter.YUVToARGBRow = I422ToARGBRow_C;
92 #if defined(HAS_I422TOARGBROW_SSSE3)
93   if (TestCpuFlag(kCpuHasSSSE3)) {
94     iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3;
95     if (IS_ALIGNED(iter.src_width, 8)) {
96       iter.YUVToARGBRow = I422ToARGBRow_SSSE3;
97     }
98   }
99 #endif
100 #if defined(HAS_I422TOARGBROW_AVX2)
101   if (TestCpuFlag(kCpuHasAVX2)) {
102     iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2;
103     if (IS_ALIGNED(iter.src_width, 16)) {
104       iter.YUVToARGBRow = I422ToARGBRow_AVX2;
105     }
106   }
107 #endif
108 #if defined(HAS_I422TOARGBROW_NEON)
109   if (TestCpuFlag(kCpuHasNEON)) {
110     iter.YUVToARGBRow = I422ToARGBRow_Any_NEON;
111     if (IS_ALIGNED(iter.src_width, 8)) {
112       iter.YUVToARGBRow = I422ToARGBRow_NEON;
113     }
114   }
115 #endif
116 #if defined(HAS_I422TOARGBROW_DSPR2)
117   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) &&
118       IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) &&
119       IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) &&
120       IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) {
121     // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)
122     iter.YUVToARGBRow = I422ToARGBRow_DSPR2;
123   }
124 #endif
125 }
126 
127 void YUVBuferIter_InitI444(YUVBuferIter& iter) {
128   iter.YUVToARGBRow = I444ToARGBRow_C;
129 #if defined(HAS_I444TOARGBROW_SSSE3)
130   if (TestCpuFlag(kCpuHasSSSE3)) {
131     iter.YUVToARGBRow = I444ToARGBRow_Any_SSSE3;
132     if (IS_ALIGNED(iter.src_width, 8)) {
133       iter.YUVToARGBRow = I444ToARGBRow_SSSE3;
134     }
135   }
136 #endif
137 #if defined(HAS_I444TOARGBROW_AVX2)
138   if (TestCpuFlag(kCpuHasAVX2)) {
139     iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2;
140     if (IS_ALIGNED(iter.src_width, 16)) {
141       iter.YUVToARGBRow = I444ToARGBRow_AVX2;
142     }
143   }
144 #endif
145 #if defined(HAS_I444TOARGBROW_NEON)
146   if (TestCpuFlag(kCpuHasNEON)) {
147     iter.YUVToARGBRow = I444ToARGBRow_Any_NEON;
148     if (IS_ALIGNED(iter.src_width, 8)) {
149       iter.YUVToARGBRow = I444ToARGBRow_NEON;
150     }
151   }
152 #endif
153 }
154 
155 
156 static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) {
157   iter.y_index = y_index;
158   iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
159   iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
160   iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
161 }
162 
163 static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) {
164   iter.src_row_y += iter.src_stride_y;
165   iter.src_row_u += iter.src_stride_u;
166   iter.src_row_v += iter.src_stride_v;
167   iter.y_index++;
168 }
169 
170 static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) {
171   iter.y_index = y_index;
172   iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
173   iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
174   iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
175 }
176 
177 static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) {
178   iter.src_row_y += iter.src_stride_y;
179   iter.src_row_u += iter.src_stride_u;
180   iter.src_row_v += iter.src_stride_v;
181   iter.y_index++;
182 }
183 
184 static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) {
185   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
186   int uv_y_index = y_index >> kYShift;
187 
188   iter.y_index = y_index;
189   iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
190   iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u;
191   iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v;
192 }
193 
194 static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) {
195   iter.src_row_y += iter.src_stride_y;
196   if (iter.y_index & 1) {
197     iter.src_row_u += iter.src_stride_u;
198     iter.src_row_v += iter.src_stride_v;
199   }
200   iter.y_index++;
201 }
202 
203 static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8* argb_row) {
204   iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width);
205 }
206 
207 void YUVBuferIter_Init(YUVBuferIter& iter, uint32 src_fourcc, YUVColorSpace yuv_color_space) {
208   iter.src_fourcc = src_fourcc;
209   iter.y_index = 0;
210   iter.src_row_y = iter.src_y;
211   iter.src_row_u = iter.src_u;
212   iter.src_row_v = iter.src_v;
213   switch (yuv_color_space) {
214     case YUVColorSpace::BT2020:
215       iter.yuvconstants = &kYuv2020Constants;
216       break;
217     case YUVColorSpace::BT709:
218       iter.yuvconstants = &kYuvH709Constants;
219       break;
220     default:
221       iter.yuvconstants = &kYuvI601Constants;
222   }
223 
224   if (src_fourcc == FOURCC_I444) {
225     YUVBuferIter_InitI444(iter);
226     iter.MoveTo = YUVBuferIter_MoveToForI444;
227     iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444;
228   } else if(src_fourcc == FOURCC_I422){
229     YUVBuferIter_InitI422(iter);
230     iter.MoveTo = YUVBuferIter_MoveToForI422;
231     iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422;
232   } else {
233     assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420
234     YUVBuferIter_InitI422(iter);
235     iter.MoveTo = YUVBuferIter_MoveToForI420;
236     iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420;
237   }
238 }
239 
240 // ScaleARGB ARGB, 1/2
241 // This is an optimized version for scaling down a ARGB to 1/2 of
242 // its original size.
243 static void ScaleYUVToARGBDown2(int src_width, int src_height,
244                                 int dst_width, int dst_height,
245                                 int src_stride_y,
246                                 int src_stride_u,
247                                 int src_stride_v,
248                                 int dst_stride_argb,
249                                 const uint8* src_y,
250                                 const uint8* src_u,
251                                 const uint8* src_v,
252                                 uint8* dst_argb,
253                                 int x, int dx, int y, int dy,
254                                 enum FilterMode filtering,
255                                 uint32 src_fourcc,
256                                 YUVColorSpace yuv_color_space) {
257   int j;
258 
259   // Allocate 2 rows of ARGB for source conversion.
260   const int kRowSize = (src_width * 4 + 15) & ~15;
261   align_buffer_64(argb_cnv_row, kRowSize * 2);
262   uint8* argb_cnv_rowptr = argb_cnv_row;
263   int argb_cnv_rowstride = kRowSize;
264 
265   YUVBuferIter iter;
266   iter.src_width = src_width;
267   iter.src_height = src_height;
268   iter.src_stride_y = src_stride_y;
269   iter.src_stride_u = src_stride_u;
270   iter.src_stride_v = src_stride_v;
271   iter.src_y = src_y;
272   iter.src_u = src_u;
273   iter.src_v = src_v;
274   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
275 
276   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
277                             uint8* dst_argb, int dst_width) =
278     filtering == kFilterNone ? ScaleARGBRowDown2_C :
279         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
280         ScaleARGBRowDown2Box_C);
281   assert(dx == 65536 * 2);  // Test scale factor of 2.
282   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
283   // Advance to odd row, even column.
284   int yi = y >> 16;
285   iter.MoveTo(iter, yi);
286   ptrdiff_t x_offset;
287   if (filtering == kFilterBilinear) {
288     x_offset = (x >> 16) * 4;
289   } else {
290     x_offset = ((x >> 16) - 1) * 4;
291   }
292 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
293   if (TestCpuFlag(kCpuHasSSE2)) {
294     ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
295         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
296         ScaleARGBRowDown2Box_Any_SSE2);
297     if (IS_ALIGNED(dst_width, 4)) {
298       ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
299           (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
300           ScaleARGBRowDown2Box_SSE2);
301     }
302   }
303 
304 #endif
305 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
306   if (TestCpuFlag(kCpuHasNEON)) {
307     ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
308         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
309         ScaleARGBRowDown2Box_Any_NEON);
310     if (IS_ALIGNED(dst_width, 8)) {
311       ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
312           (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
313           ScaleARGBRowDown2Box_NEON);
314     }
315   }
316 #endif
317 
318   const int dyi = dy >> 16;
319   int lastyi = yi;
320   YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
321   // Prepare next row if necessary
322   if (filtering != kFilterLinear) {
323     if ((yi + dyi) < (src_height - 1)) {
324       iter.MoveTo(iter, yi + dyi);
325       YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
326     } else {
327       argb_cnv_rowstride = 0;
328     }
329   }
330 
331   if (filtering == kFilterLinear) {
332     argb_cnv_rowstride = 0;
333   }
334   const int max_yi = src_height - 1;
335   const int max_yi_minus_dyi = max_yi - dyi;
336   for (j = 0; j < dst_height; ++j) {
337     if (yi != lastyi) {
338       if (yi > max_yi) {
339         yi = max_yi;
340       }
341       if (yi != lastyi) {
342         if (filtering == kFilterLinear) {
343           iter.MoveTo(iter, yi);
344           YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
345           lastyi = yi;
346         } else {
347           // Prepare current row
348           if (yi == iter.y_index) {
349             argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
350             argb_cnv_rowstride = - argb_cnv_rowstride;
351           } else {
352             iter.MoveTo(iter, yi);
353             argb_cnv_rowptr = argb_cnv_row;
354             argb_cnv_rowstride = kRowSize;
355             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
356           }
357           // Prepare next row if necessary
358           if (iter.y_index  < max_yi) {
359             int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
360             iter.MoveTo(iter, next_yi);
361             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
362           } else {
363             argb_cnv_rowstride = 0;
364           }
365           lastyi = yi;
366         }
367       }
368     }
369     ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width);
370     dst_argb += dst_stride_argb;
371     yi += dyi;
372   }
373 
374   free_aligned_buffer_64(argb_cnv_row);
375 }
376 
377 // ScaleARGB ARGB Even
378 // This is an optimized version for scaling down a ARGB to even
379 // multiple of its original size.
380 static void ScaleYUVToARGBDownEven(int src_width, int src_height,
381                                    int dst_width, int dst_height,
382                                    int src_stride_y,
383                                    int src_stride_u,
384                                    int src_stride_v,
385                                    int dst_stride_argb,
386                                    const uint8* src_y,
387                                    const uint8* src_u,
388                                    const uint8* src_v,
389                                    uint8* dst_argb,
390                                    int x, int dx, int y, int dy,
391                                    enum FilterMode filtering,
392                                    uint32 src_fourcc,
393                                    YUVColorSpace yuv_color_space) {
394   int j;
395   // Allocate 2 rows of ARGB for source conversion.
396   const int kRowSize = (src_width * 4 + 15) & ~15;
397   align_buffer_64(argb_cnv_row, kRowSize * 2);
398   uint8* argb_cnv_rowptr = argb_cnv_row;
399   int argb_cnv_rowstride = kRowSize;
400 
401   int col_step = dx >> 16;
402   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
403                                int src_step, uint8* dst_argb, int dst_width) =
404       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
405   assert(IS_ALIGNED(src_width, 2));
406   assert(IS_ALIGNED(src_height, 2));
407   int yi = y >> 16;
408   const ptrdiff_t x_offset = (x >> 16) * 4;
409 
410 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
411   if (TestCpuFlag(kCpuHasSSE2)) {
412     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
413         ScaleARGBRowDownEven_Any_SSE2;
414     if (IS_ALIGNED(dst_width, 4)) {
415       ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
416           ScaleARGBRowDownEven_SSE2;
417     }
418   }
419 #endif
420 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
421   if (TestCpuFlag(kCpuHasNEON)) {
422     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
423         ScaleARGBRowDownEven_Any_NEON;
424     if (IS_ALIGNED(dst_width, 4)) {
425       ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
426           ScaleARGBRowDownEven_NEON;
427     }
428   }
429 #endif
430 
431   YUVBuferIter iter;
432   iter.src_width = src_width;
433   iter.src_height = src_height;
434   iter.src_stride_y = src_stride_y;
435   iter.src_stride_u = src_stride_u;
436   iter.src_stride_v = src_stride_v;
437   iter.src_y = src_y;
438   iter.src_u = src_u;
439   iter.src_v = src_v;
440   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
441 
442   const int dyi = dy >> 16;
443   int lastyi = yi;
444   YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
445   // Prepare next row if necessary
446   if (filtering != kFilterLinear) {
447     if ((yi + dyi) < (src_height - 1)) {
448       iter.MoveTo(iter, yi + dyi);
449       YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
450     } else {
451       argb_cnv_rowstride = 0;
452     }
453   }
454 
455   if (filtering == kFilterLinear) {
456     argb_cnv_rowstride = 0;
457   }
458   const int max_yi = src_height - 1;
459   const int max_yi_minus_dyi = max_yi - dyi;
460   for (j = 0; j < dst_height; ++j) {
461     if (yi != lastyi) {
462       if (yi > max_yi) {
463         yi = max_yi;
464       }
465       if (yi != lastyi) {
466         if (filtering == kFilterLinear) {
467           iter.MoveTo(iter, yi);
468           YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
469           lastyi = yi;
470         } else {
471           // Prepare current row
472           if (yi == iter.y_index) {
473             argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
474             argb_cnv_rowstride = - argb_cnv_rowstride;
475           } else {
476             iter.MoveTo(iter, yi);
477             argb_cnv_rowptr = argb_cnv_row;
478             argb_cnv_rowstride = kRowSize;
479             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
480           }
481           // Prepare next row if necessary
482           if (iter.y_index  < max_yi) {
483             int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
484             iter.MoveTo(iter, next_yi);
485             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
486           } else {
487             argb_cnv_rowstride = 0;
488           }
489           lastyi = yi;
490         }
491       }
492     }
493     ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width);
494     dst_argb += dst_stride_argb;
495     yi += dyi;
496   }
497   free_aligned_buffer_64(argb_cnv_row);
498 }
499 
500 // Scale YUV to ARGB down with bilinear interpolation.
501 static void ScaleYUVToARGBBilinearDown(int src_width, int src_height,
502                                        int dst_width, int dst_height,
503                                        int src_stride_y,
504                                        int src_stride_u,
505                                        int src_stride_v,
506                                        int dst_stride_argb,
507                                        const uint8* src_y,
508                                        const uint8* src_u,
509                                        const uint8* src_v,
510                                        uint8* dst_argb,
511                                        int x, int dx, int y, int dy,
512                                        enum FilterMode filtering,
513                                        uint32 src_fourcc,
514                                        YUVColorSpace yuv_color_space) {
515   int j;
516   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
517       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
518       InterpolateRow_C;
519   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
520       int dst_width, int x, int dx) =
521       (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
522   int64 xlast = x + (int64)(dst_width - 1) * dx;
523   int64 xl = (dx >= 0) ? x : xlast;
524   int64 xr = (dx >= 0) ? xlast : x;
525   int clip_src_width;
526   xl = (xl >> 16) & ~3;  // Left edge aligned.
527   xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
528   xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
529   if (xr > src_width) {
530     xr = src_width;
531   }
532   clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
533   const ptrdiff_t xl_offset = xl * 4;
534   x -= (int)(xl << 16);
535 
536   // Allocate 2 row of ARGB for source conversion.
537   const int kRowSize = (src_width * 4 + 15) & ~15;
538   align_buffer_64(argb_cnv_row, kRowSize * 2);
539   uint8* argb_cnv_rowptr = argb_cnv_row;
540   int argb_cnv_rowstride = kRowSize;
541 
542 #if defined(HAS_INTERPOLATEROW_SSSE3)
543   if (TestCpuFlag(kCpuHasSSSE3)) {
544     InterpolateRow = InterpolateRow_Any_SSSE3;
545     if (IS_ALIGNED(clip_src_width, 16)) {
546       InterpolateRow = InterpolateRow_SSSE3;
547     }
548   }
549 #endif
550 #if defined(HAS_INTERPOLATEROW_AVX2)
551   if (TestCpuFlag(kCpuHasAVX2)) {
552     InterpolateRow = InterpolateRow_Any_AVX2;
553     if (IS_ALIGNED(clip_src_width, 32)) {
554       InterpolateRow = InterpolateRow_AVX2;
555     }
556   }
557 #endif
558 #if defined(HAS_INTERPOLATEROW_NEON)
559   if (TestCpuFlag(kCpuHasNEON)) {
560     InterpolateRow = InterpolateRow_Any_NEON;
561     if (IS_ALIGNED(clip_src_width, 16)) {
562       InterpolateRow = InterpolateRow_NEON;
563     }
564   }
565 #endif
566 #if defined(HAS_INTERPOLATEROW_DSPR2)
567   if (TestCpuFlag(kCpuHasDSPR2) &&
568       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) {
569     InterpolateRow = InterpolateRow_Any_DSPR2;
570     if (IS_ALIGNED(clip_src_width, 4)) {
571       InterpolateRow = InterpolateRow_DSPR2;
572     }
573   }
574 #endif
575 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
576   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
577     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
578   }
579 #endif
580 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
581   if (TestCpuFlag(kCpuHasNEON)) {
582     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
583     if (IS_ALIGNED(dst_width, 4)) {
584       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
585     }
586   }
587 #endif
588 
589   int yi = y >> 16;
590 
591   YUVBuferIter iter;
592   iter.src_width = src_width;
593   iter.src_height = src_height;
594   iter.src_stride_y = src_stride_y;
595   iter.src_stride_u = src_stride_u;
596   iter.src_stride_v = src_stride_v;
597   iter.src_y = src_y;
598   iter.src_u = src_u;
599   iter.src_v = src_v;
600   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
601   iter.MoveTo(iter, yi);
602 
603   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
604   // Allocate a row of ARGB.
605   align_buffer_64(row, clip_src_width * 4);
606 
607   int lastyi = yi;
608   YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
609   // Prepare next row if necessary
610   if (filtering != kFilterLinear) {
611     if ((yi + 1) < src_height) {
612       iter.MoveToNextRow(iter);
613       YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
614     } else {
615       argb_cnv_rowstride = 0;
616     }
617   }
618 
619   const int max_y = (src_height - 1) << 16;
620   const int max_yi = src_height - 1;
621   for (j = 0; j < dst_height; ++j) {
622     yi = y >> 16;
623     if (yi != lastyi) {
624       if (y > max_y) {
625         y = max_y;
626         yi = y >> 16;
627       }
628       if (yi != lastyi) {
629         if (filtering == kFilterLinear) {
630           iter.MoveTo(iter, yi);
631           YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
632           lastyi = yi;
633         } else {
634           // Prepare current row
635           if (yi == iter.y_index) {
636             argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
637             argb_cnv_rowstride = - argb_cnv_rowstride;
638           } else {
639             iter.MoveTo(iter, yi);
640             argb_cnv_rowptr = argb_cnv_row;
641             argb_cnv_rowstride = kRowSize;
642             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
643           }
644           // Prepare next row if necessary
645           if (iter.y_index < max_yi) {
646             iter.MoveToNextRow(iter);
647             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
648           } else {
649             argb_cnv_rowstride = 0;
650           }
651           lastyi = yi;
652         }
653       }
654     }
655     if (filtering == kFilterLinear) {
656       ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx);
657     } else {
658       int yf = (y >> 8) & 255;
659       InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf);
660       ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
661     }
662     dst_argb += dst_stride_argb;
663     y += dy;
664   }
665   free_aligned_buffer_64(row);
666   free_aligned_buffer_64(argb_cnv_row);
667 }
668 
669 // Scale YUV to ARGB up with bilinear interpolation.
670 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
671                                      int dst_width, int dst_height,
672                                      int src_stride_y,
673                                      int src_stride_u,
674                                      int src_stride_v,
675                                      int dst_stride_argb,
676                                      const uint8* src_y,
677                                      const uint8* src_u,
678                                      const uint8* src_v,
679                                      uint8* dst_argb,
680                                      int x, int dx, int y, int dy,
681                                      enum FilterMode filtering,
682                                      uint32 src_fourcc,
683                                      YUVColorSpace yuv_color_space) {
684   int j;
685   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
686       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
687       InterpolateRow_C;
688   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
689       int dst_width, int x, int dx) =
690       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
691   const int max_y = (src_height - 1) << 16;
692 
693   // Allocate 1 row of ARGB for source conversion.
694   align_buffer_64(argb_cnv_row, src_width * 4);
695 
696 #if defined(HAS_INTERPOLATEROW_SSSE3)
697   if (TestCpuFlag(kCpuHasSSSE3)) {
698     InterpolateRow = InterpolateRow_Any_SSSE3;
699     if (IS_ALIGNED(dst_width, 4)) {
700       InterpolateRow = InterpolateRow_SSSE3;
701     }
702   }
703 #endif
704 #if defined(HAS_INTERPOLATEROW_AVX2)
705   if (TestCpuFlag(kCpuHasAVX2)) {
706     InterpolateRow = InterpolateRow_Any_AVX2;
707     if (IS_ALIGNED(dst_width, 8)) {
708       InterpolateRow = InterpolateRow_AVX2;
709     }
710   }
711 #endif
712 #if defined(HAS_INTERPOLATEROW_NEON)
713   if (TestCpuFlag(kCpuHasNEON)) {
714     InterpolateRow = InterpolateRow_Any_NEON;
715     if (IS_ALIGNED(dst_width, 4)) {
716       InterpolateRow = InterpolateRow_NEON;
717     }
718   }
719 #endif
720 #if defined(HAS_INTERPOLATEROW_DSPR2)
721   if (TestCpuFlag(kCpuHasDSPR2) &&
722       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
723     InterpolateRow = InterpolateRow_DSPR2;
724   }
725 #endif
726   if (src_width >= 32768) {
727     ScaleARGBFilterCols = filtering ?
728         ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
729   }
730 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
731   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
732     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
733   }
734 #endif
735 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
736   if (filtering && TestCpuFlag(kCpuHasNEON)) {
737     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
738     if (IS_ALIGNED(dst_width, 4)) {
739       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
740     }
741   }
742 #endif
743 #if defined(HAS_SCALEARGBCOLS_SSE2)
744   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
745     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
746   }
747 #endif
748 #if defined(HAS_SCALEARGBCOLS_NEON)
749   if (!filtering && TestCpuFlag(kCpuHasNEON)) {
750     ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
751     if (IS_ALIGNED(dst_width, 8)) {
752       ScaleARGBFilterCols = ScaleARGBCols_NEON;
753     }
754   }
755 #endif
756   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
757     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
758 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
759     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
760       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
761     }
762 #endif
763   }
764 
765   if (y > max_y) {
766     y = max_y;
767   }
768 
769   int yi = y >> 16;
770 
771   YUVBuferIter iter;
772   iter.src_width = src_width;
773   iter.src_height = src_height;
774   iter.src_stride_y = src_stride_y;
775   iter.src_stride_u = src_stride_u;
776   iter.src_stride_v = src_stride_v;
777   iter.src_y = src_y;
778   iter.src_u = src_u;
779   iter.src_v = src_v;
780   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
781   iter.MoveTo(iter, yi);
782 
783   // Allocate 2 rows of ARGB.
784   const int kRowSize = (dst_width * 4 + 15) & ~15;
785   align_buffer_64(row, kRowSize * 2);
786 
787   uint8* rowptr = row;
788   int rowstride = kRowSize;
789   int lastyi = yi;
790 
791   YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
792   ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
793 
794   if (filtering == kFilterLinear) {
795     rowstride = 0;
796   }
797   // Prepare next row if necessary
798   if (filtering != kFilterLinear) {
799     if ((yi + 1) < src_height) {
800       iter.MoveToNextRow(iter);
801       YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
802       ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
803     }else {
804       rowstride = 0;
805     }
806   }
807 
808   const int max_yi = src_height - 1;
809   for (j = 0; j < dst_height; ++j) {
810     yi = y >> 16;
811     if (yi != lastyi) {
812       if (y > max_y) {
813         y = max_y;
814         yi = y >> 16;
815       }
816       if (yi != lastyi) {
817         if (filtering == kFilterLinear) {
818             iter.MoveToNextRow(iter);
819             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
820             ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
821         } else {
822           // Prepare next row if necessary
823           if (yi < max_yi) {
824             iter.MoveToNextRow(iter);
825             rowptr += rowstride;
826             rowstride = -rowstride;
827             // TODO(fbarchard): Convert the clipped region of row.
828             YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
829             ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
830           } else {
831             rowstride = 0;
832           }
833         }
834         lastyi = yi;
835       }
836     }
837     if (filtering == kFilterLinear) {
838       InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
839     } else {
840       int yf = (y >> 8) & 255;
841       InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
842     }
843     dst_argb += dst_stride_argb;
844     y += dy;
845   }
846   free_aligned_buffer_64(row);
847   free_aligned_buffer_64(argb_cnv_row);
848 }
849 
850 // Scale ARGB to/from any dimensions, without interpolation.
851 // Fixed point math is used for performance: The upper 16 bits
852 // of x and dx is the integer part of the source position and
853 // the lower 16 bits are the fixed decimal part.
854 
855 static void ScaleYUVToARGBSimple(int src_width, int src_height,
856                                  int dst_width, int dst_height,
857                                  int src_stride_y,
858                                  int src_stride_u,
859                                  int src_stride_v,
860                                  int dst_stride_argb,
861                                  const uint8* src_y,
862                                  const uint8* src_u,
863                                  const uint8* src_v,
864                                  uint8* dst_argb,
865                                  int x, int dx, int y, int dy,
866                                  uint32 src_fourcc,
867                                  YUVColorSpace yuv_color_space) {
868   int j;
869   void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
870       int dst_width, int x, int dx) =
871       (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
872 
873   // Allocate 1 row of ARGB for source conversion.
874   align_buffer_64(argb_cnv_row, src_width * 4);
875 
876 #if defined(HAS_SCALEARGBCOLS_SSE2)
877   if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
878     ScaleARGBCols = ScaleARGBCols_SSE2;
879   }
880 #endif
881 #if defined(HAS_SCALEARGBCOLS_NEON)
882   if (TestCpuFlag(kCpuHasNEON)) {
883     ScaleARGBCols = ScaleARGBCols_Any_NEON;
884     if (IS_ALIGNED(dst_width, 8)) {
885       ScaleARGBCols = ScaleARGBCols_NEON;
886     }
887   }
888 #endif
889   if (src_width * 2 == dst_width && x < 0x8000) {
890     ScaleARGBCols = ScaleARGBColsUp2_C;
891 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
892     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
893       ScaleARGBCols = ScaleARGBColsUp2_SSE2;
894     }
895 #endif
896   }
897 
898   int yi = y >> 16;
899 
900   YUVBuferIter iter;
901   iter.src_width = src_width;
902   iter.src_height = src_height;
903   iter.src_stride_y = src_stride_y;
904   iter.src_stride_u = src_stride_u;
905   iter.src_stride_v = src_stride_v;
906   iter.src_y = src_y;
907   iter.src_u = src_u;
908   iter.src_v = src_v;
909   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
910   iter.MoveTo(iter, yi);
911 
912   int lasty = yi;
913   YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
914 
915   for (j = 0; j < dst_height; ++j) {
916     yi = y >> 16;
917     if (yi != lasty) {
918       iter.MoveTo(iter, yi);
919       YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
920       lasty = yi;
921     }
922     ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx);
923     dst_argb += dst_stride_argb;
924     y += dy;
925   }
926   free_aligned_buffer_64(argb_cnv_row);
927 }
928 
929 static void YUVToARGBCopy(const uint8* src_y, int src_stride_y,
930                           const uint8* src_u, int src_stride_u,
931                           const uint8* src_v, int src_stride_v,
932                           int src_width, int src_height,
933                           uint8* dst_argb, int dst_stride_argb,
934                           int dst_width, int dst_height,
935                           uint32 src_fourcc,
936                           YUVColorSpace yuv_color_space)
937 {
938   YUVBuferIter iter;
939   iter.src_width = src_width;
940   iter.src_height = src_height;
941   iter.src_stride_y = src_stride_y;
942   iter.src_stride_u = src_stride_u;
943   iter.src_stride_v = src_stride_v;
944   iter.src_y = src_y;
945   iter.src_u = src_u;
946   iter.src_v = src_v;
947   YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
948 
949   for (int j = 0; j < dst_height; ++j) {
950     YUVBuferIter_ConvertToARGBRow(iter, dst_argb);
951     iter.MoveToNextRow(iter);
952     dst_argb += dst_stride_argb;
953   }
954 }
955 
956 static void ScaleYUVToARGB(const uint8* src_y, int src_stride_y,
957                            const uint8* src_u, int src_stride_u,
958                            const uint8* src_v, int src_stride_v,
959                            int src_width, int src_height,
960                            uint8* dst_argb, int dst_stride_argb,
961                            int dst_width, int dst_height,
962                            enum FilterMode filtering,
963                            uint32 src_fourcc,
964                            YUVColorSpace yuv_color_space)
965 {
966   // Initial source x/y coordinate and step values as 16.16 fixed point.
967   int x = 0;
968   int y = 0;
969   int dx = 0;
970   int dy = 0;
971   // ARGB does not support box filter yet, but allow the user to pass it.
972   // Simplify filtering when possible.
973   filtering = ScaleFilterReduce(src_width, src_height,
974                                 dst_width, dst_height,
975                                 filtering);
976   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
977              &x, &y, &dx, &dy);
978 
979   // Special case for integer step values.
980   if (((dx | dy) & 0xffff) == 0) {
981     if (!dx || !dy) {  // 1 pixel wide and/or tall.
982       filtering = kFilterNone;
983     } else {
984       // Optimized even scale down. ie 2, 4, 6, 8, 10x.
985       if (!(dx & 0x10000) && !(dy & 0x10000)) {
986         if (dx == 0x20000) {
987           // Optimized 1/2 downsample.
988           ScaleYUVToARGBDown2(src_width, src_height,
989                               dst_width, dst_height,
990                               src_stride_y,
991                               src_stride_u,
992                               src_stride_v,
993                               dst_stride_argb,
994                               src_y,
995                               src_u,
996                               src_v,
997                               dst_argb,
998                               x, dx, y, dy,
999                               filtering,
1000                               src_fourcc,
1001                               yuv_color_space);
1002           return;
1003         }
1004         ScaleYUVToARGBDownEven(src_width, src_height,
1005                                dst_width, dst_height,
1006                                src_stride_y,
1007                                src_stride_u,
1008                                src_stride_v,
1009                                dst_stride_argb,
1010                                src_y,
1011                                src_u,
1012                                src_v,
1013                                dst_argb,
1014                                x, dx, y, dy,
1015                                filtering,
1016                                src_fourcc,
1017                                yuv_color_space);
1018         return;
1019       }
1020       // Optimized odd scale down. ie 3, 5, 7, 9x.
1021       if ((dx & 0x10000) && (dy & 0x10000)) {
1022         filtering = kFilterNone;
1023         if (dx == 0x10000 && dy == 0x10000) {
1024           // Straight conversion and copy.
1025           YUVToARGBCopy(src_y, src_stride_y,
1026                         src_u, src_stride_u,
1027                         src_v, src_stride_v,
1028                         src_width, src_height,
1029                         dst_argb, dst_stride_argb,
1030                         dst_width, dst_height,
1031                         src_fourcc,
1032                         yuv_color_space);
1033           return;
1034         }
1035       }
1036     }
1037   }
1038   if (filtering && dy < 65536) {
1039     ScaleYUVToARGBBilinearUp(src_width, src_height,
1040                              dst_width, dst_height,
1041                              src_stride_y,
1042                              src_stride_u,
1043                              src_stride_v,
1044                              dst_stride_argb,
1045                              src_y,
1046                              src_u,
1047                              src_v,
1048                              dst_argb,
1049                              x, dx, y, dy,
1050                              filtering,
1051                              src_fourcc,
1052                              yuv_color_space);
1053     return;
1054   }
1055   if (filtering) {
1056     ScaleYUVToARGBBilinearDown(src_width, src_height,
1057                                dst_width, dst_height,
1058                                src_stride_y,
1059                                src_stride_u,
1060                                src_stride_v,
1061                                dst_stride_argb,
1062                                src_y,
1063                                src_u,
1064                                src_v,
1065                                dst_argb,
1066                                x, dx, y, dy,
1067                                filtering,
1068                                src_fourcc,
1069                                yuv_color_space);
1070     return;
1071   }
1072   ScaleYUVToARGBSimple(src_width, src_height,
1073                        dst_width, dst_height,
1074                        src_stride_y,
1075                        src_stride_u,
1076                        src_stride_v,
1077                        dst_stride_argb,
1078                        src_y,
1079                        src_u,
1080                        src_v,
1081                        dst_argb,
1082                        x, dx, y, dy,
1083                        src_fourcc,
1084                        yuv_color_space);
1085 }
1086 
1087 bool IsConvertSupported(uint32 src_fourcc)
1088 {
1089   if (src_fourcc == FOURCC_I444 ||
1090       src_fourcc == FOURCC_I422 ||
1091       src_fourcc == FOURCC_I420) {
1092     return true;
1093   }
1094   return false;
1095 }
1096 
1097 LIBYUV_API
1098 int YUVToARGBScale(const uint8* src_y, int src_stride_y,
1099                    const uint8* src_u, int src_stride_u,
1100                    const uint8* src_v, int src_stride_v,
1101                    uint32 src_fourcc,
1102                    YUVColorSpace yuv_color_space,
1103                    int src_width, int src_height,
1104                    uint8* dst_argb, int dst_stride_argb,
1105                    int dst_width, int dst_height,
1106                    enum FilterMode filtering)
1107 {
1108   if (!src_y || !src_u || !src_v ||
1109       src_width == 0 || src_height == 0 ||
1110       !dst_argb || dst_width <= 0 || dst_height <= 0) {
1111     return -1;
1112   }
1113   if (!IsConvertSupported(src_fourcc)) {
1114     return -1;
1115   }
1116   ScaleYUVToARGB(src_y, src_stride_y,
1117                  src_u, src_stride_u,
1118                  src_v, src_stride_v,
1119                  src_width, src_height,
1120                  dst_argb, dst_stride_argb,
1121                  dst_width, dst_height,
1122                  filtering,
1123                  src_fourcc,
1124                  yuv_color_space);
1125   return 0;
1126 }
1127 
1128 #ifdef __cplusplus
1129 }  // extern "C"
1130 }  // namespace libyuv
1131 #endif
1132