1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <stdio.h>
14 #include <string.h>  // For memcpy and memset.
15 
16 #include "libyuv/basic_types.h"
17 
18 #ifdef __cplusplus
19 namespace libyuv {
20 extern "C" {
21 #endif
22 
23 // llvm x86 is poor at ternary operator, so use branchless min/max.
24 
25 #define USE_BRANCHLESS 1
26 #if USE_BRANCHLESS
// Lower clamp without branching: returns v when v > 0 and 0 otherwise.
// The mask is taken from the sign bit of v itself rather than from -v, so
// INT32_MIN (whose negation overflows) also clamps to 0.
// Relies on >> of a negative int32_t being an arithmetic shift.
static __inline int32_t clamp0(int32_t v) {
  return ~(v >> 31) & v;  // Mask is all ones for v >= 0, zero for v < 0.
}
30 
// Upper clamp to 255 without branching. Clamp() passes a value that has
// already been clamped to >= 0; a negative v is not clamped to 0 here, it is
// only masked to its low 8 bits.
static __inline int32_t clamp255(int32_t v) {
  const int32_t over = (255 - v) >> 31;  // All ones when v > 255, else 0.
  return (over | v) & 255;
}
34 
// Upper clamp to 1023 (10 bits) without branching. Clamp10() passes a value
// that has already been clamped to >= 0; a negative v is only masked to its
// low 10 bits.
static __inline int32_t clamp1023(int32_t v) {
  const int32_t over = (1023 - v) >> 31;  // All ones when v > 1023, else 0.
  return (over | v) & 1023;
}
38 
// Branchless absolute value, returned as unsigned.
// The add/xor is done on uint32_t so that INT32_MIN does not overflow a signed
// int; Abs(INT32_MIN) is 2147483648.
// Relies on >> of a negative int32_t being an arithmetic shift.
static __inline uint32_t Abs(int32_t v) {
  const uint32_t m = (uint32_t)(v >> 31);  // All ones when v < 0, else 0.
  return ((uint32_t)v + m) ^ m;            // Two's complement negate if v < 0.
}
43 #else   // USE_BRANCHLESS
// Lower clamp: negative values become 0, everything else is returned as is.
static __inline int32_t clamp0(int32_t v) {
  if (v < 0) {
    return 0;
  }
  return v;
}
47 
// Upper clamp only: values above 255 become 255; smaller values (including
// negative ones) are returned unchanged.
static __inline int32_t clamp255(int32_t v) {
  if (v > 255) {
    return 255;
  }
  return v;
}
51 
// Upper clamp only: values above 1023 become 1023; smaller values (including
// negative ones) are returned unchanged.
static __inline int32_t clamp1023(int32_t v) {
  if (v > 1023) {
    return 1023;
  }
  return v;
}
55 
// Absolute value, returned as unsigned.
// The negation is done on uint32_t so that INT32_MIN does not overflow a
// signed int; Abs(INT32_MIN) is 2147483648.
static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? (0u - (uint32_t)v) : (uint32_t)v;
}
59 #endif  // USE_BRANCHLESS
// Clamps val to 0..255: the lower bound is applied first (clamp0), then the
// upper bound (clamp255).
static __inline uint32_t Clamp(int32_t val) {
  return (uint32_t)clamp255(clamp0(val));
}
64 
// Clamps val to 0..1023 (10 bits): the lower bound is applied first (clamp0),
// then the upper bound (clamp1023).
static __inline uint32_t Clamp10(int32_t val) {
  return (uint32_t)clamp1023(clamp0(val));
}
69 
// WRITEWORD(p, v) stores the 32 bit value v at byte pointer p in little endian
// byte order. p does not need to be 4 byte aligned.
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// Little endian host: the object representation of v already has the required
// byte order. memcpy is used instead of a uint32_t* cast so the store stays
// valid for unaligned p and does not break aliasing rules.
static __inline void WRITEWORD(uint8_t* p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}
#else
// Other hosts: emit the bytes explicitly, least significant first.
static __inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
83 
// Converts width pixels from 3 bytes per pixel to 4 bytes per pixel: the three
// source bytes are copied in the same order and a fourth byte of 255 is
// appended as alpha.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole source pixel before storing anything.
    const uint8_t c0 = src_rgb24[0];
    const uint8_t c1 = src_rgb24[1];
    const uint8_t c2 = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
98 
// Converts width pixels from 3 byte RAW (R, G, B in memory) to 4 byte ARGB
// (B, G, R, A in memory): the first and third bytes are swapped and alpha is
// set to 255.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole source pixel before storing anything.
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
113 
// Converts width pixels from RAW (R, G, B in memory) to RGB24 (B, G, R in
// memory) by swapping the first and third byte of every 3 byte pixel.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // All three bytes are loaded before any store, so the swap also works
    // when src_raw and dst_rgb24 point at the same buffer.
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    dst_rgb24[0] = blue;
    dst_rgb24[1] = green;
    dst_rgb24[2] = red;
    dst_rgb24 += 3;
  }
}
127 
// Expands width RGB565 pixels (2 bytes each) to ARGB (4 bytes each, stored
// B, G, R, A) with alpha set to 255.
// Within a source pixel, byte 0 holds blue in bits 0-4 and the low 3 bits of
// green in bits 5-7; byte 1 holds the high 3 bits of green in bits 0-2 and
// red in bits 3-7.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_rgb565[0];
    const uint8_t hi = src_rgb565[1];
    const uint8_t b5 = lo & 0x1f;
    const uint8_t g6 = (uint8_t)((lo >> 5) | ((hi & 0x07) << 3));
    const uint8_t r5 = hi >> 3;
    // Widen to 8 bits by replicating the top bits into the vacated low bits.
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
144 
// Expands width ARGB1555 pixels (2 bytes each) to ARGB (4 bytes each, stored
// B, G, R, A).
// Within a source pixel, byte 0 holds blue in bits 0-4 and the low 3 bits of
// green in bits 5-7; byte 1 holds the high 2 bits of green in bits 0-1, red
// in bits 2-6 and the 1 bit alpha in bit 7.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb1555[0];
    const uint8_t hi = src_argb1555[1];
    const uint8_t b5 = lo & 0x1f;
    const uint8_t g5 = (uint8_t)((lo >> 5) | ((hi & 0x03) << 3));
    const uint8_t r5 = (hi & 0x7c) >> 2;
    const uint8_t a1 = hi >> 7;
    // Widen 5 bit colors to 8 bits by replicating the top 3 bits.
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    // 1 bit alpha becomes 0 or 255.
    dst_argb[3] = (uint8_t)(a1 * 0xffu);
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
162 
// Expands width ARGB4444 pixels (2 bytes each) to ARGB (4 bytes each, stored
// B, G, R, A).
// Within a source pixel, byte 0 holds blue in the low nibble and green in the
// high nibble; byte 1 holds red in the low nibble and alpha in the high nibble.
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb4444[0];
    const uint8_t hi = src_argb4444[1];
    const uint8_t b4 = lo & 0x0f;
    const uint8_t g4 = lo >> 4;
    const uint8_t r4 = hi & 0x0f;
    const uint8_t a4 = hi >> 4;
    // Widen 4 bits to 8 by repeating the nibble (0xN -> 0xNN).
    dst_argb[0] = (uint8_t)((b4 << 4) | b4);
    dst_argb[1] = (uint8_t)((g4 << 4) | g4);
    dst_argb[2] = (uint8_t)((r4 << 4) | r4);
    dst_argb[3] = (uint8_t)((a4 << 4) | a4);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
180 
// Converts width AR30 pixels to ARGB. Each source pixel is read as one 32 bit
// word: three 10 bit channels in bits 0-9, 10-19 and 20-29, and a 2 bit alpha
// in bits 30-31. Each 10 bit channel is reduced to its top 8 bits and alpha is
// expanded to 8 bits; the result is stored as one 32 bit word with the
// channels in the same low-to-high order.
// Both buffers are accessed through uint32_t pointers.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  const uint32_t* src = (const uint32_t*)src_ar30;
  uint32_t* dst = (uint32_t*)dst_argb;
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t px = src[i];
    const uint32_t blue = (px >> 2) & 0xff;
    const uint32_t green = (px >> 12) & 0xff;
    const uint32_t red = (px >> 22) & 0xff;
    const uint32_t alpha = (px >> 30) * 0x55;  // 2 bits -> 8 bits.
    dst[i] = blue | (green << 8) | (red << 16) | (alpha << 24);
  }
}
194 
// Converts width AR30 pixels to ABGR. Same unpacking as AR30ToARGBRow_C, but
// the channel from bits 20-29 of the source goes to the low byte of the output
// word and the channel from bits 0-9 goes to bits 16-23.
// Both buffers are accessed through uint32_t pointers.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  const uint32_t* src = (const uint32_t*)src_ar30;
  uint32_t* dst = (uint32_t*)dst_abgr;
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t px = src[i];
    const uint32_t blue = (px >> 2) & 0xff;
    const uint32_t green = (px >> 12) & 0xff;
    const uint32_t red = (px >> 22) & 0xff;
    const uint32_t alpha = (px >> 30) * 0x55;  // 2 bits -> 8 bits.
    dst[i] = red | (green << 8) | (blue << 16) | (alpha << 24);
  }
}
208 
// Converts width AR30 pixels to AB30 by exchanging the 10 bit channel in bits
// 0-9 with the one in bits 20-29. Bits 10-19 and the 2 bit alpha in bits 30-31
// are kept in place.
// Both buffers are accessed through uint32_t pointers.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  const uint32_t* src = (const uint32_t*)src_ar30;
  uint32_t* dst = (uint32_t*)dst_ab30;
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t px = src[i];
    const uint32_t low10 = px & 0x3ff;
    const uint32_t high10 = (px >> 20) & 0x3ff;
    const uint32_t kept = px & 0xc00ffc00;  // Middle channel and alpha.
    dst[i] = high10 | kept | (low10 << 20);
  }
}
221 
// Converts width pixels from 4 bytes per pixel to 3 bytes per pixel by copying
// the first three bytes of each source pixel and dropping the fourth (alpha).
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole source pixel before storing anything.
    const uint8_t c0 = src_argb[0];
    const uint8_t c1 = src_argb[1];
    const uint8_t c2 = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = c0;
    dst_rgb[1] = c1;
    dst_rgb[2] = c2;
    dst_rgb += 3;
  }
}
235 
// Converts width pixels from 4 byte ARGB (B, G, R, A in memory) to 3 byte RAW
// (R, G, B in memory): alpha is dropped and the first and third bytes are
// swapped.
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole source pixel before storing anything.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    dst_rgb += 3;
  }
}
249 
// Packs width ARGB pixels (4 bytes each, stored B, G, R, A) into RGB565
// (2 bytes each, little endian): blue in bits 0-4, green in bits 5-10, red in
// bits 11-15. Alpha is dropped.
// Pixels are handled two at a time and written with WRITEWORD; an odd trailing
// pixel is written as two individual bytes.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Fields are uint32_t so that shifting into bit 31 (r1 << 27) is well
    // defined; with int operands that shift overflows.
    const uint32_t b0 = src_argb[0] >> 3;
    const uint32_t g0 = src_argb[1] >> 2;
    const uint32_t r0 = src_argb[2] >> 3;
    const uint32_t b1 = src_argb[4] >> 3;
    const uint32_t g1 = src_argb[5] >> 2;
    const uint32_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    const uint32_t b0 = src_argb[0] >> 3;
    const uint32_t g0 = src_argb[1] >> 2;
    const uint32_t r0 = src_argb[2] >> 3;
    const uint32_t pixel = b0 | (g0 << 5) | (r0 << 11);
    // Byte stores keep the output little endian on every host and do not
    // require dst_rgb to be 2 byte aligned.
    dst_rgb[0] = (uint8_t)(pixel & 255);
    dst_rgb[1] = (uint8_t)(pixel >> 8);
  }
}
271 
272 // dither4 is a row of 4 values from 4x4 dither matrix.
273 // The 4x4 matrix contains values to increase RGB.  When converting to
274 // fewer bits (565) this provides an ordered dither.
275 // The order in the 4x4 matrix in first byte is upper left.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect order of the original matrix.  But the dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
// Packs width ARGB pixels into little endian RGB565 like ARGBToRGB565Row_C,
// but first adds a per-pixel dither value to each of B, G and R and clamps the
// sum to 255. Pixel x uses byte (x & 3) of dither4 in memory order.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  // View the 4 dither values as an array of bytes.
  const unsigned char* dither = (const unsigned char*)(&dither4);
  int x;
  for (x = 0; x < width - 1; x += 2) {
    const int dither0 = dither[x & 3];
    const int dither1 = dither[(x + 1) & 3];
    // Fields are uint32_t so that shifting into bit 31 (r1 << 27) is well
    // defined; with int operands that shift overflows.
    const uint32_t b0 = (uint32_t)clamp255(src_argb[0] + dither0) >> 3;
    const uint32_t g0 = (uint32_t)clamp255(src_argb[1] + dither0) >> 2;
    const uint32_t r0 = (uint32_t)clamp255(src_argb[2] + dither0) >> 3;
    const uint32_t b1 = (uint32_t)clamp255(src_argb[4] + dither1) >> 3;
    const uint32_t g1 = (uint32_t)clamp255(src_argb[5] + dither1) >> 2;
    const uint32_t r1 = (uint32_t)clamp255(src_argb[6] + dither1) >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    const int dither0 = dither[(width - 1) & 3];
    const uint32_t b0 = (uint32_t)clamp255(src_argb[0] + dither0) >> 3;
    const uint32_t g0 = (uint32_t)clamp255(src_argb[1] + dither0) >> 2;
    const uint32_t r0 = (uint32_t)clamp255(src_argb[2] + dither0) >> 3;
    const uint32_t pixel = b0 | (g0 << 5) | (r0 << 11);
    // Byte stores keep the output little endian on every host and do not
    // require dst_rgb to be 2 byte aligned.
    dst_rgb[0] = (uint8_t)(pixel & 255);
    dst_rgb[1] = (uint8_t)(pixel >> 8);
  }
}
307 
// Packs width ARGB pixels (4 bytes each, stored B, G, R, A) into ARGB1555
// (2 bytes each, little endian): blue in bits 0-4, green in bits 5-9, red in
// bits 10-14 and the top bit of alpha in bit 15.
// Each pixel is assembled in an unsigned value and written as two bytes, low
// byte first. This avoids shifting a signed int into its sign bit, gives the
// same output on big and little endian hosts, and does not require dst_rgb to
// be aligned.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 3;
    const uint32_t g = src_argb[1] >> 3;
    const uint32_t r = src_argb[2] >> 3;
    const uint32_t a = src_argb[3] >> 7;
    const uint32_t pixel = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8_t)(pixel & 255);
    dst_rgb[1] = (uint8_t)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
332 
// Packs width ARGB pixels (4 bytes each, stored B, G, R, A) into ARGB4444
// (2 bytes each, little endian) by keeping the top 4 bits of every channel:
// blue in bits 0-3, green in bits 4-7, red in bits 8-11, alpha in bits 12-15.
// Each pixel is assembled in an unsigned value and written as two bytes, low
// byte first. This avoids shifting a signed int into its sign bit, gives the
// same output on big and little endian hosts, and does not require dst_rgb to
// be aligned.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 4;
    const uint32_t g = src_argb[1] >> 4;
    const uint32_t r = src_argb[2] >> 4;
    const uint32_t a = src_argb[3] >> 4;
    const uint32_t pixel = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8_t)(pixel & 255);
    dst_rgb[1] = (uint8_t)(pixel >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
357 
// Converts width 4 byte ABGR pixels to AR30 words. The first three source
// bytes are each widened to 10 bits; byte 2 goes to bits 0-9, byte 1 to bits
// 10-19 and byte 0 to bits 20-29. The top 2 bits of byte 3 (alpha) go to bits
// 30-31. The result is stored through a uint32_t pointer.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // 8 -> 10 bits: shift up by 2 and replicate the top 2 bits at the bottom.
    const uint32_t c0 = ((uint32_t)(src_abgr[0]) << 2) | (src_abgr[0] >> 6);
    const uint32_t c1 = ((uint32_t)(src_abgr[1]) << 2) | (src_abgr[1] >> 6);
    const uint32_t c2 = ((uint32_t)(src_abgr[2]) << 2) | (src_abgr[2] >> 6);
    const uint32_t a2 = (src_abgr[3] >> 6);
    *(uint32_t*)(dst_ar30) = c2 | (c1 << 10) | (c0 << 20) | (a2 << 30);
    src_abgr += 4;
    dst_ar30 += 4;
  }
}
370 
// Converts width 4 byte ARGB pixels to AR30 words. The first three source
// bytes are each widened to 10 bits; byte 0 goes to bits 0-9, byte 1 to bits
// 10-19 and byte 2 to bits 20-29. The top 2 bits of byte 3 (alpha) go to bits
// 30-31. The result is stored through a uint32_t pointer.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // 8 -> 10 bits: shift up by 2 and replicate the top 2 bits at the bottom.
    const uint32_t c0 = ((uint32_t)(src_argb[0]) << 2) | (src_argb[0] >> 6);
    const uint32_t c1 = ((uint32_t)(src_argb[1]) << 2) | (src_argb[1] >> 6);
    const uint32_t c2 = ((uint32_t)(src_argb[2]) << 2) | (src_argb[2] >> 6);
    const uint32_t a2 = (src_argb[3] >> 6);
    *(uint32_t*)(dst_ar30) = c0 | (c1 << 10) | (c2 << 20) | (a2 << 30);
    src_argb += 4;
    dst_ar30 += 4;
  }
}
383 
RGBToY(uint8_t r,uint8_t g,uint8_t b)384 static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
385   return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
386 }
387 
// U (blue difference) from 8 bit R, G, B using integer weights -38/-74/112
// scaled by 256. The constant 0x8080 adds the +128 center (0x8000) and
// rounding (0x80).
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// V (red difference) from 8 bit R, G, B using integer weights 112/-94/-18
// scaled by 256. The constant 0x8080 adds the +128 center (0x8000) and
// rounding (0x80).
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
394 
// MAKEROWY(NAME, R, G, B, BPP) defines two row functions for a packed format
// with BPP bytes per pixel whose red, green and blue bytes sit at offsets R, G
// and B inside each pixel:
//   NAME##ToYRow_C  - one RGBToY() luma value per pixel.
//   NAME##ToUVRow_C - one RGBToU()/RGBToV() pair per 2x2 block, taken from the
//                     row at src_rgb0 and the row src_stride_rgb bytes after
//                     it. Channels are averaged over the block (truncating)
//                     before conversion. For odd widths the last column is
//                     averaged over its 2 vertical pixels only.
#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int remaining;                                                           \
    for (remaining = width; remaining > 0; --remaining) {                    \
      *dst_y++ = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                      \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* top = src_rgb0;                                           \
    const uint8_t* bot = src_rgb0 + src_stride_rgb;                          \
    int pairs;                                                               \
    for (pairs = width / 2; pairs > 0; --pairs) {                            \
      const uint8_t avg_b =                                                  \
          (top[B] + top[B + BPP] + bot[B] + bot[B + BPP]) >> 2;              \
      const uint8_t avg_g =                                                  \
          (top[G] + top[G + BPP] + bot[G] + bot[G + BPP]) >> 2;              \
      const uint8_t avg_r =                                                  \
          (top[R] + top[R + BPP] + bot[R] + bot[R + BPP]) >> 2;              \
      *dst_u++ = RGBToU(avg_r, avg_g, avg_b);                                \
      *dst_v++ = RGBToV(avg_r, avg_g, avg_b);                                \
      top += BPP * 2;                                                        \
      bot += BPP * 2;                                                        \
    }                                                                        \
    if (width & 1) {                                                         \
      const uint8_t avg_b = (top[B] + bot[B]) >> 1;                          \
      const uint8_t avg_g = (top[G] + bot[G]) >> 1;                          \
      const uint8_t avg_r = (top[R] + bot[R]) >> 1;                          \
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);                                \
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);                                \
    }                                                                        \
  }

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
442 
443 // JPeg uses a variation on BT.601-1 full range
444 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
445 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
446 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
447 // BT.601 Mpeg range uses:
448 // b 0.1016 * 255 = 25.908 = 25
449 // g 0.5078 * 255 = 129.489 = 129
450 // r 0.2578 * 255 = 65.739 = 66
451 // JPeg 8 bit Y (not used):
452 // b 0.11400 * 256 = 29.184 = 29
453 // g 0.58700 * 256 = 150.272 = 150
454 // r 0.29900 * 256 = 76.544 = 77
455 // JPeg 7 bit Y:
456 // b 0.11400 * 128 = 14.592 = 15
457 // g 0.58700 * 128 = 75.136 = 75
458 // r 0.29900 * 128 = 38.272 = 38
459 // JPeg 8 bit U:
460 // b  0.50000 * 255 = 127.5 = 127
461 // g -0.33126 * 255 = -84.4713 = -84
462 // r -0.16874 * 255 = -43.0287 = -43
463 // JPeg 8 bit V:
464 // b -0.08131 * 255 = -20.73405 = -20
465 // g -0.41869 * 255 = -106.76595 = -107
466 // r  0.50000 * 255 = 127.5 = 127
467 
// Full range (JPeg) luma from 8 bit R, G, B using 7 bit weights 38/75/15,
// which sum to 128. The constant 64 rounds before the shift by 7.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
471 
// Full range (JPeg) U from 8 bit R, G, B using integer weights -43/-84/127
// scaled by 256. The constant 0x8080 adds the +128 center (0x8000) and
// rounding (0x80).
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Full range (JPeg) V from 8 bit R, G, B using integer weights 127/-107/-20
// scaled by 256. The constant 0x8080 adds the +128 center (0x8000) and
// rounding (0x80).
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
478 
// Average of a and b, rounded up on ties: (a + b + 1) / 2.
// Both arguments are evaluated exactly once.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
480 
// MAKEROWYJ(NAME, R, G, B, BPP) defines the full range (JPeg) row functions
// for a packed format with BPP bytes per pixel whose red, green and blue bytes
// sit at offsets R, G and B inside each pixel:
//   NAME##ToYJRow_C  - one RGBToYJ() luma value per pixel.
//   NAME##ToUVJRow_C - one RGBToUJ()/RGBToVJ() pair per 2x2 block, taken from
//                      the row at src_rgb0 and the row src_stride_rgb bytes
//                      after it. Each channel is averaged with AVGB, first
//                      vertically and then horizontally. For odd widths the
//                      last column is averaged vertically only.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int remaining;                                                            \
    for (remaining = width; remaining > 0; --remaining) {                     \
      *dst_y++ = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                       \
    }                                                                         \
  }                                                                           \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* top = src_rgb0;                                            \
    const uint8_t* bot = src_rgb0 + src_stride_rgb;                           \
    int pairs;                                                                \
    for (pairs = width / 2; pairs > 0; --pairs) {                             \
      const uint8_t avg_b = AVGB(AVGB(top[B], bot[B]),                        \
                                 AVGB(top[B + BPP], bot[B + BPP]));           \
      const uint8_t avg_g = AVGB(AVGB(top[G], bot[G]),                        \
                                 AVGB(top[G + BPP], bot[G + BPP]));           \
      const uint8_t avg_r = AVGB(AVGB(top[R], bot[R]),                        \
                                 AVGB(top[R + BPP], bot[R + BPP]));           \
      *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);                                \
      *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);                                \
      top += BPP * 2;                                                         \
      bot += BPP * 2;                                                         \
    }                                                                         \
    if (width & 1) {                                                          \
      const uint8_t avg_b = AVGB(top[B], bot[B]);                             \
      const uint8_t avg_g = AVGB(top[G], bot[G]);                             \
      const uint8_t avg_r = AVGB(top[R], bot[R]);                             \
      dst_u[0] = RGBToUJ(avg_r, avg_g, avg_b);                                \
      dst_v[0] = RGBToVJ(avg_r, avg_g, avg_b);                                \
    }                                                                         \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
520 
// Writes one RGBToY() luma byte per RGB565 pixel. Each 2 byte pixel is
// unpacked to 5/6/5 bit fields and widened to 8 bits per channel first.
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_rgb565[0];
    const uint8_t hi = src_rgb565[1];
    const uint8_t b5 = lo & 0x1f;
    const uint8_t g6 = (uint8_t)((lo >> 5) | ((hi & 0x07) << 3));
    const uint8_t r5 = hi >> 3;
    // Widen to 8 bits by replicating the top bits into the low bits.
    const uint8_t b8 = (uint8_t)((b5 << 3) | (b5 >> 2));
    const uint8_t g8 = (uint8_t)((g6 << 2) | (g6 >> 4));
    const uint8_t r8 = (uint8_t)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_rgb565 += 2;
  }
}
535 
// Writes one RGBToY() luma byte per ARGB1555 pixel. Each 2 byte pixel is
// unpacked to three 5 bit color fields (the alpha bit is ignored) and widened
// to 8 bits per channel first.
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb1555[0];
    const uint8_t hi = src_argb1555[1];
    const uint8_t b5 = lo & 0x1f;
    const uint8_t g5 = (uint8_t)((lo >> 5) | ((hi & 0x03) << 3));
    const uint8_t r5 = (hi & 0x7c) >> 2;
    // Widen 5 bits to 8 by replicating the top 3 bits.
    const uint8_t b8 = (uint8_t)((b5 << 3) | (b5 >> 2));
    const uint8_t g8 = (uint8_t)((g5 << 3) | (g5 >> 2));
    const uint8_t r8 = (uint8_t)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb1555 += 2;
  }
}
550 
// Writes one RGBToY() luma byte per ARGB4444 pixel. Each 2 byte pixel is
// unpacked to 4 bit color fields (the alpha nibble is ignored) and widened to
// 8 bits per channel first.
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t b4 = src_argb4444[0] & 0x0f;
    const uint8_t g4 = src_argb4444[0] >> 4;
    const uint8_t r4 = src_argb4444[1] & 0x0f;
    // Widen 4 bits to 8 by repeating the nibble (0xN -> 0xNN).
    const uint8_t b8 = (uint8_t)((b4 << 4) | b4);
    const uint8_t g8 = (uint8_t)((g4 << 4) | g4);
    const uint8_t r8 = (uint8_t)((r4 << 4) | r4);
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb4444 += 2;
  }
}
565 
// Computes one U and one V sample per 2x2 block of RGB565 pixels, using the
// row at src_rgb565 and the row src_stride_rgb565 bytes after it.
// The channel fields of the block are summed rather than averaged, and the sum
// is then widened to 8 bits by bit replication before RGBToU()/RGBToV().
// For odd widths the last column uses only its 2 vertical pixels.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* top = src_rgb565;
  const uint8_t* bot = src_rgb565 + src_stride_rgb565;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // Sums over 4 pixels: 5 bit blue/red give 7 bits, 6 bit green gives 8.
    uint8_t sum_b = (uint8_t)((top[0] & 0x1f) + (top[2] & 0x1f) +
                              (bot[0] & 0x1f) + (bot[2] & 0x1f));
    uint8_t sum_g = (uint8_t)(((top[0] >> 5) | ((top[1] & 0x07) << 3)) +
                              ((top[2] >> 5) | ((top[3] & 0x07) << 3)) +
                              ((bot[0] >> 5) | ((bot[1] & 0x07) << 3)) +
                              ((bot[2] >> 5) | ((bot[3] & 0x07) << 3)));
    uint8_t sum_r = (uint8_t)((top[1] >> 3) + (top[3] >> 3) +
                              (bot[1] >> 3) + (bot[3] >> 3));
    // 787 -> 888: green is already 8 bits wide.
    sum_b = (uint8_t)((sum_b << 1) | (sum_b >> 6));
    sum_r = (uint8_t)((sum_r << 1) | (sum_r >> 6));
    *dst_u++ = RGBToU(sum_r, sum_g, sum_b);
    *dst_v++ = RGBToV(sum_r, sum_g, sum_b);
    top += 4;
    bot += 4;
  }
  if (width & 1) {
    // Sums over 2 pixels: 5 bit blue/red give 6 bits, 6 bit green gives 7.
    uint8_t sum_b = (uint8_t)((top[0] & 0x1f) + (bot[0] & 0x1f));
    uint8_t sum_g = (uint8_t)(((top[0] >> 5) | ((top[1] & 0x07) << 3)) +
                              ((bot[0] >> 5) | ((bot[1] & 0x07) << 3)));
    uint8_t sum_r = (uint8_t)((top[1] >> 3) + (bot[1] >> 3));
    // 676 -> 888.
    sum_b = (uint8_t)((sum_b << 2) | (sum_b >> 4));
    sum_g = (uint8_t)((sum_g << 1) | (sum_g >> 6));
    sum_r = (uint8_t)((sum_r << 2) | (sum_r >> 4));
    dst_u[0] = RGBToU(sum_r, sum_g, sum_b);
    dst_v[0] = RGBToV(sum_r, sum_g, sum_b);
  }
}
615 
// Computes one U and one V sample per 2x2 block of ARGB1555 pixels, using the
// row at src_argb1555 and the row src_stride_argb1555 bytes after it.
// Each pixel is 2 bytes: byte 0 holds blue in bits 0-4 and the low 3 bits of
// green in bits 5-7; byte 1 holds the high 2 bits of green in bits 0-1, red in
// bits 2-6 and alpha in bit 7. Alpha is ignored.
// The 5 bit fields of the block are summed rather than averaged, and the sum
// is then widened to 8 bits by bit replication before RGBToU()/RGBToV().
// For odd widths the last column uses only its 2 vertical pixels.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Pixels 0/1 are the top row, pixels 2/3 the row below.
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8_t b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8_t g = (g0 + g1 + g2 + g3);
    uint8_t r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Red is bits 2-6 of byte 1, exactly as for r0. A plain >> 3 would pull
    // in the alpha bit and drop the lowest red bit.
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b = (b0 + b2);  // 555 * 2 = 666.
    uint8_t g = (g0 + g2);
    uint8_t r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
666 
// Computes one U and one V sample per 2x2 block of ARGB4444 pixels, using the
// row at src_argb4444 and the row src_stride_argb4444 bytes after it.
// Each pixel is 2 bytes: byte 0 holds blue (low nibble) and green (high
// nibble); the low nibble of byte 1 is red. The alpha nibble is ignored.
// The 4 bit fields of the block are summed rather than averaged, and the sum
// is then widened to 8 bits by bit replication before RGBToU()/RGBToV().
// For odd widths the last column uses only its 2 vertical pixels.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* top = src_argb4444;
  const uint8_t* bot = src_argb4444 + src_stride_argb4444;
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    // Sums over 4 pixels: 4 bit fields give 6 bit sums.
    uint8_t sum_b = (uint8_t)((top[0] & 0x0f) + (top[2] & 0x0f) +
                              (bot[0] & 0x0f) + (bot[2] & 0x0f));
    uint8_t sum_g = (uint8_t)((top[0] >> 4) + (top[2] >> 4) +
                              (bot[0] >> 4) + (bot[2] >> 4));
    uint8_t sum_r = (uint8_t)((top[1] & 0x0f) + (top[3] & 0x0f) +
                              (bot[1] & 0x0f) + (bot[3] & 0x0f));
    // 666 -> 888.
    sum_b = (uint8_t)((sum_b << 2) | (sum_b >> 4));
    sum_g = (uint8_t)((sum_g << 2) | (sum_g >> 4));
    sum_r = (uint8_t)((sum_r << 2) | (sum_r >> 4));
    *dst_u++ = RGBToU(sum_r, sum_g, sum_b);
    *dst_v++ = RGBToV(sum_r, sum_g, sum_b);
    top += 4;
    bot += 4;
  }
  if (width & 1) {
    // Sums over 2 pixels: 4 bit fields give 5 bit sums.
    uint8_t sum_b = (uint8_t)((top[0] & 0x0f) + (bot[0] & 0x0f));
    uint8_t sum_g = (uint8_t)((top[0] >> 4) + (bot[0] >> 4));
    uint8_t sum_r = (uint8_t)((top[1] & 0x0f) + (bot[1] & 0x0f));
    // 555 -> 888.
    sum_b = (uint8_t)((sum_b << 3) | (sum_b >> 2));
    sum_g = (uint8_t)((sum_g << 3) | (sum_g >> 2));
    sum_r = (uint8_t)((sum_r << 3) | (sum_r >> 2));
    dst_u[0] = RGBToU(sum_r, sum_g, sum_b);
    dst_v[0] = RGBToV(sum_r, sum_g, sum_b);
  }
}
717 
// Writes one U and one V byte for every ARGB pixel (no subsampling), using
// RGBToU()/RGBToV() on the pixel's B, G, R bytes. Alpha is ignored.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the pixel before storing either output byte.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
  }
}
734 
// Converts a row of ARGB pixels to gray: B, G and R of each output pixel are
// all set to the RGBToYJ() luma of the source pixel; alpha is copied through.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Luma is computed before any store.
    const uint8_t gray = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
745 
// Applies a sepia tone to a row of ARGB pixels in place.
// Each output color is a weighted sum of the input B, G, R with weights
// expressed in 1/128ths. Alpha (byte 3) is left untouched.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  uint8_t* px = dst_argb;
  int remaining;
  for (remaining = width; remaining > 0; --remaining, px += 4) {
    const int blue = px[0];
    const int green = px[1];
    const int red = px[2];
    // Blue weights sum to 120/128, so the result cannot exceed 255.
    px[0] = (uint8_t)((blue * 17 + green * 68 + red * 35) >> 7);
    // Green and red weights sum to more than 128 and need the upper clamp.
    px[1] = (uint8_t)clamp255((blue * 22 + green * 88 + red * 45) >> 7);
    px[2] = (uint8_t)clamp255((blue * 24 + green * 98 + red * 50) >> 7);
  }
}
763 
// Apply a signed 4x4 color matrix to a row of ARGB pixels.
// matrix_argb holds 16 coefficients in 6 bit fixed point (scaled by 64):
// entries 0..3 produce output byte 0 (B), 4..7 byte 1 (G), 8..11 byte 2 (R)
// and 12..15 byte 3 (A). Within each group the coefficients multiply the
// input B, G, R, A in that order. Results are clamped to 0..255.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int in_b = src_argb[0];
    const int in_g = src_argb[1];
    const int in_r = src_argb[2];
    const int in_a = src_argb[3];
    int out[4];
    int c;
    // Compute all four sums before storing any of them.
    for (c = 0; c < 4; ++c) {
      const int8_t* coef = matrix_argb + c * 4;
      out[c] = (in_b * coef[0] + in_g * coef[1] + in_r * coef[2] +
                in_a * coef[3]) >>
               6;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp(out[c]);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
796 
// Remap all four channels of a row of ARGB pixels through a lookup table,
// in place. table_argb is indexed as table_argb[value * 4 + channel], so it
// holds 256 entries of 4 bytes, one byte per channel.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole pixel before writing any channel back.
    const int in[4] = {dst_argb[0], dst_argb[1], dst_argb[2], dst_argb[3]};
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = table_argb[in[c] * 4 + c];
    }
    dst_argb += 4;
  }
}
814 
// Remap the first three channels (B, G, R) of a row of ARGB pixels through a
// lookup table, in place. table_argb is indexed as
// table_argb[value * 4 + channel]; the 4th byte of each pixel is untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load B, G, R before writing any channel back.
    const int in[3] = {dst_argb[0], dst_argb[1], dst_argb[2]};
    int c;
    for (c = 0; c < 3; ++c) {
      dst_argb[c] = table_argb[in[c] * 4 + c];
    }
    dst_argb += 4;
  }
}
830 
// Quantize (posterize) the B, G, R channels of a row of ARGB pixels, in place.
// For each channel value v the result is
//   ((v * scale) >> 16) * interval_size + interval_offset
// stored back as a byte without clamping. The 4th byte is untouched.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load B, G, R before writing any channel back.
    const int in[3] = {dst_argb[0], dst_argb[1], dst_argb[2]};
    int c;
    for (c = 0; c < 3; ++c) {
      const int level = (in[c] * scale) >> 16;
      dst_argb[c] = level * interval_size + interval_offset;
    }
    dst_argb += 4;
  }
}
847 
// Replicate an 8 bit value into both bytes of a 16 bit value (0xAB -> 0xABAB).
// Arguments and expansion are fully parenthesized so the macro is safe inside
// larger expressions.
#define REPEAT8(v) ((v) | ((v) << 8))
// Multiply two 16 bit replicated values and keep the top 8 bits of the
// 32 bit product.
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scale each channel of a row of ARGB pixels by the matching byte of 'value'.
//   src_argb: source pixels, 4 bytes per pixel.
//   dst_argb: destination pixels, 4 bytes per pixel.
//   width:    number of pixels to process.
//   value:    four 8 bit scale factors; bits 0..7 scale byte 0 (B),
//             bits 8..15 byte 1 (G), bits 16..23 byte 2 (R) and
//             bits 24..31 byte 3 (A).
// Both the pixel byte and the scale byte are widened to 16 bits by
// replication, so 0xff * 0xff yields 0xff.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
876 
// Replicate an 8 bit value into both bytes of a 16 bit value (0xAB -> 0xABAB).
// Arguments and expansion are fully parenthesized so the macro is safe inside
// larger expressions.
#define REPEAT8(v) ((v) | ((v) << 8))
// Multiply a 16 bit replicated value by an 8 bit value and keep bits 16..23
// of the product.
#define SHADE(f, v) (((v) * (f)) >> 16)

// Multiply two rows of ARGB pixels channel by channel.
//   src_argb0: first source row; each byte is widened to 16 bits by
//              replication before the multiply.
//   src_argb1: second source row; each byte is used as is.
//   dst_argb:  destination row.
//   width:     number of pixels to process.
// Note that 255 * 255 produces 254 with this fixed point scheme.
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb0[0]);
    const uint32_t g = REPEAT8(src_argb0[1]);
    const uint32_t r = REPEAT8(src_argb0[2]);
    const uint32_t a = REPEAT8(src_argb0[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
905 
// Saturating add of two channel values. Arguments are parenthesized so the
// macro stays correct if it is ever passed a compound expression.
#define SHADE(f, v) clamp255((v) + (f))

// Add two rows of ARGB pixels channel by channel, saturating at 255.
//   src_argb0: first source row.
//   src_argb1: second source row.
//   dst_argb:  destination row.
//   width:     number of pixels to process.
void ARGBAddRow_C(const uint8_t* src_argb0,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
932 
// Subtract v from f, clamping at 0. Arguments are parenthesized so the macro
// stays correct if it is ever passed a compound expression (an unparenthesized
// "f - v" would mis-associate for an argument such as "x + y").
#define SHADE(f, v) clamp0((f) - (v))

// Subtract the second row of ARGB pixels from the first, channel by channel,
// clamping each result at 0.
//   src_argb0: row to subtract from.
//   src_argb1: row to subtract.
//   dst_argb:  destination row.
//   width:     number of pixels to process.
void ARGBSubtractRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
959 
// Sobel functions which mimic SSSE3.

// Horizontal Sobel gradient for one output row, from three source rows.
// For each column i the result is
//   |(y0[i] - y0[i+2]) + 2 * (y1[i] - y1[i+2]) + (y2[i] - y2[i+2])|
// clamped to 255. Note that columns up to width + 1 of each source row are
// read.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int top = src_y0[col] - src_y0[col + 2];
    const int middle = src_y1[col] - src_y1[col + 2];
    const int bottom = src_y2[col] - src_y2[col + 2];
    const int magnitude = Abs(top + middle * 2 + bottom);
    dst_sobelx[col] = (uint8_t)(clamp255(magnitude));
  }
}
981 
// Vertical Sobel gradient for one output row, from two source rows.
// For each column i the result is
//   |(y0[i] - y1[i]) + 2 * (y0[i+1] - y1[i+1]) + (y0[i+2] - y1[i+2])|
// clamped to 255. Note that columns up to width + 1 of each source row are
// read.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int left = src_y0[col + 0] - src_y1[col + 0];
    const int center = src_y0[col + 1] - src_y1[col + 1];
    const int right = src_y0[col + 2] - src_y1[col + 2];
    const int magnitude = Abs(left + center * 2 + right);
    dst_sobely[col] = (uint8_t)(clamp255(magnitude));
  }
}
1001 
// Combine Sobel X and Sobel Y rows into a gray ARGB row.
// Each pixel gets B = G = R = min(255, sobelx + sobely) and a 4th byte of 255.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const uint8_t edge = (uint8_t)(clamp255(gx + gy));
    dst_argb[0] = edge;
    dst_argb[1] = edge;
    dst_argb[2] = edge;
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1018 
// Combine Sobel X and Sobel Y rows into a single 8 bit plane row:
// dst_y[i] = min(255, sobelx[i] + sobely[i]).
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    dst_y[col] = (uint8_t)(clamp255(gx + gy));
  }
}
1031 
// Pack Sobel X and Sobel Y rows into an ARGB row, one gradient per channel:
// byte 0 (B) = sobely, byte 1 (G) = min(255, sobelx + sobely),
// byte 2 (R) = sobelx, byte 3 = 255.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int combined = clamp255(gx + gy);
    dst_argb[0] = (uint8_t)(gy);
    dst_argb[1] = (uint8_t)(combined);
    dst_argb[2] = (uint8_t)(gx);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1048 
// Expand a row of 8 bit Y samples to ARGB by copying each Y value into
// B, G and R unchanged and setting the 4th byte to 255.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const uint8_t luma = src_y[col];
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1060 
1061 // TODO(fbarchard): Unify these structures to be platform independent.
1062 // TODO(fbarchard): Generate SIMD structures from float matrix.
1063 
// BT.601 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.596
//  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
//  B = (Y - 16) * 1.164 - U * -2.018
//
// The macros below express these coefficients in 6 bit fixed point (scaled
// by 64). They are temporary: all of them are #undef'ed after the tables.

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// U and V contributions to R,G,B.
// UB is limited to -128; the exact value round(-2.018 * 64) would be -129.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25   /* round(0.391 * 64) */
#define VG 52   /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */

// Bias values to subtract 16 from Y and 128 from U and V.
// YGB also carries the rounding term (64 / 2), so each bias folds in rounding.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Two tables are defined per target: kYuv* for U,V order and kYvu* with the
// U and V coefficients (and the B and R biases) swapped. The element layout
// differs per target; the exact struct members are declared in row.h
// (not visible here), so the row meanings below are inferred from the values.
#if defined(__aarch64__)  // 64 bit arm
// 64 bit arm layout: U/V to B/R coefficients are stored negated (positive),
// followed by the G coefficients, the three biases and the Y scale
// pre-multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
// 32 bit arm layout: same values as 64 bit arm, but the U and V coefficients
// are grouped in runs of four instead of interleaved.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Layout for all other targets: interleaved U,V coefficient pairs for B, G
// and R (with 0 where a channel does not contribute), then the B, G, R biases
// and the Y scale, each replicated 16 times.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Remove the temporary coefficient macros so the next color space can
// redefine them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1144 
// JPEG YUV to RGB reference
// *  R = Y                - V * -1.40200
// *  G = Y - U *  0.34414 - V *  0.71414
// *  B = Y - U * -1.77200
//
// The macros below express these coefficients in 6 bit fixed point (scaled
// by 64). They are temporary: all of them are #undef'ed after the tables.

// Y contribution to R,G,B.  Scale and bias.
// Y has no offset here, so YGB is only the rounding term.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32   /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22   /* round(0.34414 * 64) */
#define VG 46   /* round(0.71414  * 64) */
#define VR -90  /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Two tables are defined per target: kYuv* for U,V order and kYvu* with the
// U and V coefficients (and the B and R biases) swapped. The element layout
// differs per target; the exact struct members are declared in row.h
// (not visible here), so the row meanings below are inferred from the values.
#if defined(__aarch64__)
// 64 bit arm layout: U/V to B/R coefficients are stored negated (positive),
// followed by the G coefficients, the three biases and the Y scale
// pre-multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// 32 bit arm layout: same values as 64 bit arm, but the U and V coefficients
// are grouped in runs of four instead of interleaved.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Layout for all other targets: interleaved U,V coefficient pairs for B, G
// and R (with 0 where a channel does not contribute), then the B, G, R biases
// and the Y scale, each replicated 16 times.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Remove the temporary coefficient macros so the next color space can
// redefine them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1225 
// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.793
//  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html
//
// The macros below express these coefficients in 6 bit fixed point (scaled
// by 64). They are temporary: all of them are #undef'ed after the tables.

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
// UB is limited to -128 (i.e. -2.0); round(-2.112 * 64) would be -135.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14   /* round(0.213 * 64) */
#define VG 34   /* round(0.533  * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
// YGB also carries the "subtract 16 from Y" term, so it is folded in here too.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Two tables are defined per target: kYuv* for U,V order and kYvu* with the
// U and V coefficients (and the B and R biases) swapped. The element layout
// differs per target; the exact struct members are declared in row.h
// (not visible here), so the row meanings below are inferred from the values.
#if defined(__aarch64__)
// 64 bit arm layout: U/V to B/R coefficients are stored negated (positive),
// followed by the G coefficients, the three biases and the Y scale
// pre-multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// 32 bit arm layout: same values as 64 bit arm, but the U and V coefficients
// are grouped in runs of four instead of interleaved.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Layout for all other targets: interleaved U,V coefficient pairs for B, G
// and R (with 0 where a channel does not contribute), then the B, G, R biases
// and the Y scale, each replicated 16 times.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Remove the temporary coefficient macros.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1308 
1309 // C reference code that mimics the YUV assembly.
1310 // Reads 8 bit YUV and leaves result as 16 bit.
1311 
YuvPixel(uint8_t y,uint8_t u,uint8_t v,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1312 static __inline void YuvPixel(uint8_t y,
1313                               uint8_t u,
1314                               uint8_t v,
1315                               uint8_t* b,
1316                               uint8_t* g,
1317                               uint8_t* r,
1318                               const struct YuvConstants* yuvconstants) {
1319 #if defined(__aarch64__)
1320   int ub = -yuvconstants->kUVToRB[0];
1321   int ug = yuvconstants->kUVToG[0];
1322   int vg = yuvconstants->kUVToG[1];
1323   int vr = -yuvconstants->kUVToRB[1];
1324   int bb = yuvconstants->kUVBiasBGR[0];
1325   int bg = yuvconstants->kUVBiasBGR[1];
1326   int br = yuvconstants->kUVBiasBGR[2];
1327   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1328 #elif defined(__arm__)
1329   int ub = -yuvconstants->kUVToRB[0];
1330   int ug = yuvconstants->kUVToG[0];
1331   int vg = yuvconstants->kUVToG[4];
1332   int vr = -yuvconstants->kUVToRB[4];
1333   int bb = yuvconstants->kUVBiasBGR[0];
1334   int bg = yuvconstants->kUVBiasBGR[1];
1335   int br = yuvconstants->kUVBiasBGR[2];
1336   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1337 #else
1338   int ub = yuvconstants->kUVToB[0];
1339   int ug = yuvconstants->kUVToG[0];
1340   int vg = yuvconstants->kUVToG[1];
1341   int vr = yuvconstants->kUVToR[1];
1342   int bb = yuvconstants->kUVBiasB[0];
1343   int bg = yuvconstants->kUVBiasG[0];
1344   int br = yuvconstants->kUVBiasR[0];
1345   int yg = yuvconstants->kYToRgb[0];
1346 #endif
1347 
1348   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1349   *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
1350   *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
1351   *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
1352 }
1353 
1354 // Reads 8 bit YUV and leaves result as 16 bit.
YuvPixel8_16(uint8_t y,uint8_t u,uint8_t v,int * b,int * g,int * r,const struct YuvConstants * yuvconstants)1355 static __inline void YuvPixel8_16(uint8_t y,
1356                                   uint8_t u,
1357                                   uint8_t v,
1358                                   int* b,
1359                                   int* g,
1360                                   int* r,
1361                                   const struct YuvConstants* yuvconstants) {
1362 #if defined(__aarch64__)
1363   int ub = -yuvconstants->kUVToRB[0];
1364   int ug = yuvconstants->kUVToG[0];
1365   int vg = yuvconstants->kUVToG[1];
1366   int vr = -yuvconstants->kUVToRB[1];
1367   int bb = yuvconstants->kUVBiasBGR[0];
1368   int bg = yuvconstants->kUVBiasBGR[1];
1369   int br = yuvconstants->kUVBiasBGR[2];
1370   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1371 #elif defined(__arm__)
1372   int ub = -yuvconstants->kUVToRB[0];
1373   int ug = yuvconstants->kUVToG[0];
1374   int vg = yuvconstants->kUVToG[4];
1375   int vr = -yuvconstants->kUVToRB[4];
1376   int bb = yuvconstants->kUVBiasBGR[0];
1377   int bg = yuvconstants->kUVBiasBGR[1];
1378   int br = yuvconstants->kUVBiasBGR[2];
1379   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1380 #else
1381   int ub = yuvconstants->kUVToB[0];
1382   int ug = yuvconstants->kUVToG[0];
1383   int vg = yuvconstants->kUVToG[1];
1384   int vr = yuvconstants->kUVToR[1];
1385   int bb = yuvconstants->kUVBiasB[0];
1386   int bg = yuvconstants->kUVBiasG[0];
1387   int br = yuvconstants->kUVBiasR[0];
1388   int yg = yuvconstants->kYToRgb[0];
1389 #endif
1390 
1391   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1392   *b = (int)(-(u * ub) + y1 + bb);
1393   *g = (int)(-(u * ug + v * vg) + y1 + bg);
1394   *r = (int)(-(v * vr) + y1 + br);
1395 }
1396 
1397 // C reference code that mimics the YUV 16 bit assembly.
1398 // Reads 10 bit YUV and leaves result as 16 bit.
YuvPixel16(int16_t y,int16_t u,int16_t v,int * b,int * g,int * r,const struct YuvConstants * yuvconstants)1399 static __inline void YuvPixel16(int16_t y,
1400                                 int16_t u,
1401                                 int16_t v,
1402                                 int* b,
1403                                 int* g,
1404                                 int* r,
1405                                 const struct YuvConstants* yuvconstants) {
1406 #if defined(__aarch64__)
1407   int ub = -yuvconstants->kUVToRB[0];
1408   int ug = yuvconstants->kUVToG[0];
1409   int vg = yuvconstants->kUVToG[1];
1410   int vr = -yuvconstants->kUVToRB[1];
1411   int bb = yuvconstants->kUVBiasBGR[0];
1412   int bg = yuvconstants->kUVBiasBGR[1];
1413   int br = yuvconstants->kUVBiasBGR[2];
1414   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1415 #elif defined(__arm__)
1416   int ub = -yuvconstants->kUVToRB[0];
1417   int ug = yuvconstants->kUVToG[0];
1418   int vg = yuvconstants->kUVToG[4];
1419   int vr = -yuvconstants->kUVToRB[4];
1420   int bb = yuvconstants->kUVBiasBGR[0];
1421   int bg = yuvconstants->kUVBiasBGR[1];
1422   int br = yuvconstants->kUVBiasBGR[2];
1423   int yg = yuvconstants->kYToRgb[0] / 0x0101;
1424 #else
1425   int ub = yuvconstants->kUVToB[0];
1426   int ug = yuvconstants->kUVToG[0];
1427   int vg = yuvconstants->kUVToG[1];
1428   int vr = yuvconstants->kUVToR[1];
1429   int bb = yuvconstants->kUVBiasB[0];
1430   int bg = yuvconstants->kUVBiasG[0];
1431   int br = yuvconstants->kUVBiasR[0];
1432   int yg = yuvconstants->kYToRgb[0];
1433 #endif
1434 
1435   uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
1436   u = clamp255(u >> 2);
1437   v = clamp255(v >> 2);
1438   *b = (int)(-(u * ub) + y1 + bb);
1439   *g = (int)(-(u * ug + v * vg) + y1 + bg);
1440   *r = (int)(-(v * vr) + y1 + br);
1441 }
1442 
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB: YuvPixel16() produces the
// wide fixed point values, which are then shifted down by 6 and clamped.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int wide_b;
  int wide_g;
  int wide_r;
  YuvPixel16(y, u, v, &wide_b, &wide_g, &wide_r, yuvconstants);
  wide_b >>= 6;
  wide_g >>= 6;
  wide_r >>= 6;
  *b = Clamp(wide_b);
  *g = Clamp(wide_g);
  *r = Clamp(wide_r);
}
1460 
// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// C reference code that mimics the YUV assembly.
// Converts one 8 bit Y sample to gray: the same scaled, biased and clamped
// value is written to B, G and R.
static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
  // Widen Y to 16 bits by replication, scale it, and keep the high 16 bits.
  const uint32_t luma = (uint32_t)(y * 0x0101 * YG) >> 16;
  const int32_t biased = (int32_t)(luma + YGB);
  const uint8_t gray = Clamp(biased >> 6);
  *b = gray;
  *g = gray;
  *r = gray;
}

#undef YG
#undef YGB
1475 
1476 #if !defined(LIBYUV_DISABLE_NEON) && \
1477     (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1478 // C mimic assembly.
1479 // TODO(fbarchard): Remove subsampling from Neon.
// Convert a row of I444 (one U and V per pixel) to ARGB.
// This variant processes pixels in pairs and uses the rounded average of the
// two U samples and of the two V samples for both pixels of the pair. A
// trailing odd pixel uses its own U and V. The 4th output byte is set to 255.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col;
  for (col = 0; col < width - 1; col += 2) {
    const uint8_t avg_u = (src_u[0] + src_u[1] + 1) >> 1;
    const uint8_t avg_v = (src_v[0] + src_v[1] + 1) >> 1;
    uint8_t* const first = rgb_buf;
    uint8_t* const second = rgb_buf + 4;
    YuvPixel(src_y[0], avg_u, avg_v, first + 0, first + 1, first + 2,
             yuvconstants);
    first[3] = 255;
    YuvPixel(src_y[1], avg_u, avg_v, second + 0, second + 1, second + 2,
             yuvconstants);
    second[3] = 255;
    src_y += 2;
    src_u += 2;
    src_v += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Odd width: convert the last pixel on its own.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1507 #else
// Convert a row of I444 (one U and V per pixel) to ARGB, one pixel at a time.
// The 4th output byte of every pixel is set to 255.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int col;
  for (col = 0; col < width; ++col) {
    uint8_t* const pixel = rgb_buf + col * 4;
    YuvPixel(src_y[col], src_u[col], src_v[col], pixel + 0, pixel + 1,
             pixel + 2, yuvconstants);
    pixel[3] = 255;
  }
}
1525 #endif
1526 
// Also used for 420.
// Each U/V sample is shared by two horizontally adjacent Y samples.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1553 
// 10 bit YUV to ARGB.
// Samples are read as uint16_t; each U/V sample serves two Y samples.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
               &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1580 
// Packs three channel values into one 32-bit word and stores it at rgb_buf.
// Each channel is shifted right by 4 and clamped with Clamp10, then placed at
// bits 0-9 (b), 10-19 (g) and 20-29 (r); the top two bits are always set.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  const uint32_t b10 = Clamp10(b >> 4);
  const uint32_t g10 = Clamp10(g >> 4);
  const uint32_t r10 = Clamp10(r >> 4);
  *(uint32_t*)rgb_buf = b10 | (g10 << 10) | (r10 << 20) | 0xc0000000;
}
1592 
// 10 bit YUV to 10 bit AR30.
// Each U/V sample serves two Y samples; every pixel is 4 output bytes.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  // Channel values produced by YuvPixel16 and handed to StoreAR30.
  int b;
  int g;
  int r;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(&rgb_buf[0], b, g, r);
    YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(&rgb_buf[4], b, g, r);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
1619 
// 8 bit YUV to 10 bit AR30.
// Same structure as the 10 bit path: YuvPixel8_16 produces the channel values
// from the 8 bit samples and StoreAR30 packs them.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(&rgb_buf[0], b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(&rgb_buf[4], b, g, r);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
1647 
// I422 plus a separate alpha plane to ARGB. Color comes from Y/U/V (one U/V
// sample per two pixels); byte 3 of each output pixel is copied from src_a.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
1675 
// I422 to 3-byte-per-pixel output (bytes written in b, g, r order, no alpha).
// One U/V sample is shared by two pixels.
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 6;  // Two 3-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1698 
// I422 to 16-bit pixels holding 4 bits per channel: b in bits 0-3, g in bits
// 4-7, r in bits 8-11 and the top nibble set to 0xf.
void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // 8-bit channel scratch for the two pixels converted per iteration.
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    uint32_t first;
    uint32_t second;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 4 bits of each channel.
    first = (uint32_t)(b0 >> 4) | ((uint32_t)(g0 >> 4) << 4) |
            ((uint32_t)(r0 >> 4) << 8) | 0xf000u;
    second = (uint32_t)(b1 >> 4) | ((uint32_t)(g1 >> 4) << 4) |
             ((uint32_t)(r1 >> 4) << 8) | 0xf000u;
    // Both pixels go out as one 32-bit store, first pixel in the low half.
    *(uint32_t*)(dst_argb4444) = first | (second << 16);
    src_y += 2;
    ++src_u;
    ++src_v;
    dst_argb4444 += 4;  // Two 2-byte pixels.
  }
  // Odd width: the last pixel is written as a single 16-bit value.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    *(uint16_t*)(dst_argb4444) =
        (uint16_t)((b0 >> 4) | ((g0 >> 4) << 4) | ((r0 >> 4) << 8) | 0xf000);
  }
}
1736 
// I422 to 16-bit pixels holding 5 bits per channel: b in bits 0-4, g in bits
// 5-9, r in bits 10-14 and bit 15 set.
void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // 8-bit channel scratch for the two pixels converted per iteration.
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    uint32_t first;
    uint32_t second;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 5 bits of each channel.
    first = (uint32_t)(b0 >> 3) | ((uint32_t)(g0 >> 3) << 5) |
            ((uint32_t)(r0 >> 3) << 10) | 0x8000u;
    second = (uint32_t)(b1 >> 3) | ((uint32_t)(g1 >> 3) << 5) |
             ((uint32_t)(r1 >> 3) << 10) | 0x8000u;
    // Both pixels go out as one 32-bit store, first pixel in the low half.
    *(uint32_t*)(dst_argb1555) = first | (second << 16);
    src_y += 2;
    ++src_u;
    ++src_v;
    dst_argb1555 += 4;  // Two 2-byte pixels.
  }
  // Odd width: the last pixel is written as a single 16-bit value.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    *(uint16_t*)(dst_argb1555) =
        (uint16_t)((b0 >> 3) | ((g0 >> 3) << 5) | ((r0 >> 3) << 10) | 0x8000);
  }
}
1774 
// I422 to 16-bit pixels: b (5 bits) in bits 0-4, g (6 bits) in bits 5-10 and
// r (5 bits) in bits 11-15. One U/V sample is shared by two pixels.
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  // 8-bit channel scratch for the two pixels converted per iteration.
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    uint32_t first;
    uint32_t second;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Pack in uint32_t. A uint8_t operand promotes to signed int, so shifting
    // the second pixel's red into bits 27-31 as an int would overflow into
    // the sign bit (undefined behavior) whenever the 5-bit red is >= 16.
    first = (uint32_t)(b0 >> 3) | ((uint32_t)(g0 >> 2) << 5) |
            ((uint32_t)(r0 >> 3) << 11);
    second = (uint32_t)(b1 >> 3) | ((uint32_t)(g1 >> 2) << 5) |
             ((uint32_t)(r1 >> 3) << 11);
    // Both pixels go out as one 32-bit store, first pixel in the low half.
    *(uint32_t*)(dst_rgb565) = first | (second << 16);
    src_y += 2;
    ++src_u;
    ++src_v;
    dst_rgb565 += 4;  // Two 2-byte pixels.
  }
  // Odd width: the last pixel is written as a single 16-bit value.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    *(uint16_t*)(dst_rgb565) =
        (uint16_t)((b0 >> 3) | ((g0 >> 2) << 5) | ((r0 >> 3) << 11));
  }
}
1812 
// NV12 (Y plane plus interleaved U,V bytes) to ARGB. src_uv[0] is passed as
// U and src_uv[1] as V; one U,V pair covers two pixels.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;   // Next U,V pair.
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1836 
// NV21 (Y plane plus interleaved V,U bytes) to ARGB. src_vu[1] is passed as
// U and src_vu[0] as V; one V,U pair covers two pixels.
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;   // Next V,U pair.
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1860 
// NV12 (Y plane plus interleaved U,V bytes) to 3-byte-per-pixel output
// written in b, g, r order. One U,V pair covers two pixels.
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_uv += 2;   // Next U,V pair.
    rgb_buf += 6;  // Two 3-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1881 
// NV21 (Y plane plus interleaved V,U bytes) to 3-byte-per-pixel output
// written in b, g, r order. One V,U pair covers two pixels.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_vu += 2;   // Next V,U pair.
    rgb_buf += 6;  // Two 3-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1902 
// NV12 (Y plane plus interleaved U,V bytes) to 16-bit pixels: b (5 bits) in
// bits 0-4, g (6 bits) in bits 5-10 and r (5 bits) in bits 11-15.
void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  // 8-bit channel scratch for the two pixels converted per iteration.
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    uint32_t first;
    uint32_t second;
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    // Pack in uint32_t. A uint8_t operand promotes to signed int, so shifting
    // the second pixel's red into bits 27-31 as an int would overflow into
    // the sign bit (undefined behavior) whenever the 5-bit red is >= 16.
    first = (uint32_t)(b0 >> 3) | ((uint32_t)(g0 >> 2) << 5) |
            ((uint32_t)(r0 >> 3) << 11);
    second = (uint32_t)(b1 >> 3) | ((uint32_t)(g1 >> 2) << 5) |
             ((uint32_t)(r1 >> 3) << 11);
    // Both pixels go out as one 32-bit store, first pixel in the low half.
    *(uint32_t*)(dst_rgb565) = first | (second << 16);
    src_y += 2;
    src_uv += 2;      // Next U,V pair.
    dst_rgb565 += 4;  // Two 2-byte pixels.
  }
  // Odd width: the last pixel is written as a single 16-bit value.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    *(uint16_t*)(dst_rgb565) =
        (uint16_t)((b0 >> 3) | ((g0 >> 2) << 5) | ((r0 >> 3) << 11));
  }
}
1938 
// Packed YUY2 to ARGB. Each 4-byte group is read as Y0 (byte 0), U (byte 1),
// Y1 (byte 2), V (byte 3) and yields two pixels sharing U and V.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;  // Next 2-pixel group.
    rgb_buf += 8;   // Two 4-byte pixels.
  }
  // Odd width: only the first Y of the last group is used; its U and V bytes
  // are still read from that group.
  if (remaining & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1960 
// Packed UYVY to ARGB. Each 4-byte group is read as U (byte 0), Y0 (byte 1),
// V (byte 2), Y1 (byte 3) and yields two pixels sharing U and V.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;  // Next 2-pixel group.
    rgb_buf += 8;   // Two 4-byte pixels.
  }
  // Odd width: only the first Y of the last group is used.
  if (remaining & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1982 
// I422 to 4-byte pixels with 255 in byte 0 and the b, g, r outputs of
// YuvPixel in bytes 1-3. One U/V sample is shared by two pixels.
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[5], &rgb_buf[6],
             &rgb_buf[7], yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
  }
}
2008 
// Luma-only row to ARGB: YPixel fills bytes 0-2 of each pixel from one Y
// sample and byte 3 is set to 255.
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
    YPixel(src_y[1], &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Two 4-byte pixels.
  }
  // Odd width: the last pixel is converted by itself.
  if (remaining & 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
  }
}
2024 
// Writes the width bytes of src to dst in reverse order.
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* in = src + (width - 1);  // Last source byte.
  uint8_t* out = dst;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    out[0] = in[0];
    out[1] = in[-1];
    out += 2;
    in -= 2;
  }
  // Odd width: one byte is left over for the final output position.
  if (remaining & 1) {
    dst[width - 1] = in[0];
  }
}
2037 
// Reverses a row of interleaved byte pairs while de-interleaving it: the
// first byte of each pair goes to dst_u, the second to dst_v. width counts
// pairs.
void MirrorUVRow_C(const uint8_t* src_uv,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* in = src_uv + ((width - 1) << 1);  // Last pair.
  uint8_t* u_out = dst_u;
  uint8_t* v_out = dst_v;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    u_out[0] = in[0];
    u_out[1] = in[-2];
    v_out[0] = in[1];
    v_out[1] = in[-1];
    in -= 4;
    u_out += 2;
    v_out += 2;
  }
  // Odd width: one pair is left over for the final output position.
  if (remaining & 1) {
    dst_u[width - 1] = in[0];
    dst_v[width - 1] = in[1];
  }
}
2056 
// Reverses a row of 4-byte pixels. Pixels are moved as whole uint32_t values,
// so the bytes inside each pixel keep their order.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint32_t* in = (const uint32_t*)(src) + (width - 1);  // Last pixel.
  uint32_t* const out_row = (uint32_t*)(dst);
  uint32_t* out = out_row;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    out[0] = in[0];
    out[1] = in[-1];
    out += 2;
    in -= 2;
  }
  // Odd width: one pixel is left over for the final output position.
  if (remaining & 1) {
    out_row[width - 1] = in[0];
  }
}
2071 
// De-interleaves width byte pairs: the first byte of each pair goes to dst_u
// and the second to dst_v.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  uint8_t* u_out = dst_u;
  uint8_t* v_out = dst_v;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    u_out[0] = src_uv[0];
    u_out[1] = src_uv[2];
    v_out[0] = src_uv[1];
    v_out[1] = src_uv[3];
    src_uv += 4;
    u_out += 2;
    v_out += 2;
  }
  // Odd width: one pair is left over.
  if (remaining & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
2089 
// Interleaves width bytes of src_u and src_v into dst_uv as u, v, u, v, ...
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  const uint8_t* u_in = src_u;
  const uint8_t* v_in = src_v;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    dst_uv[0] = u_in[0];
    dst_uv[1] = v_in[0];
    dst_uv[2] = u_in[1];
    dst_uv[3] = v_in[1];
    u_in += 2;
    v_in += 2;
    dst_uv += 4;
  }
  // Odd width: one sample of each plane is left over.
  if (remaining & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
2107 
// De-interleaves width 3-byte pixels: byte 0 of each pixel goes to dst_r,
// byte 1 to dst_g and byte 2 to dst_b.
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_r++ = src_rgb[0];
    *dst_g++ = src_rgb[1];
    *dst_b++ = src_rgb[2];
    src_rgb += 3;  // Next packed pixel.
  }
}
2121 
// Interleaves width samples from three planes into 3-byte pixels, in the
// order src_r, src_g, src_b.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    dst_rgb[0] = *src_r++;
    dst_rgb[1] = *src_g++;
    dst_rgb[2] = *src_b++;
    dst_rgb += 3;  // Next packed pixel.
  }
}
2135 
// Interleaves 16-bit U and V samples, multiplying each by scale on the way.
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 128 = 9 bits
// 64 = 10 bits
// 16 = 12 bits
// 1 = 16 bits
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int scale,
                     int width) {
  const uint16_t* u_in = src_u;
  const uint16_t* v_in = src_v;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    dst_uv[0] = (uint16_t)(u_in[0] * scale);
    dst_uv[1] = (uint16_t)(v_in[0] * scale);
    dst_uv[2] = (uint16_t)(u_in[1] * scale);
    dst_uv[3] = (uint16_t)(v_in[1] * scale);
    u_in += 2;
    v_in += 2;
    dst_uv += 4;
  }
  // Odd width: one sample of each plane is left over.
  if (remaining & 1) {
    dst_uv[0] = (uint16_t)(src_u[width - 1] * scale);
    dst_uv[1] = (uint16_t)(src_v[width - 1] * scale);
  }
}
2159 
// Multiplies each of width 16-bit samples by scale; the product is narrowed
// to uint16_t on store.
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_y++ = (uint16_t)(*src_y++ * scale);
  }
}
2169 
// Narrows 16-bit samples to 8 bits: each sample is multiplied by scale,
// shifted right by 16 and passed through clamp255.
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int scaled = (*src_y++ * scale) >> 16;
    *dst_y++ = (uint8_t)clamp255(scaled);
  }
}
2184 
// Widens 8-bit samples to 16-bit storage.
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 1024 = 10 bits
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  // Multiplying by 0x0101 replicates the source byte into both bytes of a
  // 16-bit value; folding it into the scale does that once, outside the loop.
  const int replicated_scale = scale * 0x0101;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_y++ = (uint16_t)((*src_y++ * replicated_scale) >> 16);
  }
}
2197 
// Copies count bytes from src to dst with memcpy, so the two regions must
// not overlap.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  const size_t bytes = (size_t)count;
  memcpy(dst, src, bytes);
}
2201 
// Copies count 16-bit samples from src to dst with memcpy, so the two
// regions must not overlap.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  // Compute the byte count in size_t: the int product count * 2 overflows
  // once count exceeds INT_MAX / 2.
  memcpy(dst, src, (size_t)count * sizeof(*src));
}
2205 
// Fills width bytes at dst with the value v8.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  const size_t bytes = (size_t)width;
  memset(dst, v8, bytes);
}
2209 
// Fills width 4-byte pixels at dst_argb with the 32-bit value v32. The
// destination is written through a uint32_t pointer.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  uint32_t* out = (uint32_t*)(dst_argb);
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *out++ = v32;
  }
}
2217 
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
// One U and one V are produced per 4-byte group (two pixels); each is the
// rounded average of this row and the row src_stride_yuy2 bytes away.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    const uint8_t* other_row = src_yuy2 + src_stride_yuy2;
    *dst_u++ = (uint8_t)((src_yuy2[1] + other_row[1] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_yuy2[3] + other_row[3] + 1) >> 1);
    src_yuy2 += 4;  // Next 2-pixel group.
  }
}
2234 
// Copy row of YUY2 UV's (422) into U and V (422).
// Byte 1 of each 4-byte group goes to dst_u and byte 3 to dst_v.
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;  // Next 2-pixel group.
  }
}
2250 
// Copy row of YUY2 Y's (422) into Y (420/422).
// Bytes 0 and 2 of each 4-byte group are the two Y samples.
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  uint8_t* out = dst_y;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    out[0] = src_yuy2[0];
    out[1] = src_yuy2[2];
    out += 2;
    src_yuy2 += 4;  // Next 2-pixel group.
  }
  // Odd width: take only the first Y of the last group.
  if (remaining & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}
2264 
// Filter 2 rows of UYVY UV's (422) into U and V (420).
// One U and one V are produced per 4-byte group (two pixels); each is the
// rounded average of this row and the row src_stride_uyvy bytes away.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    const uint8_t* other_row = src_uyvy + src_stride_uyvy;
    *dst_u++ = (uint8_t)((src_uyvy[0] + other_row[0] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_uyvy[2] + other_row[2] + 1) >> 1);
    src_uyvy += 4;  // Next 2-pixel group.
  }
}
2281 
// Copy row of UYVY UV's (422) into U and V (422).
// Byte 0 of each 4-byte group goes to dst_u and byte 2 to dst_v.
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;  // Next 2-pixel group.
  }
}
2297 
// Copy row of UYVY Y's (422) into Y (420/422).
// Bytes 1 and 3 of each 4-byte group are the two Y samples.
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  uint8_t* out = dst_y;
  int remaining = width;
  for (; remaining >= 2; remaining -= 2) {
    out[0] = src_uyvy[1];
    out[1] = src_uyvy[3];
    out += 2;
    src_uyvy += 4;  // Next 2-pixel group.
  }
  // Odd width: take only the first Y of the last group.
  if (remaining & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}
2311 
// Blends one foreground channel over one background channel:
//   f + (((256 - a) * b) >> 8), saturated to 255.
// f is added unscaled, so the foreground is presumably premultiplied by its
// alpha; for such input the sum already fits in 8 bits. Saturating keeps any
// other input from wrapping around when the result is narrowed to uint8_t.
static __inline uint8_t BlendChannel(uint32_t f, uint32_t b, uint32_t a) {
  const uint32_t v = (((256u - a) * b) >> 8) + f;  // At most 255 + 255.
  // Branchless saturate: v >> 8 is 1 exactly when v exceeds 255.
  return (uint8_t)((v | (0u - (v >> 8))) & 255u);
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
// Pixels are 4 bytes; byte 3 of src_argb0 is the alpha used for the blend and
// byte 3 of every output pixel is set to 255.
void ARGBBlendRow_C(const uint8_t* src_argb0,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Finish every read of this pixel before writing, so that dst_argb may
    // alias either source.
    const uint32_t a = src_argb0[3];
    const uint8_t out_b = BlendChannel(src_argb0[0], src_argb1[0], a);
    const uint8_t out_g = BlendChannel(src_argb0[1], src_argb1[1], a);
    const uint8_t out_r = BlendChannel(src_argb0[2], src_argb1[2], a);
    dst_argb[0] = out_b;
    dst_argb[1] = out_g;
    dst_argb[2] = out_r;
    dst_argb[3] = 255u;
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
2366 
// Blend two 8-bit planes using a per-sample alpha plane:
//   dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8
// alpha == 255 selects src0 and alpha == 0 selects src1. Samples are handled
// in pairs, with a single trailing sample when width is odd.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    int k;
    for (k = 0; k < 2; ++k) {
      const int a = alpha[k];
      dst[k] = (a * src0[k] + (255 - a) * src1[k] + 255) >> 8;
    }
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    const int a = alpha[0];
    dst[0] = (a * src0[0] + (255 - a) * src1[0] + 255) >> 8;
  }
}
2387 
// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
//
// Both the channel and the alpha are widened to 16 bits by byte replication
// (v * 0x0101 == v | (v << 8) for 8-bit v); the top 8 bits of their 32-bit
// product are the attenuated channel. Alpha itself is copied unchanged.
// Pixels are handled in pairs, plus one trailing pixel for odd widths.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    int p;
    for (p = 0; p < 8; p += 4) {
      // Read the whole pixel before writing so in-place use is safe.
      const uint32_t b16 = src_argb[p + 0] * 0x0101u;
      const uint32_t g16 = src_argb[p + 1] * 0x0101u;
      const uint32_t r16 = src_argb[p + 2] * 0x0101u;
      const uint32_t alpha = src_argb[p + 3];
      const uint32_t a16 = alpha * 0x0101u;
      dst_argb[p + 0] = (a16 * b16) >> 24;
      dst_argb[p + 1] = (a16 * g16) >> 24;
      dst_argb[p + 2] = (a16 * r16) >> 24;
      dst_argb[p + 3] = alpha;
    }
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    const uint32_t b16 = src_argb[0] * 0x0101u;
    const uint32_t g16 = src_argb[1] * 0x0101u;
    const uint32_t r16 = src_argb[2] * 0x0101u;
    const uint32_t alpha = src_argb[3];
    const uint32_t a16 = alpha * 0x0101u;
    dst_argb[0] = (a16 * b16) >> 24;
    dst_argb[1] = (a16 * g16) >> 24;
    dst_argb[2] = (a16 * r16) >> 24;
    dst_argb[3] = alpha;
  }
}
2427 
// Divide source RGB by alpha and store to destination.
// The exact result would be:
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
//
// fixed_invtbl8 is indexed by alpha. Each entry is an 8.8 fixed point
// inverse with 1.0 (0x0100) in the upper short and 1 / a (0x10000 / a) in
// the lower short. ARGBUnattenuateRow_C uses only the lower short.
// Three entries are written out by hand instead of using T():
//   [0]   0x01000000 - T(0) would divide by zero; the reciprocal is 0.
//   [1]   0x0100ffff - 0x10000 does not fit in 16 bits, so 0xffff is used.
//   [255] 0x01000100 - exactly 1.0 rather than T(0xff) == 0x01000101.
// T(a) is not parenthesized as a whole; it is only safe as a bare
// initializer element, which is how it is used here.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
#undef T
2474 
ARGBUnattenuateRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)2475 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
2476                           uint8_t* dst_argb,
2477                           int width) {
2478   int i;
2479   for (i = 0; i < width; ++i) {
2480     uint32_t b = src_argb[0];
2481     uint32_t g = src_argb[1];
2482     uint32_t r = src_argb[2];
2483     const uint32_t a = src_argb[3];
2484     const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
2485     b = (b * ia) >> 8;
2486     g = (g * ia) >> 8;
2487     r = (r * ia) >> 8;
2488     // Clamping should not be necessary but is free in assembly.
2489     dst_argb[0] = clamp255(b);
2490     dst_argb[1] = clamp255(g);
2491     dst_argb[2] = clamp255(r);
2492     dst_argb[3] = a;
2493     src_argb += 4;
2494     dst_argb += 4;
2495   }
2496 }
2497 
// Build one row of a 4-channel cumulative sum table.
// For every pixel x and channel c:
//   cumsum[x][c] = (row[0][c] + ... + row[x][c]) + previous_cumsum[x][c]
// where row holds 4 bytes per pixel and both sum rows hold 4 int32 per pixel.
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t running[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t* pixel = row + x * 4;
    const int32_t* above = previous_cumsum + x * 4;
    int32_t* out = cumsum + x * 4;
    int c;
    // Accumulate all four channels first, then emit them.
    for (c = 0; c < 4; ++c) {
      running[c] += pixel[c];
    }
    for (c = 0; c < 4; ++c) {
      out[c] = running[c] + above[c];
    }
  }
}
2515 
// Turn cumulative sums into per-channel box averages for count pixels.
// tl and bl point into two rows of 4-channel int32 cumulative sums
// (presumably the top and bottom edges of the box - confirm with callers),
// and w is the offset, in int32 elements, from the left to the right edge.
// For each channel the box sum bl[w] + tl[0] - bl[0] - tl[w] is multiplied by
// 1 / area in float and truncated to uint8_t.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  const float inv_area = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      const int32_t box_sum = bl[w + c] + tl[c] - bl[c] - tl[w + c];
      dst[c] = (uint8_t)(box_sum * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
2534 
// Copy pixels from rotated source to destination row with a slope.
// uv_dudv[0] and uv_dudv[1] are the starting source coordinates (u, v);
// uv_dudv[2] and uv_dudv[3] are added to them after every output pixel.
// Each coordinate is truncated to an integer and the 4-byte pixel at
// src_argb + y * src_argb_stride + x * 4 is copied. No bounds checking is
// performed on the computed coordinates.
LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    // Move the pixel with memcpy through a local instead of dereferencing
    // uint32_t pointers: the byte buffers carry no 4-byte alignment guarantee
    // and are not uint32_t objects.
    uint32_t pixel;
    memcpy(&pixel, src_argb + y * src_argb_stride + x * 4, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
2557 
// Average two rows into one with rounding:
//   dst_uv[x] = (src_uv[x] + src_uv[x + src_uv_stride] + 1) >> 1
// src_uv_stride is the element offset from the first row to the second.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x = 0;
  while (x < width) {
    const int first = src_uv[x];
    const int second = src_uv[src_uv_stride + x];
    dst_uv[x] = (first + second + 1) >> 1;
    ++x;
  }
}
2568 
// 16-bit version of the two-row rounded average:
//   dst_uv[x] = (src_uv[x] + src_uv[x + src_uv_stride] + 1) >> 1
// src_uv_stride is the element (not byte) offset between the two rows.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int x = 0;
  while (x < width) {
    const int first = src_uv[x];
    const int second = src_uv[src_uv_stride + x];
    dst_uv[x] = (first + second + 1) >> 1;
    ++x;
  }
}
2578 
// C version 2x2 -> 2x1.
// Vertically blends the row at src_ptr with the row src_stride bytes after
// it, weighting the second row by source_y_fraction / 256 and the first by
// the remainder, with rounding (+128 before the shift).
// Two fractions are special-cased: 0 copies the first row with memcpy and
// 128 is delegated to HalfRow_C.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int remaining;
  switch (weight1) {
    case 0:
      memcpy(dst_ptr, src_ptr, width);
      return;
    case 128:
      HalfRow_C(src_ptr, src_stride, dst_ptr, width);
      return;
    default:
      break;
  }
  // Two samples per iteration; a trailing odd sample is handled below.
  for (remaining = width; remaining > 1; remaining -= 2) {
    dst_ptr[0] = (src_ptr[0] * weight0 + src_ptr1[0] * weight1 + 128) >> 8;
    dst_ptr[1] = (src_ptr[1] * weight0 + src_ptr1[1] * weight1 + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * weight0 + src_ptr1[0] * weight1 + 128) >> 8;
  }
}
2611 
// 16-bit C version 2x2 -> 2x1.
// Vertically blends the row at src_ptr with the row src_stride elements
// after it, weighting the second row by source_y_fraction / 256 and the
// first by the remainder.
// The blend rounds to nearest (+128 before the shift), matching the 8-bit
// InterpolateRow_C and the HalfRow_16_C fast path used for a fraction of
// 128, so results do not jump between fraction 128 and its neighbours.
// A fraction of 0 copies the first row. A width of zero or less produces no
// blended output.
void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);  // 2 bytes per sample.
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width; ++x) {
    // With weights summing to 256 the largest intermediate is
    // 65535 * 256 + 128, which fits comfortably in an int.
    dst_ptr[x] = (uint16_t)(
        (src_ptr[x] * y0_fraction + src_ptr1[x] * y1_fraction + 128) >> 8);
  }
}
2640 
// Reorder the bytes of every 4-byte pixel using the first 4 shuffler values:
// output byte k of a pixel is input byte shuffler[k] of the same pixel
// position. The shuffler is read once, before any pixel is written.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  int order[4];
  int k;
  int x;
  for (k = 0; k < 4; ++k) {
    order[k] = shuffler[k];
  }
  for (x = 0; x < width; ++x) {
    // Gather the whole pixel before storing, to support in-place conversion.
    uint8_t picked[4];
    for (k = 0; k < 4; ++k) {
      picked[k] = src_argb[order[k]];
    }
    for (k = 0; k < 4; ++k) {
      dst_argb[k] = picked[k];
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2666 
// Pack planar Y, U and V rows into YUY2: each pair of pixels becomes the
// 4 bytes Y0, U, Y1, V. For an odd width the final group is written with
// its second luma byte set to 0.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  const int pairs = (width > 1) ? (width >> 1) : 0;
  int p;
  for (p = 0; p < pairs; ++p) {
    uint8_t* out = dst_frame + p * 4;
    out[0] = src_y[p * 2];
    out[1] = src_u[p];
    out[2] = src_y[p * 2 + 1];
    out[3] = src_v[p];
  }
  if (width & 1) {
    uint8_t* out = dst_frame + pairs * 4;
    out[0] = src_y[pairs * 2];
    out[1] = src_u[pairs];
    out[2] = 0;  // No second pixel in the final group.
    out[3] = src_v[pairs];
  }
}
2690 
// Pack planar Y, U and V rows into UYVY: each pair of pixels becomes the
// 4 bytes U, Y0, V, Y1. For an odd width the final group is written with
// its second luma byte set to 0.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  const int pairs = (width > 1) ? (width >> 1) : 0;
  int p;
  for (p = 0; p < pairs; ++p) {
    uint8_t* out = dst_frame + p * 4;
    out[0] = src_u[p];
    out[1] = src_y[p * 2];
    out[2] = src_v[p];
    out[3] = src_y[p * 2 + 1];
  }
  if (width & 1) {
    uint8_t* out = dst_frame + pairs * 4;
    out[0] = src_u[pairs];
    out[1] = src_y[pairs * 2];
    out[2] = src_v[pairs];
    out[3] = 0;  // No second pixel in the final group.
  }
}
2714 
// Apply a per-channel cubic polynomial to every byte of an ARGB row.
// For channel c (0..3) with input value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3
// evaluated in float, converted to int32_t and clamped with Clamp().
void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float result[4];
    int c;
    // Evaluate all four channels before storing any of them.
    for (c = 0; c < 4; ++c) {
      const float v = (float)(src_argb[c]);
      const float v2 = v * v;
      float acc = poly[c] + poly[c + 4] * v;
      const float v3 = v2 * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      result[c] = acc;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32_t)(result[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2754 
// Convert a row of unsigned 16-bit samples to 16-bit half floats.
// Samples are assumed to be unsigned in the low 9, 10 or 12 bits; scale
// adjusts the source integer range to the half float range desired.
//
// The constant 1.9259299444e-34f is 2^-112. Multiplying by it is the same as
// subtracting 112 from the exponent, which is the difference in exponent
// bias between 32-bit and 16-bit floats. After that subtraction the low bits
// of the exponent and the high bits of the mantissa are simply extracted
// from the float with a 13-bit shift.

// Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  const float rebias = 1.9259299444e-34f * scale;
  int x = 0;
  while (x < width) {
    const float biased = src[x] * rebias;
    // Reinterpret the float's bits; the alias-safe typedef keeps GCC quiet.
    const uint32_alias_t* bits = (const uint32_alias_t*)&biased;
    dst[x] = (uint16_t)(*bits >> 13);
    ++x;
  }
}
2783 
// Convert a row of bytes to floats, multiplying each sample by scale.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int x = 0;
  while (x < width) {
    const float scaled = src[x] * scale;
    dst[x] = scaled;
    ++x;
  }
}
2791 
// Remap the colour bytes of an ARGB row through a luminance-selected table.
// lumacoeff packs three 8-bit weights: bits 0-7 for byte 0, bits 8-15 for
// byte 1 and bits 16-23 for byte 2 of each pixel. The weighted sum of a
// pixel's three colour bytes, masked with 0x7F00, is the byte offset of a
// 256-entry sub-table inside luma (luminance in rows, colour values in
// columns). Each colour byte is replaced by its sub-table entry; byte 3 is
// copied unchanged. Pixels are handled in pairs, plus one trailing pixel for
// odd widths.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t weight0 = lumacoeff & 0xff;
  const uint32_t weight1 = (lumacoeff >> 8) & 0xff;
  const uint32_t weight2 = (lumacoeff >> 16) & 0xff;

  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    int p;
    for (p = 0; p < 8; p += 4) {
      const uint8_t* px = src_argb + p;
      const uint8_t* table =
          luma +
          ((px[0] * weight0 + px[1] * weight1 + px[2] * weight2) & 0x7F00u);
      dst_argb[p + 0] = table[px[0]];
      dst_argb[p + 1] = table[px[1]];
      dst_argb[p + 2] = table[px[2]];
      dst_argb[p + 3] = px[3];
    }
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    const uint8_t* table =
        luma + ((src_argb[0] * weight0 + src_argb[1] * weight1 +
                 src_argb[2] * weight2) &
                0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}
2833 
// Copy byte 3 (alpha) of every 4-byte source pixel into byte 3 of the
// matching destination pixel, leaving the other destination bytes untouched.
// Pixels are handled in pairs, plus one trailing pixel for odd widths.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  // i indexes the second pixel of each pair.
  for (i = 1; i < width; i += 2) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}
2846 
// Gather byte 3 (alpha) of every 4-byte ARGB pixel into a packed row of
// single bytes. Pixels are handled in pairs, plus one trailing pixel for odd
// widths.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  // i indexes the second pixel of each pair.
  for (i = 1; i < width; i += 2) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    src_argb += 8;
    dst_a += 2;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}
2859 
// Store a packed row of single bytes into byte 3 (alpha) of successive
// 4-byte destination pixels, leaving the other destination bytes untouched.
// Pixels are handled in pairs, plus one trailing pixel for odd widths.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  // i indexes the second pixel of each pair.
  for (i = 1; i < width; i += 2) {
    dst[3] = src[0];
    dst[7] = src[1];
    src += 2;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}
2872 
// Maximum temporary width for wrappers to process at a time, in pixels.
// The two-step wrappers that follow size their intermediate ARGB row buffer
// as MAXTWIDTH * 4 bytes and split longer rows into chunks of at most this
// many pixels.
#define MAXTWIDTH 2048
2875 
#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
// Step 1 expands a chunk of I422 into a temporary ARGB row with
// I422ToARGBRow_SSSE3; step 2 packs that chunk with ARGBToRGB565Row_SSE2.
// Rows wider than MAXTWIDTH are processed in several chunks.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_u += twidth / 2;  // Chroma is advanced by half the luma count.
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
2898 
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step I422 -> ARGB1555 row conversion: I422ToARGBRow_SSSE3 fills a
// temporary ARGB row, then ARGBToARGB1555Row_SSE2 packs it. Rows wider than
// MAXTWIDTH are processed in several chunks.
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
    src_y += twidth;
    src_u += twidth / 2;  // Chroma is advanced by half the luma count.
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
2920 
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step I422 -> ARGB4444 row conversion: I422ToARGBRow_SSSE3 fills a
// temporary ARGB row, then ARGBToARGB4444Row_SSE2 packs it. Rows wider than
// MAXTWIDTH are processed in several chunks.
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
    src_y += twidth;
    src_u += twidth / 2;  // Chroma is advanced by half the luma count.
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
2942 
#if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step NV12 -> RGB565 row conversion: NV12ToARGBRow_SSSE3 fills a
// temporary ARGB row, then ARGBToRGB565Row_SSE2 packs it. Rows wider than
// MAXTWIDTH are processed in several chunks.
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_uv += twidth;  // The UV pointer advances by the same byte count as Y.
    dst_rgb565 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
2962 
#if defined(HAS_NV12TORGB24ROW_SSSE3)
// Two-step NV12 -> RGB24 row conversion: NV12ToARGBRow_SSSE3 fills a
// temporary ARGB row, then ARGBToRGB24Row_SSSE3 packs it. Rows wider than
// MAXTWIDTH are processed in several chunks.
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_uv += twidth;  // The UV pointer advances by the same byte count as Y.
    dst_rgb24 += twidth * 3;  // 3 output bytes per pixel.
  }
}
#endif
2982 
#if defined(HAS_NV21TORGB24ROW_SSSE3)
// Two-step NV21 -> RGB24 row conversion: NV21ToARGBRow_SSSE3 fills a
// temporary ARGB row, then ARGBToRGB24Row_SSSE3 packs it. Rows wider than
// MAXTWIDTH are processed in several chunks.
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_vu += twidth;  // The VU pointer advances by the same byte count as Y.
    dst_rgb24 += twidth * 3;  // 3 output bytes per pixel.
  }
}
#endif
3002 
#if defined(HAS_NV12TORGB24ROW_AVX2)
// Two-step NV12 -> RGB24 row conversion: NV12ToARGBRow_AVX2 fills a
// temporary ARGB row, which is then packed by ARGBToRGB24Row_AVX2 when that
// kernel is available and by ARGBToRGB24Row_SSSE3 otherwise. Rows wider than
// MAXTWIDTH are processed in several chunks.
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;  // The UV pointer advances by the same byte count as Y.
    dst_rgb24 += twidth * 3;  // 3 output bytes per pixel.
  }
}
#endif
3026 
#if defined(HAS_NV21TORGB24ROW_AVX2)
// Two-step NV21 -> RGB24 row conversion: NV21ToARGBRow_AVX2 fills a
// temporary ARGB row, which is then packed by ARGBToRGB24Row_AVX2 when that
// kernel is available and by ARGBToRGB24Row_SSSE3 otherwise. Rows wider than
// MAXTWIDTH are processed in several chunks.
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;  // The VU pointer advances by the same byte count as Y.
    dst_rgb24 += twidth * 3;  // 3 output bytes per pixel.
  }
}
#endif
3050 
#if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step I422 -> RGB565 row conversion: I422ToARGBRow_AVX2 fills a
// temporary ARGB row, which is then packed by ARGBToRGB565Row_AVX2 when that
// kernel is available and by ARGBToRGB565Row_SSE2 otherwise. Rows wider than
// MAXTWIDTH are processed in several chunks.
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // Chroma is advanced by half the luma count.
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
3075 
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step I422 -> ARGB1555 row conversion: I422ToARGBRow_AVX2 fills a
// temporary ARGB row, which is then packed by ARGBToARGB1555Row_AVX2 when
// that kernel is available and by ARGBToARGB1555Row_SSE2 otherwise. Rows
// wider than MAXTWIDTH are processed in several chunks.
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // Chroma is advanced by half the luma count.
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;  // 2 output bytes per pixel.
  }
}
#endif
3101 
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-stage row conversion: I422ToARGBRow_AVX2 fills a temporary ARGB row
// from src_y/src_u/src_v, then an ARGBToARGB4444Row_* kernel packs that row
// into dst_argb4444. The row is processed in pieces of at most MAXTWIDTH
// pixels so the temporary buffer has a fixed size.
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  int chunk;
  for (remaining = width; remaining > 0; remaining -= chunk) {
    chunk = (remaining > MAXTWIDTH) ? MAXTWIDTH : remaining;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(argb_row, dst_argb4444, chunk);
#else
    ARGBToARGB4444Row_SSE2(argb_row, dst_argb4444, chunk);
#endif
    // Step every pointer past the pixels just handled; the U and V pointers
    // advance half as far as the Y pointer.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;
  }
}
#endif
3127 
#if defined(HAS_I422TORGB24ROW_AVX2)
// Two-stage row conversion: I422ToARGBRow_AVX2 fills a temporary ARGB row
// from src_y/src_u/src_v, then an ARGBToRGB24Row_* kernel packs that row
// into dst_rgb24. The row is processed in pieces of at most MAXTWIDTH
// pixels so the temporary buffer has a fixed size.
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  int chunk;
  for (remaining = width; remaining > 0; remaining -= chunk) {
    chunk = (remaining > MAXTWIDTH) ? MAXTWIDTH : remaining;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(argb_row, dst_rgb24, chunk);
#else
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, chunk);
#endif
    // Step every pointer past the pixels just handled; the U and V pointers
    // advance half as far as the Y pointer, the destination three bytes per
    // pixel.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;
  }
}
#endif
3153 
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-stage row conversion: NV12ToARGBRow_AVX2 fills a temporary ARGB row
// from src_y/src_uv, then an ARGBToRGB565Row_* kernel packs that row into
// dst_rgb565. The row is processed in pieces of at most MAXTWIDTH pixels so
// the temporary buffer has a fixed size.
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  int chunk;
  for (remaining = width; remaining > 0; remaining -= chunk) {
    chunk = (remaining > MAXTWIDTH) ? MAXTWIDTH : remaining;
    NV12ToARGBRow_AVX2(src_y, src_uv, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, chunk);
#endif
    // Step every pointer past the pixels just handled; src_y and src_uv both
    // advance by the chunk size, the destination by two bytes per pixel.
    src_y += chunk;
    src_uv += chunk;
    dst_rgb565 += chunk * 2;
  }
}
#endif
3177 
// Writes src[x] * scale to dst[x] for each of the width samples and returns
// the sum of the squares of the unscaled source samples (0 when width <= 0).
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float sum_of_squares = 0.f;
  int x;
#if defined(__clang__)
#pragma clang loop vectorize_width(4)
#endif
  for (x = 0; x < width; ++x) {
    // Read the sample once up front so the store below cannot affect it.
    const float sample = src[x];
    sum_of_squares += sample * sample;
    dst[x] = sample * scale;
  }
  return sum_of_squares;
}
3191 
// Writes src[x] * scale to dst[x] for each of the width samples and returns
// the largest unscaled source sample. The running maximum starts at 0, so the
// result is 0 when no sample is greater than 0 (or when width <= 0).
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float peak = 0.f;
  int x;
  for (x = 0; x < width; ++x) {
    const float sample = src[x];
    dst[x] = sample * scale;
    if (sample > peak) {
      peak = sample;
    }
  }
  return peak;
}
3203 
// Writes src[x] * scale to dst[x] for each of the width samples.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const float sample = src[x];
    dst[x] = sample * scale;
  }
}
3210 
// For each of the width outputs, weights five consecutive source values with
// 1, 4, 6, 4, 1, adds 128 and shifts right by 8 before storing to dst.
// Output x reads src[x] .. src[x + 4], so src must hold width + 4 values.
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t* window = src + x;
    const uint32_t weighted =
        window[0] + window[1] * 4 + window[2] * 6 + window[3] * 4 + window[4];
    dst[x] = (weighted + 128) >> 8;
  }
}
3219 
// Filters 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row: each of
// the width outputs is src0 + 4 * src1 + 6 * src2 + 4 * src3 + src4 taken at
// the same column. No rounding or shift is applied here.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = src0[x] + src1[x] * 4 + src2[x] * 6 + src3[x] * 4 + src4[x];
  }
}
3233 
3234 #ifdef __cplusplus
3235 }  // extern "C"
3236 }  // namespace libyuv
3237 #endif
3238