1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <assert.h>
14 #include <stdio.h>
15 #include <string.h> // For memcpy and memset.
16
17 #include "libyuv/basic_types.h"
18 #include "libyuv/convert_argb.h" // For kYuvI601Constants
19
20 #ifdef __cplusplus
21 namespace libyuv {
22 extern "C" {
23 #endif
24
25 // This macro control YUV to RGB using unsigned math to extend range of
26 // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
27 // LIBYUV_UNLIMITED_DATA
28
29 // The following macro from row_win makes the C code match the row_win code,
30 // which is 7 bit fixed point for ARGBToI420:
31 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
32 !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
33 #define LIBYUV_RGB7 1
34 #endif
35
36 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
37 defined(_M_IX86)
38 #define LIBYUV_ARGBTOUV_PAVGB 1
39 #define LIBYUV_RGBTOU_TRUNCATE 1
40 #endif
41
42 // llvm x86 is poor at ternary operator, so use branchless min/max.
43
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Branchless clamp of negatives to 0: -(v >= 0) is all-ones when v is
// non-negative (passes v through the AND) and 0 when v is negative.
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
// Branchless clamp to at most 255. NOTE(review): negative inputs are NOT
// preserved (the mask trick maps them to 255); callers apply clamp0 first
// (see Clamp below).
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

// Branchless clamp to at most 1023 (10 bit range); same negative-input
// caveat as clamp255.
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}

// clamp to max. The OR/AND trick requires max to be of the form 2^n - 1
// (all ones), as with 255 and 1023 above.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (-(v >= max) | v) & max;
}

// Branchless absolute value: m is all-ones for negative v, so
// (v + m) ^ m == ~(v - 1) == -v for negatives and v unchanged otherwise.
static __inline uint32_t Abs(int32_t v) {
  int m = -(v < 0);
  return (v + m) ^ m;
}
#else  // USE_BRANCHLESS
// Plain ternary versions used when branchless is disabled.
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (v > max) ? max : v;
}

static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? -v : v;
}
#endif  // USE_BRANCHLESS
// Clamp to [0, 255] for 8 bit output (clamp0 first so negatives become 0).
static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp255(v));
}

// Clamp to [0, 1023] for 10 bit output.
static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp1023(v));
}
97
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// On known little-endian targets a direct 32 bit store produces the same
// byte order as the portable fallback below.
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
// Portable fallback: store the 32 bit value byte-by-byte, least significant
// byte first, so output layout matches little-endian hosts everywhere.
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
111
// Expand a row of 24 bit RGB24 (bytes B,G,R) to 32 bit ARGB (bytes B,G,R,A)
// with fully opaque alpha.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_rgb24[0];  // B
    dst_argb[1] = src_rgb24[1];  // G
    dst_argb[2] = src_rgb24[2];  // R
    dst_argb[3] = 255u;          // A: opaque
    src_rgb24 += 3;
    dst_argb += 4;
  }
}
126
// Expand a row of 24 bit RAW (bytes R,G,B) to 32 bit ARGB (bytes B,G,R,A)
// with fully opaque alpha; R and B swap positions.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_raw[2];  // B
    dst_argb[1] = src_raw[1];  // G
    dst_argb[2] = src_raw[0];  // R
    dst_argb[3] = 255u;        // A: opaque
    src_raw += 3;
    dst_argb += 4;
  }
}
141
// Expand a row of 24 bit RAW (bytes R,G,B) to 32 bit RGBA (bytes A,B,G,R)
// with fully opaque alpha.
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgba[0] = 255u;        // A: opaque
    dst_rgba[1] = src_raw[2];  // B
    dst_rgba[2] = src_raw[1];  // G
    dst_rgba[3] = src_raw[0];  // R
    src_raw += 3;
    dst_rgba += 4;
  }
}
156
// Swap R and B in a row of 24 bit pixels: RAW (R,G,B) becomes RGB24 (B,G,R).
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = src_raw[2];  // B
    dst_rgb24[1] = src_raw[1];  // G
    dst_rgb24[2] = src_raw[0];  // R
    src_raw += 3;
    dst_rgb24 += 3;
  }
}
170
// Expand a row of RGB565 (little-endian 5:6:5, B in the low bits) to ARGB.
// Each channel is widened to 8 bits by replicating its top bits.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_rgb565[0];
    uint8_t hi = src_rgb565[1];
    uint8_t b5 = lo & 0x1f;
    uint8_t g6 = (uint8_t)((lo >> 5) | ((hi & 0x07) << 3));
    uint8_t r5 = hi >> 3;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;  // opaque alpha
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
187
// Expand a row of ARGB1555 (little-endian 1:5:5:5, B in the low bits) to
// ARGB. RGB channels replicate top bits to 8 bits; the 1 bit alpha maps
// 0 -> 0 and 1 -> 255.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_argb1555[0];
    uint8_t hi = src_argb1555[1];
    uint8_t b5 = lo & 0x1f;
    uint8_t g5 = (uint8_t)((lo >> 5) | ((hi & 0x03) << 3));
    uint8_t r5 = (uint8_t)((hi & 0x7c) >> 2);
    uint8_t a1 = hi >> 7;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = (uint8_t)-a1;  // 0 -> 0x00, 1 -> 0xff
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
205
// Expand a row of ARGB4444 (little-endian 4:4:4:4, B in the low nibble) to
// ARGB. Each nibble is widened to 8 bits by duplicating it.
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_argb4444[0];
    uint8_t hi = src_argb4444[1];
    uint8_t b4 = lo & 0x0f;
    uint8_t g4 = lo >> 4;
    uint8_t r4 = hi & 0x0f;
    uint8_t a4 = hi >> 4;
    dst_argb[0] = (uint8_t)((b4 << 4) | b4);
    dst_argb[1] = (uint8_t)((g4 << 4) | g4);
    dst_argb[2] = (uint8_t)((r4 << 4) | r4);
    dst_argb[3] = (uint8_t)((a4 << 4) | a4);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
223
// Convert a row of little-endian AR30 (2 bit A, 10 bit R/G/B, B in the low
// bits) to 8 bit ARGB, taking the top 8 bits of each 10 bit channel.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    // Store via memcpy as well: dst_argb is a byte pointer with no alignment
    // guarantee, and a uint32_t* cast would also violate strict aliasing.
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
238
// Convert a row of little-endian AR30 to 8 bit ABGR (R in the low byte),
// taking the top 8 bits of each 10 bit channel.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    // Store via memcpy: dst_abgr has no alignment guarantee and a
    // uint32_t* cast would violate strict aliasing.
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
253
// Convert AR30 to AB30 by swapping the 10 bit R and B fields; the G and A
// fields pass through unchanged.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;           // bits 0..9: B
    uint32_t ga = ar30 & 0xc00ffc00;     // keep G (10..19) and A (30..31)
    uint32_t r = (ar30 >> 20) & 0x3ff;   // bits 20..29: R
    uint32_t ab30 = r | ga | (b << 20);
    // Store via memcpy: dst_ab30 has no alignment guarantee and a
    // uint32_t* cast would violate strict aliasing.
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
267
// Drop the alpha byte from a row of ARGB, producing 24 bit RGB24 (B,G,R).
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[0];  // B
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[2];  // R
    src_argb += 4;
    dst_rgb += 3;
  }
}
281
// Drop alpha and swap R/B from a row of ARGB, producing 24 bit RAW (R,G,B).
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[2];  // R
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[0];  // B
    src_argb += 4;
    dst_rgb += 3;
  }
}
295
// Convert a row of ARGB to packed RGB565 (5:6:5, B in the low bits).
// Channels are truncated (no rounding). Pixel pairs are packed and stored
// as one 32 bit word via WRITEWORD; an odd trailing pixel is stored as 16
// bits.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Accumulate in uint32_t: the original int-promoted math computed
    // r1 << 27, which can shift a set bit into the sign position —
    // undefined behavior for signed int.
    uint32_t b0 = src_argb[0] >> 3;
    uint32_t g0 = src_argb[1] >> 2;
    uint32_t r0 = src_argb[2] >> 3;
    uint32_t b1 = src_argb[4] >> 3;
    uint32_t g1 = src_argb[5] >> 2;
    uint32_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    *(uint16_t*)(dst_rgb) = (uint16_t)(b0 | (g0 << 5) | (r0 << 11));
  }
}
317
318 // dither4 is a row of 4 values from 4x4 dither matrix.
319 // The 4x4 matrix contains values to increase RGB. When converting to
320 // fewer bits (565) this provides an ordered dither.
321 // The order in the 4x4 matrix in first byte is upper left.
322 // The 4 values are passed as an int, then referenced as an array, so
323 // endian will not affect order of the original matrix. But the dither4
324 // will containing the first pixel in the lower byte for little endian
325 // or the upper byte for big endian.
// Convert ARGB to RGB565 applying an ordered dither. dither4 holds one row
// of a 4x4 dither matrix packed as 4 bytes; byte [x & 3] is added to each
// channel (clamped to 255) before truncation to 5/6/5 bits.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  const uint8_t* dither_bytes = (const uint8_t*)(&dither4);
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int dither0 = dither_bytes[x & 3];
    int dither1 = dither_bytes[(x + 1) & 3];
    // Accumulate in uint32_t: the original int math computed r1 << 27,
    // which can shift into the sign bit — undefined behavior for int.
    uint32_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint32_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint32_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    uint32_t b1 = clamp255(src_argb[4] + dither1) >> 3;
    uint32_t g1 = clamp255(src_argb[5] + dither1) >> 2;
    uint32_t r1 = clamp255(src_argb[6] + dither1) >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    int dither0 = dither_bytes[(width - 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    *(uint16_t*)(dst_rgb) = (uint16_t)(b0 | (g0 << 5) | (r0 << 11));
  }
}
353
// Convert a row of ARGB to packed ARGB1555 (1:5:5:5, B in the low bits).
// RGB channels are truncated to 5 bits; alpha to 1 bit.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Accumulate in uint32_t: the original int expression (a1 << 31)
    // shifts a set bit into the sign position — undefined behavior.
    uint32_t b0 = src_argb[0] >> 3;
    uint32_t g0 = src_argb[1] >> 3;
    uint32_t r0 = src_argb[2] >> 3;
    uint32_t a0 = src_argb[3] >> 7;
    uint32_t b1 = src_argb[4] >> 3;
    uint32_t g1 = src_argb[5] >> 3;
    uint32_t r1 = src_argb[6] >> 3;
    uint32_t a1 = src_argb[7] >> 7;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
                            (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    *(uint16_t*)(dst_rgb) =
        (uint16_t)(b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
  }
}
378
// Convert a row of ARGB to packed ARGB4444 (4:4:4:4, B in the low nibble).
// Each channel is truncated to its top 4 bits.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Accumulate in uint32_t: the original int expression (a1 << 28) can
    // reach 15 << 28, overflowing signed int — undefined behavior.
    uint32_t b0 = src_argb[0] >> 4;
    uint32_t g0 = src_argb[1] >> 4;
    uint32_t r0 = src_argb[2] >> 4;
    uint32_t a0 = src_argb[3] >> 4;
    uint32_t b1 = src_argb[4] >> 4;
    uint32_t g1 = src_argb[5] >> 4;
    uint32_t r1 = src_argb[6] >> 4;
    uint32_t a1 = src_argb[7] >> 4;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
                            (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    *(uint16_t*)(dst_rgb) = (uint16_t)(b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
  }
}
403
// Convert a row of 8 bit ABGR to little-endian AR30. Each 8 bit channel is
// widened to 10 bits by replicating its top 2 bits.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    uint32_t ar30 = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
    // Store via memcpy: dst_ar30 is a byte pointer with no alignment
    // guarantee, and a uint32_t* cast would violate strict aliasing.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
416
// Convert a row of 8 bit ARGB to little-endian AR30. Each 8 bit channel is
// widened to 10 bits by replicating its top 2 bits.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // Store via memcpy: dst_ar30 is a byte pointer with no alignment
    // guarantee, and a uint32_t* cast would violate strict aliasing.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
429
// Widen a row of 8 bit ARGB to 16 bit AR64 by replicating each byte into
// both halves of the 16 bit lane (255 -> 0xffff).
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_ar64[c] = (uint16_t)(src_argb[c] * 0x0101);
    }
    dst_ar64 += 4;
    src_argb += 4;
  }
}
441
// Widen a row of 8 bit ARGB to 16 bit AB64 (R and B swapped), replicating
// each byte into both halves of the 16 bit lane.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_ab64[0] = (uint16_t)(src_argb[2] * 0x0101);  // R
    dst_ab64[1] = (uint16_t)(src_argb[1] * 0x0101);  // G
    dst_ab64[2] = (uint16_t)(src_argb[0] * 0x0101);  // B
    dst_ab64[3] = (uint16_t)(src_argb[3] * 0x0101);  // A
    dst_ab64 += 4;
    src_argb += 4;
  }
}
453
// Narrow a row of 16 bit AR64 to 8 bit ARGB by keeping the high byte of
// each lane.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8_t)(src_ar64[c] >> 8);
    }
    dst_argb += 4;
    src_ar64 += 4;
  }
}
465
// Narrow a row of 16 bit AB64 to 8 bit ARGB: keep the high byte of each
// lane and swap R and B.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = (uint8_t)(src_ab64[2] >> 8);  // B
    dst_argb[1] = (uint8_t)(src_ab64[1] >> 8);  // G
    dst_argb[2] = (uint8_t)(src_ab64[0] >> 8);  // R
    dst_argb[3] = (uint8_t)(src_ab64[3] >> 8);  // A
    dst_argb += 4;
    src_ab64 += 4;
  }
}
477
478 // TODO(fbarchard): Make shuffle compatible with SIMD versions
// TODO(fbarchard): Make shuffle compatible with SIMD versions
// Shuffle the four 16 bit lanes of each AR64 pixel. shuffler holds byte
// indices (SIMD-style); dividing the even entries by 2 yields the 16 bit
// lane indices. Processes width / 2 pixels.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src16 = (const uint16_t*)src_ar64;
  uint16_t* dst16 = (uint16_t*)dst_ar64;
  int idx[4];
  int i;
  for (i = 0; i < 4; ++i) {
    idx[i] = shuffler[i * 2] / 2;
  }
  for (i = 0; i < width / 2; ++i) {
    // Read all four lanes before writing so in-place shuffles work.
    uint16_t lane0 = src16[idx[0]];
    uint16_t lane1 = src16[idx[1]];
    uint16_t lane2 = src16[idx[2]];
    uint16_t lane3 = src16[idx[3]];
    dst16[0] = lane0;
    dst16[1] = lane1;
    dst16[2] = lane2;
    dst16[3] = lane3;
    src16 += 4;
    dst16 += 4;
  }
}
505
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
// 0x7e80) >> 8;

// BT.601 limited-range luma (see coefficient table below):
// 0x1080 = 0x1000 (+16 offset) + 0x0080 (rounding).
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
#endif

// Rounding average of two bytes, matching x86 pavgb semantics.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

#ifdef LIBYUV_RGBTOU_TRUNCATE
// Chroma with truncation: 0x8000 adds only the +128 bias (no rounding),
// matching the x86 SIMD paths.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to SIMD and use this
// Chroma with rounding: 0x8080 = +128 bias (0x8000) plus 0x80 rounding.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
#endif

#if !defined(LIBYUV_ARGBTOUV_PAVGB)
// Variants taking 2x-scaled (summed, not averaged) channels; coefficients
// are halved so the result matches RGBToU/V. Used by the non-pavgb path.
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
}
#endif
550
// ARGBToY_C and ARGBToUV_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB

// MAKEROWY(NAME, R, G, B, BPP) expands to NAME##ToYRow_C and
// NAME##ToUVRow_C for a packed format with channel byte offsets R/G/B and
// BPP bytes per pixel. UV subsamples 2x2: two pixels from each of two rows
// are combined with pavgb-style rounding averages before conversion. The
// odd-width tail averages the final 1x2 column only.
// (Comments must stay outside the macro: a // inside a backslash-continued
// macro would swallow the continuation.)
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                     \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));        \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                     \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));        \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                     \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));        \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                          \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                          \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                          \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
    }                                                                      \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
// (RGB2xToU/V); the 2x2 sum is halved once with rounding, leaving values
// at twice normal scale.
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +         \
                     src_rgb1[B + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +         \
                     src_rgb1[G + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +         \
                     src_rgb1[R + BPP] + 1) >>                             \
                    1;                                                     \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint16_t ab = src_rgb[B] + src_rgb1[B];                              \
      uint16_t ag = src_rgb[G] + src_rgb1[G];                              \
      uint16_t ar = src_rgb[R] + src_rgb1[R];                              \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
    }                                                                      \
  }
#endif

// Instantiate Y and UV row converters for each packed RGB format.
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
639
640 // JPeg uses a variation on BT.601-1 full range
641 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
642 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
643 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
644 // BT.601 Mpeg range uses:
645 // b 0.1016 * 255 = 25.908 = 25
646 // g 0.5078 * 255 = 129.489 = 129
647 // r 0.2578 * 255 = 65.739 = 66
648 // JPeg 7 bit Y (deprecated)
649 // b 0.11400 * 128 = 14.592 = 15
650 // g 0.58700 * 128 = 75.136 = 75
651 // r 0.29900 * 128 = 38.272 = 38
652 // JPeg 8 bit Y:
653 // b 0.11400 * 256 = 29.184 = 29
654 // g 0.58700 * 256 = 150.272 = 150
655 // r 0.29900 * 256 = 76.544 = 77
656 // JPeg 8 bit U:
657 // b 0.50000 * 255 = 127.5 = 127
658 // g -0.33126 * 255 = -84.4713 = -84
659 // r -0.16874 * 255 = -43.0287 = -43
660 // JPeg 8 bit V:
661 // b -0.08131 * 255 = -20.73405 = -20
662 // g -0.41869 * 255 = -106.76595 = -107
663 // r 0.50000 * 255 = 127.5 = 127
664
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
// JPeg (full range) luma; coefficient derivation is in the comment block
// above. +128 is rounding only — no +16 offset, output is full 0..255.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif

#if defined(LIBYUV_ARGBTOUV_PAVGB)
// Full-range JPeg chroma; 0x8080 = +128 bias (0x8000) plus 0x80 rounding.
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
// Variants taking 2x-scaled (summed) channels with halved coefficients,
// used by the non-pavgb subsample path.
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
692
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimic SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
// Same structure as MAKEROWY above, but using the full-range JPeg
// conversion helpers (RGBToYJ/UJ/VJ). Comments stay outside the macro: a
// // inside a backslash-continued macro would swallow the continuation.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                      \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));         \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                      \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));         \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                      \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));         \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                           \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                           \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                           \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
    }                                                                       \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
// (RGB2xToUJ/VJ).
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
                     src_rgb1[B + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
                     src_rgb1[G + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
                     src_rgb1[R + BPP] + 1) >>                              \
                    1;                                                      \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint16_t ab = (src_rgb[B] + src_rgb1[B]);                             \
      uint16_t ag = (src_rgb[G] + src_rgb1[G]);                             \
      uint16_t ar = (src_rgb[R] + src_rgb1[R]);                             \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
    }                                                                       \
  }

#endif

// Instantiate full-range (JPeg) Y and UV row converters.
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
779
// Convert a row of RGB565 to 8 bit luma: unpack 5:6:5, widen each channel
// to 8 bits by replicating top bits, then apply RGBToY.
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_rgb565[0];
    uint8_t hi = src_rgb565[1];
    uint8_t b5 = lo & 0x1f;
    uint8_t g6 = (uint8_t)((lo >> 5) | ((hi & 0x07) << 3));
    uint8_t r5 = hi >> 3;
    dst_y[0] = RGBToY((uint8_t)((r5 << 3) | (r5 >> 2)),
                      (uint8_t)((g6 << 2) | (g6 >> 4)),
                      (uint8_t)((b5 << 3) | (b5 >> 2)));
    src_rgb565 += 2;
    dst_y += 1;
  }
}
794
// Convert a row of ARGB1555 to 8 bit luma: unpack 5:5:5 (alpha ignored),
// widen each channel to 8 bits by replicating top bits, then apply RGBToY.
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_argb1555[0];
    uint8_t hi = src_argb1555[1];
    uint8_t b5 = lo & 0x1f;
    uint8_t g5 = (uint8_t)((lo >> 5) | ((hi & 0x03) << 3));
    uint8_t r5 = (uint8_t)((hi & 0x7c) >> 2);
    dst_y[0] = RGBToY((uint8_t)((r5 << 3) | (r5 >> 2)),
                      (uint8_t)((g5 << 3) | (g5 >> 2)),
                      (uint8_t)((b5 << 3) | (b5 >> 2)));
    src_argb1555 += 2;
    dst_y += 1;
  }
}
809
// Convert a row of ARGB4444 to 8 bit luma: unpack the B/G/R nibbles (alpha
// ignored), duplicate each nibble to 8 bits, then apply RGBToY.
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_argb4444[0];
    uint8_t hi = src_argb4444[1];
    uint8_t b4 = lo & 0x0f;
    uint8_t g4 = lo >> 4;
    uint8_t r4 = hi & 0x0f;
    dst_y[0] = RGBToY((uint8_t)((r4 << 4) | r4), (uint8_t)((g4 << 4) | g4),
                      (uint8_t)((b4 << 4) | b4));
    src_argb4444 += 2;
    dst_y += 1;
  }
}
824
// Down-sample a pair of RGB565 rows (2x2 blocks) to one row of U and V.
// src_stride_rgb565 locates the second source row; dst_u/dst_v each get
// (width + 1) / 2 samples.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;  // row below
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 2x2 block: pixels 0/1 from this row, 2/3 from the next.
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b1 = src_rgb565[2] & 0x1f;
    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8_t r1 = src_rgb565[3] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    uint8_t b3 = next_rgb565[2] & 0x1f;
    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8_t r3 = next_rgb565[3] >> 3;

    // Widen each channel to 8 bits by replicating its top bits.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 2) | (g1 >> 4);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 2) | (g3 >> 4);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // x86 path: pavgb-style rounding averages, then 8 bit RGB->UV.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // ARM path: sum of 4 halved once (2x scale), then RGB2x helpers.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd tail: average the final 1x2 column only.
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
908
// Convert two rows of ARGB1555 (16 bit, 5 bits per color, alpha in the top
// bit) to one row of U and V, subsampled 2x2: each U/V output is computed
// from a 2x2 block spanning this row and the next.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 5 bit B, G, R fields of the 2x2 block (pixels 0,1 on this
    // row; 2,3 on the next). The alpha bit is ignored.
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;

    // Expand 5 bits to 8 bits by replicating the top bits into the low bits.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 3) | (g1 >> 2);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 3) | (g3 >> 2);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // Pairwise rounding averages, matching the x86 pavgb based kernels.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Rounded sum kept at 2x scale for the RGB2xToU/V variants.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd trailing column: average vertically only (1x2 block).
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
992
// Convert two rows of ARGB4444 (4 bits per channel) to one row of U and V,
// subsampled 2x2 across this row and the next.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 4 bit B, G, R nibbles of the 2x2 block; alpha is ignored.
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    // Expand 4 bits to 8 bits by duplicating the nibble.
    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b1 = (b1 << 4) | b1;
    g1 = (g1 << 4) | g1;
    r1 = (r1 << 4) | r1;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
    b3 = (b3 << 4) | b3;
    g3 = (g3 << 4) | g3;
    r3 = (r3 << 4) | r3;

#if LIBYUV_ARGBTOUV_PAVGB
    // Pairwise rounding averages, matching the x86 pavgb based kernels.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Rounded sum kept at 2x scale for the RGB2xToU/V variants.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd trailing column: vertical average of a 1x2 block.
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1076
// Convert a row of ARGB to full-resolution U and V planes: one U/V sample
// per pixel, no subsampling.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t blue = src_argb[i * 4 + 0];
    const uint8_t green = src_argb[i * 4 + 1];
    const uint8_t red = src_argb[i * 4 + 2];
    dst_u[i] = RGBToU(red, green, blue);
    dst_v[i] = RGBToV(red, green, blue);
  }
}
1093
// Gray-scale a row of ARGB: replace B, G and R with the pixel's luma
// (computed via RGBToYJ); alpha is copied through unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t luma =
        RGBToYJ(src_argb[i * 4 + 2], src_argb[i * 4 + 1], src_argb[i * 4 + 0]);
    dst_argb[i * 4 + 0] = luma;
    dst_argb[i * 4 + 1] = luma;
    dst_argb[i * 4 + 2] = luma;
    dst_argb[i * 4 + 3] = src_argb[i * 4 + 3];
  }
}
1104
// Convert a row of image to Sepia tone, in place. Coefficients are 7 bit
// fixed point; alpha is preserved from the original pixel.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  uint8_t* px = dst_argb;
  uint8_t* const end = dst_argb + width * 4;
  for (; px < end; px += 4) {
    const int b = px[0];
    const int g = px[1];
    const int r = px[2];
    // Blue cannot overflow ((17 + 68 + 35) * 255 >> 7 < 256); green and red
    // can, so they are clamped.
    px[0] = (uint8_t)((b * 17 + g * 68 + r * 35) >> 7);
    px[1] = (uint8_t)clamp255((b * 22 + g * 88 + r * 45) >> 7);
    px[2] = (uint8_t)clamp255((b * 24 + g * 98 + r * 50) >> 7);
  }
}
1122
// Apply color matrix to a row of image. Matrix is signed 6 bit fixed point
// (4x4, row-major: output channel per row of 4 coefficients).
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    int ch;
    // One dot product per output channel (B, G, R, A).
    for (ch = 0; ch < 4; ++ch) {
      const int8_t* m = matrix_argb + ch * 4;
      dst_argb[ch] = Clamp((b * m[0] + g * m[1] + r * m[2] + a * m[3]) >> 6);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
1155
// Apply color table to a row of image, in place. The table holds 256 entries
// of 4 bytes; channel c of each pixel is replaced by table[value * 4 + c].
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    // All four channels (including alpha) are remapped.
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = table_argb[dst_argb[c] * 4 + c];
    }
    dst_argb += 4;
  }
}
1173
// Apply color table to a row of image, in place. Same as ARGBColorTableRow_C
// but alpha is left untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    // Remap B, G, R only; dst_argb[3] (alpha) is not modified.
    for (c = 0; c < 3; ++c) {
      dst_argb[c] = table_argb[dst_argb[c] * 4 + c];
    }
    dst_argb += 4;
  }
}
1189
// Quantize a row of ARGB in place: each color channel is scaled (16.16 fixed
// point 'scale'), snapped to a multiple of 'interval_size' and offset by
// 'interval_offset'. Alpha is not modified.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 3; ++c) {
      const int v = dst_argb[i * 4 + c];
      dst_argb[i * 4 + c] =
          (uint8_t)((v * scale >> 16) * interval_size + interval_offset);
    }
  }
}
1206
// REPEAT8 replicates an 8 bit value into the low 16 bits (v * 0x0101).
// SHADE multiplies a replicated 16 bit channel by a replicated 16 bit scale
// and keeps the top 8 bits of the 32 bit product.
// Arguments and expansions are fully parenthesized so the macros expand
// safely in any expression context; the expansion at the call sites below is
// unchanged from the unparenthesized form.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scale each channel of a row of ARGB by the matching channel of 'value'
// (packed ARGB, each byte treated as a 0..255 scale where 255 ~= 1.0).
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1235
// REPEAT8 replicates an 8 bit value into the low 16 bits (v * 0x0101).
// SHADE multiplies a replicated 16 bit channel by an 8 bit scale and keeps
// the top bits of the product (truncating fixed point: 255 * 255 -> 254).
// Arguments and expansions are fully parenthesized for safe macro expansion;
// the expansion at the call sites below is unchanged.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 16)

// Multiply two rows of ARGB channel by channel (approximate src * src1 / 255).
void ARGBMultiplyRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1264
// Add two rows of ARGB channel by channel, saturating each sum at 255.
void ARGBAddRow_C(const uint8_t* src_argb,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = clamp255(src_argb[c] + src_argb1[c]);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1291
// Subtract the second row of ARGB from the first, channel by channel,
// clamping each difference at 0.
void ARGBSubtractRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = clamp0(src_argb[c] - src_argb1[c]);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1318
// Sobel functions which mimics SSSE3.
// Horizontal gradient: |d0 + 2*d1 + d2| where dN is the difference between
// column i and column i+2 on three consecutive rows, clamped to 255.
// Note: reads two bytes past 'width' on each row, as the SIMD versions do.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int d0 = src_y0[i] - src_y0[i + 2];
    const int d1 = src_y1[i] - src_y1[i + 2];
    const int d2 = src_y2[i] - src_y2[i + 2];
    dst_sobelx[i] = (uint8_t)clamp255(Abs(d0 + 2 * d1 + d2));
  }
}
1340
// Vertical gradient: |d0 + 2*d1 + d2| where dN is the difference between the
// two rows at columns i, i+1, i+2, clamped to 255. Reads two bytes past
// 'width' on each row, as the SIMD versions do.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int d0 = src_y0[i + 0] - src_y1[i + 0];
    const int d1 = src_y0[i + 1] - src_y1[i + 1];
    const int d2 = src_y0[i + 2] - src_y1[i + 2];
    dst_sobely[i] = (uint8_t)clamp255(Abs(d0 + 2 * d1 + d2));
  }
}
1360
// Combine Sobel X and Y gradients into gray ARGB: B = G = R = min(x + y, 255),
// alpha opaque.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t s = (uint8_t)clamp255(src_sobelx[i] + src_sobely[i]);
    dst_argb[i * 4 + 0] = s;
    dst_argb[i * 4 + 1] = s;
    dst_argb[i * 4 + 2] = s;
    dst_argb[i * 4 + 3] = (uint8_t)255u;
  }
}
1377
// Combine Sobel X and Y gradients into a single gray plane:
// dst = min(x + y, 255).
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint8_t)clamp255(src_sobelx[i] + src_sobely[i]);
  }
}
1390
// Pack Sobel gradients into ARGB: B = Y gradient, R = X gradient,
// G = min(x + y, 255), alpha opaque.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int gx = src_sobelx[i];
    const int gy = src_sobely[i];
    dst_argb[i * 4 + 0] = (uint8_t)gy;
    dst_argb[i * 4 + 1] = (uint8_t)clamp255(gx + gy);
    dst_argb[i * 4 + 2] = (uint8_t)gx;
    dst_argb[i * 4 + 3] = (uint8_t)255u;
  }
}
1407
// Expand a row of gray (J400) to ARGB: copy Y into B, G and R, alpha opaque.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t luma = src_y[i];
    dst_argb[i * 4 + 0] = luma;
    dst_argb[i * 4 + 1] = luma;
    dst_argb[i * 4 + 2] = luma;
    dst_argb[i * 4 + 3] = (uint8_t)255u;
  }
}
1419
// Macros to create SIMD specific yuv to rgb conversion constants.

// clang-format off

#if defined(__aarch64__) || defined(__arm__)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                    \
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
    0, 0}}
#else
// x86 layout: coefficients replicated across full vector registers.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif

// clang-format on

// Emits a kYuv<name>Constants table plus a kYvu<name>Constants variant with
// the U and V coefficients swapped (for YVU-ordered sources).
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);

// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 + U * 2.018
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
#endif
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.601 full range YUV to RGB reference (aka JPEG)
// * R = Y + V * 1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y + U * 1.77200
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#define UB 113 /* round(1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR 90 /* round(1.40200 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.793
// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
// B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
#endif
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 full range YUV to RGB reference
// R = Y + V * 1.5748
// G = Y - U * 0.18732 - V * 0.46812
// B = Y + U * 1.8556
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#define UB 119 /* round(1.8556 * 64) */
#define UG 12 /* round(0.18732 * 64) */
#define VG 30 /* round(0.46812 * 64) */
#define VR 101 /* round(1.5748 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 limited range YUV to RGB reference
// R = (Y - 16) * 1.164384 + V * 1.67867
// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
// B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593

// U and V contributions to R,G,B.
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
#endif
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 full range YUV to RGB reference
// R = Y + V * 1.474600
// G = Y - U * 0.164553 - V * 0.571353
// B = Y + U * 1.881400
// KR = 0.2627; KB = 0.0593

#define UB 120 /* round(1.881400 * 64) */
#define UG 11 /* round(0.164553 * 64) */
#define VG 37 /* round(0.571353 * 64) */
#define VR 94 /* round(1.474600 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// NOTE(review): BB/BG/BR do not appear to be defined above — these undefs
// look like historical leftovers; confirm before removing.
#undef BB
#undef BG
#undef BR

#undef MAKEYUVCONSTANTS
1619
#if defined(__aarch64__) || defined(__arm__)
// Load the Arm-layout YuvConstants fields (see YUVCONSTANTSBODY above) into
// locals consumed by CALC_RGB16.
#define LOAD_YUV_CONSTANTS           \
  int ub = yuvconstants->kUVCoeff[0]; \
  int vr = yuvconstants->kUVCoeff[1]; \
  int ug = yuvconstants->kUVCoeff[2]; \
  int vg = yuvconstants->kUVCoeff[3]; \
  int yg = yuvconstants->kRGBCoeffBias[0]; \
  int bb = yuvconstants->kRGBCoeffBias[1]; \
  int bg = yuvconstants->kRGBCoeffBias[2]; \
  int br = yuvconstants->kRGBCoeffBias[3]

// Compute 16 bit (6 bit fixed point) B, G, R from y32/u/v in scope. The
// per-channel biases bb/bg/br already fold in the -128 U/V offset and the Y
// bias (see the Arm YUVCONSTANTSBODY).
#define CALC_RGB16                         \
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
  int b16 = y1 + (u * ub) - bb;            \
  int g16 = y1 + bg - (u * ug + v * vg);   \
  int r16 = y1 + (v * vr) - br
#else
// Load the x86-layout YuvConstants fields into locals consumed by CALC_RGB16.
#define LOAD_YUV_CONSTANTS           \
  int ub = yuvconstants->kUVToB[0];  \
  int ug = yuvconstants->kUVToG[0];  \
  int vg = yuvconstants->kUVToG[1];  \
  int vr = yuvconstants->kUVToR[1];  \
  int yg = yuvconstants->kYToRgb[0]; \
  int yb = yuvconstants->kYBiasToRgb[0]

// Compute 16 bit (6 bit fixed point) B, G, R. U and V are re-centered to
// signed -128..127 via the int8_t narrowing conversions (two's complement
// wraparound on the supported compilers).
#define CALC_RGB16                                \
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
  int8_t ui = u;                                  \
  int8_t vi = v;                                  \
  ui -= 0x80;                                     \
  vi -= 0x80;                                     \
  int b16 = y1 + (ui * ub);                       \
  int g16 = y1 - (ui * ug + vi * vg);             \
  int r16 = y1 + (vi * vr)
#endif
1655
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and writes 8 bit B, G, R (computed as a 16 bit
// 6-bit-fixed-point intermediate, then shifted down and clamped).
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate Y into 16 bits so the yg scale multiply covers full range.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1672
// Reads 8 bit YUV and leaves result as 16 bit (6 bit fixed point, unclamped).
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate Y into 16 bits for the yg scale multiply.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1688
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit (6 bit fixed point).
static __inline void YuvPixel10_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Shift 10 bit Y up to the 16 bit position expected by CALC_RGB16.
  uint32_t y32 = y << 6;
  // Reduce 10 bit U/V to 8 bit; clamp guards inputs above 1023.
  u = clamp255(u >> 2);
  v = clamp255(v >> 2);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1707
// C reference code that mimics the YUV 16 bit assembly.
// Reads 12 bit YUV and leaves result as 16 bit (6 bit fixed point).
static __inline void YuvPixel12_16(int16_t y,
                                   int16_t u,
                                   int16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Shift 12 bit Y up to the 16 bit position expected by CALC_RGB16.
  uint32_t y32 = y << 4;
  // Reduce 12 bit U/V to 8 bit; clamp guards inputs above 4095.
  u = clamp255(u >> 4);
  v = clamp255(v >> 4);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1726
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int rgb16[3];  // 16 bit intermediate B, G, R.
  YuvPixel10_16(y, u, v, &rgb16[0], &rgb16[1], &rgb16[2], yuvconstants);
  *b = Clamp(rgb16[0] >> 6);
  *g = Clamp(rgb16[1] >> 6);
  *r = Clamp(rgb16[2] >> 6);
}
1744
// C reference code that mimics the YUV 12 bit assembly.
// Reads 12 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int rgb16[3];  // 16 bit intermediate B, G, R.
  YuvPixel12_16(y, u, v, &rgb16[0], &rgb16[1], &rgb16[2], yuvconstants);
  *b = Clamp(rgb16[0] >> 6);
  *g = Clamp(rgb16[1] >> 6);
  *r = Clamp(rgb16[2] >> 6);
}
1762
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // 16 bit Y is already at the scale CALC_RGB16 expects.
  uint32_t y32 = y;
  // Reduce 16 bit U/V to 8 bit.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1781
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit (6 bit fixed point).
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // 16 bit Y is already at the scale CALC_RGB16 expects.
  uint32_t y32 = y;
  // Reduce 16 bit U/V to 8 bit.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1800
1801 // C reference code that mimics the YUV assembly.
1802 // Reads 8 bit YUV and leaves result as 8 bit.
YPixel(uint8_t y,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1803 static __inline void YPixel(uint8_t y,
1804 uint8_t* b,
1805 uint8_t* g,
1806 uint8_t* r,
1807 const struct YuvConstants* yuvconstants) {
1808 #if defined(__aarch64__) || defined(__arm__)
1809 int yg = yuvconstants->kRGBCoeffBias[0];
1810 int ygb = yuvconstants->kRGBCoeffBias[4];
1811 #else
1812 int ygb = yuvconstants->kYBiasToRgb[0];
1813 int yg = yuvconstants->kYToRgb[0];
1814 #endif
1815 uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1816 *b = Clamp(((int32_t)(y1) + ygb) >> 6);
1817 *g = Clamp(((int32_t)(y1) + ygb) >> 6);
1818 *r = Clamp(((int32_t)(y1) + ygb) >> 6);
1819 }
1820
// Convert a row of I444 (full-resolution Y, U, V planes) to ARGB.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + i * 4;
    YuvPixel(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
1838
// Also used for 420
// Convert a row of I422 to ARGB: each U/V pair is shared by two horizontally
// adjacent Y samples.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Odd width: final pixel uses the last U/V sample.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1865
// 10 bit YUV to ARGB
// Convert a row of I210 (10 bit, 422-subsampled) to 8 bit ARGB; each U/V
// pair is shared by two horizontally adjacent Y samples.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Odd width: final pixel uses the last U/V sample.
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1892
// Convert a row of I410 (10 bit, full-resolution U and V) to 8 bit ARGB.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + i * 4;
    YuvPixel10(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
               yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
1910
// Convert a row of 10 bit I210 plus a 10 bit alpha plane to 8 bit ARGB.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha to 8 bit.
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = clamp255(src_a[1] >> 2);
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
  }
}
1938
// Convert a row of 10 bit I410 plus a 10 bit alpha plane to 8 bit ARGB.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = width; i > 0; --i) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha to 8 bit.
    ++src_y;
    ++src_u;
    ++src_v;
    ++src_a;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
1958
1959 // 12 bit YUV to ARGB
// Convert a row of 12 bit I212 (2x1 subsampled chroma) to 8 bit ARGB.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1985
// Pack 10.6 fixed point B/G/R into one little-endian AR30 pixel
// (2 bit alpha forced to 3, then 10 bits each of R, G, B).
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = Clamp10(b >> 4);  // Convert 8 bit 10.6 fixed point to 10 bit.
  g = Clamp10(g >> 4);
  r = Clamp10(r >> 4);
  ar30 = (uint32_t)b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  // memcpy instead of *(uint32_t*)rgb_buf: rgb_buf is a uint8_t* with no
  // alignment guarantee, so a direct type-punned 32 bit store is undefined
  // behavior on alignment-strict targets. Compilers emit a single store.
  memcpy(rgb_buf, &ar30, sizeof(ar30));
}
1997
1998 // 10 bit YUV to 10 bit AR30
// Convert a row of 10 bit I210 (2x1 subsampled chroma) to AR30.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2024
2025 // 12 bit YUV to 10 bit AR30
// Convert a row of 12 bit I212 (2x1 subsampled chroma) to 10 bit AR30.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2051
// Convert a row of 10 bit I410 (full resolution chroma) to AR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i > 0; --i) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    ++src_y;
    ++src_u;
    ++src_v;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2071
2072 // P210 has 10 bits in msb of 16 bit NV12 style layout.
// Convert a row of P210 (10 bits in msb of 16 bit NV12-style layout) to ARGB.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
                 dst_argb + 6, yuvconstants);
    dst_argb[3] = 255;  // Opaque alpha.
    dst_argb[7] = 255;
    src_y += 2;
    src_uv += 2;
    dst_argb += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;
  }
}
2096
// Convert a row of P410 (full resolution interleaved 16 bit UV) to ARGB.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i > 0; --i) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;  // Opaque alpha.
    ++src_y;
    src_uv += 2;
    dst_argb += 4;  // Advance 1 pixel.
  }
}
2112
// Convert a row of P210 to AR30 (2 pixels share one UV pair).
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30 + 4, b, g, r);
    src_y += 2;
    src_uv += 2;
    dst_ar30 += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
  }
}
2136
// Convert a row of P410 (full resolution UV) to AR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i > 0; --i) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    ++src_y;
    src_uv += 2;
    dst_ar30 += 4;  // Advance 1 pixel.
  }
}
2154
2155 // 8 bit YUV to 10 bit AR30
2156 // Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
// Convert a row of 8 bit I422 to 10 bit AR30; the 8 bit samples are
// widened to 10 bits by the 8_16 pixel helper.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int b;
  int g;
  int r;
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2182
// Convert a row of I444 (full resolution chroma) plus an alpha plane to ARGB.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = width; i > 0; --i) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];  // Alpha passes through unmodified.
    ++src_y;
    ++src_u;
    ++src_v;
    ++src_a;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2202
// Convert a row of I422 plus an alpha plane to ARGB.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];  // Alpha passes through unmodified.
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
2230
// Convert a row of I422 to 24 bit RGB (3 bytes per pixel, no alpha).
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2253
// Convert a row of I422 to packed ARGB4444 (4 bits per channel).
// Alpha is forced to 0xf (opaque).
void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  uint32_t pix2;
  uint16_t pix1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 4;  // Reduce 8 bit channels to 4 bits.
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    pix2 = (uint32_t)b0 | ((uint32_t)g0 << 4) | ((uint32_t)r0 << 8) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 20) | ((uint32_t)r1 << 24) |
           0xf000f000u;
    // memcpy instead of *(uint32_t*): dst_argb4444 has only 2 byte pixel
    // alignment, so a direct 4 byte type-punned store is undefined behavior
    // on alignment-strict targets. Compiles to one 32 bit store at -O2.
    memcpy(dst_argb4444, &pix2, 4);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    pix1 = (uint16_t)(b0 | (g0 << 4) | (r0 << 8) | 0xf000);
    memcpy(dst_argb4444, &pix1, 2);
  }
}
2291
// Convert a row of I422 to packed ARGB1555 (5 bits per color channel).
// Alpha is forced to 1 (opaque).
void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  uint32_t pix2;
  uint16_t pix1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // Reduce 8 bit channels to 5 bits.
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    pix2 = (uint32_t)b0 | ((uint32_t)g0 << 5) | ((uint32_t)r0 << 10) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 21) | ((uint32_t)r1 << 26) |
           0x80008000u;
    // memcpy instead of *(uint32_t*): dst_argb1555 has only 2 byte pixel
    // alignment, so a direct 4 byte type-punned store is undefined behavior
    // on alignment-strict targets. Compiles to one 32 bit store at -O2.
    memcpy(dst_argb1555, &pix2, 4);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    pix1 = (uint16_t)(b0 | (g0 << 5) | (r0 << 10) | 0x8000);
    memcpy(dst_argb1555, &pix1, 2);
  }
}
2329
// Convert a row of I422 to packed RGB565 (5/6/5 bits, no alpha).
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  uint32_t pix2;
  uint16_t pix1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // Reduce channels to 5/6/5 bits.
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Cast to uint32_t before shifting: r1 is promoted to signed int, and
    // (r1 << 27) can set the sign bit (e.g. 31 << 27 > INT_MAX), which is
    // undefined behavior for signed left shift.
    pix2 = (uint32_t)b0 | ((uint32_t)g0 << 5) | ((uint32_t)r0 << 11) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 21) | ((uint32_t)r1 << 27);
    // memcpy avoids an unaligned 4 byte store; dst is only 2 byte aligned.
    memcpy(dst_rgb565, &pix2, 4);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    pix1 = (uint16_t)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &pix1, 2);
  }
}
2367
// Convert a row of NV12 (Y plane plus interleaved UV) to ARGB.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2391
// Convert a row of NV21 (Y plane plus interleaved VU) to ARGB.
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    // Note VU byte order: U is the second byte of each pair.
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2415
// Convert a row of NV12 to 24 bit RGB (no alpha).
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2436
// Convert a row of NV21 (interleaved VU) to 24 bit RGB (no alpha).
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    // Note VU byte order: U is the second byte of each pair.
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2457
// Convert a row of NV12 (interleaved UV) to packed RGB565.
void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  uint32_t pix2;
  uint16_t pix1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // Reduce channels to 5/6/5 bits.
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Cast to uint32_t before shifting: r1 is promoted to signed int, and
    // (r1 << 27) can set the sign bit (e.g. 31 << 27 > INT_MAX), which is
    // undefined behavior for signed left shift.
    pix2 = (uint32_t)b0 | ((uint32_t)g0 << 5) | ((uint32_t)r0 << 11) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 21) | ((uint32_t)r1 << 27);
    // memcpy avoids an unaligned 4 byte store; dst is only 2 byte aligned.
    memcpy(dst_rgb565, &pix2, 4);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    pix1 = (uint16_t)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &pix1, 2);
  }
}
2493
// Convert a row of packed YUY2 (Y0 U Y1 V) to ARGB with opaque alpha.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2515
// Convert a row of packed UYVY (U Y0 V Y1) to ARGB with opaque alpha.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2537
// Convert a row of I422 to RGBA: alpha is the FIRST byte of each pixel,
// followed by B, G, R; alpha is forced opaque.
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
             rgb_buf + 7, yuvconstants);
    rgb_buf[0] = 255;
    rgb_buf[4] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
  }
}
2563
// Convert a row of grey (luma only) pixels to ARGB with opaque alpha.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = width; i >= 2; i -= 2) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
    rgb_buf[3] = 255;
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2582
// Copy a row of bytes in reverse order.
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* s = src + width - 1;  // Last source byte.
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = *s--;
  }
}
2595
// Mirror a row of interleaved UV pairs; the byte order within each
// 2 byte pair is preserved.
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  const uint8_t* s = src_uv + (width - 1) * 2;  // Last UV pair.
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = s[0];
    dst_uv[2 * i + 1] = s[1];
    s -= 2;
  }
}
2606
// Mirror a row of interleaved UV while splitting it into separate
// U and V planes.
void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  const uint8_t* s = src_uv + (width - 1) * 2;  // Last UV pair.
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = s[0];
    dst_v[i] = s[1];
    s -= 2;
  }
}
2625
// Mirror a row of ARGB pixels (4 bytes each). Pixel order is reversed;
// the byte order within each pixel is preserved.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  // memcpy is used for the 4 byte pixel moves instead of casting the
  // uint8_t buffers to uint32_t*: the buffers carry no alignment
  // guarantee and the cast violates strict aliasing (both UB in C).
  // Compilers emit a single 32 bit load/store at -O2.
  src += (width - 1) * 4;  // Last source pixel.
  for (x = 0; x < width; ++x) {
    memcpy(dst, src, 4);
    src -= 4;
    dst += 4;
  }
}
2640
// Mirror a row of RGB24 pixels; the byte order within each 3 byte
// pixel is preserved.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  const uint8_t* s = src_rgb24 + (width - 1) * 3;  // Last pixel.
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = s[0];
    dst_rgb24[1] = s[1];
    dst_rgb24[2] = s[2];
    dst_rgb24 += 3;
    s -= 3;
  }
}
2655
// De-interleave a row of UV pairs into separate U and V planes.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[0];
    dst_v[i] = src_uv[1];
    src_uv += 2;
  }
}
2673
// Interleave separate U and V planes into a row of UV pairs.
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[0] = src_u[i];
    dst_uv[1] = src_v[i];
    dst_uv += 2;
  }
}
2691
// De-interleave a row of 3 byte RGB pixels into three planes.
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_r[i] = src_rgb[3 * i + 0];
    dst_g[i] = src_rgb[3 * i + 1];
    dst_b[i] = src_rgb[3 * i + 2];
  }
}
2705
// Interleave three planes into a row of 3 byte RGB pixels.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[3 * i + 0] = src_r[i];
    dst_rgb[3 * i + 1] = src_g[i];
    dst_rgb[3 * i + 2] = src_b[i];
  }
}
2719
// De-interleave a row of 4 byte B,G,R,A pixels into four planes.
void SplitARGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    uint8_t* dst_a,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
    dst_a[i] = src_argb[4 * i + 3];
  }
}
2735
// Interleave four planes into a row of 4 byte B,G,R,A pixels.
void MergeARGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    const uint8_t* src_a,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = src_b[i];
    dst_argb[4 * i + 1] = src_g[i];
    dst_argb[4 * i + 2] = src_r[i];
    dst_argb[4 * i + 3] = src_a[i];
  }
}
2751
// Interleave R/G/B planes of `depth` bit samples into AR30 pixels
// (alpha bits forced to 3). Samples are shifted down to 10 bits.
void MergeXR30Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint8_t* dst_ar30,
                    int depth,
                    int width) {
  const int shift = depth - 10;
  uint32_t* dst32 = (uint32_t*)dst_ar30;
  int i;
  assert(depth >= 10);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    uint32_t r = clamp1023(src_r[i] >> shift);
    uint32_t g = clamp1023(src_g[i] >> shift);
    uint32_t b = clamp1023(src_b[i] >> shift);
    dst32[i] = b | (g << 10) | (r << 20) | 0xc0000000;
  }
}
2770
// Interleave R/G/B/A planes into AR64 (B,G,R,A uint16 order), clamping
// each sample to `depth` bits and shifting lsb-justified data up to msb.
void MergeAR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    const uint16_t* src_a,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  const int shift = 16 - depth;
  const int max = (1 << depth) - 1;
  int i;
  assert(depth >= 1);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_ar64[4 * i + 0] = ClampMax(src_b[i], max) << shift;
    dst_ar64[4 * i + 1] = ClampMax(src_g[i], max) << shift;
    dst_ar64[4 * i + 2] = ClampMax(src_r[i], max) << shift;
    dst_ar64[4 * i + 3] = ClampMax(src_a[i], max) << shift;
  }
}
2791
// Interleave 16 bit R/G/B/A planes into 8 bit ARGB, shifting `depth`
// bit samples down to 8 bits with saturation.
void MergeARGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         const uint16_t* src_a,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  const int shift = depth - 8;
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = clamp255(src_b[i] >> shift);
    dst_argb[4 * i + 1] = clamp255(src_g[i] >> shift);
    dst_argb[4 * i + 2] = clamp255(src_r[i] >> shift);
    dst_argb[4 * i + 3] = clamp255(src_a[i] >> shift);
  }
}
2811
// Interleave R/G/B planes into AR64 with a fully opaque alpha channel,
// clamping to `depth` bits and shifting lsb-justified data up to msb.
void MergeXR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  const int shift = 16 - depth;
  const int max = (1 << depth) - 1;
  int i;
  assert(depth >= 1);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_ar64[4 * i + 0] = ClampMax(src_b[i], max) << shift;
    dst_ar64[4 * i + 1] = ClampMax(src_g[i], max) << shift;
    dst_ar64[4 * i + 2] = ClampMax(src_r[i], max) << shift;
    dst_ar64[4 * i + 3] = 0xffff;  // Opaque alpha.
  }
}
2831
// Interleave 16 bit R/G/B planes into 8 bit ARGB with opaque alpha,
// shifting `depth` bit samples down to 8 bits with saturation.
void MergeXRGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  const int shift = depth - 8;
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = clamp255(src_b[i] >> shift);
    dst_argb[4 * i + 1] = clamp255(src_g[i] >> shift);
    dst_argb[4 * i + 2] = clamp255(src_r[i] >> shift);
    dst_argb[4 * i + 3] = 0xff;  // Opaque alpha.
  }
}
2850
// Split interleaved ARGB into planar B, G and R rows; alpha is discarded.
void SplitXRGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
  }
}
2864
// Interleave planar R, G, B rows into ARGB with alpha forced to opaque.
void MergeXRGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = src_b[i];
    dst_argb[4 * i + 1] = src_g[i];
    dst_argb[4 * i + 2] = src_r[i];
    dst_argb[4 * i + 3] = 255;
  }
}
2879
2880 // Convert lsb formats to msb, depending on sample depth.
// Interleave LSB-justified planar U and V (with 'depth' significant bits)
// into UV pairs, shifting each sample up to MSB justification.
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int depth,
                     int width) {
  const int shift = 16 - depth;
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i] << shift;
    dst_uv[2 * i + 1] = src_v[i] << shift;
  }
}
2896
2897 // Convert msb formats to lsb, depending on sample depth.
// Split MSB-justified interleaved UV into LSB-justified planar U and V,
// shifting each sample down to 'depth' significant bits.
void SplitUVRow_16_C(const uint16_t* src_uv,
                     uint16_t* dst_u,
                     uint16_t* dst_v,
                     int depth,
                     int width) {
  const int shift = 16 - depth;
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0] >> shift;
    dst_v[i] = src_uv[2 * i + 1] >> shift;
  }
}
2913
// Multiply each 16-bit sample by 'scale'; the product is truncated to the
// low 16 bits by the assignment.
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int i = 0;
  while (i < width) {
    dst_y[i] = src_y[i] * scale;
    ++i;
  }
}
2923
// Scale each 16-bit sample down by a 16.16 fixed-point factor:
// dst = (src * scale) >> 16.
void DivideRow_16_C(const uint16_t* src_y,
                    uint16_t* dst_y,
                    int scale,
                    int width) {
  int i = 0;
  while (i < width) {
    dst_y[i] = (src_y[i] * scale) >> 16;
    ++i;
  }
}
2933
2934 // Use scale to convert lsb formats to msb, depending how many bits there are:
2935 // 32768 = 9 bits
2936 // 16384 = 10 bits
2937 // 4096 = 12 bits
2938 // 256 = 16 bits
// Narrow 16-bit samples to 8 bits using a 16.16 fixed-point scale
// (32768 = 9-bit source, 16384 = 10-bit, 4096 = 12-bit, 256 = 16-bit),
// clamping the result to [0, 255].
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  const uint16_t* s = src_y;
  const uint16_t* end = src_y + width;
  uint8_t* d = dst_y;
  assert(scale >= 256);
  assert(scale <= 32768);
  while (s < end) {
    *d++ = clamp255((*s++ * scale) >> 16);
  }
}
2951
2952 // Use scale to convert lsb formats to msb, depending how many bits there are:
2953 // 1024 = 10 bits
// Expand 8-bit samples to 16-bit with a scale selecting the target
// justification (e.g. 1024 for 10 bits). The byte is replicated into both
// halves of a 16-bit value (x * 0x0101) so full-range 8-bit input maps
// onto the full target range before the 16.16 scale is applied.
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  const int mult = scale * 0x0101;  // Replicated-byte multiplier.
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (src_y[i] * mult) >> 16;
  }
}
2964
// Copy a row of 'count' bytes.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, (size_t)count);
}
2968
// Copy a row of 'count' 16-bit samples.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(uint16_t));
}
2972
// Fill a row with the byte value v8.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, (size_t)width);
}
2976
// Fill a row of ARGB pixels with the 32-bit value v32 (memcpy avoids any
// alignment assumption on dst_argb).
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  uint8_t* d = dst_argb;
  int i;
  for (i = 0; i < width; ++i) {
    memcpy(d, &v32, sizeof v32);
    d += sizeof v32;
  }
}
2983
2984 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Filter two rows of YUY2 (Y0 U Y1 V) chroma down to one row of planar U
// and V (420): each output is the rounded average of the vertically
// adjacent samples.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* row1 = src_yuy2 + src_stride_yuy2;
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_yuy2[1] + row1[1] + 1) >> 1;
    dst_v[0] = (src_yuy2[3] + row1[3] + 1) >> 1;
    src_yuy2 += 4;
    row1 += 4;
    ++dst_u;
    ++dst_v;
  }
}
3000
3001 // Copy row of YUY2 UV's (422) into U and V (422).
// Copy the U and V samples of one YUY2 row into planar U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
3016
3017 // Copy row of YUY2 Y's (422) into Y (420/422).
// Extract the Y samples of a YUY2 row (420/422 luma).
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}
3030
3031 // Filter 2 rows of UYVY UV's (422) into U and V (420).
// Filter two rows of UYVY (U Y0 V Y1) chroma down to one row of planar U
// and V (420) with rounded vertical averaging.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* row1 = src_uyvy + src_stride_uyvy;
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_uyvy[0] + row1[0] + 1) >> 1;
    dst_v[0] = (src_uyvy[2] + row1[2] + 1) >> 1;
    src_uyvy += 4;
    row1 += 4;
    ++dst_u;
    ++dst_v;
  }
}
3047
3048 // Copy row of UYVY UV's (422) into U and V (422).
// Copy the U and V samples of one UYVY row into planar U and V (422).
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
3063
3064 // Copy row of UYVY Y's (422) into Y (420/422).
// Extract the Y samples of a UYVY row (420/422 luma).
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}
3077
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)

// Blend a single ARGB pixel: fg over bg, writing opaque alpha.
static void ARGBBlendPixel_C(const uint8_t* fg,
                             const uint8_t* bg,
                             uint8_t* out) {
  const uint32_t alpha = fg[3];
  out[0] = BLEND(fg[0], bg[0], alpha);
  out[1] = BLEND(fg[1], bg[1], alpha);
  out[2] = BLEND(fg[2], bg[2], alpha);
  out[3] = 255u;
}

// Blend src_argb over src_argb1 and store to dst_argb.
// dst_argb may be src_argb or src_argb1.
// This code mimics the SSSE3 version for better testability.
void ARGBBlendRow_C(const uint8_t* src_argb,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  // Two pixels per iteration, then an optional odd trailing pixel.
  for (i = 0; i < width - 1; i += 2) {
    ARGBBlendPixel_C(src_argb, src_argb1, dst_argb);
    ARGBBlendPixel_C(src_argb + 4, src_argb1 + 4, dst_argb + 4);
    src_argb += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    ARGBBlendPixel_C(src_argb, src_argb1, dst_argb);
  }
}
#undef BLEND
3132
#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
// Blend two planes using a per-pixel alpha plane:
// dst = (src0 * alpha + src1 * (255 - alpha) + 255) >> 8.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int i;
  // Pairs first, then an optional odd trailing sample.
  for (i = 0; i + 2 <= width; i += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND
3153
#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (f * a + 128) >> 8
#else
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
#endif

// Multiply source RGB by alpha and store to destination; the alpha channel
// is copied through unchanged.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  // Two pixels per iteration, then an optional odd trailing pixel.
  // Channels are widened to uint32_t so the macro arithmetic is unsigned.
  for (i = 0; i + 2 <= width; i += 2) {
    const uint32_t b0 = src_argb[0];
    const uint32_t g0 = src_argb[1];
    const uint32_t r0 = src_argb[2];
    const uint32_t a0 = src_argb[3];
    const uint32_t b1 = src_argb[4];
    const uint32_t g1 = src_argb[5];
    const uint32_t r1 = src_argb[6];
    const uint32_t a1 = src_argb[7];
    dst_argb[0] = ATTENUATE(b0, a0);
    dst_argb[1] = ATTENUATE(g0, a0);
    dst_argb[2] = ATTENUATE(r0, a0);
    dst_argb[3] = a0;
    dst_argb[4] = ATTENUATE(b1, a1);
    dst_argb[5] = ATTENUATE(g1, a1);
    dst_argb[6] = ATTENUATE(r1, a1);
    dst_argb[7] = a1;
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE
3197
3198 // Divide source RGB by alpha and store to destination.
3199 // b = (b * 255 + (a / 2)) / a;
3200 // g = (g * 255 + (a / 2)) / a;
3201 // r = (r * 255 + (a / 2)) / a;
3202 // Reciprocal method is off by 1 on some values. ie 125
3203 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// T(a) packs 1.0 (0x0100) in 8.8 fixed point into the upper 16 bits and
// the 8.8 reciprocal 65536 / a into the lower 16 bits of each entry.
#define T(a) 0x01000000 + (0x10000 / a)
// Entry 0 has a zero low half (alpha 0 yields 0); entry 1 holds the
// clamped maximum 0xffff; entry 255 is special-cased to 0x0100 (exactly
// 1.0 in 8.8) so unattenuating a fully opaque pixel is an identity.
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
3244
ARGBUnattenuateRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)3245 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3246 uint8_t* dst_argb,
3247 int width) {
3248 int i;
3249 for (i = 0; i < width; ++i) {
3250 uint32_t b = src_argb[0];
3251 uint32_t g = src_argb[1];
3252 uint32_t r = src_argb[2];
3253 const uint32_t a = src_argb[3];
3254 const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
3255 b = (b * ia) >> 8;
3256 g = (g * ia) >> 8;
3257 r = (r * ia) >> 8;
3258 // Clamping should not be necessary but is free in assembly.
3259 dst_argb[0] = clamp255(b);
3260 dst_argb[1] = clamp255(g);
3261 dst_argb[2] = clamp255(r);
3262 dst_argb[3] = a;
3263 src_argb += 4;
3264 dst_argb += 4;
3265 }
3266 }
3267
// Build one row of a per-channel (4-byte pixel) summed-area table: a
// running horizontal sum across the row plus the previous row's sums.
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t sums[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    for (c = 0; c < 4; ++c) {
      sums[c] += row[x * 4 + c];
      cumsum[x * 4 + c] = sums[c] + previous_cumsum[x * 4 + c];
    }
  }
}
3285
// Average pixels from a summed-area table: for each output pixel the box
// sum is bl[w + c] + tl[c] - bl[c] - tl[w + c], scaled by 1 / area.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  const float ooa = 1.0f / area;  // Reciprocal avoids a divide per channel.
  int i;
  for (i = 0; i < count; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8_t)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * ooa);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
3304
3305 // Copy pixels from rotated source to destination row with a slope.
3306 LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  // Copy pixels from a rotated source along a slope: (u, v) starts at
  // uv_dudv[0..1] and steps by (du, dv) = uv_dudv[2..3] per output pixel.
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  int i;
  for (i = 0; i < width; ++i) {
    const int x = (int)u;
    const int y = (int)v;
    *(uint32_t*)(dst_argb) =
        *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
    dst_argb += 4;
    u += uv_dudv[2];
    v += uv_dudv[3];
  }
}
3327
3328 // Blend 2 rows into 1.
// Blend two rows into one: each output byte is the rounded average of
// vertically adjacent samples.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* src1 = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (src_uv[i] + src1[i] + 1) >> 1;
  }
}
3338
// 16-bit variant of HalfRow_C: rounded average of two rows.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  const uint16_t* src1 = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (src_uv[i] + src1[i] + 1) >> 1;
  }
}
3348
3349 // C version 2x2 -> 2x1.
// Vertical bilinear filter (2x2 -> 2x1): blend two rows with an 8-bit
// fraction, rounding. Fraction 0 is a plain copy of the first row and 128
// is an exact half blend; both take fast paths.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width; ++x) {
    dst_ptr[x] =
        (src_ptr[x] * y0_fraction + src_ptr1[x] * y1_fraction + 128) >> 8;
  }
}
3381
// 16-bit variant of InterpolateRow_C. Note there is no rounding term in
// the blend, matching the original implementation.
void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width; ++x) {
    dst_ptr[x] = (src_ptr[x] * y0_fraction + src_ptr1[x] * y1_fraction) >> 8;
  }
}
3410
3411 // Use first 4 shuffler values to reorder ARGB channels.
// Reorder ARGB channels using the first 4 shuffler indices.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  const int i0 = shuffler[0];
  const int i1 = shuffler[1];
  const int i2 = shuffler[2];
  const int i3 = shuffler[3];
  int x;
  for (x = 0; x < width; ++x) {
    // Read all four channels before writing so in-place shuffles work.
    const uint8_t c0 = src_argb[i0];
    const uint8_t c1 = src_argb[i1];
    const uint8_t c2 = src_argb[i2];
    const uint8_t c3 = src_argb[i3];
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = c3;
    src_argb += 4;
    dst_argb += 4;
  }
}
3436
// Pack planar I422 into YUY2 (Y0 U Y1 V). For an odd width the final
// group's second luma sample is written as 0.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;  // No second luma sample.
    dst_frame[3] = src_v[0];
  }
}
3460
// Pack planar I422 into UYVY (U Y0 V Y1). For an odd width the final
// group's second luma sample is written as 0.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;  // No second luma sample.
  }
}
3484
// Apply a per-channel cubic polynomial out = p0 + p1*v + p2*v^2 + p3*v^3.
// Coefficients are interleaved in 'poly' as 4 channels (B,G,R,A) per term.
// The additions are performed in a fixed order (constant+linear, then
// quadratic, then cubic) so float results match the original.
void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const float b = (float)(src_argb[0]);
    const float g = (float)(src_argb[1]);
    const float r = (float)(src_argb[2]);
    const float a = (float)(src_argb[3]);
    float db = poly[0] + poly[4] * b;
    float dg = poly[1] + poly[5] * g;
    float dr = poly[2] + poly[6] * r;
    float da = poly[3] + poly[7] * a;
    db += poly[8] * (b * b);
    dg += poly[9] * (g * g);
    dr += poly[10] * (r * r);
    da += poly[11] * (a * a);
    db += poly[12] * ((b * b) * b);
    dg += poly[13] * ((g * g) * g);
    dr += poly[14] * ((r * r) * r);
    da += poly[15] * ((a * a) * a);
    dst_argb[0] = Clamp((int32_t)(db));
    dst_argb[1] = Clamp((int32_t)(dg));
    dst_argb[2] = Clamp((int32_t)(dr));
    dst_argb[3] = Clamp((int32_t)(da));
    src_argb += 4;
    dst_argb += 4;
  }
}
3524
3525 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
3526 // adjust the source integer range to the half float range desired.
3527
3528 // This magic constant is 2^-112. Multiplying by this
3529 // is the same as subtracting 112 from the exponent, which
3530 // is the difference in exponent bias between 32-bit and
3531 // 16-bit floats. Once we've done this subtraction, we can
3532 // simply extract the low bits of the exponent and the high
3533 // bits of the mantissa from our float and we're done.
3534
3535 // Work around GCC 7 punning warning -Wstrict-aliasing
3536 #if defined(__GNUC__)
3537 typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3538 #else
3539 typedef uint32_t uint32_alias_t;
3540 #endif
3541
// Convert 16-bit integer samples to IEEE half floats, scaled by 'scale'.
// Multiplying by 2^-112 subtracts the exponent-bias difference between
// 32-bit and 16-bit floats, so the half-float bits are simply the float's
// low exponent and high mantissa bits (>> 13).
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  const float mult = 1.9259299444e-34f * scale;  // 2^-112 * scale.
  int i;
  for (i = 0; i < width; ++i) {
    const float value = src[i] * mult;
    uint32_t bits;
    memcpy(&bits, &value, sizeof(bits));  // Type-pun without aliasing UB.
    dst[i] = (uint16_t)(bits >> 13);
  }
}
3553
// Convert bytes to floats, multiplying each sample by 'scale'.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
3561
// Color table lookup selected by luminance: the weighted B/G/R sum (with
// byte weights bc, gc, rc packed into lumacoeff) masked with 0x7F00 picks
// a 256-entry row of 'luma', which is then indexed by each channel value.
// Alpha passes through unchanged.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* table =
        luma +
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
3603
// Copy only the alpha channel between two ARGB rows.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i * 4 + 3];
  }
}
3616
// Extract the alpha channel of an ARGB row into a planar alpha row.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[i] = src_argb[i * 4 + 3];
  }
}
3629
// Copy a planar Y row into the alpha channel of an ARGB row.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i];
  }
}
3642
3643 // Maximum temporary width for wrappers to process at a time, in pixels.
3644 #define MAXTWIDTH 2048
3645
3646 #if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
3647 defined(HAS_I422TORGB565ROW_SSSE3)
3648 // row_win.cc has asm version, but GCC uses 2 step wrapper.
I422ToRGB565Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3649 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
3650 const uint8_t* src_u,
3651 const uint8_t* src_v,
3652 uint8_t* dst_rgb565,
3653 const struct YuvConstants* yuvconstants,
3654 int width) {
3655 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3656 while (width > 0) {
3657 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3658 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3659 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3660 src_y += twidth;
3661 src_u += twidth / 2;
3662 src_v += twidth / 2;
3663 dst_rgb565 += twidth * 2;
3664 width -= twidth;
3665 }
3666 }
3667 #endif
3668
3669 #if defined(HAS_I422TOARGB1555ROW_SSSE3)
I422ToARGB1555Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb1555,const struct YuvConstants * yuvconstants,int width)3670 void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
3671 const uint8_t* src_u,
3672 const uint8_t* src_v,
3673 uint8_t* dst_argb1555,
3674 const struct YuvConstants* yuvconstants,
3675 int width) {
3676 // Row buffer for intermediate ARGB pixels.
3677 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3678 while (width > 0) {
3679 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3680 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3681 ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
3682 src_y += twidth;
3683 src_u += twidth / 2;
3684 src_v += twidth / 2;
3685 dst_argb1555 += twidth * 2;
3686 width -= twidth;
3687 }
3688 }
3689 #endif
3690
3691 #if defined(HAS_I422TOARGB4444ROW_SSSE3)
I422ToARGB4444Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb4444,const struct YuvConstants * yuvconstants,int width)3692 void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
3693 const uint8_t* src_u,
3694 const uint8_t* src_v,
3695 uint8_t* dst_argb4444,
3696 const struct YuvConstants* yuvconstants,
3697 int width) {
3698 // Row buffer for intermediate ARGB pixels.
3699 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3700 while (width > 0) {
3701 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3702 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3703 ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
3704 src_y += twidth;
3705 src_u += twidth / 2;
3706 src_v += twidth / 2;
3707 dst_argb4444 += twidth * 2;
3708 width -= twidth;
3709 }
3710 }
3711 #endif
3712
3713 #if defined(HAS_NV12TORGB565ROW_SSSE3)
NV12ToRGB565Row_SSSE3(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3714 void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
3715 const uint8_t* src_uv,
3716 uint8_t* dst_rgb565,
3717 const struct YuvConstants* yuvconstants,
3718 int width) {
3719 // Row buffer for intermediate ARGB pixels.
3720 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3721 while (width > 0) {
3722 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3723 NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3724 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3725 src_y += twidth;
3726 src_uv += twidth;
3727 dst_rgb565 += twidth * 2;
3728 width -= twidth;
3729 }
3730 }
3731 #endif
3732
3733 #if defined(HAS_NV12TORGB24ROW_SSSE3)
NV12ToRGB24Row_SSSE3(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)3734 void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
3735 const uint8_t* src_uv,
3736 uint8_t* dst_rgb24,
3737 const struct YuvConstants* yuvconstants,
3738 int width) {
3739 // Row buffer for intermediate ARGB pixels.
3740 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3741 while (width > 0) {
3742 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3743 NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3744 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3745 src_y += twidth;
3746 src_uv += twidth;
3747 dst_rgb24 += twidth * 3;
3748 width -= twidth;
3749 }
3750 }
3751 #endif
3752
#if defined(HAS_NV21TORGB24ROW_SSSE3)
// Convert a row of NV21 (VU-interleaved chroma) to RGB24 via an
// intermediate ARGB tile of at most MAXTWIDTH pixels.
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, argb_row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, twidth);
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3772
#if defined(HAS_NV12TORGB24ROW_AVX2)
// Convert a row of NV12 to RGB24 via an intermediate ARGB tile, using
// AVX2 for the YUV expansion and the widest available RGB24 repack.
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(argb_row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3796
#if defined(HAS_NV21TORGB24ROW_AVX2)
// Convert a row of NV21 to RGB24 via an intermediate ARGB tile, using
// AVX2 for the YUV expansion and the widest available RGB24 repack.
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(argb_row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3820
#if defined(HAS_I422TORGB565ROW_AVX2)
// Convert a row of I422 (planar, half-width chroma) to RGB565 via an
// intermediate ARGB tile of at most MAXTWIDTH pixels.
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // I422 chroma planes are half width.
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;  // 2 bytes per RGB565 pixel.
  }
}
#endif
3845
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Convert a row of I422 to ARGB1555 via an intermediate ARGB tile of at
// most MAXTWIDTH pixels.
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(argb_row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(argb_row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // I422 chroma planes are half width.
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;  // 2 bytes per ARGB1555 pixel.
  }
}
#endif
3871
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Convert a row of I422 to ARGB4444 via an intermediate ARGB tile of at
// most MAXTWIDTH pixels.
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(argb_row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(argb_row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // I422 chroma planes are half width.
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;  // 2 bytes per ARGB4444 pixel.
  }
}
#endif
3897
#if defined(HAS_I422TORGB24ROW_AVX2)
// Convert a row of I422 to RGB24 via an intermediate ARGB tile of at
// most MAXTWIDTH pixels.
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(argb_row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // I422 chroma planes are half width.
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3923
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Convert a row of NV12 to RGB565 via an intermediate ARGB tile, using
// AVX2 for the YUV expansion and the widest available RGB565 repack.
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, argb_row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;          // UV pairs: half-width, 2 bytes each.
    dst_rgb565 += twidth * 2;  // 2 bytes per RGB565 pixel.
  }
}
#endif
3947
#ifdef HAS_RGB24TOYJROW_AVX2
// Convert RGB24 pixels to JPEG-range luma (YJ) by expanding a tile of
// up to MAXTWIDTH pixels to ARGB, then extracting YJ with AVX2.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    RGB24ToARGBRow_SSSE3(src_rgb24, argb_row, twidth);
    ARGBToYJRow_AVX2(argb_row, dst_yj, twidth);
    src_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
    dst_yj += twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2
3963
#ifdef HAS_RAWTOYJROW_AVX2
// Convert RAW (BGR byte order) pixels to JPEG-range luma (YJ) by
// expanding a tile of up to MAXTWIDTH pixels to ARGB first.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    RAWToARGBRow_SSSE3(src_raw, argb_row, twidth);
    ARGBToYJRow_AVX2(argb_row, dst_yj, twidth);
    src_raw += twidth * 3;  // 3 bytes per RAW pixel.
    dst_yj += twidth;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2
3979
#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert RGB24 pixels to JPEG-range luma (YJ) by expanding a tile of
// up to MAXTWIDTH pixels to ARGB, then extracting YJ with SSSE3.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    RGB24ToARGBRow_SSSE3(src_rgb24, argb_row, twidth);
    ARGBToYJRow_SSSE3(argb_row, dst_yj, twidth);
    src_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
    dst_yj += twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3
3995
#ifdef HAS_RAWTOYJROW_SSSE3
// Convert RAW (BGR byte order) pixels to JPEG-range luma (YJ) by
// expanding a tile of up to MAXTWIDTH pixels to ARGB first.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Scratch tile of intermediate ARGB pixels (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_row[MAXTWIDTH * 4]);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= MAXTWIDTH) {
    const int twidth = remaining < MAXTWIDTH ? remaining : MAXTWIDTH;
    RAWToARGBRow_SSSE3(src_raw, argb_row, twidth);
    ARGBToYJRow_SSSE3(argb_row, dst_yj, twidth);
    src_raw += twidth * 3;  // 3 bytes per RAW pixel.
    dst_yj += twidth;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3
4011
// Scale each sample by 'scale' into dst, and return the sum of squares
// of the (unscaled) input samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float sum_sq = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    const float v = src[i];
    sum_sq += v * v;     // Accumulate energy of the original signal.
    dst[i] = v * scale;  // Write the scaled sample.
  }
  return sum_sq;
}
4022
// Scale each sample by 'scale' into dst, and return the maximum of the
// (unscaled) input samples. Note the max is taken before scaling, and
// starts at 0 so an all-negative input still returns 0.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float vmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    const float v = src[i];
    if (v > vmax) {
      vmax = v;
    }
    dst[i] = v * scale;
  }
  return vmax;
}
4034
// Multiply each of 'width' float samples by 'scale' into dst.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
4041
// Horizontal 5-tap Gaussian (1,4,6,4,1) with rounding, scaled down by
// 256. Reads width + 4 source values; dst[i] uses src[i..i+4].
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t sum =
        src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 + src[i + 4];
    dst[i] = (uint16_t)((sum + 128) >> 8);  // Round and scale by 1/256.
  }
}
4050
4051 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
GaussCol_C(const uint16_t * src0,const uint16_t * src1,const uint16_t * src2,const uint16_t * src3,const uint16_t * src4,uint32_t * dst,int width)4052 void GaussCol_C(const uint16_t* src0,
4053 const uint16_t* src1,
4054 const uint16_t* src2,
4055 const uint16_t* src3,
4056 const uint16_t* src4,
4057 uint32_t* dst,
4058 int width) {
4059 int i;
4060 for (i = 0; i < width; ++i) {
4061 *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
4062 }
4063 }
4064
// Horizontal 5-tap Gaussian (1,4,6,4,1) on floats, scaled by 1/256.
// Reads width + 4 source values; dst[i] uses src[i..i+4].
void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const float sum =
        src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 + src[i + 4];
    dst[i] = sum * (1.0f / 256.0f);
  }
}
4073
// Vertical 5-tap Gaussian on floats: combine 5 rows with 1,4,6,4,1
// coefficients to produce one row (no scaling; GaussRow_F32_C scales).
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4087
// Convert biplanar NV21 to packed YUV24. Output byte order per pixel is
// V, U, Y; each VU pair from src_vu is duplicated across two pixels.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    const uint8_t v = src_vu[0];
    const uint8_t u = src_vu[1];
    dst_yuv24[0] = v;
    dst_yuv24[1] = u;
    dst_yuv24[2] = src_y[0];  // Y0
    dst_yuv24[3] = v;         // Same chroma for the second pixel.
    dst_yuv24[4] = u;
    dst_yuv24[5] = src_y[1];  // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  // Trailing odd pixel reuses the next VU pair.
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}
4111
// Filter 2 rows of AYUV UV's (444) down to one row of UV (420).
// AYUV is V,U,Y,A in memory; NV12 output is U then V.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;  // Second source row.
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    // Average the 2x2 block of U samples (offset 1 within each pixel).
    dst_uv[0] =
        (uint8_t)((src_ayuv[1] + src_ayuv[5] + row1[1] + row1[5] + 2) >> 2);
    // Average the 2x2 block of V samples (offset 0 within each pixel).
    dst_uv[1] =
        (uint8_t)((src_ayuv[0] + src_ayuv[4] + row1[0] + row1[4] + 2) >> 2);
    src_ayuv += 8;  // 2 pixels * 4 bytes.
    row1 += 8;
    dst_uv += 2;
  }
  // Odd trailing column: average vertically only.
  if (width & 1) {
    dst_uv[0] = (uint8_t)((src_ayuv[1] + row1[1] + 1) >> 1);
    dst_uv[1] = (uint8_t)((src_ayuv[0] + row1[0] + 1) >> 1);
  }
}
4135
// Filter 2 rows of AYUV UV's (444) down to one row of VU (420).
// AYUV is V,U,Y,A in memory; NV21 output is V then U.
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;  // Second source row.
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    // Average the 2x2 block of V samples (offset 0 within each pixel).
    dst_vu[0] =
        (uint8_t)((src_ayuv[0] + src_ayuv[4] + row1[0] + row1[4] + 2) >> 2);
    // Average the 2x2 block of U samples (offset 1 within each pixel).
    dst_vu[1] =
        (uint8_t)((src_ayuv[1] + src_ayuv[5] + row1[1] + row1[5] + 2) >> 2);
    src_ayuv += 8;  // 2 pixels * 4 bytes.
    row1 += 8;
    dst_vu += 2;
  }
  // Odd trailing column: average vertically only.
  if (width & 1) {
    dst_vu[0] = (uint8_t)((src_ayuv[0] + row1[0] + 1) >> 1);
    dst_vu[1] = (uint8_t)((src_ayuv[1] + row1[1] + 1) >> 1);
  }
}
4158
// Copy the Y channel out of a row of AYUV (V,U,Y,A in memory).
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[4 * x + 2];  // Y is byte 2 of each v,u,y,a pixel.
  }
}
4168
// Convert an interleaved UV plane (NV12) to VU (NV21) by swapping each
// byte pair. Reads both bytes before writing, so src and dst may alias.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t u = src_uv[2 * x + 0];
    const uint8_t v = src_uv[2 * x + 1];
    dst_vu[2 * x + 0] = v;
    dst_vu[2 * x + 1] = u;
  }
}
4181
// Downsample 2 rows of separate U and V planes 2x2 into one interleaved
// UV row, with rounding.
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    // Average each 2x2 block of U and of V.
    dst_uv[0] = (uint8_t)((src_u[0] + src_u[1] + src_u[src_stride_u] +
                           src_u[src_stride_u + 1] + 2) >>
                          2);
    dst_uv[1] = (uint8_t)((src_v[0] + src_v[1] + src_v[src_stride_v] +
                           src_v[src_stride_v + 1] + 2) >>
                          2);
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  // Odd trailing column: average vertically only.
  if (width & 1) {
    dst_uv[0] = (uint8_t)((src_u[0] + src_u[src_stride_u] + 1) >> 1);
    dst_uv[1] = (uint8_t)((src_v[0] + src_v[src_stride_v] + 1) >> 1);
  }
}
4205
4206 #ifdef __cplusplus
4207 } // extern "C"
4208 } // namespace libyuv
4209 #endif
4210