/*
 * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/avutil.h"
#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
#include "config.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"

DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
    {  1,   3,   1,   3,   1,   3,   1,   3, },
    {  2,   0,   2,   0,   2,   0,   2,   0, },
    {  1,   3,   1,   3,   1,   3,   1,   3, },
};

DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
    {  6,   2,   6,   2,   6,   2,   6,   2, },
    {  0,   4,   0,   4,   0,   4,   0,   4, },
    {  6,   2,   6,   2,   6,   2,   6,   2, },
};

DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
    {  8,   4,  11,   7,   8,   4,  11,   7, },
    {  2,  14,   1,  13,   2,  14,   1,  13, },
    { 10,   6,   9,   5,  10,   6,   9,   5, },
    {  0,  12,   3,  15,   0,  12,   3,  15, },
    {  8,   4,  11,   7,   8,   4,  11,   7, },
};

DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
    { 17,   9,  23,  15,  16,   8,  22,  14, },
    {  5,  29,   3,  27,   4,  28,   2,  26, },
    { 21,  13,  19,  11,  20,  12,  18,  10, },
    {  0,  24,   6,  30,   1,  25,   7,  31, },
    { 16,   8,  22,  14,  17,   9,  23,  15, },
    {  4,  28,   2,  26,   5,  29,   3,  27, },
    { 20,  12,  18,  10,  21,  13,  19,  11, },
    {  1,  25,   7,  31,   0,  24,   6,  30, },
    { 17,   9,  23,  15,  16,   8,  22,  14, },
};

DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
    {  0,  55,  14,  68,   3,  58,  17,  72, },
    { 37,  18,  50,  32,  40,  22,  54,  35, },
    {  9,  64,   5,  59,  13,  67,   8,  63, },
    { 46,  27,  41,  23,  49,  31,  44,  26, },
    {  2,  57,  16,  71,   1,  56,  15,  70, },
    { 39,  21,  52,  34,  38,  19,  51,  33, },
    { 11,  66,   7,  62,  10,  65,   6,  60, },
    { 48,  30,  43,  25,  47,  29,  42,  24, },
    {  0,  55,  14,  68,   3,  58,  17,  72, },
};

#if 1
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
    {117,  62, 158, 103, 113,  58, 155, 100, },
    { 34, 199,  21, 186,  31, 196,  17, 182, },
    {144,  89, 131,  76, 141,  86, 127,  72, },
    {  0, 165,  41, 206,  10, 175,  52, 217, },
    {110,  55, 151,  96, 120,  65, 162, 107, },
    { 28, 193,  14, 179,  38, 203,  24, 189, },
    {138,  83, 124,  69, 148,  93, 134,  79, },
    {  7, 172,  48, 213,   3, 168,  45, 210, },
    {117,  62, 158, 103, 113,  58, 155, 100, },
};
#elif 1
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
    {  0, 143,  18, 200,   2, 156,  25, 215, },
    { 78,  28, 125,  64,  89,  36, 138,  74, },
    { 10, 180,   3, 161,  16, 195,   8, 175, },
    {109,  51,  93,  38, 121,  60, 105,  47, },
    {  1, 152,  23, 210,   0, 147,  20, 205, },
    { 85,  33, 134,  71,  81,  30, 130,  67, },
    { 14, 190,   6, 171,  12, 185,   5, 166, },
    {117,  57, 101,  44, 113,  54,  97,  41, },
    {  0, 143,  18, 200,   2, 156,  25, 215, },
};
#elif 1
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
    {  0, 124,   8, 193,   0, 140,  12, 213, },
    { 55,  14, 104,  42,  66,  19, 119,  52, },
    {  3, 168,   1, 145,   6, 187,   3, 162, },
    { 86,  31,  70,  21,  99,  39,  82,  28, },
    {  0, 134,  11, 206,   0, 129,   9, 200, },
    { 62,  17, 114,  48,  58,  16, 109,  45, },
    {  5, 181,   2, 157,   4, 175,   1, 151, },
    { 95,  36,  78,  26,  90,  34,  74,  24, },
    {  0, 124,   8, 193,   0, 140,  12, 213, },
};
#else
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
    {  0, 107,   3, 187,   0, 125,   6, 212, },
    { 39,   7,  86,  28,  49,  11, 102,  36, },
    {  1, 158,   0, 131,   3, 180,   1, 151, },
    { 68,  19,  52,  12,  81,  25,  64,  17, },
    {  0, 119,   5, 203,   0, 113,   4, 195, },
    { 45,   9,  96,  33,  42,   8,  91,  30, },
    {  2, 172,   1, 144,   2, 165,   0, 137, },
    { 77,  23,  60,  15,  72,  21,  56,  14, },
    {  0, 107,   3, 187,   0, 125,   6, 212, },
};
#endif

#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
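
/* Store one 16-bit sample: shift the accumulator down by the enclosing
 * function's `shift`, clip to the signed or unsigned 16-bit range, add
 * `bias`, and write with the requested endianness. For example,
 * output_pixel(&dest[i], val, 0, uint) with shift == 3 stores
 * av_clip_uint16(val >> 3). */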

static av_always_inline void
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 3;
    av_assert0(output_bits == 16);

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val, 0, uint);
    }
}

static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15;
    av_assert0(output_bits == 16);

    for (i = 0; i < dstW; i++) {
        int val = 1 << (shift - 1);
        int j;

        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs), the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end. */
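        /* Worked example: a true accumulator value of 0x7FFF8000 becomes
         * 0x3FFF8000 after the subtraction; (0x3FFF8000 >> 15) == 0x7FFF
         * passes av_clip_int16() unchanged, and the 0x8000 bias in
         * output_pixel() restores the intended 16-bit result 0xFFFF. */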
        val -= 0x40000000;
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * (unsigned)filter[j];

        output_pixel(&dest[i], val, 0x8000, int);
    }
}

static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
{
    uint16_t *dest = (uint16_t*)dest8;
    const int32_t **uSrc = (const int32_t **)chrUSrc;
    const int32_t **vSrc = (const int32_t **)chrVSrc;
    int shift = 15;
    int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
    int i, j;

    for (i = 0; i < chrDstW; i++) {
        int u = 1 << (shift - 1);
        int v = 1 << (shift - 1);

        /* See yuv2planeX_16_c_template for details. */
        u -= 0x40000000;
        v -= 0x40000000;
        for (j = 0; j < chrFilterSize; j++) {
            u += uSrc[j][i] * (unsigned)chrFilter[j];
            v += vSrc[j][i] * (unsigned)chrFilter[j];
        }

        output_pixel(&dest[2*i]  , u, 0x8000, int);
        output_pixel(&dest[2*i+1], v, 0x8000, int);
    }
}

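/* Float output: the samples are first reduced to the 16-bit intermediate
 * range exactly as above, then mapped linearly onto [0.0, 1.0] by
 * multiplying with 1.0f / 65535.0f. The *_bswap templates additionally
 * byte-swap the IEEE-754 bit pattern to produce the opposite-endian
 * layout. */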
static av_always_inline void
yuv2plane1_float_c_template(const int32_t *src, float *dest, int dstW)
{
    static const int big_endian = HAVE_BIGENDIAN;
    static const int shift = 3;
    static const float float_mult = 1.0f / 65535.0f;
    int i, val;
    uint16_t val_uint;

    for (i = 0; i < dstW; ++i){
        val = src[i] + (1 << (shift - 1));
        output_pixel(&val_uint, val, 0, uint);
        dest[i] = float_mult * (float)val_uint;
    }
}

static av_always_inline void
yuv2plane1_float_bswap_c_template(const int32_t *src, uint32_t *dest, int dstW)
{
    static const int big_endian = HAVE_BIGENDIAN;
    static const int shift = 3;
    static const float float_mult = 1.0f / 65535.0f;
    int i, val;
    uint16_t val_uint;

    for (i = 0; i < dstW; ++i){
        val = src[i] + (1 << (shift - 1));
        output_pixel(&val_uint, val, 0, uint);
        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
    }
}

static av_always_inline void
yuv2planeX_float_c_template(const int16_t *filter, int filterSize, const int32_t **src,
                            float *dest, int dstW)
{
    static const int big_endian = HAVE_BIGENDIAN;
    static const int shift = 15;
    static const float float_mult = 1.0f / 65535.0f;
    int i, j, val;
    uint16_t val_uint;

    for (i = 0; i < dstW; ++i){
        val = (1 << (shift - 1)) - 0x40000000;
        for (j = 0; j < filterSize; ++j){
            val += src[j][i] * (unsigned)filter[j];
        }
        output_pixel(&val_uint, val, 0x8000, int);
        dest[i] = float_mult * (float)val_uint;
    }
}

static av_always_inline void
yuv2planeX_float_bswap_c_template(const int16_t *filter, int filterSize, const int32_t **src,
                                  uint32_t *dest, int dstW)
{
    static const int big_endian = HAVE_BIGENDIAN;
    static const int shift = 15;
    static const float float_mult = 1.0f / 65535.0f;
    int i, j, val;
    uint16_t val_uint;

    for (i = 0; i < dstW; ++i){
        val = (1 << (shift - 1)) - 0x40000000;
        for (j = 0; j < filterSize; ++j){
            val += src[j][i] * (unsigned)filter[j];
        }
        output_pixel(&val_uint, val, 0x8000, int);
        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
    }
}

#define yuv2plane1_float(template, dest_type, BE_LE) \
static void yuv2plane1_float ## BE_LE ## _c(const int16_t *src, uint8_t *dest, int dstW, \
                                            const uint8_t *dither, int offset) \
{ \
    template((const int32_t *)src, (dest_type *)dest, dstW); \
}

#define yuv2planeX_float(template, dest_type, BE_LE) \
static void yuv2planeX_float ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                                            const int16_t **src, uint8_t *dest, int dstW, \
                                            const uint8_t *dither, int offset) \
{ \
    template(filter, filterSize, (const int32_t **)src, (dest_type *)dest, dstW); \
}

#if HAVE_BIGENDIAN
yuv2plane1_float(yuv2plane1_float_c_template, float, BE)
yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, LE)
yuv2planeX_float(yuv2planeX_float_c_template, float, BE)
yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, LE)
#else
yuv2plane1_float(yuv2plane1_float_c_template, float, LE)
yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, BE)
yuv2planeX_float(yuv2planeX_float_c_template, float, LE)
yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, BE)
#endif

#undef output_pixel

#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
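
/* Planar intermediates are scaled to 15 bits and the vertical filter
 * coefficients carry 12 fractional bits (4096 == 1.0), so a filtered
 * accumulator holds 27 fractional bits. Hence shift = 15 - output_bits
 * in the unfiltered function below and 11 + 16 - output_bits
 * (= 27 - output_bits) in the filtered one. */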

static av_always_inline void
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val);
    }
}

static av_always_inline void
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 11 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (shift - 1);
        int j;

        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val);
    }
}

#undef output_pixel

#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
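
/* The 9-14 bit outputs all share the int16_t-based "10" template above;
 * 16-bit output uses the int32_t-based "16" template defined earlier,
 * which needs the wider accumulator. */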
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(12, BE, 1, 10, int16_t)
yuv2NBPS(12, LE, 0, 10, int16_t)
yuv2NBPS(14, BE, 1, 10, int16_t)
yuv2NBPS(14, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)

static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
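
/* 19 = 15 (intermediate precision) + 12 (filter fraction) - 8 (output
 * depth); the dither value is pre-shifted by 12 so it is applied in the
 * accumulator's fixed-point domain. */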

static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}

static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest, int chrDstW)
{
    enum AVPixelFormat dstFormat = c->dstFormat;
    const uint8_t *chrDither = c->chrDither8;
    int i;

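    /* NV12 interleaves chroma as U,V; the else branch below emits the
     * pair as V,U for NV21-style output. */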
    if (dstFormat == AV_PIX_FMT_NV12)
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            int j;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            dest[2*i]= av_clip_uint8(u>>19);
            dest[2*i+1]= av_clip_uint8(v>>19);
        }
    else
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            int j;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            dest[2*i]= av_clip_uint8(v>>19);
            dest[2*i+1]= av_clip_uint8(u>>19);
        }
}


#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
    }
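
/* P010 keeps its 10 significant bits in the most significant bits of
 * each 16-bit word, hence the << 6 after clipping. */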

static void yuv2p010l1_c(const int16_t *src,
                         uint16_t *dest, int dstW,
                         int big_endian)
{
    int i;
    int shift = 5;

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val);
    }
}

static void yuv2p010lX_c(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian)
{
    int i, j;
    int shift = 17;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (shift - 1);

        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val);
    }
}

static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
{
    uint16_t *dest = (uint16_t*)dest8;
    int shift = 17;
    int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
    int i, j;

    for (i = 0; i < chrDstW; i++) {
        int u = 1 << (shift - 1);
        int v = 1 << (shift - 1);

        for (j = 0; j < chrFilterSize; j++) {
            u += chrUSrc[j][i] * chrFilter[j];
            v += chrVSrc[j][i] * chrFilter[j];
        }

        output_pixel(&dest[2*i]  , u);
        output_pixel(&dest[2*i+1], v);
    }
}

static void yuv2p010l1_LE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0);
}

static void yuv2p010l1_BE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1);
}

static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0);
}

static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
}

#undef output_pixel


#define accumulate_bit(acc, val) \
    acc <<= 1; \
    acc |= (val) >= 234
#define output_pixel(pos, acc) \
    if (target == AV_PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
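
/* accumulate_bit() gathers one bit per pixel, MSB first; a full byte is
 * flushed every 8 pixels. MONOWHITE uses inverted polarity, hence the
 * ~acc in output_pixel(). */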

static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum AVPixelFormat target)
{
    const uint8_t * const d128 = ff_dither_8x8_220[y&7];
    int i;
    unsigned acc = 0;
    int err = 0;

    for (i = 0; i < dstW; i += 2) {
        int j;
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i]   * lumFilter[j];
            Y2 += lumSrc[j][i+1] * lumFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        if ((Y1 | Y2) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
        }
        if (c->dither == SWS_DITHER_ED) {
            Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
            c->dither_error[0][i] = err;
            acc = 2*acc + (Y1 >= 128);
            Y1 -= 220*(acc&1);

            err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
            c->dither_error[0][i+1] = Y1;
            acc = 2*acc + (err >= 128);
            err -= 220*(acc&1);
        } else {
            accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
            accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
        }
        if ((i & 7) == 6) {
            output_pixel(*dest++, acc);
        }
    }
    c->dither_error[0][i] = err;

    if (i & 6) {
        output_pixel(*dest, acc);
    }
}

static av_always_inline void
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum AVPixelFormat target)
{
    const int16_t *buf0 = buf[0], *buf1 = buf[1];
    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
    int yalpha1 = 4096 - yalpha;
    int i;
    av_assert2(yalpha <= 4096U);

    if (c->dither == SWS_DITHER_ED) {
        int err = 0;
        int acc = 0;
        for (i = 0; i < dstW; i +=2) {
            int Y;

            Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
            Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
            c->dither_error[0][i] = err;
            acc = 2*acc + (Y >= 128);
            Y -= 220*(acc&1);

            err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
            err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
            c->dither_error[0][i+1] = Y;
            acc = 2*acc + (err >= 128);
            err -= 220*(acc&1);

            if ((i & 7) == 6)
                output_pixel(*dest++, acc);
        }
        c->dither_error[0][i] = err;
    } else {
        for (i = 0; i < dstW; i += 8) {
            int Y, acc = 0;

            Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[0]);
            Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[1]);
            Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[2]);
            Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[3]);
            Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[4]);
            Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[5]);
            Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[6]);
            Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
            accumulate_bit(acc, Y + d128[7]);

            output_pixel(*dest++, acc);
        }
    }
}

static av_always_inline void
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf0, uint8_t *dest, int dstW,
                      int uvalpha, int y, enum AVPixelFormat target)
{
    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
    int i;

    if (c->dither == SWS_DITHER_ED) {
        int err = 0;
        int acc = 0;
        for (i = 0; i < dstW; i +=2) {
            int Y;

            Y = ((buf0[i + 0] + 64) >> 7);
            Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
            c->dither_error[0][i] = err;
            acc = 2*acc + (Y >= 128);
            Y -= 220*(acc&1);

            err = ((buf0[i + 1] + 64) >> 7);
            err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
            c->dither_error[0][i+1] = Y;
            acc = 2*acc + (err >= 128);
            err -= 220*(acc&1);

            if ((i & 7) == 6)
                output_pixel(*dest++, acc);
        }
        c->dither_error[0][i] = err;
    } else {
        for (i = 0; i < dstW; i += 8) {
            int acc = 0;
            accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
            accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
            accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
            accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
            accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
            accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
            accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
            accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);

            output_pixel(*dest++, acc);
        }
    }
}

#undef output_pixel
#undef accumulate_bit

#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}

YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)

#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == AV_PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else if (target == AV_PIX_FMT_YVYU422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = V;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = U;  \
    } else { /* AV_PIX_FMT_UYVY422 */ \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
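
/* Each invocation writes one 4-byte group covering two horizontally
 * adjacent pixels: two luma samples plus one shared U/V pair, in the
 * byte order the 4:2:2 target format requires. */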

static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum AVPixelFormat target)
{
    int i;

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int j;
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        output_pixels(4*i, Y1, U, Y2, V);
    }
}

static av_always_inline void
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum AVPixelFormat target)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;

        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }

        output_pixels(i * 4, Y1, U, Y2, V);
    }
}

static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum AVPixelFormat target)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;

    if (uvalpha < 2048) {
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2    ]+64) >> 7;
            int Y2 = (buf0[i * 2 + 1]+64) >> 7;
            int U  = (ubuf0[i]       +64) >> 7;
            int V  = (vbuf0[i]       +64) >> 7;

            if ((Y1 | Y2 | U | V) & 0x100) {
                Y1 = av_clip_uint8(Y1);
                Y2 = av_clip_uint8(Y2);
                U  = av_clip_uint8(U);
                V  = av_clip_uint8(V);
            }

            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);

            output_pixels(i * 4, Y1, U, Y2, V);
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2    ] + 64) >> 7;
            int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[i] + ubuf1[i]+128) >> 8;
            int V  = (vbuf0[i] + vbuf1[i]+128) >> 8;

            if ((Y1 | Y2 | U | V) & 0x100) {
                Y1 = av_clip_uint8(Y1);
                Y2 = av_clip_uint8(Y2);
                U  = av_clip_uint8(U);
                V  = av_clip_uint8(V);
            }

            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);

            output_pixels(i * 4, Y1, U, Y2, V);
        }
    }
}

#undef output_pixels

YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)

#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
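
/* R_B/B_R select the first and third output channel for the current
 * target: R..B order for the RGB48/RGBA64 formats, B..R order for the
 * BGR variants. */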

static av_always_inline void
yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int32_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int32_t **unused_chrUSrc,
                      const int32_t **unused_chrVSrc, int unused_chrFilterSize,
                      const int32_t **alpSrc, uint16_t *dest, int dstW,
                      int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
{
    int hasAlpha = !!alpSrc;
    int i;

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = 1 << 18;
        int64_t A = 0xffff<<14;

        for (j = 0; j < lumFilterSize; j++)
            Y += lumSrc[j][i] * lumFilter[j];

        Y >>= 15;
        Y = av_clip_uint16(Y);

        if (hasAlpha) {
            for (j = 0; j < lumFilterSize; j++)
                A += alpSrc[j][i] * lumFilter[j];

            A >>= 15;
            A = av_clip_uint16(A);
        }

        output_pixel(&dest[2 * i    ], Y);
        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
    }
}

static av_always_inline void
yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
                      const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                      const int32_t *abuf[2], uint16_t *dest, int dstW,
                      int yalpha, int unused_uvalpha, int y,
                      enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
{
    int hasAlpha = abuf && abuf[0] && abuf[1];
    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1 = 4096 - yalpha;
    int i;

    av_assert2(yalpha <= 4096U);

    for (i = 0; i < dstW; i++) {
        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 15;
        int A;

        Y = av_clip_uint16(Y);

        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 15;
            A = av_clip_uint16(A);
        }

        output_pixel(&dest[2 * i    ], Y);
        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
    }
}

static av_always_inline void
yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
                      const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                      const int32_t *abuf0, uint16_t *dest, int dstW,
                      int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
{
    int hasAlpha = !!abuf0;
    int i;

    for (i = 0; i < dstW; i++) {
        int Y = buf0[i] >> 3;/* 19 - 16 */
        int A;

        Y = av_clip_uint16(Y);

        if (hasAlpha) {
            A = abuf0[i] >> 3;
            if (A & 0x100)
                A = av_clip_uint16(A);
        }

        output_pixel(&dest[2 * i    ], Y);
        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
    }
}

static av_always_inline void
yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    int i;
    int A1 = 0xffff<<14, A2 = 0xffff<<14;

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int j;
        int Y1 = -0x40000000;
        int Y2 = -0x40000000;
        int U  = -(128 << 23); // 19
        int V  = -(128 << 23);
        int R, G, B;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
        }

        if (hasAlpha) {
            A1 = -0x40000000;
            A2 = -0x40000000;
            for (j = 0; j < lumFilterSize; j++) {
                A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
                A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
            }
            A1 >>= 1;
            A1 += 0x20002000;
            A2 >>= 1;
            A2 += 0x20002000;
        }

        // 8 bits: 12+15=27; 16 bits: 12+19=31
        Y1 >>= 14; // 10
        Y1 += 0x10000;
        Y2 >>= 14;
        Y2 += 0x10000;
        U  >>= 14;
        V  >>= 14;

        // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
        Y1 -= c->yuv2rgb_y_offset;
        Y2 -= c->yuv2rgb_y_offset;
        Y1 *= c->yuv2rgb_y_coeff;
        Y2 *= c->yuv2rgb_y_coeff;
        Y1 += 1 << 13; // 21
        Y2 += 1 << 13;
        // 8 bits: 17 + 13 bits = 30 bits, 16 bits: 17 + 13 bits = 30 bits

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
            dest += 8;
        } else {
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
            dest += 6;
        }
    }
}

static av_always_inline void
yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    int A1 = 0xffff<<14, A2 = 0xffff<<14;

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 14;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - (128 << 23)) >> 14;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - (128 << 23)) >> 14;
        int R, G, B;

        Y1 -= c->yuv2rgb_y_offset;
        Y2 -= c->yuv2rgb_y_offset;
        Y1 *= c->yuv2rgb_y_coeff;
        Y2 *= c->yuv2rgb_y_coeff;
        Y1 += 1 << 13;
        Y2 += 1 << 13;

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        if (hasAlpha) {
            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 1;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;

            A1 += 1 << 13;
            A2 += 1 << 13;
        }

        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
            dest += 8;
        } else {
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
            dest += 6;
        }
    }
}

static av_always_inline void
yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int A1 = 0xffff<<14, A2 = 0xffff<<14;

    if (uvalpha < 2048) {
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] - (128 << 11)) >> 2;
            int V  = (vbuf0[i] - (128 << 11)) >> 2;
            int R, G, B;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;
            Y1 += 1 << 13;
            Y2 += 1 << 13;

            if (hasAlpha) {
                A1 = abuf0[i * 2    ] << 11;
                A2 = abuf0[i * 2 + 1] << 11;

                A1 += 1 << 13;
                A2 += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
                output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
                output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
                output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
                output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                dest += 8;
            } else {
                output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
                output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
                output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
                dest += 6;
            }
        }
    } else {
        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int A1 = 0xffff<<14, A2 = 0xffff<<14;
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
            int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
            int R, G, B;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;
            Y1 += 1 << 13;
            Y2 += 1 << 13;

            if (hasAlpha) {
                A1 = abuf0[i * 2    ] << 11;
                A2 = abuf0[i * 2 + 1] << 11;

                A1 += 1 << 13;
                A2 += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
                output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
                output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
                output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
                output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                dest += 8;
            } else {
                output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
                output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
                output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
                dest += 6;
            }
        }
    }
}

static av_always_inline void
yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                             const int32_t **lumSrc, int lumFilterSize,
                             const int16_t *chrFilter, const int32_t **chrUSrc,
                             const int32_t **chrVSrc, int chrFilterSize,
                             const int32_t **alpSrc, uint16_t *dest, int dstW,
                             int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    int i;
    int A = 0xffff<<14;

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = -0x40000000;
        int U = -(128 << 23); // 19
        int V = -(128 << 23);
        int R, G, B;

        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * (unsigned)lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
        }

        if (hasAlpha) {
            A = -0x40000000;
            for (j = 0; j < lumFilterSize; j++) {
                A += alpSrc[j][i] * (unsigned)lumFilter[j];
            }
            A >>= 1;
            A += 0x20002000;
        }

        // 8bit: 12+15=27; 16-bit: 12+19=31
        Y >>= 14; // 10
        Y += 0x10000;
        U >>= 14;
        V >>= 14;

        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << 13; // 21
        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
            dest += 4;
        } else {
            dest += 3;
        }
    }
}

static av_always_inline void
yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
                             const int32_t *ubuf[2], const int32_t *vbuf[2],
                             const int32_t *abuf[2], uint16_t *dest, int dstW,
                             int yalpha, int uvalpha, int y,
                             enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    int A = 0xffff<<14;

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    for (i = 0; i < dstW; i++) {
        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 14;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - (128 << 23)) >> 14;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - (128 << 23)) >> 14;
        int R, G, B;

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << 13;

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;

            A += 1 << 13;
        }

        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
            dest += 4;
        } else {
            dest += 3;
        }
    }
}

static av_always_inline void
yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
                             const int32_t *ubuf[2], const int32_t *vbuf[2],
                             const int32_t *abuf0, uint16_t *dest, int dstW,
                             int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int A = 0xffff<<14;

    if (uvalpha < 2048) {
        for (i = 0; i < dstW; i++) {
            int Y = (buf0[i]) >> 2;
            int U = (ubuf0[i] - (128 << 11)) >> 2;
            int V = (vbuf0[i] - (128 << 11)) >> 2;
            int R, G, B;

            Y -= c->yuv2rgb_y_offset;
            Y *= c->yuv2rgb_y_coeff;
            Y += 1 << 13;

            if (hasAlpha) {
                A = abuf0[i] << 11;

                A += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                dest += 4;
            } else {
                dest += 3;
            }
        }
    } else {
        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int A = 0xffff<<14;
        for (i = 0; i < dstW; i++) {
            int Y = (buf0[i]    ) >> 2;
            int U = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
            int V = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
            int R, G, B;

            Y -= c->yuv2rgb_y_offset;
            Y *= c->yuv2rgb_y_coeff;
            Y += 1 << 13;

            if (hasAlpha) {
                A = abuf0[i] << 11;

                A += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                dest += 4;
            } else {
                dest += 3;
            }
        }
    }
}

#undef output_pixel
#undef R_B
#undef B_R

#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t  *buf0  = (const int32_t *)  _buf0, \
                  **ubuf  = (const int32_t **) _ubuf, \
                  **vbuf  = (const int32_t **) _vbuf, \
                   *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
}

YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be,  AV_PIX_FMT_RGB48BE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le,  AV_PIX_FMT_RGB48LE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be,  AV_PIX_FMT_BGR48BE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le,  AV_PIX_FMT_BGR48LE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)

YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full,  AV_PIX_FMT_RGB48BE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full,  AV_PIX_FMT_RGB48LE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full,  AV_PIX_FMT_BGR48BE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full,  AV_PIX_FMT_BGR48LE,  0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)

/*
 * Write out 2 RGB pixels in the target pixel format. This function takes a
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 */
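/* The _r/_g/_b pointers are per-channel LUT slices that the caller has
 * already offset by the chroma values (e.g. slices of c->table_rV,
 * c->table_gU/c->table_gV and c->table_bU); this function only indexes
 * them with the luma values. */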
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum AVPixelFormat target, int hasAlpha)
{
    if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
        target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
#else
        if (hasAlpha) {
            int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;

            av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0);
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        } else {
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
            int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;

            av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
#endif
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
        }
#endif
    } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)

        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] =   g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] =   g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
               target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
               target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_4[ y & 1     ][0];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_4[ y & 1     ][1];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_8[ y & 1     ][1];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_8[ y & 1     ][0];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            dr1 = ff_dither_4x4_16[ y & 3     ][0];
            dg1 = ff_dither_4x4_16[ y & 3     ][1];
            db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = ff_dither_4x4_16[ y & 3     ][1];
            dg2 = ff_dither_4x4_16[ y & 3     ][0];
            db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4 bits */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
            const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
            const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
            const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
1631 dg2 = d64[(i * 2 + 1) & 7];
1632 }
1633
1634 if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
1635 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1636 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1637 } else {
1638 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1639 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1640 }
1641 }
1642 }
1643
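/*
 * Vertical scaling stage for packed RGB output. The _X_/_2_/_1_ template
 * flavours below follow the usual swscale convention: input luma/chroma
 * samples are 8-bit values in Q7 (left-shifted by 7) and the filter
 * coefficients sum to 4096 (Q12), so each accumulated sum is an 8-bit
 * value in Q19. Adding a bias of 1 << 18 rounds to nearest before the
 * >> 19 shift extracts the result; e.g. for a pass-through filter:
 *
 *     ((128 << 7) * 4096 + (1 << 18)) >> 19 == 128
 */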
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum AVPixelFormat target, int hasAlpha)
{
    int i;

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int j, A1, A2;
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;
        const void *r, *g, *b;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        if (hasAlpha) {
            A1 = 1 << 18;
            A2 = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
            }
            A1 >>= 19;
            A2 >>= 19;
            if ((A1 | A2) & 0x100) {
                A1 = av_clip_uint8(A1);
                A2 = av_clip_uint8(A2);
            }
        }

        r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
        b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}

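/*
 * Two-row variant: instead of a full FIR filter, blend two adjacent input
 * rows with weights (4096 - alpha) and alpha. Q7 samples times Q12 weights
 * again land in Q19, hence the >> 19 shifts below.
 */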
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    for (i = 0; i < ((dstW + 1) >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        int A1, A2;
        const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
                   *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
                   *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        if (hasAlpha) {
            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
            A1 = av_clip_uint8(A1);
            A2 = av_clip_uint8(A2);
        }

        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}

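/*
 * Single-row variant: no vertical filtering at all. uvalpha < 2048 means
 * this row's chroma is used as-is (rounded from Q7); otherwise the two
 * chroma rows are averaged, which is why the second branch adds the pair
 * and shifts by one extra bit.
 */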
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum AVPixelFormat target,
                     int hasAlpha)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;

    if (uvalpha < 2048) {
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2    ] + 64) >> 7;
            int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[i] + 64) >> 7;
            int V  = (vbuf0[i] + 64) >> 7;
            int A1, A2;
            const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
                       *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
                       *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            if (hasAlpha) {
                A1 = abuf0[i * 2    ] * 255 + 16384 >> 15;
                A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
                A1 = av_clip_uint8(A1);
                A2 = av_clip_uint8(A2);
            }

            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        for (i = 0; i < ((dstW + 1) >> 1); i++) {
            int Y1 = (buf0[i * 2    ] + 64) >> 7;
            int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[i] + ubuf1[i] + 128) >> 8;
            int V  = (vbuf0[i] + vbuf1[i] + 128) >> 8;
            int A1, A2;
            const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
                       *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
                       *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            if (hasAlpha) {
                A1 = (abuf0[i * 2    ] + 64) >> 7;
                A2 = (abuf0[i * 2 + 1] + 64) >> 7;
                A1 = av_clip_uint8(A1);
                A2 = av_clip_uint8(A2);
            }

            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        }
    }
}

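/*
 * The wrapper macros below stamp out the _X/_2/_1 entry points that
 * ff_sws_init_output_funcs() installs. As an illustration (hypothetical
 * expansion, spelled out by hand): YUV2RGBWRAPPER(yuv2rgb,, 16,
 * AV_PIX_FMT_RGB565, 0) produces yuv2rgb16_X_c(), yuv2rgb16_2_c() and
 * yuv2rgb16_1_c(), each forwarding to the matching yuv2rgb_*_c_template()
 * with target = AV_PIX_FMT_RGB565 and hasAlpha = 0.
 */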
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
}

#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
}

#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
}

#if CONFIG_SMALL
YUV2RGBWRAPPER(yuv2rgb,, 32_1, AV_PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2rgb,, 32, AV_PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->needAlpha)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, AV_PIX_FMT_RGB32_1, 1)
YUV2RGBWRAPPER(yuv2rgb,, a32, AV_PIX_FMT_RGB32, 1)
#endif
YUV2RGBWRAPPER(yuv2rgb,, x32_1, AV_PIX_FMT_RGB32_1, 0)
YUV2RGBWRAPPER(yuv2rgb,, x32, AV_PIX_FMT_RGB32, 0)
#endif
YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24, 0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24, 0)
YUV2RGBWRAPPER(yuv2rgb,, 16, AV_PIX_FMT_RGB565, 0)
YUV2RGBWRAPPER(yuv2rgb,, 15, AV_PIX_FMT_RGB555, 0)
YUV2RGBWRAPPER(yuv2rgb,, 12, AV_PIX_FMT_RGB444, 0)
YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0)

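/*
 * Full-chroma-resolution writer: one pixel at a time, computed from the
 * YUV->RGB matrix coefficients directly instead of the LUTs, clamped to
 * 30 bits so that R/G/B come out as 8-bit values in Q22 (hence the >> 22
 * extractions). The default dither path below diffuses the quantization
 * error with Floyd-Steinberg-style 7/16, 1/16, 5/16, 3/16 weights kept in
 * c->dither_error[].
 */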
static av_always_inline void yuv2rgb_write_full(SwsContext *c,
    uint8_t *dest, int i, int Y, int A, int U, int V,
    int y, enum AVPixelFormat target, int hasAlpha, int err[4])
{
    int R, G, B;
    int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;

    Y -= c->yuv2rgb_y_offset;
    Y *= c->yuv2rgb_y_coeff;
    Y += 1 << 21;
    R = Y + V * c->yuv2rgb_v2r_coeff;
    G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    B = Y + U * c->yuv2rgb_u2b_coeff;
    if ((R | G | B) & 0xC0000000) {
        R = av_clip_uintp2(R, 30);
        G = av_clip_uintp2(G, 30);
        B = av_clip_uintp2(B, 30);
    }

    switch (target) {
    case AV_PIX_FMT_ARGB:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = R >> 22;
        dest[2] = G >> 22;
        dest[3] = B >> 22;
        break;
    case AV_PIX_FMT_RGB24:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        break;
    case AV_PIX_FMT_RGBA:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_ABGR:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = B >> 22;
        dest[2] = G >> 22;
        dest[3] = R >> 22;
        break;
    case AV_PIX_FMT_BGR24:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        break;
    case AV_PIX_FMT_BGRA:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_BGR4_BYTE:
    case AV_PIX_FMT_RGB4_BYTE:
    case AV_PIX_FMT_BGR8:
    case AV_PIX_FMT_RGB8:
    {
        int r, g, b;

        switch (c->dither) {
        default:
        case SWS_DITHER_AUTO:
        case SWS_DITHER_ED:
            R >>= 22;
            G >>= 22;
            B >>= 22;
            R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2]) >> 4;
            G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2]) >> 4;
            B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2]) >> 4;
            c->dither_error[0][i] = err[0];
            c->dither_error[1][i] = err[1];
            c->dither_error[2][i] = err[2];
            r = R >> (isrgb8 ? 5 : 7);
            g = G >> (isrgb8 ? 5 : 6);
            b = B >> (isrgb8 ? 6 : 7);
            r = av_clip(r, 0, isrgb8 ? 7 : 1);
            g = av_clip(g, 0, isrgb8 ? 7 : 3);
            b = av_clip(b, 0, isrgb8 ? 3 : 1);
            err[0] = R - r * (isrgb8 ? 36 : 255);
            err[1] = G - g * (isrgb8 ? 36 : 85);
            err[2] = B - b * (isrgb8 ? 85 : 255);
            break;
        case SWS_DITHER_A_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
                r = (((R >> 19) + A_DITHER(i,        y) - 96) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17,   y) - 96) >> 8);
                b = (((B >> 20) + A_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + A_DITHER(i,        y) - 256) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17,   y) - 256) >> 8);
                b = (((B >> 21) + A_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        case SWS_DITHER_X_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
                r = (((R >> 19) + X_DITHER(i,        y) - 96) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17,   y) - 96) >> 8);
                b = (((B >> 20) + X_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + X_DITHER(i,        y) - 256) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17,   y) - 256) >> 8);
                b = (((B >> 21) + X_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        }

        if (target == AV_PIX_FMT_BGR4_BYTE) {
            dest[0] = r + 2*g + 8*b;
        } else if (target == AV_PIX_FMT_RGB4_BYTE) {
            dest[0] = b + 2*g + 8*r;
        } else if (target == AV_PIX_FMT_BGR8) {
            dest[0] = r + 8*g + 64*b;
        } else if (target == AV_PIX_FMT_RGB8) {
            dest[0] = b + 4*g + 32*r;
        } else
            av_assert2(0);
        break;
    }
    }
}

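/*
 * Full-chroma vertical scaling. Note the fixed-point layout differs from
 * the packed templates: the >> 10 shifts leave Y/U/V with extra
 * fractional bits, which yuv2rgb_write_full() roughly consumes when it
 * applies the matrix coefficients and extracts 8-bit values with >> 22.
 */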
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum AVPixelFormat target, int hasAlpha)
{
    int i;
    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4] = {0};
    int A = 0; // init to silence warning

    if (   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = 1 << 9;
        int U = (1 << 9) - (128 << 19);
        int V = (1 << 9) - (128 << 19);

        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y >>= 10;
        U >>= 10;
        V >>= 10;
        if (hasAlpha) {
            A = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A += alpSrc[j][i] * lumFilter[j];
            }
            A >>= 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }
        yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

static av_always_inline void
yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
                          const int16_t *ubuf[2], const int16_t *vbuf[2],
                          const int16_t *abuf[2], uint8_t *dest, int dstW,
                          int yalpha, int uvalpha, int y,
                          enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4] = {0};
    int A = 0; // init to silence warning

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    if (   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < dstW; i++) {
        int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha             ) >> 10; //FIXME rounding
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - (128 << 19)) >> 10;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - (128 << 19)) >> 10;

        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1 << 18)) >> 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }

        yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

static av_always_inline void
yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
                          const int16_t *ubuf[2], const int16_t *vbuf[2],
                          const int16_t *abuf0, uint8_t *dest, int dstW,
                          int uvalpha, int y, enum AVPixelFormat target,
                          int hasAlpha)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4] = {0};

    if (   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;

    if (uvalpha < 2048) {
        int A = 0; // init to silence warning
        for (i = 0; i < dstW; i++) {
            int Y = buf0[i] << 2;
            int U = (ubuf0[i] - (128 << 7)) * 4;
            int V = (vbuf0[i] - (128 << 7)) * 4;

            if (hasAlpha) {
                A = (abuf0[i] + 64) >> 7;
                if (A & 0x100)
                    A = av_clip_uint8(A);
            }

            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
            dest += step;
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int A = 0; // init to silence warning
        for (i = 0; i < dstW; i++) {
            int Y = buf0[i] << 2;
            int U = (ubuf0[i] + ubuf1[i] - (128 << 8)) << 1;
            int V = (vbuf0[i] + vbuf1[i] - (128 << 8)) << 1;

            if (hasAlpha) {
                A = (abuf0[i] + 64) >> 7;
                if (A & 0x100)
                    A = av_clip_uint8(A);
            }

            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
            dest += step;
        }
    }

    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

#if CONFIG_SMALL
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->needAlpha)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1)
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1)
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1)
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1)
#endif
YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0)
#endif
YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0)

YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)

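/*
 * Planar GBR(A) output, 8-16 bits per component. SH is the shift needed
 * to bring the Q22 intermediates down to the destination depth; for
 * depths other than 8 the planes are written through dest16[] and
 * byteswapped afterwards if the destination endianness differs from the
 * host's.
 */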
static void
yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
                  const int16_t **lumSrc, int lumFilterSize,
                  const int16_t *chrFilter, const int16_t **chrUSrc,
                  const int16_t **chrVSrc, int chrFilterSize,
                  const int16_t **alpSrc, uint8_t **dest,
                  int dstW, int y)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    int i;
    int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
    uint16_t **dest16 = (uint16_t**)dest;
    int SH = 22 + 8 - desc->comp[0].depth;
    int A = 0; // init to silence warning

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = 1 << 9;
        int U = (1 << 9) - (128 << 19);
        int V = (1 << 9) - (128 << 19);
        int R, G, B;

        for (j = 0; j < lumFilterSize; j++)
            Y += lumSrc[j][i] * lumFilter[j];

        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }

        Y >>= 10;
        U >>= 10;
        V >>= 10;

        if (hasAlpha) {
            A = 1 << 18;

            for (j = 0; j < lumFilterSize; j++)
                A += alpSrc[j][i] * lumFilter[j];

            if (A & 0xF8000000)
                A = av_clip_uintp2(A, 27);
        }

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << (SH - 1);
        R = Y + V * c->yuv2rgb_v2r_coeff;
        G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = Y + U * c->yuv2rgb_u2b_coeff;

        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);
        }

        if (SH != 22) {
            dest16[0][i] = G >> SH;
            dest16[1][i] = B >> SH;
            dest16[2][i] = R >> SH;
            if (hasAlpha)
                dest16[3][i] = A >> (SH - 3);
        } else {
            dest[0][i] = G >> 22;
            dest[1][i] = B >> 22;
            dest[2][i] = R >> 22;
            if (hasAlpha)
                dest[3][i] = A >> 19;
        }
    }
    if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
        for (i = 0; i < dstW; i++) {
            dest16[0][i] = av_bswap16(dest16[0][i]);
            dest16[1][i] = av_bswap16(dest16[1][i]);
            dest16[2][i] = av_bswap16(dest16[2][i]);
            if (hasAlpha)
                dest16[3][i] = av_bswap16(dest16[3][i]);
        }
    }
}

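/*
 * 16-bit planar GBR(A): the horizontal scaler hands over 32-bit
 * intermediates here (hence the casts from int16_t** to int32_t**), and
 * the (unsigned) casts on the multiplies avoid signed-overflow undefined
 * behavior while accumulating.
 */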
static void
yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
                    const int16_t **lumSrcx, int lumFilterSize,
                    const int16_t *chrFilter, const int16_t **chrUSrcx,
                    const int16_t **chrVSrcx, int chrFilterSize,
                    const int16_t **alpSrcx, uint8_t **dest,
                    int dstW, int y)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    int i;
    int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
    uint16_t **dest16 = (uint16_t**)dest;
    const int32_t **lumSrc  = (const int32_t**)lumSrcx;
    const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
    const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
    const int32_t **alpSrc  = (const int32_t**)alpSrcx;

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = -0x40000000;
        int U = -(128 << 23);
        int V = -(128 << 23);
        int R, G, B, A;

        for (j = 0; j < lumFilterSize; j++)
            Y += lumSrc[j][i] * (unsigned)lumFilter[j];

        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
        }

        Y >>= 14;
        Y += 0x10000;
        U >>= 14;
        V >>= 14;

        if (hasAlpha) {
            A = -0x40000000;

            for (j = 0; j < lumFilterSize; j++)
                A += alpSrc[j][i] * lumFilter[j];

            A >>= 1;
            A += 0x20002000;
        }

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << 13;
        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        R = av_clip_uintp2(Y + R, 30);
        G = av_clip_uintp2(Y + G, 30);
        B = av_clip_uintp2(Y + B, 30);

        dest16[0][i] = G >> 14;
        dest16[1][i] = B >> 14;
        dest16[2][i] = R >> 14;
        if (hasAlpha)
            dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
    }
    if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
        for (i = 0; i < dstW; i++) {
            dest16[0][i] = av_bswap16(dest16[0][i]);
            dest16[1][i] = av_bswap16(dest16[1][i]);
            dest16[2][i] = av_bswap16(dest16[2][i]);
            if (hasAlpha)
                dest16[3][i] = av_bswap16(dest16[3][i]);
        }
    }
}

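/*
 * Gray + alpha (YA8) writers: luma and alpha are interleaved byte pairs,
 * with alpha defaulting to 255 (opaque) when no alpha plane is supplied.
 */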
static void
yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
            const int16_t *ubuf[2], const int16_t *vbuf[2],
            const int16_t *abuf0, uint8_t *dest, int dstW,
            int uvalpha, int y)
{
    int hasAlpha = !!abuf0;
    int i;

    for (i = 0; i < dstW; i++) {
        int Y = (buf0[i] + 64) >> 7;
        int A;

        Y = av_clip_uint8(Y);

        if (hasAlpha) {
            A = (abuf0[i] + 64) >> 7;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }

        dest[i * 2    ] = Y;
        dest[i * 2 + 1] = hasAlpha ? A : 255;
    }
}

static void
yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2],
            const int16_t *ubuf[2], const int16_t *vbuf[2],
            const int16_t *abuf[2], uint8_t *dest, int dstW,
            int yalpha, int uvalpha, int y)
{
    int hasAlpha = abuf && abuf[0] && abuf[1];
    const int16_t *buf0  = buf[0], *buf1 = buf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1 = 4096 - yalpha;
    int i;

    av_assert2(yalpha <= 4096U);

    for (i = 0; i < dstW; i++) {
        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19;
        int A;

        Y = av_clip_uint8(Y);

        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
            A = av_clip_uint8(A);
        }

        dest[i * 2    ] = Y;
        dest[i * 2 + 1] = hasAlpha ? A : 255;
    }
}

static void
yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter,
            const int16_t **lumSrc, int lumFilterSize,
            const int16_t *chrFilter, const int16_t **chrUSrc,
            const int16_t **chrVSrc, int chrFilterSize,
            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
    int hasAlpha = !!alpSrc;
    int i;

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = 1 << 18, A = 1 << 18;

        for (j = 0; j < lumFilterSize; j++)
            Y += lumSrc[j][i] * lumFilter[j];

        Y >>= 19;
        if (Y & 0x100)
            Y = av_clip_uint8(Y);

        if (hasAlpha) {
            for (j = 0; j < lumFilterSize; j++)
                A += alpSrc[j][i] * lumFilter[j];

            A >>= 19;

            if (A & 0x100)
                A = av_clip_uint8(A);
        }

        dest[2 * i    ] = Y;
        dest[2 * i + 1] = hasAlpha ? A : 255;
    }
}

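/*
 * Packed 16-bit AYUV, little endian: accumulate in 32 bits with 32-bit
 * intermediates, then map the signed Q15 results back to unsigned 16-bit
 * samples via signed saturation plus a 0x8000 offset (matching the
 * -0x40000000 pre-bias applied before accumulation).
 */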
static void
yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
                 const int16_t **_lumSrc, int lumFilterSize,
                 const int16_t *chrFilter, const int16_t **_chrUSrc,
                 const int16_t **_chrVSrc, int chrFilterSize,
                 const int16_t **_alpSrc, uint8_t *dest, int dstW, int y)
{
    const int32_t **lumSrc  = (const int32_t **) _lumSrc,
                  **chrUSrc = (const int32_t **) _chrUSrc,
                  **chrVSrc = (const int32_t **) _chrVSrc,
                  **alpSrc  = (const int32_t **) _alpSrc;
    int hasAlpha = !!alpSrc;
    int i;

    for (i = 0; i < dstW; i++) {
        int Y = 1 << 14, U = 1 << 14;
        int V = 1 << 14, A = 1 << 14;
        int j;

        Y -= 0x40000000;
        U -= 0x40000000;
        V -= 0x40000000;
        A -= 0x40000000;

        for (j = 0; j < lumFilterSize; j++)
            Y += lumSrc[j][i] * (unsigned)lumFilter[j];

        for (j = 0; j < chrFilterSize; j++)
            U += chrUSrc[j][i] * (unsigned)chrFilter[j];

        for (j = 0; j < chrFilterSize; j++)
            V += chrVSrc[j][i] * (unsigned)chrFilter[j];

        if (hasAlpha)
            for (j = 0; j < lumFilterSize; j++)
                A += alpSrc[j][i] * (unsigned)lumFilter[j];

        Y = 0x8000 + av_clip_int16(Y >> 15);
        U = 0x8000 + av_clip_int16(U >> 15);
        V = 0x8000 + av_clip_int16(V >> 15);
        A = 0x8000 + av_clip_int16(A >> 15);

        AV_WL16(dest + 8 * i,     hasAlpha ? A : 65535);
        AV_WL16(dest + 8 * i + 2, Y);
        AV_WL16(dest + 8 * i + 4, U);
        AV_WL16(dest + 8 * i + 6, V);
    }
}

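/*
 * Select the output functions for the current destination format. The
 * planar writers (yuv2plane1/yuv2planeX/yuv2nv12cX) are chosen first by
 * bit depth, then the packed writers: the SWS_FULL_CHR_H_INT block picks
 * the full-chroma-resolution variants, falling through to the subsampled
 * packed variants (via the YUV_PACKED label) when no full-chroma writer
 * exists for the format.
 */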
av_cold void ff_sws_init_output_funcs(SwsContext *c,
                                      yuv2planar1_fn *yuv2plane1,
                                      yuv2planarX_fn *yuv2planeX,
                                      yuv2interleavedX_fn *yuv2nv12cX,
                                      yuv2packed1_fn *yuv2packed1,
                                      yuv2packed2_fn *yuv2packed2,
                                      yuv2packedX_fn *yuv2packedX,
                                      yuv2anyX_fn *yuv2anyX)
{
    enum AVPixelFormat dstFormat = c->dstFormat;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);

    if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
        *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
        *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
        *yuv2nv12cX = yuv2p010cX_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
        if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE) {
            *yuv2nv12cX = yuv2p016cX_c;
        }
    } else if (isNBPS(dstFormat)) {
        if (desc->comp[0].depth == 9) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
        } else if (desc->comp[0].depth == 10) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
        } else if (desc->comp[0].depth == 12) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c : yuv2planeX_12LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c : yuv2plane1_12LE_c;
        } else if (desc->comp[0].depth == 14) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c : yuv2planeX_14LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c : yuv2plane1_14LE_c;
        } else
            av_assert0(0);
    } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
        *yuv2planeX = yuv2planeX_floatBE_c;
        *yuv2plane1 = yuv2plane1_floatBE_c;
    } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
        *yuv2planeX = yuv2planeX_floatLE_c;
        *yuv2plane1 = yuv2plane1_floatLE_c;
    } else {
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;
    }

    if (c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
        case AV_PIX_FMT_RGBA:
#if CONFIG_SMALL
            *yuv2packedX = yuv2rgba32_full_X_c;
            *yuv2packed2 = yuv2rgba32_full_2_c;
            *yuv2packed1 = yuv2rgba32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2rgba32_full_X_c;
                *yuv2packed2 = yuv2rgba32_full_2_c;
                *yuv2packed1 = yuv2rgba32_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2rgbx32_full_X_c;
                *yuv2packed2 = yuv2rgbx32_full_2_c;
                *yuv2packed1 = yuv2rgbx32_full_1_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_ARGB:
#if CONFIG_SMALL
            *yuv2packedX = yuv2argb32_full_X_c;
            *yuv2packed2 = yuv2argb32_full_2_c;
            *yuv2packed1 = yuv2argb32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2argb32_full_X_c;
                *yuv2packed2 = yuv2argb32_full_2_c;
                *yuv2packed1 = yuv2argb32_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2xrgb32_full_X_c;
                *yuv2packed2 = yuv2xrgb32_full_2_c;
                *yuv2packed1 = yuv2xrgb32_full_1_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_BGRA:
#if CONFIG_SMALL
            *yuv2packedX = yuv2bgra32_full_X_c;
            *yuv2packed2 = yuv2bgra32_full_2_c;
            *yuv2packed1 = yuv2bgra32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2bgra32_full_X_c;
                *yuv2packed2 = yuv2bgra32_full_2_c;
                *yuv2packed1 = yuv2bgra32_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2bgrx32_full_X_c;
                *yuv2packed2 = yuv2bgrx32_full_2_c;
                *yuv2packed1 = yuv2bgrx32_full_1_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_ABGR:
#if CONFIG_SMALL
            *yuv2packedX = yuv2abgr32_full_X_c;
            *yuv2packed2 = yuv2abgr32_full_2_c;
            *yuv2packed1 = yuv2abgr32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2abgr32_full_X_c;
                *yuv2packed2 = yuv2abgr32_full_2_c;
                *yuv2packed1 = yuv2abgr32_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2xbgr32_full_X_c;
                *yuv2packed2 = yuv2xbgr32_full_2_c;
                *yuv2packed1 = yuv2xbgr32_full_1_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_RGBA64LE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2rgba64le_full_X_c;
                *yuv2packed2 = yuv2rgba64le_full_2_c;
                *yuv2packed1 = yuv2rgba64le_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2rgbx64le_full_X_c;
                *yuv2packed2 = yuv2rgbx64le_full_2_c;
                *yuv2packed1 = yuv2rgbx64le_full_1_c;
            }
            break;
        case AV_PIX_FMT_RGBA64BE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2rgba64be_full_X_c;
                *yuv2packed2 = yuv2rgba64be_full_2_c;
                *yuv2packed1 = yuv2rgba64be_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2rgbx64be_full_X_c;
                *yuv2packed2 = yuv2rgbx64be_full_2_c;
                *yuv2packed1 = yuv2rgbx64be_full_1_c;
            }
            break;
        case AV_PIX_FMT_BGRA64LE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2bgra64le_full_X_c;
                *yuv2packed2 = yuv2bgra64le_full_2_c;
                *yuv2packed1 = yuv2bgra64le_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2bgrx64le_full_X_c;
                *yuv2packed2 = yuv2bgrx64le_full_2_c;
                *yuv2packed1 = yuv2bgrx64le_full_1_c;
            }
            break;
        case AV_PIX_FMT_BGRA64BE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packedX = yuv2bgra64be_full_X_c;
                *yuv2packed2 = yuv2bgra64be_full_2_c;
                *yuv2packed1 = yuv2bgra64be_full_1_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packedX = yuv2bgrx64be_full_X_c;
                *yuv2packed2 = yuv2bgrx64be_full_2_c;
                *yuv2packed1 = yuv2bgrx64be_full_1_c;
            }
            break;

        case AV_PIX_FMT_RGB24:
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packed2 = yuv2rgb24_full_2_c;
            *yuv2packed1 = yuv2rgb24_full_1_c;
            break;
        case AV_PIX_FMT_BGR24:
            *yuv2packedX = yuv2bgr24_full_X_c;
            *yuv2packed2 = yuv2bgr24_full_2_c;
            *yuv2packed1 = yuv2bgr24_full_1_c;
            break;
        case AV_PIX_FMT_RGB48LE:
            *yuv2packedX = yuv2rgb48le_full_X_c;
            *yuv2packed2 = yuv2rgb48le_full_2_c;
            *yuv2packed1 = yuv2rgb48le_full_1_c;
            break;
        case AV_PIX_FMT_BGR48LE:
            *yuv2packedX = yuv2bgr48le_full_X_c;
            *yuv2packed2 = yuv2bgr48le_full_2_c;
            *yuv2packed1 = yuv2bgr48le_full_1_c;
            break;
        case AV_PIX_FMT_RGB48BE:
            *yuv2packedX = yuv2rgb48be_full_X_c;
            *yuv2packed2 = yuv2rgb48be_full_2_c;
            *yuv2packed1 = yuv2rgb48be_full_1_c;
            break;
        case AV_PIX_FMT_BGR48BE:
            *yuv2packedX = yuv2bgr48be_full_X_c;
            *yuv2packed2 = yuv2bgr48be_full_2_c;
            *yuv2packed1 = yuv2bgr48be_full_1_c;
            break;
        case AV_PIX_FMT_BGR4_BYTE:
            *yuv2packedX = yuv2bgr4_byte_full_X_c;
            *yuv2packed2 = yuv2bgr4_byte_full_2_c;
            *yuv2packed1 = yuv2bgr4_byte_full_1_c;
            break;
        case AV_PIX_FMT_RGB4_BYTE:
            *yuv2packedX = yuv2rgb4_byte_full_X_c;
            *yuv2packed2 = yuv2rgb4_byte_full_2_c;
            *yuv2packed1 = yuv2rgb4_byte_full_1_c;
            break;
        case AV_PIX_FMT_BGR8:
            *yuv2packedX = yuv2bgr8_full_X_c;
            *yuv2packed2 = yuv2bgr8_full_2_c;
            *yuv2packed1 = yuv2bgr8_full_1_c;
            break;
        case AV_PIX_FMT_RGB8:
            *yuv2packedX = yuv2rgb8_full_X_c;
            *yuv2packed2 = yuv2rgb8_full_2_c;
            *yuv2packed1 = yuv2rgb8_full_1_c;
            break;
        case AV_PIX_FMT_GBRP:
        case AV_PIX_FMT_GBRP9BE:
        case AV_PIX_FMT_GBRP9LE:
        case AV_PIX_FMT_GBRP10BE:
        case AV_PIX_FMT_GBRP10LE:
        case AV_PIX_FMT_GBRP12BE:
        case AV_PIX_FMT_GBRP12LE:
        case AV_PIX_FMT_GBRP14BE:
        case AV_PIX_FMT_GBRP14LE:
        case AV_PIX_FMT_GBRAP:
        case AV_PIX_FMT_GBRAP10BE:
        case AV_PIX_FMT_GBRAP10LE:
        case AV_PIX_FMT_GBRAP12BE:
        case AV_PIX_FMT_GBRAP12LE:
            *yuv2anyX = yuv2gbrp_full_X_c;
            break;
        case AV_PIX_FMT_GBRP16BE:
        case AV_PIX_FMT_GBRP16LE:
        case AV_PIX_FMT_GBRAP16BE:
        case AV_PIX_FMT_GBRAP16LE:
            *yuv2anyX = yuv2gbrp16_full_X_c;
            break;
        }
        if (!*yuv2packedX && !*yuv2anyX)
            goto YUV_PACKED;
    } else {
        YUV_PACKED:
        switch (dstFormat) {
        case AV_PIX_FMT_RGBA64LE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2rgba64le_1_c;
                *yuv2packed2 = yuv2rgba64le_2_c;
                *yuv2packedX = yuv2rgba64le_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx64le_1_c;
                *yuv2packed2 = yuv2rgbx64le_2_c;
                *yuv2packedX = yuv2rgbx64le_X_c;
            }
            break;
        case AV_PIX_FMT_RGBA64BE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2rgba64be_1_c;
                *yuv2packed2 = yuv2rgba64be_2_c;
                *yuv2packedX = yuv2rgba64be_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx64be_1_c;
                *yuv2packed2 = yuv2rgbx64be_2_c;
                *yuv2packedX = yuv2rgbx64be_X_c;
            }
            break;
        case AV_PIX_FMT_BGRA64LE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2bgra64le_1_c;
                *yuv2packed2 = yuv2bgra64le_2_c;
                *yuv2packedX = yuv2bgra64le_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2bgrx64le_1_c;
                *yuv2packed2 = yuv2bgrx64le_2_c;
                *yuv2packedX = yuv2bgrx64le_X_c;
            }
            break;
        case AV_PIX_FMT_BGRA64BE:
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2bgra64be_1_c;
                *yuv2packed2 = yuv2bgra64be_2_c;
                *yuv2packedX = yuv2bgra64be_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2bgrx64be_1_c;
                *yuv2packed2 = yuv2bgrx64be_2_c;
                *yuv2packedX = yuv2bgrx64be_X_c;
            }
            break;
        case AV_PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
            break;
        case AV_PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
            break;
        case AV_PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
            break;
        case AV_PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            break;
        case AV_PIX_FMT_RGB32:
        case AV_PIX_FMT_BGR32:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2rgba32_1_c;
                *yuv2packed2 = yuv2rgba32_2_c;
                *yuv2packedX = yuv2rgba32_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_c;
                *yuv2packed2 = yuv2rgbx32_2_c;
                *yuv2packedX = yuv2rgbx32_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_RGB32_1:
        case AV_PIX_FMT_BGR32_1:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->needAlpha) {
                *yuv2packed1 = yuv2rgba32_1_1_c;
                *yuv2packed2 = yuv2rgba32_1_2_c;
                *yuv2packedX = yuv2rgba32_1_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_1_c;
                *yuv2packed2 = yuv2rgbx32_1_2_c;
                *yuv2packedX = yuv2rgbx32_1_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case AV_PIX_FMT_RGB24:
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            break;
        case AV_PIX_FMT_BGR24:
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
            break;
        case AV_PIX_FMT_RGB565LE:
        case AV_PIX_FMT_RGB565BE:
        case AV_PIX_FMT_BGR565LE:
        case AV_PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
            break;
        case AV_PIX_FMT_RGB555LE:
        case AV_PIX_FMT_RGB555BE:
        case AV_PIX_FMT_BGR555LE:
        case AV_PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
            break;
        case AV_PIX_FMT_RGB444LE:
        case AV_PIX_FMT_RGB444BE:
        case AV_PIX_FMT_BGR444LE:
        case AV_PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            break;
        case AV_PIX_FMT_RGB8:
        case AV_PIX_FMT_BGR8:
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            break;
        case AV_PIX_FMT_RGB4:
        case AV_PIX_FMT_BGR4:
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
            break;
        case AV_PIX_FMT_RGB4_BYTE:
        case AV_PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
            break;
        }
    }
    switch (dstFormat) {
    case AV_PIX_FMT_MONOWHITE:
        *yuv2packed1 = yuv2monowhite_1_c;
        *yuv2packed2 = yuv2monowhite_2_c;
        *yuv2packedX = yuv2monowhite_X_c;
        break;
    case AV_PIX_FMT_MONOBLACK:
        *yuv2packed1 = yuv2monoblack_1_c;
        *yuv2packed2 = yuv2monoblack_2_c;
        *yuv2packedX = yuv2monoblack_X_c;
        break;
    case AV_PIX_FMT_YUYV422:
        *yuv2packed1 = yuv2yuyv422_1_c;
        *yuv2packed2 = yuv2yuyv422_2_c;
        *yuv2packedX = yuv2yuyv422_X_c;
        break;
    case AV_PIX_FMT_YVYU422:
        *yuv2packed1 = yuv2yvyu422_1_c;
        *yuv2packed2 = yuv2yvyu422_2_c;
        *yuv2packedX = yuv2yvyu422_X_c;
        break;
    case AV_PIX_FMT_UYVY422:
        *yuv2packed1 = yuv2uyvy422_1_c;
        *yuv2packed2 = yuv2uyvy422_2_c;
        *yuv2packedX = yuv2uyvy422_X_c;
        break;
    case AV_PIX_FMT_YA8:
        *yuv2packed1 = yuv2ya8_1_c;
        *yuv2packed2 = yuv2ya8_2_c;
        *yuv2packedX = yuv2ya8_X_c;
        break;
    case AV_PIX_FMT_YA16LE:
        *yuv2packed1 = yuv2ya16le_1_c;
        *yuv2packed2 = yuv2ya16le_2_c;
        *yuv2packedX = yuv2ya16le_X_c;
        break;
    case AV_PIX_FMT_YA16BE:
        *yuv2packed1 = yuv2ya16be_1_c;
        *yuv2packed2 = yuv2ya16be_2_c;
        *yuv2packedX = yuv2ya16be_X_c;
        break;
    case AV_PIX_FMT_AYUV64LE:
        *yuv2packedX = yuv2ayuv64le_X_c;
        break;
    }
}