1 // Copyright 2016 Adrien Descamps
2 // Distributed under BSD 3-Clause License
3 
4 // Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
5 
6 // There are a few slightly different variations of the YCbCr color space with different parameters that
7 // change the conversion matrix.
// The three most common YCbCr color spaces, defined by the BT.601, BT.709 and JPEG standards, are implemented here.
9 // See the respective standards for details
10 // The matrix values used are derived from http://www.equasys.de/colorconversion.html
11 
// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2.
// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
// is suboptimal for image quality, but by far the fastest method.
15 
// For all methods, width and height should be even; if not, the last row/column of the result image won't be affected.
// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
18 
19 #include "SDL_stdinc.h"
20 /*#include <stdint.h>*/
21 
// Selects the YCbCr variant, and therefore the conversion matrix, used by the converters.
//
// The definition carries its own guard: this header has no include guard, and
// defining the enumerators a second time in one translation unit is a compile error.
// The function prototypes in this header may legally be repeated, so guarding the
// type alone is enough to make the header safe to include more than once.
#ifndef YUV_RGB_YCBCRTYPE_DEFINED
#define YUV_RGB_YCBCRTYPE_DEFINED
typedef enum
{
	YCBCR_JPEG,	// YCbCr as defined by the JPEG standard
	YCBCR_601,	// YCbCr as defined by BT.601
	YCBCR_709	// YCbCr as defined by BT.709
} YCbCrType;
#endif
28 
29 // yuv to rgb, standard c implementation
30 void yuv420_rgb565_std(
31 	uint32_t width, uint32_t height,
32 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
33 	uint8_t *rgb, uint32_t rgb_stride,
34 	YCbCrType yuv_type);
35 
36 void yuv420_rgb24_std(
37 	uint32_t width, uint32_t height,
38 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
39 	uint8_t *rgb, uint32_t rgb_stride,
40 	YCbCrType yuv_type);
41 
42 void yuv420_rgba_std(
43 	uint32_t width, uint32_t height,
44 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
45 	uint8_t *rgb, uint32_t rgb_stride,
46 	YCbCrType yuv_type);
47 
48 void yuv420_bgra_std(
49 	uint32_t width, uint32_t height,
50 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
51 	uint8_t *rgb, uint32_t rgb_stride,
52 	YCbCrType yuv_type);
53 
54 void yuv420_argb_std(
55 	uint32_t width, uint32_t height,
56 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
57 	uint8_t *rgb, uint32_t rgb_stride,
58 	YCbCrType yuv_type);
59 
60 void yuv420_abgr_std(
61 	uint32_t width, uint32_t height,
62 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
63 	uint8_t *rgb, uint32_t rgb_stride,
64 	YCbCrType yuv_type);
65 
66 void yuv422_rgb565_std(
67 	uint32_t width, uint32_t height,
68 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
69 	uint8_t *rgb, uint32_t rgb_stride,
70 	YCbCrType yuv_type);
71 
72 void yuv422_rgb24_std(
73 	uint32_t width, uint32_t height,
74 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
75 	uint8_t *rgb, uint32_t rgb_stride,
76 	YCbCrType yuv_type);
77 
78 void yuv422_rgba_std(
79 	uint32_t width, uint32_t height,
80 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
81 	uint8_t *rgb, uint32_t rgb_stride,
82 	YCbCrType yuv_type);
83 
84 void yuv422_bgra_std(
85 	uint32_t width, uint32_t height,
86 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
87 	uint8_t *rgb, uint32_t rgb_stride,
88 	YCbCrType yuv_type);
89 
90 void yuv422_argb_std(
91 	uint32_t width, uint32_t height,
92 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
93 	uint8_t *rgb, uint32_t rgb_stride,
94 	YCbCrType yuv_type);
95 
96 void yuv422_abgr_std(
97 	uint32_t width, uint32_t height,
98 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
99 	uint8_t *rgb, uint32_t rgb_stride,
100 	YCbCrType yuv_type);
101 
102 void yuvnv12_rgb565_std(
103 	uint32_t width, uint32_t height,
104 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
105 	uint8_t *rgb, uint32_t rgb_stride,
106 	YCbCrType yuv_type);
107 
108 void yuvnv12_rgb24_std(
109 	uint32_t width, uint32_t height,
110 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
111 	uint8_t *rgb, uint32_t rgb_stride,
112 	YCbCrType yuv_type);
113 
114 void yuvnv12_rgba_std(
115 	uint32_t width, uint32_t height,
116 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
117 	uint8_t *rgb, uint32_t rgb_stride,
118 	YCbCrType yuv_type);
119 
120 void yuvnv12_bgra_std(
121 	uint32_t width, uint32_t height,
122 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
123 	uint8_t *rgb, uint32_t rgb_stride,
124 	YCbCrType yuv_type);
125 
126 void yuvnv12_argb_std(
127 	uint32_t width, uint32_t height,
128 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
129 	uint8_t *rgb, uint32_t rgb_stride,
130 	YCbCrType yuv_type);
131 
132 void yuvnv12_abgr_std(
133 	uint32_t width, uint32_t height,
134 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
135 	uint8_t *rgb, uint32_t rgb_stride,
136 	YCbCrType yuv_type);
137 
138 // yuv to rgb, sse implementation
139 // pointers must be 16 byte aligned, and strides must be divisable by 16
140 void yuv420_rgb565_sse(
141 	uint32_t width, uint32_t height,
142 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
143 	uint8_t *rgb, uint32_t rgb_stride,
144 	YCbCrType yuv_type);
145 
146 void yuv420_rgb24_sse(
147 	uint32_t width, uint32_t height,
148 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
149 	uint8_t *rgb, uint32_t rgb_stride,
150 	YCbCrType yuv_type);
151 
152 void yuv420_rgba_sse(
153 	uint32_t width, uint32_t height,
154 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
155 	uint8_t *rgb, uint32_t rgb_stride,
156 	YCbCrType yuv_type);
157 
158 void yuv420_bgra_sse(
159 	uint32_t width, uint32_t height,
160 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
161 	uint8_t *rgb, uint32_t rgb_stride,
162 	YCbCrType yuv_type);
163 
164 void yuv420_argb_sse(
165 	uint32_t width, uint32_t height,
166 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
167 	uint8_t *rgb, uint32_t rgb_stride,
168 	YCbCrType yuv_type);
169 
170 void yuv420_abgr_sse(
171 	uint32_t width, uint32_t height,
172 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
173 	uint8_t *rgb, uint32_t rgb_stride,
174 	YCbCrType yuv_type);
175 
176 void yuv422_rgb565_sse(
177 	uint32_t width, uint32_t height,
178 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
179 	uint8_t *rgb, uint32_t rgb_stride,
180 	YCbCrType yuv_type);
181 
182 void yuv422_rgb24_sse(
183 	uint32_t width, uint32_t height,
184 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
185 	uint8_t *rgb, uint32_t rgb_stride,
186 	YCbCrType yuv_type);
187 
188 void yuv422_rgba_sse(
189 	uint32_t width, uint32_t height,
190 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
191 	uint8_t *rgb, uint32_t rgb_stride,
192 	YCbCrType yuv_type);
193 
194 void yuv422_bgra_sse(
195 	uint32_t width, uint32_t height,
196 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
197 	uint8_t *rgb, uint32_t rgb_stride,
198 	YCbCrType yuv_type);
199 
200 void yuv422_argb_sse(
201 	uint32_t width, uint32_t height,
202 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
203 	uint8_t *rgb, uint32_t rgb_stride,
204 	YCbCrType yuv_type);
205 
206 void yuv422_abgr_sse(
207 	uint32_t width, uint32_t height,
208 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
209 	uint8_t *rgb, uint32_t rgb_stride,
210 	YCbCrType yuv_type);
211 
212 void yuvnv12_rgb565_sse(
213 	uint32_t width, uint32_t height,
214 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
215 	uint8_t *rgb, uint32_t rgb_stride,
216 	YCbCrType yuv_type);
217 
218 void yuvnv12_rgb24_sse(
219 	uint32_t width, uint32_t height,
220 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
221 	uint8_t *rgb, uint32_t rgb_stride,
222 	YCbCrType yuv_type);
223 
224 void yuvnv12_rgba_sse(
225 	uint32_t width, uint32_t height,
226 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
227 	uint8_t *rgb, uint32_t rgb_stride,
228 	YCbCrType yuv_type);
229 
230 void yuvnv12_bgra_sse(
231 	uint32_t width, uint32_t height,
232 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
233 	uint8_t *rgb, uint32_t rgb_stride,
234 	YCbCrType yuv_type);
235 
236 void yuvnv12_argb_sse(
237 	uint32_t width, uint32_t height,
238 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
239 	uint8_t *rgb, uint32_t rgb_stride,
240 	YCbCrType yuv_type);
241 
242 void yuvnv12_abgr_sse(
243 	uint32_t width, uint32_t height,
244 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
245 	uint8_t *rgb, uint32_t rgb_stride,
246 	YCbCrType yuv_type);
247 
248 // yuv to rgb, sse implementation
249 // pointers do not need to be 16 byte aligned
250 void yuv420_rgb565_sseu(
251 	uint32_t width, uint32_t height,
252 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
253 	uint8_t *rgb, uint32_t rgb_stride,
254 	YCbCrType yuv_type);
255 
256 void yuv420_rgb24_sseu(
257 	uint32_t width, uint32_t height,
258 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
259 	uint8_t *rgb, uint32_t rgb_stride,
260 	YCbCrType yuv_type);
261 
262 void yuv420_rgba_sseu(
263 	uint32_t width, uint32_t height,
264 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
265 	uint8_t *rgb, uint32_t rgb_stride,
266 	YCbCrType yuv_type);
267 
268 void yuv420_bgra_sseu(
269 	uint32_t width, uint32_t height,
270 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
271 	uint8_t *rgb, uint32_t rgb_stride,
272 	YCbCrType yuv_type);
273 
274 void yuv420_argb_sseu(
275 	uint32_t width, uint32_t height,
276 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
277 	uint8_t *rgb, uint32_t rgb_stride,
278 	YCbCrType yuv_type);
279 
280 void yuv420_abgr_sseu(
281 	uint32_t width, uint32_t height,
282 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
283 	uint8_t *rgb, uint32_t rgb_stride,
284 	YCbCrType yuv_type);
285 
286 void yuv422_rgb565_sseu(
287 	uint32_t width, uint32_t height,
288 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
289 	uint8_t *rgb, uint32_t rgb_stride,
290 	YCbCrType yuv_type);
291 
292 void yuv422_rgb24_sseu(
293 	uint32_t width, uint32_t height,
294 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
295 	uint8_t *rgb, uint32_t rgb_stride,
296 	YCbCrType yuv_type);
297 
298 void yuv422_rgba_sseu(
299 	uint32_t width, uint32_t height,
300 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
301 	uint8_t *rgb, uint32_t rgb_stride,
302 	YCbCrType yuv_type);
303 
304 void yuv422_bgra_sseu(
305 	uint32_t width, uint32_t height,
306 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
307 	uint8_t *rgb, uint32_t rgb_stride,
308 	YCbCrType yuv_type);
309 
310 void yuv422_argb_sseu(
311 	uint32_t width, uint32_t height,
312 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
313 	uint8_t *rgb, uint32_t rgb_stride,
314 	YCbCrType yuv_type);
315 
316 void yuv422_abgr_sseu(
317 	uint32_t width, uint32_t height,
318 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
319 	uint8_t *rgb, uint32_t rgb_stride,
320 	YCbCrType yuv_type);
321 
322 void yuvnv12_rgb565_sseu(
323 	uint32_t width, uint32_t height,
324 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
325 	uint8_t *rgb, uint32_t rgb_stride,
326 	YCbCrType yuv_type);
327 
328 void yuvnv12_rgb24_sseu(
329 	uint32_t width, uint32_t height,
330 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
331 	uint8_t *rgb, uint32_t rgb_stride,
332 	YCbCrType yuv_type);
333 
334 void yuvnv12_rgba_sseu(
335 	uint32_t width, uint32_t height,
336 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
337 	uint8_t *rgb, uint32_t rgb_stride,
338 	YCbCrType yuv_type);
339 
340 void yuvnv12_bgra_sseu(
341 	uint32_t width, uint32_t height,
342 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
343 	uint8_t *rgb, uint32_t rgb_stride,
344 	YCbCrType yuv_type);
345 
346 void yuvnv12_argb_sseu(
347 	uint32_t width, uint32_t height,
348 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
349 	uint8_t *rgb, uint32_t rgb_stride,
350 	YCbCrType yuv_type);
351 
352 void yuvnv12_abgr_sseu(
353 	uint32_t width, uint32_t height,
354 	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
355 	uint8_t *rgb, uint32_t rgb_stride,
356 	YCbCrType yuv_type);
357 
358 
359 // rgb to yuv, standard c implementation
360 void rgb24_yuv420_std(
361 	uint32_t width, uint32_t height,
362 	const uint8_t *rgb, uint32_t rgb_stride,
363 	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
364 	YCbCrType yuv_type);
365 
366 // rgb to yuv, sse implementation
367 // pointers must be 16 byte aligned, and strides must be divisible by 16
368 void rgb24_yuv420_sse(
369 	uint32_t width, uint32_t height,
370 	const uint8_t *rgb, uint32_t rgb_stride,
371 	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
372 	YCbCrType yuv_type);
373 
374 // rgb to yuv, sse implementation
375 // pointers do not need to be 16 byte aligned
376 void rgb24_yuv420_sseu(
377 	uint32_t width, uint32_t height,
378 	const uint8_t *rgb, uint32_t rgb_stride,
379 	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
380 	YCbCrType yuv_type);
381 
382