1 /**
2  * FreeRDP: A Remote Desktop Protocol Implementation
3  * Generic YUV/RGB conversion operations
4  *
5  * Copyright 2014 Marc-Andre Moreau <marcandre.moreau@gmail.com>
6  * Copyright 2015-2017 Armin Novak <armin.novak@thincast.com>
7  * Copyright 2015-2017 Norbert Federa <norbert.federa@thincast.com>
8  * Copyright 2015-2017 Vic Lee
9  * Copyright 2015-2017 Thincast Technologies GmbH
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  *     http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #include <freerdp/types.h>
29 #include <freerdp/primitives.h>
30 #include <freerdp/codec/color.h>
31 #include "prim_internal.h"
32 
general_LumaToYUV444(const BYTE * const pSrcRaw[3],const UINT32 srcStep[3],BYTE * pDstRaw[3],const UINT32 dstStep[3],const RECTANGLE_16 * roi)33 static pstatus_t general_LumaToYUV444(const BYTE* const pSrcRaw[3], const UINT32 srcStep[3],
34                                       BYTE* pDstRaw[3], const UINT32 dstStep[3],
35                                       const RECTANGLE_16* roi)
36 {
37 	UINT32 x, y;
38 	const UINT32 nWidth = roi->right - roi->left;
39 	const UINT32 nHeight = roi->bottom - roi->top;
40 	const UINT32 halfWidth = (nWidth + 1) / 2;
41 	const UINT32 halfHeight = (nHeight + 1) / 2;
42 	const UINT32 oddY = 1;
43 	const UINT32 evenY = 0;
44 	const UINT32 oddX = 1;
45 	const UINT32 evenX = 0;
46 	const BYTE* pSrc[3] = { pSrcRaw[0] + roi->top * srcStep[0] + roi->left,
47 		                    pSrcRaw[1] + roi->top / 2 * srcStep[1] + roi->left / 2,
48 		                    pSrcRaw[2] + roi->top / 2 * srcStep[2] + roi->left / 2 };
49 	BYTE* pDst[3] = { pDstRaw[0] + roi->top * dstStep[0] + roi->left,
50 		              pDstRaw[1] + roi->top * dstStep[1] + roi->left,
51 		              pDstRaw[2] + roi->top * dstStep[2] + roi->left };
52 
53 	/* Y data is already here... */
54 	/* B1 */
55 	for (y = 0; y < nHeight; y++)
56 	{
57 		const BYTE* Ym = pSrc[0] + srcStep[0] * y;
58 		BYTE* pY = pDst[0] + dstStep[0] * y;
59 		memcpy(pY, Ym, nWidth);
60 	}
61 
62 	/* The first half of U, V are already here part of this frame. */
63 	/* B2 and B3 */
64 	for (y = 0; y < halfHeight; y++)
65 	{
66 		const UINT32 val2y = (2 * y + evenY);
67 		const UINT32 val2y1 = val2y + oddY;
68 		const BYTE* Um = pSrc[1] + srcStep[1] * y;
69 		const BYTE* Vm = pSrc[2] + srcStep[2] * y;
70 		BYTE* pU = pDst[1] + dstStep[1] * val2y;
71 		BYTE* pV = pDst[2] + dstStep[2] * val2y;
72 		BYTE* pU1 = pDst[1] + dstStep[1] * val2y1;
73 		BYTE* pV1 = pDst[2] + dstStep[2] * val2y1;
74 
75 		for (x = 0; x < halfWidth; x++)
76 		{
77 			const UINT32 val2x = 2 * x + evenX;
78 			const UINT32 val2x1 = val2x + oddX;
79 			pU[val2x] = Um[x];
80 			pV[val2x] = Vm[x];
81 			pU[val2x1] = Um[x];
82 			pV[val2x1] = Vm[x];
83 			pU1[val2x] = Um[x];
84 			pV1[val2x] = Vm[x];
85 			pU1[val2x1] = Um[x];
86 			pV1[val2x1] = Vm[x];
87 		}
88 	}
89 
90 	return PRIMITIVES_SUCCESS;
91 }
92 
general_ChromaFilter(BYTE * pDst[3],const UINT32 dstStep[3],const RECTANGLE_16 * roi)93 static pstatus_t general_ChromaFilter(BYTE* pDst[3], const UINT32 dstStep[3],
94                                       const RECTANGLE_16* roi)
95 {
96 	const UINT32 oddY = 1;
97 	const UINT32 evenY = 0;
98 	const UINT32 nWidth = roi->right - roi->left;
99 	const UINT32 nHeight = roi->bottom - roi->top;
100 	const UINT32 halfHeight = (nHeight + 1) / 2;
101 	const UINT32 halfWidth = (nWidth + 1) / 2;
102 	UINT32 x, y;
103 
104 	/* Filter */
105 	for (y = roi->top; y < halfHeight + roi->top; y++)
106 	{
107 		const UINT32 val2y = (y * 2 + evenY);
108 		const UINT32 val2y1 = val2y + oddY;
109 		BYTE* pU1 = pDst[1] + dstStep[1] * val2y1;
110 		BYTE* pV1 = pDst[2] + dstStep[2] * val2y1;
111 		BYTE* pU = pDst[1] + dstStep[1] * val2y;
112 		BYTE* pV = pDst[2] + dstStep[2] * val2y;
113 
114 		if (val2y1 > nHeight)
115 			continue;
116 
117 		for (x = roi->left; x < halfWidth + roi->left; x++)
118 		{
119 			const UINT32 val2x = (x * 2);
120 			const UINT32 val2x1 = val2x + 1;
121 			const INT32 up = pU[val2x] * 4;
122 			const INT32 vp = pV[val2x] * 4;
123 			INT32 u2020;
124 			INT32 v2020;
125 
126 			if (val2x1 > nWidth)
127 				continue;
128 
129 			u2020 = up - pU[val2x1] - pU1[val2x] - pU1[val2x1];
130 			v2020 = vp - pV[val2x1] - pV1[val2x] - pV1[val2x1];
131 			pU[val2x] = CLIP(u2020);
132 			pV[val2x] = CLIP(v2020);
133 		}
134 	}
135 
136 	return PRIMITIVES_SUCCESS;
137 }
138 
general_ChromaV1ToYUV444(const BYTE * const pSrcRaw[3],const UINT32 srcStep[3],BYTE * pDstRaw[3],const UINT32 dstStep[3],const RECTANGLE_16 * roi)139 static pstatus_t general_ChromaV1ToYUV444(const BYTE* const pSrcRaw[3], const UINT32 srcStep[3],
140                                           BYTE* pDstRaw[3], const UINT32 dstStep[3],
141                                           const RECTANGLE_16* roi)
142 {
143 	const UINT32 mod = 16;
144 	UINT32 uY = 0;
145 	UINT32 vY = 0;
146 	UINT32 x, y;
147 	const UINT32 nWidth = roi->right - roi->left;
148 	const UINT32 nHeight = roi->bottom - roi->top;
149 	const UINT32 halfWidth = (nWidth) / 2;
150 	const UINT32 halfHeight = (nHeight) / 2;
151 	const UINT32 oddY = 1;
152 	const UINT32 evenY = 0;
153 	const UINT32 oddX = 1;
154 	/* The auxilary frame is aligned to multiples of 16x16.
155 	 * We need the padded height for B4 and B5 conversion. */
156 	const UINT32 padHeigth = nHeight + 16 - nHeight % 16;
157 	const BYTE* pSrc[3] = { pSrcRaw[0] + roi->top * srcStep[0] + roi->left,
158 		                    pSrcRaw[1] + roi->top / 2 * srcStep[1] + roi->left / 2,
159 		                    pSrcRaw[2] + roi->top / 2 * srcStep[2] + roi->left / 2 };
160 	BYTE* pDst[3] = { pDstRaw[0] + roi->top * dstStep[0] + roi->left,
161 		              pDstRaw[1] + roi->top * dstStep[1] + roi->left,
162 		              pDstRaw[2] + roi->top * dstStep[2] + roi->left };
163 
164 	/* The second half of U and V is a bit more tricky... */
165 	/* B4 and B5 */
166 	for (y = 0; y < padHeigth; y++)
167 	{
168 		const BYTE* Ya = pSrc[0] + srcStep[0] * y;
169 		BYTE* pX;
170 
171 		if ((y) % mod < (mod + 1) / 2)
172 		{
173 			const UINT32 pos = (2 * uY++ + oddY);
174 
175 			if (pos >= nHeight)
176 				continue;
177 
178 			pX = pDst[1] + dstStep[1] * pos;
179 		}
180 		else
181 		{
182 			const UINT32 pos = (2 * vY++ + oddY);
183 
184 			if (pos >= nHeight)
185 				continue;
186 
187 			pX = pDst[2] + dstStep[2] * pos;
188 		}
189 
190 		memcpy(pX, Ya, nWidth);
191 	}
192 
193 	/* B6 and B7 */
194 	for (y = 0; y < halfHeight; y++)
195 	{
196 		const UINT32 val2y = (y * 2 + evenY);
197 		const BYTE* Ua = pSrc[1] + srcStep[1] * y;
198 		const BYTE* Va = pSrc[2] + srcStep[2] * y;
199 		BYTE* pU = pDst[1] + dstStep[1] * val2y;
200 		BYTE* pV = pDst[2] + dstStep[2] * val2y;
201 
202 		for (x = 0; x < halfWidth; x++)
203 		{
204 			const UINT32 val2x1 = (x * 2 + oddX);
205 			pU[val2x1] = Ua[x];
206 			pV[val2x1] = Va[x];
207 		}
208 	}
209 
210 	/* Filter */
211 	return general_ChromaFilter(pDst, dstStep, roi);
212 }
213 
general_ChromaV2ToYUV444(const BYTE * const pSrc[3],const UINT32 srcStep[3],UINT32 nTotalWidth,UINT32 nTotalHeight,BYTE * pDst[3],const UINT32 dstStep[3],const RECTANGLE_16 * roi)214 static pstatus_t general_ChromaV2ToYUV444(const BYTE* const pSrc[3], const UINT32 srcStep[3],
215                                           UINT32 nTotalWidth, UINT32 nTotalHeight, BYTE* pDst[3],
216                                           const UINT32 dstStep[3], const RECTANGLE_16* roi)
217 {
218 	UINT32 x, y;
219 	const UINT32 nWidth = roi->right - roi->left;
220 	const UINT32 nHeight = roi->bottom - roi->top;
221 	const UINT32 halfWidth = (nWidth + 1) / 2;
222 	const UINT32 halfHeight = (nHeight + 1) / 2;
223 	const UINT32 quaterWidth = (nWidth + 3) / 4;
224 
225 	/* B4 and B5: odd UV values for width/2, height */
226 	for (y = 0; y < nHeight; y++)
227 	{
228 		const UINT32 yTop = y + roi->top;
229 		const BYTE* pYaU = pSrc[0] + srcStep[0] * yTop + roi->left / 2;
230 		const BYTE* pYaV = pYaU + nTotalWidth / 2;
231 		BYTE* pU = pDst[1] + dstStep[1] * yTop + roi->left;
232 		BYTE* pV = pDst[2] + dstStep[2] * yTop + roi->left;
233 
234 		for (x = 0; x < halfWidth; x++)
235 		{
236 			const UINT32 odd = 2 * x + 1;
237 			pU[odd] = *pYaU++;
238 			pV[odd] = *pYaV++;
239 		}
240 	}
241 
242 	/* B6 - B9 */
243 	for (y = 0; y < halfHeight; y++)
244 	{
245 		const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
246 		const BYTE* pUaV = pUaU + nTotalWidth / 4;
247 		const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
248 		const BYTE* pVaV = pVaU + nTotalWidth / 4;
249 		BYTE* pU = pDst[1] + dstStep[1] * (2 * y + 1 + roi->top) + roi->left;
250 		BYTE* pV = pDst[2] + dstStep[2] * (2 * y + 1 + roi->top) + roi->left;
251 
252 		for (x = 0; x < quaterWidth; x++)
253 		{
254 			pU[4 * x + 0] = *pUaU++;
255 			pV[4 * x + 0] = *pUaV++;
256 			pU[4 * x + 2] = *pVaU++;
257 			pV[4 * x + 2] = *pVaV++;
258 		}
259 	}
260 
261 	return general_ChromaFilter(pDst, dstStep, roi);
262 }
263 
general_YUV420CombineToYUV444(avc444_frame_type type,const BYTE * const pSrc[3],const UINT32 srcStep[3],UINT32 nWidth,UINT32 nHeight,BYTE * pDst[3],const UINT32 dstStep[3],const RECTANGLE_16 * roi)264 static pstatus_t general_YUV420CombineToYUV444(avc444_frame_type type, const BYTE* const pSrc[3],
265                                                const UINT32 srcStep[3], UINT32 nWidth,
266                                                UINT32 nHeight, BYTE* pDst[3],
267                                                const UINT32 dstStep[3], const RECTANGLE_16* roi)
268 {
269 	if (!pSrc || !pSrc[0] || !pSrc[1] || !pSrc[2])
270 		return -1;
271 
272 	if (!pDst || !pDst[0] || !pDst[1] || !pDst[2])
273 		return -1;
274 
275 	if (!roi)
276 		return -1;
277 
278 	switch (type)
279 	{
280 		case AVC444_LUMA:
281 			return general_LumaToYUV444(pSrc, srcStep, pDst, dstStep, roi);
282 
283 		case AVC444_CHROMAv1:
284 			return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
285 
286 		case AVC444_CHROMAv2:
287 			return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
288 
289 		default:
290 			return -1;
291 	}
292 }
293 
general_YUV444SplitToYUV420(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pMainDst[3],const UINT32 dstMainStep[3],BYTE * pAuxDst[3],const UINT32 dstAuxStep[3],const prim_size_t * roi)294 static pstatus_t general_YUV444SplitToYUV420(const BYTE* const pSrc[3], const UINT32 srcStep[3],
295                                              BYTE* pMainDst[3], const UINT32 dstMainStep[3],
296                                              BYTE* pAuxDst[3], const UINT32 dstAuxStep[3],
297                                              const prim_size_t* roi)
298 {
299 	UINT32 x, y, uY = 0, vY = 0;
300 	UINT32 halfWidth, halfHeight;
301 	/* The auxilary frame is aligned to multiples of 16x16.
302 	 * We need the padded height for B4 and B5 conversion. */
303 	const UINT32 padHeigth = roi->height + 16 - roi->height % 16;
304 	halfWidth = (roi->width + 1) / 2;
305 	halfHeight = (roi->height + 1) / 2;
306 
307 	/* B1 */
308 	for (y = 0; y < roi->height; y++)
309 	{
310 		const BYTE* pSrcY = pSrc[0] + y * srcStep[0];
311 		BYTE* pY = pMainDst[0] + y * dstMainStep[0];
312 		memcpy(pY, pSrcY, roi->width);
313 	}
314 
315 	/* B2 and B3 */
316 	for (y = 0; y < halfHeight; y++)
317 	{
318 		const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1];
319 		const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2];
320 		const BYTE* pSrcU1 = pSrc[1] + (2 * y + 1) * srcStep[1];
321 		const BYTE* pSrcV1 = pSrc[2] + (2 * y + 1) * srcStep[2];
322 		BYTE* pU = pMainDst[1] + y * dstMainStep[1];
323 		BYTE* pV = pMainDst[2] + y * dstMainStep[2];
324 
325 		for (x = 0; x < halfWidth; x++)
326 		{
327 			/* Filter */
328 			const INT32 u = pSrcU[2 * x] + pSrcU[2 * x + 1] + pSrcU1[2 * x] + pSrcU1[2 * x + 1];
329 			const INT32 v = pSrcV[2 * x] + pSrcV[2 * x + 1] + pSrcV1[2 * x] + pSrcV1[2 * x + 1];
330 			pU[x] = CLIP(u / 4L);
331 			pV[x] = CLIP(v / 4L);
332 		}
333 	}
334 
335 	/* B4 and B5 */
336 	for (y = 0; y < padHeigth; y++)
337 	{
338 		BYTE* pY = pAuxDst[0] + y * dstAuxStep[0];
339 
340 		if (y % 16 < 8)
341 		{
342 			const UINT32 pos = (2 * uY++ + 1);
343 			const BYTE* pSrcU = pSrc[1] + pos * srcStep[1];
344 
345 			if (pos >= roi->height)
346 				continue;
347 
348 			memcpy(pY, pSrcU, roi->width);
349 		}
350 		else
351 		{
352 			const UINT32 pos = (2 * vY++ + 1);
353 			const BYTE* pSrcV = pSrc[2] + pos * srcStep[2];
354 
355 			if (pos >= roi->height)
356 				continue;
357 
358 			memcpy(pY, pSrcV, roi->width);
359 		}
360 	}
361 
362 	/* B6 and B7 */
363 	for (y = 0; y < halfHeight; y++)
364 	{
365 		const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1];
366 		const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2];
367 		BYTE* pU = pAuxDst[1] + y * dstAuxStep[1];
368 		BYTE* pV = pAuxDst[2] + y * dstAuxStep[2];
369 
370 		for (x = 0; x < halfWidth; x++)
371 		{
372 			pU[x] = pSrcU[2 * x + 1];
373 			pV[x] = pSrcV[2 * x + 1];
374 		}
375 	}
376 
377 	return PRIMITIVES_SUCCESS;
378 }
379 
general_YUV444ToRGB_8u_P3AC4R_general(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)380 static pstatus_t general_YUV444ToRGB_8u_P3AC4R_general(const BYTE* const pSrc[3],
381                                                        const UINT32 srcStep[3], BYTE* pDst,
382                                                        UINT32 dstStep, UINT32 DstFormat,
383                                                        const prim_size_t* roi)
384 {
385 	UINT32 x, y;
386 	UINT32 nWidth, nHeight;
387 	const DWORD formatSize = GetBytesPerPixel(DstFormat);
388 	fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
389 	nWidth = roi->width;
390 	nHeight = roi->height;
391 
392 	for (y = 0; y < nHeight; y++)
393 	{
394 		const BYTE* pY = pSrc[0] + y * srcStep[0];
395 		const BYTE* pU = pSrc[1] + y * srcStep[1];
396 		const BYTE* pV = pSrc[2] + y * srcStep[2];
397 		BYTE* pRGB = pDst + y * dstStep;
398 
399 		for (x = 0; x < nWidth; x++)
400 		{
401 			const BYTE Y = pY[x];
402 			const BYTE U = pU[x];
403 			const BYTE V = pV[x];
404 			const BYTE r = YUV2R(Y, U, V);
405 			const BYTE g = YUV2G(Y, U, V);
406 			const BYTE b = YUV2B(Y, U, V);
407 			pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
408 		}
409 	}
410 
411 	return PRIMITIVES_SUCCESS;
412 }
413 
general_YUV444ToRGB_8u_P3AC4R_BGRX(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)414 static pstatus_t general_YUV444ToRGB_8u_P3AC4R_BGRX(const BYTE* const pSrc[3],
415                                                     const UINT32 srcStep[3], BYTE* pDst,
416                                                     UINT32 dstStep, UINT32 DstFormat,
417                                                     const prim_size_t* roi)
418 {
419 	UINT32 x, y;
420 	UINT32 nWidth, nHeight;
421 	const DWORD formatSize = GetBytesPerPixel(DstFormat);
422 	nWidth = roi->width;
423 	nHeight = roi->height;
424 
425 	for (y = 0; y < nHeight; y++)
426 	{
427 		const BYTE* pY = pSrc[0] + y * srcStep[0];
428 		const BYTE* pU = pSrc[1] + y * srcStep[1];
429 		const BYTE* pV = pSrc[2] + y * srcStep[2];
430 		BYTE* pRGB = pDst + y * dstStep;
431 
432 		for (x = 0; x < nWidth; x++)
433 		{
434 			const BYTE Y = pY[x];
435 			const BYTE U = pU[x];
436 			const BYTE V = pV[x];
437 			const BYTE r = YUV2R(Y, U, V);
438 			const BYTE g = YUV2G(Y, U, V);
439 			const BYTE b = YUV2B(Y, U, V);
440 			pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, r, g, b, 0);
441 		}
442 	}
443 
444 	return PRIMITIVES_SUCCESS;
445 }
446 
general_YUV444ToRGB_8u_P3AC4R(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)447 static pstatus_t general_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
448                                                BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
449                                                const prim_size_t* roi)
450 {
451 	switch (DstFormat)
452 	{
453 		case PIXEL_FORMAT_BGRA32:
454 		case PIXEL_FORMAT_BGRX32:
455 			return general_YUV444ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
456 
457 		default:
458 			return general_YUV444ToRGB_8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat,
459 			                                             roi);
460 	}
461 }
462 /**
463  * | R |   ( | 256     0    403 | |    Y    | )
464  * | G | = ( | 256   -48   -120 | | U - 128 | ) >> 8
465  * | B |   ( | 256   475      0 | | V - 128 | )
466  */
general_YUV420ToRGB_8u_P3AC4R(const BYTE * const pSrc[3],const UINT32 srcStep[3],BYTE * pDst,UINT32 dstStep,UINT32 DstFormat,const prim_size_t * roi)467 static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
468                                                BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
469                                                const prim_size_t* roi)
470 {
471 	UINT32 x, y;
472 	UINT32 dstPad;
473 	UINT32 srcPad[3];
474 	BYTE Y, U, V;
475 	UINT32 halfWidth;
476 	UINT32 halfHeight;
477 	const BYTE* pY;
478 	const BYTE* pU;
479 	const BYTE* pV;
480 	BYTE* pRGB = pDst;
481 	UINT32 nWidth, nHeight;
482 	UINT32 lastRow, lastCol;
483 	const DWORD formatSize = GetBytesPerPixel(DstFormat);
484 	fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
485 	pY = pSrc[0];
486 	pU = pSrc[1];
487 	pV = pSrc[2];
488 	lastCol = roi->width & 0x01;
489 	lastRow = roi->height & 0x01;
490 	nWidth = (roi->width + 1) & ~0x0001;
491 	nHeight = (roi->height + 1) & ~0x0001;
492 	halfWidth = nWidth / 2;
493 	halfHeight = nHeight / 2;
494 	srcPad[0] = (srcStep[0] - nWidth);
495 	srcPad[1] = (srcStep[1] - halfWidth);
496 	srcPad[2] = (srcStep[2] - halfWidth);
497 	dstPad = (dstStep - (nWidth * 4));
498 
499 	for (y = 0; y < halfHeight;)
500 	{
501 		if (++y == halfHeight)
502 			lastRow <<= 1;
503 
504 		for (x = 0; x < halfWidth;)
505 		{
506 			BYTE r;
507 			BYTE g;
508 			BYTE b;
509 
510 			if (++x == halfWidth)
511 				lastCol <<= 1;
512 
513 			U = *pU++;
514 			V = *pV++;
515 			/* 1st pixel */
516 			Y = *pY++;
517 			r = YUV2R(Y, U, V);
518 			g = YUV2G(Y, U, V);
519 			b = YUV2B(Y, U, V);
520 			pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
521 
522 			/* 2nd pixel */
523 			if (!(lastCol & 0x02))
524 			{
525 				Y = *pY++;
526 				r = YUV2R(Y, U, V);
527 				g = YUV2G(Y, U, V);
528 				b = YUV2B(Y, U, V);
529 				pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
530 			}
531 			else
532 			{
533 				pY++;
534 				pRGB += formatSize;
535 				lastCol >>= 1;
536 			}
537 		}
538 
539 		pY += srcPad[0];
540 		pU -= halfWidth;
541 		pV -= halfWidth;
542 		pRGB += dstPad;
543 
544 		if (lastRow & 0x02)
545 			break;
546 
547 		for (x = 0; x < halfWidth;)
548 		{
549 			BYTE r;
550 			BYTE g;
551 			BYTE b;
552 
553 			if (++x == halfWidth)
554 				lastCol <<= 1;
555 
556 			U = *pU++;
557 			V = *pV++;
558 			/* 3rd pixel */
559 			Y = *pY++;
560 			r = YUV2R(Y, U, V);
561 			g = YUV2G(Y, U, V);
562 			b = YUV2B(Y, U, V);
563 			pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
564 
565 			/* 4th pixel */
566 			if (!(lastCol & 0x02))
567 			{
568 				Y = *pY++;
569 				r = YUV2R(Y, U, V);
570 				g = YUV2G(Y, U, V);
571 				b = YUV2B(Y, U, V);
572 				pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0);
573 			}
574 			else
575 			{
576 				pY++;
577 				pRGB += formatSize;
578 				lastCol >>= 1;
579 			}
580 		}
581 
582 		pY += srcPad[0];
583 		pU += srcPad[1];
584 		pV += srcPad[2];
585 		pRGB += dstPad;
586 	}
587 
588 	return PRIMITIVES_SUCCESS;
589 }
590 
591 /**
592  * | Y |    ( |  54   183     18 | | R | )        |  0  |
593  * | U | =  ( | -29   -99    128 | | G | ) >> 8 + | 128 |
594  * | V |    ( | 128  -116    -12 | | B | )        | 128 |
595  */
RGB2Y(BYTE R,BYTE G,BYTE B)596 static INLINE BYTE RGB2Y(BYTE R, BYTE G, BYTE B)
597 {
598 	return (54 * R + 183 * G + 18 * B) >> 8;
599 }
600 
RGB2U(BYTE R,BYTE G,BYTE B)601 static INLINE BYTE RGB2U(BYTE R, BYTE G, BYTE B)
602 {
603 	return ((-29 * R - 99 * G + 128 * B) >> 8) + 128;
604 }
605 
RGB2V(INT32 R,INT32 G,INT32 B)606 static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
607 {
608 	return ((128L * R - 116 * G - 12 * B) >> 8) + 128;
609 }
610 
general_RGBToYUV444_8u_P3AC4R(const BYTE * pSrc,UINT32 SrcFormat,const UINT32 srcStep,BYTE * pDst[3],UINT32 dstStep[3],const prim_size_t * roi)611 static pstatus_t general_RGBToYUV444_8u_P3AC4R(const BYTE* pSrc, UINT32 SrcFormat,
612                                                const UINT32 srcStep, BYTE* pDst[3],
613                                                UINT32 dstStep[3], const prim_size_t* roi)
614 {
615 	const UINT32 bpp = GetBytesPerPixel(SrcFormat);
616 	UINT32 x, y;
617 	UINT32 nWidth, nHeight;
618 	nWidth = roi->width;
619 	nHeight = roi->height;
620 
621 	for (y = 0; y < nHeight; y++)
622 	{
623 		const BYTE* pRGB = pSrc + y * srcStep;
624 		BYTE* pY = pDst[0] + y * dstStep[0];
625 		BYTE* pU = pDst[1] + y * dstStep[1];
626 		BYTE* pV = pDst[2] + y * dstStep[2];
627 
628 		for (x = 0; x < nWidth; x++)
629 		{
630 			BYTE B, G, R;
631 			const UINT32 color = ReadColor(&pRGB[x * bpp], SrcFormat);
632 			SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
633 			pY[x] = RGB2Y(R, G, B);
634 			pU[x] = RGB2U(R, G, B);
635 			pV[x] = RGB2V(R, G, B);
636 		}
637 	}
638 
639 	return PRIMITIVES_SUCCESS;
640 }
641 
general_RGBToYUV420_BGRX(const BYTE * pSrc,UINT32 srcStep,BYTE * pDst[3],UINT32 dstStep[3],const prim_size_t * roi)642 static INLINE pstatus_t general_RGBToYUV420_BGRX(const BYTE* pSrc, UINT32 srcStep, BYTE* pDst[3],
643                                                  UINT32 dstStep[3], const prim_size_t* roi)
644 {
645 	UINT32 x, y, i;
646 	size_t x1 = 0, x2 = 4, x3 = srcStep, x4 = srcStep + 4;
647 	size_t y1 = 0, y2 = 1, y3 = dstStep[0], y4 = dstStep[0] + 1;
648 	UINT32 max_x = roi->width - 1;
649 	UINT32 max_y = roi->height - 1;
650 
651 	for (y = i = 0; y < roi->height; y += 2, i++)
652 	{
653 		const BYTE* src = pSrc + y * srcStep;
654 		BYTE* ydst = pDst[0] + y * dstStep[0];
655 		BYTE* udst = pDst[1] + i * dstStep[1];
656 		BYTE* vdst = pDst[2] + i * dstStep[2];
657 
658 		for (x = 0; x < roi->width; x += 2)
659 		{
660 			BYTE R, G, B;
661 			INT32 Ra, Ga, Ba;
662 			/* row 1, pixel 1 */
663 			Ba = B = *(src + x1 + 0);
664 			Ga = G = *(src + x1 + 1);
665 			Ra = R = *(src + x1 + 2);
666 			ydst[y1] = RGB2Y(R, G, B);
667 
668 			if (x < max_x)
669 			{
670 				/* row 1, pixel 2 */
671 				Ba += B = *(src + x2 + 0);
672 				Ga += G = *(src + x2 + 1);
673 				Ra += R = *(src + x2 + 2);
674 				ydst[y2] = RGB2Y(R, G, B);
675 			}
676 
677 			if (y < max_y)
678 			{
679 				/* row 2, pixel 1 */
680 				Ba += B = *(src + x3 + 0);
681 				Ga += G = *(src + x3 + 1);
682 				Ra += R = *(src + x3 + 2);
683 				ydst[y3] = RGB2Y(R, G, B);
684 
685 				if (x < max_x)
686 				{
687 					/* row 2, pixel 2 */
688 					Ba += B = *(src + x4 + 0);
689 					Ga += G = *(src + x4 + 1);
690 					Ra += R = *(src + x4 + 2);
691 					ydst[y4] = RGB2Y(R, G, B);
692 				}
693 			}
694 
695 			Ba >>= 2;
696 			Ga >>= 2;
697 			Ra >>= 2;
698 			*udst++ = RGB2U(Ra, Ga, Ba);
699 			*vdst++ = RGB2V(Ra, Ga, Ba);
700 			ydst += 2;
701 			src += 8;
702 		}
703 	}
704 
705 	return PRIMITIVES_SUCCESS;
706 }
707 
general_RGBToYUV420_RGBX(const BYTE * pSrc,UINT32 srcStep,BYTE * pDst[3],UINT32 dstStep[3],const prim_size_t * roi)708 static INLINE pstatus_t general_RGBToYUV420_RGBX(const BYTE* pSrc, UINT32 srcStep, BYTE* pDst[3],
709                                                  UINT32 dstStep[3], const prim_size_t* roi)
710 {
711 	UINT32 x, y, i;
712 	size_t x1 = 0, x2 = 4, x3 = srcStep, x4 = srcStep + 4;
713 	size_t y1 = 0, y2 = 1, y3 = dstStep[0], y4 = dstStep[0] + 1;
714 	UINT32 max_x = roi->width - 1;
715 	UINT32 max_y = roi->height - 1;
716 
717 	for (y = i = 0; y < roi->height; y += 2, i++)
718 	{
719 		const BYTE* src = pSrc + y * srcStep;
720 		BYTE* ydst = pDst[0] + y * dstStep[0];
721 		BYTE* udst = pDst[1] + i * dstStep[1];
722 		BYTE* vdst = pDst[2] + i * dstStep[2];
723 
724 		for (x = 0; x < roi->width; x += 2)
725 		{
726 			BYTE R, G, B;
727 			INT32 Ra, Ga, Ba;
728 			/* row 1, pixel 1 */
729 			Ra = R = *(src + x1 + 0);
730 			Ga = G = *(src + x1 + 1);
731 			Ba = B = *(src + x1 + 2);
732 			ydst[y1] = RGB2Y(R, G, B);
733 
734 			if (x < max_x)
735 			{
736 				/* row 1, pixel 2 */
737 				Ra += R = *(src + x2 + 0);
738 				Ga += G = *(src + x2 + 1);
739 				Ba += B = *(src + x2 + 2);
740 				ydst[y2] = RGB2Y(R, G, B);
741 			}
742 
743 			if (y < max_y)
744 			{
745 				/* row 2, pixel 1 */
746 				Ra += R = *(src + x3 + 0);
747 				Ga += G = *(src + x3 + 1);
748 				Ba += B = *(src + x3 + 2);
749 				ydst[y3] = RGB2Y(R, G, B);
750 
751 				if (x < max_x)
752 				{
753 					/* row 2, pixel 2 */
754 					Ra += R = *(src + x4 + 0);
755 					Ga += G = *(src + x4 + 1);
756 					Ba += B = *(src + x4 + 2);
757 					ydst[y4] = RGB2Y(R, G, B);
758 				}
759 			}
760 
761 			Ba >>= 2;
762 			Ga >>= 2;
763 			Ra >>= 2;
764 			*udst++ = RGB2U(Ra, Ga, Ba);
765 			*vdst++ = RGB2V(Ra, Ga, Ba);
766 			ydst += 2;
767 			src += 8;
768 		}
769 	}
770 
771 	return PRIMITIVES_SUCCESS;
772 }
773 
general_RGBToYUV420_ANY(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst[3],UINT32 dstStep[3],const prim_size_t * roi)774 static INLINE pstatus_t general_RGBToYUV420_ANY(const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
775                                                 BYTE* pDst[3], UINT32 dstStep[3],
776                                                 const prim_size_t* roi)
777 {
778 	const UINT32 bpp = GetBytesPerPixel(srcFormat);
779 	UINT32 x, y, i;
780 	size_t x1 = 0, x2 = bpp, x3 = srcStep, x4 = srcStep + bpp;
781 	size_t y1 = 0, y2 = 1, y3 = dstStep[0], y4 = dstStep[0] + 1;
782 	UINT32 max_x = roi->width - 1;
783 	UINT32 max_y = roi->height - 1;
784 
785 	for (y = i = 0; y < roi->height; y += 2, i++)
786 	{
787 		const BYTE* src = pSrc + y * srcStep;
788 		BYTE* ydst = pDst[0] + y * dstStep[0];
789 		BYTE* udst = pDst[1] + i * dstStep[1];
790 		BYTE* vdst = pDst[2] + i * dstStep[2];
791 
792 		for (x = 0; x < roi->width; x += 2)
793 		{
794 			BYTE R, G, B;
795 			INT32 Ra, Ga, Ba;
796 			UINT32 color;
797 			/* row 1, pixel 1 */
798 			color = ReadColor(src + x1, srcFormat);
799 			SplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
800 			Ra = R;
801 			Ga = G;
802 			Ba = B;
803 			ydst[y1] = RGB2Y(R, G, B);
804 
805 			if (x < max_x)
806 			{
807 				/* row 1, pixel 2 */
808 				color = ReadColor(src + x2, srcFormat);
809 				SplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
810 				Ra += R;
811 				Ga += G;
812 				Ba += B;
813 				ydst[y2] = RGB2Y(R, G, B);
814 			}
815 
816 			if (y < max_y)
817 			{
818 				/* row 2, pixel 1 */
819 				color = ReadColor(src + x3, srcFormat);
820 				SplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
821 				Ra += R;
822 				Ga += G;
823 				Ba += B;
824 				ydst[y3] = RGB2Y(R, G, B);
825 
826 				if (x < max_x)
827 				{
828 					/* row 2, pixel 2 */
829 					color = ReadColor(src + x4, srcFormat);
830 					SplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
831 					Ra += R;
832 					Ga += G;
833 					Ba += B;
834 					ydst[y4] = RGB2Y(R, G, B);
835 				}
836 			}
837 
838 			Ra >>= 2;
839 			Ga >>= 2;
840 			Ba >>= 2;
841 			*udst++ = RGB2U(Ra, Ga, Ba);
842 			*vdst++ = RGB2V(Ra, Ga, Ba);
843 			ydst += 2;
844 			src += 2 * bpp;
845 		}
846 	}
847 
848 	return PRIMITIVES_SUCCESS;
849 }
850 
general_RGBToYUV420_8u_P3AC4R(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst[3],UINT32 dstStep[3],const prim_size_t * roi)851 static pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
852                                                BYTE* pDst[3], UINT32 dstStep[3],
853                                                const prim_size_t* roi)
854 {
855 	switch (srcFormat)
856 	{
857 		case PIXEL_FORMAT_BGRA32:
858 		case PIXEL_FORMAT_BGRX32:
859 			return general_RGBToYUV420_BGRX(pSrc, srcStep, pDst, dstStep, roi);
860 
861 		case PIXEL_FORMAT_RGBA32:
862 		case PIXEL_FORMAT_RGBX32:
863 			return general_RGBToYUV420_RGBX(pSrc, srcStep, pDst, dstStep, roi);
864 
865 		default:
866 			return general_RGBToYUV420_ANY(pSrc, srcFormat, srcStep, pDst, dstStep, roi);
867 	}
868 }
869 
general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(const BYTE * srcEven,const BYTE * srcOdd,BYTE * b1Even,BYTE * b1Odd,BYTE * b2,BYTE * b3,BYTE * b4,BYTE * b5,BYTE * b6,BYTE * b7,UINT32 width)870 static INLINE void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(const BYTE* srcEven, const BYTE* srcOdd,
871                                                           BYTE* b1Even, BYTE* b1Odd, BYTE* b2,
872                                                           BYTE* b3, BYTE* b4, BYTE* b5, BYTE* b6,
873                                                           BYTE* b7, UINT32 width)
874 {
875 	UINT32 x;
876 
877 	for (x = 0; x < width; x += 2)
878 	{
879 		const BOOL lastX = (x + 1) >= width;
880 		BYTE Y1e, Y2e, U1e, V1e, U2e, V2e;
881 		BYTE Y1o, Y2o, U1o, V1o, U2o, V2o;
882 		/* Read 4 pixels, 2 from even, 2 from odd lines */
883 		{
884 			const BYTE b = *srcEven++;
885 			const BYTE g = *srcEven++;
886 			const BYTE r = *srcEven++;
887 			srcEven++;
888 			Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
889 			U1e = U2e = U1o = U2o = RGB2U(r, g, b);
890 			V1e = V2e = V1o = V2o = RGB2V(r, g, b);
891 		}
892 
893 		if (!lastX)
894 		{
895 			const BYTE b = *srcEven++;
896 			const BYTE g = *srcEven++;
897 			const BYTE r = *srcEven++;
898 			srcEven++;
899 			Y2e = RGB2Y(r, g, b);
900 			U2e = RGB2U(r, g, b);
901 			V2e = RGB2V(r, g, b);
902 		}
903 
904 		if (b1Odd)
905 		{
906 			const BYTE b = *srcOdd++;
907 			const BYTE g = *srcOdd++;
908 			const BYTE r = *srcOdd++;
909 			srcOdd++;
910 			Y1o = Y2o = RGB2Y(r, g, b);
911 			U1o = U2o = RGB2U(r, g, b);
912 			V1o = V2o = RGB2V(r, g, b);
913 		}
914 
915 		if (b1Odd && !lastX)
916 		{
917 			const BYTE b = *srcOdd++;
918 			const BYTE g = *srcOdd++;
919 			const BYTE r = *srcOdd++;
920 			srcOdd++;
921 			Y2o = RGB2Y(r, g, b);
922 			U2o = RGB2U(r, g, b);
923 			V2o = RGB2V(r, g, b);
924 		}
925 
926 		/* We have 4 Y pixels, so store them. */
927 		*b1Even++ = Y1e;
928 		*b1Even++ = Y2e;
929 
930 		if (b1Odd)
931 		{
932 			*b1Odd++ = Y1o;
933 			*b1Odd++ = Y2o;
934 		}
935 
936 		/* 2x 2y pixel in luma UV plane use averaging
937 		 */
938 		{
939 			const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
940 			const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
941 			*b2++ = Uavg;
942 			*b3++ = Vavg;
943 		}
944 
945 		/* UV from 2x, 2y+1 */
946 		if (b1Odd)
947 		{
948 			*b4++ = U1o;
949 			*b5++ = V1o;
950 
951 			if (!lastX)
952 			{
953 				*b4++ = U2o;
954 				*b5++ = V2o;
955 			}
956 		}
957 
958 		/* UV from 2x+1, 2y */
959 		if (!lastX)
960 		{
961 			*b6++ = U2e;
962 			*b7++ = V2e;
963 		}
964 	}
965 }
966 
general_RGBToAVC444YUV_BGRX(const BYTE * pSrc,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)967 static INLINE pstatus_t general_RGBToAVC444YUV_BGRX(const BYTE* pSrc, UINT32 srcStep,
968                                                     BYTE* pDst1[3], const UINT32 dst1Step[3],
969                                                     BYTE* pDst2[3], const UINT32 dst2Step[3],
970                                                     const prim_size_t* roi)
971 {
972 	/**
973 	 * Note:
974 	 * Read information in function general_RGBToAVC444YUV_ANY below !
975 	 */
976 	UINT32 y;
977 	const BYTE* pMaxSrc = pSrc + (roi->height - 1) * srcStep;
978 
979 	for (y = 0; y < roi->height; y += 2)
980 	{
981 		const BOOL last = (y >= (roi->height - 1));
982 		const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc;
983 		const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc;
984 		const UINT32 i = y >> 1;
985 		const UINT32 n = (i & ~7) + i;
986 		BYTE* b1Even = pDst1[0] + y * dst1Step[0];
987 		BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
988 		BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1];
989 		BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2];
990 		BYTE* b4 = pDst2[0] + dst2Step[0] * n;
991 		BYTE* b5 = b4 + 8 * dst2Step[0];
992 		BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1];
993 		BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2];
994 		general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
995 		                                       b7, roi->width);
996 	}
997 
998 	return PRIMITIVES_SUCCESS;
999 }
1000 
general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(const BYTE * srcEven,const BYTE * srcOdd,BYTE * b1Even,BYTE * b1Odd,BYTE * b2,BYTE * b3,BYTE * b4,BYTE * b5,BYTE * b6,BYTE * b7,UINT32 width)1001 static INLINE void general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(const BYTE* srcEven, const BYTE* srcOdd,
1002                                                           BYTE* b1Even, BYTE* b1Odd, BYTE* b2,
1003                                                           BYTE* b3, BYTE* b4, BYTE* b5, BYTE* b6,
1004                                                           BYTE* b7, UINT32 width)
1005 {
1006 	UINT32 x;
1007 
1008 	for (x = 0; x < width; x += 2)
1009 	{
1010 		const BOOL lastX = (x + 1) >= width;
1011 		BYTE Y1e, Y2e, U1e, V1e, U2e, V2e;
1012 		BYTE Y1o, Y2o, U1o, V1o, U2o, V2o;
1013 		/* Read 4 pixels, 2 from even, 2 from odd lines */
1014 		{
1015 			const BYTE r = *srcEven++;
1016 			const BYTE g = *srcEven++;
1017 			const BYTE b = *srcEven++;
1018 			srcEven++;
1019 			Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1020 			U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1021 			V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1022 		}
1023 
1024 		if (!lastX)
1025 		{
1026 			const BYTE r = *srcEven++;
1027 			const BYTE g = *srcEven++;
1028 			const BYTE b = *srcEven++;
1029 			srcEven++;
1030 			Y2e = RGB2Y(r, g, b);
1031 			U2e = RGB2U(r, g, b);
1032 			V2e = RGB2V(r, g, b);
1033 		}
1034 
1035 		if (b1Odd)
1036 		{
1037 			const BYTE r = *srcOdd++;
1038 			const BYTE g = *srcOdd++;
1039 			const BYTE b = *srcOdd++;
1040 			srcOdd++;
1041 			Y1o = Y2o = RGB2Y(r, g, b);
1042 			U1o = U2o = RGB2U(r, g, b);
1043 			V1o = V2o = RGB2V(r, g, b);
1044 		}
1045 
1046 		if (b1Odd && !lastX)
1047 		{
1048 			const BYTE r = *srcOdd++;
1049 			const BYTE g = *srcOdd++;
1050 			const BYTE b = *srcOdd++;
1051 			srcOdd++;
1052 			Y2o = RGB2Y(r, g, b);
1053 			U2o = RGB2U(r, g, b);
1054 			V2o = RGB2V(r, g, b);
1055 		}
1056 
1057 		/* We have 4 Y pixels, so store them. */
1058 		*b1Even++ = Y1e;
1059 		*b1Even++ = Y2e;
1060 
1061 		if (b1Odd)
1062 		{
1063 			*b1Odd++ = Y1o;
1064 			*b1Odd++ = Y2o;
1065 		}
1066 
1067 		/* 2x 2y pixel in luma UV plane use averaging
1068 		 */
1069 		{
1070 			const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
1071 			const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
1072 			*b2++ = Uavg;
1073 			*b3++ = Vavg;
1074 		}
1075 
1076 		/* UV from 2x, 2y+1 */
1077 		if (b1Odd)
1078 		{
1079 			*b4++ = U1o;
1080 			*b5++ = V1o;
1081 
1082 			if (!lastX)
1083 			{
1084 				*b4++ = U2o;
1085 				*b5++ = V2o;
1086 			}
1087 		}
1088 
1089 		/* UV from 2x+1, 2y */
1090 		if (!lastX)
1091 		{
1092 			*b6++ = U2e;
1093 			*b7++ = V2e;
1094 		}
1095 	}
1096 }
1097 
general_RGBToAVC444YUV_RGBX(const BYTE * pSrc,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1098 static INLINE pstatus_t general_RGBToAVC444YUV_RGBX(const BYTE* pSrc, UINT32 srcStep,
1099                                                     BYTE* pDst1[3], const UINT32 dst1Step[3],
1100                                                     BYTE* pDst2[3], const UINT32 dst2Step[3],
1101                                                     const prim_size_t* roi)
1102 {
1103 	/**
1104 	 * Note:
1105 	 * Read information in function general_RGBToAVC444YUV_ANY below !
1106 	 */
1107 	UINT32 y;
1108 	const BYTE* pMaxSrc = pSrc + (roi->height - 1) * srcStep;
1109 
1110 	for (y = 0; y < roi->height; y += 2)
1111 	{
1112 		const BOOL last = (y >= (roi->height - 1));
1113 		const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc;
1114 		const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc;
1115 		const UINT32 i = y >> 1;
1116 		const UINT32 n = (i & ~7) + i;
1117 		BYTE* b1Even = pDst1[0] + y * dst1Step[0];
1118 		BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
1119 		BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1];
1120 		BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2];
1121 		BYTE* b4 = pDst2[0] + dst2Step[0] * n;
1122 		BYTE* b5 = b4 + 8 * dst2Step[0];
1123 		BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1];
1124 		BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2];
1125 		general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
1126 		                                       b7, roi->width);
1127 	}
1128 
1129 	return PRIMITIVES_SUCCESS;
1130 }
1131 
general_RGBToAVC444YUV_ANY_DOUBLE_ROW(const BYTE * srcEven,const BYTE * srcOdd,UINT32 srcFormat,BYTE * b1Even,BYTE * b1Odd,BYTE * b2,BYTE * b3,BYTE * b4,BYTE * b5,BYTE * b6,BYTE * b7,UINT32 width)1132 static INLINE void general_RGBToAVC444YUV_ANY_DOUBLE_ROW(const BYTE* srcEven, const BYTE* srcOdd,
1133                                                          UINT32 srcFormat, BYTE* b1Even,
1134                                                          BYTE* b1Odd, BYTE* b2, BYTE* b3, BYTE* b4,
1135                                                          BYTE* b5, BYTE* b6, BYTE* b7, UINT32 width)
1136 {
1137 	const UINT32 bpp = GetBytesPerPixel(srcFormat);
1138 	UINT32 x;
1139 
1140 	for (x = 0; x < width; x += 2)
1141 	{
1142 		const BOOL lastX = (x + 1) >= width;
1143 		BYTE Y1e, Y2e, U1e, V1e, U2e, V2e;
1144 		BYTE Y1o, Y2o, U1o, V1o, U2o, V2o;
1145 		/* Read 4 pixels, 2 from even, 2 from odd lines */
1146 		{
1147 			BYTE r, g, b;
1148 			const UINT32 color = ReadColor(srcEven, srcFormat);
1149 			srcEven += bpp;
1150 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1151 			Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
1152 			U1e = U2e = U1o = U2o = RGB2U(r, g, b);
1153 			V1e = V2e = V1o = V2o = RGB2V(r, g, b);
1154 		}
1155 
1156 		if (!lastX)
1157 		{
1158 			BYTE r, g, b;
1159 			const UINT32 color = ReadColor(srcEven, srcFormat);
1160 			srcEven += bpp;
1161 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1162 			Y2e = RGB2Y(r, g, b);
1163 			U2e = RGB2U(r, g, b);
1164 			V2e = RGB2V(r, g, b);
1165 		}
1166 
1167 		if (b1Odd)
1168 		{
1169 			BYTE r, g, b;
1170 			const UINT32 color = ReadColor(srcOdd, srcFormat);
1171 			srcOdd += bpp;
1172 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1173 			Y1o = Y2o = RGB2Y(r, g, b);
1174 			U1o = U2o = RGB2U(r, g, b);
1175 			V1o = V2o = RGB2V(r, g, b);
1176 		}
1177 
1178 		if (b1Odd && !lastX)
1179 		{
1180 			BYTE r, g, b;
1181 			const UINT32 color = ReadColor(srcOdd, srcFormat);
1182 			srcOdd += bpp;
1183 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1184 			Y2o = RGB2Y(r, g, b);
1185 			U2o = RGB2U(r, g, b);
1186 			V2o = RGB2V(r, g, b);
1187 		}
1188 
1189 		/* We have 4 Y pixels, so store them. */
1190 		*b1Even++ = Y1e;
1191 		*b1Even++ = Y2e;
1192 
1193 		if (b1Odd)
1194 		{
1195 			*b1Odd++ = Y1o;
1196 			*b1Odd++ = Y2o;
1197 		}
1198 
1199 		/* 2x 2y pixel in luma UV plane use averaging
1200 		 */
1201 		{
1202 			const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4;
1203 			const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4;
1204 			*b2++ = Uavg;
1205 			*b3++ = Vavg;
1206 		}
1207 
1208 		/* UV from 2x, 2y+1 */
1209 		if (b1Odd)
1210 		{
1211 			*b4++ = U1o;
1212 			*b5++ = V1o;
1213 
1214 			if (!lastX)
1215 			{
1216 				*b4++ = U2o;
1217 				*b5++ = V2o;
1218 			}
1219 		}
1220 
1221 		/* UV from 2x+1, 2y */
1222 		if (!lastX)
1223 		{
1224 			*b6++ = U2e;
1225 			*b7++ = V2e;
1226 		}
1227 	}
1228 }
1229 
general_RGBToAVC444YUV_ANY(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1230 static INLINE pstatus_t general_RGBToAVC444YUV_ANY(const BYTE* pSrc, UINT32 srcFormat,
1231                                                    UINT32 srcStep, BYTE* pDst1[3],
1232                                                    const UINT32 dst1Step[3], BYTE* pDst2[3],
1233                                                    const UINT32 dst2Step[3], const prim_size_t* roi)
1234 {
1235 	/**
1236 	 * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
1237 	 * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
1238 	 * to a multiple of 16.
1239 	 * Hence the passed destination YUV420/CHROMA420 buffers must have been
1240 	 * allocated accordingly !!
1241 	 */
1242 	/**
1243 	 * [MS-RDPEGFX 3.3.8.3.2 YUV420p Stream Combination] defines the following "Bx areas":
1244 	 *
1245 	 * YUV420 frame (main view):
1246 	 * B1:  From Y444 all pixels
1247 	 * B2:  From U444 all pixels in even rows with even columns
1248 	 * B3:  From V444 all pixels in even rows with even columns
1249 	 *
1250 	 * Chroma420 frame (auxillary view):
1251 	 * B45: From U444 and V444 all pixels from all odd rows
1252 	 *      (The odd U444 and V444 rows must be interleaved in 8-line blocks in B45 !!!)
1253 	 * B6:  From U444 all pixels in even rows with odd columns
1254 	 * B7:  From V444 all pixels in even rows with odd columns
1255 	 *
1256 	 * Microsoft's horrible unclear description in MS-RDPEGFX translated to pseudo code looks like
1257 	 * this:
1258 	 *
1259 	 * for (y = 0; y < fullHeight; y++)
1260 	 * {
1261 	 *     for (x = 0; x < fullWidth; x++)
1262 	 *     {
1263 	 *         B1[x,y] = Y444[x,y];
1264 	 *     }
1265 	 *  }
1266 	 *
1267 	 * for (y = 0; y < halfHeight; y++)
1268 	 * {
1269 	 *     for (x = 0; x < halfWidth; x++)
1270 	 *     {
1271 	 *         B2[x,y] = U444[2 * x,     2 * y];
1272 	 *         B3[x,y] = V444[2 * x,     2 * y];
1273 	 *         B6[x,y] = U444[2 * x + 1, 2 * y];
1274 	 *     	   B7[x,y] = V444[2 * x + 1, 2 * y];
1275 	 *     }
1276 	 *  }
1277 	 *
1278 	 * for (y = 0; y < halfHeight; y++)
1279 	 * {
1280 	 *     yU  = (y / 8) * 16;   // identify first row of correct 8-line U block in B45
1281 	 *     yU += (y % 8);        // add offset rows in destination block
1282 	 *     yV  = yU + 8;         // the corresponding v line is always 8 rows ahead
1283 	 *
1284 	 *     for (x = 0; x < fullWidth; x++)
1285 	 *     {
1286 	 *         B45[x,yU] = U444[x, 2 * y + 1];
1287 	 *         B45[x,yV] = V444[x, 2 * y + 1];
1288 	 *     }
1289 	 *  }
1290 	 *
1291 	 */
1292 	UINT32 y;
1293 	const BYTE* pMaxSrc = pSrc + (roi->height - 1) * srcStep;
1294 
1295 	for (y = 0; y < roi->height; y += 2)
1296 	{
1297 		const BOOL last = (y >= (roi->height - 1));
1298 		const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc;
1299 		const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc;
1300 		const UINT32 i = y >> 1;
1301 		const UINT32 n = (i & ~7) + i;
1302 		BYTE* b1Even = pDst1[0] + y * dst1Step[0];
1303 		BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
1304 		BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1];
1305 		BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2];
1306 		BYTE* b4 = pDst2[0] + dst2Step[0] * n;
1307 		BYTE* b5 = b4 + 8 * dst2Step[0];
1308 		BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1];
1309 		BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2];
1310 		general_RGBToAVC444YUV_ANY_DOUBLE_ROW(srcEven, srcOdd, srcFormat, b1Even, b1Odd, b2, b3, b4,
1311 		                                      b5, b6, b7, roi->width);
1312 	}
1313 
1314 	return PRIMITIVES_SUCCESS;
1315 }
1316 
general_RGBToAVC444YUV(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1317 static INLINE pstatus_t general_RGBToAVC444YUV(const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
1318                                                BYTE* pDst1[3], const UINT32 dst1Step[3],
1319                                                BYTE* pDst2[3], const UINT32 dst2Step[3],
1320                                                const prim_size_t* roi)
1321 {
1322 	if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step)
1323 		return -1;
1324 
1325 	if (!pDst1[0] || !pDst1[1] || !pDst1[2])
1326 		return -1;
1327 
1328 	if (!dst1Step[0] || !dst1Step[1] || !dst1Step[2])
1329 		return -1;
1330 
1331 	if (!pDst2[0] || !pDst2[1] || !pDst2[2])
1332 		return -1;
1333 
1334 	if (!dst2Step[0] || !dst2Step[1] || !dst2Step[2])
1335 		return -1;
1336 
1337 	switch (srcFormat)
1338 	{
1339 		case PIXEL_FORMAT_BGRA32:
1340 		case PIXEL_FORMAT_BGRX32:
1341 			return general_RGBToAVC444YUV_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
1342 			                                   roi);
1343 
1344 		case PIXEL_FORMAT_RGBA32:
1345 		case PIXEL_FORMAT_RGBX32:
1346 			return general_RGBToAVC444YUV_RGBX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
1347 			                                   roi);
1348 
1349 		default:
1350 			return general_RGBToAVC444YUV_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
1351 			                                  dst2Step, roi);
1352 	}
1353 
1354 	return !PRIMITIVES_SUCCESS;
1355 }
1356 
general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(const BYTE * srcEven,const BYTE * srcOdd,UINT32 srcFormat,BYTE * yLumaDstEven,BYTE * yLumaDstOdd,BYTE * uLumaDst,BYTE * vLumaDst,BYTE * yEvenChromaDst1,BYTE * yEvenChromaDst2,BYTE * yOddChromaDst1,BYTE * yOddChromaDst2,BYTE * uChromaDst1,BYTE * uChromaDst2,BYTE * vChromaDst1,BYTE * vChromaDst2,UINT32 width)1357 static INLINE void general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
1358     const BYTE* srcEven, const BYTE* srcOdd, UINT32 srcFormat, BYTE* yLumaDstEven,
1359     BYTE* yLumaDstOdd, BYTE* uLumaDst, BYTE* vLumaDst, BYTE* yEvenChromaDst1, BYTE* yEvenChromaDst2,
1360     BYTE* yOddChromaDst1, BYTE* yOddChromaDst2, BYTE* uChromaDst1, BYTE* uChromaDst2,
1361     BYTE* vChromaDst1, BYTE* vChromaDst2, UINT32 width)
1362 {
1363 	UINT32 x;
1364 	const UINT32 bpp = GetBytesPerPixel(srcFormat);
1365 
1366 	for (x = 0; x < width; x += 2)
1367 	{
1368 		BYTE Ya, Ua, Va;
1369 		BYTE Yb, Ub, Vb;
1370 		BYTE Yc, Uc, Vc;
1371 		BYTE Yd, Ud, Vd;
1372 		{
1373 			BYTE b, g, r;
1374 			const UINT32 color = ReadColor(srcEven, srcFormat);
1375 			srcEven += bpp;
1376 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1377 			Ya = RGB2Y(r, g, b);
1378 			Ua = RGB2U(r, g, b);
1379 			Va = RGB2V(r, g, b);
1380 		}
1381 
1382 		if (x < width - 1)
1383 		{
1384 			BYTE b, g, r;
1385 			const UINT32 color = ReadColor(srcEven, srcFormat);
1386 			srcEven += bpp;
1387 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1388 			Yb = RGB2Y(r, g, b);
1389 			Ub = RGB2U(r, g, b);
1390 			Vb = RGB2V(r, g, b);
1391 		}
1392 		else
1393 		{
1394 			Yb = Ya;
1395 			Ub = Ua;
1396 			Vb = Va;
1397 		}
1398 
1399 		if (srcOdd)
1400 		{
1401 			BYTE b, g, r;
1402 			const UINT32 color = ReadColor(srcOdd, srcFormat);
1403 			srcOdd += bpp;
1404 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1405 			Yc = RGB2Y(r, g, b);
1406 			Uc = RGB2U(r, g, b);
1407 			Vc = RGB2V(r, g, b);
1408 		}
1409 		else
1410 		{
1411 			Yc = Ya;
1412 			Uc = Ua;
1413 			Vc = Va;
1414 		}
1415 
1416 		if (srcOdd && (x < width - 1))
1417 		{
1418 			BYTE b, g, r;
1419 			const UINT32 color = ReadColor(srcOdd, srcFormat);
1420 			srcOdd += bpp;
1421 			SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
1422 			Yd = RGB2Y(r, g, b);
1423 			Ud = RGB2U(r, g, b);
1424 			Vd = RGB2V(r, g, b);
1425 		}
1426 		else
1427 		{
1428 			Yd = Ya;
1429 			Ud = Ua;
1430 			Vd = Va;
1431 		}
1432 
1433 		/* Y [b1] */
1434 		*yLumaDstEven++ = Ya;
1435 
1436 		if (x < width - 1)
1437 			*yLumaDstEven++ = Yb;
1438 
1439 		if (srcOdd)
1440 			*yLumaDstOdd++ = Yc;
1441 
1442 		if (srcOdd && (x < width - 1))
1443 			*yLumaDstOdd++ = Yd;
1444 
1445 		/* 2x 2y [b2,b3] */
1446 		*uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
1447 		*vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
1448 
1449 		/* 2x+1, y [b4,b5] even */
1450 		if (x < width - 1)
1451 		{
1452 			*yEvenChromaDst1++ = Ub;
1453 			*yEvenChromaDst2++ = Vb;
1454 		}
1455 
1456 		if (srcOdd)
1457 		{
1458 			/* 2x+1, y [b4,b5] odd */
1459 			if (x < width - 1)
1460 			{
1461 				*yOddChromaDst1++ = Ud;
1462 				*yOddChromaDst2++ = Vd;
1463 			}
1464 
1465 			/* 4x 2y+1 [b6, b7] */
1466 			if (x % 4 == 0)
1467 			{
1468 				*uChromaDst1++ = Uc;
1469 				*uChromaDst2++ = Vc;
1470 			}
1471 			/* 4x+2 2y+1 [b8, b9] */
1472 			else
1473 			{
1474 				*vChromaDst1++ = Uc;
1475 				*vChromaDst2++ = Vc;
1476 			}
1477 		}
1478 	}
1479 }
1480 
general_RGBToAVC444YUVv2_ANY(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1481 static INLINE pstatus_t general_RGBToAVC444YUVv2_ANY(const BYTE* pSrc, UINT32 srcFormat,
1482                                                      UINT32 srcStep, BYTE* pDst1[3],
1483                                                      const UINT32 dst1Step[3], BYTE* pDst2[3],
1484                                                      const UINT32 dst2Step[3],
1485                                                      const prim_size_t* roi)
1486 {
1487 	/**
1488 	 * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the
1489 	 * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned
1490 	 * to a multiple of 16.
1491 	 * Hence the passed destination YUV420/CHROMA420 buffers must have been
1492 	 * allocated accordingly !!
1493 	 */
1494 	/**
1495 	 * [MS-RDPEGFX 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode] defines the following "Bx
1496 	 * areas":
1497 	 *
1498 	 * YUV420 frame (main view):
1499 	 * B1:  From Y444 all pixels
1500 	 * B2:  From U444 all pixels in even rows with even rows and columns
1501 	 * B3:  From V444 all pixels in even rows with even rows and columns
1502 	 *
1503 	 * Chroma420 frame (auxillary view):
1504 	 * B45: From U444 and V444 all pixels from all odd columns
1505 	 * B67: From U444 and V444 every 4th pixel in odd rows
1506 	 * B89:  From U444 and V444 every 4th pixel (initial offset of 2) in odd rows
1507 	 *
1508 	 * Chroma Bxy areas correspond to the left and right half of the YUV420 plane.
1509 	 * for (y = 0; y < fullHeight; y++)
1510 	 * {
1511 	 *     for (x = 0; x < fullWidth; x++)
1512 	 *     {
1513 	 *         B1[x,y] = Y444[x,y];
1514 	 *     }
1515 	 *
1516 	 *     for (x = 0; x < halfWidth; x++)
1517 	 *     {
1518 	 *         B4[x,y] = U444[2 * x, 2 * y];
1519 	 *         B5[x,y] = V444[2 * x, 2 * y];
1520 	 *     }
1521 	 *  }
1522 	 *
1523 	 * for (y = 0; y < halfHeight; y++)
1524 	 * {
1525 	 *     for (x = 0; x < halfWidth; x++)
1526 	 *     {
1527 	 *         B2[x,y] = U444[2 * x,     2 * y];
1528 	 *         B3[x,y] = V444[2 * x,     2 * y];
1529 	 *         B6[x,y] = U444[4 * x,     2 * y + 1];
1530 	 *         B7[x,y] = V444[4 * x,     2 * y + 1];
1531 	 *         B8[x,y] = V444[4 * x + 2, 2 * y + 1];
1532 	 *         B9[x,y] = V444[4 * x + 2, 2 * y] + 1;
1533 	 *     }
1534 	 *  }
1535 	 *
1536 	 */
1537 	UINT32 y;
1538 
1539 	if (roi->height < 1 || roi->width < 1)
1540 		return !PRIMITIVES_SUCCESS;
1541 
1542 	for (y = 0; y < roi->height; y += 2)
1543 	{
1544 		const BYTE* srcEven = (pSrc + y * srcStep);
1545 		const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL;
1546 		BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
1547 		BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]);
1548 		BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
1549 		BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
1550 		BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
1551 		BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
1552 		BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
1553 		BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
1554 		BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]);
1555 		BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]);
1556 		BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4;
1557 		BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4;
1558 		general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
1559 		    srcEven, srcOdd, srcFormat, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV,
1560 		    dstEvenChromaY1, dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1,
1561 		    dstChromaU2, dstChromaV1, dstChromaV2, roi->width);
1562 	}
1563 
1564 	return PRIMITIVES_SUCCESS;
1565 }
1566 
general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(const BYTE * srcEven,const BYTE * srcOdd,BYTE * yLumaDstEven,BYTE * yLumaDstOdd,BYTE * uLumaDst,BYTE * vLumaDst,BYTE * yEvenChromaDst1,BYTE * yEvenChromaDst2,BYTE * yOddChromaDst1,BYTE * yOddChromaDst2,BYTE * uChromaDst1,BYTE * uChromaDst2,BYTE * vChromaDst1,BYTE * vChromaDst2,UINT32 width)1567 static INLINE void general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
1568     const BYTE* srcEven, const BYTE* srcOdd, BYTE* yLumaDstEven, BYTE* yLumaDstOdd, BYTE* uLumaDst,
1569     BYTE* vLumaDst, BYTE* yEvenChromaDst1, BYTE* yEvenChromaDst2, BYTE* yOddChromaDst1,
1570     BYTE* yOddChromaDst2, BYTE* uChromaDst1, BYTE* uChromaDst2, BYTE* vChromaDst1,
1571     BYTE* vChromaDst2, UINT32 width)
1572 {
1573 	UINT32 x;
1574 
1575 	for (x = 0; x < width; x += 2)
1576 	{
1577 		BYTE Ya, Ua, Va;
1578 		BYTE Yb, Ub, Vb;
1579 		BYTE Yc, Uc, Vc;
1580 		BYTE Yd, Ud, Vd;
1581 		{
1582 			const BYTE b = *srcEven++;
1583 			const BYTE g = *srcEven++;
1584 			const BYTE r = *srcEven++;
1585 			srcEven++;
1586 			Ya = RGB2Y(r, g, b);
1587 			Ua = RGB2U(r, g, b);
1588 			Va = RGB2V(r, g, b);
1589 		}
1590 
1591 		if (x < width - 1)
1592 		{
1593 			const BYTE b = *srcEven++;
1594 			const BYTE g = *srcEven++;
1595 			const BYTE r = *srcEven++;
1596 			srcEven++;
1597 			Yb = RGB2Y(r, g, b);
1598 			Ub = RGB2U(r, g, b);
1599 			Vb = RGB2V(r, g, b);
1600 		}
1601 		else
1602 		{
1603 			Yb = Ya;
1604 			Ub = Ua;
1605 			Vb = Va;
1606 		}
1607 
1608 		if (srcOdd)
1609 		{
1610 			const BYTE b = *srcOdd++;
1611 			const BYTE g = *srcOdd++;
1612 			const BYTE r = *srcOdd++;
1613 			srcOdd++;
1614 			Yc = RGB2Y(r, g, b);
1615 			Uc = RGB2U(r, g, b);
1616 			Vc = RGB2V(r, g, b);
1617 		}
1618 		else
1619 		{
1620 			Yc = Ya;
1621 			Uc = Ua;
1622 			Vc = Va;
1623 		}
1624 
1625 		if (srcOdd && (x < width - 1))
1626 		{
1627 			const BYTE b = *srcOdd++;
1628 			const BYTE g = *srcOdd++;
1629 			const BYTE r = *srcOdd++;
1630 			srcOdd++;
1631 			Yd = RGB2Y(r, g, b);
1632 			Ud = RGB2U(r, g, b);
1633 			Vd = RGB2V(r, g, b);
1634 		}
1635 		else
1636 		{
1637 			Yd = Ya;
1638 			Ud = Ua;
1639 			Vd = Va;
1640 		}
1641 
1642 		/* Y [b1] */
1643 		*yLumaDstEven++ = Ya;
1644 
1645 		if (x < width - 1)
1646 			*yLumaDstEven++ = Yb;
1647 
1648 		if (srcOdd)
1649 			*yLumaDstOdd++ = Yc;
1650 
1651 		if (srcOdd && (x < width - 1))
1652 			*yLumaDstOdd++ = Yd;
1653 
1654 		/* 2x 2y [b2,b3] */
1655 		*uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
1656 		*vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
1657 
1658 		/* 2x+1, y [b4,b5] even */
1659 		if (x < width - 1)
1660 		{
1661 			*yEvenChromaDst1++ = Ub;
1662 			*yEvenChromaDst2++ = Vb;
1663 		}
1664 
1665 		if (srcOdd)
1666 		{
1667 			/* 2x+1, y [b4,b5] odd */
1668 			if (x < width - 1)
1669 			{
1670 				*yOddChromaDst1++ = Ud;
1671 				*yOddChromaDst2++ = Vd;
1672 			}
1673 
1674 			/* 4x 2y+1 [b6, b7] */
1675 			if (x % 4 == 0)
1676 			{
1677 				*uChromaDst1++ = Uc;
1678 				*uChromaDst2++ = Vc;
1679 			}
1680 			/* 4x+2 2y+1 [b8, b9] */
1681 			else
1682 			{
1683 				*vChromaDst1++ = Uc;
1684 				*vChromaDst2++ = Vc;
1685 			}
1686 		}
1687 	}
1688 }
1689 
general_RGBToAVC444YUVv2_BGRX(const BYTE * pSrc,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1690 static INLINE pstatus_t general_RGBToAVC444YUVv2_BGRX(const BYTE* pSrc, UINT32 srcStep,
1691                                                       BYTE* pDst1[3], const UINT32 dst1Step[3],
1692                                                       BYTE* pDst2[3], const UINT32 dst2Step[3],
1693                                                       const prim_size_t* roi)
1694 {
1695 	UINT32 y;
1696 
1697 	if (roi->height < 1 || roi->width < 1)
1698 		return !PRIMITIVES_SUCCESS;
1699 
1700 	for (y = 0; y < roi->height; y += 2)
1701 	{
1702 		const BYTE* srcEven = (pSrc + y * srcStep);
1703 		const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL;
1704 		BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
1705 		BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]);
1706 		BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
1707 		BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
1708 		BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
1709 		BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
1710 		BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
1711 		BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
1712 		BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]);
1713 		BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]);
1714 		BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4;
1715 		BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4;
1716 		general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
1717 		    srcEven, srcOdd, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV, dstEvenChromaY1,
1718 		    dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1, dstChromaU2, dstChromaV1,
1719 		    dstChromaV2, roi->width);
1720 	}
1721 
1722 	return PRIMITIVES_SUCCESS;
1723 }
1724 
general_RGBToAVC444YUVv2(const BYTE * pSrc,UINT32 srcFormat,UINT32 srcStep,BYTE * pDst1[3],const UINT32 dst1Step[3],BYTE * pDst2[3],const UINT32 dst2Step[3],const prim_size_t * roi)1725 static INLINE pstatus_t general_RGBToAVC444YUVv2(const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
1726                                                  BYTE* pDst1[3], const UINT32 dst1Step[3],
1727                                                  BYTE* pDst2[3], const UINT32 dst2Step[3],
1728                                                  const prim_size_t* roi)
1729 {
1730 	switch (srcFormat)
1731 	{
1732 		case PIXEL_FORMAT_BGRA32:
1733 		case PIXEL_FORMAT_BGRX32:
1734 			return general_RGBToAVC444YUVv2_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
1735 			                                     roi);
1736 
1737 		default:
1738 			return general_RGBToAVC444YUVv2_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
1739 			                                    dst2Step, roi);
1740 	}
1741 
1742 	return !PRIMITIVES_SUCCESS;
1743 }
1744 
/**
 * Set the YUV related entries of a primitives table to the general_*
 * implementations of this file.
 *
 * @param prims primitives table to fill, must not be NULL
 */
void primitives_init_YUV(primitives_t* prims)
{
	/* YUV -> RGB */
	prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R;
	prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R;

	/* RGB -> YUV */
	prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R;
	prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R;

	/* RGB -> AVC444 frame pairs */
	prims->RGBToAVC444YUVv2 = general_RGBToAVC444YUVv2;
	prims->RGBToAVC444YUV = general_RGBToAVC444YUV;

	/* YUV444 <-> YUV420 frame pairs */
	prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420;
	prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444;
}
1756