1 /* test_colors.c
2  * vi:ts=4 sw=4
3  *
4  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
5  * Licensed under the Apache License, Version 2.0 (the "License"); you may
6  * not use this file except in compliance with the License. You may obtain
7  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing
12  * permissions and limitations under the License.
13  */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <winpr/sysinfo.h>
20 #include <freerdp/utils/profiler.h>
21 
22 #include "prim_test.h"
23 
24 /* ------------------------------------------------------------------------- */
test_RGBToRGB_16s8u_P3AC4R_func(prim_size_t roi,DWORD DstFormat)25 static BOOL test_RGBToRGB_16s8u_P3AC4R_func(prim_size_t roi, DWORD DstFormat)
26 {
27 	INT16* r;
28 	INT16* g;
29 	INT16* b;
30 	BYTE* out1;
31 	BYTE* out2;
32 	UINT64 i;
33 	BOOL failed = FALSE;
34 	const INT16* ptrs[3];
35 	const UINT32 rgbStride = roi.width * 2;
36 	const UINT32 dstStride = roi.width * 4;
37 	PROFILER_DEFINE(genericProf)
38 	PROFILER_DEFINE(optProf)
39 	PROFILER_CREATE(genericProf, "RGBToRGB_16s8u_P3AC4R-GENERIC")
40 	PROFILER_CREATE(optProf, "RGBToRGB_16s8u_P3AC4R-OPTIMIZED")
41 	r = _aligned_recalloc(NULL, 1, rgbStride * roi.height, 16);
42 	g = _aligned_recalloc(NULL, 1, rgbStride * roi.height, 16);
43 	b = _aligned_recalloc(NULL, 1, rgbStride * roi.height, 16);
44 	out1 = _aligned_recalloc(NULL, 1, dstStride * roi.height, 16);
45 	out2 = _aligned_recalloc(NULL, 1, dstStride * roi.height, 16);
46 
47 	if (!r || !g || !b || !out1 || !out2)
48 		goto fail;
49 
50 #if 0
51 	{
52 		UINT32 x, y;
53 
54 		for (y = 0; y < roi.height; y++)
55 		{
56 			for (x = 0; x < roi.width; x++)
57 			{
58 				r[y * roi.width + x] = 0x01;
59 				g[y * roi.width + x] = 0x02;
60 				b[y * roi.width + x] = 0x04;
61 			}
62 		}
63 	}
64 #else
65 	winpr_RAND((BYTE*)r, rgbStride * roi.height);
66 	winpr_RAND((BYTE*)g, rgbStride * roi.height);
67 	winpr_RAND((BYTE*)b, rgbStride * roi.height);
68 #endif
69 	ptrs[0] = r;
70 	ptrs[1] = g;
71 	ptrs[2] = b;
72 	PROFILER_ENTER(genericProf)
73 
74 	if (generic->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride, out1, dstStride, DstFormat, &roi) !=
75 	    PRIMITIVES_SUCCESS)
76 		goto fail;
77 
78 	PROFILER_EXIT(genericProf)
79 	PROFILER_ENTER(optProf)
80 
81 	if (optimized->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride, out2, dstStride, DstFormat, &roi) !=
82 	    PRIMITIVES_SUCCESS)
83 		goto fail;
84 
85 	PROFILER_EXIT(optProf)
86 
87 	if (memcmp(out1, out2, dstStride * roi.height) != 0)
88 	{
89 		for (i = 0; i < roi.width * roi.height; ++i)
90 		{
91 			const UINT32 o1 = ReadColor(out1 + 4 * i, DstFormat);
92 			const UINT32 o2 = ReadColor(out2 + 4 * i, DstFormat);
93 
94 			if (o1 != o2)
95 			{
96 				printf("RGBToRGB_16s8u_P3AC4R FAIL: out1[%" PRIu64 "]=0x%08" PRIx8 " out2[%" PRIu64
97 				       "]=0x%08" PRIx8 "\n",
98 				       i, out1[i], i, out2[i]);
99 				failed = TRUE;
100 			}
101 		}
102 	}
103 
104 	printf("Results for %" PRIu32 "x%" PRIu32 " [%s]", roi.width, roi.height,
105 	       FreeRDPGetColorFormatName(DstFormat));
106 	PROFILER_PRINT_HEADER
107 	PROFILER_PRINT(genericProf)
108 	PROFILER_PRINT(optProf)
109 	PROFILER_PRINT_FOOTER
110 fail:
111 	PROFILER_FREE(genericProf)
112 	PROFILER_FREE(optProf)
113 	_aligned_free(r);
114 	_aligned_free(g);
115 	_aligned_free(b);
116 	_aligned_free(out1);
117 	_aligned_free(out2);
118 	return !failed;
119 }
120 
121 /* ------------------------------------------------------------------------- */
test_RGBToRGB_16s8u_P3AC4R_speed(void)122 static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
123 {
124 	const prim_size_t roi64x64 = { 64, 64 };
125 	INT16 ALIGN(r[4096 + 1]), ALIGN(g[4096 + 1]), ALIGN(b[4096 + 1]);
126 	UINT32 ALIGN(dst[4096 + 1]);
127 	int i;
128 	INT16* ptrs[3];
129 	winpr_RAND((BYTE*)r, sizeof(r));
130 	winpr_RAND((BYTE*)g, sizeof(g));
131 	winpr_RAND((BYTE*)b, sizeof(b));
132 
133 	/* clear upper bytes */
134 	for (i = 0; i < 4096; ++i)
135 	{
136 		r[i] &= 0x00FFU;
137 		g[i] &= 0x00FFU;
138 		b[i] &= 0x00FFU;
139 	}
140 
141 	ptrs[0] = r + 1;
142 	ptrs[1] = g + 1;
143 	ptrs[2] = b + 1;
144 
145 	if (!speed_test("RGBToRGB_16s8u_P3AC4R", "aligned", g_Iterations,
146 	                (speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
147 	                (speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R, (const INT16**)ptrs, 64 * 2,
148 	                (BYTE*)dst, 64 * 4, &roi64x64))
149 		return FALSE;
150 
151 	if (!speed_test("RGBToRGB_16s8u_P3AC4R", "unaligned", g_Iterations,
152 	                (speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
153 	                (speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R, (const INT16**)ptrs, 64 * 2,
154 	                ((BYTE*)dst) + 1, 64 * 4, &roi64x64))
155 		return FALSE;
156 
157 	return TRUE;
158 }
159 
160 /* ========================================================================= */
test_yCbCrToRGB_16s16s_P3P3_func(void)161 static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
162 {
163 	pstatus_t status;
164 	INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
165 	INT16 ALIGN(r1[4096]), ALIGN(g1[4096]), ALIGN(b1[4096]);
166 	INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
167 	int i;
168 	const INT16* in[3];
169 	INT16* out1[3];
170 	INT16* out2[3];
171 	prim_size_t roi = { 64, 64 };
172 	winpr_RAND((BYTE*)y, sizeof(y));
173 	winpr_RAND((BYTE*)cb, sizeof(cb));
174 	winpr_RAND((BYTE*)cr, sizeof(cr));
175 
176 	/* Normalize to 11.5 fixed radix */
177 	for (i = 0; i < 4096; ++i)
178 	{
179 		y[i] &= 0x1FE0U;
180 		cb[i] &= 0x1FE0U;
181 		cr[i] &= 0x1FE0U;
182 	}
183 
184 	memset(r1, 0, sizeof(r1));
185 	memset(g1, 0, sizeof(g1));
186 	memset(b1, 0, sizeof(b1));
187 	memset(r2, 0, sizeof(r2));
188 	memset(g2, 0, sizeof(g2));
189 	memset(b2, 0, sizeof(b2));
190 	in[0] = y;
191 	in[1] = cb;
192 	in[2] = cr;
193 	out1[0] = r1;
194 	out1[1] = g1;
195 	out1[2] = b1;
196 	out2[0] = r2;
197 	out2[1] = g2;
198 	out2[2] = b2;
199 	status = generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
200 
201 	if (status != PRIMITIVES_SUCCESS)
202 		return FALSE;
203 
204 	status = optimized->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
205 
206 	if (status != PRIMITIVES_SUCCESS)
207 		return FALSE;
208 
209 	for (i = 0; i < 4096; ++i)
210 	{
211 		if ((ABS(r1[i] - r2[i]) > 1) || (ABS(g1[i] - g2[i]) > 1) || (ABS(b1[i] - b2[i]) > 1))
212 		{
213 			printf("YCbCrToRGB-SSE FAIL[%d]: %" PRId16 ",%" PRId16 ",%" PRId16 " vs %" PRId16
214 			       ",%" PRId16 ",%" PRId16 "\n",
215 			       i, r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
216 			return FALSE;
217 		}
218 	}
219 
220 	return TRUE;
221 }
222 
223 /* ------------------------------------------------------------------------- */
test_yCbCrToRGB_16s16s_P3P3_speed(void)224 static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
225 {
226 	prim_size_t roi = { 64, 64 };
227 	INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
228 	INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
229 	int i;
230 	const INT16* input[3];
231 	INT16* output[3];
232 	winpr_RAND((BYTE*)y, sizeof(y));
233 	winpr_RAND((BYTE*)cb, sizeof(cb));
234 	winpr_RAND((BYTE*)cr, sizeof(cr));
235 
236 	/* Normalize to 11.5 fixed radix */
237 	for (i = 0; i < 4096; ++i)
238 	{
239 		y[i] &= 0x1FE0U;
240 		cb[i] &= 0x1FE0U;
241 		cr[i] &= 0x1FE0U;
242 	}
243 
244 	input[0] = y;
245 	input[1] = cb;
246 	input[2] = cr;
247 	output[0] = r;
248 	output[1] = g;
249 	output[2] = b;
250 
251 	if (!speed_test("yCbCrToRGB_16s16s_P3P3", "aligned", g_Iterations,
252 	                (speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
253 	                (speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3, input, 64 * 2, output,
254 	                64 * 2, &roi))
255 		return FALSE;
256 
257 	return TRUE;
258 }
259 
TestPrimitivesColors(int argc,char * argv[])260 int TestPrimitivesColors(int argc, char* argv[])
261 {
262 	const DWORD formats[] = { PIXEL_FORMAT_ARGB32, PIXEL_FORMAT_XRGB32, PIXEL_FORMAT_ABGR32,
263 		                      PIXEL_FORMAT_XBGR32, PIXEL_FORMAT_RGBA32, PIXEL_FORMAT_RGBX32,
264 		                      PIXEL_FORMAT_BGRA32, PIXEL_FORMAT_BGRX32 };
265 	DWORD x;
266 	prim_size_t roi = { 1920, 1080 };
267 	WINPR_UNUSED(argc);
268 	WINPR_UNUSED(argv);
269 	prim_test_setup(FALSE);
270 
271 	for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
272 	{
273 		if (!test_RGBToRGB_16s8u_P3AC4R_func(roi, formats[x]))
274 			return 1;
275 
276 #if 0
277 
278 		if (g_TestPrimitivesPerformance)
279 		{
280 			if (!test_RGBToRGB_16s8u_P3AC4R_speed())
281 				return 1;
282 		}
283 
284 		if (!test_yCbCrToRGB_16s16s_P3P3_func())
285 			return 1;
286 
287 		if (g_TestPrimitivesPerformance)
288 		{
289 			if (!test_yCbCrToRGB_16s16s_P3P3_speed())
290 				return 1;
291 		}
292 
293 #endif
294 	}
295 
296 	return 0;
297 }
298