1 /* primitives.c
2  * This code queries processor features and calls the init/deinit routines.
3  * vi:ts=4 sw=4
4  *
5  * Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com>
6  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
7  * Copyright 2019 David Fort <contact@hardening-consulting.com>
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License"); you may
10  * not use this file except in compliance with the License. You may obtain
11  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15  * or implied. See the License for the specific language governing
16  * permissions and limitations under the License.
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <string.h>
24 #include <stdlib.h>
25 
26 #include <winpr/synch.h>
27 #include <winpr/sysinfo.h>
28 #include <winpr/crypto.h>
29 #include <freerdp/primitives.h>
30 
31 #include "prim_internal.h"
32 
33 #define TAG FREERDP_TAG("primitives")
34 
35 /* hints to know which kind of primitives to use */
36 static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT;
37 static BOOL primitives_init_optimized(primitives_t* prims);
38 
/**
 * Remember which kind of primitives should be used.
 *
 * NOTE(review): the shared table is filled through an INIT_ONCE guard, so this
 * presumably only has an effect when called before the first primitives_get()
 * - confirm against the WinPR InitOnce semantics.
 *
 * @param hints the hint to store in the file-local primitivesHints
 */
void primitives_set_hints(primitive_hints hints)
{
	primitivesHints = hints;
}
43 
primitives_get_hints(void)44 primitive_hints primitives_get_hints(void)
45 {
46 	return primitivesHints;
47 }
48 
49 /* Singleton pointer used throughout the program when requested. */
50 static primitives_t pPrimitivesGeneric = { 0 };
51 static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
52 
53 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
54 static primitives_t pPrimitivesCpu = { 0 };
55 static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
56 
57 #endif
58 #if defined(WITH_OPENCL)
59 static primitives_t pPrimitivesGpu = { 0 };
60 static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
61 
62 #endif
63 
64 static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
65 
66 static primitives_t pPrimitives = { 0 };
67 
68 /* ------------------------------------------------------------------------- */
primitives_init_generic(primitives_t * prims)69 static BOOL primitives_init_generic(primitives_t* prims)
70 {
71 	primitives_init_add(prims);
72 	primitives_init_andor(prims);
73 	primitives_init_alphaComp(prims);
74 	primitives_init_copy(prims);
75 	primitives_init_set(prims);
76 	primitives_init_shift(prims);
77 	primitives_init_sign(prims);
78 	primitives_init_colors(prims);
79 	primitives_init_YCoCg(prims);
80 	primitives_init_YUV(prims);
81 	prims->uninit = NULL;
82 	return TRUE;
83 }
84 
primitives_init_generic_cb(PINIT_ONCE once,PVOID param,PVOID * context)85 static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context)
86 {
87 	WINPR_UNUSED(once);
88 	WINPR_UNUSED(param);
89 	WINPR_UNUSED(context);
90 	return primitives_init_generic(&pPrimitivesGeneric);
91 }
92 
primitives_init_optimized(primitives_t * prims)93 static BOOL primitives_init_optimized(primitives_t* prims)
94 {
95 	primitives_init_generic(prims);
96 
97 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
98 	primitives_init_add_opt(prims);
99 	primitives_init_andor_opt(prims);
100 	primitives_init_alphaComp_opt(prims);
101 	primitives_init_copy_opt(prims);
102 	primitives_init_set_opt(prims);
103 	primitives_init_shift_opt(prims);
104 	primitives_init_sign_opt(prims);
105 	primitives_init_colors_opt(prims);
106 	primitives_init_YCoCg_opt(prims);
107 	primitives_init_YUV_opt(prims);
108 	prims->flags |= PRIM_FLAGS_HAVE_EXTCPU;
109 #endif
110 	return TRUE;
111 }
112 
113 typedef struct
114 {
115 	BYTE* channels[3];
116 	UINT32 steps[3];
117 	prim_size_t roi;
118 	BYTE* outputBuffer;
119 	UINT32 outputStride;
120 	UINT32 testedFormat;
121 } primitives_YUV_benchmark;
122 
/**
 * Release the buffers owned by a benchmark object and zero the object.
 * The object itself is not freed. A NULL argument is accepted and ignored.
 *
 * @param bench benchmark state to clean up, may be NULL
 */
static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench)
{
	size_t plane;

	if (bench == NULL)
		return;

	for (plane = 0; plane < ARRAYSIZE(bench->channels); plane++)
		free(bench->channels[plane]);

	free(bench->outputBuffer);

	/* leave no dangling pointers behind */
	memset(bench, 0, sizeof(*bench));
}
135 
/**
 * Prepare a benchmark object: a 1024x768 region, a BGRA32 output buffer and
 * three input planes filled through winpr_RAND().
 *
 * @param ret caller provided storage for the benchmark state, may be NULL
 * @return ret on success; NULL when ret is NULL or an allocation failed.
 *         On allocation failure everything already allocated is released and
 *         the object is zeroed, so the caller must not run the benchmark.
 */
static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret)
{
	int i;
	prim_size_t* roi;
	if (!ret)
		return NULL;

	memset(ret, 0, sizeof(primitives_YUV_benchmark));
	roi = &ret->roi;
	roi->width = 1024;
	roi->height = 768;
	ret->outputStride = roi->width * 4;
	ret->testedFormat = PIXEL_FORMAT_BGRA32;

	ret->outputBuffer = calloc(ret->outputStride, roi->height);
	if (!ret->outputBuffer)
		goto fail;

	for (i = 0; i < 3; i++)
	{
		BYTE* buf = ret->channels[i] = calloc(roi->width, roi->height);
		if (!buf)
			goto fail;

		/* Widen before multiplying so the byte count is computed in size_t.
		 * The result of winpr_RAND() is not checked here; the plane is
		 * zero-filled by calloc() in any case. */
		winpr_RAND(buf, (size_t)roi->width * roi->height);
		ret->steps[i] = roi->width;
	}

	return ret;

fail:
	primitives_YUV_benchmark_free(ret);
	/* Report the failure: the object was just zeroed and owns no buffers. */
	return NULL;
}
170 
primitives_YUV_benchmark_run(primitives_YUV_benchmark * bench,primitives_t * prims,UINT64 runTime,UINT32 * computations)171 static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
172                                          UINT64 runTime, UINT32* computations)
173 {
174 	ULONGLONG dueDate;
175 	const BYTE* channels[3] = { 0 };
176 	size_t i;
177 	pstatus_t status;
178 
179 	*computations = 0;
180 
181 	for (i = 0; i < 3; i++)
182 		channels[i] = bench->channels[i];
183 
184 	/* do a first dry run to initialize cache and such */
185 	status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
186 	                                      bench->outputStride, bench->testedFormat, &bench->roi);
187 	if (status != PRIMITIVES_SUCCESS)
188 		return FALSE;
189 
190 	/* let's run the benchmark */
191 	dueDate = GetTickCount64() + runTime;
192 	while (GetTickCount64() < dueDate)
193 	{
194 		pstatus_t status =
195 		    prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
196 		                                 bench->outputStride, bench->testedFormat, &bench->roi);
197 		if (status != PRIMITIVES_SUCCESS)
198 			return FALSE;
199 		*computations = *computations + 1;
200 	}
201 	return TRUE;
202 }
203 
primitives_autodetect_best(primitives_t * prims)204 static BOOL primitives_autodetect_best(primitives_t* prims)
205 {
206 	size_t x;
207 	BOOL ret = FALSE;
208 	UINT64 benchDuration = 150; /* 150 ms */
209 	struct prim_benchmark
210 	{
211 		const char* name;
212 		primitives_t* prims;
213 		UINT32 flags;
214 		UINT32 count;
215 	};
216 
217 	struct prim_benchmark testcases[] =
218 	{
219 		{ "generic", NULL, PRIMITIVES_PURE_SOFT, 0 },
220 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
221 		{ "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 },
222 #endif
223 #if defined(WITH_OPENCL)
224 		{ "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 },
225 #endif
226 	};
227 	const struct prim_benchmark* best = NULL;
228 
229 	primitives_YUV_benchmark bench;
230 	primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench);
231 	if (!yuvBench)
232 		return FALSE;
233 
234 	WLog_DBG(TAG, "primitives benchmark result:");
235 	for (x = 0; x < ARRAYSIZE(testcases); x++)
236 	{
237 		struct prim_benchmark* cur = &testcases[x];
238 		cur->prims = primitives_get_by_type(cur->flags);
239 		if (!cur->prims)
240 		{
241 			WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name);
242 			continue;
243 		}
244 		if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count))
245 		{
246 			WLog_WARN(TAG, "error running %s YUV bench", cur->name);
247 			continue;
248 		}
249 
250 		WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count);
251 		if (!best || (best->count < cur->count))
252 			best = cur;
253 	}
254 
255 	if (!best)
256 	{
257 		WLog_ERR(TAG, "No primitives to test, aborting.");
258 		goto out;
259 	}
260 	/* finally compute the results */
261 	*prims = *best->prims;
262 
263 	WLog_INFO(TAG, "primitives autodetect, using %s", best->name);
264 	ret = TRUE;
265 out:
266 	if (!ret)
267 		*prims = pPrimitivesGeneric;
268 	primitives_YUV_benchmark_free(yuvBench);
269 	return ret;
270 }
271 
272 #if defined(WITH_OPENCL)
primitives_init_gpu_cb(PINIT_ONCE once,PVOID param,PVOID * context)273 static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
274 {
275 	WINPR_UNUSED(once);
276 	WINPR_UNUSED(param);
277 	WINPR_UNUSED(context);
278 
279 	if (!primitives_init_opencl(&pPrimitivesGpu))
280 		return FALSE;
281 
282 	return TRUE;
283 }
284 #endif
285 
286 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
primitives_init_cpu_cb(PINIT_ONCE once,PVOID param,PVOID * context)287 static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
288 {
289 	WINPR_UNUSED(once);
290 	WINPR_UNUSED(param);
291 	WINPR_UNUSED(context);
292 
293 	if (!primitives_init_optimized(&pPrimitivesCpu))
294 		return FALSE;
295 
296 	return TRUE;
297 }
298 #endif
299 
primitives_auto_init_cb(PINIT_ONCE once,PVOID param,PVOID * context)300 static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context)
301 {
302 	WINPR_UNUSED(once);
303 	WINPR_UNUSED(param);
304 	WINPR_UNUSED(context);
305 
306 	return primitives_init(&pPrimitives, primitivesHints);
307 }
308 
primitives_init(primitives_t * p,primitive_hints hints)309 BOOL primitives_init(primitives_t* p, primitive_hints hints)
310 {
311 	switch (hints)
312 	{
313 		case PRIMITIVES_AUTODETECT:
314 			return primitives_autodetect_best(p);
315 		case PRIMITIVES_PURE_SOFT:
316 			*p = pPrimitivesGeneric;
317 			return TRUE;
318 		case PRIMITIVES_ONLY_CPU:
319 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
320 			*p = pPrimitivesCpu;
321 			return TRUE;
322 #endif
323 		case PRIMITIVES_ONLY_GPU:
324 #if defined(WITH_OPENCL)
325 			*p = pPrimitivesGpu;
326 			return TRUE;
327 #endif
328 		default:
329 			WLog_ERR(TAG, "unknown hint %d", hints);
330 			return FALSE;
331 	}
332 }
333 
primitives_uninit(void)334 void primitives_uninit(void)
335 {
336 #if defined(WITH_OPENCL)
337 	if (pPrimitivesGpu.uninit)
338 		pPrimitivesGpu.uninit();
339 #endif
340 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
341 	if (pPrimitivesCpu.uninit)
342 		pPrimitivesCpu.uninit();
343 #endif
344 	if (pPrimitivesGeneric.uninit)
345 		pPrimitivesGeneric.uninit();
346 }
347 
348 /* ------------------------------------------------------------------------- */
setup(void)349 static void setup(void)
350 {
351 	InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
352 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
353 	InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
354 #endif
355 #if defined(WITH_OPENCL)
356 	InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
357 #endif
358 	InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
359 }
360 
primitives_get(void)361 primitives_t* primitives_get(void)
362 {
363 	setup();
364 	return &pPrimitives;
365 }
366 
primitives_get_generic(void)367 primitives_t* primitives_get_generic(void)
368 {
369 	InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
370 	return &pPrimitivesGeneric;
371 }
372 
primitives_get_by_type(DWORD type)373 primitives_t* primitives_get_by_type(DWORD type)
374 {
375 	InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
376 
377 	switch (type)
378 	{
379 		case PRIMITIVES_ONLY_GPU:
380 #if defined(WITH_OPENCL)
381 			if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
382 				return NULL;
383 			return &pPrimitivesGpu;
384 #endif
385 		case PRIMITIVES_ONLY_CPU:
386 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
387 			if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
388 				return NULL;
389 			return &pPrimitivesCpu;
390 #endif
391 		case PRIMITIVES_PURE_SOFT:
392 		default:
393 			return &pPrimitivesGeneric;
394 	}
395 }
396 
primitives_flags(primitives_t * p)397 DWORD primitives_flags(primitives_t* p)
398 {
399 	return p->flags;
400 }
401