1 /* primitives.c
2 * This code queries processor features and calls the init/deinit routines.
3 * vi:ts=4 sw=4
4 *
5 * Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com>
6 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
7 * Copyright 2019 David Fort <contact@hardening-consulting.com>
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License"); you may
10 * not use this file except in compliance with the License. You may obtain
11 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15 * or implied. See the License for the specific language governing
16 * permissions and limitations under the License.
17 */
18
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22
23 #include <string.h>
24 #include <stdlib.h>
25
26 #include <winpr/synch.h>
27 #include <winpr/sysinfo.h>
28 #include <winpr/crypto.h>
29 #include <freerdp/primitives.h>
30
31 #include "prim_internal.h"
32
33 #define TAG FREERDP_TAG("primitives")
34
/* Hint describing which kind of primitives to use. It is stored by
 * primitives_set_hints(), returned by primitives_get_hints() and passed to
 * primitives_init() by primitives_auto_init_cb(). Defaults to auto-detection. */
static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT;
/* Forward declaration; the definition appears further down in this file. */
static BOOL primitives_init_optimized(primitives_t* prims);
38
/**
 * Store the hint describing which kind of primitives should be used.
 *
 * The value is only recorded here; primitives_auto_init_cb() later passes it
 * to primitives_init().
 *
 * @param hints the hint to remember
 */
void primitives_set_hints(primitive_hints hints)
{
	primitivesHints = hints;
}
43
primitives_get_hints(void)44 primitive_hints primitives_get_hints(void)
45 {
46 return primitivesHints;
47 }
48
/* File-scope primitives tables (struct instances, not pointers) and the
 * INIT_ONCE guards passed to InitOnceExecuteOnce() when each one is set up. */

/* Generic table, filled by primitives_init_generic_cb(). */
static primitives_t pPrimitivesGeneric = { 0 };
static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;

#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
/* CPU optimized table, filled by primitives_init_cpu_cb(). */
static primitives_t pPrimitivesCpu = { 0 };
static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;

#endif
#if defined(WITH_OPENCL)
/* OpenCL table, filled by primitives_init_gpu_cb(). */
static primitives_t pPrimitivesGpu = { 0 };
static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;

#endif

/* Guard for the one-time selection performed by primitives_auto_init_cb(). */
static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;

/* Table handed out by primitives_get(); filled according to primitivesHints. */
static primitives_t pPrimitives = { 0 };
67
68 /* ------------------------------------------------------------------------- */
primitives_init_generic(primitives_t * prims)69 static BOOL primitives_init_generic(primitives_t* prims)
70 {
71 primitives_init_add(prims);
72 primitives_init_andor(prims);
73 primitives_init_alphaComp(prims);
74 primitives_init_copy(prims);
75 primitives_init_set(prims);
76 primitives_init_shift(prims);
77 primitives_init_sign(prims);
78 primitives_init_colors(prims);
79 primitives_init_YCoCg(prims);
80 primitives_init_YUV(prims);
81 prims->uninit = NULL;
82 return TRUE;
83 }
84
primitives_init_generic_cb(PINIT_ONCE once,PVOID param,PVOID * context)85 static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context)
86 {
87 WINPR_UNUSED(once);
88 WINPR_UNUSED(param);
89 WINPR_UNUSED(context);
90 return primitives_init_generic(&pPrimitivesGeneric);
91 }
92
primitives_init_optimized(primitives_t * prims)93 static BOOL primitives_init_optimized(primitives_t* prims)
94 {
95 primitives_init_generic(prims);
96
97 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
98 primitives_init_add_opt(prims);
99 primitives_init_andor_opt(prims);
100 primitives_init_alphaComp_opt(prims);
101 primitives_init_copy_opt(prims);
102 primitives_init_set_opt(prims);
103 primitives_init_shift_opt(prims);
104 primitives_init_sign_opt(prims);
105 primitives_init_colors_opt(prims);
106 primitives_init_YCoCg_opt(prims);
107 primitives_init_YUV_opt(prims);
108 prims->flags |= PRIM_FLAGS_HAVE_EXTCPU;
109 #endif
110 return TRUE;
111 }
112
/* Working set for the YUV420 to RGB conversion benchmark used during
 * primitives auto-detection. Buffers are owned by this structure and released
 * by primitives_YUV_benchmark_free(). */
typedef struct
{
	BYTE* channels[3];   /* three heap-allocated source planes handed to the conversion */
	UINT32 steps[3];     /* per-plane step values passed alongside the planes */
	prim_size_t roi;     /* width/height of the converted region */
	BYTE* outputBuffer;  /* heap-allocated destination buffer */
	UINT32 outputStride; /* destination bytes per row (set to width * 4) */
	UINT32 testedFormat; /* destination pixel format passed to the conversion */
} primitives_YUV_benchmark;
122
/**
 * Release the buffers owned by a benchmark context and zero the structure.
 *
 * The structure itself is not freed. A NULL argument is ignored.
 *
 * @param bench context to clean up, may be NULL
 */
static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench)
{
	size_t plane;

	if (bench == NULL)
		return;

	for (plane = 0; plane < 3; plane++)
		free(bench->channels[plane]);

	free(bench->outputBuffer);

	/* leave no dangling pointers behind */
	memset(bench, 0, sizeof(*bench));
}
135
/**
 * Prepare a benchmark context: a 1024x768 region, a BGRA32 destination buffer
 * and three source planes filled with random bytes.
 *
 * @param ret caller-provided context to initialize; it is zeroed first
 * @return ret on success; NULL if ret is NULL or an allocation failed. On
 *         failure every buffer already allocated is released and the context
 *         is left zeroed.
 */
static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret)
{
	size_t i;
	prim_size_t* roi;

	if (!ret)
		return NULL;

	memset(ret, 0, sizeof(*ret));
	roi = &ret->roi;
	roi->width = 1024;
	roi->height = 768;
	ret->outputStride = roi->width * 4;
	ret->testedFormat = PIXEL_FORMAT_BGRA32;

	ret->outputBuffer = calloc(ret->outputStride, roi->height);
	if (!ret->outputBuffer)
		goto fail;

	for (i = 0; i < 3; i++)
	{
		BYTE* buf = ret->channels[i] = calloc(roi->width, roi->height);
		if (!buf)
			goto fail;

		/* widen before multiplying so the byte count is computed in 64 bit */
		winpr_RAND(buf, 1ULL * roi->width * roi->height);
		ret->steps[i] = roi->width;
	}

	return ret;

fail:
	primitives_YUV_benchmark_free(ret);
	/* Report the failure: returning the (now zeroed) context would make the
	 * caller's NULL check pass and run the benchmark on NULL buffers. */
	return NULL;
}
170
primitives_YUV_benchmark_run(primitives_YUV_benchmark * bench,primitives_t * prims,UINT64 runTime,UINT32 * computations)171 static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
172 UINT64 runTime, UINT32* computations)
173 {
174 ULONGLONG dueDate;
175 const BYTE* channels[3] = { 0 };
176 size_t i;
177 pstatus_t status;
178
179 *computations = 0;
180
181 for (i = 0; i < 3; i++)
182 channels[i] = bench->channels[i];
183
184 /* do a first dry run to initialize cache and such */
185 status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
186 bench->outputStride, bench->testedFormat, &bench->roi);
187 if (status != PRIMITIVES_SUCCESS)
188 return FALSE;
189
190 /* let's run the benchmark */
191 dueDate = GetTickCount64() + runTime;
192 while (GetTickCount64() < dueDate)
193 {
194 pstatus_t status =
195 prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
196 bench->outputStride, bench->testedFormat, &bench->roi);
197 if (status != PRIMITIVES_SUCCESS)
198 return FALSE;
199 *computations = *computations + 1;
200 }
201 return TRUE;
202 }
203
primitives_autodetect_best(primitives_t * prims)204 static BOOL primitives_autodetect_best(primitives_t* prims)
205 {
206 size_t x;
207 BOOL ret = FALSE;
208 UINT64 benchDuration = 150; /* 150 ms */
209 struct prim_benchmark
210 {
211 const char* name;
212 primitives_t* prims;
213 UINT32 flags;
214 UINT32 count;
215 };
216
217 struct prim_benchmark testcases[] =
218 {
219 { "generic", NULL, PRIMITIVES_PURE_SOFT, 0 },
220 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
221 { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 },
222 #endif
223 #if defined(WITH_OPENCL)
224 { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 },
225 #endif
226 };
227 const struct prim_benchmark* best = NULL;
228
229 primitives_YUV_benchmark bench;
230 primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench);
231 if (!yuvBench)
232 return FALSE;
233
234 WLog_DBG(TAG, "primitives benchmark result:");
235 for (x = 0; x < ARRAYSIZE(testcases); x++)
236 {
237 struct prim_benchmark* cur = &testcases[x];
238 cur->prims = primitives_get_by_type(cur->flags);
239 if (!cur->prims)
240 {
241 WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name);
242 continue;
243 }
244 if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count))
245 {
246 WLog_WARN(TAG, "error running %s YUV bench", cur->name);
247 continue;
248 }
249
250 WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count);
251 if (!best || (best->count < cur->count))
252 best = cur;
253 }
254
255 if (!best)
256 {
257 WLog_ERR(TAG, "No primitives to test, aborting.");
258 goto out;
259 }
260 /* finally compute the results */
261 *prims = *best->prims;
262
263 WLog_INFO(TAG, "primitives autodetect, using %s", best->name);
264 ret = TRUE;
265 out:
266 if (!ret)
267 *prims = pPrimitivesGeneric;
268 primitives_YUV_benchmark_free(yuvBench);
269 return ret;
270 }
271
272 #if defined(WITH_OPENCL)
primitives_init_gpu_cb(PINIT_ONCE once,PVOID param,PVOID * context)273 static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
274 {
275 WINPR_UNUSED(once);
276 WINPR_UNUSED(param);
277 WINPR_UNUSED(context);
278
279 if (!primitives_init_opencl(&pPrimitivesGpu))
280 return FALSE;
281
282 return TRUE;
283 }
284 #endif
285
286 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
primitives_init_cpu_cb(PINIT_ONCE once,PVOID param,PVOID * context)287 static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
288 {
289 WINPR_UNUSED(once);
290 WINPR_UNUSED(param);
291 WINPR_UNUSED(context);
292
293 if (!primitives_init_optimized(&pPrimitivesCpu))
294 return FALSE;
295
296 return TRUE;
297 }
298 #endif
299
primitives_auto_init_cb(PINIT_ONCE once,PVOID param,PVOID * context)300 static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context)
301 {
302 WINPR_UNUSED(once);
303 WINPR_UNUSED(param);
304 WINPR_UNUSED(context);
305
306 return primitives_init(&pPrimitives, primitivesHints);
307 }
308
primitives_init(primitives_t * p,primitive_hints hints)309 BOOL primitives_init(primitives_t* p, primitive_hints hints)
310 {
311 switch (hints)
312 {
313 case PRIMITIVES_AUTODETECT:
314 return primitives_autodetect_best(p);
315 case PRIMITIVES_PURE_SOFT:
316 *p = pPrimitivesGeneric;
317 return TRUE;
318 case PRIMITIVES_ONLY_CPU:
319 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
320 *p = pPrimitivesCpu;
321 return TRUE;
322 #endif
323 case PRIMITIVES_ONLY_GPU:
324 #if defined(WITH_OPENCL)
325 *p = pPrimitivesGpu;
326 return TRUE;
327 #endif
328 default:
329 WLog_ERR(TAG, "unknown hint %d", hints);
330 return FALSE;
331 }
332 }
333
primitives_uninit(void)334 void primitives_uninit(void)
335 {
336 #if defined(WITH_OPENCL)
337 if (pPrimitivesGpu.uninit)
338 pPrimitivesGpu.uninit();
339 #endif
340 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
341 if (pPrimitivesCpu.uninit)
342 pPrimitivesCpu.uninit();
343 #endif
344 if (pPrimitivesGeneric.uninit)
345 pPrimitivesGeneric.uninit();
346 }
347
348 /* ------------------------------------------------------------------------- */
setup(void)349 static void setup(void)
350 {
351 InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
352 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
353 InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
354 #endif
355 #if defined(WITH_OPENCL)
356 InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
357 #endif
358 InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
359 }
360
primitives_get(void)361 primitives_t* primitives_get(void)
362 {
363 setup();
364 return &pPrimitives;
365 }
366
primitives_get_generic(void)367 primitives_t* primitives_get_generic(void)
368 {
369 InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
370 return &pPrimitivesGeneric;
371 }
372
primitives_get_by_type(DWORD type)373 primitives_t* primitives_get_by_type(DWORD type)
374 {
375 InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
376
377 switch (type)
378 {
379 case PRIMITIVES_ONLY_GPU:
380 #if defined(WITH_OPENCL)
381 if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
382 return NULL;
383 return &pPrimitivesGpu;
384 #endif
385 case PRIMITIVES_ONLY_CPU:
386 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
387 if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
388 return NULL;
389 return &pPrimitivesCpu;
390 #endif
391 case PRIMITIVES_PURE_SOFT:
392 default:
393 return &pPrimitivesGeneric;
394 }
395 }
396
primitives_flags(primitives_t * p)397 DWORD primitives_flags(primitives_t* p)
398 {
399 return p->flags;
400 }
401