1 /*****************************************************************************
2 * This file is part of Kvazaar HEVC encoder.
3 *
4 * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without modification,
8 * are permitted provided that the following conditions are met:
9 *
10 * * Redistributions of source code must retain the above copyright notice, this
11 * list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright notice, this
14 * list of conditions and the following disclaimer in the documentation and/or
15 * other materials provided with the distribution.
16 *
17 * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
28 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 ****************************************************************************/
32
33 #include "strategyselector.h"
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 #ifdef _WIN32
40 #include <windows.h>
41 #else
42 #include <unistd.h>
43 #endif
44
45 hardware_flags_t kvz_g_hardware_flags;
46 hardware_flags_t kvz_g_strategies_in_use;
47 hardware_flags_t kvz_g_strategies_available;
48
49 static void set_hardware_flags(int32_t cpuid);
50 static void* strategyselector_choose_for(const strategy_list_t * const strategies, const char * const strategy_type);
51
52 //Strategies to include (add new file here)
53
54 //Returns 1 if successful
kvz_strategyselector_init(int32_t cpuid,uint8_t bitdepth)55 int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
56 const strategy_to_select_t *cur_strategy_to_select = strategies_to_select;
57 strategy_list_t strategies;
58
59 strategies.allocated = 0;
60 strategies.count = 0;
61 strategies.strategies = NULL;
62
63 set_hardware_flags(cpuid);
64
65 //Add new register function here
66 if (!kvz_strategy_register_picture(&strategies, bitdepth)) {
67 fprintf(stderr, "kvz_strategy_register_picture failed!\n");
68 return 0;
69 }
70
71 if (!kvz_strategy_register_nal(&strategies, bitdepth)) {
72 fprintf(stderr, "kvz_strategy_register_nal failed!\n");
73 return 0;
74 }
75
76 if (!kvz_strategy_register_dct(&strategies, bitdepth)) {
77 fprintf(stderr, "kvz_strategy_register_dct failed!\n");
78 return 0;
79 }
80
81 if (!kvz_strategy_register_ipol(&strategies, bitdepth)) {
82 fprintf(stderr, "kvz_strategy_register_ipol failed!\n");
83 return 0;
84 }
85
86 if (!kvz_strategy_register_quant(&strategies, bitdepth)) {
87 fprintf(stderr, "kvz_strategy_register_quant failed!\n");
88 return 0;
89 }
90
91 if (!kvz_strategy_register_intra(&strategies, bitdepth)) {
92 fprintf(stderr, "kvz_strategy_register_intra failed!\n");
93 return 0;
94 }
95
96 if (!kvz_strategy_register_sao(&strategies, bitdepth)) {
97 fprintf(stderr, "kvz_strategy_register_sao failed!\n");
98 return 0;
99 }
100
101 if (!kvz_strategy_register_encode(&strategies, bitdepth)) {
102 fprintf(stderr, "kvz_strategy_register_encode failed!\n");
103 return 0;
104 }
105
106 while(cur_strategy_to_select->fptr) {
107 *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
108
109 if (!(*(cur_strategy_to_select->fptr))) {
110 fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type);
111 return 0;
112 }
113 ++cur_strategy_to_select;
114 }
115
116 //We can free the structure now, as all strategies are statically set to pointers
117 if (strategies.allocated) {
118 //Also check what optimizations are available and what are in use
119 //SIMD optimizations available
120 bool strategies_available = false;
121 fprintf(stderr, "Available: ");
122 if (kvz_g_strategies_available.intel_flags.avx != 0){
123 fprintf(stderr, "avx(%d) ", kvz_g_strategies_available.intel_flags.avx);
124 strategies_available = true;
125 }
126 if (kvz_g_strategies_available.intel_flags.avx2 != 0){
127 fprintf(stderr, "avx2(%d) ", kvz_g_strategies_available.intel_flags.avx2);
128 strategies_available = true;
129 }
130 if (kvz_g_strategies_available.intel_flags.mmx != 0) {
131 fprintf(stderr, "mmx(%d) ", kvz_g_strategies_available.intel_flags.mmx);
132 strategies_available = true;
133 }
134 if (kvz_g_strategies_available.intel_flags.sse != 0) {
135 fprintf(stderr, "sse(%d) ", kvz_g_strategies_available.intel_flags.sse);
136 strategies_available = true;
137 }
138 if (kvz_g_strategies_available.intel_flags.sse2 != 0) {
139 fprintf(stderr, "sse2(%d) ", kvz_g_strategies_available.intel_flags.sse2);
140 strategies_available = true;
141 }
142 if (kvz_g_strategies_available.intel_flags.sse3 != 0) {
143 fprintf(stderr, "sse3(%d) ", kvz_g_strategies_available.intel_flags.sse3);
144 strategies_available = true;
145 }
146 if (kvz_g_strategies_available.intel_flags.sse41 != 0) {
147 fprintf(stderr, "sse41(%d) ", kvz_g_strategies_available.intel_flags.sse41);
148 strategies_available = true;
149 }
150 if (kvz_g_strategies_available.intel_flags.sse42 != 0) {
151 fprintf(stderr, "sse42(%d) ", kvz_g_strategies_available.intel_flags.sse42);
152 strategies_available = true;
153 }
154 if (kvz_g_strategies_available.intel_flags.ssse3 != 0) {
155 fprintf(stderr, "ssse3(%d) ", kvz_g_strategies_available.intel_flags.ssse3);
156 strategies_available = true;
157 }
158 if (kvz_g_strategies_available.arm_flags.neon != 0) {
159 fprintf(stderr, "neon(%d) ", kvz_g_strategies_available.arm_flags.neon);
160 strategies_available = true;
161 }
162 if (kvz_g_strategies_available.powerpc_flags.altivec != 0) {
163 fprintf(stderr, "altivec(%d) ", kvz_g_strategies_available.powerpc_flags.altivec);
164 strategies_available = true;
165 }
166 //If there is no strategies available
167 if (!strategies_available){
168 fprintf(stderr, "no SIMD optimizations");
169 }
170 fprintf(stderr, "\n");
171
172 //SIMD optimizations in use
173 bool strategies_in_use = false;
174 fprintf(stderr, "In use: ");
175 if (kvz_g_strategies_in_use.intel_flags.avx != 0){
176 fprintf(stderr, "avx(%d) ", kvz_g_strategies_in_use.intel_flags.avx);
177 strategies_in_use = true;
178 }
179 if (kvz_g_strategies_in_use.intel_flags.avx2 != 0){
180 fprintf(stderr, "avx2(%d) ", kvz_g_strategies_in_use.intel_flags.avx2);
181 strategies_in_use = true;
182 }
183 if (kvz_g_strategies_in_use.intel_flags.mmx != 0) {
184 fprintf(stderr, "mmx(%d) ", kvz_g_strategies_in_use.intel_flags.mmx);
185 strategies_in_use = true;
186 }
187 if (kvz_g_strategies_in_use.intel_flags.sse != 0) {
188 fprintf(stderr, "sse(%d) ", kvz_g_strategies_in_use.intel_flags.sse);
189 strategies_in_use = true;
190 }
191 if (kvz_g_strategies_in_use.intel_flags.sse2 != 0) {
192 fprintf(stderr, "sse2(%d) ", kvz_g_strategies_in_use.intel_flags.sse2);
193 strategies_in_use = true;
194 }
195 if (kvz_g_strategies_in_use.intel_flags.sse3 != 0) {
196 fprintf(stderr, "sse3(%d) ", kvz_g_strategies_in_use.intel_flags.sse3);
197 strategies_in_use = true;
198 }
199 if (kvz_g_strategies_in_use.intel_flags.sse41 != 0) {
200 fprintf(stderr, "sse41(%d) ", kvz_g_strategies_in_use.intel_flags.sse41);
201 strategies_in_use = true;
202 }
203 if (kvz_g_strategies_in_use.intel_flags.sse42 != 0) {
204 fprintf(stderr, "sse42(%d) ", kvz_g_strategies_in_use.intel_flags.sse42);
205 strategies_in_use = true;
206 }
207 if (kvz_g_strategies_in_use.intel_flags.ssse3 != 0) {
208 fprintf(stderr, "ssse3(%d) ", kvz_g_strategies_in_use.intel_flags.ssse3);
209 strategies_in_use = true;
210 }
211 if (kvz_g_strategies_in_use.arm_flags.neon != 0) {
212 fprintf(stderr, "neon(%d) ", kvz_g_strategies_in_use.arm_flags.neon);
213 strategies_in_use = true;
214 }
215 if (kvz_g_strategies_in_use.powerpc_flags.altivec != 0) {
216 fprintf(stderr, "altivec(%d) ", kvz_g_strategies_in_use.powerpc_flags.altivec);
217 strategies_in_use = true;
218 }
219 //If there is no strategies in use
220 if (!strategies_in_use){
221 fprintf(stderr, "no SIMD optimizations");
222 }
223 fprintf(stderr, "\n");
224
225 //Free memory
226 free(strategies.strategies);
227 }
228
229 return 1;
230 }
231
232 //Returns 1 if successful, 0 otherwise
kvz_strategyselector_register(void * const opaque,const char * const type,const char * const strategy_name,int priority,void * const fptr)233 int kvz_strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) {
234 strategy_list_t * const strategies = opaque;
235
236 if (strategies->allocated == strategies->count) {
237 strategy_t* new_strategies = realloc(strategies->strategies, sizeof(strategy_t) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE));
238 if (!new_strategies) {
239 fprintf(stderr, "Could not increase strategies list size!\n");
240 return 0;
241 }
242 strategies->strategies = new_strategies;
243 strategies->allocated += STRATEGY_LIST_ALLOC_SIZE;
244 }
245
246 {
247 strategy_t *new_strategy = &strategies->strategies[strategies->count++];
248 new_strategy->type = type;
249 new_strategy->strategy_name = strategy_name;
250 new_strategy->priority = priority;
251 new_strategy->fptr = fptr;
252 }
253
254 //Check what strategies are available when they are registered
255 if (strcmp(strategy_name, "avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
256 if (strcmp(strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
257 if (strcmp(strategy_name, "avx2") == 0) kvz_g_strategies_available.intel_flags.avx2++;
258 if (strcmp(strategy_name, "mmx") == 0) kvz_g_strategies_available.intel_flags.mmx++;
259 if (strcmp(strategy_name, "sse") == 0) kvz_g_strategies_available.intel_flags.sse++;
260 if (strcmp(strategy_name, "sse2") == 0) kvz_g_strategies_available.intel_flags.sse2++;
261 if (strcmp(strategy_name, "sse3") == 0) kvz_g_strategies_available.intel_flags.sse3++;
262 if (strcmp(strategy_name, "sse41") == 0) kvz_g_strategies_available.intel_flags.sse41++;
263 if (strcmp(strategy_name, "sse42") == 0) kvz_g_strategies_available.intel_flags.sse42++;
264 if (strcmp(strategy_name, "ssse3") == 0) kvz_g_strategies_available.intel_flags.ssse3++;
265 if (strcmp(strategy_name, "altivec") == 0) kvz_g_strategies_available.powerpc_flags.altivec++;
266 if (strcmp(strategy_name, "neon") == 0) kvz_g_strategies_available.arm_flags.neon++;
267
268 #ifdef DEBUG_STRATEGYSELECTOR
269 fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
270 #endif //DEBUG_STRATEGYSELECTOR
271
272 return 1;
273 }
274
strategyselector_choose_for(const strategy_list_t * const strategies,const char * const strategy_type)275 static void* strategyselector_choose_for(const strategy_list_t * const strategies, const char * const strategy_type) {
276 unsigned int max_priority = 0;
277 int max_priority_i = -1;
278 char buffer[256];
279 char *override = NULL;
280 int i = 0;
281
282 // Because VS doesn't support snprintf, let's assert that there is
283 // enough room in the buffer. Max length for strategy type is
284 // buffersize (256) - prefix including terminating zero.
285 assert(strlen(strategy_type) < 256 - sizeof("KVAZAAR_OVERRIDE_") );
286 sprintf(buffer, "KVAZAAR_OVERRIDE_%s", strategy_type);
287
288 override = getenv(buffer);
289
290 for (i=0; i < strategies->count; ++i) {
291 if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
292 if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) {
293 fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name);
294 return strategies->strategies[i].fptr;
295 }
296 if (strategies->strategies[i].priority >= max_priority) {
297 max_priority_i = i;
298 max_priority = strategies->strategies[i].priority;
299 }
300 }
301 }
302
303 if (override) {
304 fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override);
305 return NULL;
306 }
307
308 #ifdef DEBUG_STRATEGYSELECTOR
309 fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
310 for (i=0; i < strategies->count; ++i) {
311 if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
312 if (i != max_priority_i) {
313 fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
314 } else {
315 fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
316 }
317 }
318 }
319 #endif //DEBUG_STRATEGYSELECTOR
320
321
322 if (max_priority_i == -1) {
323 return NULL;
324 }
325
326 //Check what strategy we are going to use
327 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
328 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
329 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx2") == 0) kvz_g_strategies_in_use.intel_flags.avx2++;
330 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "mmx") == 0) kvz_g_strategies_in_use.intel_flags.mmx++;
331 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse") == 0) kvz_g_strategies_in_use.intel_flags.sse++;
332 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse2") == 0) kvz_g_strategies_in_use.intel_flags.sse2++;
333 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse3") == 0) kvz_g_strategies_in_use.intel_flags.sse3++;
334 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse41") == 0) kvz_g_strategies_in_use.intel_flags.sse41++;
335 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse42") == 0) kvz_g_strategies_in_use.intel_flags.sse42++;
336 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "ssse3") == 0) kvz_g_strategies_in_use.intel_flags.ssse3++;
337 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "altivec") == 0) kvz_g_strategies_in_use.powerpc_flags.altivec++;
338 if (strcmp(strategies->strategies[max_priority_i].strategy_name, "neon") == 0) kvz_g_strategies_in_use.arm_flags.neon++;
339
340 return strategies->strategies[max_priority_i].fptr;
341 }
342
343 #if COMPILE_INTEL
344
// The four registers written by one CPUID query, in eax/ebx/ecx/edx order.
typedef struct {
  unsigned eax;
  unsigned ebx;
  unsigned ecx;
  unsigned edx;
} cpuid_t;
351
352 // CPUID adapters for different compilers.
353 # if defined(__GNUC__)
354 #include <cpuid.h>
355
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)356 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
357 if (__get_cpuid_max(level & 0x80000000, NULL) < level) return 0;
358 __cpuid_count(level, sublevel, cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx);
359 return 1;
360 }
361 # elif defined(_MSC_VER)
362 #include <intrin.h>
363
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)364 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
365 int vendor_info[4] = { 0, 0, 0, 0 };
366 __cpuidex(vendor_info, 0, 0);
367
368 // Check highest supported function.
369 if (level > vendor_info[0]) return 0;
370
371 int ms_cpu_info[4] = { cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx };
372 __cpuidex(ms_cpu_info, level, sublevel);
373 cpu_info->eax = ms_cpu_info[0];
374 cpu_info->ebx = ms_cpu_info[1];
375 cpu_info->ecx = ms_cpu_info[2];
376 cpu_info->edx = ms_cpu_info[3];
377
378 return 1;
379 }
380 # else
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)381 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info)
382 {
383 return 0;
384 }
385 # endif
386 #endif // COMPILE_INTEL
387
388 #if COMPILE_POWERPC
389 # if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
390 #ifdef __linux__
391 #include <asm/cputable.h>
392 #else
393 #include <machine/cpu.h>
394 #endif
395 #include <sys/auxv.h>
396
altivec_available(void)397 static int altivec_available(void)
398 {
399 unsigned long hwcap = 0;
400 #ifdef __linux__
401 hwcap = getauxval(AT_HWCAP);
402 #else
403 elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
404 #endif
405 return !!(hwcap & PPC_FEATURE_HAS_ALTIVEC);
406 }
407 # elif defined(__FreeBSD__)
408 #include <sys/types.h>
409 #include <sys/sysctl.h>
410 #include <machine/cpu.h>
411
altivec_available(void)412 static int altivec_available(void)
413 {
414 u_long cpu_features = 0;
415 size_t len = sizeof(cpu_features);
416
417 sysctlbyname("hw.cpu_features", &cpu_features, &len, NULL, 0);
418 return !!(cpu_features & PPC_FEATURE_HAS_ALTIVEC);
419 }
420 # elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
421 #include <sys/param.h>
422 #include <sys/sysctl.h>
423 #ifndef __APPLE__
424 #include <machine/cpu.h>
425 #endif
426
altivec_available(void)427 static int altivec_available(void)
428 {
429 int cpu_altivec = 0;
430 size_t len = sizeof(cpu_altivec);
431 #ifdef HW_VECTORUNIT
432 int mib[] = { CTL_HW, HW_VECTORUNIT };
433 #else
434 int mib[] = { CTL_MACHDEP, CPU_ALTIVEC };
435 #endif
436
437 sysctl(mib, sizeof(mib)/sizeof(mib[0]), &cpu_altivec, &len, NULL, 0);
438 return cpu_altivec;
439 }
440 # else
// Fallback without runtime detection: reports AltiVec as available exactly
// when AltiVec support was compiled in.
static int altivec_available(void)
{
  int compiled_in = 0;
#if COMPILE_POWERPC_ALTIVEC
  compiled_in = 1;
#endif
  return compiled_in;
}
449 # endif
450 #endif //COMPILE_POWERPC
451
set_hardware_flags(int32_t cpuid)452 static void set_hardware_flags(int32_t cpuid) {
453 FILL(kvz_g_hardware_flags, 0);
454
455 #if COMPILE_INTEL
456 if (cpuid) {
457 cpuid_t cpuid1 = { 0, 0, 0, 0 };
458 /* CPU feature bits */
459 enum {
460 CPUID1_EDX_MMX = 1 << 23,
461 CPUID1_EDX_SSE = 1 << 25,
462 CPUID1_EDX_SSE2 = 1 << 26,
463 CPUID1_EDX_HYPER_THREADING = 1 << 28,
464 };
465 enum {
466 CPUID1_ECX_SSE3 = 1 << 0,
467 CPUID1_ECX_SSSE3 = 1 << 9,
468 CPUID1_ECX_SSE41 = 1 << 19,
469 CPUID1_ECX_SSE42 = 1 << 20,
470 CPUID1_ECX_XSAVE = 1 << 26,
471 CPUID1_ECX_OSXSAVE = 1 << 27,
472 CPUID1_ECX_AVX = 1 << 28,
473 };
474 enum {
475 CPUID7_EBX_AVX2 = 1 << 5,
476 };
477 enum {
478 XGETBV_XCR0_XMM = 1 << 1,
479 XGETBV_XCR0_YMM = 1 << 2,
480 };
481
482 // Dig CPU features with cpuid
483 get_cpuid(1, 0, &cpuid1);
484
485 #ifdef _WIN32
486 SYSTEM_INFO systeminfo;
487 GetSystemInfo(&systeminfo);
488
489 kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors;
490 #else
491 kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
492 #endif
493
494 kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count;
495 kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING;
496 if (kvz_g_hardware_flags.intel_flags.hyper_threading) {
497 kvz_g_hardware_flags.physical_cpu_count /= 2;
498 }
499
500 // EDX
501 if (cpuid1.edx & CPUID1_EDX_MMX) kvz_g_hardware_flags.intel_flags.mmx = 1;
502 if (cpuid1.edx & CPUID1_EDX_SSE) kvz_g_hardware_flags.intel_flags.sse = 1;
503 if (cpuid1.edx & CPUID1_EDX_SSE2) kvz_g_hardware_flags.intel_flags.sse2 = 1;
504 // ECX
505 if (cpuid1.ecx & CPUID1_ECX_SSE3) kvz_g_hardware_flags.intel_flags.sse3 = 1;;
506 if (cpuid1.ecx & CPUID1_ECX_SSSE3) kvz_g_hardware_flags.intel_flags.ssse3 = 1;
507 if (cpuid1.ecx & CPUID1_ECX_SSE41) kvz_g_hardware_flags.intel_flags.sse41 = 1;
508 if (cpuid1.ecx & CPUID1_ECX_SSE42) kvz_g_hardware_flags.intel_flags.sse42 = 1;
509
510 // Check hardware and OS support for xsave and xgetbv.
511 if (cpuid1.ecx & (CPUID1_ECX_XSAVE | CPUID1_ECX_OSXSAVE)) {
512 uint64_t xcr0 = 0;
513 // Use _XCR_XFEATURE_ENABLED_MASK to check if _xgetbv intrinsic is
514 // supported by the compiler.
515 #ifdef _XCR_XFEATURE_ENABLED_MASK
516 xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
517 #elif defined(__GNUC__)
518 unsigned eax = 0, edx = 0;
519 asm("xgetbv" : "=a"(eax), "=d"(edx) : "c" (0));
520 xcr0 = (uint64_t)edx << 32 | eax;
521 #endif
522 bool avx_support = cpuid1.ecx & CPUID1_ECX_AVX || false;
523 bool xmm_support = xcr0 & XGETBV_XCR0_XMM || false;
524 bool ymm_support = xcr0 & XGETBV_XCR0_YMM || false;
525
526 if (avx_support && xmm_support && ymm_support) {
527 kvz_g_hardware_flags.intel_flags.avx = 1;
528 }
529
530 if (kvz_g_hardware_flags.intel_flags.avx) {
531 cpuid_t cpuid7 = { 0, 0, 0, 0 };
532 get_cpuid(7, 0, &cpuid7);
533 if (cpuid7.ebx & CPUID7_EBX_AVX2) kvz_g_hardware_flags.intel_flags.avx2 = 1;
534 }
535 }
536 }
537
538 fprintf(stderr, "Compiled: INTEL, flags:");
539 #if COMPILE_INTEL_MMX
540 fprintf(stderr, " MMX");
541 #endif
542 #if COMPILE_INTEL_SSE
543 fprintf(stderr, " SSE");
544 #endif
545 #if COMPILE_INTEL_SSE2
546 fprintf(stderr, " SSE2");
547 #endif
548 #if COMPILE_INTEL_SSE3
549 fprintf(stderr, " SSE3");
550 #endif
551 #if COMPILE_INTEL_SSSE3
552 fprintf(stderr, " SSSE3");
553 #endif
554 #if COMPILE_INTEL_SSE41
555 fprintf(stderr, " SSE41");
556 #endif
557 #if COMPILE_INTEL_SSE42
558 fprintf(stderr, " SSE42");
559 #endif
560 #if COMPILE_INTEL_AVX
561 fprintf(stderr, " AVX");
562 #endif
563 #if COMPILE_INTEL_AVX2
564 fprintf(stderr, " AVX2");
565 #endif
566 fprintf(stderr, "\nDetected: INTEL, flags:");
567 if (kvz_g_hardware_flags.intel_flags.mmx) fprintf(stderr, " MMX");
568 if (kvz_g_hardware_flags.intel_flags.sse) fprintf(stderr, " SSE");
569 if (kvz_g_hardware_flags.intel_flags.sse2) fprintf(stderr, " SSE2");
570 if (kvz_g_hardware_flags.intel_flags.sse3) fprintf(stderr, " SSE3");
571 if (kvz_g_hardware_flags.intel_flags.ssse3) fprintf(stderr, " SSSE3");
572 if (kvz_g_hardware_flags.intel_flags.sse41) fprintf(stderr, " SSE41");
573 if (kvz_g_hardware_flags.intel_flags.sse42) fprintf(stderr, " SSE42");
574 if (kvz_g_hardware_flags.intel_flags.avx) fprintf(stderr, " AVX");
575 if (kvz_g_hardware_flags.intel_flags.avx2) fprintf(stderr, " AVX2");
576 fprintf(stderr, "\n");
577 #endif //COMPILE_INTEL
578
579 #if COMPILE_POWERPC
580 if (cpuid) {
581 kvz_g_hardware_flags.powerpc_flags.altivec = altivec_available();
582 }
583
584 fprintf(stderr, "Compiled: PowerPC, flags:");
585 #if COMPILE_POWERPC_ALTIVEC
586 fprintf(stderr, " AltiVec");
587 #endif
588 fprintf(stderr, "\nDetected: PowerPC, flags:");
589 if (kvz_g_hardware_flags.powerpc_flags.altivec) fprintf(stderr, " AltiVec");
590 fprintf(stderr, "\n");
591 #endif
592
593 }
594