1 /*****************************************************************************
2  * This file is part of Kvazaar HEVC encoder.
3  *
4  * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without modification,
8  * are permitted provided that the following conditions are met:
9  *
10  * * Redistributions of source code must retain the above copyright notice, this
11  *   list of conditions and the following disclaimer.
12  *
13  * * Redistributions in binary form must reproduce the above copyright notice, this
14  *   list of conditions and the following disclaimer in the documentation and/or
15  *   other materials provided with the distribution.
16  *
17  * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26  * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
28  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  ****************************************************************************/
32 
33 #include "strategyselector.h"
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
39 #ifdef _WIN32
40 #include <windows.h>
41 #else
42 #include <unistd.h>
43 #endif
44 
45 hardware_flags_t kvz_g_hardware_flags;
46 hardware_flags_t kvz_g_strategies_in_use;
47 hardware_flags_t kvz_g_strategies_available;
48 
49 static void set_hardware_flags(int32_t cpuid);
50 static void* strategyselector_choose_for(const strategy_list_t * const strategies, const char * const strategy_type);
51 
52 //Strategies to include (add new file here)
53 
54 //Returns 1 if successful
kvz_strategyselector_init(int32_t cpuid,uint8_t bitdepth)55 int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
56   const strategy_to_select_t *cur_strategy_to_select = strategies_to_select;
57   strategy_list_t strategies;
58 
59   strategies.allocated = 0;
60   strategies.count = 0;
61   strategies.strategies = NULL;
62 
63   set_hardware_flags(cpuid);
64 
65   //Add new register function here
66   if (!kvz_strategy_register_picture(&strategies, bitdepth)) {
67     fprintf(stderr, "kvz_strategy_register_picture failed!\n");
68     return 0;
69   }
70 
71   if (!kvz_strategy_register_nal(&strategies, bitdepth)) {
72     fprintf(stderr, "kvz_strategy_register_nal failed!\n");
73     return 0;
74   }
75 
76   if (!kvz_strategy_register_dct(&strategies, bitdepth)) {
77     fprintf(stderr, "kvz_strategy_register_dct failed!\n");
78     return 0;
79   }
80 
81   if (!kvz_strategy_register_ipol(&strategies, bitdepth)) {
82     fprintf(stderr, "kvz_strategy_register_ipol failed!\n");
83     return 0;
84   }
85 
86   if (!kvz_strategy_register_quant(&strategies, bitdepth)) {
87     fprintf(stderr, "kvz_strategy_register_quant failed!\n");
88     return 0;
89   }
90 
91   if (!kvz_strategy_register_intra(&strategies, bitdepth)) {
92     fprintf(stderr, "kvz_strategy_register_intra failed!\n");
93     return 0;
94   }
95 
96   if (!kvz_strategy_register_sao(&strategies, bitdepth)) {
97     fprintf(stderr, "kvz_strategy_register_sao failed!\n");
98     return 0;
99   }
100 
101   if (!kvz_strategy_register_encode(&strategies, bitdepth)) {
102     fprintf(stderr, "kvz_strategy_register_encode failed!\n");
103     return 0;
104   }
105 
106   while(cur_strategy_to_select->fptr) {
107     *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
108 
109     if (!(*(cur_strategy_to_select->fptr))) {
110       fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type);
111       return 0;
112     }
113     ++cur_strategy_to_select;
114   }
115 
116   //We can free the structure now, as all strategies are statically set to pointers
117   if (strategies.allocated) {
118     //Also check what optimizations are available and what are in use
119     //SIMD optimizations available
120     bool strategies_available = false;
121     fprintf(stderr, "Available: ");
122     if (kvz_g_strategies_available.intel_flags.avx != 0){
123       fprintf(stderr, "avx(%d) ", kvz_g_strategies_available.intel_flags.avx);
124       strategies_available = true;
125     }
126     if (kvz_g_strategies_available.intel_flags.avx2 != 0){
127       fprintf(stderr, "avx2(%d) ", kvz_g_strategies_available.intel_flags.avx2);
128       strategies_available = true;
129     }
130     if (kvz_g_strategies_available.intel_flags.mmx != 0) {
131       fprintf(stderr, "mmx(%d) ", kvz_g_strategies_available.intel_flags.mmx);
132       strategies_available = true;
133     }
134     if (kvz_g_strategies_available.intel_flags.sse != 0) {
135       fprintf(stderr, "sse(%d) ", kvz_g_strategies_available.intel_flags.sse);
136       strategies_available = true;
137     }
138     if (kvz_g_strategies_available.intel_flags.sse2 != 0) {
139       fprintf(stderr, "sse2(%d) ", kvz_g_strategies_available.intel_flags.sse2);
140       strategies_available = true;
141     }
142     if (kvz_g_strategies_available.intel_flags.sse3 != 0) {
143       fprintf(stderr, "sse3(%d) ", kvz_g_strategies_available.intel_flags.sse3);
144       strategies_available = true;
145     }
146     if (kvz_g_strategies_available.intel_flags.sse41 != 0) {
147       fprintf(stderr, "sse41(%d) ", kvz_g_strategies_available.intel_flags.sse41);
148       strategies_available = true;
149     }
150     if (kvz_g_strategies_available.intel_flags.sse42 != 0) {
151       fprintf(stderr, "sse42(%d) ", kvz_g_strategies_available.intel_flags.sse42);
152       strategies_available = true;
153     }
154     if (kvz_g_strategies_available.intel_flags.ssse3 != 0) {
155       fprintf(stderr, "ssse3(%d) ", kvz_g_strategies_available.intel_flags.ssse3);
156       strategies_available = true;
157     }
158     if (kvz_g_strategies_available.arm_flags.neon != 0) {
159       fprintf(stderr, "neon(%d) ", kvz_g_strategies_available.arm_flags.neon);
160       strategies_available = true;
161     }
162     if (kvz_g_strategies_available.powerpc_flags.altivec != 0) {
163       fprintf(stderr, "altivec(%d) ", kvz_g_strategies_available.powerpc_flags.altivec);
164       strategies_available = true;
165     }
166     //If there is no strategies available
167     if (!strategies_available){
168       fprintf(stderr, "no SIMD optimizations");
169     }
170     fprintf(stderr, "\n");
171 
172     //SIMD optimizations in use
173     bool strategies_in_use = false;
174     fprintf(stderr, "In use: ");
175     if (kvz_g_strategies_in_use.intel_flags.avx != 0){
176       fprintf(stderr, "avx(%d) ", kvz_g_strategies_in_use.intel_flags.avx);
177       strategies_in_use = true;
178     }
179     if (kvz_g_strategies_in_use.intel_flags.avx2 != 0){
180       fprintf(stderr, "avx2(%d) ", kvz_g_strategies_in_use.intel_flags.avx2);
181       strategies_in_use = true;
182     }
183     if (kvz_g_strategies_in_use.intel_flags.mmx != 0) {
184       fprintf(stderr, "mmx(%d) ", kvz_g_strategies_in_use.intel_flags.mmx);
185       strategies_in_use = true;
186     }
187     if (kvz_g_strategies_in_use.intel_flags.sse != 0) {
188       fprintf(stderr, "sse(%d) ", kvz_g_strategies_in_use.intel_flags.sse);
189       strategies_in_use = true;
190     }
191     if (kvz_g_strategies_in_use.intel_flags.sse2 != 0) {
192       fprintf(stderr, "sse2(%d) ", kvz_g_strategies_in_use.intel_flags.sse2);
193       strategies_in_use = true;
194     }
195     if (kvz_g_strategies_in_use.intel_flags.sse3 != 0) {
196       fprintf(stderr, "sse3(%d) ", kvz_g_strategies_in_use.intel_flags.sse3);
197       strategies_in_use = true;
198     }
199     if (kvz_g_strategies_in_use.intel_flags.sse41 != 0) {
200       fprintf(stderr, "sse41(%d) ", kvz_g_strategies_in_use.intel_flags.sse41);
201       strategies_in_use = true;
202     }
203     if (kvz_g_strategies_in_use.intel_flags.sse42 != 0) {
204       fprintf(stderr, "sse42(%d) ", kvz_g_strategies_in_use.intel_flags.sse42);
205       strategies_in_use = true;
206     }
207     if (kvz_g_strategies_in_use.intel_flags.ssse3 != 0) {
208       fprintf(stderr, "ssse3(%d) ", kvz_g_strategies_in_use.intel_flags.ssse3);
209       strategies_in_use = true;
210     }
211     if (kvz_g_strategies_in_use.arm_flags.neon != 0) {
212       fprintf(stderr, "neon(%d) ", kvz_g_strategies_in_use.arm_flags.neon);
213       strategies_in_use = true;
214     }
215     if (kvz_g_strategies_in_use.powerpc_flags.altivec != 0) {
216       fprintf(stderr, "altivec(%d) ", kvz_g_strategies_in_use.powerpc_flags.altivec);
217       strategies_in_use = true;
218     }
219     //If there is no strategies in use
220     if (!strategies_in_use){
221       fprintf(stderr, "no SIMD optimizations");
222     }
223     fprintf(stderr, "\n");
224 
225     //Free memory
226     free(strategies.strategies);
227   }
228 
229   return 1;
230 }
231 
232 //Returns 1 if successful, 0 otherwise
kvz_strategyselector_register(void * const opaque,const char * const type,const char * const strategy_name,int priority,void * const fptr)233 int kvz_strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) {
234   strategy_list_t * const strategies = opaque;
235 
236   if (strategies->allocated == strategies->count) {
237     strategy_t* new_strategies = realloc(strategies->strategies, sizeof(strategy_t) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE));
238     if (!new_strategies) {
239       fprintf(stderr, "Could not increase strategies list size!\n");
240       return 0;
241     }
242     strategies->strategies = new_strategies;
243     strategies->allocated += STRATEGY_LIST_ALLOC_SIZE;
244   }
245 
246   {
247     strategy_t *new_strategy = &strategies->strategies[strategies->count++];
248     new_strategy->type = type;
249     new_strategy->strategy_name = strategy_name;
250     new_strategy->priority = priority;
251     new_strategy->fptr = fptr;
252   }
253 
254   //Check what strategies are available when they are registered
255   if (strcmp(strategy_name, "avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
256   if (strcmp(strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
257   if (strcmp(strategy_name, "avx2") == 0) kvz_g_strategies_available.intel_flags.avx2++;
258   if (strcmp(strategy_name, "mmx") == 0) kvz_g_strategies_available.intel_flags.mmx++;
259   if (strcmp(strategy_name, "sse") == 0) kvz_g_strategies_available.intel_flags.sse++;
260   if (strcmp(strategy_name, "sse2") == 0) kvz_g_strategies_available.intel_flags.sse2++;
261   if (strcmp(strategy_name, "sse3") == 0) kvz_g_strategies_available.intel_flags.sse3++;
262   if (strcmp(strategy_name, "sse41") == 0) kvz_g_strategies_available.intel_flags.sse41++;
263   if (strcmp(strategy_name, "sse42") == 0) kvz_g_strategies_available.intel_flags.sse42++;
264   if (strcmp(strategy_name, "ssse3") == 0) kvz_g_strategies_available.intel_flags.ssse3++;
265   if (strcmp(strategy_name, "altivec") == 0) kvz_g_strategies_available.powerpc_flags.altivec++;
266   if (strcmp(strategy_name, "neon") == 0) kvz_g_strategies_available.arm_flags.neon++;
267 
268 #ifdef DEBUG_STRATEGYSELECTOR
269   fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
270 #endif //DEBUG_STRATEGYSELECTOR
271 
272   return 1;
273 }
274 
strategyselector_choose_for(const strategy_list_t * const strategies,const char * const strategy_type)275 static void* strategyselector_choose_for(const strategy_list_t * const strategies, const char * const strategy_type) {
276   unsigned int max_priority = 0;
277   int max_priority_i = -1;
278   char buffer[256];
279   char *override = NULL;
280   int i = 0;
281 
282   // Because VS doesn't support snprintf, let's assert that there is
283   // enough room in the buffer. Max length for strategy type is
284   // buffersize (256) - prefix including terminating zero.
285   assert(strlen(strategy_type) < 256 - sizeof("KVAZAAR_OVERRIDE_") );
286   sprintf(buffer, "KVAZAAR_OVERRIDE_%s", strategy_type);
287 
288   override = getenv(buffer);
289 
290   for (i=0; i < strategies->count; ++i) {
291     if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
292       if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) {
293         fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name);
294         return strategies->strategies[i].fptr;
295       }
296       if (strategies->strategies[i].priority >= max_priority) {
297         max_priority_i = i;
298         max_priority = strategies->strategies[i].priority;
299       }
300     }
301   }
302 
303   if (override) {
304     fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override);
305     return NULL;
306   }
307 
308 #ifdef DEBUG_STRATEGYSELECTOR
309   fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
310   for (i=0; i < strategies->count; ++i) {
311     if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
312       if (i != max_priority_i) {
313         fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
314       } else {
315         fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
316       }
317     }
318   }
319 #endif //DEBUG_STRATEGYSELECTOR
320 
321 
322   if (max_priority_i == -1) {
323     return NULL;
324   }
325 
326   //Check what strategy we are going to use
327   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
328   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
329   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx2") == 0) kvz_g_strategies_in_use.intel_flags.avx2++;
330   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "mmx") == 0) kvz_g_strategies_in_use.intel_flags.mmx++;
331   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse") == 0) kvz_g_strategies_in_use.intel_flags.sse++;
332   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse2") == 0) kvz_g_strategies_in_use.intel_flags.sse2++;
333   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse3") == 0) kvz_g_strategies_in_use.intel_flags.sse3++;
334   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse41") == 0) kvz_g_strategies_in_use.intel_flags.sse41++;
335   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse42") == 0) kvz_g_strategies_in_use.intel_flags.sse42++;
336   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "ssse3") == 0) kvz_g_strategies_in_use.intel_flags.ssse3++;
337   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "altivec") == 0) kvz_g_strategies_in_use.powerpc_flags.altivec++;
338   if (strcmp(strategies->strategies[max_priority_i].strategy_name, "neon") == 0) kvz_g_strategies_in_use.arm_flags.neon++;
339 
340   return strategies->strategies[max_priority_i].fptr;
341 }
342 
343 #if COMPILE_INTEL
344 
// The four registers filled in by one CPUID query (see get_cpuid).
// Member order matters: instances are initialized positionally.
typedef struct {
  unsigned eax;
  unsigned ebx;
  unsigned ecx;
  unsigned edx;
} cpuid_t;
351 
352 // CPUID adapters for different compilers.
353 #  if defined(__GNUC__)
354 #include <cpuid.h>
355 
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)356 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
357   if (__get_cpuid_max(level & 0x80000000, NULL) < level) return 0;
358   __cpuid_count(level, sublevel, cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx);
359   return 1;
360 }
361 #  elif defined(_MSC_VER)
362 #include <intrin.h>
363 
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)364 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
365   int vendor_info[4] = { 0, 0, 0, 0 };
366   __cpuidex(vendor_info, 0, 0);
367 
368   // Check highest supported function.
369   if (level > vendor_info[0]) return 0;
370 
371   int ms_cpu_info[4] = { cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx };
372   __cpuidex(ms_cpu_info, level, sublevel);
373   cpu_info->eax = ms_cpu_info[0];
374   cpu_info->ebx = ms_cpu_info[1];
375   cpu_info->ecx = ms_cpu_info[2];
376   cpu_info->edx = ms_cpu_info[3];
377 
378   return 1;
379 }
380 #  else
get_cpuid(unsigned level,unsigned sublevel,cpuid_t * cpu_info)381 static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info)
382 {
383   return 0;
384 }
385 #  endif
386 #endif // COMPILE_INTEL
387 
388 #if COMPILE_POWERPC
389 #  if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
390 #ifdef __linux__
391 #include <asm/cputable.h>
392 #else
393 #include <machine/cpu.h>
394 #endif
395 #include <sys/auxv.h>
396 
altivec_available(void)397 static int altivec_available(void)
398 {
399     unsigned long hwcap = 0;
400 #ifdef __linux__
401     hwcap = getauxval(AT_HWCAP);
402 #else
403     elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
404 #endif
405     return !!(hwcap & PPC_FEATURE_HAS_ALTIVEC);
406 }
407 #  elif defined(__FreeBSD__)
408 #include <sys/types.h>
409 #include <sys/sysctl.h>
410 #include <machine/cpu.h>
411 
altivec_available(void)412 static int altivec_available(void)
413 {
414   u_long cpu_features = 0;
415   size_t len = sizeof(cpu_features);
416 
417   sysctlbyname("hw.cpu_features", &cpu_features, &len, NULL, 0);
418   return !!(cpu_features & PPC_FEATURE_HAS_ALTIVEC);
419 }
420 #  elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
421 #include <sys/param.h>
422 #include <sys/sysctl.h>
423 #ifndef __APPLE__
424 #include <machine/cpu.h>
425 #endif
426 
altivec_available(void)427 static int altivec_available(void)
428 {
429   int cpu_altivec = 0;
430   size_t len = sizeof(cpu_altivec);
431 #ifdef HW_VECTORUNIT
432   int mib[] = { CTL_HW, HW_VECTORUNIT };
433 #else
434   int mib[] = { CTL_MACHDEP, CPU_ALTIVEC };
435 #endif
436 
437   sysctl(mib, sizeof(mib)/sizeof(mib[0]), &cpu_altivec, &len, NULL, 0);
438   return cpu_altivec;
439 }
440 #  else
// Fallback without run-time detection: reports AltiVec as available (1)
// exactly when the build enabled it via COMPILE_POWERPC_ALTIVEC, else 0.
static int altivec_available(void)
{
  int compiled_in = 0;

#if COMPILE_POWERPC_ALTIVEC
  compiled_in = 1;
#endif

  return compiled_in;
}
449 #  endif
450 #endif //COMPILE_POWERPC
451 
set_hardware_flags(int32_t cpuid)452 static void set_hardware_flags(int32_t cpuid) {
453   FILL(kvz_g_hardware_flags, 0);
454 
455 #if COMPILE_INTEL
456   if (cpuid) {
457     cpuid_t cpuid1 = { 0, 0, 0, 0 };
458     /* CPU feature bits */
459     enum {
460       CPUID1_EDX_MMX = 1 << 23,
461       CPUID1_EDX_SSE = 1 << 25,
462       CPUID1_EDX_SSE2 = 1 << 26,
463       CPUID1_EDX_HYPER_THREADING = 1 << 28,
464     };
465     enum {
466       CPUID1_ECX_SSE3 = 1 << 0,
467       CPUID1_ECX_SSSE3 = 1 << 9,
468       CPUID1_ECX_SSE41 = 1 << 19,
469       CPUID1_ECX_SSE42 = 1 << 20,
470       CPUID1_ECX_XSAVE = 1 << 26,
471       CPUID1_ECX_OSXSAVE = 1 << 27,
472       CPUID1_ECX_AVX = 1 << 28,
473     };
474     enum {
475       CPUID7_EBX_AVX2 = 1 << 5,
476     };
477     enum {
478       XGETBV_XCR0_XMM = 1 << 1,
479       XGETBV_XCR0_YMM = 1 << 2,
480     };
481 
482     // Dig CPU features with cpuid
483     get_cpuid(1, 0, &cpuid1);
484 
485 #ifdef _WIN32
486     SYSTEM_INFO systeminfo;
487     GetSystemInfo(&systeminfo);
488 
489     kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors;
490 #else
491     kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
492 #endif
493 
494     kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count;
495     kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING;
496     if (kvz_g_hardware_flags.intel_flags.hyper_threading) {
497       kvz_g_hardware_flags.physical_cpu_count /= 2;
498     }
499 
500     // EDX
501     if (cpuid1.edx & CPUID1_EDX_MMX)   kvz_g_hardware_flags.intel_flags.mmx = 1;
502     if (cpuid1.edx & CPUID1_EDX_SSE)   kvz_g_hardware_flags.intel_flags.sse = 1;
503     if (cpuid1.edx & CPUID1_EDX_SSE2)  kvz_g_hardware_flags.intel_flags.sse2 = 1;
504     // ECX
505     if (cpuid1.ecx & CPUID1_ECX_SSE3)  kvz_g_hardware_flags.intel_flags.sse3 = 1;;
506     if (cpuid1.ecx & CPUID1_ECX_SSSE3) kvz_g_hardware_flags.intel_flags.ssse3 = 1;
507     if (cpuid1.ecx & CPUID1_ECX_SSE41) kvz_g_hardware_flags.intel_flags.sse41 = 1;
508     if (cpuid1.ecx & CPUID1_ECX_SSE42) kvz_g_hardware_flags.intel_flags.sse42 = 1;
509 
510     // Check hardware and OS support for xsave and xgetbv.
511     if (cpuid1.ecx & (CPUID1_ECX_XSAVE | CPUID1_ECX_OSXSAVE)) {
512       uint64_t xcr0 = 0;
513       // Use _XCR_XFEATURE_ENABLED_MASK to check if _xgetbv intrinsic is
514       // supported by the compiler.
515 #ifdef _XCR_XFEATURE_ENABLED_MASK
516       xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
517 #elif defined(__GNUC__)
518       unsigned eax = 0, edx = 0;
519       asm("xgetbv" : "=a"(eax), "=d"(edx) : "c" (0));
520       xcr0 = (uint64_t)edx << 32 | eax;
521 #endif
522       bool avx_support = cpuid1.ecx & CPUID1_ECX_AVX || false;
523       bool xmm_support = xcr0 & XGETBV_XCR0_XMM || false;
524       bool ymm_support = xcr0 & XGETBV_XCR0_YMM || false;
525 
526       if (avx_support && xmm_support && ymm_support) {
527         kvz_g_hardware_flags.intel_flags.avx = 1;
528       }
529 
530       if (kvz_g_hardware_flags.intel_flags.avx) {
531         cpuid_t cpuid7 = { 0, 0, 0, 0 };
532         get_cpuid(7, 0, &cpuid7);
533         if (cpuid7.ebx & CPUID7_EBX_AVX2)  kvz_g_hardware_flags.intel_flags.avx2 = 1;
534       }
535     }
536   }
537 
538   fprintf(stderr, "Compiled: INTEL, flags:");
539 #if COMPILE_INTEL_MMX
540   fprintf(stderr, " MMX");
541 #endif
542 #if COMPILE_INTEL_SSE
543   fprintf(stderr, " SSE");
544 #endif
545 #if COMPILE_INTEL_SSE2
546   fprintf(stderr, " SSE2");
547 #endif
548 #if COMPILE_INTEL_SSE3
549   fprintf(stderr, " SSE3");
550 #endif
551 #if COMPILE_INTEL_SSSE3
552   fprintf(stderr, " SSSE3");
553 #endif
554 #if COMPILE_INTEL_SSE41
555   fprintf(stderr, " SSE41");
556 #endif
557 #if COMPILE_INTEL_SSE42
558   fprintf(stderr, " SSE42");
559 #endif
560 #if COMPILE_INTEL_AVX
561   fprintf(stderr, " AVX");
562 #endif
563 #if COMPILE_INTEL_AVX2
564   fprintf(stderr, " AVX2");
565 #endif
566   fprintf(stderr, "\nDetected: INTEL, flags:");
567   if (kvz_g_hardware_flags.intel_flags.mmx) fprintf(stderr, " MMX");
568   if (kvz_g_hardware_flags.intel_flags.sse) fprintf(stderr, " SSE");
569   if (kvz_g_hardware_flags.intel_flags.sse2) fprintf(stderr, " SSE2");
570   if (kvz_g_hardware_flags.intel_flags.sse3) fprintf(stderr, " SSE3");
571   if (kvz_g_hardware_flags.intel_flags.ssse3) fprintf(stderr, " SSSE3");
572   if (kvz_g_hardware_flags.intel_flags.sse41) fprintf(stderr, " SSE41");
573   if (kvz_g_hardware_flags.intel_flags.sse42) fprintf(stderr, " SSE42");
574   if (kvz_g_hardware_flags.intel_flags.avx) fprintf(stderr, " AVX");
575   if (kvz_g_hardware_flags.intel_flags.avx2) fprintf(stderr, " AVX2");
576   fprintf(stderr, "\n");
577 #endif //COMPILE_INTEL
578 
579 #if COMPILE_POWERPC
580   if (cpuid) {
581     kvz_g_hardware_flags.powerpc_flags.altivec = altivec_available();
582   }
583 
584   fprintf(stderr, "Compiled: PowerPC, flags:");
585 #if COMPILE_POWERPC_ALTIVEC
586   fprintf(stderr, " AltiVec");
587 #endif
588   fprintf(stderr, "\nDetected: PowerPC, flags:");
589   if (kvz_g_hardware_flags.powerpc_flags.altivec) fprintf(stderr, " AltiVec");
590   fprintf(stderr, "\n");
591 #endif
592 
593 }
594