/*
 * Copyright (c) 2007-2019, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#ifndef X86ID_H_
#define X86ID_H_

/*
 * X86IDFN(n) prefixes the identifier n with __Cpuid_.  The two-level
 * definition lets the argument be macro-expanded before it is pasted.
 */
#define X86IDFN_(l,r) l##r
#define X86IDFN(n) X86IDFN_(__Cpuid_,n)

/* Value held by an is_<FEATURE>_cached variable that is not yet initialized. */
#define X86ID_IS_CACHED_UNDEF   (-1)

/*
 * Bit masks for various X8664 hardware features within X86IDFN(hw_features).
 * If X86IDFN(hw_features) == 0, the variable is undefined and is
 * initialized by calling X86IDFN(init_hw_features)().
 *
 * X86IDFN(hw_features) is intended to be used by runtime routines that have
 * different execution paths depending on hardware characteristics.  In
 * particular different SSE and AVX implementations to avoid some processors'
 * expensive AVX/SSE transition penalties.
 *
 * A prototype assembly pseudo code implementation would be:
 *
 * #if defined(TARGET_WIN_X8664)
 *      movl    ENT(X86IDFN(hw_features))(%rip), %eax
 * #else
 *      movq    ENT(X86IDFN(hw_features))@GOTPCREL(%rip), %rax
 *      movl    (%rax), %eax
 * #endif
 *
 * 1:
 *      testl   $HW_AVX, %eax
 *      jnz     do_avx
 *      testl   $HW_SSE, %eax
 *      jnz     do_sse          // Can't assume do_sse on first pass
 *      subq    $8, %rsp        // Adjusted for number of local regs to save
 *      movq    I1, (%rsp)      // Or %xmm0, or I1, %xmm0, or %ymm0, ...
 *      movl    %eax, I1W       // Input to X86IDFN(init_hw_features)()
 *      CALL    (ENT(X86IDFN(init_hw_features)))    // (%eax) = hw_features
 *      movq    (%rsp), I1      // And possibly more regs
 *      addq    $8, %rsp
 *      jmp     1b              // Restart feature tests
 *
 * Note: X86IDFN(init_hw_features)(X86IDFN(hw_features)) will abort if
 * I1W on entry is the same as the return value.
 *
 * NOTE(review): the non-Windows load above names hw_features so that it
 * matches the Windows branch and the HW_* tests that follow; an earlier
 * revision of this comment named is_avx_cached there -- confirm.
 */

#define HW_SSE      0x00000001  /* SSE, SSE2, SSE3 */
#define HW_SSE4     0x00000002  /* SSE4A, SSE41, SSE42 */
#define HW_AVX      0x00000004
#define HW_AVX2     0x00000008
#define HW_AVX512   0x00000010
#define HW_AVX512F  0x00000020
#define HW_AVX512VL 0x00000040
#define HW_FMA      0x00000080
#define HW_FMA4     0x00000100
#define HW_KNL      0x00000200
#define HW_F16C     0x00000400
#define HW_SSSE3    0x00000800

#if ! defined(__ASSEMBLER__)

#include <stdint.h>

/* Paste three tokens together after macro-expanding each argument. */
#define IS_CONCAT3_(l,m,r)  l##m##r
#define IS_CONCAT3(l,m,r)   IS_CONCAT3_(l,m,r)

/*
 * IS_X86ID(f): query feature f (e.g. IS_X86ID(avx)).
 *
 * Yields X86IDFN(is_<f>_cached) when that variable is not
 * X86ID_IS_CACHED_UNDEF; otherwise yields the result of calling
 * X86IDFN(is_<f>)().
 *
 * The whole expansion is parenthesized so the macro can be used as an
 * operand of a larger expression (IS_X86ID(avx) && other, !IS_X86ID(avx),
 * ...) without the conditional operator capturing neighbouring operands.
 * The cached variable may be read twice.
 */
#define IS_X86ID(f)                                                     \
    ((X86IDFN(IS_CONCAT3(is_,f,_cached)) != X86ID_IS_CACHED_UNDEF)      \
        ? X86IDFN(IS_CONCAT3(is_,f,_cached))                            \
        : X86IDFN(IS_CONCAT3(is_,f,))())

/*
 * All the "_cached" variables are one of three values:
 * 1) X86ID_IS_CACHED_UNDEF:    not initialized
 * 2) false (0):                initialized and value is false
 * 3) true (1):                 initialized and value is true
 */

/*
 * For Non-Windows based builds (Linux, OSX), the extern keyword
 * gives the proper attribute for the global variables is_<FEATURE>_cached.
 * But for Windows, we need to use MS' __declspec attribute.
 * When building x86id.c which defines those global variables, we define the
 * CPP object macro OBJ_WIN_X8664_IS_X86ID.
 */

#if defined (TARGET_WIN_X8664) && defined(_DLL)
#   if defined(OBJ_WIN_X8664_IS_X86ID)
#       define  DECLEXTERN  __declspec(dllexport)
#   else
#       define  DECLEXTERN  __declspec(dllimport)
#   endif
#else
#   define  DECLEXTERN  extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Feature word tested against the HW_* masks defined above. */
DECLEXTERN  uint32_t    X86IDFN(hw_features);

/* Cached query results, one per is_<FEATURE>() function declared below. */
DECLEXTERN  int X86IDFN(is_intel_cached);
DECLEXTERN  int X86IDFN(is_amd_cached);
DECLEXTERN  int X86IDFN(is_ip6_cached);
DECLEXTERN  int X86IDFN(is_sse_cached);
DECLEXTERN  int X86IDFN(is_sse2_cached);
DECLEXTERN  int X86IDFN(is_sse3_cached);
DECLEXTERN  int X86IDFN(is_ssse3_cached);
DECLEXTERN  int X86IDFN(is_sse4a_cached);
DECLEXTERN  int X86IDFN(is_sse41_cached);
DECLEXTERN  int X86IDFN(is_sse42_cached);
DECLEXTERN  int X86IDFN(is_aes_cached);
DECLEXTERN  int X86IDFN(is_avx_cached);
DECLEXTERN  int X86IDFN(is_avx2_cached);
DECLEXTERN  int X86IDFN(is_avx512_cached);
DECLEXTERN  int X86IDFN(is_avx512f_cached);
DECLEXTERN  int X86IDFN(is_avx512vl_cached);
DECLEXTERN  int X86IDFN(is_fma_cached);
DECLEXTERN  int X86IDFN(is_fma4_cached);
DECLEXTERN  int X86IDFN(is_ht_cached);
DECLEXTERN  int X86IDFN(is_athlon_cached);
DECLEXTERN  int X86IDFN(is_hammer_cached);
DECLEXTERN  int X86IDFN(is_gh_cached);
DECLEXTERN  int X86IDFN(is_gh_a_cached);
DECLEXTERN  int X86IDFN(is_gh_b_cached);
DECLEXTERN  int X86IDFN(is_shanghai_cached);
DECLEXTERN  int X86IDFN(is_istanbul_cached);
DECLEXTERN  int X86IDFN(is_bulldozer_cached);
DECLEXTERN  int X86IDFN(is_piledriver_cached);
DECLEXTERN  int X86IDFN(is_k7_cached);
DECLEXTERN  int X86IDFN(is_ia32e_cached);
DECLEXTERN  int X86IDFN(is_p4_cached);
DECLEXTERN  int X86IDFN(is_knl_cached);
DECLEXTERN  int X86IDFN(is_x86_64_cached);
DECLEXTERN  int X86IDFN(is_f16c_cached);

DECLEXTERN  int X86IDFN(is_intel)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_amd)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_ip6)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse2)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse3)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_ssse3)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse4a)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse41)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_sse42)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_aes)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_avx)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_avx2)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_avx512)(void);       /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_avx512f)(void);      /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_avx512vl)(void);     /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_fma)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_fma4)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_ht)(void);           /* return 0 .. logical processor count */
DECLEXTERN  int X86IDFN(is_athlon)(void);       /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_hammer)(void);       /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_gh)(void);           /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_gh_a)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_gh_b)(void);         /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_shanghai)(void);     /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_istanbul)(void);     /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_bulldozer)(void);    /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_piledriver)(void);   /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_k7)(void);           /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_ia32e)(void);        /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_p4)(void);           /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_knl)(void);          /* return 0 or 1 */
DECLEXTERN  int X86IDFN(is_x86_64)(void);       /* return 0 or 1 */
DECLEXTERN  int X86IDFN(get_cachesize)(void);
DECLEXTERN  int X86IDFN(is_f16c)(void);
DECLEXTERN  char *X86IDFN(get_processor_name)(void);

/*
 * NOTE(review): declared with plain extern rather than DECLEXTERN, unlike
 * every other symbol in this header -- confirm that is intentional for
 * TARGET_WIN_X8664 DLL builds.
 */
extern  int X86IDFN(get_cores)(void);

#ifdef __cplusplus
}
#endif

#endif          /* ! defined(__ASSEMBLER__) */

#endif /* X86ID_H_ */
/* vim: set ts=4 expandtab: */