1 /*
2  * Copyright (c) 2007-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 #ifndef X86ID_H_
19 #define X86ID_H_
20 
21 
/*
 * X86IDFN(name) builds the external symbol for `name` by pasting it onto the
 * common "__Cpuid_" prefix.  The extra X86IDFN_ level exists so that a macro
 * passed as `name` is expanded before the tokens are pasted together.
 */
#define X86IDFN_(prefix,suffix) prefix##suffix
#define X86IDFN(name) X86IDFN_(__Cpuid_,name)

/* Value held by an is_<feature>_cached variable that is not yet initialized. */
#define X86ID_IS_CACHED_UNDEF   (-1)
26 
27 /*
 * Bit masks for various X8664 hardware features within X86IDFN(hw_features).
29  * If X86IDFN(hw_features) == 0, the variable is undefined and is
30  * initialized by calling X86IDFN(init_hw_features)().
31  *
 * X86IDFN(hw_features) is intended to be used by runtime routines that have
 * different execution paths depending on hardware characteristics.  In
 * particular, it lets them select between SSE and AVX implementations to
 * avoid some processors' expensive AVX/SSE transition penalties.
36  *
37  * A prototype assembly pseudo code implementation would be:
38  *
39  * #if  defined(TARGET_WIN_X8664)
40  *      movl    ENT(X86IDFN(hw_features))(%rip), %eax
41  * #else
 *      movq    ENT(X86IDFN(hw_features))@GOTPCREL(%rip), %rax
43  *      movl    (%rax), %eax
44  * #endif
45  *
46  * 1:
47  *      testl   $HW_AVX, %eax
48  *      jnz     do_avx
49  *      testl   $HW_SSE, %eax
50  *      jnz     do_sse          // Can't assume do_sse on first pass
51  *      subq    $8, %rsp        // Adjusted for number of local regs to save
52  *      movq    I1, (%rsp)      // Or %xmm0, or I1, %xmm0, or %ymm0, ...
 *      movl    %eax, I1W       // Input to X86IDFN(init_hw_features)()
54  *      CALL    (ENT(X86IDFN(init_hw_features)))    // (%eax) = hw_features
55  *      movq    (%rsp), I1      // And possibly more regs
56  *      addq    $8, %rsp
57  *      jmp     1b              // Restart feature tests
58  *
 * Note: X86IDFN(init_hw_features)(X86IDFN(hw_features)) will abort if
 * I1W on entry is the same as the return value.
61  *
62  */
63 
/*
 * One bit per hardware feature, combined in X86IDFN(hw_features).
 * These stay plain hexadecimal literals, and are defined outside the
 * __ASSEMBLER__ guard, so assembly sources can use them as immediates
 * (for example "testl $HW_AVX, %eax").
 */
#define HW_SSE      0x0001  /* SSE, SSE2, SSE3 */
#define HW_SSE4     0x0002  /* SSE4A, SSE41, SSE42 */
#define HW_AVX      0x0004
#define HW_AVX2     0x0008
#define HW_AVX512   0x0010
#define HW_AVX512F  0x0020
#define HW_AVX512VL 0x0040
#define HW_FMA      0x0080
#define HW_FMA4     0x0100
#define HW_KNL      0x0200
#define HW_F16C     0x0400
#define HW_SSSE3    0x0800
76 
77 #if     ! defined(__ASSEMBLER__)
78 
79 #include <stdint.h>
80 
/*
 * IS_CONCAT3(l,m,r) pastes three tokens into one identifier.  The extra
 * IS_CONCAT3_ level lets macro arguments expand before pasting; `r` may be
 * left empty.
 */
#define IS_CONCAT3_(l,m,r)  l##m##r
#define IS_CONCAT3(l,m,r)   IS_CONCAT3_(l,m,r)

/*
 * IS_X86ID(f) yields the value of feature `f` (e.g. IS_X86ID(avx)):
 *   - if X86IDFN(is_<f>_cached) is not X86ID_IS_CACHED_UNDEF, the cached
 *     value is used and no call is made;
 *   - otherwise X86IDFN(is_<f>)() is called and its result is used.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand inside larger expressions such as !IS_X86ID(f) or
 * IS_X86ID(f) && other; without the outer parentheses the surrounding
 * operators would bind to only part of the conditional expression.
 */
#define IS_X86ID(f)                                                           \
    ((X86IDFN(IS_CONCAT3(is_,f,_cached)) != X86ID_IS_CACHED_UNDEF) ?          \
        X86IDFN(IS_CONCAT3(is_,f,_cached)) : X86IDFN(IS_CONCAT3(is_,f,))())
87 
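/*
 * All the "_cached" variables hold one of three values:
 * 1) X86ID_IS_CACHED_UNDEF (-1):   not initialized
 * 2) false (0):                    initialized and value is false
 * 3) true (1):                     initialized and value is true
 *
 * NOTE(review): is_ht() is documented below as returning
 * 0 .. logical processor count, so its cached value may exceed 1 -- confirm.
 */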
94 
/*
 *  DECLEXTERN is the storage/linkage prefix for every global declared below.
 *
 *  - Anything other than a Windows x86-64 DLL build (Linux, OSX, ...):
 *    plain "extern" is sufficient.
 *  - Windows x86-64 DLL build (TARGET_WIN_X8664 and _DLL both defined):
 *    MS' __declspec attribute is required instead.  x86id.c, which defines
 *    the is_<FEATURE>_cached globals, is built with the CPP object macro
 *    OBJ_WIN_X8664_IS_X86ID defined and therefore exports the symbols;
 *    every other translation unit imports them.
 */

#if     ! (defined (TARGET_WIN_X8664) && defined(_DLL))
#   define  DECLEXTERN  extern
#elif   defined(OBJ_WIN_X8664_IS_X86ID)
#   define  DECLEXTERN  __declspec(dllexport)
#else
#   define  DECLEXTERN  __declspec(dllimport)
#endif
112 
113 #ifdef __cplusplus
114 extern "C" {
115 #endif
/*
 * Mask of HW_* flags.  A value of 0 means the variable has not been
 * initialized yet.
 */
DECLEXTERN  uint32_t    X86IDFN(hw_features);

/*
 * One cache variable per X86IDFN(is_<feature>)() query function.
 * IS_X86ID(<feature>) uses the cached value directly and only calls the
 * query function while the variable still equals X86ID_IS_CACHED_UNDEF.
 */
DECLEXTERN	int X86IDFN(is_intel_cached);
DECLEXTERN	int X86IDFN(is_amd_cached);
DECLEXTERN	int X86IDFN(is_ip6_cached);
DECLEXTERN	int X86IDFN(is_sse_cached);
DECLEXTERN	int X86IDFN(is_sse2_cached);
DECLEXTERN	int X86IDFN(is_sse3_cached);
DECLEXTERN	int X86IDFN(is_ssse3_cached);
DECLEXTERN	int X86IDFN(is_sse4a_cached);
DECLEXTERN	int X86IDFN(is_sse41_cached);
DECLEXTERN	int X86IDFN(is_sse42_cached);
DECLEXTERN	int X86IDFN(is_aes_cached);
DECLEXTERN	int X86IDFN(is_avx_cached);
DECLEXTERN	int X86IDFN(is_avx2_cached);
DECLEXTERN	int X86IDFN(is_avx512_cached);
DECLEXTERN	int X86IDFN(is_avx512f_cached);
DECLEXTERN	int X86IDFN(is_avx512vl_cached);
DECLEXTERN	int X86IDFN(is_fma_cached);
DECLEXTERN	int X86IDFN(is_fma4_cached);
DECLEXTERN	int X86IDFN(is_ht_cached);
DECLEXTERN	int X86IDFN(is_athlon_cached);
DECLEXTERN	int X86IDFN(is_hammer_cached);
DECLEXTERN	int X86IDFN(is_gh_cached);
DECLEXTERN	int X86IDFN(is_gh_a_cached);
DECLEXTERN	int X86IDFN(is_gh_b_cached);
DECLEXTERN	int X86IDFN(is_shanghai_cached);
DECLEXTERN	int X86IDFN(is_istanbul_cached);
DECLEXTERN	int X86IDFN(is_bulldozer_cached);
DECLEXTERN	int X86IDFN(is_piledriver_cached);
DECLEXTERN	int X86IDFN(is_k7_cached);
DECLEXTERN	int X86IDFN(is_ia32e_cached);
DECLEXTERN	int X86IDFN(is_p4_cached);
DECLEXTERN	int X86IDFN(is_knl_cached);
DECLEXTERN	int X86IDFN(is_x86_64_cached);
DECLEXTERN	int X86IDFN(is_f16c_cached);
151 
/*
 * Query functions.  Each is_<feature>() has a matching is_<feature>_cached
 * variable; IS_X86ID(<feature>) calls the function only while that variable
 * is still X86ID_IS_CACHED_UNDEF.
 */
DECLEXTERN	int X86IDFN(is_intel)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_amd)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_ip6)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse2)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse3)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_ssse3)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse4a)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse41)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_sse42)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_aes)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_avx)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_avx2)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_avx512)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_avx512f)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_avx512vl)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_fma)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_fma4)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_ht)(void);	/* return 0 .. logical processor count */
DECLEXTERN	int X86IDFN(is_athlon)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_hammer)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_gh)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_gh_a)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_gh_b)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_shanghai)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_istanbul)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_bulldozer)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_piledriver)(void);/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_k7)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_ia32e)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_p4)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_knl)(void);	/* return 0 or 1 */
DECLEXTERN	int X86IDFN(is_x86_64)(void);	/* return 0 or 1 */
/* NOTE(review): units of the returned cache size are not stated here -- confirm. */
DECLEXTERN	int X86IDFN(get_cachesize)(void);
/* Presumably returns 0 or 1 like the other is_*() queries -- TODO confirm. */
DECLEXTERN	int X86IDFN(is_f16c)(void);
/* NOTE(review): ownership/lifetime of the returned string is not stated here -- confirm. */
DECLEXTERN	char *X86IDFN(get_processor_name)(void);
188 
/*
 * NOTE(review): declared with plain "extern" rather than DECLEXTERN, unlike
 * the other functions in this header -- confirm this is intentional for
 * Windows DLL builds.
 */
extern int X86IDFN(get_cores)(void);
190 
191 #ifdef __cplusplus
192 }
193 #endif
194 
195 #endif          /* ! defined(__ASSEMBLER__) */
196 
197 #endif /* X86ID_H_ */
198 /* vim: set ts=4 expandtab: */
199