1 /*
2  * Copyright (c) 2006-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /**
19    \file
20    \brief Structures to describe the x86 CPU type and CPU features
21  */
22 
23 #ifndef X86_H_
24 #define X86_H_
25 
/*
 * Machine (processor family) identifiers.
 *
 * TEST_MACH() uses these values to index mach.type[], which is declared
 * with MACH_NUMBER elements.  Numbering starts at 1, so index 0 is not
 * assigned to any machine.
 */

/* Vendor-neutral */
#define MACH_GENERIC                1

/* Intel */
#define MACH_INTEL                  2
#define MACH_INTEL_PENTIUM4         3
#define MACH_INTEL_CORE2            4
#define MACH_INTEL_PENRYN           5
#define MACH_INTEL_NEHALEM          6
#define MACH_INTEL_SANDYBRIDGE      7
#define MACH_INTEL_HASWELL          8
#define MACH_INTEL_KNIGHTS_LANDING  9
#define MACH_INTEL_SKYLAKE         10
#define MACH_INTEL_LARRABEE        11 /* delete this when possible! */

/* AMD */
#define MACH_AMD                   12
#define MACH_AMD_ATHLON            13
#define MACH_AMD_ATHLON_XP         14
#define MACH_AMD_HAMMER            15
#define MACH_AMD_GH                16
#define MACH_AMD_SHANGHAI          17
#define MACH_AMD_ISTANBUL          18
#define MACH_AMD_BULLDOZER         19
#define MACH_AMD_PILEDRIVER        20
#define MACH_AMD_ZEN               21

/* One past the largest MACH_ value; keep in step when adding a machine. */
#define MACH_NUMBER                22
49 
/*
 * Feature identifiers.
 *
 * TEST_FEATURE() uses these values to index mach.feature[], which is
 * declared with FEATURE_NUMBER elements.  Values are dense and start at 0.
 */
#define FEATURE_SCALAR_SSE        0 /* -Mscalarsse flag */
#define FEATURE_SSE               1 /* supports SSE */
#define FEATURE_SSE2              2 /* supports SSE2 */
#define FEATURE_SSE3              3 /* supports SSE3 */
#define FEATURE_SSE41             4 /* supports SSE4.1 (>= Intel penryn, AMD bulldozer) */
#define FEATURE_SSE42             5 /* supports SSE4.2 (>= Intel nehalem, AMD bulldozer) */
#define FEATURE_SSE4A             6 /* supports SSE4a (>= AMD barcelona) */
#define FEATURE_SSE5              7 /* supports SSE5 (AMD) */
#define FEATURE_MNI               8 /* supports Merom New Instructions (SSSE3, Intel) */
#define FEATURE_DAZ               9 /* -Mdaz flag, denorm as zero */
#define FEATURE_PREFER_MOVLPD    10 /* prefer movlpd over movsd, used in CG */
#define FEATURE_USE_INC          11 /* prefer incl over addl $1 */
#define FEATURE_USE_MOVAPD       12 /* use movapd instead of movsd */
#define FEATURE_MERGE_DEPENDENT  13 /* different CG decisions */
#define FEATURE_SCALAR_NONTEMP   14 /* in llvect */
#define FEATURE_SSEIMAX          15 /* use SSE code sequence for IMAX/IMIN */
#define FEATURE_MISALIGNEDSSE    16 /* allow misaligned SSE ops from memory */
#define FEATURE_LD_MOVUPD        17 /* use movupd for unaligned packed loads */
#define FEATURE_ST_MOVUPD        18 /* use movupd for unaligned packed stores */
#define FEATURE_UNROLL_16        19 /* extra unrolling, unroll factor is 16 (initially for GH) */
#define FEATURE_DOUBLE_UNROLL    20 /* double unroll factor (initially for GH) */
#define FEATURE_PEEL_SHUFFLE     21 /* allow peel-shuffle */
#define FEATURE_PREFETCHNTA      22 /* allow prefetchnta */
#define FEATURE_PDSHUF           23 /* prefer PDSHUF over UNPCK[LH]PD etc. */
#define FEATURE_SSEPMAX          24 /* use PMAX/PMIN for IMAX/IMIN in SSE (SSE4.1) */
#define FEATURE_GHLIBS           25 /* use _gh library routines */
#define FEATURE_SSEMISALN        26 /* allow misaligned SSE memory operands */
#define FEATURE_ABM              27 /* allow advanced bit manipulation */
#define FEATURE_AVX              28 /* supports AVX - Advanced Vector Extensions */
#define FEATURE_LRBNI            29 /* supports LRBni - Larrabee new instructions */
#define FEATURE_FMA4             30 /* supports 4-operand FMA */
#define FEATURE_XOP              31 /* supports eXtended OPerations */
#define FEATURE_FMA3             32 /* supports 3-operand FMA */
#define FEATURE_MULTI_ACCUM      33 /* multiple accumulators for reductions */
#define FEATURE_SIMD128          34 /* use SIMD:128, even with AVX */
#define FEATURE_NOPREFETCH       35 /* disable prefetches */
#define FEATURE_ALIGNLOOP4       36 /* align loops at 4 */
#define FEATURE_ALIGNLOOP8       37 /* align loops at 8 */
#define FEATURE_ALIGNLOOP16      38 /* align loops at 16 */
#define FEATURE_ALIGNLOOP32      39 /* align loops at 32 */
#define FEATURE_ALIGNJMP8        40 /* align after jump at 8 */
#define FEATURE_ALIGNJMP16       41 /* align after jump at 16 */
#define FEATURE_LD_VMOVUPD       42 /* use vmovupd for 32-byte unaligned loads */
#define FEATURE_ST_VMOVUPD       43 /* use vmovupd for 32-byte unaligned stores */
#define FEATURE_AVX2             44 /* supports AVX2 */
#define FEATURE_AVX512F          45 /* supports AVX-512F */
#define FEATURE_AVX512VL         46 /* supports AVX-512VL */

/* One past the largest FEATURE_ value; keep in step when adding a feature. */
#define FEATURE_NUMBER           47
100 
/*
 * ARM: reuse the x64/x86 FEATURE_ manifests under ARM-flavoured names.
 * Each alias expands to the x86 identifier on the right, so it shares
 * that identifier's slot.
 */
#ifdef TARGET_LLVM_ARM
#define FEATURE_SCALAR_NEON  FEATURE_SCALAR_SSE
#define FEATURE_NEON         FEATURE_SSE
#define FEATURE_FMA          FEATURE_FMA3
#endif /* TARGET_LLVM_ARM */

/*
 * POWER: reuse the x64/x86 FEATURE_ manifests under POWER-flavoured names.
 * FEATURE_FMA is given the same replacement as in the ARM section.
 */
#ifdef TARGET_LLVM_POWER
#define FEATURE_SCALAR_VSX   FEATURE_SCALAR_SSE
#define FEATURE_VSX          FEATURE_SSE
#define FEATURE_FMA          FEATURE_FMA3
#endif /* TARGET_LLVM_POWER */
114 
/**
   \brief Description of one target: its TP value, machine-type flags,
   feature flags and cache size.

   The TEST_MACH*() and TEST_FEATURE*() macros in this header read
   \c type[] and \c feature[] of the global \c mach as truth values.
 */
typedef struct {
  /* NOTE(review): presumably one of the TP_ values defined in this header;
   * confirm against the code that fills it in. */
  int tpval;
  /* Indexed by a MACH_ value; nonzero is treated as "set" by TEST_MACH(). */
  int type[MACH_NUMBER];
  /* Indexed by a FEATURE_ value; nonzero is treated as "set" by
   * TEST_FEATURE(). */
  int feature[FEATURE_NUMBER];
  /* Read through TEST_CACHE.  Units are not stated in this header. */
  long cachesize;
} X86TYPE;

/* The single global target description; defined outside this header. */
extern X86TYPE mach;
123 
/*
 * Target-processor (TP) values.
 *
 * Keep these ordered so that a more powerful processor always has a larger
 * value: the numbers are used to sort the TP values, so that code for the
 * most aggressive processors is generated first.  The lowest allowable
 * value is 1.
 */
#define TP_PY               1
#define TP_PX               2
#define TP_P5               3
#define TP_ATHLON           4
#define TP_P6               5
#define TP_ATHLON_XP        6
#define TP_PIII             7
#define TP_K8               8
#define TP_P7               9
#define TP_K8E             10
#define TP_PIV             11
#define TP_GH              12
#define TP_CORE2           13
#define TP_PENRYN          14
#define TP_SHANGHAI        15
#define TP_ISTANBUL        16
#define TP_NEHALEM         17
#define TP_BULLDOZER       18
#define TP_SANDYBRIDGE     19
#define TP_IVYBRIDGE       20
#define TP_HASWELL         21
#define TP_LARRABEE        22 /* delete this when possible! */
#define TP_PILEDRIVER      23
#define TP_ZEN             24
#define TP_KNIGHTS_LANDING 25
#define TP_SKYLAKE         26
155 
/*
 * Query macros over the global `mach`.  Each expands to an expression
 * that reads the named member directly; no bounds checking is performed.
 *
 * The ...N variants accept a second argument that the expansion never
 * uses.
 */

/* Machine-type flags, indexed by MACH_ value. */
#define TEST_MACH(mach_id) (mach.type[mach_id])
#define TEST_MACH2(mach_id_a, mach_id_b) \
  (mach.type[mach_id_a] || mach.type[mach_id_b])
#define TEST_MACHN(mach_id, unused_n) (mach.type[mach_id])

/* Feature flags, indexed by FEATURE_ value. */
#define TEST_FEATURE(feat_id) (mach.feature[feat_id])
#define TEST_FEATURE2(feat_id_a, feat_id_b) \
  (mach.feature[feat_id_a] || mach.feature[feat_id_b])
#define TEST_FEATUREN(feat_id, unused_n) (mach.feature[feat_id])

/* Cache size recorded in `mach`. */
#define TEST_CACHE (mach.cachesize)

/*
 * NOTE(review): X86TYPE as declared in this header has no `accel` member,
 * so any use of TEST_ACCEL fails to compile against this declaration.
 * Presumably the member exists only in accelerator-enabled builds --
 * confirm, or retire this macro.
 */
#define TEST_ACCEL (mach.accel)
164 
/**
   \brief return \c true if any accelerator is not the host device

   \note This header includes no other header itself, so \c bool must
   already be declared (for C, e.g. via <stdbool.h>) before this prototype
   is seen.
 */
bool any_gpu_device(void);
169 
/**
   \brief Is this accel type specified on the command line?

   \param v the accel type to look for.  NOTE(review): the set of valid
   values is not defined in this header -- confirm against the definition.
   \return \c true if the accel type was specified, \c false otherwise
 */
bool have_mach_accel(int v);
174 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the accelerator value \p a -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxaccel(int a);
179 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the accelerator type \p a -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxacceltype(int a);
184 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the accelerator feature \p a -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxaccfeature(int a);
189 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the \c FEATURE_ value \p f -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxfeature(int f);
194 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the \c TP_ value \p tp -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxtp(int tp);
199 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably returns a printable name
   for the \c MACH_ value \p m -- confirm against the definition.
   Ownership and lifetime of the returned string are not visible here.
 */
char *sxtype(int m);
204 
/**
   \brief return \c ACC_TYPE value from accelerator value

   \param accval the accelerator value to map.  The \c ACC_ and \c ACC_TYPE
   constants are not defined in this header.
 */
int acctype(int accval);
209 
/**
   \brief return ACC_ value given the accelerator name

   \param accname the accelerator name to look up.  NOTE(review): the
   result for an unrecognized name is not stated here -- confirm against
   the definition.
 */
int accvalue(char *accname);
214 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably maps the target-processor
   name \p thistpname to an integer value -- confirm against the
   definition, including the result for an unrecognized name.
 */
int machvalue(char *thistpname);
219 
/**
   \brief make sure the first accelerator is ACC_MULTICORE or ACC_HOST

   If there is more than one TP value, also fill in flg.acctypeindex.
 */
void check_no_acc(void);
226 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably validates or finalizes the selected TP
   value(s); the effect of \p skip is not visible here -- confirm against
   the definition.
 */
void check_tp(bool skip);
231 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably copies an accumulated "intersection"
   machine description to or from \p mach -- the direction is not visible
   here; confirm against the definition.

   \note The parameter name shadows the global \c mach declared in this
   header.
 */
void copy_mach_intersect(X86TYPE *mach);
236 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably dumps the global \c mach for debugging;
   the output destination is not visible here -- confirm against the
   definition.
 */
void dumpmach(void);
241 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably dumps the machine description \p mach for
   debugging -- confirm against the definition.

   \note The parameter name shadows the global \c mach declared in this
   header.  The function name begins with an underscore, which ISO C
   reserves for the implementation at file scope; it is kept because it is
   part of the existing interface.
 */
void _dumpmach(X86TYPE *mach);
246 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably resets the accumulated "intersection"
   machine description used by copy_mach_intersect() and
   intersect_mach_intersect() -- confirm against the definition.
 */
void init_mach_intersect(void);
251 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name, presumably intersects \p mach with the accumulated
   "intersection" machine description -- confirm against the definition,
   including which side receives the result.

   \note The parameter name shadows the global \c mach declared in this
   header.
 */
void intersect_mach_intersect(X86TYPE *mach);
256 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably selects the accelerator
   named \p accname -- confirm against the definition, including the
   handling of an unrecognized name.
 */
void set_acc(char *accname);
261 
/**
   \brief set mach.accel

   \param mach        the machine description to update
   \param accelvalue  the accelerator value to record

   \note X86TYPE as declared in this header has no \c accel member --
   NOTE(review): confirm which build configuration provides it.  The
   parameter name shadows the global \c mach declared in this header.
 */
void set_mach_accel(X86TYPE *mach, int accelvalue);
266 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably fills in \p mach for the
   machine selected by \p machtype.  Whether \p machtype is a \c MACH_ or
   a \c TP_ value is not visible here -- confirm against the definition.

   \note The parameter name shadows the global \c mach declared in this
   header.
 */
void set_mach(X86TYPE *mach, int machtype);
271 
/**
   \brief NOTE(review): undocumented in this header.

   Going by the name and signature, presumably selects the target
   processor named \p thistpname -- confirm against the definition,
   including the handling of an unrecognized name.
 */
void set_tp(char *thistpname);
276 
277 #endif /* X86_H_ */
278