/*
 * Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/**
   \file
   \brief Structures to describe the x86 CPU type and CPU features

   The MACH_ values index X86TYPE::type, the FEATURE_ values index
   X86TYPE::feature, and the TP_ values are the ordered target-processor
   codes stored in X86TYPE::tpval.
 */

#ifndef X86_H_
#define X86_H_

/* The prototypes below use bool; pull it in so that this header is
 * self-contained when compiled as C (C++ has bool built in). */
#ifndef __cplusplus
#include <stdbool.h>
#endif

/* Machine (CPU family) identifiers; each one is an index into
 * X86TYPE::type.  MACH_NUMBER is the size of that array and must stay one
 * greater than the largest MACH_ value. */
#define MACH_GENERIC 1
#define MACH_INTEL 2
#define MACH_INTEL_PENTIUM4 3
#define MACH_INTEL_CORE2 4
#define MACH_INTEL_PENRYN 5
#define MACH_INTEL_NEHALEM 6
#define MACH_INTEL_SANDYBRIDGE 7
#define MACH_INTEL_HASWELL 8
#define MACH_INTEL_KNIGHTS_LANDING 9
#define MACH_INTEL_SKYLAKE 10
#define MACH_INTEL_LARRABEE 11 /* delete this when possible! */
#define MACH_AMD 12
#define MACH_AMD_ATHLON 13
#define MACH_AMD_ATHLON_XP 14
#define MACH_AMD_HAMMER 15
#define MACH_AMD_GH 16
#define MACH_AMD_SHANGHAI 17
#define MACH_AMD_ISTANBUL 18
#define MACH_AMD_BULLDOZER 19
#define MACH_AMD_PILEDRIVER 20
#define MACH_AMD_ZEN 21

#define MACH_NUMBER 22

/* Feature identifiers; each one is an index into X86TYPE::feature.
 * FEATURE_NUMBER is the size of that array and must stay one greater than
 * the largest FEATURE_ value. */
#define FEATURE_SCALAR_SSE 0       /* -Mscalarsse flag */
#define FEATURE_SSE 1              /* supports SSE */
#define FEATURE_SSE2 2             /* supports SSE2 */
#define FEATURE_SSE3 3             /* supports SSE3 */
#define FEATURE_SSE41 4            /* supports SSE4.1 (>= Intel penryn,
                                    * AMD bulldozer) */
#define FEATURE_SSE42 5            /* supports SSE4.2 (>= Intel nehalem,
                                    * AMD bulldozer) */
#define FEATURE_SSE4A 6            /* supports SSE4a (>= AMD barcelona) */
#define FEATURE_SSE5 7             /* supports SSE5 (AMD) */
#define FEATURE_MNI 8              /* supports Merom New Instructions,
                                    * SSSE3 Intel */
#define FEATURE_DAZ 9              /* -Mdaz flag, denorm as zero */
#define FEATURE_PREFER_MOVLPD 10   /* prefer movlpd over movsd, used in CG */
#define FEATURE_USE_INC 11         /* prefer incl over addl $1 */
#define FEATURE_USE_MOVAPD 12      /* use movapd instead of movsd */
#define FEATURE_MERGE_DEPENDENT 13 /* different CG decisions */
#define FEATURE_SCALAR_NONTEMP 14  /* in llvect */
#define FEATURE_SSEIMAX 15         /* use SSE code sequence for IMAX/IMIN */
#define FEATURE_MISALIGNEDSSE 16   /* allow misaligned SSE ops from memory */
#define FEATURE_LD_MOVUPD 17       /* use movupd for unaligned packed loads */
#define FEATURE_ST_MOVUPD 18       /* use movupd for unaligned packed stores */
#define FEATURE_UNROLL_16 19       /* extra unrolling, unroll factor is 16
                                    * (initially for GH) */
#define FEATURE_DOUBLE_UNROLL 20   /* double unroll factor (initially for GH) */
#define FEATURE_PEEL_SHUFFLE 21    /* allow peel-shuffle */
#define FEATURE_PREFETCHNTA 22     /* allow prefetchnta */
#define FEATURE_PDSHUF 23          /* prefer PDSHUF over UNPCK[LH]PD etc. */
#define FEATURE_SSEPMAX 24         /* use PMAX/PMIN for IMAX/IMIN in SSE
                                    * (SSE4.1) */
#define FEATURE_GHLIBS 25          /* use _gh library routines */
#define FEATURE_SSEMISALN 26       /* allow misaligned SSE memory operands */
#define FEATURE_ABM 27             /* allow advanced bit manipulation */
#define FEATURE_AVX 28             /* supports AVX - Advanced Vector
                                    * Extensions */
#define FEATURE_LRBNI 29           /* supports LRBni - Larrabee new
                                    * instructions */
#define FEATURE_FMA4 30            /* supports 4-operand FMA */
#define FEATURE_XOP 31             /* supports eXtended OPerations */
#define FEATURE_FMA3 32            /* supports 3-operand FMA */
#define FEATURE_MULTI_ACCUM 33     /* multiple accumulators for reductions */
#define FEATURE_SIMD128 34         /* Use SIMD:128, even with AVX */
#define FEATURE_NOPREFETCH 35      /* Disable prefetches */
#define FEATURE_ALIGNLOOP4 36      /* Align loops at 4 */
#define FEATURE_ALIGNLOOP8 37      /* Align loops at 8 */
#define FEATURE_ALIGNLOOP16 38     /* Align loops at 16 */
#define FEATURE_ALIGNLOOP32 39     /* Align loops at 32 */
#define FEATURE_ALIGNJMP8 40       /* Align after jump at 8 */
#define FEATURE_ALIGNJMP16 41      /* Align after jump at 16 */
#define FEATURE_LD_VMOVUPD 42      /* use vmovupd for 32-byte unaligned loads */
#define FEATURE_ST_VMOVUPD 43      /* use vmovupd for 32-byte unaligned
                                    * stores */
#define FEATURE_AVX2 44            /* supports AVX2 */
#define FEATURE_AVX512F 45         /* supports AVX-512F */
#define FEATURE_AVX512VL 46        /* supports AVX-512VL */

#define FEATURE_NUMBER 47

/***** ARM -- recycle FEATURE_ x64/x86 manifests *****/
#if defined(TARGET_LLVM_ARM)
#define FEATURE_SCALAR_NEON FEATURE_SCALAR_SSE
#define FEATURE_NEON FEATURE_SSE
#define FEATURE_FMA FEATURE_FMA3
#endif

/***** POWER -- recycle FEATURE_ x64/x86 manifests *****/
#if defined(TARGET_LLVM_POWER)
#define FEATURE_SCALAR_VSX FEATURE_SCALAR_SSE
#define FEATURE_VSX FEATURE_SSE
#define FEATURE_FMA FEATURE_FMA3
#endif

/**
   \brief Description of one target: its TP value, machine types, features
   and cache size.
 */
typedef struct {
  int tpval;                   /* one of the TP_ values below */
  int type[MACH_NUMBER];       /* indexed by MACH_ value; tested by
                                * TEST_MACH() */
  int feature[FEATURE_NUMBER]; /* indexed by FEATURE_ value; tested by
                                * TEST_FEATURE() */
  long cachesize;              /* read through TEST_CACHE; units are not
                                * stated in this header */
} X86TYPE;

/* The global target description that every TEST_ macro below reads. */
extern X86TYPE mach;

/* These TP values should be sorted so the most powerful have the
 * largest values; these are used to sort the TP values, so we
 * generate the code for the most aggressive processors first. The
 * lowest allowable value is 1.
 */
#define TP_PY 1
#define TP_PX 2
#define TP_P5 3
#define TP_ATHLON 4
#define TP_P6 5
#define TP_ATHLON_XP 6
#define TP_PIII 7
#define TP_K8 8
#define TP_P7 9
#define TP_K8E 10
#define TP_PIV 11
#define TP_GH 12
#define TP_CORE2 13
#define TP_PENRYN 14
#define TP_SHANGHAI 15
#define TP_ISTANBUL 16
#define TP_NEHALEM 17
#define TP_BULLDOZER 18
#define TP_SANDYBRIDGE 19
#define TP_IVYBRIDGE 20
#define TP_HASWELL 21
#define TP_LARRABEE 22 /* delete this when possible! */
#define TP_PILEDRIVER 23
#define TP_ZEN 24
#define TP_KNIGHTS_LANDING 25
#define TP_SKYLAKE 26

/* Query macros over the global `mach`.  The ...2 forms are true when either
 * entry is set.  The ...N forms accept a second argument N but ignore it:
 * they expand exactly like the one-argument forms. */
#define TEST_MACH(M) (mach.type[M])
#define TEST_MACH2(M1, M2) (mach.type[M1] || mach.type[M2])
#define TEST_MACHN(M, N) (mach.type[M])
#define TEST_FEATURE(M) (mach.feature[M])
#define TEST_FEATURE2(M1, M2) (mach.feature[M1] || mach.feature[M2])
#define TEST_FEATUREN(M, N) (mach.feature[M])
#define TEST_CACHE (mach.cachesize)
/* NOTE(review): X86TYPE as declared in this header has no `accel` member,
 * so any use of TEST_ACCEL fails to compile against this definition.
 * Confirm whether the macro is stale or the struct is missing a field. */
#define TEST_ACCEL mach.accel

/**
   \brief return \c true if any accelerator is not the host device
 */
bool any_gpu_device(void);

/**
   \brief Is this accel type specified on the command line?
   \param v the accel type to look for
 */
bool have_mach_accel(int v);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   accelerator value \p a -- confirm against the definition.
 */
char *sxaccel(int a);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   accelerator type \p a -- confirm against the definition.
 */
char *sxacceltype(int a);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   accelerator feature \p a -- confirm against the definition.
 */
char *sxaccfeature(int a);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   feature \p f (a FEATURE_ value) -- confirm against the definition.
 */
char *sxfeature(int f);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   target-processor value \p tp (a TP_ value) -- confirm against the
   definition.
 */
char *sxtp(int tp);

/**
   \brief NOTE(review): undocumented; presumably returns a printable name for
   machine type \p m (a MACH_ value) -- confirm against the definition.
 */
char *sxtype(int m);

/**
   \brief return \c ACC_TYPE value from accelerator value
   \param accval the accelerator value to convert
 */
int acctype(int accval);

/**
   \brief return ACC_ value given the accelerator name
   \param accname the accelerator name to look up
 */
int accvalue(char *accname);

/**
   \brief NOTE(review): undocumented; presumably returns the TP_ value for the
   target-processor name \p thistpname -- confirm against the definition.
 */
int machvalue(char *thistpname);

/**
   \brief make sure the first accelerator is ACC_MULTICORE or ACC_HOST

   If there is more than one TP value also fill in flg.acctypeindex.
 */
void check_no_acc(void);

/**
   \brief NOTE(review): undocumented; presumably validates the selected TP
   values, with \p skip altering or bypassing part of the check -- confirm
   against the definition.
 */
void check_tp(bool skip);

/**
   \brief NOTE(review): undocumented; presumably copies the accumulated
   "intersect" machine description into \p mach -- confirm the direction of
   the copy against the definition.

   The parameter name shadows the global \c mach inside the function.
 */
void copy_mach_intersect(X86TYPE *mach);

/**
   \brief NOTE(review): undocumented; presumably dumps the global \c mach for
   debugging -- confirm against the definition.
 */
void dumpmach(void);

/**
   \brief NOTE(review): undocumented; presumably dumps the machine description
   pointed to by \p mach for debugging -- confirm against the definition.
 */
void _dumpmach(X86TYPE *mach);

/**
   \brief NOTE(review): undocumented; presumably initializes the "intersect"
   machine description used by copy_mach_intersect() and
   intersect_mach_intersect() -- confirm against the definition.
 */
void init_mach_intersect(void);

/**
   \brief NOTE(review): undocumented; presumably intersects \p mach with the
   accumulated "intersect" machine description -- confirm against the
   definition.
 */
void intersect_mach_intersect(X86TYPE *mach);

/**
   \brief NOTE(review): undocumented; presumably selects the accelerator named
   \p accname -- confirm against the definition.
 */
void set_acc(char *accname);

/**
   \brief set mach.accel

   NOTE(review): X86TYPE as declared in this header has no \c accel member;
   confirm what this function actually stores into.
 */
void set_mach_accel(X86TYPE *mach, int accelvalue);

/**
   \brief NOTE(review): undocumented; presumably fills in \p mach for the
   machine/TP value \p machtype -- confirm against the definition.
 */
void set_mach(X86TYPE *mach, int machtype);

/**
   \brief NOTE(review): undocumented; presumably selects the target processor
   named \p thistpname -- confirm against the definition.
 */
void set_tp(char *thistpname);

#endif /* X86_H_ */