1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// +build 386 amd64 amd64p32 6 7package cpu 8 9import ( 10 "fmt" 11 "strings" 12) 13 14const CacheLineSize = 64 15 16// cpuid is implemented in cpu_x86.s. 17func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) 18 19// xgetbv with ecx = 0 is implemented in cpu_x86.s. 20func xgetbv() (eax, edx uint32) 21 22const ( 23 // edx bits 24 cpuid_SSE2 = 1 << 26 25 26 // ecx bits 27 cpuid_SSE3 = 1 << 0 28 cpuid_PCLMULQDQ = 1 << 1 29 cpuid_SSSE3 = 1 << 9 30 cpuid_FMA = 1 << 12 31 cpuid_SSE41 = 1 << 19 32 cpuid_SSE42 = 1 << 20 33 cpuid_POPCNT = 1 << 23 34 cpuid_AES = 1 << 25 35 cpuid_OSXSAVE = 1 << 27 36 cpuid_AVX = 1 << 28 37 38 // ebx bits 39 cpuid_BMI1 = 1 << 3 40 cpuid_AVX2 = 1 << 5 41 cpuid_BMI2 = 1 << 8 42 cpuid_ERMS = 1 << 9 43 cpuid_ADX = 1 << 19 44 cpuid_AVX512F = 1 << 16 45 cpuid_AVX512DQ = 1 << 17 46 cpuid_AVX512BW = 1 << 30 47 cpuid_AVX512VL = 1 << 31 48 49 // edx bits 50 cpuid_Invariant_TSC = 1 << 8 51) 52 53func doinit() { 54 options = []option{ 55 {"adx", &X86.HasADX}, 56 {"aes", &X86.HasAES}, 57 {"avx", &X86.HasAVX}, 58 {"avx2", &X86.HasAVX2}, 59 {"bmi1", &X86.HasBMI1}, 60 {"bmi2", &X86.HasBMI2}, 61 {"erms", &X86.HasERMS}, 62 {"fma", &X86.HasFMA}, 63 {"pclmulqdq", &X86.HasPCLMULQDQ}, 64 {"popcnt", &X86.HasPOPCNT}, 65 {"sse3", &X86.HasSSE3}, 66 {"sse41", &X86.HasSSE41}, 67 {"sse42", &X86.HasSSE42}, 68 {"ssse3", &X86.HasSSSE3}, 69 {"avx512f", &X86.HasAVX512F}, 70 {"avx512dq", &X86.HasAVX512DQ}, 71 {"avx512bw", &X86.HasAVX512BW}, 72 {"avx512vl", &X86.HasAVX512VL}, 73 {"invariant_tsc", &X86.HasInvariantTSC}, 74 75 // sse2 set as last element so it can easily be removed again. See code below. 76 {"sse2", &X86.HasSSE2}, 77 } 78 79 // Remove sse2 from options on amd64(p32) because SSE2 is a mandatory feature for these GOARCHs. 80 if GOARCH == "amd64" || GOARCH == "amd64p32" { 81 options = options[:len(options)-1] 82 } 83 84 maxID, _, _, _ := cpuid(0, 0) 85 86 if maxID < 1 { 87 return 88 } 89 90 _, _, ecx1, edx1 := cpuid(1, 0) 91 X86.HasSSE2 = isSet(edx1, cpuid_SSE2) 92 93 X86.HasSSE3 = isSet(ecx1, cpuid_SSE3) 94 X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ) 95 X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3) 96 X86.HasFMA = isSet(ecx1, cpuid_FMA) 97 X86.HasSSE41 = isSet(ecx1, cpuid_SSE41) 98 X86.HasSSE42 = isSet(ecx1, cpuid_SSE42) 99 X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT) 100 X86.HasAES = isSet(ecx1, cpuid_AES) 101 X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE) 102 103 osSupportsAVX := false 104 osSupportsAVX512 := false 105 // For XGETBV, OSXSAVE bit is required and sufficient. 106 if X86.HasOSXSAVE { 107 eax, _ := xgetbv() 108 // Check if XMM and YMM registers have OS support. 109 osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2) 110 // Check is ZMM registers have OS support. 111 osSupportsAVX512 = isSet(eax>>5, 7) && isSet(eax>>1, 3) 112 } 113 114 X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX 115 116 if maxID < 7 { 117 return 118 } 119 120 _, ebx7, _, _ := cpuid(7, 0) 121 X86.HasBMI1 = isSet(ebx7, cpuid_BMI1) 122 X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX 123 X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 124 X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) && osSupportsAVX512 125 X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) && osSupportsAVX512 126 X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) && osSupportsAVX512 127 X86.HasBMI2 = isSet(ebx7, cpuid_BMI2) 128 X86.HasERMS = isSet(ebx7, cpuid_ERMS) 129 X86.HasADX = isSet(ebx7, cpuid_ADX) 130 131 X86.Cache = getCacheSize() 132 133 X86.HasInvariantTSC = hasInvariantTSC() 134 135 X86.Family, X86.Model = getFamilyModel() 136 137 X86.Signature = makeSignature(X86.Family, X86.Model) 138 139 X86.Name = getName() 140 141 X86.TSCFrequency = getNativeTSCFrequency(X86.Name, X86.Signature) 142} 143 144func isSet(hwc uint32, value uint32) bool { 145 return hwc&value != 0 146} 147 148func hasInvariantTSC() bool { 149 if maxExtendedFunction() < 0x80000007 { 150 return false 151 } 152 _, _, _, edx := cpuid(0x80000007, 0) 153 return isSet(edx, cpuid_Invariant_TSC) 154} 155 156func getName() string { 157 if maxExtendedFunction() >= 0x80000004 { 158 v := make([]uint32, 0, 48) 159 for i := uint32(0); i < 3; i++ { 160 a, b, c, d := cpuid(0x80000002+i, 0) 161 v = append(v, a, b, c, d) 162 } 163 return strings.Trim(string(valAsString(v...)), " ") 164 } 165 return "unknown" 166} 167 168// getNativeTSCFrequency gets TSC frequency from CPUID, 169// only supports Intel (Skylake or later microarchitecture) & key information is from Intel manual & kernel codes 170// (especially this commit: https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684). 171func getNativeTSCFrequency(name, sign string) uint64 { 172 173 if vendorID() != Intel { 174 return 0 175 } 176 177 if maxFunctionID() < 0x15 { 178 return 0 179 } 180 181 // ApolloLake, GeminiLake, CannonLake (and presumably all new chipsets 182 // from this point) report the crystal frequency directly via CPUID.0x15. 183 // That's definitive data that we can rely upon. 184 eax, ebx, ecx, _ := cpuid(0x15, 0) 185 186 // If ebx is 0, the TSC/”core crystal clock” ratio is not enumerated. 187 // We won't provide TSC frequency detection in this situation. 188 if eax == 0 || ebx == 0 { 189 return 0 190 } 191 192 // Skylake, Kabylake and all variants of those two chipsets report a 193 // crystal frequency of zero. 194 if ecx == 0 { // Crystal clock frequency is not enumerated. 195 ecx = getCrystalClockFrequency(sign) 196 } 197 198 // TSC frequency = “core crystal clock frequency” * EBX/EAX. 199 return uint64(ecx) * (uint64(ebx) / uint64(eax)) 200} 201 202// Copied from: CPUID Signature values of DisplayFamily and DisplayModel, 203// in Intel® 64 and IA-32 Architectures Software Developer’s Manual 204// Volume 4: Model-Specific Registers 205// & https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h 206const ( 207 IntelFam6SkylakeL = "06_4EH" 208 IntelFam6Skylake = "06_5EH" 209 IntelFam6SkylakeX = "06_55H" 210 IntelFam6KabylakeL = "06_8EH" 211 IntelFam6Kabylake = "06_9EH" 212) 213 214// getCrystalClockFrequency gets crystal clock frequency 215// for Intel processors in which CPUID.15H.EBX[31:0] ÷ CPUID.0x15.EAX[31:0] is enumerated 216// but CPUID.15H.ECX is not enumerated using this function to get nominal core crystal clock frequency. 217// 218// Actually these crystal clock frequencies provided by Intel hardcoded tables are not so accurate in some cases, 219// e.g. SkyLake server CPU may have issue (All SKX subject the crystal to an EMI reduction circuit that 220//reduces its actual frequency by (approximately) -0.25%): 221// see https://lore.kernel.org/lkml/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com/ 222// for more details. 223// With this report, I set a coefficient (0.9975) for IntelFam6SkyLakeX. 224// 225// Unlike the kernel way (mentioned in https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684), 226// I prefer the Intel hardcoded tables, 227// because after some testing (comparing with wall clock, see https://github.com/templexxx/tsc/tsc_test.go for more details), 228// I found hardcoded tables are more accurate. 229func getCrystalClockFrequency(sign string) uint32 { 230 231 if maxFunctionID() < 0x16 { 232 return 0 233 } 234 235 switch sign { 236 case IntelFam6SkylakeL: 237 return 24 * 1000 * 1000 238 case IntelFam6Skylake: 239 return 24 * 1000 * 1000 240 case IntelFam6SkylakeX: 241 return 25 * 1000 * 1000 * 0.9975 242 case IntelFam6KabylakeL: 243 return 24 * 1000 * 1000 244 case IntelFam6Kabylake: 245 return 24 * 1000 * 1000 246 } 247 248 return 0 249} 250 251func getFamilyModel() (uint32, uint32) { 252 if maxFunctionID() < 0x1 { 253 return 0, 0 254 } 255 eax, _, _, _ := cpuid(1, 0) 256 family := (eax >> 8) & 0xf 257 displayFamily := family 258 if family == 0xf { 259 displayFamily = ((eax >> 20) & 0xff) + family 260 } 261 model := (eax >> 4) & 0xf 262 displayModel := model 263 if family == 0x6 || family == 0xf { 264 displayModel = ((eax >> 12) & 0xf0) + model 265 } 266 return displayFamily, displayModel 267} 268 269// signature format: XX_XXH 270func makeSignature(family, model uint32) string { 271 signature := strings.ToUpper(fmt.Sprintf("0%x_0%xH", family, model)) 272 ss := strings.Split(signature, "_") 273 for i, s := range ss { 274 // Maybe insert too more `0`, drop it. 275 if len(s) > 2 { 276 s = s[1:] 277 ss[i] = s 278 } 279 } 280 return strings.Join(ss, "_") 281} 282 283// getCacheSize is from 284// https://github.com/klauspost/cpuid/blob/5a626f7029c910cc8329dae5405ee4f65034bce5/cpuid.go#L723 285func getCacheSize() Cache { 286 c := Cache{ 287 L1I: -1, 288 L1D: -1, 289 L2: -1, 290 L3: -1, 291 } 292 293 vendor := vendorID() 294 switch vendor { 295 case Intel: 296 if maxFunctionID() < 4 { 297 return c 298 } 299 for i := uint32(0); ; i++ { 300 eax, ebx, ecx, _ := cpuid(4, i) 301 cacheType := eax & 15 302 if cacheType == 0 { 303 break 304 } 305 cacheLevel := (eax >> 5) & 7 306 coherency := int(ebx&0xfff) + 1 307 partitions := int((ebx>>12)&0x3ff) + 1 308 associativity := int((ebx>>22)&0x3ff) + 1 309 sets := int(ecx) + 1 310 size := associativity * partitions * coherency * sets 311 switch cacheLevel { 312 case 1: 313 if cacheType == 1 { 314 // 1 = Data Cache 315 c.L1D = size 316 } else if cacheType == 2 { 317 // 2 = Instruction Cache 318 c.L1I = size 319 } else { 320 if c.L1D < 0 { 321 c.L1I = size 322 } 323 if c.L1I < 0 { 324 c.L1I = size 325 } 326 } 327 case 2: 328 c.L2 = size 329 case 3: 330 c.L3 = size 331 } 332 } 333 case AMD, Hygon: 334 // Untested. 335 if maxExtendedFunction() < 0x80000005 { 336 return c 337 } 338 _, _, ecx, edx := cpuid(0x80000005, 0) 339 c.L1D = int(((ecx >> 24) & 0xFF) * 1024) 340 c.L1I = int(((edx >> 24) & 0xFF) * 1024) 341 342 if maxExtendedFunction() < 0x80000006 { 343 return c 344 } 345 _, _, ecx, _ = cpuid(0x80000006, 0) 346 c.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 347 } 348 349 return c 350} 351 352func maxFunctionID() uint32 { 353 a, _, _, _ := cpuid(0, 0) 354 return a 355} 356 357func maxExtendedFunction() uint32 { 358 eax, _, _, _ := cpuid(0x80000000, 0) 359 return eax 360} 361 362const ( 363 Other = iota 364 Intel 365 AMD 366 VIA 367 Transmeta 368 NSC 369 KVM // Kernel-based Virtual Machine 370 MSVM // Microsoft Hyper-V or Windows Virtual PC 371 VMware 372 XenHVM 373 Bhyve 374 Hygon 375) 376 377// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 378var vendorMapping = map[string]int{ 379 "AMDisbetter!": AMD, 380 "AuthenticAMD": AMD, 381 "CentaurHauls": VIA, 382 "GenuineIntel": Intel, 383 "TransmetaCPU": Transmeta, 384 "GenuineTMx86": Transmeta, 385 "Geode by NSC": NSC, 386 "VIA VIA VIA ": VIA, 387 "KVMKVMKVMKVM": KVM, 388 "Microsoft Hv": MSVM, 389 "VMwareVMware": VMware, 390 "XenVMMXenVMM": XenHVM, 391 "bhyve bhyve ": Bhyve, 392 "HygonGenuine": Hygon, 393} 394 395func vendorID() int { 396 _, b, c, d := cpuid(0, 0) 397 v := valAsString(b, d, c) 398 vend, ok := vendorMapping[string(v)] 399 if !ok { 400 return Other 401 } 402 return vend 403} 404 405func valAsString(values ...uint32) []byte { 406 r := make([]byte, 4*len(values)) 407 for i, v := range values { 408 dst := r[i*4:] 409 dst[0] = byte(v & 0xff) 410 dst[1] = byte((v >> 8) & 0xff) 411 dst[2] = byte((v >> 16) & 0xff) 412 dst[3] = byte((v >> 24) & 0xff) 413 switch { 414 case dst[0] == 0: 415 return r[:i*4] 416 case dst[1] == 0: 417 return r[:i*4+1] 418 case dst[2] == 0: 419 return r[:i*4+2] 420 case dst[3] == 0: 421 return r[:i*4+3] 422 } 423 } 424 return r 425} 426