1 /*- 2 * Copyright (c) KATO Takenori, 1997, 1998. 3 * Copyright (c) 2008 The DragonFly Project. 4 * 5 * All rights reserved. Unpublished rights reserved under the copyright 6 * laws of Japan. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer as 14 * the first lines of this file unmodified. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "opt_cpu.h" 32 33 #include <sys/param.h> 34 #include <sys/kernel.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 38 #include <machine/clock.h> 39 #include <machine/cputypes.h> 40 #include <machine/md_var.h> 41 #include <machine/specialreg.h> 42 #include <machine/smp.h> 43 44 #include <vm/vm.h> 45 #include <vm/pmap.h> 46 47 static int tsc_ignore_cpuid = 0; 48 TUNABLE_INT("hw.tsc_ignore_cpuid", &tsc_ignore_cpuid); 49 50 static int hw_instruction_sse; 51 SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, 52 &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); 53 54 int cpu_type; /* XXX CPU_CLAWHAMMER */ 55 u_int cpu_feature; /* Feature flags */ 56 u_int cpu_feature2; /* Feature flags */ 57 u_int amd_feature; /* AMD feature flags */ 58 u_int amd_feature2; /* AMD feature flags */ 59 u_int via_feature_rng; /* VIA RNG features */ 60 u_int via_feature_xcrypt; /* VIA ACE features */ 61 u_int cpu_high; /* Highest arg to CPUID */ 62 u_int cpu_exthigh; /* Highest arg to extended CPUID */ 63 u_int cpu_id; /* Stepping ID */ 64 u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ 65 u_int cpu_procinfo2; /* Multicore info */ 66 char cpu_vendor[20]; /* CPU Origin code */ 67 u_int cpu_vendor_id; /* CPU vendor ID */ 68 u_int cpu_fxsr; /* SSE enabled */ 69 u_int cpu_xsave; /* Using XSAVE */ 70 u_int cpu_clflush_line_size = 32; /* Default CLFLUSH line size */ 71 u_int cpu_stdext_feature; 72 u_int cpu_stdext_feature2; 73 u_int cpu_stdext_feature3; 74 u_long cpu_ia32_arch_caps; 75 u_int cpu_thermal_feature; 76 u_int cpu_mwait_feature; 77 u_int cpu_mwait_extemu; 78 79 /* 80 * -1: automatic (enable on h/w, disable on VMs) 81 * 0: disable 82 * 1: enable (where available) 83 */ 84 static int hw_clflush_enable = -1; 85 86 SYSCTL_INT(_hw, OID_AUTO, clflush_enable, CTLFLAG_RD, &hw_clflush_enable, 0, 87 ""); 88 89 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, 90 &via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU"); 91 SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, 92 &via_feature_xcrypt, 0, "VIA C3/C7 xcrypt feature available in CPU"); 93 94 /* 95 * Initialize special VIA C3/C7 features 96 */ 97 static void 98 init_via(void) 99 { 100 u_int regs[4], val; 101 u_int64_t msreg; 102 103 do_cpuid(0xc0000000, regs); 104 val = regs[0]; 105 if (val >= 0xc0000001) { 106 do_cpuid(0xc0000001, regs); 107 val = regs[3]; 108 } else 109 val = 0; 110 111 /* Enable RNG if present and disabled */ 112 if (val & VIA_CPUID_HAS_RNG) { 113 if (!(val & VIA_CPUID_DO_RNG)) { 114 msreg = rdmsr(0x110B); 115 msreg |= 0x40; 116 wrmsr(0x110B, msreg); 117 } 118 via_feature_rng = VIA_HAS_RNG; 119 } 120 /* Enable AES engine if present and disabled */ 121 if (val & VIA_CPUID_HAS_ACE) { 122 if (!(val & VIA_CPUID_DO_ACE)) { 123 msreg = rdmsr(0x1107); 124 msreg |= (0x01 << 28); 125 wrmsr(0x1107, msreg); 126 } 127 via_feature_xcrypt |= VIA_HAS_AES; 128 } 129 /* Enable ACE2 engine if present and disabled */ 130 if (val & VIA_CPUID_HAS_ACE2) { 131 if (!(val & VIA_CPUID_DO_ACE2)) { 132 msreg = rdmsr(0x1107); 133 msreg |= (0x01 << 28); 134 wrmsr(0x1107, msreg); 135 } 136 via_feature_xcrypt |= VIA_HAS_AESCTR; 137 } 138 /* Enable SHA engine if present and disabled */ 139 if (val & VIA_CPUID_HAS_PHE) { 140 if (!(val & VIA_CPUID_DO_PHE)) { 141 msreg = rdmsr(0x1107); 142 msreg |= (0x01 << 28/**/); 143 wrmsr(0x1107, msreg); 144 } 145 via_feature_xcrypt |= VIA_HAS_SHA; 146 } 147 /* Enable MM engine if present and disabled */ 148 if (val & VIA_CPUID_HAS_PMM) { 149 if (!(val & VIA_CPUID_DO_PMM)) { 150 msreg = rdmsr(0x1107); 151 msreg |= (0x01 << 28/**/); 152 wrmsr(0x1107, msreg); 153 } 154 via_feature_xcrypt |= VIA_HAS_MM; 155 } 156 } 157 158 static enum vmm_guest_type 159 detect_vmm(void) 160 { 161 enum vmm_guest_type guest; 162 char vendor[16]; 163 164 /* 165 * [RFC] CPUID usage for interaction between Hypervisors and Linux. 166 * http://lkml.org/lkml/2008/10/1/246 167 * 168 * KB1009458: Mechanisms to determine if software is running in 169 * a VMware virtual machine 170 * http://kb.vmware.com/kb/1009458 171 */ 172 if (cpu_feature2 & CPUID2_VMM) { 173 u_int regs[4]; 174 175 do_cpuid(0x40000000, regs); 176 ((u_int *)&vendor)[0] = regs[1]; 177 ((u_int *)&vendor)[1] = regs[2]; 178 ((u_int *)&vendor)[2] = regs[3]; 179 vendor[12] = '\0'; 180 if (regs[0] >= 0x40000000) { 181 memcpy(vmm_vendor, vendor, 13); 182 if (strcmp(vmm_vendor, "VMwareVMware") == 0) 183 return VMM_GUEST_VMWARE; 184 else if (strcmp(vmm_vendor, "Microsoft Hv") == 0) 185 return VMM_GUEST_HYPERV; 186 else if (strcmp(vmm_vendor, "KVMKVMKVM") == 0) 187 return VMM_GUEST_KVM; 188 else if (strcmp(vmm_vendor, "___ NVMM ___") == 0) 189 return VMM_GUEST_NVMM; 190 } else if (regs[0] == 0) { 191 /* Also detect old KVM versions with regs[0] == 0 */ 192 if (strcmp(vendor, "KVMKVMKVM") == 0) { 193 memcpy(vmm_vendor, vendor, 13); 194 return VMM_GUEST_KVM; 195 } 196 } 197 } 198 199 guest = detect_virtual(); 200 if (guest == VMM_GUEST_NONE && (cpu_feature2 & CPUID2_VMM)) 201 guest = VMM_GUEST_UNKNOWN; 202 return guest; 203 } 204 205 /* 206 * Initialize CPU control registers 207 */ 208 void 209 initializecpu(int cpu) 210 { 211 uint64_t msr; 212 213 /* 214 * Check for FXSR and SSE support and enable if available 215 */ 216 if ((cpu_feature & CPUID_SSE) && (cpu_feature & CPUID_FXSR)) { 217 load_cr4(rcr4() | CR4_OSFXSR | CR4_OSXMMEXCPT); 218 cpu_fxsr = hw_instruction_sse = 1; 219 } 220 221 if (cpu == 0) { 222 /* Check if we are running in a hypervisor. */ 223 vmm_guest = detect_vmm(); 224 } 225 226 #if !defined(CPU_DISABLE_AVX) 227 /* Use XSAVE if supported */ 228 if (cpu_feature2 & CPUID2_XSAVE) { 229 load_cr4(rcr4() | CR4_OSXSAVE); 230 231 /* Adjust size of savefpu in npx.h before adding to mask.*/ 232 npx_xcr0_mask = CPU_XFEATURE_X87 | CPU_XFEATURE_SSE; 233 if (cpu_feature2 & CPUID2_AVX) 234 npx_xcr0_mask |= CPU_XFEATURE_YMM; 235 236 load_xcr(0, npx_xcr0_mask); 237 cpu_xsave = 1; 238 } 239 #endif 240 241 if (cpu_vendor_id == CPU_VENDOR_AMD) { 242 switch((cpu_id & 0xFF0000)) { 243 case 0x100000: 244 case 0x120000: 245 /* 246 * Errata 721 is the cpu bug found by your's truly 247 * (Matthew Dillon). It is a bug where a sequence 248 * of 5 or more popq's + a retq, under involved 249 * deep recursion circumstances, can cause the %rsp 250 * to not be properly updated, almost always 251 * resulting in a seg-fault soon after. 252 * 253 * Do not install the workaround when we are running 254 * in a virtual machine. 255 */ 256 if (vmm_guest) 257 break; 258 259 msr = rdmsr(MSR_AMD_DE_CFG); 260 if ((msr & 1) == 0) { 261 if (cpu == 0) 262 kprintf("Errata 721 workaround " 263 "installed\n"); 264 msr |= 1; 265 wrmsr(MSR_AMD_DE_CFG, msr); 266 } 267 break; 268 } 269 270 /* 271 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should 272 * per BKDG. So, do it here or otherwise some tools could 273 * be confused by Initial Local APIC ID reported with 274 * CPUID Function 1 in EBX. 275 */ 276 if (CPUID_TO_FAMILY(cpu_id) == 0x10) { 277 if ((cpu_feature2 & CPUID2_VMM) == 0) { 278 msr = rdmsr(0xc001001f); 279 msr |= (uint64_t)1 << 54; 280 wrmsr(0xc001001f, msr); 281 } 282 } 283 284 /* 285 * BIOS may configure Family 10h processors to convert 286 * WC+ cache type to CD. That can hurt performance of 287 * guest VMs using nested paging. 288 * 289 * The relevant MSR bit is not documented in the BKDG, 290 * the fix is borrowed from Linux. 291 */ 292 if (CPUID_TO_FAMILY(cpu_id) == 0x10) { 293 if ((cpu_feature2 & CPUID2_VMM) == 0) { 294 msr = rdmsr(0xc001102a); 295 msr &= ~((uint64_t)1 << 24); 296 wrmsr(0xc001102a, msr); 297 } 298 } 299 300 /* 301 * Work around Erratum 793: Specific Combination of Writes 302 * to Write Combined Memory Types and Locked Instructions 303 * May Cause Core Hang. See Revision Guide for AMD Family 304 * 16h Models 00h-0Fh Processors, revision 3.04 or later, 305 * publication 51810. 306 */ 307 if (CPUID_TO_FAMILY(cpu_id) == 0x16 && 308 CPUID_TO_MODEL(cpu_id) <= 0xf) { 309 if ((cpu_feature2 & CPUID2_VMM) == 0) { 310 msr = rdmsr(0xc0011020); 311 msr |= (uint64_t)1 << 15; 312 wrmsr(0xc0011020, msr); 313 } 314 } 315 } 316 317 if ((amd_feature & AMDID_NX) != 0) { 318 msr = rdmsr(MSR_EFER) | EFER_NXE; 319 wrmsr(MSR_EFER, msr); 320 #if 0 /* JG */ 321 pg_nx = PG_NX; 322 #endif 323 } 324 if (cpu_vendor_id == CPU_VENDOR_CENTAUR && 325 CPUID_TO_FAMILY(cpu_id) == 0x6 && 326 CPUID_TO_MODEL(cpu_id) >= 0xf) 327 init_via(); 328 329 TUNABLE_INT_FETCH("hw.clflush_enable", &hw_clflush_enable); 330 if (cpu_feature & CPUID_CLFSH) { 331 cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8; 332 333 if (hw_clflush_enable == 0 || 334 ((hw_clflush_enable == -1) && vmm_guest)) 335 cpu_feature &= ~CPUID_CLFSH; 336 } 337 338 /* Set TSC_AUX register to the cpuid, for using rdtscp in userland. */ 339 if ((amd_feature & AMDID_RDTSCP) != 0) 340 wrmsr(MSR_TSC_AUX, cpu); 341 } 342 343 /* 344 * This method should be at least as good as calibrating the TSC based on the 345 * HPET timer, since the HPET runs with the core crystal clock apparently. 346 */ 347 static void 348 detect_tsc_frequency(void) 349 { 350 int cpu_family, cpu_model; 351 u_int regs[4]; 352 uint64_t crystal = 0; 353 354 cpu_model = CPUID_TO_MODEL(cpu_id); 355 cpu_family = CPUID_TO_FAMILY(cpu_id); 356 357 if (cpu_vendor_id != CPU_VENDOR_INTEL) 358 return; 359 360 if (cpu_high < 0x15) 361 return; 362 363 do_cpuid(0x15, regs); 364 if (regs[0] == 0 || regs[1] == 0) 365 return; 366 367 if (regs[2] == 0) { 368 /* For some families the SDM contains the core crystal clock. */ 369 if (cpu_family == 0x6) { 370 switch (cpu_model) { 371 case 0x55: /* Xeon Scalable */ 372 crystal = 25000000; /* 25 MHz */ 373 break; 374 /* Skylake */ 375 case 0x4e: 376 case 0x5e: 377 /* Kabylake/Coffeelake */ 378 case 0x8e: 379 case 0x9e: 380 crystal = 24000000; /* 24 MHz */ 381 break; 382 case 0x5c: /* Goldmont Atom */ 383 crystal = 19200000; /* 19.2 MHz */ 384 break; 385 default: 386 break; 387 } 388 } 389 } else { 390 crystal = regs[2]; 391 } 392 393 if (crystal == 0) 394 return; 395 396 kprintf("TSC crystal clock: %ju Hz, TSC/crystal ratio: %u/%u\n", 397 crystal, regs[1], regs[0]); 398 399 if (tsc_ignore_cpuid == 0) { 400 tsc_frequency = (crystal * regs[1]) / regs[0]; 401 i8254_cputimer_disable = 1; 402 } 403 } 404 405 TIMECOUNTER_INIT(cpuid_tsc_frequency, detect_tsc_frequency); 406