1 /*-
2 * Copyright (c) KATO Takenori, 1997, 1998.
3 * Copyright (c) 2008 The DragonFly Project.
4 *
5 * All rights reserved. Unpublished rights reserved under the copyright
6 * laws of Japan.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer as
14 * the first lines of this file unmodified.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "opt_cpu.h"
32
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37
38 #include <machine/clock.h>
39 #include <machine/cputypes.h>
40 #include <machine/md_var.h>
41 #include <machine/specialreg.h>
42 #include <machine/smp.h>
43
44 #include <vm/vm.h>
45 #include <vm/pmap.h>
46
/* Set nonzero to ignore the TSC frequency reported via CPUID leaf 0x15 */
static int tsc_ignore_cpuid = 0;
TUNABLE_INT("hw.tsc_ignore_cpuid", &tsc_ignore_cpuid);

/* 1 if FXSR+SSE were detected and enabled in initializecpu() */
static int hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");

int	cpu_type;		/* XXX CPU_CLAWHAMMER */
u_int	cpu_feature;		/* Feature flags (CPUID fn 1, EDX) */
u_int	cpu_feature2;		/* Feature flags (CPUID fn 1, ECX) */
u_int	amd_feature;		/* AMD feature flags */
u_int	amd_feature2;		/* AMD feature flags */
u_int	via_feature_rng;	/* VIA RNG features */
u_int	via_feature_xcrypt;	/* VIA ACE features */
u_int	cpu_high;		/* Highest arg to CPUID */
u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
u_int	cpu_id;			/* Stepping ID */
u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
u_int	cpu_procinfo2;		/* Multicore info */
char	cpu_vendor[20];		/* CPU Origin code */
u_int	cpu_vendor_id;		/* CPU vendor ID */
u_int	cpu_fxsr;		/* SSE enabled */
u_int	cpu_xsave;		/* Using XSAVE */
u_int	cpu_clflush_line_size = 32;	/* Default CLFLUSH line size */
u_int	cpu_stdext_feature;
u_int	cpu_stdext_feature2;
u_int	cpu_stdext_feature3;
u_long	cpu_ia32_arch_caps;
u_int	cpu_thermal_feature;
u_int	cpu_mwait_feature;
u_int	cpu_mwait_extemu;

/*
 * Policy for leaving CLFLUSH usable (see initializecpu()):
 * -1: automatic (enable on h/w, disable on VMs)
 *  0: disable
 *  1: enable (where available)
 */
static int hw_clflush_enable = -1;

SYSCTL_INT(_hw, OID_AUTO, clflush_enable, CTLFLAG_RD, &hw_clflush_enable, 0,
	   "");

SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
	&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");
SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
	&via_feature_xcrypt, 0, "VIA C3/C7 xcrypt feature available in CPU");
93
94 /*
95 * Initialize special VIA C3/C7 features
96 */
97 static void
init_via(void)98 init_via(void)
99 {
100 u_int regs[4], val;
101 u_int64_t msreg;
102
103 do_cpuid(0xc0000000, regs);
104 val = regs[0];
105 if (val >= 0xc0000001) {
106 do_cpuid(0xc0000001, regs);
107 val = regs[3];
108 } else
109 val = 0;
110
111 /* Enable RNG if present and disabled */
112 if (val & VIA_CPUID_HAS_RNG) {
113 if (!(val & VIA_CPUID_DO_RNG)) {
114 msreg = rdmsr(0x110B);
115 msreg |= 0x40;
116 wrmsr(0x110B, msreg);
117 }
118 via_feature_rng = VIA_HAS_RNG;
119 }
120 /* Enable AES engine if present and disabled */
121 if (val & VIA_CPUID_HAS_ACE) {
122 if (!(val & VIA_CPUID_DO_ACE)) {
123 msreg = rdmsr(0x1107);
124 msreg |= (0x01 << 28);
125 wrmsr(0x1107, msreg);
126 }
127 via_feature_xcrypt |= VIA_HAS_AES;
128 }
129 /* Enable ACE2 engine if present and disabled */
130 if (val & VIA_CPUID_HAS_ACE2) {
131 if (!(val & VIA_CPUID_DO_ACE2)) {
132 msreg = rdmsr(0x1107);
133 msreg |= (0x01 << 28);
134 wrmsr(0x1107, msreg);
135 }
136 via_feature_xcrypt |= VIA_HAS_AESCTR;
137 }
138 /* Enable SHA engine if present and disabled */
139 if (val & VIA_CPUID_HAS_PHE) {
140 if (!(val & VIA_CPUID_DO_PHE)) {
141 msreg = rdmsr(0x1107);
142 msreg |= (0x01 << 28/**/);
143 wrmsr(0x1107, msreg);
144 }
145 via_feature_xcrypt |= VIA_HAS_SHA;
146 }
147 /* Enable MM engine if present and disabled */
148 if (val & VIA_CPUID_HAS_PMM) {
149 if (!(val & VIA_CPUID_DO_PMM)) {
150 msreg = rdmsr(0x1107);
151 msreg |= (0x01 << 28/**/);
152 wrmsr(0x1107, msreg);
153 }
154 via_feature_xcrypt |= VIA_HAS_MM;
155 }
156 }
157
158 static enum vmm_guest_type
detect_vmm(void)159 detect_vmm(void)
160 {
161 enum vmm_guest_type guest;
162 char vendor[16];
163
164 /*
165 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
166 * http://lkml.org/lkml/2008/10/1/246
167 *
168 * KB1009458: Mechanisms to determine if software is running in
169 * a VMware virtual machine
170 * http://kb.vmware.com/kb/1009458
171 */
172 if (cpu_feature2 & CPUID2_VMM) {
173 u_int regs[4];
174
175 do_cpuid(0x40000000, regs);
176 ((u_int *)&vendor)[0] = regs[1];
177 ((u_int *)&vendor)[1] = regs[2];
178 ((u_int *)&vendor)[2] = regs[3];
179 vendor[12] = '\0';
180 if (regs[0] >= 0x40000000) {
181 memcpy(vmm_vendor, vendor, 13);
182 if (strcmp(vmm_vendor, "VMwareVMware") == 0)
183 return VMM_GUEST_VMWARE;
184 else if (strcmp(vmm_vendor, "Microsoft Hv") == 0)
185 return VMM_GUEST_HYPERV;
186 else if (strcmp(vmm_vendor, "KVMKVMKVM") == 0)
187 return VMM_GUEST_KVM;
188 else if (strcmp(vmm_vendor, "___ NVMM ___") == 0)
189 return VMM_GUEST_NVMM;
190 } else if (regs[0] == 0) {
191 /* Also detect old KVM versions with regs[0] == 0 */
192 if (strcmp(vendor, "KVMKVMKVM") == 0) {
193 memcpy(vmm_vendor, vendor, 13);
194 return VMM_GUEST_KVM;
195 }
196 }
197 }
198
199 guest = detect_virtual();
200 if (guest == VMM_GUEST_NONE && (cpu_feature2 & CPUID2_VMM))
201 guest = VMM_GUEST_UNKNOWN;
202 return guest;
203 }
204
205 /*
206 * Initialize CPU control registers
207 */
void
initializecpu(int cpu)
{
	uint64_t msr;

	/*
	 * Check for FXSR and SSE support and enable if available
	 */
	if ((cpu_feature & CPUID_SSE) && (cpu_feature & CPUID_FXSR)) {
		load_cr4(rcr4() | CR4_OSFXSR | CR4_OSXMMEXCPT);
		cpu_fxsr = hw_instruction_sse = 1;
	}

	if (cpu == 0) {
		/* Check if we are running in a hypervisor. */
		vmm_guest = detect_vmm();
	}

#if !defined(CPU_DISABLE_AVX)
	/* Use XSAVE if supported */
	if (cpu_feature2 & CPUID2_XSAVE) {
		load_cr4(rcr4() | CR4_OSXSAVE);

		/* Adjust size of savefpu in npx.h before adding to mask.*/
		npx_xcr0_mask = CPU_XFEATURE_X87 | CPU_XFEATURE_SSE;
		if (cpu_feature2 & CPUID2_AVX)
			npx_xcr0_mask |= CPU_XFEATURE_YMM;

		load_xcr(0, npx_xcr0_mask);
		cpu_xsave = 1;
	}
#endif

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/* Dispatch on the extended family bits of the cpuid. */
		switch((cpu_id & 0xFF0000)) {
		case 0x100000:
		case 0x120000:
			/*
			 * Errata 721 is the cpu bug found by your's truly
			 * (Matthew Dillon).  It is a bug where a sequence
			 * of 5 or more popq's + a retq, under involved
			 * deep recursion circumstances, can cause the %rsp
			 * to not be properly updated, almost always
			 * resulting in a seg-fault soon after.
			 *
			 * Do not install the workaround when we are running
			 * in a virtual machine.
			 */
			if (vmm_guest)
				break;

			/* Bit 0 of DE_CFG enables the erratum workaround. */
			msr = rdmsr(MSR_AMD_DE_CFG);
			if ((msr & 1) == 0) {
				if (cpu == 0)
					kprintf("Errata 721 workaround "
						"installed\n");
				msr |= 1;
				wrmsr(MSR_AMD_DE_CFG, msr);
			}
			break;
		}

		/*
		 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should
		 * per BKDG.  So, do it here or otherwise some tools could
		 * be confused by Initial Local APIC ID reported with
		 * CPUID Function 1 in EBX.
		 *
		 * Skipped under a hypervisor since the MSR may not be
		 * emulated there.
		 */
		if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
			if ((cpu_feature2 & CPUID2_VMM) == 0) {
				msr = rdmsr(0xc001001f);
				msr |= (uint64_t)1 << 54;
				wrmsr(0xc001001f, msr);
			}
		}

		/*
		 * BIOS may configure Family 10h processors to convert
		 * WC+ cache type to CD.  That can hurt performance of
		 * guest VMs using nested paging.
		 *
		 * The relevant MSR bit is not documented in the BKDG,
		 * the fix is borrowed from Linux.
		 */
		if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
			if ((cpu_feature2 & CPUID2_VMM) == 0) {
				msr = rdmsr(0xc001102a);
				msr &= ~((uint64_t)1 << 24);
				wrmsr(0xc001102a, msr);
			}
		}

		/*
		 * Work around Erratum 793: Specific Combination of Writes
		 * to Write Combined Memory Types and Locked Instructions
		 * May Cause Core Hang.  See Revision Guide for AMD Family
		 * 16h Models 00h-0Fh Processors, revision 3.04 or later,
		 * publication 51810.
		 */
		if (CPUID_TO_FAMILY(cpu_id) == 0x16 &&
		    CPUID_TO_MODEL(cpu_id) <= 0xf) {
			if ((cpu_feature2 & CPUID2_VMM) == 0) {
				msr = rdmsr(0xc0011020);
				msr |= (uint64_t)1 << 15;
				wrmsr(0xc0011020, msr);
			}
		}
	}

	/* Enable no-execute page-protection support if the CPU has it. */
	if ((amd_feature & AMDID_NX) != 0) {
		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
#if 0 /* JG */
		pg_nx = PG_NX;
#endif
	}

	/* VIA C3/C7 (Centaur family 6, model >= 0xf): PadLock setup. */
	if (cpu_vendor_id == CPU_VENDOR_CENTAUR &&
	    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
	    CPUID_TO_MODEL(cpu_id) >= 0xf)
		init_via();

	TUNABLE_INT_FETCH("hw.clflush_enable", &hw_clflush_enable);
	if (cpu_feature & CPUID_CLFSH) {
		/* CPUID reports the CLFLUSH line size in 8-byte units. */
		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;

		/*
		 * Clear the feature bit when CLFLUSH is administratively
		 * disabled, or in automatic mode while under a hypervisor.
		 */
		if (hw_clflush_enable == 0 ||
		    ((hw_clflush_enable == -1) && vmm_guest))
			cpu_feature &= ~CPUID_CLFSH;
	}

	/* Set TSC_AUX register to the cpuid, for using rdtscp in userland. */
	if ((amd_feature & AMDID_RDTSCP) != 0)
		wrmsr(MSR_TSC_AUX, cpu);
}
342
343 /*
344 * This method should be at least as good as calibrating the TSC based on the
345 * HPET timer, since the HPET runs with the core crystal clock apparently.
346 */
347 static void
detect_tsc_frequency(void)348 detect_tsc_frequency(void)
349 {
350 int cpu_family, cpu_model;
351 u_int regs[4];
352 uint64_t crystal = 0;
353
354 cpu_model = CPUID_TO_MODEL(cpu_id);
355 cpu_family = CPUID_TO_FAMILY(cpu_id);
356
357 if (cpu_vendor_id != CPU_VENDOR_INTEL)
358 return;
359
360 if (cpu_high < 0x15)
361 return;
362
363 do_cpuid(0x15, regs);
364 if (regs[0] == 0 || regs[1] == 0)
365 return;
366
367 if (regs[2] == 0) {
368 /* For some families the SDM contains the core crystal clock. */
369 if (cpu_family == 0x6) {
370 switch (cpu_model) {
371 case 0x55: /* Xeon Scalable */
372 crystal = 25000000; /* 25 MHz */
373 break;
374 /* Skylake */
375 case 0x4e:
376 case 0x5e:
377 /* Kabylake/Coffeelake */
378 case 0x8e:
379 case 0x9e:
380 crystal = 24000000; /* 24 MHz */
381 break;
382 case 0x5c: /* Goldmont Atom */
383 crystal = 19200000; /* 19.2 MHz */
384 break;
385 default:
386 break;
387 }
388 }
389 } else {
390 crystal = regs[2];
391 }
392
393 if (crystal == 0)
394 return;
395
396 kprintf("TSC crystal clock: %ju Hz, TSC/crystal ratio: %u/%u\n",
397 crystal, regs[1], regs[0]);
398
399 if (tsc_ignore_cpuid == 0) {
400 tsc_frequency = (crystal * regs[1]) / regs[0];
401 i8254_cputimer_disable = 1;
402 }
403 }
404
405 TIMECOUNTER_INIT(cpuid_tsc_frequency, detect_tsc_frequency);
406