xref: /dragonfly/sys/platform/pc64/x86_64/initcpu.c (revision 1c9138ce)
1 /*-
2  * Copyright (c) KATO Takenori, 1997, 1998.
3  * Copyright (c) 2008 The DragonFly Project.
4  *
5  * All rights reserved.  Unpublished rights reserved under the copyright
6  * laws of Japan.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer as
14  *    the first lines of this file unmodified.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "opt_cpu.h"
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 
38 #include <machine/cputypes.h>
39 #include <machine/md_var.h>
40 #include <machine/specialreg.h>
41 #include <machine/smp.h>
42 
43 #include <vm/vm.h>
44 #include <vm/pmap.h>
45 
/*
 * Defined elsewhere.  detect_tsc_frequency() below sets it to 1 when it
 * takes the TSC frequency from CPUID.
 */
extern int i8254_cputimer_disable;

/*
 * Loader tunable.  When non-zero, detect_tsc_frequency() still reports the
 * crystal clock and ratio it found but leaves tsc_frequency and
 * i8254_cputimer_disable untouched.
 */
static int tsc_ignore_cpuid = 0;
TUNABLE_INT("hw.tsc_ignore_cpuid", &tsc_ignore_cpuid);

/* Set to 1 by initializecpu() once FXSR/SSE has been enabled in %cr4. */
static int	hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
    &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");

/*
 * CPU identification state shared with the rest of the kernel.  Apart from
 * cpu_fxsr, cpu_xsave, cpu_clflush_line_size, cpu_feature (CLFSH bit) and
 * the via_feature_* words, nothing in this part of the file writes these;
 * they are presumably filled in by the CPU identification code -- confirm
 * against the caller.
 */
int	cpu_type;		/* XXX CPU_CLAWHAMMER */
u_int	cpu_feature;		/* Feature flags */
u_int	cpu_feature2;		/* Feature flags */
u_int	amd_feature;		/* AMD feature flags */
u_int	amd_feature2;		/* AMD feature flags */
u_int	via_feature_rng;	/* VIA RNG features */
u_int	via_feature_xcrypt;	/* VIA ACE features */
u_int	cpu_high;		/* Highest arg to CPUID */
u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
u_int	cpu_id;			/* Stepping ID */
u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
u_int	cpu_procinfo2;		/* Multicore info */
char	cpu_vendor[20];		/* CPU Origin code */
u_int	cpu_vendor_id;		/* CPU vendor ID */
u_int	cpu_fxsr;		/* SSE enabled */
u_int	cpu_xsave;		/* AVX enabled by OS */
u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
/*
 * initializecpu() overwrites the default below with
 * ((cpu_procinfo >> 8) & 0xff) * 8 when CPUID_CLFSH is reported.
 */
u_int	cpu_clflush_line_size = 32;	/* Default CLFLUSH line size */
u_int	cpu_stdext_feature;
u_int	cpu_stdext_feature2;
u_int	cpu_stdext_feature3;
u_long	cpu_ia32_arch_caps;
u_int	cpu_thermal_feature;
u_int	cpu_mwait_feature;
u_int	cpu_mwait_extemu;

/*
 * CLFLUSH policy, fetched from the "hw.clflush_enable" tunable by
 * initializecpu() and exported read-only through sysctl:
 *
 * -1: automatic (enable on h/w, disable on VMs)
 * 0: disable
 * 1: enable (where available)
 */
static int hw_clflush_enable = -1;

SYSCTL_INT(_hw, OID_AUTO, clflush_enable, CTLFLAG_RD, &hw_clflush_enable, 0,
	   "");

/* Read-only views of the feature words filled in by init_via(). */
SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
	&via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU");
SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
	&via_feature_xcrypt, 0, "VIA C3/C7 xcrypt feature available in CPU");
95 
96 /*
97  * Initialize special VIA C3/C7 features
98  */
99 static void
100 init_via(void)
101 {
102 	u_int regs[4], val;
103 	u_int64_t msreg;
104 
105 	do_cpuid(0xc0000000, regs);
106 	val = regs[0];
107 	if (val >= 0xc0000001) {
108 		do_cpuid(0xc0000001, regs);
109 		val = regs[3];
110 	} else
111 		val = 0;
112 
113 	/* Enable RNG if present and disabled */
114 	if (val & VIA_CPUID_HAS_RNG) {
115 		if (!(val & VIA_CPUID_DO_RNG)) {
116 			msreg = rdmsr(0x110B);
117 			msreg |= 0x40;
118 			wrmsr(0x110B, msreg);
119 		}
120 		via_feature_rng = VIA_HAS_RNG;
121 	}
122 	/* Enable AES engine if present and disabled */
123 	if (val & VIA_CPUID_HAS_ACE) {
124 		if (!(val & VIA_CPUID_DO_ACE)) {
125 			msreg = rdmsr(0x1107);
126 			msreg |= (0x01 << 28);
127 			wrmsr(0x1107, msreg);
128 		}
129 		via_feature_xcrypt |= VIA_HAS_AES;
130 	}
131 	/* Enable ACE2 engine if present and disabled */
132 	if (val & VIA_CPUID_HAS_ACE2) {
133 		if (!(val & VIA_CPUID_DO_ACE2)) {
134 			msreg = rdmsr(0x1107);
135 			msreg |= (0x01 << 28);
136 			wrmsr(0x1107, msreg);
137 		}
138 		via_feature_xcrypt |= VIA_HAS_AESCTR;
139 	}
140 	/* Enable SHA engine if present and disabled */
141 	if (val & VIA_CPUID_HAS_PHE) {
142 		if (!(val & VIA_CPUID_DO_PHE)) {
143 			msreg = rdmsr(0x1107);
144 			msreg |= (0x01 << 28/**/);
145 			wrmsr(0x1107, msreg);
146 		}
147 		via_feature_xcrypt |= VIA_HAS_SHA;
148 	}
149 	/* Enable MM engine if present and disabled */
150 	if (val & VIA_CPUID_HAS_PMM) {
151 		if (!(val & VIA_CPUID_DO_PMM)) {
152 			msreg = rdmsr(0x1107);
153 			msreg |= (0x01 << 28/**/);
154 			wrmsr(0x1107, msreg);
155 		}
156 		via_feature_xcrypt |= VIA_HAS_MM;
157 	}
158 }
159 
160 static enum vmm_guest_type
161 detect_vmm(void)
162 {
163 	enum vmm_guest_type guest;
164 	char vendor[16];
165 
166 	/*
167 	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
168 	 * http://lkml.org/lkml/2008/10/1/246
169 	 *
170 	 * KB1009458: Mechanisms to determine if software is running in
171 	 * a VMware virtual machine
172 	 * http://kb.vmware.com/kb/1009458
173 	 */
174 	if (cpu_feature2 & CPUID2_VMM) {
175 		u_int regs[4];
176 
177 		do_cpuid(0x40000000, regs);
178 		((u_int *)&vendor)[0] = regs[1];
179 		((u_int *)&vendor)[1] = regs[2];
180 		((u_int *)&vendor)[2] = regs[3];
181 		vendor[12] = '\0';
182 		if (regs[0] >= 0x40000000) {
183 			memcpy(vmm_vendor, vendor, 13);
184 			if (strcmp(vmm_vendor, "VMwareVMware") == 0)
185 				return VMM_GUEST_VMWARE;
186 			else if (strcmp(vmm_vendor, "Microsoft Hv") == 0)
187 				return VMM_GUEST_HYPERV;
188 			else if (strcmp(vmm_vendor, "KVMKVMKVM") == 0)
189 				return VMM_GUEST_KVM;
190 		} else if (regs[0] == 0) {
191 			/* Also detect old KVM versions with regs[0] == 0 */
192 			if (strcmp(vendor, "KVMKVMKVM") == 0) {
193 				memcpy(vmm_vendor, vendor, 13);
194 				return VMM_GUEST_KVM;
195 			}
196 		}
197 	}
198 
199 	guest = detect_virtual();
200 	if (guest == VMM_GUEST_NONE && (cpu_feature2 & CPUID2_VMM))
201 		guest = VMM_GUEST_UNKNOWN;
202 	return guest;
203 }
204 
205 /*
206  * Initialize CPU control registers
207  */
208 void
209 initializecpu(int cpu)
210 {
211 	uint64_t msr;
212 
213 	/*
214 	 * Check for FXSR and SSE support and enable if available
215 	 */
216 	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
217 		load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
218 		cpu_fxsr = hw_instruction_sse = 1;
219 	}
220 
221 	if (cpu == 0) {
222 		/* Check if we are running in a hypervisor. */
223 		vmm_guest = detect_vmm();
224 	}
225 
226 #if !defined(CPU_DISABLE_AVX)
227 	/*Check for XSAVE and AVX support and enable if available.*/
228 	if ((cpu_feature2 & CPUID2_AVX) && (cpu_feature2 & CPUID2_XSAVE)
229 	     && (cpu_feature & CPUID_SSE)) {
230 		load_cr4(rcr4() | CR4_XSAVE);
231 
232 		/* Adjust size of savefpu in npx.h before adding to mask.*/
233 		xsetbv(0, CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0);
234 		cpu_xsave = 1;
235 	}
236 #endif
237 
238 	if (cpu_vendor_id == CPU_VENDOR_AMD) {
239 		switch((cpu_id & 0xFF0000)) {
240 		case 0x100000:
241 		case 0x120000:
242 			/*
243 			 * Errata 721 is the cpu bug found by your's truly
244 			 * (Matthew Dillon).  It is a bug where a sequence
245 			 * of 5 or more popq's + a retq, under involved
246 			 * deep recursion circumstances, can cause the %rsp
247 			 * to not be properly updated, almost always
248 			 * resulting in a seg-fault soon after.
249 			 *
250 			 * Do not install the workaround when we are running
251 			 * in a virtual machine.
252 			 */
253 			if (vmm_guest)
254 				break;
255 
256 			msr = rdmsr(MSR_AMD_DE_CFG);
257 			if ((msr & 1) == 0) {
258 				if (cpu == 0)
259 					kprintf("Errata 721 workaround "
260 						"installed\n");
261 				msr |= 1;
262 				wrmsr(MSR_AMD_DE_CFG, msr);
263 			}
264 			break;
265 		}
266 
267 		/*
268 		 * BIOS may fail to set InitApicIdCpuIdLo to 1 as it should
269 		 * per BKDG.  So, do it here or otherwise some tools could
270 		 * be confused by Initial Local APIC ID reported with
271 		 * CPUID Function 1 in EBX.
272 		 */
273 		if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
274 			if ((cpu_feature2 & CPUID2_VMM) == 0) {
275 				msr = rdmsr(0xc001001f);
276 				msr |= (uint64_t)1 << 54;
277 				wrmsr(0xc001001f, msr);
278 			}
279 		}
280 
281 		/*
282 		 * BIOS may configure Family 10h processors to convert
283 		 * WC+ cache type to CD.  That can hurt performance of
284 		 * guest VMs using nested paging.
285 		 *
286 		 * The relevant MSR bit is not documented in the BKDG,
287 		 * the fix is borrowed from Linux.
288 		 */
289 		if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
290 			if ((cpu_feature2 & CPUID2_VMM) == 0) {
291 				msr = rdmsr(0xc001102a);
292 				msr &= ~((uint64_t)1 << 24);
293 				wrmsr(0xc001102a, msr);
294 			}
295 		}
296 
297 		/*
298 		 * Work around Erratum 793: Specific Combination of Writes
299 		 * to Write Combined Memory Types and Locked Instructions
300 		 * May Cause Core Hang.  See Revision Guide for AMD Family
301 		 * 16h Models 00h-0Fh Processors, revision 3.04 or later,
302 		 * publication 51810.
303 		 */
304 		if (CPUID_TO_FAMILY(cpu_id) == 0x16 &&
305 		    CPUID_TO_MODEL(cpu_id) <= 0xf) {
306 			if ((cpu_feature2 & CPUID2_VMM) == 0) {
307 				msr = rdmsr(0xc0011020);
308 				msr |= (uint64_t)1 << 15;
309 				wrmsr(0xc0011020, msr);
310 			}
311 		}
312 	}
313 
314 	if ((amd_feature & AMDID_NX) != 0) {
315 		msr = rdmsr(MSR_EFER) | EFER_NXE;
316 		wrmsr(MSR_EFER, msr);
317 #if 0 /* JG */
318 		pg_nx = PG_NX;
319 #endif
320 	}
321 	if (cpu_vendor_id == CPU_VENDOR_CENTAUR &&
322 	    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
323 	    CPUID_TO_MODEL(cpu_id) >= 0xf)
324 		init_via();
325 
326 	TUNABLE_INT_FETCH("hw.clflush_enable", &hw_clflush_enable);
327 	if (cpu_feature & CPUID_CLFSH) {
328 		cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8;
329 
330 		if (hw_clflush_enable == 0 ||
331 		    ((hw_clflush_enable == -1) && vmm_guest))
332 			cpu_feature &= ~CPUID_CLFSH;
333 	}
334 
335 	/* Set TSC_AUX register to the cpuid, for using rdtscp in userland. */
336 	if ((amd_feature & AMDID_RDTSCP) != 0)
337 		wrmsr(MSR_TSCAUX, cpu);
338 }
339 
340 /*
341  * This method should be at least as good as calibrating the TSC based on the
342  * HPET timer, since the HPET runs with the core crystal clock apparently.
343  */
344 static void
345 detect_tsc_frequency(void)
346 {
347 	int cpu_family, cpu_model;
348 	u_int regs[4];
349 	uint64_t crystal = 0;
350 
351 	cpu_model = CPUID_TO_MODEL(cpu_id);
352 	cpu_family = CPUID_TO_FAMILY(cpu_id);
353 
354 	if (cpu_vendor_id != CPU_VENDOR_INTEL)
355 		return;
356 
357 	if (cpu_high < 0x15)
358 		return;
359 
360 	do_cpuid(0x15, regs);
361 	if (regs[0] == 0 || regs[1] == 0)
362 		return;
363 
364 	if (regs[2] == 0) {
365 		/* For some families the SDM contains the core crystal clock. */
366 		if (cpu_family == 0x6) {
367 			switch (cpu_model) {
368 			case 0x55:	/* Xeon Scalable */
369 				crystal = 25000000;	/* 25 MHz */
370 				break;
371 			/* Skylake */
372 			case 0x4e:
373 			case 0x5e:
374 			/* Kabylake/Coffeelake */
375 			case 0x8e:
376 			case 0x9e:
377 				crystal = 24000000;	/* 24 MHz */
378 				break;
379 			case 0x5c:	/* Goldmont Atom */
380 				crystal = 19200000;	/* 19.2 MHz */
381 				break;
382 			default:
383 				break;
384 			}
385 		}
386 	} else {
387 		crystal = regs[2];
388 	}
389 
390 	if (crystal == 0)
391 		return;
392 
393 	kprintf("TSC crystal clock: %ju Hz, TSC/crystal ratio: %u/%u\n",
394 	    crystal, regs[1], regs[0]);
395 
396 	if (tsc_ignore_cpuid == 0) {
397 		tsc_frequency = (crystal * regs[1]) / regs[0];
398 		i8254_cputimer_disable = 1;
399 	}
400 }
401 
402 TIMECOUNTER_INIT(cpuid_tsc_frequency, detect_tsc_frequency);
403