xref: /openbsd/sys/arch/amd64/amd64/cpu.c (revision 76d0caae)
1 /*	$OpenBSD: cpu.c,v 1.154 2021/08/31 17:40:59 dv Exp $	*/
2 /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */
3 
4 /*-
5  * Copyright (c) 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by RedBack Networks Inc.
10  *
11  * Author: Bill Sommerfeld
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 1999 Stefan Grefen
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *      This product includes software developed by the NetBSD
49  *      Foundation, Inc. and its contributors.
50  * 4. Neither the name of The NetBSD Foundation nor the names of its
51  *    contributors may be used to endorse or promote products derived
52  *    from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
55  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 #include "lapic.h"
68 #include "ioapic.h"
69 #include "vmm.h"
70 #include "pctr.h"
71 #include "pvbus.h"
72 
73 #include <sys/param.h>
74 #include <sys/proc.h>
75 #include <sys/timeout.h>
76 #include <sys/systm.h>
77 #include <sys/device.h>
78 #include <sys/malloc.h>
79 #include <sys/memrange.h>
80 #include <sys/atomic.h>
81 #include <sys/user.h>
82 
83 #include <uvm/uvm_extern.h>
84 
85 #include <machine/codepatch.h>
86 #include <machine/cpu_full.h>
87 #include <machine/cpufunc.h>
88 #include <machine/cpuvar.h>
89 #include <machine/pmap.h>
90 #include <machine/vmparam.h>
91 #include <machine/mpbiosvar.h>
92 #include <machine/pcb.h>
93 #include <machine/specialreg.h>
94 #include <machine/segments.h>
95 #include <machine/gdt.h>
96 #include <machine/pio.h>
97 #include <machine/vmmvar.h>
98 
99 #if NLAPIC > 0
100 #include <machine/i82489reg.h>
101 #include <machine/i82489var.h>
102 #endif
103 
104 #if NIOAPIC > 0
105 #include <machine/i82093var.h>
106 #endif
107 
108 #if NPCTR > 0
109 #include <machine/pctr.h>
110 #endif
111 
112 #if NPVBUS > 0
113 #include <dev/pv/pvvar.h>
114 #endif
115 
116 #include <dev/ic/mc146818reg.h>
117 #include <amd64/isa/nvram.h>
118 #include <dev/isa/isareg.h>
119 
120 #ifdef HIBERNATE
121 #include <sys/hibernate.h>
122 #include <machine/hibernate.h>
123 #endif /* HIBERNATE */
124 
125 /* #define CPU_DEBUG */
126 
127 #ifdef CPU_DEBUG
128 #define DPRINTF(x...)	do { printf(x); } while(0)
129 #else
130 #define DPRINTF(x...)
131 #endif /* CPU_DEBUG */
132 
133 int     cpu_match(struct device *, void *, void *);
134 void    cpu_attach(struct device *, struct device *, void *);
135 int     cpu_activate(struct device *, int);
136 void	patinit(struct cpu_info *ci);
137 #if NVMM > 0
138 void	cpu_init_vmm(struct cpu_info *ci);
139 #endif /* NVMM > 0 */
140 
141 struct cpu_softc {
142 	struct device sc_dev;		/* device tree glue */
143 	struct cpu_info *sc_info;	/* pointer to CPU info */
144 };
145 
146 void	replacesmap(void);
147 void	replacemeltdown(void);
148 void	replacemds(void);
149 
150 extern long _stac;
151 extern long _clac;
152 
153 void
154 replacesmap(void)
155 {
156 	static int replacedone = 0;
157 	int s;
158 
159 	if (replacedone)
160 		return;
161 	replacedone = 1;
162 
163 	s = splhigh();
164 
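	/*
	 * stac and clac are 3-byte instructions; they are patched over
	 * the 3-byte placeholders at the CPTAG_STAC/CPTAG_CLAC sites
	 * (presumably NOPs on CPUs without SMAP).
	 */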
165 	codepatch_replace(CPTAG_STAC, &_stac, 3);
166 	codepatch_replace(CPTAG_CLAC, &_clac, 3);
167 
168 	splx(s);
169 }
170 
171 void
172 replacemeltdown(void)
173 {
174 	static int replacedone = 0;
175 	struct cpu_info *ci = &cpu_info_primary;
176 	int swapgs_vuln = 0, s;
177 
178 	if (replacedone)
179 		return;
180 	replacedone = 1;
181 
182 	if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
183 		int family = ci->ci_family;
184 		int model = ci->ci_model;
185 
186 		swapgs_vuln = 1;
187 		if (family == 0x6 &&
188 		    (model == 0x37 || model == 0x4a || model == 0x4c ||
189 		     model == 0x4d || model == 0x5a || model == 0x5d ||
190 		     model == 0x6e || model == 0x65 || model == 0x75)) {
191 			/* Silvermont, Airmont */
192 			swapgs_vuln = 0;
193 		} else if (family == 0x6 && (model == 0x85 || model == 0x57)) {
194 			/* Knights Landing, Knights Mill */
195 			swapgs_vuln = 0;
196 		}
197 	}
198 
199 	s = splhigh();
200 	if (!cpu_meltdown)
201 		codepatch_nop(CPTAG_MELTDOWN_NOP);
202 	else {
203 		extern long alltraps_kern_meltdown;
204 
205 		/* eliminate conditional branch in alltraps */
206 		codepatch_jmp(CPTAG_MELTDOWN_ALLTRAPS, &alltraps_kern_meltdown);
207 
208 		/* enable reuse of PCID for U-K page tables */
209 		if (pmap_use_pcid) {
210 			extern long _pcid_set_reuse;
211 			DPRINTF("%s: codepatching PCID use\n", __func__);
212 			codepatch_replace(CPTAG_PCID_SET_REUSE,
213 			    &_pcid_set_reuse, PCID_SET_REUSE_SIZE);
214 		}
215 	}
216 
217 	/*
218 	 * CVE-2019-1125: if the CPU has SMAP and it's not vulnerable to
219 	 * Meltdown, then it's protected both from speculatively mis-skipping
220 	 * the swapgs during interrupts of userspace and from speculatively
221 	 * mis-taking a swapgs during interrupts while already in the kernel
222 	 * as the speculative path will fault from SMAP.  Warning: enabling
223 	 * WRGSBASE would break this 'protection'.
224 	 *
225 	 * Otherwise, if the CPU's swapgs can't be speculated over and it
226 	 * _is_ vulnerable to Meltdown then the %cr3 change will serialize
227 	 * user->kern transitions, but we still need to mitigate the
228 	 * already-in-kernel cases.
229 	 */
230 	if (!cpu_meltdown && (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)) {
231 		codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN);
232 		codepatch_nop(CPTAG_FENCE_NO_SAFE_SMAP);
233 	} else if (!swapgs_vuln && cpu_meltdown) {
234 		codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN);
235 	}
236 	splx(s);
237 }
238 
239 void
240 replacemds(void)
241 {
242 	static int replacedone = 0;
243 	extern long mds_handler_bdw, mds_handler_ivb, mds_handler_skl;
244 	extern long mds_handler_skl_sse, mds_handler_skl_avx;
245 	extern long mds_handler_silvermont, mds_handler_knights;
246 	struct cpu_info *ci = &cpu_info_primary;
247 	CPU_INFO_ITERATOR cii;
248 	void *handler = NULL, *vmm_handler = NULL;
249 	const char *type;
250 	int has_verw, s;
251 
252 	/* ci_mds_tmp must be 32-byte aligned for AVX instructions */
253 	CTASSERT((offsetof(struct cpu_info, ci_mds_tmp) -
254 		  offsetof(struct cpu_info, ci_PAGEALIGN)) % 32 == 0);
255 
256 	if (replacedone)
257 		return;
258 	replacedone = 1;
259 
260 	if (strcmp(cpu_vendor, "GenuineIntel") != 0 ||
261 	    ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) &&
262 	     (rdmsr(MSR_ARCH_CAPABILITIES) & ARCH_CAPABILITIES_MDS_NO))) {
263 		/* Unaffected, nop out the handling code */
264 		has_verw = 0;
265 	} else if (ci->ci_feature_sefflags_edx & SEFF0EDX_MD_CLEAR) {
266 		/* new firmware, use VERW */
267 		has_verw = 1;
268 	} else {
269 		int family = ci->ci_family;
270 		int model = ci->ci_model;
271 		int stepping = CPUID2STEPPING(ci->ci_signature);
272 
273 		has_verw = 0;
274 		if (family == 0x6 &&
275 		    (model == 0x2e || model == 0x1e || model == 0x1f ||
276 		     model == 0x1a || model == 0x2f || model == 0x25 ||
277 		     model == 0x2c || model == 0x2d || model == 0x2a ||
278 		     model == 0x3e || model == 0x3a)) {
279 			/* Nehalem, Westmere, SandyBridge, IvyBridge */
280 			handler = vmm_handler = &mds_handler_ivb;
281 			type = "IvyBridge";
282 			CPU_INFO_FOREACH(cii, ci) {
283 				ci->ci_mds_buf = malloc(672, M_DEVBUF,
284 				    M_WAITOK);
285 				memset(ci->ci_mds_buf, 0, 16);
286 			}
287 		} else if (family == 0x6 &&
288 		    (model == 0x3f || model == 0x3c || model == 0x45 ||
289 		     model == 0x46 || model == 0x56 || model == 0x4f ||
290 		     model == 0x47 || model == 0x3d)) {
291 			/* Haswell and Broadwell */
292 			handler = vmm_handler = &mds_handler_bdw;
293 			type = "Broadwell";
294 			CPU_INFO_FOREACH(cii, ci) {
295 				ci->ci_mds_buf = malloc(1536, M_DEVBUF,
296 				    M_WAITOK);
297 			}
298 		} else if (family == 0x6 &&
299 		    ((model == 0x55 && stepping <= 5) || model == 0x4e ||
300 		    model == 0x5e || (model == 0x8e && stepping <= 0xb) ||
301 		    (model == 0x9e && stepping <= 0xc))) {
302 			/*
303 			 * Skylake, KabyLake, CoffeeLake, WhiskeyLake,
304 			 * CascadeLake
305 			 */
306 			/* XXX mds_handler_skl_avx512 */
307 			if (xgetbv(0) & XCR0_AVX) {
308 				handler = &mds_handler_skl_avx;
309 				type = "Skylake AVX";
310 			} else {
311 				handler = &mds_handler_skl_sse;
312 				type = "Skylake SSE";
313 			}
314 			vmm_handler = &mds_handler_skl;
315 			CPU_INFO_FOREACH(cii, ci) {
316 				vaddr_t b64;
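				/*
				 * Over-allocate so the buffer can be
				 * rounded up to a 64-byte boundary.
				 */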
317 				b64 = (vaddr_t)malloc(6 * 1024 + 64 + 63,
318 				    M_DEVBUF, M_WAITOK);
319 				ci->ci_mds_buf = (void *)((b64 + 63) & ~63);
320 				memset(ci->ci_mds_buf, 0, 64);
321 			}
322 		} else if (family == 0x6 &&
323 		    (model == 0x37 || model == 0x4a || model == 0x4c ||
324 		     model == 0x4d || model == 0x5a || model == 0x5d ||
325 		     model == 0x6e || model == 0x65 || model == 0x75)) {
326 			/* Silvermont, Airmont */
327 			handler = vmm_handler = &mds_handler_silvermont;
328 			type = "Silvermont";
329 			CPU_INFO_FOREACH(cii, ci) {
330 				ci->ci_mds_buf = malloc(256, M_DEVBUF,
331 				    M_WAITOK);
332 				memset(ci->ci_mds_buf, 0, 16);
333 			}
334 		} else if (family == 0x6 && (model == 0x85 || model == 0x57)) {
335 			handler = vmm_handler = &mds_handler_knights;
336 			type = "KnightsLanding";
337 			CPU_INFO_FOREACH(cii, ci) {
338 				vaddr_t b64;
339 				b64 = (vaddr_t)malloc(1152 + 63, M_DEVBUF,
340 				    M_WAITOK);
341 				ci->ci_mds_buf = (void *)((b64 + 63) & ~63);
342 			}
343 		}
344 	}
345 
346 	if (handler != NULL) {
347 		printf("cpu0: using %s MDS workaround%s\n", type, "");
348 		s = splhigh();
349 		codepatch_call(CPTAG_MDS, handler);
350 		codepatch_call(CPTAG_MDS_VMM, vmm_handler);
351 		splx(s);
352 	} else if (has_verw) {
353 		/* The new firmware enhances L1D_FLUSH MSR to flush MDS too */
354 		if (cpu_info_primary.ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr == 1) {
355 			s = splhigh();
356 			codepatch_nop(CPTAG_MDS_VMM);
357 			splx(s);
358 			type = " (except on vmm entry)";
359 		} else {
360 			type = "";
361 		}
362 		printf("cpu0: using %s MDS workaround%s\n", "VERW", type);
363 	} else {
364 		s = splhigh();
365 		codepatch_nop(CPTAG_MDS);
366 		codepatch_nop(CPTAG_MDS_VMM);
367 		splx(s);
368 	}
369 }
370 
371 #ifdef MULTIPROCESSOR
372 int mp_cpu_start(struct cpu_info *);
373 void mp_cpu_start_cleanup(struct cpu_info *);
374 struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
375 				      mp_cpu_start_cleanup };
376 #endif /* MULTIPROCESSOR */
377 
378 struct cfattach cpu_ca = {
379 	sizeof(struct cpu_softc), cpu_match, cpu_attach, NULL, cpu_activate
380 };
381 
382 struct cfdriver cpu_cd = {
383 	NULL, "cpu", DV_DULL
384 };
385 
386 /*
387  * Statically-allocated CPU info for the primary CPU (or the only
388  * CPU, on uniprocessors).  The CPU info list is initialized to
389  * point at it.
390  */
391 struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary } };
392 
393 struct cpu_info *cpu_info_list = &cpu_info_primary;
394 
395 #ifdef MULTIPROCESSOR
396 /*
397  * Array of CPU info structures.  Must be statically-allocated because
398  * curproc, etc. are used early.
399  */
400 struct cpu_info *cpu_info[MAXCPUS] = { &cpu_info_primary };
401 
402 void    	cpu_hatch(void *);
403 void    	cpu_boot_secondary(struct cpu_info *ci);
404 void    	cpu_start_secondary(struct cpu_info *ci);
405 #endif
406 
407 int
408 cpu_match(struct device *parent, void *match, void *aux)
409 {
410 	struct cfdata *cf = match;
411 	struct cpu_attach_args *caa = aux;
412 
413 	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
414 		return 0;
415 
416 	if (cf->cf_unit >= MAXCPUS)
417 		return 0;
418 
419 	return 1;
420 }
421 
422 void	cpu_idle_mwait_cycle(void);
423 void	cpu_init_mwait(struct cpu_softc *);
424 
425 u_int	cpu_mwait_size, cpu_mwait_states;
426 
427 void
428 cpu_idle_mwait_cycle(void)
429 {
430 	struct cpu_info *ci = curcpu();
431 
432 	if ((read_rflags() & PSL_I) == 0)
433 		panic("idle with interrupts blocked!");
434 
435 	/* something already queued? */
436 	if (!cpu_is_idle(ci))
437 		return;
438 
439 	/*
440 	 * About to idle; setting the MWAIT_IN_IDLE bit tells
441 	 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
442 	 * that it doesn't need to use an IPI.  We also set the
443 	 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
444 	 * the mwait.  Once they're set, we do a final check of the
445 	 * queue, in case another cpu called setrunqueue() and added
446 	 * something to the queue and called cpu_unidle() between
447 	 * the check in sched_idle() and here.
448 	 */
449 	atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
450 	if (cpu_is_idle(ci)) {
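		/*
		 * Arm the monitor on ci_mwait itself: a write that clears
		 * these bits (cpu_unidle()/cpu_kick() on another CPU)
		 * touches the monitored line and ends the mwait below.
		 */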
451 		monitor(&ci->ci_mwait, 0, 0);
452 		if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
453 			mwait(0, 0);
454 	}
455 
456 	/* done idling; let cpu_kick() know that an IPI is required */
457 	atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
458 }
459 
460 void
461 cpu_init_mwait(struct cpu_softc *sc)
462 {
463 	unsigned int smallest, largest, extensions, c_substates;
464 
465 	if ((cpu_ecxfeature & CPUIDECX_MWAIT) == 0 || cpuid_level < 0x5)
466 		return;
467 
468 	/* get the monitor granularity */
469 	CPUID(0x5, smallest, largest, extensions, cpu_mwait_states);
470 	smallest &= 0xffff;
471 	largest  &= 0xffff;
472 
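	/*
	 * ECX bit 0 says the sub C-state counts in EDX are valid (4 bits
	 * per C-state, C0 in the low nibble); ECX bit 1 (IBE) says
	 * interrupts can break mwait even while disabled.
	 */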
473 	printf("%s: mwait min=%u, max=%u", sc->sc_dev.dv_xname,
474 	    smallest, largest);
475 	if (extensions & 0x1) {
476 		if (cpu_mwait_states > 0) {
477 			c_substates = cpu_mwait_states;
478 			printf(", C-substates=%u", 0xf & c_substates);
479 			while ((c_substates >>= 4) > 0)
480 				printf(".%u", 0xf & c_substates);
481 		}
482 		if (extensions & 0x2)
483 			printf(", IBE");
484 	} else {
485 		/* substates not supported, forge the default: just C1 */
486 		cpu_mwait_states = 1 << 4;
487 	}
488 
489 	/* paranoia: check the values */
490 	if (smallest < sizeof(int) || largest < smallest ||
491 	    (largest & (sizeof(int)-1)))
492 		printf(" (bogus)");
493 	else
494 		cpu_mwait_size = largest;
495 	printf("\n");
496 
497 	/* enable use of mwait; may be overridden by acpicpu later */
498 	if (cpu_mwait_size > 0)
499 		cpu_idle_cycle_fcn = &cpu_idle_mwait_cycle;
500 }
501 
502 void
503 cpu_attach(struct device *parent, struct device *self, void *aux)
504 {
505 	struct cpu_softc *sc = (void *) self;
506 	struct cpu_attach_args *caa = aux;
507 	struct cpu_info *ci;
508 #if defined(MULTIPROCESSOR)
509 	int cpunum = sc->sc_dev.dv_unit;
510 	vaddr_t kstack;
511 	struct pcb *pcb;
512 #endif
513 
514 	/*
515 	 * If we're an Application Processor, allocate a cpu_info
516 	 * structure, otherwise use the primary's.
517 	 */
518 	if (caa->cpu_role == CPU_ROLE_AP) {
519 		struct cpu_info_full *cif;
520 
521 		cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok);
522 		ci = &cif->cif_cpu;
523 #if defined(MULTIPROCESSOR)
524 		ci->ci_tss = &cif->cif_tss;
525 		ci->ci_gdt = &cif->cif_gdt;
526 		memcpy(ci->ci_gdt, cpu_info_primary.ci_gdt, GDT_SIZE);
527 		cpu_enter_pages(cif);
528 		if (cpu_info[cpunum] != NULL)
529 			panic("cpu%d already attached?", cpunum);
530 		cpu_info[cpunum] = ci;
531 #endif
532 #ifdef TRAPLOG
533 		ci->ci_tlog_base = malloc(sizeof(struct tlog),
534 		    M_DEVBUF, M_WAITOK);
535 #endif
536 	} else {
537 		ci = &cpu_info_primary;
538 #if defined(MULTIPROCESSOR)
539 		if (caa->cpu_apicid != lapic_cpu_number()) {
540 			panic("%s: running cpu is at apic %d"
541 			    " instead of at expected %d",
542 			    sc->sc_dev.dv_xname, lapic_cpu_number(), caa->cpu_apicid);
543 		}
544 #endif
545 	}
546 
547 	ci->ci_self = ci;
548 	sc->sc_info = ci;
549 
550 	ci->ci_dev = self;
551 	ci->ci_apicid = caa->cpu_apicid;
552 	ci->ci_acpi_proc_id = caa->cpu_acpi_proc_id;
553 #ifdef MULTIPROCESSOR
554 	ci->ci_cpuid = cpunum;
555 #else
556 	ci->ci_cpuid = 0;	/* False for APs, but they're not used anyway */
557 #endif
558 	ci->ci_func = caa->cpu_func;
559 	ci->ci_handled_intr_level = IPL_NONE;
560 
561 #if defined(MULTIPROCESSOR)
562 	/*
563 	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
564 	 */
565 	kstack = (vaddr_t)km_alloc(USPACE, &kv_any, &kp_dirty, &kd_nowait);
566 	if (kstack == 0) {
567 		if (caa->cpu_role != CPU_ROLE_AP) {
568 			panic("cpu_attach: unable to allocate idle stack for"
569 			    " primary");
570 		}
571 		printf("%s: unable to allocate idle stack\n",
572 		    sc->sc_dev.dv_xname);
573 		return;
574 	}
575 	pcb = ci->ci_idle_pcb = (struct pcb *) kstack;
576 	memset(pcb, 0, USPACE);
577 
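	/*
	 * The PCB sits at the bottom of the allocation; the idle stack
	 * grows down from the top.
	 */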
578 	pcb->pcb_kstack = kstack + USPACE - 16;
579 	pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE - 16;
580 	pcb->pcb_pmap = pmap_kernel();
581 	pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
582 #endif
583 
584 	/* further PCB init done later. */
585 
586 	printf(": ");
587 
588 	switch (caa->cpu_role) {
589 	case CPU_ROLE_SP:
590 		printf("(uniprocessor)\n");
591 		ci->ci_flags |= CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY;
592 		cpu_intr_init(ci);
593 #ifndef SMALL_KERNEL
594 		cpu_ucode_apply(ci);
595 #endif
596 		cpu_tsx_disable(ci);
597 		identifycpu(ci);
598 #ifdef MTRR
599 		mem_range_attach();
600 #endif /* MTRR */
601 		/* XXX SP fpuinit(ci) is done earlier */
602 		cpu_init(ci);
603 		cpu_init_mwait(sc);
604 		break;
605 
606 	case CPU_ROLE_BP:
607 		printf("apid %d (boot processor)\n", caa->cpu_apicid);
608 		ci->ci_flags |= CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY;
609 		cpu_intr_init(ci);
610 		identifycpu(ci);
611 #ifdef MTRR
612 		mem_range_attach();
613 #endif /* MTRR */
614 
615 #if NLAPIC > 0
616 		/*
617 		 * Enable local apic
618 		 */
619 		lapic_enable();
620 		lapic_calibrate_timer(ci);
621 #endif
622 		/* XXX BP fpuinit(ci) is done earlier */
623 		cpu_init(ci);
624 
625 #if NIOAPIC > 0
626 		ioapic_bsp_id = caa->cpu_apicid;
627 #endif
628 		cpu_init_mwait(sc);
629 		break;
630 
631 	case CPU_ROLE_AP:
632 		/*
633 		 * report on an AP
634 		 */
635 		printf("apid %d (application processor)\n", caa->cpu_apicid);
636 
637 #if defined(MULTIPROCESSOR)
638 		cpu_intr_init(ci);
639 		cpu_start_secondary(ci);
640 		sched_init_cpu(ci);
641 		ncpus++;
642 		if (ci->ci_flags & CPUF_PRESENT) {
643 			ci->ci_next = cpu_info_list->ci_next;
644 			cpu_info_list->ci_next = ci;
645 		}
646 #else
647 		printf("%s: not started\n", sc->sc_dev.dv_xname);
648 #endif
649 		break;
650 
651 	default:
652 		panic("unknown processor type??");
653 	}
654 
655 #if defined(MULTIPROCESSOR)
656 	if (mp_verbose) {
657 		printf("%s: kstack at 0x%lx for %d bytes\n",
658 		    sc->sc_dev.dv_xname, kstack, USPACE);
659 		printf("%s: idle pcb at %p, idle sp at 0x%llx\n",
660 		    sc->sc_dev.dv_xname, pcb, pcb->pcb_rsp);
661 	}
662 #endif
663 #if NVMM > 0
664 	cpu_init_vmm(ci);
665 #endif /* NVMM > 0 */
666 }
667 
668 static void
669 replacexsave(void)
670 {
671 	extern long _xrstor, _xsave, _xsaveopt;
672 	u_int32_t eax, ebx, ecx, edx;
673 	static int replacedone = 0;
674 	int s;
675 
676 	if (replacedone)
677 		return;
678 	replacedone = 1;
679 
680 	/* find out whether xsaveopt is supported */
681 	CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
682 	s = splhigh();
683 	codepatch_replace(CPTAG_XRSTOR, &_xrstor, 4);
684 	codepatch_replace(CPTAG_XSAVE,
685 	    (eax & XSAVE_XSAVEOPT) ? &_xsaveopt : &_xsave, 4);
686 	splx(s);
687 }
688 
689 
690 /*
691  * Initialize the processor appropriately.
692  */
693 
694 void
695 cpu_init(struct cpu_info *ci)
696 {
697 	struct savefpu *sfp;
698 	u_int cr4;
699 
700 	/* configure the CPU if needed */
701 	if (ci->cpu_setup != NULL)
702 		(*ci->cpu_setup)(ci);
703 
704 	cr4 = rcr4() | CR4_DEFAULT;
705 	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP)
706 		cr4 |= CR4_SMEP;
707 	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
708 		cr4 |= CR4_SMAP;
709 	if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP)
710 		cr4 |= CR4_UMIP;
711 	if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
712 		cr4 |= CR4_OSXSAVE;
713 	if (pmap_use_pcid)
714 		cr4 |= CR4_PCIDE;
715 	lcr4(cr4);
716 
717 	if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd) {
718 		u_int32_t eax, ebx, ecx, edx;
719 
720 		xsave_mask = XCR0_X87 | XCR0_SSE;
721 		CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
722 		if (eax & XCR0_AVX)
723 			xsave_mask |= XCR0_AVX;
724 		xsetbv(0, xsave_mask);
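		/*
		 * Re-read leaf 0xd: EBX now reports the save area size
		 * required for the feature set just enabled in XCR0.
		 */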
725 		CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
726 		if (CPU_IS_PRIMARY(ci)) {
727 			fpu_save_len = ebx;
728 			KASSERT(fpu_save_len <= sizeof(struct savefpu));
729 		} else {
730 			KASSERT(ebx == fpu_save_len);
731 		}
732 
733 		replacexsave();
734 	}
735 
736 	/* Give proc0 a clean FPU save area */
737 	sfp = &proc0.p_addr->u_pcb.pcb_savefpu;
738 	memset(sfp, 0, fpu_save_len);
739 	sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
740 	sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
741 	fpureset();
742 	if (xsave_mask) {
743 		/* must not use xsaveopt here */
744 		xsave(sfp, xsave_mask);
745 	} else
746 		fxsave(sfp);
747 
748 #if NVMM > 0
749 	/* Re-enable VMM if needed */
750 	if (ci->ci_flags & CPUF_VMM)
751 		start_vmm_on_cpu(ci);
752 #endif /* NVMM > 0 */
753 
754 #ifdef MULTIPROCESSOR
755 	ci->ci_flags |= CPUF_RUNNING;
756 	/*
757 	 * Big hammer: flush all TLB entries, including ones from PTEs
758 	 * with the G bit set.  This should only be necessary if TLB
759 	 * shootdown falls far behind.
760 	 */
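	/* toggling CR4.PGE flushes the entire TLB, including global entries */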
761 	cr4 = rcr4();
762 	lcr4(cr4 & ~CR4_PGE);
763 	lcr4(cr4);
764 
765 	/* Synchronize TSC */
766 	if (cold && !CPU_IS_PRIMARY(ci))
767 	      tsc_sync_ap(ci);
768 #endif
769 }
770 
771 #if NVMM > 0
772 /*
773  * cpu_init_vmm
774  *
775  * Initializes per-cpu VMM state
776  *
777  * Parameters:
778  *  ci: the cpu for which state is being initialized
779  */
780 void
781 cpu_init_vmm(struct cpu_info *ci)
782 {
783 	/*
784 	 * Allocate a per-cpu VMXON region for VMX CPUs
785 	 */
786 	if (ci->ci_vmm_flags & CI_VMM_VMX) {
787 		ci->ci_vmxon_region = (struct vmxon_region *)malloc(PAGE_SIZE,
788 		    M_DEVBUF, M_WAITOK | M_ZERO);
789 		if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
790 		    &ci->ci_vmxon_region_pa))
791 			panic("Can't locate VMXON region in phys mem");
792 		ci->ci_vmcs_pa = VMX_VMCS_PA_CLEAR;
793 		rw_init(&ci->ci_vmcs_lock, "vmcslock");
794 	}
795 }
796 #endif /* NVMM > 0 */
797 
798 #ifdef MULTIPROCESSOR
799 void
800 cpu_boot_secondary_processors(void)
801 {
802 	struct cpu_info *ci;
803 	u_long i;
804 
805 	for (i = 0; i < MAXCPUS; i++) {
806 		ci = cpu_info[i];
807 		if (ci == NULL)
808 			continue;
809 		if (ci->ci_idle_pcb == NULL)
810 			continue;
811 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
812 			continue;
813 		if (ci->ci_flags & (CPUF_BSP | CPUF_SP | CPUF_PRIMARY))
814 			continue;
815 		ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
816 		cpu_boot_secondary(ci);
817 	}
818 }
819 
820 void
821 cpu_start_secondary(struct cpu_info *ci)
822 {
823 	int i;
824 	u_long s;
825 
826 	ci->ci_flags |= CPUF_AP;
827 
828 	pmap_kenter_pa(MP_TRAMPOLINE, MP_TRAMPOLINE, PROT_READ | PROT_EXEC);
829 	pmap_kenter_pa(MP_TRAMP_DATA, MP_TRAMP_DATA, PROT_READ | PROT_WRITE);
830 
831 	CPU_STARTUP(ci);
832 
833 	/*
834 	 * wait for it to become ready
835 	 */
836 	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
837 		delay(10);
838 	}
839 	if (!(ci->ci_flags & CPUF_PRESENT)) {
840 		printf("%s: failed to become ready\n", ci->ci_dev->dv_xname);
841 #if defined(MPDEBUG) && defined(DDB)
842 		printf("dropping into debugger; continue from here to resume boot\n");
843 		db_enter();
844 #endif
845 	} else {
846 		/*
847 		 * Synchronize time stamp counters. Invalidate cache and
848 		 * synchronize twice (in tsc_sync_bp) to minimize possible
849 		 * cache effects. Disable interrupts to try to rule out any
850 		 * external interference.
851 		 */
852 		s = intr_disable();
853 		wbinvd();
854 		tsc_sync_bp(ci);
855 		intr_restore(s);
856 #ifdef TSC_DEBUG
857 		printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
858 #endif
859 	}
860 
861 	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
862 		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
863 
864 		/* wait for it to identify */
865 		for (i = 2000000; (ci->ci_flags & CPUF_IDENTIFY) && i > 0; i--)
866 			delay(10);
867 
868 		if (ci->ci_flags & CPUF_IDENTIFY)
869 			printf("%s: failed to identify\n",
870 			    ci->ci_dev->dv_xname);
871 	}
872 
873 	CPU_START_CLEANUP(ci);
874 
875 	pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
876 	pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
877 }
878 
879 void
880 cpu_boot_secondary(struct cpu_info *ci)
881 {
882 	int i;
883 	int64_t drift;
884 	u_long s;
885 
886 	atomic_setbits_int(&ci->ci_flags, CPUF_GO);
887 
888 	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
889 		delay(10);
890 	}
891 	if (!(ci->ci_flags & CPUF_RUNNING)) {
892 		printf("cpu failed to start\n");
893 #if defined(MPDEBUG) && defined(DDB)
894 		printf("dropping into debugger; continue from here to resume boot\n");
895 		db_enter();
896 #endif
897 	} else if (cold) {
898 		/* Synchronize TSC again, check for drift. */
899 		drift = ci->ci_tsc_skew;
900 		s = intr_disable();
901 		wbinvd();
902 		tsc_sync_bp(ci);
903 		intr_restore(s);
904 		drift -= ci->ci_tsc_skew;
905 #ifdef TSC_DEBUG
906 		printf("TSC skew=%lld drift=%lld\n",
907 		    (long long)ci->ci_tsc_skew, (long long)drift);
908 #endif
909 		tsc_sync_drift(drift);
910 	}
911 }
912 
913 /*
914  * The CPU ends up here when it's ready to run.
915  * This is called from code in mptramp.s; at this point, we are running
916  * in the idle pcb/idle stack of the new cpu.  When this function returns,
917  * this processor will enter the idle loop and start looking for work.
918  *
919  * XXX should share some of this with init386 in machdep.c
920  */
921 void
922 cpu_hatch(void *v)
923 {
924 	struct cpu_info *ci = (struct cpu_info *)v;
925 	int s;
926 
927 	cpu_init_msrs(ci);
928 
929 #ifdef DEBUG
930 	if (ci->ci_flags & CPUF_PRESENT)
931 		panic("%s: already running!?", ci->ci_dev->dv_xname);
932 #endif
933 
934 	/*
935 	 * Synchronize the TSC for the first time. Note that interrupts are
936 	 * off at this point.
937 	 */
938 	wbinvd();
939 	ci->ci_flags |= CPUF_PRESENT;
940 	ci->ci_tsc_skew = 0;	/* reset on resume */
941 	tsc_sync_ap(ci);
942 
943 	lapic_enable();
944 	lapic_startclock();
945 	cpu_ucode_apply(ci);
946 	cpu_tsx_disable(ci);
947 
948 	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
949 		/*
950 		 * We need to wait until we can identify; otherwise dmesg
951 		 * output will be messy.
952 		 */
953 		while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
954 			delay(10);
955 
956 		identifycpu(ci);
957 
958 		/* Signal we're done */
959 		atomic_clearbits_int(&ci->ci_flags, CPUF_IDENTIFY);
960 		/* Prevent identifycpu() from running again */
961 		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
962 	}
963 
964 	while ((ci->ci_flags & CPUF_GO) == 0)
965 		delay(10);
966 #ifdef HIBERNATE
967 	if ((ci->ci_flags & CPUF_PARK) != 0) {
968 		atomic_clearbits_int(&ci->ci_flags, CPUF_PARK);
969 		hibernate_drop_to_real_mode();
970 	}
971 #endif /* HIBERNATE */
972 
973 #ifdef DEBUG
974 	if (ci->ci_flags & CPUF_RUNNING)
975 		panic("%s: already running!?", ci->ci_dev->dv_xname);
976 #endif
977 
978 	cpu_init_idt();
979 	lapic_set_lvt();
980 	gdt_init_cpu(ci);
981 	fpuinit(ci);
982 
983 	lldt(0);
984 
985 	cpu_init(ci);
986 #if NPVBUS > 0
987 	pvbus_init_cpu();
988 #endif
989 
990 	/* Re-initialise memory range handling on AP */
991 	if (mem_range_softc.mr_op != NULL)
992 		mem_range_softc.mr_op->initAP(&mem_range_softc);
993 
994 	s = splhigh();
995 	lcr8(0);
996 	intr_enable();
997 
998 	nanouptime(&ci->ci_schedstate.spc_runtime);
999 	splx(s);
1000 
1001 	SCHED_LOCK(s);
1002 	cpu_switchto(NULL, sched_chooseproc());
1003 }
1004 
1005 #if defined(DDB)
1006 
1007 #include <ddb/db_output.h>
1008 #include <machine/db_machdep.h>
1009 
1010 /*
1011  * Dump cpu information from ddb.
1012  */
1013 void
1014 cpu_debug_dump(void)
1015 {
1016 	struct cpu_info *ci;
1017 	CPU_INFO_ITERATOR cii;
1018 
1019 	db_printf("addr		dev	id	flags	ipis	curproc\n");
1020 	CPU_INFO_FOREACH(cii, ci) {
1021 		db_printf("%p	%s	%u	%x	%x	%10p\n",
1022 		    ci,
1023 		    ci->ci_dev == NULL ? "BOOT" : ci->ci_dev->dv_xname,
1024 		    ci->ci_cpuid,
1025 		    ci->ci_flags, ci->ci_ipis,
1026 		    ci->ci_curproc);
1027 	}
1028 }
1029 #endif
1030 
1031 int
1032 mp_cpu_start(struct cpu_info *ci)
1033 {
1034 	unsigned short dwordptr[2];
1035 
1036 	/*
1037 	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
1038 	 */
1039 
1040 	outb(IO_RTC, NVRAM_RESET);
1041 	outb(IO_RTC+1, NVRAM_RESET_JUMP);
1042 
1043 	/*
1044 	 * "and the warm reset vector (DWORD based at 40:67) to point
1045 	 * to the AP startup code ..."
1046 	 */
1047 
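	/*
	 * Real-mode far pointer: offset in the low word, segment in the
	 * high word; 40:67 is linear address 0x467, written below.
	 */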
1048 	dwordptr[0] = 0;
1049 	dwordptr[1] = MP_TRAMPOLINE >> 4;
1050 
1051 	pmap_kenter_pa(0, 0, PROT_READ | PROT_WRITE);
1052 	memcpy((u_int8_t *) 0x467, dwordptr, 4);
1053 	pmap_kremove(0, PAGE_SIZE);
1054 
1055 #if NLAPIC > 0
1056 	/*
1057 	 * ... prior to executing the following sequence:"
1058 	 */
1059 
1060 	if (ci->ci_flags & CPUF_AP) {
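		/*
		 * Standard INIT followed by two STARTUP IPIs; the STARTUP
		 * vector is the page number of the trampoline code.
		 */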
1061 		x86_ipi_init(ci->ci_apicid);
1062 
1063 		delay(10000);
1064 
1065 		if (cpu_feature & CPUID_APIC) {
1066 			x86_ipi(MP_TRAMPOLINE/PAGE_SIZE, ci->ci_apicid,
1067 			    LAPIC_DLMODE_STARTUP);
1068 			delay(200);
1069 
1070 			x86_ipi(MP_TRAMPOLINE/PAGE_SIZE, ci->ci_apicid,
1071 			    LAPIC_DLMODE_STARTUP);
1072 			delay(200);
1073 		}
1074 	}
1075 #endif
1076 	return 0;
1077 }
1078 
1079 void
1080 mp_cpu_start_cleanup(struct cpu_info *ci)
1081 {
1082 	/*
1083 	 * Ensure the NVRAM reset byte contains something vaguely sane.
1084 	 */
1085 
1086 	outb(IO_RTC, NVRAM_RESET);
1087 	outb(IO_RTC+1, NVRAM_RESET_RST);
1088 }
1089 #endif	/* MULTIPROCESSOR */
1090 
1091 typedef void (vector)(void);
1092 extern vector Xsyscall_meltdown, Xsyscall, Xsyscall32;
1093 
1094 void
1095 cpu_init_msrs(struct cpu_info *ci)
1096 {
1097 	uint64_t msr;
1098 	int family;
1099 
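	/*
	 * Syscall entry/exit segments: STAR[47:32] is the kernel CS/SS
	 * base loaded by syscall, STAR[63:48] the base from which sysret
	 * derives the user CS and SS; SFMASK lists the rflags bits
	 * cleared on syscall entry.
	 */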
1100 	wrmsr(MSR_STAR,
1101 	    ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
1102 	    ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48));
1103 	wrmsr(MSR_LSTAR, cpu_meltdown ? (uint64_t)Xsyscall_meltdown :
1104 	    (uint64_t)Xsyscall);
1105 	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
1106 	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);
1107 
1108 	wrmsr(MSR_FSBASE, 0);
1109 	wrmsr(MSR_GSBASE, (u_int64_t)ci);
1110 	wrmsr(MSR_KERNELGSBASE, 0);
1111 
1112 	family = ci->ci_family;
1113 	if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
1114 	    (family > 6 || (family == 6 && ci->ci_model >= 0xd)) &&
1115 	    rdmsr_safe(MSR_MISC_ENABLE, &msr) == 0 &&
1116 	    (msr & MISC_ENABLE_FAST_STRINGS) == 0) {
1117 		msr |= MISC_ENABLE_FAST_STRINGS;
1118 		wrmsr(MSR_MISC_ENABLE, msr);
1119 		DPRINTF("%s: enabled fast strings\n", ci->ci_dev->dv_xname);
1120 	}
1121 
1122 	patinit(ci);
1123 }
1124 
1125 void
1126 cpu_tsx_disable(struct cpu_info *ci)
1127 {
1128 	uint64_t msr;
1129 	uint32_t dummy, sefflags_edx;
1130 
1131 	/* this runs before identifycpu() populates ci_feature_sefflags_edx */
1132 	if (cpuid_level < 0x07)
1133 		return;
1134 	CPUID_LEAF(0x7, 0, dummy, dummy, dummy, sefflags_edx);
1135 
1136 	if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
1137 	    (sefflags_edx & SEFF0EDX_ARCH_CAP)) {
1138 		msr = rdmsr(MSR_ARCH_CAPABILITIES);
1139 		if (msr & ARCH_CAPABILITIES_TSX_CTRL) {
1140 			msr = rdmsr(MSR_TSX_CTRL);
1141 			msr |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_TSX_CPUID_CLEAR;
1142 			wrmsr(MSR_TSX_CTRL, msr);
1143 		}
1144 	}
1145 }
1146 
1147 void
1148 patinit(struct cpu_info *ci)
1149 {
1150 	extern int	pmap_pg_wc;
1151 	u_int64_t	reg;
1152 
1153 	if ((cpu_feature & CPUID_PAT) == 0)
1154 		return;
1155 	/*
1156 	 * Set up PAT bits.
1157 	 * The default pat table is the following:
1158 	 * WB, WT, UC-, UC, WB, WT, UC-, UC
1159 	 * We change it to:
1160 	 * WB, WC, UC-, UC, WB, WC, UC-, UC
1161 	 * i.e. change the WT entries to WC.
1162 	 */
1163 	reg = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
1164 	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
1165 	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
1166 	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
1167 
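	/*
	 * Assuming PATENTRY(n, t) places type t in byte n and the
	 * architectural encodings UC=0, WC=1, WB=6, UC-=7, this writes
	 * 0x0007010600070106 to the PAT MSR.
	 */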
1168 	wrmsr(MSR_CR_PAT, reg);
1169 	pmap_pg_wc = PG_WC;
1170 }
1171 
1172 struct timeout rdrand_tmo;
1173 void rdrand(void *);
1174 
1175 void
1176 rdrand(void *v)
1177 {
1178 	struct timeout *tmo = v;
1179 	extern int	has_rdrand, has_rdseed;
1180 	union {
1181 		uint64_t u64;
1182 		uint32_t u32[2];
1183 	} r, t;
1184 	uint64_t tsc;
1185 	uint8_t valid = 0;
1186 
1187 	tsc = rdtsc();
1188 	if (has_rdseed)
1189 		__asm volatile(
1190 		    "rdseed	%0\n\t"
1191 		    "setc	%1\n"
1192 		    : "=r" (r.u64), "=qm" (valid) );
1193 	if (has_rdrand && (has_rdseed == 0 || valid == 0))
1194 		__asm volatile(
1195 		    "rdrand	%0\n\t"
1196 		    "setc	%1\n"
1197 		    : "=r" (r.u64), "=qm" (valid) );
1198 
1199 	t.u64 = tsc;
1200 	t.u64 ^= r.u64;
1201 	t.u64 ^= valid;			/* potential rdrand empty */
1202 	if (has_rdrand)
1203 		t.u64 += rdtsc();	/* potential vmexit latency */
1204 
1205 	enqueue_randomness(t.u32[0]);
1206 	enqueue_randomness(t.u32[1]);
1207 
1208 	if (tmo)
1209 		timeout_add_msec(tmo, 10);
1210 }
1211 
1212 int
1213 cpu_activate(struct device *self, int act)
1214 {
1215 	struct cpu_softc *sc = (struct cpu_softc *)self;
1216 
1217 	switch (act) {
1218 	case DVACT_RESUME:
1219 		if (sc->sc_info->ci_cpuid == 0)
1220 			rdrand(NULL);
1221 #if NPCTR > 0
1222 		pctr_resume(sc->sc_info);
1223 #endif
1224 		break;
1225 	}
1226 
1227 	return (0);
1228 }
1229 
1230 /*
1231  * cpu_enter_pages
1232  *
1233  * Requests mapping of various special pages required in the Intel Meltdown
1234  * case (to be entered into the U-K page table):
1235  *
1236  *  1 tss+gdt page for each CPU
1237  *  1 trampoline stack page for each CPU
1238  *
1239  * The cpu_info_full struct for each CPU straddles these pages. The offset into
1240  * 'cif' is calculated below, for each page. For more information, consult
1241  * the definition of struct cpu_info_full in cpu_full.h
1242  *
1243  * On CPUs unaffected by Meltdown, this function still configures 'cif' but
1244  * the calls to pmap_enter_special become no-ops.
1245  *
1246  * Parameters:
1247  *  cif : the cpu_info_full structure describing a CPU whose pages are to be
1248  *    entered into the special meltdown U-K page table.
1249  */
1250 void
1251 cpu_enter_pages(struct cpu_info_full *cif)
1252 {
1253 	vaddr_t va;
1254 	paddr_t pa;
1255 
1256 	/* The TSS+GDT need to be readable */
1257 	va = (vaddr_t)cif;
1258 	pmap_extract(pmap_kernel(), va, &pa);
1259 	pmap_enter_special(va, pa, PROT_READ);
1260 	DPRINTF("%s: entered tss+gdt page at va 0x%llx pa 0x%llx\n", __func__,
1261 	   (uint64_t)va, (uint64_t)pa);
1262 
1263 	/* The trampoline stack page needs to be read/write */
1264 	va = (vaddr_t)&cif->cif_tramp_stack;
1265 	pmap_extract(pmap_kernel(), va, &pa);
1266 	pmap_enter_special(va, pa, PROT_READ | PROT_WRITE);
1267 	DPRINTF("%s: entered t.stack page at va 0x%llx pa 0x%llx\n", __func__,
1268 	   (uint64_t)va, (uint64_t)pa);
1269 
1270 	cif->cif_tss.tss_rsp0 = va + sizeof(cif->cif_tramp_stack) - 16;
1271 	DPRINTF("%s: cif_tss.tss_rsp0 = 0x%llx\n", __func__,
1272 	    (uint64_t)cif->cif_tss.tss_rsp0);
1273 	cif->cif_cpu.ci_intr_rsp = cif->cif_tss.tss_rsp0 -
1274 	    sizeof(struct iretq_frame);
1275 
1276 #define	SETUP_IST_SPECIAL_STACK(ist, cif, member) do {			\
1277 	(cif)->cif_tss.tss_ist[(ist)] = (vaddr_t)&(cif)->member +	\
1278 	    sizeof((cif)->member) - 16;					\
1279 	(cif)->member[nitems((cif)->member) - 2] = (int64_t)&(cif)->cif_cpu; \
1280 } while (0)
1281 
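	/*
	 * Each use below points the given IST entry at the top of the
	 * corresponding per-cpu stack (minus 16 bytes) and stashes a
	 * pointer to the cpu_info in the next-to-last slot, presumably
	 * so the trap entry code can recover the cpu_info from the IST
	 * stack itself.
	 */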
1282 	SETUP_IST_SPECIAL_STACK(0, cif, cif_dblflt_stack);
1283 	SETUP_IST_SPECIAL_STACK(1, cif, cif_nmi_stack);
1284 
1285 	/* an empty iomap, by setting its offset to the TSS limit */
1286 	cif->cif_tss.tss_iobase = sizeof(cif->cif_tss);
1287 }
1288 
1289 #ifdef MULTIPROCESSOR
1290 int
1291 wbinvd_on_all_cpus(void)
1292 {
1293 	x86_broadcast_ipi(X86_IPI_WBINVD);
1294 	wbinvd();
1295 	return 0;
1296 }
1297 #endif
1298