/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * Copyright (c) 2008-2018 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/interrupt.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/lwp.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/segments.h>
#include <machine/globaldata.h>	/* npxthread */
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#include <bus/isa/isa.h>

static void	cpu_reset_real (void);

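/*
 * machdep.spectre_mitigation encoding (see spectre_sysctl_changed()):
 *
 *	-1		no support / not yet probed (sysctl becomes read-only)
 *	bits 0-1	IBRS mode: 0 disabled, 1 kernel-only, 2 always-on
 *	bit 2 (0x4)	enable IBPB
 *
 * For example, "sysctl machdep.spectre_mitigation=5" selects kernel-only
 * IBRS plus IBPB, provided the microcode advertises both features.
 */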
int spectre_mitigation = -1;

static int spectre_ibrs_mode = 0;
SYSCTL_INT(_machdep, OID_AUTO, spectre_ibrs_mode, CTLFLAG_RD,
	&spectre_ibrs_mode, 0, "current IBRS mode");
static int spectre_ibpb_mode = 0;
SYSCTL_INT(_machdep, OID_AUTO, spectre_ibpb_mode, CTLFLAG_RD,
	&spectre_ibpb_mode, 0, "current IBPB mode");
static int spectre_ibrs_supported = 0;
SYSCTL_INT(_machdep, OID_AUTO, spectre_ibrs_supported, CTLFLAG_RD,
	&spectre_ibrs_supported, 0, "IBRS mode supported");
static int spectre_ibpb_supported = 0;
SYSCTL_INT(_machdep, OID_AUTO, spectre_ibpb_supported, CTLFLAG_RD,
	&spectre_ibpb_supported, 0, "IBPB mode supported");

/*
 * Finish a fork operation, with lwp lp2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
{
	struct pcb *pcb2;
	struct pmap *pmap2;

	if ((flags & RFPROC) == 0) {
		if ((flags & RFMEM) == 0) {
			/*
			 * Unshare the user LDT.  The refcnt > 1 test is
			 * MPSAFE.  While it can race a 2->1 transition,
			 * the worst that happens is an unnecessary LDT
			 * replacement.
			 */
			struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;

			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
				pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
				user_ldt_free(pcb1);
				pcb1->pcb_ldt = pcb_ldt;
				set_user_ldt(pcb1);
			}
		}
		return;
	}

	/* Ensure that lp1's pcb is up to date. */
	if (mdcpu->gd_npxthread == lp1->lwp_thread)
		npxsave(lp1->lwp_thread->td_savefpu);

	/*
	 * Copy lp1's PCB.  This really only applies to the debug registers
	 * and FP state, but it's faster to just copy the whole thing.
	 * Because we only save the PCB at switchout time, the register
	 * state may not be current.
	 */
	pcb2 = lp2->lwp_thread->td_pcb;
	*pcb2 = *lp1->lwp_thread->td_pcb;

	/*
	 * Create a fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 *
	 * pcb_rsp must allocate an additional call-return pointer below
	 * the trap frame which will be restored by cpu_heavy_restore from
	 * PCB_RIP, and the thread's td_sp pointer must allocate an
	 * additional two quadwords below the pcb_rsp call-return pointer to
	 * hold the LWKT restore function pointer and rflags.
	 *
	 * The LWKT restore function pointer must be set to cpu_heavy_restore,
	 * which is our standard heavy-weight process switch-in function.
	 * YYY eventually we should shortcut fork_return and fork_trampoline
	 * to use the LWKT restore function directly so we can get rid of
	 * all the extra crap we are setting up.
	 */
	lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
	bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs));

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on the stack.  These are the kernel mode register
	 * values.
	 *
	 * Set the new pmap CR3.  If the new process uses isolated VM spaces,
	 * also set the isolated CR3.
	 */
	pmap2 = vmspace_pmap(lp2->lwp_proc->p_vmspace);
	pcb2->pcb_cr3 = vtophys(pmap2->pm_pml4);
	if ((pcb2->pcb_flags & PCB_ISOMMU) && pmap2->pm_pmlpv_iso) {
		pcb2->pcb_cr3_iso = vtophys(pmap2->pm_pml4_iso);
	} else {
		pcb2->pcb_flags &= ~PCB_ISOMMU;
		pcb2->pcb_cr3_iso = 0;
	}

#if 0
	/*
	 * Per-process spectre mitigation (future)
	 */
	pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
	switch (spectre_mitigation) {
	case 1:
		pcb2->pcb_flags |= PCB_IBRS1;
		break;
	case 2:
		pcb2->pcb_flags |= PCB_IBRS2;
		break;
	default:
		break;
	}
#endif

	pcb2->pcb_rbx = (unsigned long)fork_return;	/* fork_trampoline argument */
	pcb2->pcb_rbp = 0;
	pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
	pcb2->pcb_r12 = (unsigned long)lp2;		/* fork_trampoline argument */
	pcb2->pcb_r13 = 0;
	pcb2->pcb_r14 = 0;
	pcb2->pcb_r15 = 0;
	pcb2->pcb_rip = (unsigned long)fork_trampoline;
	lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
	*(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
	lp2->lwp_thread->td_sp -= sizeof(void *);
	*(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
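	/*
	 * Resulting kernel stack layout for lp2 (sketch, high to low):
	 *
	 *	pcb2			(td_pcb)
	 *	trapframe		(lwp_md.md_regs)
	 *	call-return slot	(pcb_rsp; cpu_heavy_restore returns
	 *				 through it to pcb_rip)
	 *	rflags = PSL_USER
	 *	cpu_heavy_restore	(td_sp, LWKT restore function)
	 */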

	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above.
	 * pcb2->pcb_onfault:	cloned above (always NULL here).
	 * pcb2->pcb_onfault_sp: cloned above (don't care).
	 */

	/*
	 * XXX don't copy the I/O pages.  This should probably be fixed.
	 */
	pcb2->pcb_ext = NULL;

	/* Copy the LDT, if necessary. */
	if (pcb2->pcb_ldt != NULL) {
		if (flags & RFMEM) {
			atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
		} else {
			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
						       pcb2->pcb_ldt->ldt_len);
		}
	}
	bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
	      sizeof(lp2->lwp_thread->td_tls));
	/*
	 * Now, cpu_switch() can schedule the new lwp.
	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %rbx loaded with fork_return() and %r12 with the new lwp
	 * pointer.  fork_trampoline() will set up a stack to call
	 * fork_return(lp, frame); to complete the return to user-mode.
	 */
}

/*
 * Prepare new lwp to return to the address specified in params.
 */
int
cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
{
	struct trapframe *regs = lp->lwp_md.md_regs;
	void *bad_return = NULL;
	int error;

	regs->tf_rip = (long)params->lwp_func;
	regs->tf_rsp = (long)params->lwp_stack;
	/* Set up argument for function call */
	regs->tf_rdi = (long)params->lwp_arg;

	/*
	 * Set up a fake return address.  As the lwp function may never
	 * return, we simply copy out a NULL pointer and force the lwp to
	 * receive a SIGSEGV if it returns anyway.
	 */
	regs->tf_rsp -= sizeof(void *);
	error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
	if (error)
		return (error);

	if (lp->lwp_proc->p_vmm) {
		lp->lwp_thread->td_pcb->pcb_cr3 = KPML4phys;
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))vmm_lwp_return, lp);
	} else {
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
	}
	return (0);
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
		     void *arg)
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	lp->lwp_thread->td_pcb->pcb_rbx = (long)func;	/* function */
	lp->lwp_thread->td_pcb->pcb_r12 = (long)arg;	/* first arg */
}

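/*
 * Set up a pure kernel (LWKT-only) thread so that its first switch-in
 * passes through cpu_kthread_restore and then calls func(arg), staged in
 * pcb_rbx/pcb_r12.  rfunc is pushed as the return address and therefore
 * runs as the exit function if func ever returns.
 */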
void
cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
{
	td->td_pcb->pcb_rbx = (long)func;
	td->td_pcb->pcb_r12 = (long)arg;
	td->td_switch = cpu_lwkt_switch;
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = rfunc;	/* exit function on return */
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = cpu_kthread_restore;
}

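/*
 * Per-lwp exit cleanup: clear any hardware debug registers that were in
 * use, account for the context switch, and terminate the current thread
 * via cpu_thread_exit().
 */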
void
cpu_lwp_exit(void)
{
	struct thread *td = curthread;
	struct pcb *pcb;

	pcb = td->td_pcb;

	/* Some x86 functionality was dropped */
	KKASSERT(pcb->pcb_ext == NULL);

	/*
	 * disable all hardware breakpoints
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		reset_dbregs();
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
	td->td_gd->gd_cnt.v_swtch++;

	crit_enter_quick(td);
	if (td->td_flags & TDF_TSLEEPQ)
		tsleep_remove(td);
	lwkt_deschedule_self(td);
	lwkt_remove_tdallq(td);
	cpu_thread_exit();
}

/*
 * Terminate the current thread.  The caller must have already acquired
 * the thread's rwlock and placed it on a reap list or otherwise notified
 * a reaper of its existence.  We set a special assembly switch function
 * which releases td_rwlock after it has cleaned up the MMU state and
 * switched out the stack.
 *
 * Must be called from a critical section and with the thread descheduled.
 */
void
cpu_thread_exit(void)
{
	npxexit();
	curthread->td_switch = cpu_exit_switch;
	curthread->td_flags |= TDF_EXITING;
	lwkt_switch();
	panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
}

void
cpu_reset(void)
{
	cpu_reset_real();
}

static void
cpu_reset_real(void)
{
	/*
	 * Attempt to do a CPU reset via the keyboard controller.  Do not
	 * turn off GateA20, as any machine that fails to do the reset
	 * here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	kprintf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for kprintf to complete */
#endif
#if 0 /* JG */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);
#endif

	/* "good night, sweet prince .... <THUNK!>" */
	cpu_invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Convert kernel VA to physical address
 */
vm_paddr_t
kvtop(void *addr)
{
	vm_paddr_t pa;

	pa = pmap_kextract((vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return (pa);
}

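/*
 * VM software interrupt: drain any pending busdma callbacks.
 */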
static void
swi_vm(void *arg, void *frame)
{
	if (busdma_swi_pending != 0)
		busdma_swi();
}

static void
swi_vm_setup(void *arg)
{
	register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
}

SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);

/*
 * NOTE: This routine is also called after a successful microcode
 *	 reload on cpu 0.
 */
void spectre_vm_setup(void *arg);

/*
 * Check for IBPB and IBRS support
 *
 * Returns a mask:	0x1	IBRS supported
 *			0x2	IBPB supported
 */
static
int
spectre_check_support(void)
{
	uint32_t p[4];
	int rv = 0;

	/*
	 * SPEC_CTRL (bit 26) and STIBP support (bit 27)
	 *
	 * XXX Not sure what the STIBP flag is meant to be used for.
	 *
	 * SPEC_CTRL indicates IBRS and IBPB support.
	 */
	p[0] = 0;
	p[1] = 0;
	p[2] = 0;
	p[3] = 0;
	cpuid_count(7, 0, p);
	if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
		rv |= 3;

	/*
	 * 0x80000008 p[1] bit 12 indicates IBPB support
	 *
	 * This bit might be set even though SPEC_CTRL is not set.
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		do_cpuid(0x80000008U, p);
		if (p[1] & CPUID_80000008_I1_IBPB_SUPPORT)
			rv |= 2;
	}

	return rv;
}

/*
 * Iterate CPUs and adjust MSR for global operations, since
 * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
 */
static
void
spectre_sysctl_changed(void)
{
	globaldata_t save_gd;
	struct trampframe *tr;
	int n;

	/*
	 * Console message on mitigation mode change
	 */
	kprintf("machdep.spectre_mitigation=%d: ", spectre_mitigation);

	if (spectre_ibrs_supported == 0) {
		kprintf("IBRS=NOSUPPORT, ");
	} else {
		switch(spectre_mitigation & 3) {
		case 0:
			kprintf("IBRS=0 (disabled), ");
			break;
		case 1:
			kprintf("IBRS=1 (kern-only), ");
			break;
		case 2:
			kprintf("IBRS=2 (always-on), ");
			break;
		case 3:
			kprintf("IBRS=?, ");
			break;
		}
	}

	if (spectre_ibpb_supported == 0) {
		kprintf("IBPB=NOSUPPORT\n");
	} else {
		switch(spectre_mitigation & 4) {
		case 0:
			kprintf("IBPB=0 (disabled)\n");
			break;
		case 4:
			kprintf("IBPB=1 (enabled)\n");
			break;
		}
	}

	/*
	 * Fixup state
	 */
	save_gd = mycpu;
	for (n = 0; n < ncpus; ++n) {
		lwkt_setcpu_self(globaldata_find(n));
		cpu_ccfence();
		tr = &pscpu->trampoline;

		/*
		 * Make sure we are cleaned out.
		 *
		 * XXX cleanup, reusing globals inside the loop (they get
		 * set to the same thing each loop)
		 */
		tr->tr_pcb_gflags &= ~(PCB_IBRS1 | PCB_IBRS2 | PCB_IBPB);
		spectre_ibrs_mode = 0;
		spectre_ibpb_mode = 0;

		/*
		 * Don't try to parse if not available
		 */
		if (spectre_mitigation < 0)
			continue;

		/*
		 * IBRS mode
		 */
		switch(spectre_mitigation & 3) {
		case 0:
			/*
			 * Disable IBRS
			 *
			 * Make sure IBRS is turned off in case we were in
			 * a global mode before.
			 */
			if (spectre_ibrs_supported)
				wrmsr(MSR_SPEC_CTRL, 0);
			break;
		case 1:
			/*
			 * IBRS in kernel
			 */
			if (spectre_ibrs_supported) {
				tr->tr_pcb_gflags |= PCB_IBRS1;
				wrmsr(MSR_SPEC_CTRL, 1);
				spectre_ibrs_mode = 1;
			}
			break;
		case 2:
			/*
			 * IBRS at all times
			 */
			if (spectre_ibrs_supported) {
				tr->tr_pcb_gflags |= PCB_IBRS2;
				wrmsr(MSR_SPEC_CTRL, 1);
				spectre_ibrs_mode = 2;
			}
			break;
		}

		/*
		 * IBPB mode
		 */
		if (spectre_mitigation & 4) {
			if (spectre_ibpb_supported) {
				tr->tr_pcb_gflags |= PCB_IBPB;
				spectre_ibpb_mode = 1;
			}
		}
	}
	lwkt_setcpu_self(save_gd);
	cpu_ccfence();
}

/*
 * User changes sysctl value
 */
static int
sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
{
	int spectre;
	int error;

	spectre = spectre_mitigation;
	error = sysctl_handle_int(oidp, &spectre, 0, req);
	if (error || req->newptr == NULL)
		return error;
	spectre_mitigation = spectre;
	spectre_sysctl_changed();

	return 0;
}

SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation, CTLTYPE_INT | CTLFLAG_RW,
	0, 0, sysctl_spectre_mitigation, "I", "Spectre exploit mitigation");

/*
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
void
spectre_vm_setup(void *arg)
{
	int inconsistent = 0;
	int supmask;

	/*
	 * Fetch tunable in auto mode
	 */
	if (spectre_mitigation < 0) {
		TUNABLE_INT_FETCH("machdep.spectre_mitigation",
				  &spectre_mitigation);
	}

	if ((supmask = spectre_check_support()) != 0) {
		/*
		 * Must be supported on all cpus before we
		 * can enable it.  Returns silently if it
		 * isn't.
		 *
		 * NOTE! arg != NULL indicates we were called
		 *	 from cpuctl after a successful microcode
		 *	 update.
		 */
		if (arg != NULL) {
			globaldata_t save_gd;
			int n;

			save_gd = mycpu;
			for (n = 0; n < ncpus; ++n) {
				lwkt_setcpu_self(globaldata_find(n));
				cpu_ccfence();
				if (spectre_check_support() !=
				    supmask) {
					inconsistent = 1;
					break;
				}
			}
			lwkt_setcpu_self(save_gd);
			cpu_ccfence();
		}
	}

	/*
	 * Be silent while microcode is being loaded on various CPUs,
	 * until all done.
	 */
	if (inconsistent) {
		spectre_mitigation = -1;
		spectre_ibrs_supported = 0;
		spectre_ibpb_supported = 0;
		return;
	}

	/*
	 * IBRS support
	 */
	if (supmask & 1)
		spectre_ibrs_supported = 1;
	else
		spectre_ibrs_supported = 0;

	/*
	 * IBPB support.
	 */
	if (supmask & 2)
		spectre_ibpb_supported = 1;
	else
		spectre_ibpb_supported = 0;

	/*
	 * Enable spectre_mitigation, set defaults if -1, adjust
	 * tuned value according to support if not.
	 *
	 * NOTE!  We do not enable IBPB for user->kernel transitions
	 *	  by default, so this code is commented out for now.
	 */
	if (spectre_ibrs_supported || spectre_ibpb_supported) {
		if (spectre_mitigation < 0) {
			spectre_mitigation = 0;
			if (spectre_ibrs_supported)
				spectre_mitigation |= 1;
#if 0
			if (spectre_ibpb_supported)
				spectre_mitigation |= 4;
#endif
		}
		if (spectre_ibrs_supported == 0)
			spectre_mitigation &= ~3;
		if (spectre_ibpb_supported == 0)
			spectre_mitigation &= ~4;
	} else {
		spectre_mitigation = -1;
	}

	/*
	 * Disallow sysctl changes when there is no support (otherwise
	 * the wrmsr will cause a protection fault).
	 */
	if (spectre_mitigation < 0)
		sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
	else
		sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;

	spectre_sysctl_changed();
}

SYSINIT(spectre_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
	spectre_vm_setup, NULL);

/*
 * platform-specific vmspace initialization (nothing for x86_64)
 */
void
cpu_vmspace_alloc(struct vmspace *vm __unused)
{
}

void
cpu_vmspace_free(struct vmspace *vm __unused)
{
}

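/*
 * Check that the kernel virtual address range [saddr, eaddr) lies within
 * the kernel map, is backed by mapped pages, and is accessible with the
 * requested protection.  Returns 0 on success or EFAULT.
 */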
int
kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
{
	vm_offset_t addr;

	if (saddr < KvaStart)
		return EFAULT;
	if (eaddr >= KvaEnd)
		return EFAULT;
	for (addr = saddr; addr < eaddr; addr += PAGE_SIZE)  {
		if (pmap_kextract(addr) == 0)
			return EFAULT;
	}
	if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
		return EFAULT;
	return 0;
}

#if 0

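/*
 * Debug helpers (normally compiled out): verify that the trapframe used
 * on kernel entry/exit matches lwp_md.md_regs and lies on the thread's
 * kernel stack.
 */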
void _test_frame_enter(struct trapframe *frame);
void _test_frame_exit(struct trapframe *frame);

void
_test_frame_enter(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_enter: Frame mismatch %p %p",
			td->td_lwp->lwp_md.md_regs, frame));
		td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp;
		td->td_lwp->lwp_saveupc = (void *)frame->tf_rip;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_enter: frame not on kstack %p kstack=%p",
			frame, td->td_kstack);
	}
}

void
_test_frame_exit(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_exit: Frame mismatch %p %p",
			td->td_lwp->lwp_md.md_regs, frame));
		if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) {
			kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveusp,
				(void *)frame->tf_rsp);
		}
		if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) {
			kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveupc,
				(void *)frame->tf_rip);
		}

		/*
		 * adulterate the fields to catch entries that
		 * don't run through test_frame_enter
		 */
		td->td_lwp->lwp_saveusp =
			(void *)~(intptr_t)td->td_lwp->lwp_saveusp;
		td->td_lwp->lwp_saveupc =
			(void *)~(intptr_t)td->td_lwp->lwp_saveupc;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_exit: frame not on kstack %p kstack=%p",
			frame, td->td_kstack);
	}
}

#endif