1 /*-
2  * Copyright (c) 1982, 1986 The Regents of the University of California.
3  * Copyright (c) 1989, 1990 William Jolitz
4  * Copyright (c) 1994 John Dyson
5  * Copyright (c) 2008-2018 The DragonFly Project.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * the Systems Programming Group of the University of Utah Computer
10  * Science Department, and William Jolitz.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
41  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
42  * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/malloc.h>
48 #include <sys/proc.h>
49 #include <sys/buf.h>
50 #include <sys/interrupt.h>
51 #include <sys/vnode.h>
52 #include <sys/vmmeter.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/unistd.h>
56 #include <sys/lwp.h>
57 
58 #include <machine/clock.h>
59 #include <machine/cpu.h>
60 #include <machine/md_var.h>
61 #include <machine/smp.h>
62 #include <machine/pcb.h>
63 #include <machine/pcb_ext.h>
64 #include <machine/segments.h>
65 #include <machine/globaldata.h>	/* npxthread */
66 #include <machine/specialreg.h>
67 #include <machine/vmm.h>
68 
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <sys/lock.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_map.h>
75 #include <vm/vm_extern.h>
76 
77 #include <sys/thread2.h>
78 #include <sys/mplock2.h>
79 
80 #include <bus/isa/isa.h>
81 
82 static void	cpu_reset_real (void);
83 
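/*
 * For both mitigations: *_support is the feature mask reported by the
 * hardware, *_mitigation is the mask of modes requested via tunable or
 * sysctl (-1 when support is absent or not yet probed), and *_mode is
 * the mask of modes actually in operation.
 */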
84 static int spectre_mitigation = -1;
85 static int spectre_support = 0;
86 static int spectre_mode = 0;
87 SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
88 	&spectre_mode, 0, "current Spectre enablements");
89 
90 static int mds_mitigation = -1;
91 static int mds_support = 0;
92 static int mds_mode = 0;
93 SYSCTL_INT(_machdep, OID_AUTO, mds_mode, CTLFLAG_RD,
94 	&mds_mode, 0, "current MDS enablements");
95 
96 /*
97  * Finish a fork operation, with lwp lp2 nearly set up.
98  * Copy and update the pcb, and set up the stack so that the child is
99  * ready to run and return to user mode.
100  */
101 void
102 cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
103 {
104 	struct pcb *pcb2;
105 	struct pmap *pmap2;
106 
107 	if ((flags & RFPROC) == 0) {
108 		if ((flags & RFMEM) == 0) {
109 			/*
110 			 * Unshare user LDT.  > 1 test is MPSAFE.  While
111 			 * it can potentially race a 2->1 transition, the
112 			 * worst that happens is that we do an unnecessary
113 			 * ldt replacement.
114 			 */
115 			struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
116 			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
117 
118 			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
119 				pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
120 				user_ldt_free(pcb1);
121 				pcb1->pcb_ldt = pcb_ldt;
122 				set_user_ldt(pcb1);
123 			}
124 		}
125 		return;
126 	}
127 
128 	/* Ensure that lp1's pcb is up to date. */
129 	if (mdcpu->gd_npxthread == lp1->lwp_thread)
130 		npxsave(lp1->lwp_thread->td_savefpu);
131 
132 	/*
133 	 * Copy lp1's PCB.  This really only applies to the
134 	 * debug registers and FP state, but it's faster to just copy the
135 	 * whole thing.  Because we only save the PCB at switchout time,
136 	 * the register state may not be current.
137 	 */
138 	pcb2 = lp2->lwp_thread->td_pcb;
139 	*pcb2 = *lp1->lwp_thread->td_pcb;
140 
141 	/*
142 	 * Create a new fresh stack for the new process.
143 	 * Copy the trap frame for the return to user mode as if from a
144 	 * syscall.  This copies the user mode register values.
145 	 *
146 	 * pcb_rsp must allocate an additional call-return pointer below
147 	 * the trap frame which will be restored by cpu_heavy_restore from
148 	 * PCB_RIP, and the thread's td_sp pointer must allocate an
149 	 * additional two quadwords below the pcb_rsp call-return pointer to
150 	 * hold the LWKT restore function pointer and rflags.
151 	 *
152 	 * The LWKT restore function pointer must be set to cpu_heavy_restore,
153 	 * which is our standard heavy-weight process switch-in function.
154 	 * YYY eventually we should shortcut fork_return and fork_trampoline
155 	 * to use the LWKT restore function directly so we can get rid of
156 	 * all the extra crap we are setting up.
157 	 */
158 	lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
159 	bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs));
160 
161 	/*
162 	 * Set registers for trampoline to user mode.  Leave space for the
163 	 * return address on the stack.  These are the kernel mode register values.
164 	 *
165 	 * Set the new pmap CR3.  If the new process uses isolated VM spaces,
166 	 * also set the isolated CR3.
167 	 */
168 	pmap2 = vmspace_pmap(lp2->lwp_proc->p_vmspace);
169 	pcb2->pcb_cr3 = vtophys(pmap2->pm_pml4);
170 	if ((pcb2->pcb_flags & PCB_ISOMMU) && pmap2->pm_pmlpv_iso) {
171 		pcb2->pcb_cr3_iso = vtophys(pmap2->pm_pml4_iso);
172 	} else {
173 		pcb2->pcb_flags &= ~PCB_ISOMMU;
174 		pcb2->pcb_cr3_iso = 0;
175 	}
176 
177 #if 0
178 	/*
179 	 * Per-process spectre mitigation (future)
180 	 */
181 	pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
182 	switch (spectre_mitigation) {
183 	case 1:
184 		pcb2->pcb_flags |= PCB_IBRS1;
185 		break;
186 	case 2:
187 		pcb2->pcb_flags |= PCB_IBRS2;
188 		break;
189 	default:
190 		break;
191 	}
192 #endif
193 
194 	pcb2->pcb_rbx = (unsigned long)fork_return;	/* fork_trampoline argument */
195 	pcb2->pcb_rbp = 0;
196 	pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
197 	pcb2->pcb_r12 = (unsigned long)lp2;		/* fork_trampoline argument */
198 	pcb2->pcb_r13 = 0;
199 	pcb2->pcb_r14 = 0;
200 	pcb2->pcb_r15 = 0;
201 	pcb2->pcb_rip = (unsigned long)fork_trampoline;
202 	lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
203 	*(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
204 	lp2->lwp_thread->td_sp -= sizeof(void *);
205 	*(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
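	/*
	 * The new lwp's kernel stack is now laid out as follows, from the
	 * pcb at the top on down:
	 *
	 *	pcb2			(struct pcb)
	 *	trapframe		(lwp_md.md_regs, copied from lp1)
	 *	call-return pointer	(pcb_rsp; reloaded from pcb_rip,
	 *				 i.e. fork_trampoline, by
	 *				 cpu_heavy_restore)
	 *	rflags (PSL_USER)
	 *	LWKT restore function	(td_sp; cpu_heavy_restore)
	 */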
206 
207 	/*
208 	 * pcb2->pcb_ldt:	duplicated below, if necessary.
209 	 * pcb2->pcb_savefpu:	cloned above.
210 	 * pcb2->pcb_flags:	cloned above.
211 	 * pcb2->pcb_onfault:	cloned above (always NULL here).
212 	 * pcb2->pcb_onfault_sp: cloned above (don't care).
213 	 */
214 
215 	/*
216 	 * XXX don't copy the i/o pages.  this should probably be fixed.
217 	 */
218 	pcb2->pcb_ext = NULL;
219 
220 	/* Copy the LDT, if necessary. */
221 	if (pcb2->pcb_ldt != NULL) {
222 		if (flags & RFMEM) {
223 			atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
224 		} else {
225 			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
226 						       pcb2->pcb_ldt->ldt_len);
227 		}
228 	}
229 	bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
230 	      sizeof(lp2->lwp_thread->td_tls));
231 	/*
232 	 * Now, cpu_switch() can schedule the new lwp.
233 	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
234 	 * containing the return address when exiting cpu_switch.
235 	 * This will normally be to fork_trampoline(), which will have
236 	 * %rbx loaded with the new lwp's pointer.  fork_trampoline()
237 	 * will set up a stack to call fork_return(lp, frame); to complete
238 	 * the return to user-mode.
239 	 */
240 }
241 
242 /*
243  * Prepare new lwp to return to the address specified in params.
244  */
245 int
246 cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
247 {
248 	struct trapframe *regs = lp->lwp_md.md_regs;
249 	void *bad_return = NULL;
250 	int error;
251 
252 	regs->tf_rip = (long)params->lwp_func;
253 	regs->tf_rsp = (long)params->lwp_stack;
254 	/* Set up argument for function call */
255 	regs->tf_rdi = (long)params->lwp_arg;
256 
257 	/*
258 	 * Set up fake return address.  As the lwp function may never return,
259 	 * we simply copy out a NULL pointer and force the lwp to receive
260 	 * a SIGSEGV if it returns anyway.
261 	 */
262 	regs->tf_rsp -= sizeof(void *);
263 	error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
264 	if (error)
265 		return (error);
266 
267 	if (lp->lwp_proc->p_vmm) {
268 		lp->lwp_thread->td_pcb->pcb_cr3 = KPML4phys;
269 		cpu_set_fork_handler(lp,
270 		    (void (*)(void *, struct trapframe *))vmm_lwp_return, lp);
271 	} else {
272 		cpu_set_fork_handler(lp,
273 		    (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
274 	}
275 	return (0);
276 }
277 
278 /*
279  * Intercept the return address from a freshly forked process that has NOT
280  * been scheduled yet.
281  *
282  * This is needed to make kernel threads stay in kernel mode.
283  */
284 void
285 cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
286 		     void *arg)
287 {
288 	/*
289 	 * Note that the trap frame follows the args, so the function
290 	 * is really called like this:  func(arg, frame);
291 	 */
292 	lp->lwp_thread->td_pcb->pcb_rbx = (long)func;	/* function */
293 	lp->lwp_thread->td_pcb->pcb_r12 = (long)arg;	/* first arg */
294 }
295 
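/*
 * Set up a pure kernel thread.  The thread's first switch-in runs
 * cpu_kthread_restore, which calls func(arg) using the values stashed
 * in pcb_rbx and pcb_r12; rfunc is left on the stack as the function
 * to return to if func ever returns.
 */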
296 void
297 cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
298 {
299 	td->td_pcb->pcb_rbx = (long)func;
300 	td->td_pcb->pcb_r12 = (long)arg;
301 	td->td_switch = cpu_lwkt_switch;
302 	td->td_sp -= sizeof(void *);
303 	*(void **)td->td_sp = rfunc;	/* exit function on return */
304 	td->td_sp -= sizeof(void *);
305 	*(void **)td->td_sp = cpu_kthread_restore;
306 }
307 
308 void
309 cpu_lwp_exit(void)
310 {
311 	struct thread *td = curthread;
312 	struct pcb *pcb;
313 
314 	pcb = td->td_pcb;
315 
316 	/* Some x86 functionality was dropped */
317 	KKASSERT(pcb->pcb_ext == NULL);
318 
319 	/*
320 	 * disable all hardware breakpoints
321 	 */
322 	if (pcb->pcb_flags & PCB_DBREGS) {
323 		reset_dbregs();
324 		pcb->pcb_flags &= ~PCB_DBREGS;
325 	}
326 	td->td_gd->gd_cnt.v_swtch++;
327 
328 	crit_enter_quick(td);
329 	if (td->td_flags & TDF_TSLEEPQ)
330 		tsleep_remove(td);
331 	lwkt_deschedule_self(td);
332 	lwkt_remove_tdallq(td);
333 	cpu_thread_exit();
334 }
335 
336 /*
337  * Terminate the current thread.  The caller must have already acquired
338  * the thread's rwlock and placed it on a reap list or otherwise notified
339  * a reaper of its existence.  We set a special assembly switch function which
340  * releases td_rwlock after it has cleaned up the MMU state and switched
341  * out the stack.
342  *
343  * Must be called from a critical section and with the thread descheduled.
344  */
345 void
346 cpu_thread_exit(void)
347 {
348 	npxexit();
349 	curthread->td_switch = cpu_exit_switch;
350 	curthread->td_flags |= TDF_EXITING;
351 	lwkt_switch();
352 	panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
353 }
354 
355 void
356 cpu_reset(void)
357 {
358 	cpu_reset_real();
359 }
360 
361 static void
362 cpu_reset_real(void)
363 {
364 	/*
365 	 * Attempt to do a CPU reset via the keyboard controller.
366 	 * Do not turn off GateA20, as any machine that fails
367 	 * to do the reset here would then end up in no man's land.
368 	 */
369 
370 #if !defined(BROKEN_KEYBOARD_RESET)
371 	outb(IO_KBD + 4, 0xFE);
372 	DELAY(500000);	/* wait 0.5 sec to see if that did it */
373 	kprintf("Keyboard reset did not work, attempting CPU shutdown\n");
374 	DELAY(1000000);	/* wait 1 sec for kprintf to complete */
375 #endif
376 #if 0 /* JG */
377 	/* force a shutdown by unmapping entire address space ! */
378 	bzero((caddr_t) PTD, PAGE_SIZE);
379 #endif
380 
381 	/* "good night, sweet prince .... <THUNK!>" */
382 	cpu_invltlb();
383 	/* NOTREACHED */
384 	while(1);
385 }
386 
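/*
 * Software interrupt handler for the VM SWI: drain any pending
 * busdma callbacks.
 */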
387 static void
388 swi_vm(void *arg, void *frame)
389 {
390 	if (busdma_swi_pending != 0)
391 		busdma_swi();
392 }
393 
394 static void
395 swi_vm_setup(void *arg)
396 {
397 	register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
398 }
399 
400 SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);
401 
402 /*
403  * NOTE: This routine is also called after a successful microcode
404  *	 reload on cpu 0.
405  */
406 void mitigation_vm_setup(void *arg);
407 
408 /*
409  * Check for IBPB, IBRS, and STIBP support.
410  *
411  * These bits also specify the desired modes in the spectre_mitigation sysctl.
412  */
413 #define IBRS_SUPPORTED		0x0001
414 #define STIBP_SUPPORTED		0x0002
415 #define IBPB_SUPPORTED		0x0004
416 #define IBRS_AUTO_SUPPORTED	0x0008
417 #define STIBP_AUTO_SUPPORTED	0x0010
418 #define IBRS_PREFERRED_REQUEST	0x0020
419 
420 static
421 int
422 spectre_check_support(void)
423 {
424 	uint32_t p[4];
425 	int rv = 0;
426 
427 	/*
428 	 * Spectre mitigation hw bits
429 	 *
430 	 * IBRS		Indirect Branch Restricted Speculation   (isolation)
431 	 * STIBP	Single Thread Indirect Branch Prediction (isolation)
432 	 * IBPB		Branch Prediction Barrier		 (barrier)
433 	 *
434 	 * IBRS and STIBP must be toggled (enabled on entry to kernel,
435 	 * disabled on exit, as well as disabled during any MWAIT/HLT).
436 	 * When *_AUTO bits are available, IBRS and STIBP may be left
437 	 * turned on and do not have to be toggled on kernel entry/exit.
438 	 *
439 	 * All this shit has enormous overhead.  IBPB in particular, and the
440 	 * non-auto modes, are disabled by default.
441 	 */
442 	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
443 		p[0] = 0;
444 		p[1] = 0;
445 		p[2] = 0;
446 		p[3] = 0;
447 		cpuid_count(7, 0, p);
448 		if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
449 			rv |= IBRS_SUPPORTED | IBPB_SUPPORTED;
450 		if (p[3] & CPUID_7_0_I3_STIBP)
451 			rv |= STIBP_SUPPORTED;
452 
453 		/*
454 		 * 0x80000008 p[1] bit 12 indicates IBPB support
455 		 *
456 		 * This bit might be set even though SPEC_CTRL is not set.
457 		 */
458 		p[0] = 0;
459 		p[1] = 0;
460 		p[2] = 0;
461 		p[3] = 0;
462 		do_cpuid(0x80000008U, p);
463 		if (p[1] & CPUID_INTEL_80000008_I1_IBPB_SUPPORT)
464 			rv |= IBPB_SUPPORTED;
465 	} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
466 		/*
467 		 * 0x80000008 p[1] bit 12 indicates IBPB support
468 		 *	      p[1] bit 14 indicates IBRS support
469 		 *	      p[1] bit 15 indicates STIBP support
470 		 *
471 		 *	      p[1] bit 16 indicates IBRS auto support
472 		 *	      p[1] bit 17 indicates STIBP auto support
473 		 *	      p[1] bit 18 indicates processor prefers using
474 		 *		IBRS instead of retpoline.
475 		 */
476 		p[0] = 0;
477 		p[1] = 0;
478 		p[2] = 0;
479 		p[3] = 0;
480 		do_cpuid(0x80000008U, p);
481 		if (p[1] & CPUID_AMD_80000008_I1_IBPB_SUPPORT)
482 			rv |= IBPB_SUPPORTED;
483 		if (p[1] & CPUID_AMD_80000008_I1_IBRS_SUPPORT)
484 			rv |= IBRS_SUPPORTED;
485 		if (p[1] & CPUID_AMD_80000008_I1_STIBP_SUPPORT)
486 			rv |= STIBP_SUPPORTED;
487 
488 		if (p[1] & CPUID_AMD_80000008_I1_IBRS_AUTO)
489 			rv |= IBRS_AUTO_SUPPORTED;
490 		if (p[1] & CPUID_AMD_80000008_I1_STIBP_AUTO)
491 			rv |= STIBP_AUTO_SUPPORTED;
492 		if (p[1] & CPUID_AMD_80000008_I1_IBRS_REQUESTED)
493 			rv |= IBRS_PREFERRED_REQUEST;
494 	}
495 
496 	return rv;
497 }
498 
499 /*
500  * Iterate CPUs and adjust MSR for global operations, since
501  * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
502  */
503 #define CHECK(flag)	(spectre_mitigation & spectre_support & (flag))
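/* CHECK(flag): mode is both requested via sysctl and supported by the hw */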
504 
505 static
506 void
507 spectre_sysctl_changed(void)
508 {
509 	globaldata_t save_gd;
510 	struct trampframe *tr;
511 	int spec_ctrl;
512 	int spec_mask;
513 	int mode;
514 	int n;
515 
516 
517 	spec_mask = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
518 		    SPEC_CTRL_DUMMY_ENABLE | SPEC_CTRL_DUMMY_IBPB;
519 
520 	/*
521 	 * Fixup state
522 	 */
523 	mode = 0;
524 	save_gd = mycpu;
525 	for (n = 0; n < ncpus; ++n) {
526 		lwkt_setcpu_self(globaldata_find(n));
527 		cpu_ccfence();
528 		tr = &pscpu->trampoline;
529 
530 		/*
531 		 * Make sure we are cleaned out.
532 		 *
533 		 * XXX cleanup, reusing globals inside the loop (they get
534 		 * set to the same thing each loop)
535 		 *
536 		 * [0] kernel entry (idle exit)
537 		 * [1] kernel exit  (idle entry)
538 		 */
539 		tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
540 		tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
541 
542 		/*
543 		 * Don't try to parse if not available
544 		 */
545 		if (spectre_mitigation < 0)
546 			continue;
547 
548 		/*
549 		 * IBRS mode.  Auto overrides toggling.
550 		 *
551 		 * Only set the ENABLE flag if we have to toggle something
552 		 * on entry and exit.
553 		 */
554 		spec_ctrl = 0;
555 		if (CHECK(IBRS_AUTO_SUPPORTED)) {
556 			spec_ctrl |= SPEC_CTRL_IBRS;
557 			mode |= IBRS_AUTO_SUPPORTED;
558 		} else if (CHECK(IBRS_SUPPORTED)) {
559 			spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_DUMMY_ENABLE;
560 			mode |= IBRS_SUPPORTED;
561 		}
562 		if (CHECK(STIBP_AUTO_SUPPORTED)) {
563 			spec_ctrl |= SPEC_CTRL_STIBP;
564 			mode |= STIBP_AUTO_SUPPORTED;
565 		} else if (CHECK(STIBP_SUPPORTED)) {
566 			spec_ctrl |= SPEC_CTRL_STIBP | SPEC_CTRL_DUMMY_ENABLE;
567 			mode |= STIBP_SUPPORTED;
568 		}
569 
570 		/*
571 		 * IBPB requested and supported.
572 		 */
573 		if (CHECK(IBPB_SUPPORTED)) {
574 			spec_ctrl |= SPEC_CTRL_DUMMY_IBPB;
575 			mode |= IBPB_SUPPORTED;
576 		}
577 
578 		/*
579 		 * If the cpu supports any of the modes, update the MSR to
580 		 * ensure proper disablement when the user has disabled a mode.
581 		 */
582 		if (spectre_support & (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
583 				    STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED)) {
584 			wrmsr(MSR_SPEC_CTRL,
585 			      spec_ctrl & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
586 		}
587 
588 		/*
589 		 * Update spec_ctrl fields in the trampoline.
590 		 *
591 		 * [0] on-kernel-entry (on-idle-exit)
592 		 * [1] on-kernel-exit  (on-idle-entry)
593 		 *
594 		 * When auto mode is supported we leave the bit set, otherwise
595 		 * we clear the bits.
596 		 */
597 		tr->tr_pcb_spec_ctrl[0] |= spec_ctrl;
598 		if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
599 			spec_ctrl &= ~SPEC_CTRL_IBRS;
600 		if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
601 			spec_ctrl &= ~SPEC_CTRL_STIBP;
602 		tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
603 
604 		/*
605 		 * Make sure we set this on the first loop.  It will be
606 		 * the same value on remaining loops.
607 		 */
608 		spectre_mode = mode;
609 	}
610 	lwkt_setcpu_self(save_gd);
611 	cpu_ccfence();
612 
613 	/*
614 	 * Console message on mitigation mode change
615 	 */
616 	kprintf("Spectre: support=(");
617 	if (spectre_support == 0) {
618 		kprintf(" none");
619 	} else {
620 		if (spectre_support & IBRS_SUPPORTED)
621 			kprintf(" IBRS");
622 		if (spectre_support & STIBP_SUPPORTED)
623 			kprintf(" STIBP");
624 		if (spectre_support & IBPB_SUPPORTED)
625 			kprintf(" IBPB");
626 		if (spectre_support & IBRS_AUTO_SUPPORTED)
627 			kprintf(" IBRS_AUTO");
628 		if (spectre_support & STIBP_AUTO_SUPPORTED)
629 			kprintf(" STIBP_AUTO");
630 		if (spectre_support & IBRS_PREFERRED_REQUEST)
631 			kprintf(" IBRS_REQUESTED");
632 	}
633 	kprintf(" ) req=%04x operating=(", (uint16_t)spectre_mitigation);
634 	if (spectre_mode == 0) {
635 		kprintf(" none");
636 	} else {
637 		if (spectre_mode & IBRS_SUPPORTED)
638 			kprintf(" IBRS");
639 		if (spectre_mode & STIBP_SUPPORTED)
640 			kprintf(" STIBP");
641 		if (spectre_mode & IBPB_SUPPORTED)
642 			kprintf(" IBPB");
643 		if (spectre_mode & IBRS_AUTO_SUPPORTED)
644 			kprintf(" IBRS_AUTO");
645 		if (spectre_mode & STIBP_AUTO_SUPPORTED)
646 			kprintf(" STIBP_AUTO");
647 		if (spectre_mode & IBRS_PREFERRED_REQUEST)
648 			kprintf(" IBRS_REQUESTED");
649 	}
650 	kprintf(" )\n");
651 }
652 
653 #undef CHECK
654 
655 /*
656  * User changes sysctl value
657  */
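/*
 * Reads return the current operating mode (or, for the read-only
 * machdep.spectre_support sysctl, the supported feature set) as a
 * space-separated list of mode names.  Writes take a list of mode
 * names, e.g.:
 *
 *	sysctl machdep.spectre_mitigation="IBRS_AUTO STIBP_AUTO"
 *	sysctl machdep.spectre_mitigation=NONE
 */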
658 static int
659 sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
660 {
661 	char buf[128];
662 	char *ptr;
663 	char *iter;
664 	size_t len;
665 	int spectre;
666 	int error = 0;
667 	int loop = 0;
668 
669 	/*
670 	 * Return current operating mode or support.
671 	 */
672 	if (oidp->oid_kind & CTLFLAG_WR)
673 		spectre = spectre_mode;
674 	else
675 		spectre = spectre_support;
676 
677 	spectre &= (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
678 		    STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED |
679 		    IBPB_SUPPORTED);
680 	while (spectre) {
681 		if (error)
682 			break;
683 		if (loop++) {
684 			error = SYSCTL_OUT(req, " ", 1);
685 			if (error)
686 				break;
687 		}
688 		if (spectre & IBRS_SUPPORTED) {
689 			spectre &= ~IBRS_SUPPORTED;
690 			error = SYSCTL_OUT(req, "IBRS", 4);
691 		} else
692 		if (spectre & IBRS_AUTO_SUPPORTED) {
693 			spectre &= ~IBRS_AUTO_SUPPORTED;
694 			error = SYSCTL_OUT(req, "IBRS_AUTO", 9);
695 		} else
696 		if (spectre & STIBP_SUPPORTED) {
697 			spectre &= ~STIBP_SUPPORTED;
698 			error = SYSCTL_OUT(req, "STIBP", 5);
699 		} else
700 		if (spectre & STIBP_AUTO_SUPPORTED) {
701 			spectre &= ~STIBP_AUTO_SUPPORTED;
702 			error = SYSCTL_OUT(req, "STIBP_AUTO", 10);
703 		} else
704 		if (spectre & IBPB_SUPPORTED) {
705 			spectre &= ~IBPB_SUPPORTED;
706 			error = SYSCTL_OUT(req, "IBPB", 4);
707 		}
708 	}
709 	if (loop == 0) {
710 		error = SYSCTL_OUT(req, "NONE", 4);
711 	}
712 
713 	if (error || req->newptr == NULL)
714 		return error;
715 	if ((oidp->oid_kind & CTLFLAG_WR) == 0)
716 		return error;
717 
718 	/*
719 	 * Change current operating mode
720 	 */
721 	len = req->newlen - req->newidx;
722 	if (len >= sizeof(buf)) {
723 		error = EINVAL;
724 		len = 0;
725 	} else {
726 		error = SYSCTL_IN(req, buf, len);
727 	}
728 	buf[len] = 0;
729 	iter = &buf[0];
730 	spectre = 0;
731 
732 	while (error == 0 && iter) {
733 		ptr = strsep(&iter, " ,\t\r\n");
734 		if (*ptr == 0)
735 			continue;
736 		if (strcasecmp(ptr, "NONE") == 0)
737 			spectre |= 0;
738 		else if (strcasecmp(ptr, "IBRS") == 0)
739 			spectre |= IBRS_SUPPORTED;
740 		else if (strcasecmp(ptr, "IBRS_AUTO") == 0)
741 			spectre |= IBRS_AUTO_SUPPORTED;
742 		else if (strcasecmp(ptr, "STIBP") == 0)
743 			spectre |= STIBP_SUPPORTED;
744 		else if (strcasecmp(ptr, "STIBP_AUTO") == 0)
745 			spectre |= STIBP_AUTO_SUPPORTED;
746 		else if (strcasecmp(ptr, "IBPB") == 0)
747 			spectre |= IBPB_SUPPORTED;
748 		else
749 			error = ENOENT;
750 	}
751 	if (error == 0) {
752 		spectre_mitigation = spectre;
753 		spectre_sysctl_changed();
754 	}
755 	return error;
756 }
757 
758 SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation,
759 	CTLTYPE_STRING | CTLFLAG_RW,
760 	0, 0, sysctl_spectre_mitigation, "A", "Spectre exploit mitigation");
761 SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
762 	CTLTYPE_STRING | CTLFLAG_RD,
763 	0, 0, sysctl_spectre_mitigation, "A", "Spectre supported features");
764 
765 /*
766  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
767  *	 updated.  Microcode updates must be applied to all cpus
768  *	 for support to be recognized.
769  */
770 static void
771 spectre_vm_setup(void *arg)
772 {
773 	int inconsistent = 0;
774 	int supmask;
775 
776 	/*
777 	 * Fetch tunable in auto mode
778 	 */
779 	if (spectre_mitigation < 0) {
780 		TUNABLE_INT_FETCH("machdep.spectre_mitigation",
781 				  &spectre_mitigation);
782 	}
783 
784 	if ((supmask = spectre_check_support()) != 0) {
785 		/*
786 		 * Must be supported on all cpus before we
787 		 * can enable it.  Returns silently if it
788 		 * isn't.
789 		 *
790 		 * NOTE! arg != NULL indicates we were called
791 		 *	 from cpuctl after a successful microcode
792 		 *	 update.
793 		 */
794 		if (arg != NULL) {
795 			globaldata_t save_gd;
796 			int n;
797 
798 			save_gd = mycpu;
799 			for (n = 0; n < ncpus; ++n) {
800 				lwkt_setcpu_self(globaldata_find(n));
801 				cpu_ccfence();
802 				if (spectre_check_support() !=
803 				    supmask) {
804 					inconsistent = 1;
805 					break;
806 				}
807 			}
808 			lwkt_setcpu_self(save_gd);
809 			cpu_ccfence();
810 		}
811 	}
812 
813 	/*
814 	 * Be silent while microcode is being loaded on various CPUs,
815 	 * until all done.
816 	 */
817 	if (inconsistent) {
818 		spectre_mitigation = -1;
819 		spectre_support = 0;
820 		return;
821 	}
822 
823 	/*
824 	 * Spectre mitigation support
825 	 */
826 	spectre_support = supmask;
827 
828 	/*
829 	 * Enable spectre_mitigation: set defaults if it is -1, otherwise
830 	 * adjust the tuned value according to the available support.
831 	 *
832 	 * NOTE!  We do not enable IBPB for user->kernel transitions
833 	 *	  by default, so this code is commented out for now.
834 	 */
835 	if (spectre_support) {
836 		if (spectre_mitigation < 0) {
837 			spectre_mitigation = 0;
838 
839 			/*
840 			 * IBRS toggling not currently recommended as a
841 			 * default.
842 			 */
843 			if (spectre_support & IBRS_AUTO_SUPPORTED)
844 				spectre_mitigation |= IBRS_AUTO_SUPPORTED;
845 			else if (spectre_support & IBRS_SUPPORTED)
846 				spectre_mitigation |= 0;
847 
848 			/*
849 			 * STIBP toggling not currently recommended as a
850 			 * default.
851 			 */
852 			if (spectre_support & STIBP_AUTO_SUPPORTED)
853 				spectre_mitigation |= STIBP_AUTO_SUPPORTED;
854 			else if (spectre_support & STIBP_SUPPORTED)
855 				spectre_mitigation |= 0;
856 
857 			/*
858 			 * IBPB adds enormous (~2uS) overhead to system
859 			 * calls etc., so we do not enable it by default.
860 			 */
861 			if (spectre_support & IBPB_SUPPORTED)
862 				spectre_mitigation |= 0;
863 		}
864 	} else {
865 		spectre_mitigation = -1;
866 	}
867 
868 	/*
869 	 * Disallow sysctl changes when there is no support (otherwise
870 	 * the wrmsr will cause a protection fault).
871 	 */
872 	if (spectre_mitigation < 0)
873 		sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
874 	else
875 		sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;
876 
877 	spectre_sysctl_changed();
878 }
879 
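/*
 * Check for MDS (Microarchitectural Data Sampling) support.
 *
 * These bits also specify the desired modes in the mds_mitigation sysctl.
 */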
880 #define MDS_AVX512_4VNNIW_SUPPORTED	0x0001
881 #define MDS_AVX512_4FMAPS_SUPPORTED	0x0002
882 #define MDS_MD_CLEAR_SUPPORTED		0x0004
883 #define MDS_TSX_FORCE_ABORT_SUPPORTED	0x0008
884 #define MDS_NOT_REQUIRED		0x8000
885 
886 static
887 int
888 mds_check_support(void)
889 {
890 	uint64_t msr;
891 	uint32_t p[4];
892 	int rv = 0;
893 
894 	/*
895 	 * MDS mitigation hw bits
896 	 *
897 	 * MD_CLEAR	Use microcode-supported verf insn.  This is the
898 	 * MD_CLEAR	Use the microcode-supported VERW instruction.  This is the
899 	 */
900 	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
901 		p[0] = 0;
902 		p[1] = 0;
903 		p[2] = 0;
904 		p[3] = 0;
905 		cpuid_count(7, 0, p);
906 		if (p[3] & CPUID_SEF_ARCH_CAP) {
907 			msr = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
908 			if (msr & IA32_ARCH_MDS_NO)
909 				rv = MDS_NOT_REQUIRED;
910 		}
911 		if (p[3] & CPUID_SEF_AVX512_4VNNIW)
912 			rv |= MDS_AVX512_4VNNIW_SUPPORTED;
913 		if (p[3] & CPUID_SEF_AVX512_4FMAPS)
914 			rv |= MDS_AVX512_4FMAPS_SUPPORTED;
915 		if (p[3] & CPUID_SEF_MD_CLEAR)
916 			rv |= MDS_MD_CLEAR_SUPPORTED;
917 		if (p[3] & CPUID_SEF_TSX_FORCE_ABORT)
918 			rv |= MDS_TSX_FORCE_ABORT_SUPPORTED;
919 	} else {
920 		rv = MDS_NOT_REQUIRED;
921 	}
922 
923 	return rv;
924 }
925 
926 /*
927  * Iterate CPUs and adjust the per-cpu trampoline state for global operations,
928  * since the KMMU* code won't do it if mds_mitigation is 0 or 2.
929  */
930 #define CHECK(flag)	(mds_mitigation & mds_support & (flag))
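/* CHECK(flag): mode is both requested via sysctl and supported by the hw */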
931 
932 static
933 void
934 mds_sysctl_changed(void)
935 {
936 	globaldata_t save_gd;
937 	struct trampframe *tr;
938 	int spec_ctrl;
939 	int spec_mask;
940 	int mode;
941 	int n;
942 
943 	spec_mask = SPEC_CTRL_MDS_ENABLE;
944 
945 	/*
946 	 * Fixup state
947 	 */
948 	mode = 0;
949 	save_gd = mycpu;
950 	for (n = 0; n < ncpus; ++n) {
951 		lwkt_setcpu_self(globaldata_find(n));
952 		cpu_ccfence();
953 		tr = &pscpu->trampoline;
954 
955 		/*
956 		 * Make sure we are cleaned out.
957 		 *
958 		 * XXX cleanup, reusing globals inside the loop (they get
959 		 * set to the same thing each loop)
960 		 *
961 		 * [0] kernel entry (idle exit)
962 		 * [1] kernel exit  (idle entry)
963 		 */
964 		tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
965 		tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
966 
967 		/*
968 		 * Don't try to parse if not available
969 		 */
970 		if (mds_mitigation < 0)
971 			continue;
972 
973 		spec_ctrl = 0;
974 		if (CHECK(MDS_MD_CLEAR_SUPPORTED)) {
975 			spec_ctrl |= SPEC_CTRL_MDS_ENABLE;
976 			mode |= MDS_MD_CLEAR_SUPPORTED;
977 		}
978 
979 		/*
980 		 * Update spec_ctrl fields in the trampoline.
981 		 *
982 		 * [0] on-kernel-entry (on-idle-exit)
983 		 * [1] on-kernel-exit  (on-idle-entry)
984 		 *
985 		 * The MDS mitigation is only needed on kernel-exit or idle-entry.
986 		 */
987 		/* tr->tr_pcb_spec_ctrl[0] |= spec_ctrl; */
988 		tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
989 
990 		/*
991 		 * Make sure we set this on the first loop.  It will be
992 		 * the same value on remaining loops.
993 		 */
994 		mds_mode = mode;
995 	}
996 	lwkt_setcpu_self(save_gd);
997 	cpu_ccfence();
998 
999 	/*
1000 	 * Console message on mitigation mode change
1001 	 */
1002 	kprintf("MDS: support=(");
1003 	if (mds_support == 0) {
1004 		kprintf(" none");
1005 	} else {
1006 		if (mds_support & MDS_AVX512_4VNNIW_SUPPORTED)
1007 			kprintf(" AVX512_4VNNIW");
1008 		if (mds_support & MDS_AVX512_4FMAPS_SUPPORTED)
1009 			kprintf(" AVX512_4FMAPS");
1010 		if (mds_support & MDS_MD_CLEAR_SUPPORTED)
1011 			kprintf(" MD_CLEAR");
1012 		if (mds_support & MDS_TSX_FORCE_ABORT_SUPPORTED)
1013 			kprintf(" TSX_FORCE_ABORT");
1014 		if (mds_support & MDS_NOT_REQUIRED)
1015 			kprintf(" MDS_NOT_REQUIRED");
1016 	}
1017 	kprintf(" ) req=%04x operating=(", (uint16_t)mds_mitigation);
1018 	if (mds_mode == 0) {
1019 		kprintf(" none");
1020 	} else {
1021 		if (mds_mode & MDS_AVX512_4VNNIW_SUPPORTED)
1022 			kprintf(" AVX512_4VNNIW");
1023 		if (mds_mode & MDS_AVX512_4FMAPS_SUPPORTED)
1024 			kprintf(" AVX512_4FMAPS");
1025 		if (mds_mode & MDS_MD_CLEAR_SUPPORTED)
1026 			kprintf(" MD_CLEAR");
1027 		if (mds_mode & MDS_TSX_FORCE_ABORT_SUPPORTED)
1028 			kprintf(" TSX_FORCE_ABORT");
1029 		if (mds_mode & MDS_NOT_REQUIRED)
1030 			kprintf(" MDS_NOT_REQUIRED");
1031 	}
1032 	kprintf(" )\n");
1033 }
1034 
1035 #undef CHECK
1036 
1037 /*
1038  * User changes sysctl value
1039  */
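/*
 * Reads return the current operating mode (or, for the read-only
 * machdep.mds_support sysctl, the supported feature set) as a
 * space-separated list of mode names.  Writes take a list of mode
 * names, e.g.:
 *
 *	sysctl machdep.mds_mitigation=MD_CLEAR
 *	sysctl machdep.mds_mitigation=NONE
 */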
1040 static int
1041 sysctl_mds_mitigation(SYSCTL_HANDLER_ARGS)
1042 {
1043 	char buf[128];
1044 	char *ptr;
1045 	char *iter;
1046 	size_t len;
1047 	int mds;
1048 	int error = 0;
1049 	int loop = 0;
1050 
1051 	/*
1052 	 * Return current operating mode or support.
1053 	 */
1054 	if (oidp->oid_kind & CTLFLAG_WR)
1055 		mds = mds_mode;
1056 	else
1057 		mds = mds_support;
1058 
1059 	mds &= MDS_AVX512_4VNNIW_SUPPORTED |
1060 	       MDS_AVX512_4FMAPS_SUPPORTED |
1061 	       MDS_MD_CLEAR_SUPPORTED |
1062 	       MDS_TSX_FORCE_ABORT_SUPPORTED |
1063 	       MDS_NOT_REQUIRED;
1064 
1065 	while (mds) {
1066 		if (error)
1067 			break;
1068 		if (loop++) {
1069 			error = SYSCTL_OUT(req, " ", 1);
1070 			if (error)
1071 				break;
1072 		}
1073 		if (mds & MDS_AVX512_4VNNIW_SUPPORTED) {
1074 			mds &= ~MDS_AVX512_4VNNIW_SUPPORTED;
1075 			error = SYSCTL_OUT(req, "AVX512_4VNNIW", 13);
1076 		} else
1077 		if (mds & MDS_AVX512_4FMAPS_SUPPORTED) {
1078 			mds &= ~MDS_AVX512_4FMAPS_SUPPORTED;
1079 			error = SYSCTL_OUT(req, "AVX512_4FMAPS", 13);
1080 		} else
1081 		if (mds & MDS_MD_CLEAR_SUPPORTED) {
1082 			mds &= ~MDS_MD_CLEAR_SUPPORTED;
1083 			error = SYSCTL_OUT(req, "MD_CLEAR", 8);
1084 		} else
1085 		if (mds & MDS_TSX_FORCE_ABORT_SUPPORTED) {
1086 			mds &= ~MDS_TSX_FORCE_ABORT_SUPPORTED;
1087 			error = SYSCTL_OUT(req, "TSX_FORCE_ABORT", 15);
1088 		} else
1089 		if (mds & MDS_NOT_REQUIRED) {
1090 			mds &= ~MDS_NOT_REQUIRED;
1091 			error = SYSCTL_OUT(req, "MDS_NOT_REQUIRED", 16);
1092 		}
1093 	}
1094 	if (loop == 0) {
1095 		error = SYSCTL_OUT(req, "NONE", 4);
1096 	}
1097 
1098 	if (error || req->newptr == NULL)
1099 		return error;
1100 	if ((oidp->oid_kind & CTLFLAG_WR) == 0)
1101 		return error;
1102 
1103 	/*
1104 	 * Change current operating mode
1105 	 */
1106 	len = req->newlen - req->newidx;
1107 	if (len >= sizeof(buf)) {
1108 		error = EINVAL;
1109 		len = 0;
1110 	} else {
1111 		error = SYSCTL_IN(req, buf, len);
1112 	}
1113 	buf[len] = 0;
1114 	iter = &buf[0];
1115 	mds = 0;
1116 
1117 	while (error == 0 && iter) {
1118 		ptr = strsep(&iter, " ,\t\r\n");
1119 		if (*ptr == 0)
1120 			continue;
1121 		if (strcasecmp(ptr, "NONE") == 0)
1122 			mds |= 0;
1123 		else if (strcasecmp(ptr, "AVX512_4VNNIW") == 0)
1124 			mds |= MDS_AVX512_4VNNIW_SUPPORTED;
1125 		else if (strcasecmp(ptr, "AVX512_4FMAPS") == 0)
1126 			mds |= MDS_AVX512_4FMAPS_SUPPORTED;
1127 		else if (strcasecmp(ptr, "MD_CLEAR") == 0)
1128 			mds |= MDS_MD_CLEAR_SUPPORTED;
1129 		else if (strcasecmp(ptr, "TSX_FORCE_ABORT") == 0)
1130 			mds |= MDS_TSX_FORCE_ABORT_SUPPORTED;
1131 		else if (strcasecmp(ptr, "MDS_NOT_REQUIRED") == 0)
1132 			mds |= MDS_NOT_REQUIRED;
1133 		else
1134 			error = ENOENT;
1135 	}
1136 	if (error == 0) {
1137 		mds_mitigation = mds;
1138 		mds_sysctl_changed();
1139 	}
1140 	return error;
1141 }
1142 
1143 SYSCTL_PROC(_machdep, OID_AUTO, mds_mitigation,
1144 	CTLTYPE_STRING | CTLFLAG_RW,
1145 	0, 0, sysctl_mds_mitigation, "A", "MDS exploit mitigation");
1146 SYSCTL_PROC(_machdep, OID_AUTO, mds_support,
1147 	CTLTYPE_STRING | CTLFLAG_RD,
1148 	0, 0, sysctl_mds_mitigation, "A", "MDS supported features");
1149 
1150 /*
1151  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
1152  *	 updated.  Microcode updates must be applied to all cpus
1153  *	 for support to be recognized.
1154  */
1155 static void
1156 mds_vm_setup(void *arg)
1157 {
1158 	int inconsistent = 0;
1159 	int supmask;
1160 
1161 	/*
1162 	 * Fetch tunable in auto mode
1163 	 */
1164 	if (mds_mitigation < 0) {
1165 		TUNABLE_INT_FETCH("machdep.mds_mitigation", &mds_mitigation);
1166 	}
1167 
1168 	if ((supmask = mds_check_support()) != 0) {
1169 		/*
1170 		 * Must be supported on all cpus before we
1171 		 * can enable it.  Returns silently if it
1172 		 * isn't.
1173 		 *
1174 		 * NOTE! arg != NULL indicates we were called
1175 		 *	 from cpuctl after a successful microcode
1176 		 *	 update.
1177 		 */
1178 		if (arg != NULL) {
1179 			globaldata_t save_gd;
1180 			int n;
1181 
1182 			save_gd = mycpu;
1183 			for (n = 0; n < ncpus; ++n) {
1184 				lwkt_setcpu_self(globaldata_find(n));
1185 				cpu_ccfence();
1186 				if (mds_check_support() != supmask) {
1187 					inconsistent = 1;
1188 					break;
1189 				}
1190 			}
1191 			lwkt_setcpu_self(save_gd);
1192 			cpu_ccfence();
1193 		}
1194 	}
1195 
1196 	/*
1197 	 * Be silent while microcode is being loaded on various CPUs,
1198 	 * until all done.
1199 	 */
1200 	if (inconsistent) {
1201 		mds_mitigation = -1;
1202 		mds_support = 0;
1203 		return;
1204 	}
1205 
1206 	/*
1207 	 * MDS mitigation support
1208 	 */
1209 	mds_support = supmask;
1210 
1211 	/*
1212 	 * Enable mds_mitigation: set defaults if it is -1, otherwise
1213 	 * adjust the tuned value according to the available support.
1214 	 *
1215 	 * NOTE!  MDS is not enabled by default.
1216 	 */
1217 	if (mds_support) {
1218 		if (mds_mitigation < 0) {
1219 			mds_mitigation = 0;
1220 
1221 			if ((mds_support & MDS_NOT_REQUIRED) == 0 &&
1222 			    (mds_support & MDS_MD_CLEAR_SUPPORTED)) {
1223 				/* mds_mitigation |= MDS_MD_CLEAR_SUPPORTED; */
1224 			}
1225 		}
1226 	} else {
1227 		mds_mitigation = -1;
1228 	}
1229 
1230 	/*
1231 	 * Disallow sysctl changes when there is no support (otherwise
1232 	 * the wrmsr will cause a protection fault).
1233 	 */
1234 	if (mds_mitigation < 0)
1235 		sysctl___machdep_mds_mitigation.oid_kind &= ~CTLFLAG_WR;
1236 	else
1237 		sysctl___machdep_mds_mitigation.oid_kind |= CTLFLAG_WR;
1238 
1239 	mds_sysctl_changed();
1240 }
1241 
1242 /*
1243  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
1244  *	 updated.  Microcode updates must be applied to all cpus
1245  *	 for support to be recognized.
1246  */
1247 void
1248 mitigation_vm_setup(void *arg)
1249 {
1250 	spectre_vm_setup(arg);
1251 	mds_vm_setup(arg);
1252 }
1253 
1254 SYSINIT(mitigation_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
1255 	mitigation_vm_setup, NULL);
1256 
1257 /*
1258  * platform-specific vmspace initialization (nothing for x86_64)
1259  */
1260 void
1261 cpu_vmspace_alloc(struct vmspace *vm __unused)
1262 {
1263 }
1264 
1265 void
1266 cpu_vmspace_free(struct vmspace *vm __unused)
1267 {
1268 }
1269 
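/*
 * Verify that the kernel virtual address range [saddr, eaddr) lies
 * within KVA, is fully mapped, and is accessible with the requested
 * protection.  Returns 0 on success or EFAULT on failure.
 */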
1270 int
1271 kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
1272 {
1273 	vm_offset_t addr;
1274 
1275 	if (saddr < KvaStart)
1276 		return EFAULT;
1277 	if (eaddr >= KvaEnd)
1278 		return EFAULT;
1279 	for (addr = saddr; addr < eaddr; addr += PAGE_SIZE)  {
1280 		if (pmap_kextract(addr) == 0)
1281 			return EFAULT;
1282 	}
1283 	if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
1284 		return EFAULT;
1285 	return 0;
1286 }
1287 
1288 #if 0
1289 
1290 void _test_frame_enter(struct trapframe *frame);
1291 void _test_frame_exit(struct trapframe *frame);
1292 
1293 void
1294 _test_frame_enter(struct trapframe *frame)
1295 {
1296 	thread_t td = curthread;
1297 
1298 	if (ISPL(frame->tf_cs) == SEL_UPL) {
1299 		KKASSERT(td->td_lwp);
1300 		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
1301 			("_test_frame_enter: Frame mismatch %p %p",
1302 			td->td_lwp->lwp_md.md_regs, frame));
1303 		td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp;
1304 		td->td_lwp->lwp_saveupc = (void *)frame->tf_rip;
1305 	}
1306 	if ((char *)frame < td->td_kstack ||
1307 	    (char *)frame > td->td_kstack + td->td_kstack_size) {
1308 		panic("_test_frame_enter: frame not on kstack %p kstack=%p",
1309 			frame, td->td_kstack);
1310 	}
1311 }
1312 
1313 void
1314 _test_frame_exit(struct trapframe *frame)
1315 {
1316 	thread_t td = curthread;
1317 
1318 	if (ISPL(frame->tf_cs) == SEL_UPL) {
1319 		KKASSERT(td->td_lwp);
1320 		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
1321 			("_test_frame_exit: Frame mismatch %p %p",
1322 			td->td_lwp->lwp_md.md_regs, frame));
1323 		if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) {
1324 			kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n",
1325 				td->td_comm, td->td_proc->p_pid,
1326 				td->td_lwp->lwp_saveusp,
1327 				(void *)frame->tf_rsp);
1328 		}
1329 		if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) {
1330 			kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n",
1331 				td->td_comm, td->td_proc->p_pid,
1332 				td->td_lwp->lwp_saveupc,
1333 				(void *)frame->tf_rip);
1334 		}
1335 
1336 		/*
1337 		 * adulterate the fields to catch entries that
1338 		 * don't run through _test_frame_enter
1339 		 */
1340 		td->td_lwp->lwp_saveusp =
1341 			(void *)~(intptr_t)td->td_lwp->lwp_saveusp;
1342 		td->td_lwp->lwp_saveupc =
1343 			(void *)~(intptr_t)td->td_lwp->lwp_saveupc;
1344 	}
1345 	if ((char *)frame < td->td_kstack ||
1346 	    (char *)frame > td->td_kstack + td->td_kstack_size) {
1347 		panic("_test_frame_exit: frame not on kstack %p kstack=%p",
1348 			frame, td->td_kstack);
1349 	}
1350 }
1351 
1352 #endif
1353