/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * Copyright (c) 2008-2018 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/interrupt.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/lwp.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/segments.h>
#include <machine/globaldata.h>	/* npxthread */
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#include <bus/isa/isa.h>

static void	cpu_reset_real (void);

static int spectre_mitigation = -1;
static int spectre_support = 0;
static int spectre_mode = 0;
SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
	&spectre_mode, 0, "current Spectre enablements");

static int mds_mitigation = -1;
static int mds_support = 0;
static int mds_mode = 0;
SYSCTL_INT(_machdep, OID_AUTO, mds_mode, CTLFLAG_RD,
	&mds_mode, 0, "current MDS enablements");

/*
 * Finish a fork operation, with lwp lp2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
{
	struct pcb *pcb2;
	struct pmap *pmap2;

	if ((flags & RFPROC) == 0) {
		if ((flags & RFMEM) == 0) {
			/*
			 * Unshare user LDT.  > 1 test is MPSAFE.  While
			 * it can potentially race a 2->1 transition, the
			 * worst that happens is that we do an unnecessary
			 * ldt replacement.
			 */
			struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;

			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
				pcb_ldt = user_ldt_alloc(pcb1, pcb_ldt->ldt_len);
				user_ldt_free(pcb1);
				pcb1->pcb_ldt = pcb_ldt;
				set_user_ldt(pcb1);
			}
		}
		return;
	}

	/* Ensure that lp1's pcb is up to date. */
	if (mdcpu->gd_npxthread == lp1->lwp_thread)
		npxsave(lp1->lwp_thread->td_savefpu);

	/*
	 * Copy lp1's PCB.  This really only applies to the
	 * debug registers and FP state, but it's faster to just copy the
	 * whole thing.  Because we only save the PCB at switchout time,
	 * the register state may not be current.
	 */
	pcb2 = lp2->lwp_thread->td_pcb;
	*pcb2 = *lp1->lwp_thread->td_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 *
	 * pcb_rsp must allocate an additional call-return pointer below
	 * the trap frame which will be restored by cpu_heavy_restore from
	 * PCB_RIP, and the thread's td_sp pointer must allocate an
	 * additional two quadwords below the pcb_rsp call-return pointer to
	 * hold the LWKT restore function pointer and rflags.
	 *
	 * The LWKT restore function pointer must be set to cpu_heavy_restore,
	 * which is our standard heavy-weight process switch-in function.
	 * YYY eventually we should shortcut fork_return and fork_trampoline
	 * to use the LWKT restore function directly so we can get rid of
	 * all the extra crap we are setting up.
	 */
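	/*
	 * Illustrative sketch of the layout set up by the code below
	 * (higher addresses at the top, 8-byte slots):
	 *
	 *	pcb2			(td_pcb)
	 *	trap frame		(lwp_md.md_regs = (trapframe *)pcb2 - 1)
	 *	call-return pointer	(pcb_rsp points here)
	 *	saved rflags (PSL_USER)
	 *	cpu_heavy_restore	(td_sp points here)
	 */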
	lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
	bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs,
	      sizeof(*lp2->lwp_md.md_regs));

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 *
	 * Set the new pmap CR3.  If the new process uses isolated VM spaces,
	 * also set the isolated CR3.
	 */
	pmap2 = vmspace_pmap(lp2->lwp_proc->p_vmspace);
	pcb2->pcb_cr3 = vtophys(pmap2->pm_pml4);
	if ((pcb2->pcb_flags & PCB_ISOMMU) && pmap2->pm_pmlpv_iso) {
		pcb2->pcb_cr3_iso = vtophys(pmap2->pm_pml4_iso);
	} else {
		pcb2->pcb_flags &= ~PCB_ISOMMU;
		pcb2->pcb_cr3_iso = 0;
	}

#if 0
	/*
	 * Per-process spectre mitigation (future)
	 */
	pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
	switch (spectre_mitigation) {
	case 1:
		pcb2->pcb_flags |= PCB_IBRS1;
		break;
	case 2:
		pcb2->pcb_flags |= PCB_IBRS2;
		break;
	default:
		break;
	}
#endif

	pcb2->pcb_rbx = (unsigned long)fork_return;	/* fork_trampoline argument */
	pcb2->pcb_rbp = 0;
	pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
	pcb2->pcb_r12 = (unsigned long)lp2;		/* fork_trampoline argument */
	pcb2->pcb_r13 = 0;
	pcb2->pcb_r14 = 0;
	pcb2->pcb_r15 = 0;
	pcb2->pcb_rip = (unsigned long)fork_trampoline;
	lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
	*(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
	lp2->lwp_thread->td_sp -= sizeof(void *);
	*(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;

	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above.
	 * pcb2->pcb_onfault:	cloned above (always NULL here).
	 * pcb2->pcb_onfault_sp: cloned above (don't care).
	 */

	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = NULL;

	/* Copy the LDT, if necessary. */
	if (pcb2->pcb_ldt != NULL) {
		if (flags & RFMEM) {
			atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
		} else {
			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
						       pcb2->pcb_ldt->ldt_len);
		}
	}
	bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
	      sizeof(lp2->lwp_thread->td_tls));
	/*
	 * Now, cpu_switch() can schedule the new lwp.
	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %rbx loaded with the new lwp's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(lp, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Prepare new lwp to return to the address specified in params.
 */
int
cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
{
	struct trapframe *regs = lp->lwp_md.md_regs;
	void *bad_return = NULL;
	int error;

	regs->tf_rip = (long)params->lwp_func;
	regs->tf_rsp = (long)params->lwp_stack;
	/* Set up argument for function call */
	regs->tf_rdi = (long)params->lwp_arg;

	/*
	 * Set up fake return address.  As the lwp function may never return,
	 * we simply copy out a NULL pointer and force the lwp to receive
	 * a SIGSEGV if it returns anyway.
	 */
	regs->tf_rsp -= sizeof(void *);
	error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
	if (error)
		return (error);

	if (lp->lwp_proc->p_vmm) {
		lp->lwp_thread->td_pcb->pcb_cr3 = KPML4phys;
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))vmm_lwp_return, lp);
	} else {
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
	}
	return (0);
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
		     void *arg)
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	lp->lwp_thread->td_pcb->pcb_rbx = (long)func;	/* function */
	lp->lwp_thread->td_pcb->pcb_r12 = (long)arg;	/* first arg */
}

void
cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
{
	td->td_pcb->pcb_rbx = (long)func;
	td->td_pcb->pcb_r12 = (long)arg;
	td->td_switch = cpu_lwkt_switch;
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = rfunc;	/* exit function on return */
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = cpu_kthread_restore;
}

void
cpu_lwp_exit(void)
{
	struct thread *td = curthread;
	struct pcb *pcb;

	pcb = td->td_pcb;

	/* Some x86 functionality was dropped */
	KKASSERT(pcb->pcb_ext == NULL);

	/*
	 * disable all hardware breakpoints
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		reset_dbregs();
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
	td->td_gd->gd_cnt.v_swtch++;

	crit_enter_quick(td);
	if (td->td_flags & TDF_TSLEEPQ)
		tsleep_remove(td);
	lwkt_deschedule_self(td);
	lwkt_remove_tdallq(td);
	cpu_thread_exit();
}

/*
 * Terminate the current thread.  The caller must have already acquired
 * the thread's rwlock and placed it on a reap list or otherwise notified
 * a reaper of its existence.  We set a special assembly switch function which
 * releases td_rwlock after it has cleaned up the MMU state and switched
 * out the stack.
 *
 * Must be called from a critical section and with the thread descheduled.
 */
void
cpu_thread_exit(void)
{
	npxexit();
	curthread->td_switch = cpu_exit_switch;
	curthread->td_flags |= TDF_EXITING;
	lwkt_switch();
	panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
}

void
cpu_reset(void)
{
	cpu_reset_real();
}

static void
cpu_reset_real(void)
{
	/*
	 * Attempt to do a CPU reset via the keyboard controller.  Do not
	 * turn off GateA20, as any machine that fails to do the reset
	 * here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	kprintf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for kprintf to complete */
#endif
#if 0 /* JG */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);
#endif

	/* "good night, sweet prince .... <THUNK!>" */
	cpu_invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Convert kernel VA to physical address
 */
vm_paddr_t
kvtop(void *addr)
{
	vm_paddr_t pa;

	pa = pmap_kextract((vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return (pa);
}

static void
swi_vm(void *arg, void *frame)
{
	if (busdma_swi_pending != 0)
		busdma_swi();
}

static void
swi_vm_setup(void *arg)
{
	register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
}

SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);

/*
 * NOTE: This routine is also called after a successful microcode
 *	 reload on cpu 0.
 */
void mitigation_vm_setup(void *arg);

/*
 * Check for IBPB and IBRS support
 *
 * These bits also specify desired modes in the spectre_mitigation sysctl.
 */
#define IBRS_SUPPORTED		0x0001
#define STIBP_SUPPORTED		0x0002
#define IBPB_SUPPORTED		0x0004
#define IBRS_AUTO_SUPPORTED	0x0008
#define STIBP_AUTO_SUPPORTED	0x0010
#define IBRS_PREFERRED_REQUEST	0x0020

static
int
spectre_check_support(void)
{
	uint32_t p[4];
	int rv = 0;

	/*
	 * Spectre mitigation hw bits
	 *
	 * IBRS		Indirect Branch Restricted Speculation   (isolation)
	 * STIBP	Single Thread Indirect Branch Prediction (isolation)
	 * IBPB		Branch Prediction Barrier		  (barrier)
	 *
	 * IBRS and STIBP must be toggled (enabled on entry to the kernel,
	 * disabled on exit, as well as disabled during any MWAIT/HLT).
	 * When the *_AUTO bits are available, IBRS and STIBP may be left
	 * turned on and do not have to be toggled on kernel entry/exit.
	 *
	 * All this shit has enormous overhead.  IBPB in particular, and
	 * the non-auto modes, are disabled by default.
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		cpuid_count(7, 0, p);
		if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
			rv |= IBRS_SUPPORTED | IBPB_SUPPORTED;
		if (p[3] & CPUID_7_0_I3_STIBP)
			rv |= STIBP_SUPPORTED;

		/*
		 * 0x80000008 p[1] bit 12 indicates IBPB support
		 *
		 * This bit might be set even though SPEC_CTRL is not set.
		 */
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		do_cpuid(0x80000008U, p);
		if (p[1] & CPUID_INTEL_80000008_I1_IBPB_SUPPORT)
			rv |= IBPB_SUPPORTED;
	} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * 0x80000008 p[1] bit 12 indicates IBPB support
		 *	      p[1] bit 14 indicates IBRS support
		 *	      p[1] bit 15 indicates STIBP support
		 *
		 *	      p[1] bit 16 indicates IBRS auto support
		 *	      p[1] bit 17 indicates STIBP auto support
		 *	      p[1] bit 18 indicates processor prefers using
		 *		IBRS instead of retpoline.
		 */
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		do_cpuid(0x80000008U, p);
		if (p[1] & CPUID_AMD_80000008_I1_IBPB_SUPPORT)
			rv |= IBPB_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_IBRS_SUPPORT)
			rv |= IBRS_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_STIBP_SUPPORT)
			rv |= STIBP_SUPPORTED;

		if (p[1] & CPUID_AMD_80000008_I1_IBRS_AUTO)
			rv |= IBRS_AUTO_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_STIBP_AUTO)
			rv |= STIBP_AUTO_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_IBRS_REQUESTED)
			rv |= IBRS_PREFERRED_REQUEST;
	}

	return rv;
}

/*
 * Iterate CPUs and adjust MSR for global operations, since
 * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
 */
#define CHECK(flag)	(spectre_mitigation & spectre_support & (flag))
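
/*
 * Note: CHECK(flag) is non-zero only when the user has requested the
 * corresponding mode via the machdep.spectre_mitigation sysctl/tunable
 * (spectre_mitigation) AND the cpu reports support for it
 * (spectre_support).  E.g. CHECK(IBRS_AUTO_SUPPORTED) selects the
 * leave-IBRS-on mode only when both sides agree.
 */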

static
void
spectre_sysctl_changed(void)
{
	globaldata_t save_gd;
	struct trampframe *tr;
	int spec_ctrl;
	int spec_mask;
	int mode;
	int n;

	spec_mask = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
		    SPEC_CTRL_DUMMY_ENABLE | SPEC_CTRL_DUMMY_IBPB;

	/*
	 * Fixup state
	 */
	mode = 0;
	save_gd = mycpu;
	for (n = 0; n < ncpus; ++n) {
		lwkt_setcpu_self(globaldata_find(n));
		cpu_ccfence();
		tr = &pscpu->trampoline;

		/*
		 * Make sure we are cleaned out.
		 *
		 * XXX cleanup, reusing globals inside the loop (they get
		 * set to the same thing each loop)
		 *
		 * [0] kernel entry (idle exit)
		 * [1] kernel exit  (idle entry)
		 */
		tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
		tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;

		/*
		 * Don't try to parse if not available
		 */
		if (spectre_mitigation < 0)
			continue;

		/*
		 * IBRS mode.  Auto overrides toggling.
		 *
		 * Only set the ENABLE flag if we have to toggle something
		 * on entry and exit.
		 */
		spec_ctrl = 0;
		if (CHECK(IBRS_AUTO_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_IBRS;
			mode |= IBRS_AUTO_SUPPORTED;
		} else if (CHECK(IBRS_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_DUMMY_ENABLE;
			mode |= IBRS_SUPPORTED;
		}
		if (CHECK(STIBP_AUTO_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_STIBP;
			mode |= STIBP_AUTO_SUPPORTED;
		} else if (CHECK(STIBP_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_STIBP | SPEC_CTRL_DUMMY_ENABLE;
			mode |= STIBP_SUPPORTED;
		}

		/*
		 * IBPB requested and supported.
		 */
		if (CHECK(IBPB_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_DUMMY_IBPB;
			mode |= IBPB_SUPPORTED;
		}

		/*
		 * Update the MSR if the cpu supports the modes to ensure
		 * proper disablement if the user disabled the mode.
		 */
		if (spectre_support & (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
				    STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED)) {
			wrmsr(MSR_SPEC_CTRL,
			      spec_ctrl & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
		}

		/*
		 * Update spec_ctrl fields in the trampoline.
		 *
		 * [0] on-kernel-entry (on-idle-exit)
		 * [1] on-kernel-exit  (on-idle-entry)
		 *
		 * When auto mode is supported we leave the bit set, otherwise
		 * we clear the bits.
		 */
		tr->tr_pcb_spec_ctrl[0] |= spec_ctrl;
		if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
			spec_ctrl &= ~SPEC_CTRL_IBRS;
		if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
			spec_ctrl &= ~SPEC_CTRL_STIBP;
		tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;

		/*
		 * Make sure we set this on the first loop.  It will be
		 * the same value on remaining loops.
		 */
		spectre_mode = mode;
	}
	lwkt_setcpu_self(save_gd);
	cpu_ccfence();

	/*
	 * Console message on mitigation mode change
	 */
	kprintf("Spectre: support=(");
	if (spectre_support == 0) {
		kprintf(" none");
	} else {
		if (spectre_support & IBRS_SUPPORTED)
			kprintf(" IBRS");
		if (spectre_support & STIBP_SUPPORTED)
			kprintf(" STIBP");
		if (spectre_support & IBPB_SUPPORTED)
			kprintf(" IBPB");
		if (spectre_support & IBRS_AUTO_SUPPORTED)
			kprintf(" IBRS_AUTO");
		if (spectre_support & STIBP_AUTO_SUPPORTED)
			kprintf(" STIBP_AUTO");
		if (spectre_support & IBRS_PREFERRED_REQUEST)
			kprintf(" IBRS_REQUESTED");
	}
	kprintf(" ) req=%04x operating=(", (uint16_t)spectre_mitigation);
	if (spectre_mode == 0) {
		kprintf(" none");
	} else {
		if (spectre_mode & IBRS_SUPPORTED)
			kprintf(" IBRS");
		if (spectre_mode & STIBP_SUPPORTED)
			kprintf(" STIBP");
		if (spectre_mode & IBPB_SUPPORTED)
			kprintf(" IBPB");
		if (spectre_mode & IBRS_AUTO_SUPPORTED)
			kprintf(" IBRS_AUTO");
		if (spectre_mode & STIBP_AUTO_SUPPORTED)
			kprintf(" STIBP_AUTO");
		if (spectre_mode & IBRS_PREFERRED_REQUEST)
			kprintf(" IBRS_REQUESTED");
	}
	kprintf(" )\n");
}
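
/*
 * Example of the resulting console output (illustrative values only):
 *
 *	Spectre: support=( IBRS IBPB IBRS_AUTO ) req=0008 operating=( IBRS_AUTO )
 */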

#undef CHECK

/*
 * User changes sysctl value
 */
static int
sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
{
	char buf[128];
	char *ptr;
	char *iter;
	size_t len;
	int spectre;
	int error = 0;
	int loop = 0;

	/*
	 * Return current operating mode or support.
	 */
	if (oidp->oid_kind & CTLFLAG_WR)
		spectre = spectre_mode;
	else
		spectre = spectre_support;

	spectre &= (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
		    STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED |
		    IBPB_SUPPORTED);
	while (spectre) {
		if (error)
			break;
		if (loop++) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		}
		if (spectre & IBRS_SUPPORTED) {
			spectre &= ~IBRS_SUPPORTED;
			error = SYSCTL_OUT(req, "IBRS", 4);
		} else
		if (spectre & IBRS_AUTO_SUPPORTED) {
			spectre &= ~IBRS_AUTO_SUPPORTED;
			error = SYSCTL_OUT(req, "IBRS_AUTO", 9);
		} else
		if (spectre & STIBP_SUPPORTED) {
			spectre &= ~STIBP_SUPPORTED;
			error = SYSCTL_OUT(req, "STIBP", 5);
		} else
		if (spectre & STIBP_AUTO_SUPPORTED) {
			spectre &= ~STIBP_AUTO_SUPPORTED;
			error = SYSCTL_OUT(req, "STIBP_AUTO", 10);
		} else
		if (spectre & IBPB_SUPPORTED) {
			spectre &= ~IBPB_SUPPORTED;
			error = SYSCTL_OUT(req, "IBPB", 4);
		}
	}
	if (loop == 0) {
		error = SYSCTL_OUT(req, "NONE", 4);
	}

	if (error || req->newptr == NULL)
		return error;
	if ((oidp->oid_kind & CTLFLAG_WR) == 0)
		return error;

	/*
	 * Change current operating mode
	 */
	len = req->newlen - req->newidx;
	if (len >= sizeof(buf)) {
		error = EINVAL;
		len = 0;
	} else {
		error = SYSCTL_IN(req, buf, len);
	}
	buf[len] = 0;
	iter = &buf[0];
	spectre = 0;

	while (error == 0 && iter) {
		ptr = strsep(&iter, " ,\t\r\n");
		if (*ptr == 0)
			continue;
		if (strcasecmp(ptr, "NONE") == 0)
			spectre |= 0;
		else if (strcasecmp(ptr, "IBRS") == 0)
			spectre |= IBRS_SUPPORTED;
		else if (strcasecmp(ptr, "IBRS_AUTO") == 0)
			spectre |= IBRS_AUTO_SUPPORTED;
		else if (strcasecmp(ptr, "STIBP") == 0)
			spectre |= STIBP_SUPPORTED;
		else if (strcasecmp(ptr, "STIBP_AUTO") == 0)
			spectre |= STIBP_AUTO_SUPPORTED;
		else if (strcasecmp(ptr, "IBPB") == 0)
			spectre |= IBPB_SUPPORTED;
		else
			error = ENOENT;
	}
	if (error == 0) {
		spectre_mitigation = spectre;
		spectre_sysctl_changed();
	}
	return error;
}

SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation,
	CTLTYPE_STRING | CTLFLAG_RW,
	0, 0, sysctl_spectre_mitigation, "A", "Spectre exploit mitigation");
SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
	CTLTYPE_STRING | CTLFLAG_RD,
	0, 0, sysctl_spectre_mitigation, "A", "Spectre supported features");
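
/*
 * Example usage (illustrative):
 *
 *	sysctl machdep.spectre_mitigation="IBRS_AUTO STIBP_AUTO"
 *	sysctl machdep.spectre_mitigation=NONE
 *
 * Tokens may be separated by spaces, commas, or tabs; an unrecognized
 * token causes the write to fail with ENOENT.
 */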

/*
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
static void
spectre_vm_setup(void *arg)
{
	int inconsistent = 0;
	int supmask;

	/*
	 * Fetch tunable in auto mode
	 */
	if (spectre_mitigation < 0) {
		TUNABLE_INT_FETCH("machdep.spectre_mitigation",
				  &spectre_mitigation);
	}

	if ((supmask = spectre_check_support()) != 0) {
		/*
		 * Must be supported on all cpus before we
		 * can enable it.  Returns silently if it
		 * isn't.
		 *
		 * NOTE! arg != NULL indicates we were called
		 *	 from cpuctl after a successful microcode
		 *	 update.
		 */
		if (arg != NULL) {
			globaldata_t save_gd;
			int n;

			save_gd = mycpu;
			for (n = 0; n < ncpus; ++n) {
				lwkt_setcpu_self(globaldata_find(n));
				cpu_ccfence();
				if (spectre_check_support() !=
				    supmask) {
					inconsistent = 1;
					break;
				}
			}
			lwkt_setcpu_self(save_gd);
			cpu_ccfence();
		}
	}

	/*
	 * Be silent while microcode is being loaded on various CPUs,
	 * until all done.
	 */
	if (inconsistent) {
		spectre_mitigation = -1;
		spectre_support = 0;
		return;
	}

	/*
	 * IBRS support
	 */
	spectre_support = supmask;

	/*
	 * Enable spectre_mitigation, set defaults if -1, adjust
	 * tuned value according to support if not.
	 *
	 * NOTE!  We do not enable IBPB for user->kernel transitions
	 *	  by default, so this code is commented out for now.
	 */
	if (spectre_support) {
		if (spectre_mitigation < 0) {
			spectre_mitigation = 0;

			/*
			 * IBRS toggling not currently recommended as a
			 * default.
			 */
			if (spectre_support & IBRS_AUTO_SUPPORTED)
				spectre_mitigation |= IBRS_AUTO_SUPPORTED;
			else if (spectre_support & IBRS_SUPPORTED)
				spectre_mitigation |= 0;

			/*
			 * STIBP toggling not currently recommended as a
			 * default.
			 */
			if (spectre_support & STIBP_AUTO_SUPPORTED)
				spectre_mitigation |= STIBP_AUTO_SUPPORTED;
			else if (spectre_support & STIBP_SUPPORTED)
				spectre_mitigation |= 0;

			/*
			 * IBPB adds enormous (~2uS) overhead to system
			 * calls etc, we do not enable it by default.
			 */
			if (spectre_support & IBPB_SUPPORTED)
				spectre_mitigation |= 0;
		}
	} else {
		spectre_mitigation = -1;
	}

	/*
	 * Disallow sysctl changes when there is no support (otherwise
	 * the wrmsr will cause a protection fault).
	 */
	if (spectre_mitigation < 0)
		sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
	else
		sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;

	spectre_sysctl_changed();
}

#define MDS_AVX512_4VNNIW_SUPPORTED	0x0001
#define MDS_AVX512_4FMAPS_SUPPORTED	0x0002
#define MDS_MD_CLEAR_SUPPORTED		0x0004
#define MDS_TSX_FORCE_ABORT_SUPPORTED	0x0008
#define MDS_NOT_REQUIRED		0x8000
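
/*
 * MDS_NOT_REQUIRED is reported when the cpu is not an Intel cpu, or when
 * an Intel cpu advertises MDS_NO via the IA32_ARCH_CAPABILITIES MSR (see
 * mds_check_support() below), meaning the hardware is not believed to be
 * vulnerable and no mitigation is required.
 */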

static
int
mds_check_support(void)
{
	uint64_t msr;
	uint32_t p[4];
	int rv = 0;

	/*
	 * MDS mitigation hw bits
	 *
	 * MD_CLEAR	Use the microcode-supported VERW instruction.  This
	 *		is the only mode we really support.
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		cpuid_count(7, 0, p);
		if (p[3] & CPUID_SEF_ARCH_CAP) {
			msr = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
			if (msr & IA32_ARCH_MDS_NO)
				rv = MDS_NOT_REQUIRED;
		}
		if (p[3] & CPUID_SEF_AVX512_4VNNIW)
			rv |= MDS_AVX512_4VNNIW_SUPPORTED;
		if (p[3] & CPUID_SEF_AVX512_4FMAPS)
			rv |= MDS_AVX512_4FMAPS_SUPPORTED;
		if (p[3] & CPUID_SEF_MD_CLEAR)
			rv |= MDS_MD_CLEAR_SUPPORTED;
		if (p[3] & CPUID_SEF_TSX_FORCE_ABORT)
			rv |= MDS_TSX_FORCE_ABORT_SUPPORTED;
	} else {
		rv = MDS_NOT_REQUIRED;
	}

	return rv;
}

/*
 * Iterate CPUs and adjust MSR for global operations, since
 * the KMMU* code won't do it if mds_mitigation is 0 or 2.
 */
#define CHECK(flag)	(mds_mitigation & mds_support & (flag))

static
void
mds_sysctl_changed(void)
{
	globaldata_t save_gd;
	struct trampframe *tr;
	int spec_ctrl;
	int spec_mask;
	int mode;
	int n;

	spec_mask = SPEC_CTRL_MDS_ENABLE;

	/*
	 * Fixup state
	 */
	mode = 0;
	save_gd = mycpu;
	for (n = 0; n < ncpus; ++n) {
		lwkt_setcpu_self(globaldata_find(n));
		cpu_ccfence();
		tr = &pscpu->trampoline;

		/*
		 * Make sure we are cleaned out.
		 *
		 * XXX cleanup, reusing globals inside the loop (they get
		 * set to the same thing each loop)
		 *
		 * [0] kernel entry (idle exit)
		 * [1] kernel exit  (idle entry)
		 */
		tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
		tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;

		/*
		 * Don't try to parse if not available
		 */
		if (mds_mitigation < 0)
			continue;

		spec_ctrl = 0;
		if (CHECK(MDS_MD_CLEAR_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_MDS_ENABLE;
			mode |= MDS_MD_CLEAR_SUPPORTED;
		}

		/*
		 * Update spec_ctrl fields in the trampoline.
		 *
		 * [0] on-kernel-entry (on-idle-exit)
		 * [1] on-kernel-exit  (on-idle-entry)
		 *
		 * The MDS stuff is only needed on kernel-exit or idle-entry
		 */
		/* tr->tr_pcb_spec_ctrl[0] |= spec_ctrl; */
		tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;

		/*
		 * Make sure we set this on the first loop.  It will be
		 * the same value on remaining loops.
		 */
		mds_mode = mode;
	}
	lwkt_setcpu_self(save_gd);
	cpu_ccfence();

	/*
	 * Console message on mitigation mode change
	 */
	kprintf("MDS: support=(");
	if (mds_support == 0) {
		kprintf(" none");
	} else {
		if (mds_support & MDS_AVX512_4VNNIW_SUPPORTED)
			kprintf(" AVX512_4VNNIW");
		if (mds_support & MDS_AVX512_4FMAPS_SUPPORTED)
			kprintf(" AVX512_4FMAPS");
		if (mds_support & MDS_MD_CLEAR_SUPPORTED)
			kprintf(" MD_CLEAR");
		if (mds_support & MDS_TSX_FORCE_ABORT_SUPPORTED)
			kprintf(" TSX_FORCE_ABORT");
		if (mds_support & MDS_NOT_REQUIRED)
			kprintf(" MDS_NOT_REQUIRED");
	}
	kprintf(" ) req=%04x operating=(", (uint16_t)mds_mitigation);
	if (mds_mode == 0) {
		kprintf(" none");
	} else {
		if (mds_mode & MDS_AVX512_4VNNIW_SUPPORTED)
			kprintf(" AVX512_4VNNIW");
		if (mds_mode & MDS_AVX512_4FMAPS_SUPPORTED)
			kprintf(" AVX512_4FMAPS");
		if (mds_mode & MDS_MD_CLEAR_SUPPORTED)
			kprintf(" MD_CLEAR");
		if (mds_mode & MDS_TSX_FORCE_ABORT_SUPPORTED)
			kprintf(" TSX_FORCE_ABORT");
		if (mds_mode & MDS_NOT_REQUIRED)
			kprintf(" MDS_NOT_REQUIRED");
	}
	kprintf(" )\n");
}

#undef CHECK

/*
 * User changes sysctl value
 */
static int
sysctl_mds_mitigation(SYSCTL_HANDLER_ARGS)
{
	char buf[128];
	char *ptr;
	char *iter;
	size_t len;
	int mds;
	int error = 0;
	int loop = 0;

	/*
	 * Return current operating mode or support.
	 */
	if (oidp->oid_kind & CTLFLAG_WR)
		mds = mds_mode;
	else
		mds = mds_support;

	mds &= MDS_AVX512_4VNNIW_SUPPORTED |
	       MDS_AVX512_4FMAPS_SUPPORTED |
	       MDS_MD_CLEAR_SUPPORTED |
	       MDS_TSX_FORCE_ABORT_SUPPORTED |
	       MDS_NOT_REQUIRED;

	while (mds) {
		if (error)
			break;
		if (loop++) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		}
		if (mds & MDS_AVX512_4VNNIW_SUPPORTED) {
			mds &= ~MDS_AVX512_4VNNIW_SUPPORTED;
			error = SYSCTL_OUT(req, "AVX512_4VNNIW", 13);
		} else
		if (mds & MDS_AVX512_4FMAPS_SUPPORTED) {
			mds &= ~MDS_AVX512_4FMAPS_SUPPORTED;
			error = SYSCTL_OUT(req, "AVX512_4FMAPS", 13);
		} else
		if (mds & MDS_MD_CLEAR_SUPPORTED) {
			mds &= ~MDS_MD_CLEAR_SUPPORTED;
			error = SYSCTL_OUT(req, "MD_CLEAR", 8);
		} else
		if (mds & MDS_TSX_FORCE_ABORT_SUPPORTED) {
			mds &= ~MDS_TSX_FORCE_ABORT_SUPPORTED;
			error = SYSCTL_OUT(req, "TSX_FORCE_ABORT", 15);
		} else
		if (mds & MDS_NOT_REQUIRED) {
			mds &= ~MDS_NOT_REQUIRED;
			error = SYSCTL_OUT(req, "MDS_NOT_REQUIRED", 16);
		}
	}
	if (loop == 0) {
		error = SYSCTL_OUT(req, "NONE", 4);
	}

	if (error || req->newptr == NULL)
		return error;
	if ((oidp->oid_kind & CTLFLAG_WR) == 0)
		return error;

	/*
	 * Change current operating mode
	 */
	len = req->newlen - req->newidx;
	if (len >= sizeof(buf)) {
		error = EINVAL;
		len = 0;
	} else {
		error = SYSCTL_IN(req, buf, len);
	}
	buf[len] = 0;
	iter = &buf[0];
	mds = 0;

	while (error == 0 && iter) {
		ptr = strsep(&iter, " ,\t\r\n");
		if (*ptr == 0)
			continue;
		if (strcasecmp(ptr, "NONE") == 0)
			mds |= 0;
		else if (strcasecmp(ptr, "AVX512_4VNNIW") == 0)
			mds |= MDS_AVX512_4VNNIW_SUPPORTED;
		else if (strcasecmp(ptr, "AVX512_4FMAPS") == 0)
			mds |= MDS_AVX512_4FMAPS_SUPPORTED;
		else if (strcasecmp(ptr, "MD_CLEAR") == 0)
			mds |= MDS_MD_CLEAR_SUPPORTED;
		else if (strcasecmp(ptr, "TSX_FORCE_ABORT") == 0)
			mds |= MDS_TSX_FORCE_ABORT_SUPPORTED;
		else if (strcasecmp(ptr, "MDS_NOT_REQUIRED") == 0)
			mds |= MDS_NOT_REQUIRED;
		else
			error = ENOENT;
	}
	if (error == 0) {
		mds_mitigation = mds;
		mds_sysctl_changed();
	}
	return error;
}

SYSCTL_PROC(_machdep, OID_AUTO, mds_mitigation,
	CTLTYPE_STRING | CTLFLAG_RW,
	0, 0, sysctl_mds_mitigation, "A", "MDS exploit mitigation");
SYSCTL_PROC(_machdep, OID_AUTO, mds_support,
	CTLTYPE_STRING | CTLFLAG_RD,
	0, 0, sysctl_mds_mitigation, "A", "MDS supported features");
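
/*
 * Example usage (illustrative):
 *
 *	sysctl machdep.mds_mitigation=MD_CLEAR
 *	sysctl machdep.mds_mitigation=NONE
 */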

/*
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
static void
mds_vm_setup(void *arg)
{
	int inconsistent = 0;
	int supmask;

	/*
	 * Fetch tunable in auto mode
	 */
	if (mds_mitigation < 0) {
		TUNABLE_INT_FETCH("machdep.mds_mitigation", &mds_mitigation);
	}

	if ((supmask = mds_check_support()) != 0) {
		/*
		 * Must be supported on all cpus before we
		 * can enable it.  Returns silently if it
		 * isn't.
		 *
		 * NOTE! arg != NULL indicates we were called
		 *	 from cpuctl after a successful microcode
		 *	 update.
		 */
		if (arg != NULL) {
			globaldata_t save_gd;
			int n;

			save_gd = mycpu;
			for (n = 0; n < ncpus; ++n) {
				lwkt_setcpu_self(globaldata_find(n));
				cpu_ccfence();
				if (mds_check_support() != supmask) {
					inconsistent = 1;
					break;
				}
			}
			lwkt_setcpu_self(save_gd);
			cpu_ccfence();
		}
	}

	/*
	 * Be silent while microcode is being loaded on various CPUs,
	 * until all done.
	 */
	if (inconsistent) {
		mds_mitigation = -1;
		mds_support = 0;
		return;
	}

	/*
	 * MDS support
	 */
	mds_support = supmask;

	/*
	 * Enable mds_mitigation, set defaults if -1, adjust
	 * tuned value according to support if not.
	 *
	 * NOTE!  MDS is not enabled by default.
	 */
	if (mds_support) {
		if (mds_mitigation < 0) {
			mds_mitigation = 0;

			if ((mds_support & MDS_NOT_REQUIRED) == 0 &&
			    (mds_support & MDS_MD_CLEAR_SUPPORTED)) {
				/* mds_mitigation |= MDS_MD_CLEAR_SUPPORTED; */
			}
		}
	} else {
		mds_mitigation = -1;
	}

	/*
	 * Disallow sysctl changes when there is no support (otherwise
	 * the wrmsr will cause a protection fault).
	 */
	if (mds_mitigation < 0)
		sysctl___machdep_mds_mitigation.oid_kind &= ~CTLFLAG_WR;
	else
		sysctl___machdep_mds_mitigation.oid_kind |= CTLFLAG_WR;

	mds_sysctl_changed();
}

/*
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
void
mitigation_vm_setup(void *arg)
{
	spectre_vm_setup(arg);
	mds_vm_setup(arg);
}

SYSINIT(mitigation_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
	mitigation_vm_setup, NULL);

/*
 * platform-specific vmspace initialization (nothing for x86_64)
 */
void
cpu_vmspace_alloc(struct vmspace *vm __unused)
{
}

void
cpu_vmspace_free(struct vmspace *vm __unused)
{
}

int
kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
{
	vm_offset_t addr;

	if (saddr < KvaStart)
		return EFAULT;
	if (eaddr >= KvaEnd)
		return EFAULT;
	for (addr = saddr; addr < eaddr; addr += PAGE_SIZE) {
		if (pmap_kextract(addr) == 0)
			return EFAULT;
	}
	if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
		return EFAULT;
	return 0;
}

#if 0

void _test_frame_enter(struct trapframe *frame);
void _test_frame_exit(struct trapframe *frame);

void
_test_frame_enter(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_enter: Frame mismatch %p %p",
			td->td_lwp->lwp_md.md_regs, frame));
		td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp;
		td->td_lwp->lwp_saveupc = (void *)frame->tf_rip;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_enter: frame not on kstack %p kstack=%p",
			frame, td->td_kstack);
	}
}

void
_test_frame_exit(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_exit: Frame mismatch %p %p",
			td->td_lwp->lwp_md.md_regs, frame));
		if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) {
			kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveusp,
				(void *)frame->tf_rsp);
		}
		if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) {
			kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveupc,
				(void *)frame->tf_rip);
		}

		/*
		 * adulterate the fields to catch entries that
		 * don't run through test_frame_enter
		 */
		td->td_lwp->lwp_saveusp =
			(void *)~(intptr_t)td->td_lwp->lwp_saveusp;
		td->td_lwp->lwp_saveupc =
			(void *)~(intptr_t)td->td_lwp->lwp_saveupc;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_exit: frame not on kstack %p kstack=%p",
			frame, td->td_kstack);
	}
}

#endif