xref: /openbsd/sys/arch/amd64/amd64/vmm_machdep.c (revision 9b6e80ec)
1 /* $OpenBSD: vmm_machdep.c,v 1.40 2024/11/10 22:35:31 jsg Exp $ */
2 /*
3  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/malloc.h>
21 #include <sys/device.h>
22 #include <sys/pool.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/ioctl.h>
26 #include <sys/queue.h>
27 #include <sys/refcnt.h>
28 #include <sys/rwlock.h>
29 #include <sys/pledge.h>
30 #include <sys/memrange.h>
31 #include <sys/tracepoint.h>
32 
33 #include <uvm/uvm_extern.h>
34 
35 #include <machine/fpu.h>
36 #include <machine/pmap.h>
37 #include <machine/biosvar.h>
38 #include <machine/segments.h>
39 #include <machine/cpufunc.h>
40 #include <machine/vmmvar.h>
41 
42 #include <dev/isa/isareg.h>
43 #include <dev/pv/pvreg.h>
44 
45 #include <dev/vmm/vmm.h>
46 
47 #ifdef MP_LOCKDEBUG
48 #include <ddb/db_output.h>
49 extern int __mp_lock_spinout;
50 #endif /* MP_LOCKDEBUG */
51 
52 void *l1tf_flush_region;
53 
54 #define DEVNAME(s)  ((s)->sc_dev.dv_xname)
55 
56 #define CTRL_DUMP(x,y,z) printf("     %s: Can set:%s Can clear:%s\n", #z , \
57 				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
58 				IA32_VMX_##z, 1) ? "Yes" : "No", \
59 				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
60 				IA32_VMX_##z, 0) ? "Yes" : "No");
61 
62 #define VMX_EXIT_INFO_HAVE_RIP		0x1
63 #define VMX_EXIT_INFO_HAVE_REASON	0x2
64 #define VMX_EXIT_INFO_COMPLETE				\
65     (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON)
66 
67 void vmx_dump_vmcs_field(uint16_t, const char *);
68 int vmm_enabled(void);
69 void vmm_activate_machdep(struct device *, int);
70 int vmmioctl_machdep(dev_t, u_long, caddr_t, int, struct proc *);
71 int vmm_quiesce_vmx(void);
72 int vm_run(struct vm_run_params *);
73 int vm_intr_pending(struct vm_intr_params *);
74 int vm_rwregs(struct vm_rwregs_params *, int);
75 int vm_rwvmparams(struct vm_rwvmparams_params *, int);
76 int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
77 int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
78 int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
79 int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
80 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
81 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
82 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
83 int vcpu_reload_vmcs_vmx(struct vcpu *);
84 int vcpu_init(struct vcpu *, struct vm_create_params *);
85 int vcpu_init_vmx(struct vcpu *);
86 int vcpu_init_svm(struct vcpu *, struct vm_create_params *);
87 int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
88 int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
89 void vcpu_deinit(struct vcpu *);
90 void vcpu_deinit_svm(struct vcpu *);
91 void vcpu_deinit_vmx(struct vcpu *);
92 int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
93 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
94 int vmx_get_exit_info(uint64_t *, uint64_t *);
95 int vmx_load_pdptes(struct vcpu *);
96 int vmx_handle_exit(struct vcpu *);
97 int svm_handle_exit(struct vcpu *);
98 int svm_handle_msr(struct vcpu *);
99 int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
100 int vmx_handle_xsetbv(struct vcpu *);
101 int svm_handle_xsetbv(struct vcpu *);
102 int vmm_handle_cpuid(struct vcpu *);
103 int vmx_handle_rdmsr(struct vcpu *);
104 int vmx_handle_wrmsr(struct vcpu *);
105 int vmx_handle_cr0_write(struct vcpu *, uint64_t);
106 int vmx_handle_cr4_write(struct vcpu *, uint64_t);
107 int vmx_handle_cr(struct vcpu *);
108 int svm_handle_inout(struct vcpu *);
109 int vmx_handle_inout(struct vcpu *);
110 int svm_handle_hlt(struct vcpu *);
111 int vmx_handle_hlt(struct vcpu *);
112 int vmm_inject_ud(struct vcpu *);
113 int vmm_inject_gp(struct vcpu *);
114 int vmm_inject_db(struct vcpu *);
115 void vmx_handle_intr(struct vcpu *);
116 void vmx_handle_misc_enable_msr(struct vcpu *);
117 int vmm_get_guest_memtype(struct vm *, paddr_t);
118 int vmx_get_guest_faulttype(void);
119 int svm_get_guest_faulttype(struct vmcb *);
120 int vmx_get_exit_qualification(uint64_t *);
121 int vmm_get_guest_cpu_cpl(struct vcpu *);
122 int vmm_get_guest_cpu_mode(struct vcpu *);
123 int svm_fault_page(struct vcpu *, paddr_t);
124 int vmx_fault_page(struct vcpu *, paddr_t);
125 int vmx_handle_np_fault(struct vcpu *);
126 int svm_handle_np_fault(struct vcpu *);
127 int vmm_alloc_vpid(uint16_t *);
128 void vmm_free_vpid(uint16_t);
129 const char *vcpu_state_decode(u_int);
130 const char *vmx_exit_reason_decode(uint32_t);
131 const char *svm_exit_reason_decode(uint32_t);
132 const char *vmx_instruction_error_decode(uint32_t);
133 void svm_setmsrbr(struct vcpu *, uint32_t);
134 void svm_setmsrbw(struct vcpu *, uint32_t);
135 void svm_setmsrbrw(struct vcpu *, uint32_t);
136 void vmx_setmsrbr(struct vcpu *, uint32_t);
137 void vmx_setmsrbw(struct vcpu *, uint32_t);
138 void vmx_setmsrbrw(struct vcpu *, uint32_t);
139 void svm_set_clean(struct vcpu *, uint32_t);
140 void svm_set_dirty(struct vcpu *, uint32_t);
141 
142 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
143 void vmm_init_pvclock(struct vcpu *, paddr_t);
144 int vmm_update_pvclock(struct vcpu *);
145 int vmm_pat_is_valid(uint64_t);
146 
147 #ifdef MULTIPROCESSOR
148 static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
149 #endif
150 
151 #ifdef VMM_DEBUG
152 void vmx_vcpu_dump_regs(struct vcpu *);
153 void vmx_dump_vmcs(struct vcpu *);
154 const char *msr_name_decode(uint32_t);
155 void vmm_segment_desc_decode(uint64_t);
156 void vmm_decode_cr0(uint64_t);
157 void vmm_decode_cr3(uint64_t);
158 void vmm_decode_cr4(uint64_t);
159 void vmm_decode_msr_value(uint64_t, uint64_t);
160 void vmm_decode_apicbase_msr_value(uint64_t);
161 void vmm_decode_ia32_fc_value(uint64_t);
162 void vmm_decode_mtrrcap_value(uint64_t);
163 void vmm_decode_perf_status_value(uint64_t);
164 void vmm_decode_perf_ctl_value(uint64_t);
165 void vmm_decode_mtrrdeftype_value(uint64_t);
166 void vmm_decode_efer_value(uint64_t);
167 void vmm_decode_rflags(uint64_t);
168 void vmm_decode_misc_enable_value(uint64_t);
169 const char *vmm_decode_cpu_mode(struct vcpu *);
170 
171 extern int mtrr2mrt(int);
172 
173 struct vmm_reg_debug_info {
174 	uint64_t	vrdi_bit;
175 	const char	*vrdi_present;
176 	const char	*vrdi_absent;
177 };
178 #endif /* VMM_DEBUG */
179 
180 extern uint64_t tsc_frequency;
181 extern int tsc_is_invariant;
182 
183 const char *vmm_hv_signature = VMM_HV_SIGNATURE;
184 
185 const struct kmem_pa_mode vmm_kp_contig = {
186 	.kp_constraint = &no_constraint,
187 	.kp_maxseg = 1,
188 	.kp_align = 4096,
189 	.kp_zero = 1,
190 };
191 
192 extern struct cfdriver vmm_cd;
193 extern const struct cfattach vmm_ca;
194 
195 /*
196  * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
197  * to access the individual fields of the guest segment registers. This
198  * struct is indexed by VCPU_REGS_* id.
199  */
200 const struct {
201 	uint64_t selid;
202 	uint64_t limitid;
203 	uint64_t arid;
204 	uint64_t baseid;
205 } vmm_vmx_sreg_vmcs_fields[] = {
206 	{ VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT,
207 	  VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE },
208 	{ VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT,
209 	  VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE },
210 	{ VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT,
211 	  VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE },
212 	{ VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT,
213 	  VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE },
214 	{ VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT,
215 	  VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE },
216 	{ VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT,
217 	  VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE },
218 	{ VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT,
219 	  VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE },
220 	{ VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT,
221 	  VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE }
222 };
223 
224 /* Pools for VMs and VCPUs */
225 extern struct pool vm_pool;
226 extern struct pool vcpu_pool;
227 
228 extern struct vmm_softc *vmm_softc;
229 
230 /* IDT information used when populating host state area */
231 extern vaddr_t idt_vaddr;
232 extern struct gate_descriptor *idt;
233 
234 /* Constants used in "CR access exit" */
235 #define CR_WRITE	0
236 #define CR_READ		1
237 #define CR_CLTS		2
238 #define CR_LMSW		3
239 
240 /*
241  * vmm_enabled
242  *
243  * Checks if we have at least one CPU with either VMX or SVM.
244  * Returns 1 if we have at least one of either type, but not both, 0 otherwise.
245  */
246 int
vmm_enabled(void)247 vmm_enabled(void)
248 {
249 	struct cpu_info *ci;
250 	CPU_INFO_ITERATOR cii;
251 	int found_vmx = 0, found_svm = 0;
252 
253 	/* Check if we have at least one CPU with either VMX or SVM */
254 	CPU_INFO_FOREACH(cii, ci) {
255 		if (ci->ci_vmm_flags & CI_VMM_VMX)
256 			found_vmx = 1;
257 		if (ci->ci_vmm_flags & CI_VMM_SVM)
258 			found_svm = 1;
259 	}
260 
261 	/* Don't support both SVM and VMX at the same time */
262 	if (found_vmx && found_svm)
263 		return (0);
264 
265 	if (found_vmx || found_svm)
266 		return 1;
267 
268 	return 0;
269 }
270 
271 void
vmm_attach_machdep(struct device * parent,struct device * self,void * aux)272 vmm_attach_machdep(struct device *parent, struct device *self, void *aux)
273 {
274 	struct vmm_softc *sc = (struct vmm_softc *)self;
275 	struct cpu_info *ci;
276 	CPU_INFO_ITERATOR cii;
277 
278 	sc->sc_md.nr_rvi_cpus = 0;
279 	sc->sc_md.nr_ept_cpus = 0;
280 
281 	/* Calculate CPU features */
282 	CPU_INFO_FOREACH(cii, ci) {
283 		if (ci->ci_vmm_flags & CI_VMM_RVI)
284 			sc->sc_md.nr_rvi_cpus++;
285 		if (ci->ci_vmm_flags & CI_VMM_EPT)
286 			sc->sc_md.nr_ept_cpus++;
287 	}
288 
289 	sc->sc_md.pkru_enabled = 0;
290 	if (rcr4() & CR4_PKE)
291 		sc->sc_md.pkru_enabled = 1;
292 
293 	if (sc->sc_md.nr_ept_cpus) {
294 		printf(": VMX/EPT");
295 		sc->mode = VMM_MODE_EPT;
296 	} else if (sc->sc_md.nr_rvi_cpus) {
297 		printf(": SVM/RVI");
298 		sc->mode = VMM_MODE_RVI;
299 	} else {
300 		printf(": unknown");
301 		sc->mode = VMM_MODE_UNKNOWN;
302 	}
303 
304 	if (sc->mode == VMM_MODE_EPT) {
305 		if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
306 			l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE,
307 			    &kv_any, &vmm_kp_contig, &kd_waitok);
308 			if (!l1tf_flush_region) {
309 				printf(" (failing, no memory)");
310 				sc->mode = VMM_MODE_UNKNOWN;
311 			} else {
312 				printf(" (using slow L1TF mitigation)");
313 				memset(l1tf_flush_region, 0xcc,
314 				    VMX_L1D_FLUSH_SIZE);
315 			}
316 		}
317 	}
318 
319 	if (sc->mode == VMM_MODE_RVI) {
320 		sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid;
321 	} else {
322 		sc->max_vpid = 0xFFF;
323 	}
324 
325 	bzero(&sc->vpids, sizeof(sc->vpids));
326 	rw_init(&sc->vpid_lock, "vpid");
327 }
328 
329 /*
330  * vmm_quiesce_vmx
331  *
332  * Prepare the host for suspend by flushing all VMCS states.
333  */
334 int
vmm_quiesce_vmx(void)335 vmm_quiesce_vmx(void)
336 {
337 	struct vm		*vm;
338 	struct vcpu		*vcpu;
339 	int			 err;
340 
341 	/*
342 	 * We should be only called from a quiescing device state so we
343 	 * don't expect to sleep here. If we can't get all our locks,
344 	 * something is wrong.
345 	 */
346 	if ((err = rw_enter(&vmm_softc->vm_lock, RW_WRITE | RW_NOSLEEP)))
347 		return (err);
348 
349 	/* Iterate over each vm... */
350 	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
351 		/* Iterate over each vcpu... */
352 		SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
353 			err = rw_enter(&vcpu->vc_lock, RW_WRITE | RW_NOSLEEP);
354 			if (err)
355 				break;
356 
357 			/* We can skip unlaunched VMCS. Nothing to flush. */
358 			if (atomic_load_int(&vcpu->vc_vmx_vmcs_state)
359 			    != VMCS_LAUNCHED) {
360 				DPRINTF("%s: skipping vcpu %d for vm %d\n",
361 				    __func__, vcpu->vc_id, vm->vm_id);
362 				rw_exit_write(&vcpu->vc_lock);
363 				continue;
364 			}
365 
366 #ifdef MULTIPROCESSOR
367 			if (vcpu->vc_last_pcpu != curcpu()) {
368 				/* Remote cpu vmclear via ipi. */
369 				err = vmx_remote_vmclear(vcpu->vc_last_pcpu,
370 				    vcpu);
371 				if (err)
372 					printf("%s: failed to remote vmclear "
373 					    "vcpu %d of vm %d\n", __func__,
374 					    vcpu->vc_id, vm->vm_id);
375 			} else
376 #endif
377 			{
378 				/* Local cpu vmclear instruction. */
379 				if ((err = vmclear(&vcpu->vc_control_pa)))
380 					printf("%s: failed to locally vmclear "
381 					    "vcpu %d of vm %d\n", __func__,
382 					    vcpu->vc_id, vm->vm_id);
383 				atomic_swap_uint(&vcpu->vc_vmx_vmcs_state,
384 				    VMCS_CLEARED);
385 			}
386 
387 			rw_exit_write(&vcpu->vc_lock);
388 			if (err)
389 				break;
390 			DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__,
391 			    vcpu->vc_id, vm->vm_id);
392 		}
393 		if (err)
394 			break;
395 	}
396 	rw_exit_write(&vmm_softc->vm_lock);
397 
398 	if (err)
399 		return (err);
400 	return (0);
401 }
402 
403 void
vmm_activate_machdep(struct device * self,int act)404 vmm_activate_machdep(struct device *self, int act)
405 {
406 	struct cpu_info		*ci = curcpu();
407 
408 	switch (act) {
409 	case DVACT_QUIESCE:
410 		/* If we're not in vmm mode, nothing to do. */
411 		if ((ci->ci_flags & CPUF_VMM) == 0)
412 			break;
413 
414 		/* Intel systems need extra steps to sync vcpu state. */
415 		if (vmm_softc->mode == VMM_MODE_EPT)
416 			if (vmm_quiesce_vmx())
417 				DPRINTF("%s: vmx quiesce failed\n", __func__);
418 
419 		/* Stop virtualization mode on all cpus. */
420 		vmm_stop();
421 		break;
422 
423 	case DVACT_WAKEUP:
424 		/* Restart virtualization mode on all cpu's. */
425 		if (vmm_softc->vm_ct > 0)
426 			vmm_start();
427 		break;
428 	}
429 }
430 
431 int
vmmioctl_machdep(dev_t dev,u_long cmd,caddr_t data,int flag,struct proc * p)432 vmmioctl_machdep(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
433 {
434 	int ret;
435 
436 	switch (cmd) {
437 	case VMM_IOC_INTR:
438 		ret = vm_intr_pending((struct vm_intr_params *)data);
439 		break;
440 	default:
441 		DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
442 		ret = ENOTTY;
443 	}
444 
445 	return (ret);
446 }
447 
448 int
pledge_ioctl_vmm_machdep(struct proc * p,long com)449 pledge_ioctl_vmm_machdep(struct proc *p, long com)
450 {
451 	switch (com) {
452 	case VMM_IOC_INTR:
453 		return (0);
454 	}
455 
456 	return (EPERM);
457 }
458 
459 /*
460  * vm_intr_pending
461  *
462  * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
463  * interrupt is pending and needs acknowledgment
464  *
465  * Parameters:
466  *  vip: Describes the vm/vcpu for which the interrupt is pending
467  *
468  * Return values:
469  *  0: if successful
470  *  ENOENT: if the VM/VCPU defined by 'vip' cannot be found
471  */
472 int
vm_intr_pending(struct vm_intr_params * vip)473 vm_intr_pending(struct vm_intr_params *vip)
474 {
475 	struct vm *vm;
476 	struct vcpu *vcpu;
477 #ifdef MULTIPROCESSOR
478 	struct cpu_info *ci;
479 #endif
480 	int error, ret = 0;
481 
482 	/* Find the desired VM */
483 	error = vm_find(vip->vip_vm_id, &vm);
484 
485 	/* Not found? exit. */
486 	if (error != 0)
487 		return (error);
488 
489 	vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
490 
491 	if (vcpu == NULL) {
492 		ret = ENOENT;
493 		goto out;
494 	}
495 
496 	vcpu->vc_intr = vip->vip_intr;
497 #ifdef MULTIPROCESSOR
498 	ci = READ_ONCE(vcpu->vc_curcpu);
499 	if (ci != NULL)
500 		x86_send_ipi(ci, X86_IPI_NOP);
501 #endif
502 
503 out:
504 	refcnt_rele_wake(&vm->vm_refcnt);
505 	return (ret);
506 }
507 
508 /*
509  * vm_rwvmparams
510  *
511  * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock
512  * version, etc.
513  *
514  * Parameters:
515  *   vrwp: Describes the VM and VCPU to get/set the params from
516  *   dir: 0 for reading, 1 for writing
517  *
518  * Return values:
519  *  0: if successful
520  *  ENOENT: if the VM/VCPU defined by 'vpp' cannot be found
521  *  EINVAL: if an error occurred reading the registers of the guest
522  */
523 int
vm_rwvmparams(struct vm_rwvmparams_params * vpp,int dir)524 vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir)
525 {
526 	struct vm *vm;
527 	struct vcpu *vcpu;
528 	int error, ret = 0;
529 
530 	/* Find the desired VM */
531 	error = vm_find(vpp->vpp_vm_id, &vm);
532 
533 	/* Not found? exit. */
534 	if (error != 0)
535 		return (error);
536 
537 	vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
538 
539 	if (vcpu == NULL) {
540 		ret = ENOENT;
541 		goto out;
542 	}
543 
544 	if (dir == 0) {
545 		if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
546 			vpp->vpp_pvclock_version = vcpu->vc_pvclock_version;
547 		if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA)
548 			vpp->vpp_pvclock_system_gpa = \
549 			    vcpu->vc_pvclock_system_gpa;
550 	} else {
551 		if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
552 			vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
553 		if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) {
554 			vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
555 		}
556 	}
557 out:
558 	refcnt_rele_wake(&vm->vm_refcnt);
559 	return (ret);
560 }
561 
562 /*
563  * vm_readregs
564  *
565  * IOCTL handler to read/write the current register values of a guest VCPU.
566  * The VCPU must not be running.
567  *
568  * Parameters:
569  *   vrwp: Describes the VM and VCPU to get/set the registers from. The
570  *    register values are returned here as well.
571  *   dir: 0 for reading, 1 for writing
572  *
573  * Return values:
574  *  0: if successful
575  *  ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found
576  *  EINVAL: if an error occurred accessing the registers of the guest
577  *  EPERM: if the vm cannot be accessed from the calling process
578  */
579 int
vm_rwregs(struct vm_rwregs_params * vrwp,int dir)580 vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
581 {
582 	struct vm *vm;
583 	struct vcpu *vcpu;
584 	struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
585 	int error, ret = 0;
586 
587 	/* Find the desired VM */
588 	error = vm_find(vrwp->vrwp_vm_id, &vm);
589 
590 	/* Not found? exit. */
591 	if (error != 0)
592 		return (error);
593 
594 	vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
595 
596 	if (vcpu == NULL) {
597 		ret = ENOENT;
598 		goto out;
599 	}
600 
601 	rw_enter_write(&vcpu->vc_lock);
602 	if (vmm_softc->mode == VMM_MODE_EPT)
603 		ret = (dir == 0) ?
604 		    vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) :
605 		    vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
606 	else if (vmm_softc->mode == VMM_MODE_RVI)
607 		ret = (dir == 0) ?
608 		    vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
609 		    vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
610 	else {
611 		DPRINTF("%s: unknown vmm mode", __func__);
612 		ret = EINVAL;
613 	}
614 	rw_exit_write(&vcpu->vc_lock);
615 out:
616 	refcnt_rele_wake(&vm->vm_refcnt);
617 	return (ret);
618 }
619 
620 /*
621  * vmm_start
622  *
623  * Starts VMM mode on the system
624  */
625 int
vmm_start(void)626 vmm_start(void)
627 {
628 	int rv = 0;
629 	struct cpu_info *self = curcpu();
630 #ifdef MULTIPROCESSOR
631 	struct cpu_info *ci;
632 	CPU_INFO_ITERATOR cii;
633 #ifdef MP_LOCKDEBUG
634 	int nticks;
635 #endif /* MP_LOCKDEBUG */
636 #endif /* MULTIPROCESSOR */
637 
638 	rw_enter_write(&vmm_softc->sc_slock);
639 
640 	/* VMM is already running */
641 	if (self->ci_flags & CPUF_VMM)
642 		goto unlock;
643 
644 	/* Start VMM on this CPU */
645 	start_vmm_on_cpu(self);
646 	if (!(self->ci_flags & CPUF_VMM)) {
647 		printf("%s: failed to enter VMM mode\n",
648 			self->ci_dev->dv_xname);
649 		rv = EIO;
650 		goto unlock;
651 	}
652 
653 #ifdef MULTIPROCESSOR
654 	/* Broadcast start VMM IPI */
655 	x86_broadcast_ipi(X86_IPI_START_VMM);
656 
657 	CPU_INFO_FOREACH(cii, ci) {
658 		if (ci == self)
659 			continue;
660 #ifdef MP_LOCKDEBUG
661 		nticks = __mp_lock_spinout;
662 #endif /* MP_LOCKDEBUG */
663 		while (!(ci->ci_flags & CPUF_VMM)) {
664 			CPU_BUSY_CYCLE();
665 #ifdef MP_LOCKDEBUG
666 			if (--nticks <= 0) {
667 				db_printf("%s: spun out", __func__);
668 				db_enter();
669 				nticks = __mp_lock_spinout;
670 			}
671 #endif /* MP_LOCKDEBUG */
672 		}
673 	}
674 #endif /* MULTIPROCESSOR */
675 unlock:
676 	rw_exit_write(&vmm_softc->sc_slock);
677 	return (rv);
678 }
679 
680 /*
681  * vmm_stop
682  *
683  * Stops VMM mode on the system
684  */
685 int
vmm_stop(void)686 vmm_stop(void)
687 {
688 	int rv = 0;
689 	struct cpu_info *self = curcpu();
690 #ifdef MULTIPROCESSOR
691 	struct cpu_info *ci;
692 	CPU_INFO_ITERATOR cii;
693 #ifdef MP_LOCKDEBUG
694 	int nticks;
695 #endif /* MP_LOCKDEBUG */
696 #endif /* MULTIPROCESSOR */
697 
698 	rw_enter_write(&vmm_softc->sc_slock);
699 
700 	/* VMM is not running */
701 	if (!(self->ci_flags & CPUF_VMM))
702 		goto unlock;
703 
704 	/* Stop VMM on this CPU */
705 	stop_vmm_on_cpu(self);
706 	if (self->ci_flags & CPUF_VMM) {
707 		printf("%s: failed to exit VMM mode\n",
708 			self->ci_dev->dv_xname);
709 		rv = EIO;
710 		goto unlock;
711 	}
712 
713 #ifdef MULTIPROCESSOR
714 	/* Stop VMM on other CPUs */
715 	x86_broadcast_ipi(X86_IPI_STOP_VMM);
716 
717 	CPU_INFO_FOREACH(cii, ci) {
718 		if (ci == self)
719 			continue;
720 #ifdef MP_LOCKDEBUG
721 		nticks = __mp_lock_spinout;
722 #endif /* MP_LOCKDEBUG */
723 		while ((ci->ci_flags & CPUF_VMM)) {
724 			CPU_BUSY_CYCLE();
725 #ifdef MP_LOCKDEBUG
726 			if (--nticks <= 0) {
727 				db_printf("%s: spunout", __func__);
728 				db_enter();
729 				nticks = __mp_lock_spinout;
730 			}
731 #endif /* MP_LOCKDEBUG */
732 		}
733 	}
734 #endif /* MULTIPROCESSOR */
735 unlock:
736 	rw_exit_write(&vmm_softc->sc_slock);
737 	return (0);
738 }
739 
740 /*
741  * start_vmm_on_cpu
742  *
743  * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
744  * sequence to enter VMM mode (eg, VMXON)
745  */
746 void
start_vmm_on_cpu(struct cpu_info * ci)747 start_vmm_on_cpu(struct cpu_info *ci)
748 {
749 	uint64_t msr;
750 	uint32_t cr4;
751 	struct vmx_invept_descriptor vid;
752 
753 	/* No VMM mode? exit. */
754 	if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 &&
755 	    (ci->ci_vmm_flags & CI_VMM_SVM) == 0)
756 		return;
757 
758 	/*
759 	 * AMD SVM
760 	 */
761 	if (ci->ci_vmm_flags & CI_VMM_SVM) {
762 		msr = rdmsr(MSR_EFER);
763 		msr |= EFER_SVME;
764 		wrmsr(MSR_EFER, msr);
765 	}
766 
767 	/*
768 	 * Intel VMX
769 	 */
770 	if (ci->ci_vmm_flags & CI_VMM_VMX) {
771 		if (ci->ci_vmxon_region == 0)
772 			return;
773 		else {
774 			bzero(ci->ci_vmxon_region, PAGE_SIZE);
775 			ci->ci_vmxon_region->vr_revision =
776 			    ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
777 
778 			/* Enable VMX */
779 			msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
780 			if (msr & IA32_FEATURE_CONTROL_LOCK) {
781 				if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
782 					return;
783 			} else {
784 				msr |= IA32_FEATURE_CONTROL_VMX_EN |
785 				    IA32_FEATURE_CONTROL_LOCK;
786 				wrmsr(MSR_IA32_FEATURE_CONTROL, msr);
787 			}
788 
789 			/* Set CR4.VMXE */
790 			cr4 = rcr4();
791 			cr4 |= CR4_VMXE;
792 			lcr4(cr4);
793 
794 			/* Enter VMX mode and clear EPTs on this cpu */
795 			if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa))
796 				panic("vmxon failed");
797 
798 			memset(&vid, 0, sizeof(vid));
799 			if (invept(IA32_VMX_INVEPT_GLOBAL_CTX, &vid))
800 				panic("invept failed");
801 		}
802 	}
803 
804 	atomic_setbits_int(&ci->ci_flags, CPUF_VMM);
805 }
806 
807 /*
808  * stop_vmm_on_cpu
809  *
810  * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
811  * sequence to exit VMM mode (eg, VMXOFF)
812  */
813 void
stop_vmm_on_cpu(struct cpu_info * ci)814 stop_vmm_on_cpu(struct cpu_info *ci)
815 {
816 	uint64_t msr;
817 	uint32_t cr4;
818 
819 	if (!(ci->ci_flags & CPUF_VMM))
820 		return;
821 
822 	/*
823 	 * AMD SVM
824 	 */
825 	if (ci->ci_vmm_flags & CI_VMM_SVM) {
826 		msr = rdmsr(MSR_EFER);
827 		msr &= ~EFER_SVME;
828 		wrmsr(MSR_EFER, msr);
829 	}
830 
831 	/*
832 	 * Intel VMX
833 	 */
834 	if (ci->ci_vmm_flags & CI_VMM_VMX) {
835 		if (vmxoff())
836 			panic("VMXOFF failed");
837 
838 		cr4 = rcr4();
839 		cr4 &= ~CR4_VMXE;
840 		lcr4(cr4);
841 	}
842 
843 	atomic_clearbits_int(&ci->ci_flags, CPUF_VMM);
844 }
845 
846 /*
847  * vmclear_on_cpu
848  *
849  * Flush and clear VMCS on 'ci' by executing vmclear.
850  *
851  */
852 void
vmclear_on_cpu(struct cpu_info * ci)853 vmclear_on_cpu(struct cpu_info *ci)
854 {
855 	if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) {
856 		if (vmclear(&ci->ci_vmcs_pa))
857 			panic("VMCLEAR ipi failed");
858 		atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR);
859 	}
860 }
861 
862 #ifdef MULTIPROCESSOR
863 static int
vmx_remote_vmclear(struct cpu_info * ci,struct vcpu * vcpu)864 vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
865 {
866 #ifdef MP_LOCKDEBUG
867 	int nticks = __mp_lock_spinout;
868 #endif /* MP_LOCKDEBUG */
869 
870 	rw_enter_write(&ci->ci_vmcs_lock);
871 	atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa);
872 	x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM);
873 
874 	while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) {
875 		CPU_BUSY_CYCLE();
876 #ifdef MP_LOCKDEBUG
877 		if (--nticks <= 0) {
878 			db_printf("%s: spun out\n", __func__);
879 			db_enter();
880 			nticks = __mp_lock_spinout;
881 		}
882 #endif /* MP_LOCKDEBUG */
883 	}
884 	atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
885 	rw_exit_write(&ci->ci_vmcs_lock);
886 
887 	return (0);
888 }
889 #endif /* MULTIPROCESSOR */
890 
891 /*
892  * vm_impl_init
893  *
894  * VM address space initialization routine
895  *
896  * Parameters:
897  *  vm: the VM being initialized
898  *   p: vmd process owning the VM
899  *
900  * Return values:
901  *  0: the initialization was successful
902  *  EINVAL: unsupported vmm mode
903  *  ENOMEM: the initialization failed (lack of resources)
904  */
905 int
vm_impl_init(struct vm * vm,struct proc * p)906 vm_impl_init(struct vm *vm, struct proc *p)
907 {
908 	int i, mode, ret;
909 	vaddr_t mingpa, maxgpa;
910 	struct vm_mem_range *vmr;
911 
912 	/* If not EPT or RVI, nothing to do here */
913 	switch (vmm_softc->mode) {
914 	case VMM_MODE_EPT:
915 		mode = PMAP_TYPE_EPT;
916 		break;
917 	case VMM_MODE_RVI:
918 		mode = PMAP_TYPE_RVI;
919 		break;
920 	default:
921 		printf("%s: invalid vmm mode %d\n", __func__, vmm_softc->mode);
922 		return (EINVAL);
923 	}
924 
925 	vmr = &vm->vm_memranges[0];
926 	mingpa = vmr->vmr_gpa;
927 	vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
928 	maxgpa = vmr->vmr_gpa + vmr->vmr_size;
929 
930 	/*
931 	 * uvmspace_alloc (currently) always returns a valid vmspace
932 	 */
933 	vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE, FALSE);
934 	vm->vm_map = &vm->vm_vmspace->vm_map;
935 
936 	/* Map the new map with an anon */
937 	DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
938 	for (i = 0; i < vm->vm_nmemranges; i++) {
939 		vmr = &vm->vm_memranges[i];
940 		ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
941 		    PROT_READ | PROT_WRITE | PROT_EXEC,
942 		    &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
943 		if (ret) {
944 			printf("%s: uvm_share failed (%d)\n", __func__, ret);
945 			/* uvmspace_free calls pmap_destroy for us */
946 			KERNEL_LOCK();
947 			uvmspace_free(vm->vm_vmspace);
948 			vm->vm_vmspace = NULL;
949 			KERNEL_UNLOCK();
950 			return (ENOMEM);
951 		}
952 	}
953 
954 	pmap_convert(vm->vm_map->pmap, mode);
955 
956 	return (0);
957 }
958 
959 void
vm_impl_deinit(struct vm * vm)960 vm_impl_deinit(struct vm *vm)
961 {
962 	/* unused */
963 }
964 
965 /*
966  * vcpu_reload_vmcs_vmx
967  *
968  * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
969  * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
970  * ipi will be used to remotely flush it before loading the VMCS locally.
971  *
972  * Parameters:
973  *  vcpu: Pointer to the vcpu needing its VMCS
974  *
975  * Return values:
976  *  0: if successful
977  *  EINVAL: an error occurred during flush or reload
978  */
979 int
vcpu_reload_vmcs_vmx(struct vcpu * vcpu)980 vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
981 {
982 	struct cpu_info *ci, *last_ci;
983 
984 	rw_assert_wrlock(&vcpu->vc_lock);
985 
986 	ci = curcpu();
987 	last_ci = vcpu->vc_last_pcpu;
988 
989 	if (last_ci == NULL) {
990 		/* First launch */
991 		if (vmclear(&vcpu->vc_control_pa))
992 				return (EINVAL);
993 		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
994 #ifdef MULTIPROCESSOR
995 	} else if (last_ci != ci) {
996 		/* We've moved CPUs at some point, so remote VMCLEAR */
997 		if (vmx_remote_vmclear(last_ci, vcpu))
998 			return (EINVAL);
999 		KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED);
1000 #endif /* MULTIPROCESSOR */
1001 	}
1002 
1003 	if (vmptrld(&vcpu->vc_control_pa)) {
1004 		printf("%s: vmptrld\n", __func__);
1005 		return (EINVAL);
1006 	}
1007 
1008 	return (0);
1009 }
1010 
1011 /*
1012  * vcpu_readregs_vmx
1013  *
1014  * Reads 'vcpu's registers
1015  *
1016  * Parameters:
1017  *  vcpu: the vcpu to read register values from
1018  *  regmask: the types of registers to read
1019  *  loadvmcs: bit to indicate whether the VMCS has to be loaded first
1020  *  vrs: output parameter where register values are stored
1021  *
1022  * Return values:
1023  *  0: if successful
1024  *  EINVAL: an error reading registers occurred
1025  */
1026 int
vcpu_readregs_vmx(struct vcpu * vcpu,uint64_t regmask,int loadvmcs,struct vcpu_reg_state * vrs)1027 vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1028     struct vcpu_reg_state *vrs)
1029 {
1030 	int i, ret = 0;
1031 	uint64_t sel, limit, ar;
1032 	uint64_t *gprs = vrs->vrs_gprs;
1033 	uint64_t *crs = vrs->vrs_crs;
1034 	uint64_t *msrs = vrs->vrs_msrs;
1035 	uint64_t *drs = vrs->vrs_drs;
1036 	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1037 	struct vmx_msr_store *msr_store;
1038 
1039 	if (loadvmcs) {
1040 		if (vcpu_reload_vmcs_vmx(vcpu))
1041 			return (EINVAL);
1042 	}
1043 
1044 #ifdef VMM_DEBUG
1045 	/* VMCS should be loaded... */
1046 	paddr_t pa = 0ULL;
1047 	if (vmptrst(&pa))
1048 		panic("%s: vmptrst", __func__);
1049 	KASSERT(pa == vcpu->vc_control_pa);
1050 #endif /* VMM_DEBUG */
1051 
1052 	if (regmask & VM_RWREGS_GPRS) {
1053 		gprs[VCPU_REGS_RAX] = vcpu->vc_gueststate.vg_rax;
1054 		gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1055 		gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1056 		gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1057 		gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1058 		gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1059 		gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1060 		gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1061 		gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1062 		gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1063 		gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1064 		gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1065 		gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1066 		gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1067 		gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1068 		gprs[VCPU_REGS_RIP] = vcpu->vc_gueststate.vg_rip;
1069 		if (vmread(VMCS_GUEST_IA32_RSP, &gprs[VCPU_REGS_RSP]))
1070 			goto errout;
1071                 if (vmread(VMCS_GUEST_IA32_RFLAGS, &gprs[VCPU_REGS_RFLAGS]))
1072 			goto errout;
1073         }
1074 
1075 	if (regmask & VM_RWREGS_SREGS) {
1076 		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1077 			if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
1078 				goto errout;
1079 			if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
1080 				goto errout;
1081 			if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
1082 				goto errout;
1083 			if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
1084 			   &sregs[i].vsi_base))
1085 				goto errout;
1086 
1087 			sregs[i].vsi_sel = sel;
1088 			sregs[i].vsi_limit = limit;
1089 			sregs[i].vsi_ar = ar;
1090 		}
1091 
1092 		if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &limit))
1093 			goto errout;
1094 		if (vmread(VMCS_GUEST_IA32_GDTR_BASE,
1095 		    &vrs->vrs_gdtr.vsi_base))
1096 			goto errout;
1097 		vrs->vrs_gdtr.vsi_limit = limit;
1098 
1099 		if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &limit))
1100 			goto errout;
1101 		if (vmread(VMCS_GUEST_IA32_IDTR_BASE,
1102 		    &vrs->vrs_idtr.vsi_base))
1103 			goto errout;
1104 		vrs->vrs_idtr.vsi_limit = limit;
1105 	}
1106 
1107 	if (regmask & VM_RWREGS_CRS) {
1108 		crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1109 		crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1110 		if (vmread(VMCS_GUEST_IA32_CR0, &crs[VCPU_REGS_CR0]))
1111 			goto errout;
1112 		if (vmread(VMCS_GUEST_IA32_CR3, &crs[VCPU_REGS_CR3]))
1113 			goto errout;
1114 		if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4]))
1115 			goto errout;
1116 		if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0]))
1117 			goto errout;
1118 		if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1]))
1119 			goto errout;
1120 		if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2]))
1121 			goto errout;
1122 		if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3]))
1123 			goto errout;
1124 	}
1125 
1126 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1127 
1128 	if (regmask & VM_RWREGS_MSRS) {
1129 		for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1130 			msrs[i] = msr_store[i].vms_data;
1131 		}
1132 	}
1133 
1134 	if (regmask & VM_RWREGS_DRS) {
1135 		drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1136 		drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1137 		drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1138 		drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1139 		drs[VCPU_REGS_DR6] = vcpu->vc_gueststate.vg_dr6;
1140 		if (vmread(VMCS_GUEST_IA32_DR7, &drs[VCPU_REGS_DR7]))
1141 			goto errout;
1142 	}
1143 
1144 	goto out;
1145 
1146 errout:
1147 	ret = EINVAL;
1148 out:
1149 	return (ret);
1150 }
1151 
1152 /*
1153  * vcpu_readregs_svm
1154  *
1155  * Reads 'vcpu's registers
1156  *
1157  * Parameters:
1158  *  vcpu: the vcpu to read register values from
1159  *  regmask: the types of registers to read
1160  *  vrs: output parameter where register values are stored
1161  *
1162  * Return values:
1163  *  0: if successful
1164  */
1165 int
vcpu_readregs_svm(struct vcpu * vcpu,uint64_t regmask,struct vcpu_reg_state * vrs)1166 vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
1167     struct vcpu_reg_state *vrs)
1168 {
1169 	uint64_t *gprs = vrs->vrs_gprs;
1170 	uint64_t *crs = vrs->vrs_crs;
1171 	uint64_t *msrs = vrs->vrs_msrs;
1172 	uint64_t *drs = vrs->vrs_drs;
1173 	uint32_t attr;
1174 	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1175 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1176 
1177 	if (regmask & VM_RWREGS_GPRS) {
1178 		gprs[VCPU_REGS_RAX] = vmcb->v_rax;
1179 		gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1180 		gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1181 		gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1182 		gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1183 		gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1184 		gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1185 		gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1186 		gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1187 		gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1188 		gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1189 		gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1190 		gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1191 		gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1192 		gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1193 		gprs[VCPU_REGS_RIP] = vmcb->v_rip;
1194 		gprs[VCPU_REGS_RSP] = vmcb->v_rsp;
1195 		gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags;
1196 	}
1197 
1198 	if (regmask & VM_RWREGS_SREGS) {
1199 		sregs[VCPU_REGS_CS].vsi_sel = vmcb->v_cs.vs_sel;
1200 		sregs[VCPU_REGS_CS].vsi_limit = vmcb->v_cs.vs_lim;
1201 		attr = vmcb->v_cs.vs_attr;
1202 		sregs[VCPU_REGS_CS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1203 		    0xf000);
1204 		sregs[VCPU_REGS_CS].vsi_base = vmcb->v_cs.vs_base;
1205 
1206 		sregs[VCPU_REGS_DS].vsi_sel = vmcb->v_ds.vs_sel;
1207 		sregs[VCPU_REGS_DS].vsi_limit = vmcb->v_ds.vs_lim;
1208 		attr = vmcb->v_ds.vs_attr;
1209 		sregs[VCPU_REGS_DS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1210 		    0xf000);
1211 		sregs[VCPU_REGS_DS].vsi_base = vmcb->v_ds.vs_base;
1212 
1213 		sregs[VCPU_REGS_ES].vsi_sel = vmcb->v_es.vs_sel;
1214 		sregs[VCPU_REGS_ES].vsi_limit = vmcb->v_es.vs_lim;
1215 		attr = vmcb->v_es.vs_attr;
1216 		sregs[VCPU_REGS_ES].vsi_ar = (attr & 0xff) | ((attr << 4) &
1217 		    0xf000);
1218 		sregs[VCPU_REGS_ES].vsi_base = vmcb->v_es.vs_base;
1219 
1220 		sregs[VCPU_REGS_FS].vsi_sel = vmcb->v_fs.vs_sel;
1221 		sregs[VCPU_REGS_FS].vsi_limit = vmcb->v_fs.vs_lim;
1222 		attr = vmcb->v_fs.vs_attr;
1223 		sregs[VCPU_REGS_FS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1224 		    0xf000);
1225 		sregs[VCPU_REGS_FS].vsi_base = vmcb->v_fs.vs_base;
1226 
1227 		sregs[VCPU_REGS_GS].vsi_sel = vmcb->v_gs.vs_sel;
1228 		sregs[VCPU_REGS_GS].vsi_limit = vmcb->v_gs.vs_lim;
1229 		attr = vmcb->v_gs.vs_attr;
1230 		sregs[VCPU_REGS_GS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1231 		    0xf000);
1232 		sregs[VCPU_REGS_GS].vsi_base = vmcb->v_gs.vs_base;
1233 
1234 		sregs[VCPU_REGS_SS].vsi_sel = vmcb->v_ss.vs_sel;
1235 		sregs[VCPU_REGS_SS].vsi_limit = vmcb->v_ss.vs_lim;
1236 		attr = vmcb->v_ss.vs_attr;
1237 		sregs[VCPU_REGS_SS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1238 		    0xf000);
1239 		sregs[VCPU_REGS_SS].vsi_base = vmcb->v_ss.vs_base;
1240 
1241 		sregs[VCPU_REGS_LDTR].vsi_sel = vmcb->v_ldtr.vs_sel;
1242 		sregs[VCPU_REGS_LDTR].vsi_limit = vmcb->v_ldtr.vs_lim;
1243 		attr = vmcb->v_ldtr.vs_attr;
1244 		sregs[VCPU_REGS_LDTR].vsi_ar = (attr & 0xff) | ((attr << 4)
1245 		    & 0xf000);
1246 		sregs[VCPU_REGS_LDTR].vsi_base = vmcb->v_ldtr.vs_base;
1247 
1248 		sregs[VCPU_REGS_TR].vsi_sel = vmcb->v_tr.vs_sel;
1249 		sregs[VCPU_REGS_TR].vsi_limit = vmcb->v_tr.vs_lim;
1250 		attr = vmcb->v_tr.vs_attr;
1251 		sregs[VCPU_REGS_TR].vsi_ar = (attr & 0xff) | ((attr << 4) &
1252 		    0xf000);
1253 		sregs[VCPU_REGS_TR].vsi_base = vmcb->v_tr.vs_base;
1254 
1255 		vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
1256 		vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
1257 		vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
1258 		vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
1259 	}
1260 
1261 	if (regmask & VM_RWREGS_CRS) {
1262 		crs[VCPU_REGS_CR0] = vmcb->v_cr0;
1263 		crs[VCPU_REGS_CR3] = vmcb->v_cr3;
1264 		crs[VCPU_REGS_CR4] = vmcb->v_cr4;
1265 		crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1266 		crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1267 	}
1268 
1269 	if (regmask & VM_RWREGS_MSRS) {
1270 		 msrs[VCPU_REGS_EFER] = vmcb->v_efer;
1271 		 msrs[VCPU_REGS_STAR] = vmcb->v_star;
1272 		 msrs[VCPU_REGS_LSTAR] = vmcb->v_lstar;
1273 		 msrs[VCPU_REGS_CSTAR] = vmcb->v_cstar;
1274 		 msrs[VCPU_REGS_SFMASK] = vmcb->v_sfmask;
1275 		 msrs[VCPU_REGS_KGSBASE] = vmcb->v_kgsbase;
1276 	}
1277 
1278 	if (regmask & VM_RWREGS_DRS) {
1279 		drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1280 		drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1281 		drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1282 		drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1283 		drs[VCPU_REGS_DR6] = vmcb->v_dr6;
1284 		drs[VCPU_REGS_DR7] = vmcb->v_dr7;
1285 	}
1286 
1287 	return (0);
1288 }
1289 
1290 /*
1291  * vcpu_writeregs_vmx
1292  *
1293  * Writes VCPU registers
1294  *
1295  * Parameters:
1296  *  vcpu: the vcpu that has to get its registers written to
1297  *  regmask: the types of registers to write
1298  *  loadvmcs: bit to indicate whether the VMCS has to be loaded first
1299  *  vrs: the register values to write
1300  *
1301  * Return values:
1302  *  0: if successful
1303  *  EINVAL an error writing registers occurred
1304  */
1305 int
vcpu_writeregs_vmx(struct vcpu * vcpu,uint64_t regmask,int loadvmcs,struct vcpu_reg_state * vrs)1306 vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1307     struct vcpu_reg_state *vrs)
1308 {
1309 	int i, ret = 0;
1310 	uint16_t sel;
1311 	uint64_t limit, ar;
1312 	uint64_t *gprs = vrs->vrs_gprs;
1313 	uint64_t *crs = vrs->vrs_crs;
1314 	uint64_t *msrs = vrs->vrs_msrs;
1315 	uint64_t *drs = vrs->vrs_drs;
1316 	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1317 	struct vmx_msr_store *msr_store;
1318 
1319 	if (loadvmcs) {
1320 		if (vcpu_reload_vmcs_vmx(vcpu))
1321 			return (EINVAL);
1322 	}
1323 
1324 #ifdef VMM_DEBUG
1325 	/* VMCS should be loaded... */
1326 	paddr_t pa = 0ULL;
1327 	if (vmptrst(&pa))
1328 		panic("%s: vmptrst", __func__);
1329 	KASSERT(pa == vcpu->vc_control_pa);
1330 #endif /* VMM_DEBUG */
1331 
1332 	if (regmask & VM_RWREGS_GPRS) {
1333 		vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1334 		vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1335 		vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1336 		vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1337 		vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1338 		vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1339 		vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1340 		vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1341 		vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1342 		vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1343 		vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1344 		vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1345 		vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1346 		vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1347 		vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1348 		vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1349 		if (vmwrite(VMCS_GUEST_IA32_RIP, gprs[VCPU_REGS_RIP]))
1350 			goto errout;
1351 		if (vmwrite(VMCS_GUEST_IA32_RSP, gprs[VCPU_REGS_RSP]))
1352 			goto errout;
1353                 if (vmwrite(VMCS_GUEST_IA32_RFLAGS, gprs[VCPU_REGS_RFLAGS]))
1354 			goto errout;
1355 	}
1356 
1357 	if (regmask & VM_RWREGS_SREGS) {
1358 		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1359 			sel = sregs[i].vsi_sel;
1360 			limit = sregs[i].vsi_limit;
1361 			ar = sregs[i].vsi_ar;
1362 
1363 			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
1364 				goto errout;
1365 			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
1366 				goto errout;
1367 			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
1368 				goto errout;
1369 			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
1370 			    sregs[i].vsi_base))
1371 				goto errout;
1372 		}
1373 
1374 		if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT,
1375 		    vrs->vrs_gdtr.vsi_limit))
1376 			goto errout;
1377 		if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE,
1378 		    vrs->vrs_gdtr.vsi_base))
1379 			goto errout;
1380 		if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT,
1381 		    vrs->vrs_idtr.vsi_limit))
1382 			goto errout;
1383 		if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE,
1384 		    vrs->vrs_idtr.vsi_base))
1385 			goto errout;
1386 	}
1387 
1388 	if (regmask & VM_RWREGS_CRS) {
1389 		vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1390 		if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0]))
1391 			goto errout;
1392 		if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3]))
1393 			goto errout;
1394 		if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4]))
1395 			goto errout;
1396 		if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0]))
1397 			goto errout;
1398 		if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1]))
1399 			goto errout;
1400 		if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2]))
1401 			goto errout;
1402 		if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3]))
1403 			goto errout;
1404 	}
1405 
1406 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1407 
1408 	if (regmask & VM_RWREGS_MSRS) {
1409 		for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1410 			msr_store[i].vms_data = msrs[i];
1411 		}
1412 	}
1413 
1414 	if (regmask & VM_RWREGS_DRS) {
1415 		vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1416 		vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1417 		vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1418 		vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1419 		vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR6];
1420 		if (vmwrite(VMCS_GUEST_IA32_DR7, drs[VCPU_REGS_DR7]))
1421 			goto errout;
1422 	}
1423 
1424 	goto out;
1425 
1426 errout:
1427 	ret = EINVAL;
1428 out:
1429 	if (loadvmcs) {
1430 		if (vmclear(&vcpu->vc_control_pa))
1431 			ret = EINVAL;
1432 		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
1433 	}
1434 	return (ret);
1435 }
1436 
1437 /*
1438  * vcpu_writeregs_svm
1439  *
1440  * Writes 'vcpu's registers
1441  *
1442  * Parameters:
1443  *  vcpu: the vcpu that has to get its registers written to
1444  *  regmask: the types of registers to write
1445  *  vrs: the register values to write
1446  *
1447  * Return values:
1448  *  0: if successful
1449  *  EINVAL an error writing registers occurred
1450  */
1451 int
vcpu_writeregs_svm(struct vcpu * vcpu,uint64_t regmask,struct vcpu_reg_state * vrs)1452 vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
1453     struct vcpu_reg_state *vrs)
1454 {
1455 	uint64_t *gprs = vrs->vrs_gprs;
1456 	uint64_t *crs = vrs->vrs_crs;
1457 	uint16_t attr;
1458 	uint64_t *msrs = vrs->vrs_msrs;
1459 	uint64_t *drs = vrs->vrs_drs;
1460 	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1461 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1462 
1463 	if (regmask & VM_RWREGS_GPRS) {
1464 		vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1465 		vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1466 		vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1467 		vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1468 		vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1469 		vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1470 		vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1471 		vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1472 		vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1473 		vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1474 		vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1475 		vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1476 		vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1477 		vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1478 		vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1479 		vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1480 
1481 		vmcb->v_rax = gprs[VCPU_REGS_RAX];
1482 		vmcb->v_rip = gprs[VCPU_REGS_RIP];
1483 		vmcb->v_rsp = gprs[VCPU_REGS_RSP];
1484 		vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS];
1485 	}
1486 
1487 	if (regmask & VM_RWREGS_SREGS) {
1488 		vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS].vsi_sel;
1489 		vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS].vsi_limit;
1490 		attr = sregs[VCPU_REGS_CS].vsi_ar;
1491 		vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1492 		vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS].vsi_base;
1493 		vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS].vsi_sel;
1494 		vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS].vsi_limit;
1495 		attr = sregs[VCPU_REGS_DS].vsi_ar;
1496 		vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1497 		vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS].vsi_base;
1498 		vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES].vsi_sel;
1499 		vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES].vsi_limit;
1500 		attr = sregs[VCPU_REGS_ES].vsi_ar;
1501 		vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1502 		vmcb->v_es.vs_base = sregs[VCPU_REGS_ES].vsi_base;
1503 		vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS].vsi_sel;
1504 		vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS].vsi_limit;
1505 		attr = sregs[VCPU_REGS_FS].vsi_ar;
1506 		vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1507 		vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS].vsi_base;
1508 		vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS].vsi_sel;
1509 		vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS].vsi_limit;
1510 		attr = sregs[VCPU_REGS_GS].vsi_ar;
1511 		vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1512 		vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS].vsi_base;
1513 		vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS].vsi_sel;
1514 		vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS].vsi_limit;
1515 		attr = sregs[VCPU_REGS_SS].vsi_ar;
1516 		vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1517 		vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS].vsi_base;
1518 		vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR].vsi_sel;
1519 		vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR].vsi_limit;
1520 		attr = sregs[VCPU_REGS_LDTR].vsi_ar;
1521 		vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1522 		vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR].vsi_base;
1523 		vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR].vsi_sel;
1524 		vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR].vsi_limit;
1525 		attr = sregs[VCPU_REGS_TR].vsi_ar;
1526 		vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1527 		vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR].vsi_base;
1528 		vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
1529 		vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
1530 		vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
1531 		vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
1532 	}
1533 
1534 	if (regmask & VM_RWREGS_CRS) {
1535 		vmcb->v_cr0 = crs[VCPU_REGS_CR0];
1536 		vmcb->v_cr3 = crs[VCPU_REGS_CR3];
1537 		vmcb->v_cr4 = crs[VCPU_REGS_CR4];
1538 		vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR2];
1539 		vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1540 	}
1541 
1542 	if (regmask & VM_RWREGS_MSRS) {
1543 		vmcb->v_efer |= msrs[VCPU_REGS_EFER];
1544 		vmcb->v_star = msrs[VCPU_REGS_STAR];
1545 		vmcb->v_lstar = msrs[VCPU_REGS_LSTAR];
1546 		vmcb->v_cstar = msrs[VCPU_REGS_CSTAR];
1547 		vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK];
1548 		vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE];
1549 	}
1550 
1551 	if (regmask & VM_RWREGS_DRS) {
1552 		vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1553 		vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1554 		vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1555 		vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1556 		vmcb->v_dr6 = drs[VCPU_REGS_DR6];
1557 		vmcb->v_dr7 = drs[VCPU_REGS_DR7];
1558 	}
1559 
1560 	return (0);
1561 }
1562 
1563 /*
1564  * vcpu_reset_regs_svm
1565  *
1566  * Initializes 'vcpu's registers to supplied state
1567  *
1568  * Parameters:
1569  *  vcpu: the vcpu whose register state is to be initialized
1570  *  vrs: the register state to set
1571  *
1572  * Return values:
1573  *  0: registers init'ed successfully
1574  *  EINVAL: an error occurred setting register state
1575  */
1576 int
vcpu_reset_regs_svm(struct vcpu * vcpu,struct vcpu_reg_state * vrs)1577 vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1578 {
1579 	struct vmcb *vmcb;
1580 	int ret;
1581 
1582 	vmcb = (struct vmcb *)vcpu->vc_control_va;
1583 
1584 	/*
1585 	 * Intercept controls
1586 	 *
1587 	 * External Interrupt exiting (SVM_INTERCEPT_INTR)
1588 	 * External NMI exiting (SVM_INTERCEPT_NMI)
1589 	 * CPUID instruction (SVM_INTERCEPT_CPUID)
1590 	 * HLT instruction (SVM_INTERCEPT_HLT)
1591 	 * I/O instructions (SVM_INTERCEPT_INOUT)
1592 	 * MSR access (SVM_INTERCEPT_MSR)
1593 	 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
1594 	 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
1595 	 *
1596 	 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
1597 	 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
1598 	 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
1599 	 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
1600 	 * STGI instruction (SVM_INTERCEPT_STGI)
1601 	 * CLGI instruction (SVM_INTERCEPT_CLGI)
1602 	 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
1603 	 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
1604 	 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
1605 	 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
1606 	 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
1607 	 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
1608 	 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
1609 	 */
1610 	vmcb->v_intercept1 = SVM_INTERCEPT_INTR | SVM_INTERCEPT_NMI |
1611 	    SVM_INTERCEPT_CPUID | SVM_INTERCEPT_HLT | SVM_INTERCEPT_INOUT |
1612 	    SVM_INTERCEPT_MSR | SVM_INTERCEPT_SHUTDOWN | SVM_INTERCEPT_INVLPGA;
1613 
1614 	vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN | SVM_INTERCEPT_VMMCALL |
1615 	    SVM_INTERCEPT_VMLOAD | SVM_INTERCEPT_VMSAVE | SVM_INTERCEPT_STGI |
1616 	    SVM_INTERCEPT_CLGI | SVM_INTERCEPT_SKINIT | SVM_INTERCEPT_ICEBP |
1617 	    SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
1618 	    SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
1619 
1620 	if (xsave_mask)
1621 		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
1622 
1623 	/* Setup I/O bitmap */
1624 	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
1625 	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
1626 
1627 	/* Setup MSR bitmap */
1628 	memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE);
1629 	vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
1630 	svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
1631 	svm_setmsrbrw(vcpu, MSR_SYSENTER_CS);
1632 	svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
1633 	svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
1634 	svm_setmsrbrw(vcpu, MSR_STAR);
1635 	svm_setmsrbrw(vcpu, MSR_LSTAR);
1636 	svm_setmsrbrw(vcpu, MSR_CSTAR);
1637 	svm_setmsrbrw(vcpu, MSR_SFMASK);
1638 	svm_setmsrbrw(vcpu, MSR_FSBASE);
1639 	svm_setmsrbrw(vcpu, MSR_GSBASE);
1640 	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
1641 
1642 	/* EFER is R/O so we can ensure the guest always has SVME */
1643 	svm_setmsrbr(vcpu, MSR_EFER);
1644 
1645 	/* allow reading TSC */
1646 	svm_setmsrbr(vcpu, MSR_TSC);
1647 
1648 	/* allow reading HWCR and PSTATEDEF to determine TSC frequency */
1649 	svm_setmsrbr(vcpu, MSR_HWCR);
1650 	svm_setmsrbr(vcpu, MSR_PSTATEDEF(0));
1651 
1652 	/* Guest VCPU ASID */
1653 	vmcb->v_asid = vcpu->vc_vpid;
1654 
1655 	/* TLB Control - First time in, flush all*/
1656 	vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL;
1657 
1658 	/* INTR masking */
1659 	vmcb->v_intr_masking = 1;
1660 
1661 	/* PAT */
1662 	vmcb->v_g_pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
1663             PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
1664             PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
1665             PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
1666 
1667 	/* NPT */
1668 	vmcb->v_np_enable = SVM_ENABLE_NP;
1669 	vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
1670 
1671 	/* SEV */
1672 	if (vcpu->vc_sev)
1673 		vmcb->v_np_enable |= SVM_ENABLE_SEV;
1674 
1675 	/* Enable SVME in EFER (must always be set) */
1676 	vmcb->v_efer |= EFER_SVME;
1677 
1678 	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
1679 
1680 	/* xcr0 power on default sets bit 0 (x87 state) */
1681 	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
1682 
1683 	vcpu->vc_parent->vm_map->pmap->eptp = 0;
1684 
1685 	return ret;
1686 }
1687 
1688 /*
1689  * svm_setmsrbr
1690  *
1691  * Allow read access to the specified msr on the supplied vcpu.
1692  *
1693  * Parameters:
1694  *  vcpu: the VCPU to allow access
1695  *  msr: the MSR number to allow access to
1696  */
1697 void
svm_setmsrbr(struct vcpu * vcpu,uint32_t msr)1698 svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1699 {
1700 	uint8_t *msrs;
1701 	uint16_t idx;
1702 
1703 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1704 
1705 	/*
1706 	 * MSR Read bitmap layout:
1707 	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1708 	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1709 	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1710 	 *
1711 	 * Read enable bit is low order bit of 2-bit pair
1712 	 * per MSR (eg, MSR 0x0 write bit is at bit 0 @ 0x0)
1713 	 */
1714 	if (msr <= 0x1fff) {
1715 		idx = SVM_MSRIDX(msr);
1716 		msrs[idx] &= ~(SVM_MSRBIT_R(msr));
1717 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1718 		idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1719 		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000));
1720 	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1721 		idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1722 		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000));
1723 	} else {
1724 		printf("%s: invalid msr 0x%x\n", __func__, msr);
1725 		return;
1726 	}
1727 }
1728 
1729 /*
1730  * svm_setmsrbw
1731  *
1732  * Allow write access to the specified msr on the supplied vcpu
1733  *
1734  * Parameters:
1735  *  vcpu: the VCPU to allow access
1736  *  msr: the MSR number to allow access to
1737  */
1738 void
svm_setmsrbw(struct vcpu * vcpu,uint32_t msr)1739 svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1740 {
1741 	uint8_t *msrs;
1742 	uint16_t idx;
1743 
1744 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1745 
1746 	/*
1747 	 * MSR Write bitmap layout:
1748 	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1749 	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1750 	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1751 	 *
1752 	 * Write enable bit is high order bit of 2-bit pair
1753 	 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
1754 	 */
1755 	if (msr <= 0x1fff) {
1756 		idx = SVM_MSRIDX(msr);
1757 		msrs[idx] &= ~(SVM_MSRBIT_W(msr));
1758 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1759 		idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1760 		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000));
1761 	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1762 		idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1763 		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000));
1764 	} else {
1765 		printf("%s: invalid msr 0x%x\n", __func__, msr);
1766 		return;
1767 	}
1768 }
1769 
1770 /*
1771  * svm_setmsrbrw
1772  *
1773  * Allow read/write access to the specified msr on the supplied vcpu
1774  *
1775  * Parameters:
1776  *  vcpu: the VCPU to allow access
1777  *  msr: the MSR number to allow access to
1778  */
1779 void
svm_setmsrbrw(struct vcpu * vcpu,uint32_t msr)1780 svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1781 {
1782 	svm_setmsrbr(vcpu, msr);
1783 	svm_setmsrbw(vcpu, msr);
1784 }
1785 
1786 /*
1787  * vmx_setmsrbr
1788  *
1789  * Allow read access to the specified msr on the supplied vcpu.
1790  *
1791  * Parameters:
1792  *  vcpu: the VCPU to allow access
1793  *  msr: the MSR number to allow access to
1794  */
1795 void
vmx_setmsrbr(struct vcpu * vcpu,uint32_t msr)1796 vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1797 {
1798 	uint8_t *msrs;
1799 	uint16_t idx;
1800 
1801 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1802 
1803 	/*
1804 	 * MSR Read bitmap layout:
1805 	 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
1806 	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
1807 	 */
1808 	if (msr <= 0x1fff) {
1809 		idx = VMX_MSRIDX(msr);
1810 		msrs[idx] &= ~(VMX_MSRBIT(msr));
1811 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1812 		idx = VMX_MSRIDX(msr - 0xc0000000) + 0x400;
1813 		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1814 	} else
1815 		printf("%s: invalid msr 0x%x\n", __func__, msr);
1816 }
1817 
1818 /*
1819  * vmx_setmsrbw
1820  *
1821  * Allow write access to the specified msr on the supplied vcpu
1822  *
1823  * Parameters:
1824  *  vcpu: the VCPU to allow access
1825  *  msr: the MSR number to allow access to
1826  */
1827 void
vmx_setmsrbw(struct vcpu * vcpu,uint32_t msr)1828 vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1829 {
1830 	uint8_t *msrs;
1831 	uint16_t idx;
1832 
1833 	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1834 
1835 	/*
1836 	 * MSR Write bitmap layout:
1837 	 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
1838 	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
1839 	 */
1840 	if (msr <= 0x1fff) {
1841 		idx = VMX_MSRIDX(msr) + 0x800;
1842 		msrs[idx] &= ~(VMX_MSRBIT(msr));
1843 	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1844 		idx = VMX_MSRIDX(msr - 0xc0000000) + 0xc00;
1845 		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1846 	} else
1847 		printf("%s: invalid msr 0x%x\n", __func__, msr);
1848 }
1849 
1850 /*
1851  * vmx_setmsrbrw
1852  *
1853  * Allow read/write access to the specified msr on the supplied vcpu
1854  *
1855  * Parameters:
1856  *  vcpu: the VCPU to allow access
1857  *  msr: the MSR number to allow access to
1858  */
1859 void
vmx_setmsrbrw(struct vcpu * vcpu,uint32_t msr)1860 vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1861 {
1862 	vmx_setmsrbr(vcpu, msr);
1863 	vmx_setmsrbw(vcpu, msr);
1864 }
1865 
1866 /*
1867  * svm_set_clean
1868  *
1869  * Sets (mark as unmodified) the VMCB clean bit set in 'value'.
1870  * For example, to set the clean bit for the VMCB intercepts (bit position 0),
1871  * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1872  * Multiple cleanbits can be provided in 'value' at the same time (eg,
1873  * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1874  *
1875  * Note that this function does not clear any bits; to clear bits in the
1876  * vmcb cleanbits bitfield, use 'svm_set_dirty'.
1877  *
1878  * Parameters:
1879  *  vmcs: the VCPU whose VMCB clean value should be set
1880  *  value: the value(s) to enable in the cleanbits mask
1881  */
1882 void
svm_set_clean(struct vcpu * vcpu,uint32_t value)1883 svm_set_clean(struct vcpu *vcpu, uint32_t value)
1884 {
1885 	struct vmcb *vmcb;
1886 
1887 	/* If no cleanbits support, do nothing */
1888 	if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1889 		return;
1890 
1891 	vmcb = (struct vmcb *)vcpu->vc_control_va;
1892 
1893 	vmcb->v_vmcb_clean_bits |= value;
1894 }
1895 
1896 /*
1897  * svm_set_dirty
1898  *
1899  * Clears (mark as modified) the VMCB clean bit set in 'value'.
1900  * For example, to clear the bit for the VMCB intercepts (bit position 0)
1901  * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1902  * Multiple dirty bits can be provided in 'value' at the same time (eg,
1903  * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1904  *
1905  * Parameters:
1906  *  vmcs: the VCPU whose VMCB dirty value should be set
1907  *  value: the value(s) to dirty in the cleanbits mask
1908  */
1909 void
svm_set_dirty(struct vcpu * vcpu,uint32_t value)1910 svm_set_dirty(struct vcpu *vcpu, uint32_t value)
1911 {
1912 	struct vmcb *vmcb;
1913 
1914 	/* If no cleanbits support, do nothing */
1915 	if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1916 		return;
1917 
1918 	vmcb = (struct vmcb *)vcpu->vc_control_va;
1919 
1920 	vmcb->v_vmcb_clean_bits &= ~value;
1921 }
1922 
1923 /*
1924  * vcpu_reset_regs_vmx
1925  *
1926  * Initializes 'vcpu's registers to supplied state
1927  *
1928  * Parameters:
1929  *  vcpu: the vcpu whose register state is to be initialized
1930  *  vrs: the register state to set
1931  *
1932  * Return values:
1933  *  0: registers init'ed successfully
1934  *  EINVAL: an error occurred setting register state
1935  */
1936 int
vcpu_reset_regs_vmx(struct vcpu * vcpu,struct vcpu_reg_state * vrs)1937 vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1938 {
1939 	int ret = 0, ug = 0;
1940 	uint32_t cr0, cr4;
1941 	uint32_t pinbased, procbased, procbased2, exit, entry;
1942 	uint32_t want1, want0;
1943 	uint64_t ctrlval, cr3, msr_misc_enable;
1944 	uint16_t ctrl;
1945 	struct vmx_msr_store *msr_store;
1946 
1947 	rw_assert_wrlock(&vcpu->vc_lock);
1948 
1949 	cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
1950 
1951 	if (vcpu_reload_vmcs_vmx(vcpu)) {
1952 		DPRINTF("%s: error reloading VMCS\n", __func__);
1953 		ret = EINVAL;
1954 		goto exit;
1955 	}
1956 
1957 #ifdef VMM_DEBUG
1958 	/* VMCS should be loaded... */
1959 	paddr_t pa = 0ULL;
1960 	if (vmptrst(&pa))
1961 		panic("%s: vmptrst", __func__);
1962 	KASSERT(pa == vcpu->vc_control_pa);
1963 #endif /* VMM_DEBUG */
1964 
1965 	/* Compute Basic Entry / Exit Controls */
1966 	vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC);
1967 	vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS);
1968 	vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS);
1969 	vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS);
1970 	vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS);
1971 
1972 	/* Compute True Entry / Exit Controls (if applicable) */
1973 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1974 		vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS);
1975 		vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS);
1976 		vcpu->vc_vmx_true_pinbased_ctls =
1977 		    rdmsr(IA32_VMX_TRUE_PINBASED_CTLS);
1978 		vcpu->vc_vmx_true_procbased_ctls =
1979 		    rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS);
1980 	}
1981 
1982 	/* Compute Secondary Procbased Controls (if applicable) */
1983 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
1984 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1))
1985 		vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS);
1986 
1987 	/*
1988 	 * Pinbased ctrls
1989 	 *
1990 	 * We must be able to set the following:
1991 	 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
1992 	 * IA32_VMX_NMI_EXITING - exit on host NMI
1993 	 */
1994 	want1 = IA32_VMX_EXTERNAL_INT_EXITING |
1995 	    IA32_VMX_NMI_EXITING;
1996 	want0 = 0;
1997 
1998 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1999 		ctrl = IA32_VMX_TRUE_PINBASED_CTLS;
2000 		ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2001 	} else {
2002 		ctrl = IA32_VMX_PINBASED_CTLS;
2003 		ctrlval = vcpu->vc_vmx_pinbased_ctls;
2004 	}
2005 
2006 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2007 		DPRINTF("%s: error computing pinbased controls\n", __func__);
2008 		ret = EINVAL;
2009 		goto exit;
2010 	}
2011 
2012 	if (vmwrite(VMCS_PINBASED_CTLS, pinbased)) {
2013 		DPRINTF("%s: error setting pinbased controls\n", __func__);
2014 		ret = EINVAL;
2015 		goto exit;
2016 	}
2017 
2018 	/*
2019 	 * Procbased ctrls
2020 	 *
2021 	 * We must be able to set the following:
2022 	 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2023 	 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2024 	 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2025 	 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2026 	 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2027 	 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2028 	 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2029 	 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2030 	 *
2031 	 * If we have EPT, we must be able to clear the following
2032 	 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2033 	 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2034 	 */
2035 	want1 = IA32_VMX_HLT_EXITING |
2036 	    IA32_VMX_MWAIT_EXITING |
2037 	    IA32_VMX_UNCONDITIONAL_IO_EXITING |
2038 	    IA32_VMX_USE_MSR_BITMAPS |
2039 	    IA32_VMX_CR8_LOAD_EXITING |
2040 	    IA32_VMX_CR8_STORE_EXITING |
2041 	    IA32_VMX_MONITOR_EXITING |
2042 	    IA32_VMX_USE_TPR_SHADOW;
2043 	want0 = 0;
2044 
2045 	want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS;
2046 	want0 |= IA32_VMX_CR3_LOAD_EXITING | IA32_VMX_CR3_STORE_EXITING;
2047 
2048 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2049 		ctrl = IA32_VMX_TRUE_PROCBASED_CTLS;
2050 		ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2051 	} else {
2052 		ctrl = IA32_VMX_PROCBASED_CTLS;
2053 		ctrlval = vcpu->vc_vmx_procbased_ctls;
2054 	}
2055 
2056 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2057 		DPRINTF("%s: error computing procbased controls\n", __func__);
2058 		ret = EINVAL;
2059 		goto exit;
2060 	}
2061 
2062 	if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
2063 		DPRINTF("%s: error setting procbased controls\n", __func__);
2064 		ret = EINVAL;
2065 		goto exit;
2066 	}
2067 
2068 	/*
2069 	 * Secondary Procbased ctrls
2070 	 *
2071 	 * We want to be able to set the following, if available:
2072 	 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2073 	 *
2074 	 * If we have EPT, we must be able to set the following:
2075 	 * IA32_VMX_ENABLE_EPT - enable EPT
2076 	 *
2077 	 * If we have unrestricted guest capability, we must be able to set
2078 	 * the following:
2079 	 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller
2080 	 *     specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter)
2081 	 */
2082 	want1 = IA32_VMX_ENABLE_EPT;
2083 
2084 	/* XXX checking for 2ndary controls can be combined here */
2085 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2086 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2087 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2088 		    IA32_VMX_ENABLE_VPID, 1)) {
2089 			want1 |= IA32_VMX_ENABLE_VPID;
2090 			vcpu->vc_vmx_vpid_enabled = 1;
2091 		}
2092 	}
2093 
2094 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2095 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2096 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2097 		    IA32_VMX_UNRESTRICTED_GUEST, 1)) {
2098 			if ((cr0 & (CR0_PE | CR0_PG)) == 0) {
2099 				want1 |= IA32_VMX_UNRESTRICTED_GUEST;
2100 				ug = 1;
2101 			}
2102 		}
2103 	}
2104 
2105 	want0 = ~want1;
2106 	ctrlval = vcpu->vc_vmx_procbased2_ctls;
2107 	ctrl = IA32_VMX_PROCBASED2_CTLS;
2108 
2109 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2110 		DPRINTF("%s: error computing secondary procbased controls\n",
2111 		     __func__);
2112 		ret = EINVAL;
2113 		goto exit;
2114 	}
2115 
2116 	if (vmwrite(VMCS_PROCBASED2_CTLS, procbased2)) {
2117 		DPRINTF("%s: error setting secondary procbased controls\n",
2118 		     __func__);
2119 		ret = EINVAL;
2120 		goto exit;
2121 	}
2122 
2123 	/*
2124 	 * Exit ctrls
2125 	 *
2126 	 * We must be able to set the following:
2127 	 * IA32_VMX_SAVE_DEBUG_CONTROLS
2128 	 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2129 	 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2130 	 */
2131 	want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE |
2132 	    IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT |
2133 	    IA32_VMX_SAVE_DEBUG_CONTROLS;
2134 	want0 = 0;
2135 
2136 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2137 		ctrl = IA32_VMX_TRUE_EXIT_CTLS;
2138 		ctrlval = vcpu->vc_vmx_true_exit_ctls;
2139 	} else {
2140 		ctrl = IA32_VMX_EXIT_CTLS;
2141 		ctrlval = vcpu->vc_vmx_exit_ctls;
2142 	}
2143 
2144 	if (rcr4() & CR4_CET)
2145 		want1 |= IA32_VMX_LOAD_HOST_CET_STATE;
2146 	else
2147 		want0 |= IA32_VMX_LOAD_HOST_CET_STATE;
2148 
2149 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2150 		DPRINTF("%s: error computing exit controls\n", __func__);
2151 		ret = EINVAL;
2152 		goto exit;
2153 	}
2154 
2155 	if (vmwrite(VMCS_EXIT_CTLS, exit)) {
2156 		DPRINTF("%s: error setting exit controls\n", __func__);
2157 		ret = EINVAL;
2158 		goto exit;
2159 	}
2160 
2161 	/*
2162 	 * Entry ctrls
2163 	 *
2164 	 * We must be able to set the following:
2165 	 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2166 	 * IA32_VMX_LOAD_DEBUG_CONTROLS
2167 	 * We must be able to clear the following:
2168 	 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2169 	 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2170 	 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2171 	 */
2172 	want1 = IA32_VMX_LOAD_DEBUG_CONTROLS;
2173 	if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)
2174 		want1 |= IA32_VMX_IA32E_MODE_GUEST;
2175 
2176 	want0 = IA32_VMX_ENTRY_TO_SMM |
2177 	    IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT |
2178 	    IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY;
2179 
2180 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2181 		ctrl = IA32_VMX_TRUE_ENTRY_CTLS;
2182 		ctrlval = vcpu->vc_vmx_true_entry_ctls;
2183 	} else {
2184 		ctrl = IA32_VMX_ENTRY_CTLS;
2185 		ctrlval = vcpu->vc_vmx_entry_ctls;
2186 	}
2187 
2188 	if (rcr4() & CR4_CET)
2189 		want1 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2190 	else
2191 		want0 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2192 
2193 	if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2194 		ret = EINVAL;
2195 		goto exit;
2196 	}
2197 
2198 	if (vmwrite(VMCS_ENTRY_CTLS, entry)) {
2199 		ret = EINVAL;
2200 		goto exit;
2201 	}
2202 
2203 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2204 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2205 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2206 		    IA32_VMX_ENABLE_VPID, 1)) {
2207 			if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) {
2208 				DPRINTF("%s: error setting guest VPID\n",
2209 				    __func__);
2210 				ret = EINVAL;
2211 				goto exit;
2212 			}
2213 		}
2214 	}
2215 
2216 	/*
2217 	 * Determine which bits in CR0 have to be set to a fixed
2218 	 * value as per Intel SDM A.7.
2219 	 * CR0 bits in the vrs parameter must match these.
2220 	 */
2221 	want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2222 	    (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2223 	want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2224 	    ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2225 
2226 	/*
2227 	 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
2228 	 * fixed to 1 even if the CPU supports the unrestricted guest
2229 	 * feature. Update want1 and want0 accordingly to allow
2230 	 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
2231 	 * the CPU has the unrestricted guest capability.
2232 	 */
2233 	if (ug) {
2234 		want1 &= ~(CR0_PG | CR0_PE);
2235 		want0 &= ~(CR0_PG | CR0_PE);
2236 	}
2237 
2238 	/*
2239 	 * VMX may require some bits to be set that userland should not have
2240 	 * to care about. Set those here.
2241 	 */
2242 	if (want1 & CR0_NE)
2243 		cr0 |= CR0_NE;
2244 
2245 	if ((cr0 & want1) != want1) {
2246 		ret = EINVAL;
2247 		goto exit;
2248 	}
2249 
2250 	if ((~cr0 & want0) != want0) {
2251 		ret = EINVAL;
2252 		goto exit;
2253 	}
2254 
2255 	vcpu->vc_vmx_cr0_fixed1 = want1;
2256 	vcpu->vc_vmx_cr0_fixed0 = want0;
2257 	/*
2258 	 * Determine which bits in CR4 have to be set to a fixed
2259 	 * value as per Intel SDM A.8.
2260 	 * CR4 bits in the vrs parameter must match these, except
2261 	 * CR4_VMXE - we add that here since it must always be set.
2262 	 */
2263 	want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2264 	    (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2265 	want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2266 	    ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2267 
2268 	cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE;
2269 
2270 	if ((cr4 & want1) != want1) {
2271 		ret = EINVAL;
2272 		goto exit;
2273 	}
2274 
2275 	if ((~cr4 & want0) != want0) {
2276 		ret = EINVAL;
2277 		goto exit;
2278 	}
2279 
2280 	cr3 = vrs->vrs_crs[VCPU_REGS_CR3];
2281 
2282 	/* Restore PDPTEs if 32-bit PAE paging is being used */
2283 	if (cr3 && (cr4 & CR4_PAE) &&
2284 	    !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) {
2285 		if (vmwrite(VMCS_GUEST_PDPTE0,
2286 		    vrs->vrs_crs[VCPU_REGS_PDPTE0])) {
2287 			ret = EINVAL;
2288 			goto exit;
2289 		}
2290 
2291 		if (vmwrite(VMCS_GUEST_PDPTE1,
2292 		    vrs->vrs_crs[VCPU_REGS_PDPTE1])) {
2293 			ret = EINVAL;
2294 			goto exit;
2295 		}
2296 
2297 		if (vmwrite(VMCS_GUEST_PDPTE2,
2298 		    vrs->vrs_crs[VCPU_REGS_PDPTE2])) {
2299 			ret = EINVAL;
2300 			goto exit;
2301 		}
2302 
2303 		if (vmwrite(VMCS_GUEST_PDPTE3,
2304 		    vrs->vrs_crs[VCPU_REGS_PDPTE3])) {
2305 			ret = EINVAL;
2306 			goto exit;
2307 		}
2308 	}
2309 
2310 	vrs->vrs_crs[VCPU_REGS_CR0] = cr0;
2311 	vrs->vrs_crs[VCPU_REGS_CR4] = cr4;
2312 
2313 	msr_misc_enable = rdmsr(MSR_MISC_ENABLE);
2314 
2315 	/*
2316 	 * Select host MSRs to be loaded on exit
2317 	 */
2318 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
2319 	msr_store[VCPU_HOST_REGS_EFER].vms_index = MSR_EFER;
2320 	msr_store[VCPU_HOST_REGS_EFER].vms_data = rdmsr(MSR_EFER);
2321 	msr_store[VCPU_HOST_REGS_STAR].vms_index = MSR_STAR;
2322 	msr_store[VCPU_HOST_REGS_STAR].vms_data = rdmsr(MSR_STAR);
2323 	msr_store[VCPU_HOST_REGS_LSTAR].vms_index = MSR_LSTAR;
2324 	msr_store[VCPU_HOST_REGS_LSTAR].vms_data = rdmsr(MSR_LSTAR);
2325 	msr_store[VCPU_HOST_REGS_CSTAR].vms_index = MSR_CSTAR;
2326 	msr_store[VCPU_HOST_REGS_CSTAR].vms_data = 0;
2327 	msr_store[VCPU_HOST_REGS_SFMASK].vms_index = MSR_SFMASK;
2328 	msr_store[VCPU_HOST_REGS_SFMASK].vms_data = rdmsr(MSR_SFMASK);
2329 	msr_store[VCPU_HOST_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2330 	msr_store[VCPU_HOST_REGS_KGSBASE].vms_data = 0;
2331 	msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2332 	msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2333 
2334 	/*
2335 	 * Select guest MSRs to be loaded on entry / saved on exit
2336 	 */
2337 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
2338 
2339 	msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
2340 	msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
2341 	msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
2342 	msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR;
2343 	msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK;
2344 	msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2345 	msr_store[VCPU_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2346 
2347 	/*
2348 	 * Initialize MSR_MISC_ENABLE as it can't be read and populated from vmd
2349 	 * and some of the content is based on the host.
2350 	 */
2351 	msr_store[VCPU_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2352 	msr_store[VCPU_REGS_MISC_ENABLE].vms_data &=
2353 	    ~(MISC_ENABLE_TCC | MISC_ENABLE_PERF_MON_AVAILABLE |
2354 	      MISC_ENABLE_EIST_ENABLED | MISC_ENABLE_ENABLE_MONITOR_FSM |
2355 	      MISC_ENABLE_xTPR_MESSAGE_DISABLE);
2356 	msr_store[VCPU_REGS_MISC_ENABLE].vms_data |=
2357 	      MISC_ENABLE_BTS_UNAVAILABLE | MISC_ENABLE_PEBS_UNAVAILABLE;
2358 
2359 	/*
2360 	 * Currently we use the same memory for guest MSRs (entry-load and
2361 	 * exit-store) so they have the same count.  We exit-load the same
2362 	 * host MSRs, so same count but different memory.  Those are just
2363 	 * our current choices, not architectural requirements.
2364 	 */
2365 	if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VCPU_REGS_NMSRS)) {
2366 		DPRINTF("%s: error setting guest MSR exit store count\n",
2367 		    __func__);
2368 		ret = EINVAL;
2369 		goto exit;
2370 	}
2371 
2372 	if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, VCPU_HOST_REGS_NMSRS)) {
2373 		DPRINTF("%s: error setting guest MSR exit load count\n",
2374 		    __func__);
2375 		ret = EINVAL;
2376 		goto exit;
2377 	}
2378 
2379 	if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, VCPU_REGS_NMSRS)) {
2380 		DPRINTF("%s: error setting guest MSR entry load count\n",
2381 		    __func__);
2382 		ret = EINVAL;
2383 		goto exit;
2384 	}
2385 
2386 	if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS,
2387 	    vcpu->vc_vmx_msr_exit_save_pa)) {
2388 		DPRINTF("%s: error setting guest MSR exit store address\n",
2389 		    __func__);
2390 		ret = EINVAL;
2391 		goto exit;
2392 	}
2393 
2394 	if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS,
2395 	    vcpu->vc_vmx_msr_exit_load_pa)) {
2396 		DPRINTF("%s: error setting guest MSR exit load address\n",
2397 		    __func__);
2398 		ret = EINVAL;
2399 		goto exit;
2400 	}
2401 
2402 	if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS,
2403 	    vcpu->vc_vmx_msr_exit_save_pa)) {
2404 		DPRINTF("%s: error setting guest MSR entry load address\n",
2405 		    __func__);
2406 		ret = EINVAL;
2407 		goto exit;
2408 	}
2409 
2410 	if (vmwrite(VMCS_MSR_BITMAP_ADDRESS,
2411 	    vcpu->vc_msr_bitmap_pa)) {
2412 		DPRINTF("%s: error setting guest MSR bitmap address\n",
2413 		    __func__);
2414 		ret = EINVAL;
2415 		goto exit;
2416 	}
2417 
2418 	if (vmwrite(VMCS_CR4_MASK, CR4_VMXE)) {
2419 		DPRINTF("%s: error setting guest CR4 mask\n", __func__);
2420 		ret = EINVAL;
2421 		goto exit;
2422 	}
2423 
2424 	if (vmwrite(VMCS_CR0_MASK, CR0_NE)) {
2425 		DPRINTF("%s: error setting guest CR0 mask\n", __func__);
2426 		ret = EINVAL;
2427 		goto exit;
2428 	}
2429 
2430 	/*
2431 	 * Set up the VMCS for the register state we want during VCPU start.
2432 	 * This matches what the CPU state would be after a bootloader
2433 	 * transition to 'start'.
2434 	 */
2435 	ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);
2436 
2437 	/*
2438 	 * Set up the MSR bitmap
2439 	 */
2440 	memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE);
2441 	vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
2442 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS);
2443 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
2444 	vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
2445 	vmx_setmsrbrw(vcpu, MSR_EFER);
2446 	vmx_setmsrbrw(vcpu, MSR_STAR);
2447 	vmx_setmsrbrw(vcpu, MSR_LSTAR);
2448 	vmx_setmsrbrw(vcpu, MSR_CSTAR);
2449 	vmx_setmsrbrw(vcpu, MSR_SFMASK);
2450 	vmx_setmsrbrw(vcpu, MSR_FSBASE);
2451 	vmx_setmsrbrw(vcpu, MSR_GSBASE);
2452 	vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE);
2453 
2454 	vmx_setmsrbr(vcpu, MSR_MISC_ENABLE);
2455 	vmx_setmsrbr(vcpu, MSR_TSC);
2456 
2457 	/* If host supports CET, pass through access to the guest. */
2458 	if (rcr4() & CR4_CET)
2459 		vmx_setmsrbrw(vcpu, MSR_S_CET);
2460 
2461 	/* XXX CR0 shadow */
2462 	/* XXX CR4 shadow */
2463 
2464 	/* xcr0 power on default sets bit 0 (x87 state) */
2465 	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
2466 
2467 	/* XXX PAT shadow */
2468 	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2469 
2470 	/* Flush the VMCS */
2471 	if (vmclear(&vcpu->vc_control_pa)) {
2472 		DPRINTF("%s: vmclear failed\n", __func__);
2473 		ret = EINVAL;
2474 	}
2475 	atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
2476 
2477 exit:
2478 	return (ret);
2479 }
2480 
2481 /*
2482  * vcpu_init_vmx
2483  *
2484  * Intel VMX specific VCPU initialization routine.
2485  *
2486  * This function allocates various per-VCPU memory regions, sets up initial
2487  * VCPU VMCS controls, and sets initial register values.
2488  *
2489  * Parameters:
2490  *  vcpu: the VCPU structure being initialized
2491  *
2492  * Return values:
2493  *  0: the VCPU was initialized successfully
2494  *  ENOMEM: insufficient resources
2495  *  EINVAL: an error occurred during VCPU initialization
2496  */
2497 int
vcpu_init_vmx(struct vcpu * vcpu)2498 vcpu_init_vmx(struct vcpu *vcpu)
2499 {
2500 	struct vmcs *vmcs;
2501 	uint64_t msr, eptp;
2502 	uint32_t cr0, cr4;
2503 	int ret = 0;
2504 
2505 	/* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */
2506 	if (vmm_alloc_vpid(&vcpu->vc_vpid))
2507 		return (ENOMEM);
2508 
2509 	/* Allocate VMCS VA */
2510 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2511 	    &kd_waitok);
2512 	vcpu->vc_vmx_vmcs_state = VMCS_CLEARED;
2513 
2514 	if (!vcpu->vc_control_va) {
2515 		ret = ENOMEM;
2516 		goto exit;
2517 	}
2518 
2519 	/* Compute VMCS PA */
2520 	if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2521 	    (paddr_t *)&vcpu->vc_control_pa)) {
2522 		ret = ENOMEM;
2523 		goto exit;
2524 	}
2525 
2526 	/* Allocate MSR bitmap VA */
2527 	vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2528 	    &kd_waitok);
2529 
2530 	if (!vcpu->vc_msr_bitmap_va) {
2531 		ret = ENOMEM;
2532 		goto exit;
2533 	}
2534 
2535 	/* Compute MSR bitmap PA */
2536 	if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2537 	    (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2538 		ret = ENOMEM;
2539 		goto exit;
2540 	}
2541 
2542 	/* Allocate MSR exit load area VA */
2543 	vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2544 	   &kp_zero, &kd_waitok);
2545 
2546 	if (!vcpu->vc_vmx_msr_exit_load_va) {
2547 		ret = ENOMEM;
2548 		goto exit;
2549 	}
2550 
2551 	/* Compute MSR exit load area PA */
2552 	if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_load_va,
2553 	    &vcpu->vc_vmx_msr_exit_load_pa)) {
2554 		ret = ENOMEM;
2555 		goto exit;
2556 	}
2557 
2558 	/* Allocate MSR exit save area VA */
2559 	vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2560 	   &kp_zero, &kd_waitok);
2561 
2562 	if (!vcpu->vc_vmx_msr_exit_save_va) {
2563 		ret = ENOMEM;
2564 		goto exit;
2565 	}
2566 
2567 	/* Compute MSR exit save area PA */
2568 	if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_save_va,
2569 	    &vcpu->vc_vmx_msr_exit_save_pa)) {
2570 		ret = ENOMEM;
2571 		goto exit;
2572 	}
2573 
2574 #if 0	/* XXX currently use msr_exit_save for msr_entry_load too */
2575 	/* Allocate MSR entry load area VA */
2576 	vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2577 	   &kp_zero, &kd_waitok);
2578 
2579 	if (!vcpu->vc_vmx_msr_entry_load_va) {
2580 		ret = ENOMEM;
2581 		goto exit;
2582 	}
2583 
2584 	/* Compute MSR entry load area PA */
2585 	if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_entry_load_va,
2586 	    &vcpu->vc_vmx_msr_entry_load_pa)) {
2587 		ret = ENOMEM;
2588 		goto exit;
2589 	}
2590 #endif
2591 
2592 	vmcs = (struct vmcs *)vcpu->vc_control_va;
2593 	vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
2594 
2595 	/*
2596 	 * Load the VMCS onto this PCPU so we can write registers
2597 	 */
2598 	if (vmptrld(&vcpu->vc_control_pa)) {
2599 		ret = EINVAL;
2600 		goto exit;
2601 	}
2602 
2603 	/* Configure EPT Pointer */
2604 	eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
2605 	msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
2606 	if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) {
2607 		/* Page walk length 4 supported */
2608 		eptp |= ((IA32_EPT_PAGE_WALK_LENGTH - 1) << 3);
2609 	} else {
2610 		DPRINTF("EPT page walk length 4 not supported\n");
2611 		ret = EINVAL;
2612 		goto exit;
2613 	}
2614 	if (msr & IA32_EPT_VPID_CAP_WB) {
2615 		/* WB cache type supported */
2616 		eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB;
2617 	} else
2618 		DPRINTF("%s: no WB cache type available, guest VM will run "
2619 		    "uncached\n", __func__);
2620 
2621 	DPRINTF("Guest EPTP = 0x%llx\n", eptp);
2622 	if (vmwrite(VMCS_GUEST_IA32_EPTP, eptp)) {
2623 		DPRINTF("%s: error setting guest EPTP\n", __func__);
2624 		ret = EINVAL;
2625 		goto exit;
2626 	}
2627 
2628 	vcpu->vc_parent->vm_map->pmap->eptp = eptp;
2629 
2630 	/* Host CR0 */
2631 	cr0 = rcr0() & ~CR0_TS;
2632 	if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
2633 		DPRINTF("%s: error writing host CR0\n", __func__);
2634 		ret = EINVAL;
2635 		goto exit;
2636 	}
2637 
2638 	/* Host CR4 */
2639 	cr4 = rcr4();
2640 	if (vmwrite(VMCS_HOST_IA32_CR4, cr4)) {
2641 		DPRINTF("%s: error writing host CR4\n", __func__);
2642 		ret = EINVAL;
2643 		goto exit;
2644 	}
2645 
2646 	/* Host Segment Selectors */
2647 	if (vmwrite(VMCS_HOST_IA32_CS_SEL, GSEL(GCODE_SEL, SEL_KPL))) {
2648 		DPRINTF("%s: error writing host CS selector\n", __func__);
2649 		ret = EINVAL;
2650 		goto exit;
2651 	}
2652 
2653 	if (vmwrite(VMCS_HOST_IA32_DS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2654 		DPRINTF("%s: error writing host DS selector\n", __func__);
2655 		ret = EINVAL;
2656 		goto exit;
2657 	}
2658 
2659 	if (vmwrite(VMCS_HOST_IA32_ES_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2660 		DPRINTF("%s: error writing host ES selector\n", __func__);
2661 		ret = EINVAL;
2662 		goto exit;
2663 	}
2664 
2665 	if (vmwrite(VMCS_HOST_IA32_FS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2666 		DPRINTF("%s: error writing host FS selector\n", __func__);
2667 		ret = EINVAL;
2668 		goto exit;
2669 	}
2670 
2671 	if (vmwrite(VMCS_HOST_IA32_GS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2672 		DPRINTF("%s: error writing host GS selector\n", __func__);
2673 		ret = EINVAL;
2674 		goto exit;
2675 	}
2676 
2677 	if (vmwrite(VMCS_HOST_IA32_SS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2678 		DPRINTF("%s: error writing host SS selector\n", __func__);
2679 		ret = EINVAL;
2680 		goto exit;
2681 	}
2682 
2683 	if (vmwrite(VMCS_HOST_IA32_TR_SEL, GSYSSEL(GPROC0_SEL, SEL_KPL))) {
2684 		DPRINTF("%s: error writing host TR selector\n", __func__);
2685 		ret = EINVAL;
2686 		goto exit;
2687 	}
2688 
2689 	/* Host IDTR base */
2690 	if (vmwrite(VMCS_HOST_IA32_IDTR_BASE, idt_vaddr)) {
2691 		DPRINTF("%s: error writing host IDTR base\n", __func__);
2692 		ret = EINVAL;
2693 		goto exit;
2694 	}
2695 
2696 	/* VMCS link */
2697 	if (vmwrite(VMCS_LINK_POINTER, VMX_VMCS_PA_CLEAR)) {
2698 		DPRINTF("%s: error writing VMCS link pointer\n", __func__);
2699 		ret = EINVAL;
2700 		goto exit;
2701 	}
2702 
2703 	/* Flush the initial VMCS */
2704 	if (vmclear(&vcpu->vc_control_pa)) {
2705 		DPRINTF("%s: vmclear failed\n", __func__);
2706 		ret = EINVAL;
2707 	}
2708 
2709 exit:
2710 	if (ret)
2711 		vcpu_deinit_vmx(vcpu);
2712 
2713 	return (ret);
2714 }
2715 
2716 /*
2717  * vcpu_reset_regs
2718  *
2719  * Resets a vcpu's registers to the provided state
2720  *
2721  * Parameters:
2722  *  vcpu: the vcpu whose registers shall be reset
2723  *  vrs: the desired register state
2724  *
2725  * Return values:
2726  *  0: the vcpu's registers were successfully reset
2727  *  !0: the vcpu's registers could not be reset (see arch-specific reset
2728  *      function for various values that can be returned here)
2729  */
2730 int
vcpu_reset_regs(struct vcpu * vcpu,struct vcpu_reg_state * vrs)2731 vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2732 {
2733 	int ret;
2734 
2735 	if (vmm_softc->mode == VMM_MODE_EPT)
2736 		ret = vcpu_reset_regs_vmx(vcpu, vrs);
2737 	else if (vmm_softc->mode == VMM_MODE_RVI)
2738 		ret = vcpu_reset_regs_svm(vcpu, vrs);
2739 	else
2740 		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2741 
2742 	return (ret);
2743 }
2744 
2745 /*
2746  * vcpu_init_svm
2747  *
2748  * AMD SVM specific VCPU initialization routine.
2749  *
2750  * This function allocates various per-VCPU memory regions, sets up initial
2751  * VCPU VMCB controls, and sets initial register values.
2752  *
2753  * Parameters:
2754  *  vcpu: the VCPU structure being initialized
2755  *  vcp: parameters provided by vmd(8)
2756  *
2757  * Return values:
2758  *  0: the VCPU was initialized successfully
2759  *  ENOMEM: insufficient resources
2760  *  EINVAL: an error occurred during VCPU initialization
2761  */
2762 int
vcpu_init_svm(struct vcpu * vcpu,struct vm_create_params * vcp)2763 vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
2764 {
2765 	int ret = 0;
2766 
2767 	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
2768 	if (vmm_alloc_vpid(&vcpu->vc_vpid))
2769 		return (ENOMEM);
2770 
2771 	/* Allocate VMCB VA */
2772 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2773 	    &kd_waitok);
2774 
2775 	if (!vcpu->vc_control_va) {
2776 		ret = ENOMEM;
2777 		goto exit;
2778 	}
2779 
2780 	/* Compute VMCB PA */
2781 	if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2782 	    (paddr_t *)&vcpu->vc_control_pa)) {
2783 		ret = ENOMEM;
2784 		goto exit;
2785 	}
2786 
2787 	DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__,
2788 	    (uint64_t)vcpu->vc_control_va,
2789 	    (uint64_t)vcpu->vc_control_pa);
2790 
2791 
2792 	/* Allocate MSR bitmap VA (2 pages) */
2793 	vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any,
2794 	    &vmm_kp_contig, &kd_waitok);
2795 
2796 	if (!vcpu->vc_msr_bitmap_va) {
2797 		ret = ENOMEM;
2798 		goto exit;
2799 	}
2800 
2801 	/* Compute MSR bitmap PA */
2802 	if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2803 	    (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2804 		ret = ENOMEM;
2805 		goto exit;
2806 	}
2807 
2808 	DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__,
2809 	    (uint64_t)vcpu->vc_msr_bitmap_va,
2810 	    (uint64_t)vcpu->vc_msr_bitmap_pa);
2811 
2812 	/* Allocate host state area VA */
2813 	vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2814 	   &kp_zero, &kd_waitok);
2815 
2816 	if (!vcpu->vc_svm_hsa_va) {
2817 		ret = ENOMEM;
2818 		goto exit;
2819 	}
2820 
2821 	/* Compute host state area PA */
2822 	if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_hsa_va,
2823 	    &vcpu->vc_svm_hsa_pa)) {
2824 		ret = ENOMEM;
2825 		goto exit;
2826 	}
2827 
2828 	DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
2829 	    (uint64_t)vcpu->vc_svm_hsa_va,
2830 	    (uint64_t)vcpu->vc_svm_hsa_pa);
2831 
2832 	/* Allocate IOIO area VA (3 pages) */
2833 	vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any,
2834 	   &vmm_kp_contig, &kd_waitok);
2835 
2836 	if (!vcpu->vc_svm_ioio_va) {
2837 		ret = ENOMEM;
2838 		goto exit;
2839 	}
2840 
2841 	/* Compute IOIO area PA */
2842 	if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_ioio_va,
2843 	    &vcpu->vc_svm_ioio_pa)) {
2844 		ret = ENOMEM;
2845 		goto exit;
2846 	}
2847 
2848 	DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__,
2849 	    (uint64_t)vcpu->vc_svm_ioio_va,
2850 	    (uint64_t)vcpu->vc_svm_ioio_pa);
2851 
2852 	/* Shall we enable SEV? */
2853 	vcpu->vc_sev = vcp->vcp_sev;
2854 
2855 	/* Inform vmd(8) about ASID and C bit position. */
2856 	vcp->vcp_poscbit = amd64_pos_cbit;
2857 	vcp->vcp_asid[vcpu->vc_id] = vcpu->vc_vpid;
2858 
2859 exit:
2860 	if (ret)
2861 		vcpu_deinit_svm(vcpu);
2862 
2863 	return (ret);
2864 }
2865 
2866 /*
2867  * vcpu_init
2868  *
2869  * Calls the architecture-specific VCPU init routine
2870  */
2871 int
vcpu_init(struct vcpu * vcpu,struct vm_create_params * vcp)2872 vcpu_init(struct vcpu *vcpu, struct vm_create_params *vcp)
2873 {
2874 	int ret = 0;
2875 
2876 	vcpu->vc_virt_mode = vmm_softc->mode;
2877 	vcpu->vc_state = VCPU_STATE_STOPPED;
2878 	vcpu->vc_vpid = 0;
2879 	vcpu->vc_pvclock_system_gpa = 0;
2880 	vcpu->vc_last_pcpu = NULL;
2881 
2882 	rw_init(&vcpu->vc_lock, "vcpu");
2883 
2884 	/* Shadow PAT MSR, starting with host's value. */
2885 	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2886 
2887 	if (vmm_softc->mode == VMM_MODE_EPT)
2888 		ret = vcpu_init_vmx(vcpu);
2889 	else if (vmm_softc->mode == VMM_MODE_RVI)
2890 		ret = vcpu_init_svm(vcpu, vcp);
2891 	else
2892 		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2893 
2894 	return (ret);
2895 }
2896 
2897 /*
2898  * vcpu_deinit_vmx
2899  *
2900  * Deinitializes the vcpu described by 'vcpu'
2901  *
2902  * Parameters:
2903  *  vcpu: the vcpu to be deinited
2904  */
2905 void
vcpu_deinit_vmx(struct vcpu * vcpu)2906 vcpu_deinit_vmx(struct vcpu *vcpu)
2907 {
2908 	if (vcpu->vc_control_va) {
2909 		km_free((void *)vcpu->vc_control_va, PAGE_SIZE,
2910 		    &kv_page, &kp_zero);
2911 		vcpu->vc_control_va = 0;
2912 	}
2913 	if (vcpu->vc_vmx_msr_exit_save_va) {
2914 		km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
2915 		    PAGE_SIZE, &kv_page, &kp_zero);
2916 		vcpu->vc_vmx_msr_exit_save_va = 0;
2917 	}
2918 	if (vcpu->vc_vmx_msr_exit_load_va) {
2919 		km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
2920 		    PAGE_SIZE, &kv_page, &kp_zero);
2921 		vcpu->vc_vmx_msr_exit_load_va = 0;
2922 	}
2923 #if 0
2924 	if (vcpu->vc_vmx_msr_entry_load_va) {
2925 		km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
2926 		    PAGE_SIZE, &kv_page, &kp_zero);
2927 		vcpu->vc_vmx_msr_entry_load_va = 0;
2928 	}
2929 #endif
2930 
2931 	vmm_free_vpid(vcpu->vc_vpid);
2932 }
2933 
2934 /*
2935  * vcpu_deinit_svm
2936  *
2937  * Deinitializes the vcpu described by 'vcpu'
2938  *
2939  * Parameters:
2940  *  vcpu: the vcpu to be deinited
2941  */
2942 void
vcpu_deinit_svm(struct vcpu * vcpu)2943 vcpu_deinit_svm(struct vcpu *vcpu)
2944 {
2945 	if (vcpu->vc_control_va) {
2946 		km_free((void *)vcpu->vc_control_va, PAGE_SIZE, &kv_page,
2947 		    &kp_zero);
2948 		vcpu->vc_control_va = 0;
2949 	}
2950 	if (vcpu->vc_msr_bitmap_va) {
2951 		km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, &kv_any,
2952 		    &vmm_kp_contig);
2953 		vcpu->vc_msr_bitmap_va = 0;
2954 	}
2955 	if (vcpu->vc_svm_hsa_va) {
2956 		km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, &kv_page,
2957 		    &kp_zero);
2958 		vcpu->vc_svm_hsa_va = 0;
2959 	}
2960 	if (vcpu->vc_svm_ioio_va) {
2961 		km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any,
2962 		    &vmm_kp_contig);
2963 		vcpu->vc_svm_ioio_va = 0;
2964 	}
2965 
2966 	vmm_free_vpid(vcpu->vc_vpid);
2967 }
2968 
2969 /*
2970  * vcpu_deinit
2971  *
2972  * Calls the architecture-specific VCPU deinit routine
2973  *
2974  * Parameters:
2975  *  vcpu: the vcpu to be deinited
2976  */
2977 void
vcpu_deinit(struct vcpu * vcpu)2978 vcpu_deinit(struct vcpu *vcpu)
2979 {
2980 	if (vmm_softc->mode == VMM_MODE_EPT)
2981 		vcpu_deinit_vmx(vcpu);
2982 	else if	(vmm_softc->mode == VMM_MODE_RVI)
2983 		vcpu_deinit_svm(vcpu);
2984 	else
2985 		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2986 }
2987 
2988 /*
2989  * vcpu_vmx_check_cap
2990  *
2991  * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1
2992  * or set = 0, respectively).
2993  *
2994  * When considering 'msr', we check to see if true controls are available,
2995  * and use those if so.
2996  *
2997  * Returns 1 of 'cap' can be set/cleared as requested, 0 otherwise.
2998  */
2999 int
vcpu_vmx_check_cap(struct vcpu * vcpu,uint32_t msr,uint32_t cap,int set)3000 vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set)
3001 {
3002 	uint64_t ctl;
3003 
3004 	if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
3005 		switch (msr) {
3006 		case IA32_VMX_PINBASED_CTLS:
3007 			ctl = vcpu->vc_vmx_true_pinbased_ctls;
3008 			break;
3009 		case IA32_VMX_PROCBASED_CTLS:
3010 			ctl = vcpu->vc_vmx_true_procbased_ctls;
3011 			break;
3012 		case IA32_VMX_PROCBASED2_CTLS:
3013 			ctl = vcpu->vc_vmx_procbased2_ctls;
3014 			break;
3015 		case IA32_VMX_ENTRY_CTLS:
3016 			ctl = vcpu->vc_vmx_true_entry_ctls;
3017 			break;
3018 		case IA32_VMX_EXIT_CTLS:
3019 			ctl = vcpu->vc_vmx_true_exit_ctls;
3020 			break;
3021 		default:
3022 			return (0);
3023 		}
3024 	} else {
3025 		switch (msr) {
3026 		case IA32_VMX_PINBASED_CTLS:
3027 			ctl = vcpu->vc_vmx_pinbased_ctls;
3028 			break;
3029 		case IA32_VMX_PROCBASED_CTLS:
3030 			ctl = vcpu->vc_vmx_procbased_ctls;
3031 			break;
3032 		case IA32_VMX_PROCBASED2_CTLS:
3033 			ctl = vcpu->vc_vmx_procbased2_ctls;
3034 			break;
3035 		case IA32_VMX_ENTRY_CTLS:
3036 			ctl = vcpu->vc_vmx_entry_ctls;
3037 			break;
3038 		case IA32_VMX_EXIT_CTLS:
3039 			ctl = vcpu->vc_vmx_exit_ctls;
3040 			break;
3041 		default:
3042 			return (0);
3043 		}
3044 	}
3045 
3046 	if (set) {
3047 		/* Check bit 'cap << 32', must be !0 */
3048 		return (ctl & ((uint64_t)cap << 32)) != 0;
3049 	} else {
3050 		/* Check bit 'cap', must be 0 */
3051 		return (ctl & cap) == 0;
3052 	}
3053 }
3054 
3055 /*
3056  * vcpu_vmx_compute_ctrl
3057  *
3058  * Computes the appropriate control value, given the supplied parameters
3059  * and CPU capabilities.
3060  *
3061  * Intel has made somewhat of a mess of this computation - it is described
3062  * using no fewer than three different approaches, spread across many
3063  * pages of the SDM. Further compounding the problem is the fact that now
3064  * we have "true controls" for each type of "control", and each needs to
3065  * be examined to get the calculation right, but only if "true" controls
3066  * are present on the CPU we're on.
3067  *
3068  * Parameters:
3069  *  ctrlval: the control value, as read from the CPU MSR
3070  *  ctrl: which control is being set (eg, pinbased, procbased, etc)
3071  *  want0: the set of desired 0 bits
3072  *  want1: the set of desired 1 bits
3073  *  out: (out) the correct value to write into the VMCS for this VCPU,
3074  *      for the 'ctrl' desired.
3075  *
3076  * Returns 0 if successful, or EINVAL if the supplied parameters define
3077  *     an unworkable control setup.
3078  */
3079 int
vcpu_vmx_compute_ctrl(uint64_t ctrlval,uint16_t ctrl,uint32_t want1,uint32_t want0,uint32_t * out)3080 vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1,
3081 	uint32_t want0, uint32_t *out)
3082 {
3083 	int i, set, clear;
3084 
3085 	*out = 0;
3086 
3087 	/*
3088 	 * The Intel SDM gives three formulae for determining which bits to
3089 	 * set/clear for a given control and desired functionality. Formula
3090 	 * 1 is the simplest but disallows use of newer features that are
3091 	 * enabled by functionality in later CPUs.
3092 	 *
3093 	 * Formulas 2 and 3 allow such extra functionality. We use formula
3094 	 * 2 - this requires us to know the identity of controls in the
3095 	 * "default1" class for each control register, but allows us to not
3096 	 * have to pass along and/or query both sets of capability MSRs for
3097 	 * each control lookup. This makes the code slightly longer,
3098 	 * however.
3099 	 */
3100 	for (i = 0; i < 32; i++) {
3101 		/* Figure out if we can set and / or clear this bit */
3102 		set = (ctrlval & (1ULL << (i + 32))) != 0;
3103 		clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0;
3104 
3105 		/* If the bit can't be set nor cleared, something's wrong */
3106 		if (!set && !clear)
3107 			return (EINVAL);
3108 
3109 		/*
3110 		 * Formula 2.c.i - "If the relevant VMX capability MSR
3111 		 * reports that a control has a single setting, use that
3112 		 * setting."
3113 		 */
3114 		if (set && !clear) {
3115 			if (want0 & (1ULL << i))
3116 				return (EINVAL);
3117 			else
3118 				*out |= (1ULL << i);
3119 		} else if (clear && !set) {
3120 			if (want1 & (1ULL << i))
3121 				return (EINVAL);
3122 			else
3123 				*out &= ~(1ULL << i);
3124 		} else {
3125 			/*
3126 			 * 2.c.ii - "If the relevant VMX capability MSR
3127 			 * reports that a control can be set to 0 or 1
3128 			 * and that control's meaning is known to the VMM,
3129 			 * set the control based on the functionality desired."
3130 			 */
3131 			if (want1 & (1ULL << i))
3132 				*out |= (1ULL << i);
3133 			else if (want0 & (1 << i))
3134 				*out &= ~(1ULL << i);
3135 			else {
3136 				/*
3137 				 * ... assuming the control's meaning is not
3138 				 * known to the VMM ...
3139 				 *
3140 				 * 2.c.iii - "If the relevant VMX capability
3141 				 * MSR reports that a control can be set to 0
3142 			 	 * or 1 and the control is not in the default1
3143 				 * class, set the control to 0."
3144 				 *
3145 				 * 2.c.iv - "If the relevant VMX capability
3146 				 * MSR reports that a control can be set to 0
3147 				 * or 1 and the control is in the default1
3148 				 * class, set the control to 1."
3149 				 */
3150 				switch (ctrl) {
3151 				case IA32_VMX_PINBASED_CTLS:
3152 				case IA32_VMX_TRUE_PINBASED_CTLS:
3153 					/*
3154 					 * A.3.1 - default1 class of pinbased
3155 					 * controls comprises bits 1,2,4
3156 					 */
3157 					switch (i) {
3158 						case 1:
3159 						case 2:
3160 						case 4:
3161 							*out |= (1ULL << i);
3162 							break;
3163 						default:
3164 							*out &= ~(1ULL << i);
3165 							break;
3166 					}
3167 					break;
3168 				case IA32_VMX_PROCBASED_CTLS:
3169 				case IA32_VMX_TRUE_PROCBASED_CTLS:
3170 					/*
3171 					 * A.3.2 - default1 class of procbased
3172 					 * controls comprises bits 1, 4-6, 8,
3173 					 * 13-16, 26
3174 					 */
3175 					switch (i) {
3176 						case 1:
3177 						case 4 ... 6:
3178 						case 8:
3179 						case 13 ... 16:
3180 						case 26:
3181 							*out |= (1ULL << i);
3182 							break;
3183 						default:
3184 							*out &= ~(1ULL << i);
3185 							break;
3186 					}
3187 					break;
3188 					/*
3189 					 * Unknown secondary procbased controls
3190 					 * can always be set to 0
3191 					 */
3192 				case IA32_VMX_PROCBASED2_CTLS:
3193 					*out &= ~(1ULL << i);
3194 					break;
3195 				case IA32_VMX_EXIT_CTLS:
3196 				case IA32_VMX_TRUE_EXIT_CTLS:
3197 					/*
3198 					 * A.4 - default1 class of exit
3199 					 * controls comprises bits 0-8, 10,
3200 					 * 11, 13, 14, 16, 17
3201 					 */
3202 					switch (i) {
3203 						case 0 ... 8:
3204 						case 10 ... 11:
3205 						case 13 ... 14:
3206 						case 16 ... 17:
3207 							*out |= (1ULL << i);
3208 							break;
3209 						default:
3210 							*out &= ~(1ULL << i);
3211 							break;
3212 					}
3213 					break;
3214 				case IA32_VMX_ENTRY_CTLS:
3215 				case IA32_VMX_TRUE_ENTRY_CTLS:
3216 					/*
3217 					 * A.5 - default1 class of entry
3218 					 * controls comprises bits 0-8, 12
3219 					 */
3220 					switch (i) {
3221 						case 0 ... 8:
3222 						case 12:
3223 							*out |= (1ULL << i);
3224 							break;
3225 						default:
3226 							*out &= ~(1ULL << i);
3227 							break;
3228 					}
3229 					break;
3230 				}
3231 			}
3232 		}
3233 	}
3234 
3235 	return (0);
3236 }
3237 
3238 /*
3239  * vm_run
3240  *
3241  * Run the vm / vcpu specified by 'vrp'
3242  *
3243  * Parameters:
3244  *  vrp: structure defining the VM to run
3245  *
3246  * Return value:
3247  *  ENOENT: the VM defined in 'vrp' could not be located
3248  *  EBUSY: the VM defined in 'vrp' is already running
3249  *  EFAULT: error copying data from userspace (vmd) on return from previous
3250  *      exit.
3251  *  EAGAIN: help is needed from vmd(8) (device I/O or exit vmm(4) cannot
3252  *      handle in-kernel.)
3253  *  0: the run loop exited and no help is needed from vmd(8)
3254  */
3255 int
vm_run(struct vm_run_params * vrp)3256 vm_run(struct vm_run_params *vrp)
3257 {
3258 	struct vm *vm;
3259 	struct vcpu *vcpu;
3260 	int ret = 0;
3261 	u_int old, next;
3262 
3263 	/*
3264 	 * Find desired VM
3265 	 */
3266 	ret = vm_find(vrp->vrp_vm_id, &vm);
3267 	if (ret)
3268 		return (ret);
3269 
3270 	vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
3271 	if (vcpu == NULL) {
3272 		ret = ENOENT;
3273 		goto out;
3274 	}
3275 
3276 	/*
3277 	 * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
3278 	 * Failure to make the transition indicates the VCPU is busy.
3279 	 */
3280 	rw_enter_write(&vcpu->vc_lock);
3281 	old = VCPU_STATE_STOPPED;
3282 	next = VCPU_STATE_RUNNING;
3283 	if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) {
3284 		ret = EBUSY;
3285 		goto out_unlock;
3286 	}
3287 
3288 	/*
3289 	 * We may be returning from userland helping us from the last
3290 	 * exit. Copy in the exit data from vmd. The exit data will be
3291 	 * consumed before the next entry (this typically comprises
3292 	 * VCPU register changes as the result of vmd(8)'s actions).
3293 	 */
3294 	ret = copyin(vrp->vrp_exit, &vcpu->vc_exit, sizeof(struct vm_exit));
3295 	if (ret)
3296 		goto out_unlock;
3297 
3298 	vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type;
3299 	vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector;
3300 	vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode;
3301 
3302 	WRITE_ONCE(vcpu->vc_curcpu, curcpu());
3303 	/* Run the VCPU specified in vrp */
3304 	if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
3305 		ret = vcpu_run_vmx(vcpu, vrp);
3306 	} else if (vcpu->vc_virt_mode == VMM_MODE_RVI) {
3307 		ret = vcpu_run_svm(vcpu, vrp);
3308 	}
3309 	WRITE_ONCE(vcpu->vc_curcpu, NULL);
3310 
3311 	if (ret == 0 || ret == EAGAIN) {
3312 		/* If we are exiting, populate exit data so vmd can help. */
3313 		vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE
3314 		    : vcpu->vc_gueststate.vg_exit_reason;
3315 		vrp->vrp_irqready = vcpu->vc_irqready;
3316 		vcpu->vc_state = VCPU_STATE_STOPPED;
3317 
3318 		if (copyout(&vcpu->vc_exit, vrp->vrp_exit,
3319 		    sizeof(struct vm_exit)) == EFAULT) {
3320 			ret = EFAULT;
3321 		} else
3322 			ret = 0;
3323 	} else {
3324 		vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
3325 		vcpu->vc_state = VCPU_STATE_TERMINATED;
3326 	}
3327 out_unlock:
3328 	rw_exit_write(&vcpu->vc_lock);
3329 out:
3330 	refcnt_rele_wake(&vm->vm_refcnt);
3331 	return (ret);
3332 }
3333 
3334 /*
3335  * vmm_fpurestore
3336  *
3337  * Restore the guest's FPU state, saving the existing userland thread's
3338  * FPU context if necessary.  Must be called with interrupts disabled.
3339  */
3340 int
vmm_fpurestore(struct vcpu * vcpu)3341 vmm_fpurestore(struct vcpu *vcpu)
3342 {
3343 	struct cpu_info *ci = curcpu();
3344 
3345 	rw_assert_wrlock(&vcpu->vc_lock);
3346 
3347 	/* save vmm's FPU state if we haven't already */
3348 	if (ci->ci_pflags & CPUPF_USERXSTATE) {
3349 		ci->ci_pflags &= ~CPUPF_USERXSTATE;
3350 		fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
3351 	}
3352 
3353 	if (vcpu->vc_fpuinited)
3354 		xrstor_kern(&vcpu->vc_g_fpu, xsave_mask);
3355 
3356 	if (xsave_mask) {
3357 		/* Restore guest %xcr0 */
3358 		if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
3359 			DPRINTF("%s: guest attempted to set invalid bits in "
3360 			    "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
3361 			    __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
3362 			return EINVAL;
3363 		}
3364 	}
3365 
3366 	return 0;
3367 }
3368 
3369 /*
3370  * vmm_fpusave
3371  *
3372  * Save the guest's FPU state.  Must be called with interrupts disabled.
3373  */
3374 void
vmm_fpusave(struct vcpu * vcpu)3375 vmm_fpusave(struct vcpu *vcpu)
3376 {
3377 	rw_assert_wrlock(&vcpu->vc_lock);
3378 
3379 	if (xsave_mask) {
3380 		/* Save guest %xcr0 */
3381 		vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
3382 
3383 		/* Restore host %xcr0 */
3384 		xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
3385 	}
3386 
3387 	/*
3388 	 * Save full copy of FPU state - guest content is always
3389 	 * a subset of host's save area (see xsetbv exit handler)
3390 	 */
3391 	fpusavereset(&vcpu->vc_g_fpu);
3392 	vcpu->vc_fpuinited = 1;
3393 }
3394 
3395 /*
3396  * vmm_translate_gva
3397  *
3398  * Translates a guest virtual address to a guest physical address by walking
3399  * the currently active page table (if needed).
3400  *
3401  * Note - this function can possibly alter the supplied VCPU state.
3402  *  Specifically, it may inject exceptions depending on the current VCPU
3403  *  configuration, and may alter %cr2 on #PF. Consequently, this function
3404  *  should only be used as part of instruction emulation.
3405  *
3406  * Parameters:
3407  *  vcpu: The VCPU this translation should be performed for (guest MMU settings
3408  *   are gathered from this VCPU)
3409  *  va: virtual address to translate
3410  *  pa: pointer to paddr_t variable that will receive the translated physical
3411  *   address. 'pa' is unchanged on error.
3412  *  mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
3413  *   the address should be translated
3414  *
3415  * Return values:
3416  *  0: the address was successfully translated - 'pa' contains the physical
3417  *     address currently mapped by 'va'.
3418  *  EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
3419  *     and %cr2 set in the vcpu structure.
3420  *  EINVAL: an error occurred reading paging table structures
3421  */
3422 int
vmm_translate_gva(struct vcpu * vcpu,uint64_t va,uint64_t * pa,int mode)3423 vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
3424 {
3425 	int level, shift, pdidx;
3426 	uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
3427 	uint64_t shift_width, pte_size, *hva;
3428 	paddr_t hpa;
3429 	struct vcpu_reg_state vrs;
3430 
3431 	level = 0;
3432 
3433 	if (vmm_softc->mode == VMM_MODE_EPT) {
3434 		if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 1, &vrs))
3435 			return (EINVAL);
3436 	} else if (vmm_softc->mode == VMM_MODE_RVI) {
3437 		if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs))
3438 			return (EINVAL);
3439 	} else {
3440 		printf("%s: unknown vmm mode", __func__);
3441 		return (EINVAL);
3442 	}
3443 
3444 	DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
3445 	    vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);
3446 
3447 	if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
3448 		DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
3449 		    va);
3450 		*pa = va;
3451 		return (0);
3452 	}
3453 
3454 	pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3];
3455 
3456 	if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
3457 		if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
3458 			pte_size = sizeof(uint64_t);
3459 			shift_width = 9;
3460 
3461 			if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
3462 				level = 4;
3463 				mask = L4_MASK;
3464 				shift = L4_SHIFT;
3465 			} else {
3466 				level = 3;
3467 				mask = L3_MASK;
3468 				shift = L3_SHIFT;
3469 			}
3470 		} else {
3471 			level = 2;
3472 			shift_width = 10;
3473 			mask = 0xFFC00000;
3474 			shift = 22;
3475 			pte_size = sizeof(uint32_t);
3476 		}
3477 	} else {
3478 		return (EINVAL);
3479 	}
3480 
3481 	DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
3482 	    "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
3483 	    shift_width);
3484 
3485 	/* XXX: Check for R bit in segment selector and set A bit */
3486 
3487 	for (;level > 0; level--) {
3488 		pdidx = (va & mask) >> shift;
3489 		pte_paddr = (pt_paddr) + (pdidx * pte_size);
3490 
3491 		DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
3492 		    level, pte_paddr);
3493 		if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
3494 		    &hpa)) {
3495 			DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
3496 			    __func__, pte_paddr);
3497 			return (EINVAL);
3498 		}
3499 
3500 		hpa = hpa | (pte_paddr & 0xFFF);
3501 		hva = (uint64_t *)PMAP_DIRECT_MAP(hpa);
3502 		DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
3503 		    __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
3504 		if (pte_size == 8)
3505 			pte = *hva;
3506 		else
3507 			pte = *(uint32_t *)hva;
3508 
3509 		DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
3510 		    pte);
3511 
3512 		/* XXX: Set CR2  */
3513 		if (!(pte & PG_V))
3514 			return (EFAULT);
3515 
3516 		/* XXX: Check for SMAP */
3517 		if ((mode == PROT_WRITE) && !(pte & PG_RW))
3518 			return (EPERM);
3519 
3520 		if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u))
3521 			return (EPERM);
3522 
3523 		pte = pte | PG_U;
3524 		if (mode == PROT_WRITE)
3525 			pte = pte | PG_M;
3526 		*hva = pte;
3527 
3528 		/* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
3529 		if (pte & PG_PS)
3530 			break;
3531 
3532 		if (level > 1) {
3533 			pt_paddr = pte & PG_FRAME;
3534 			shift -= shift_width;
3535 			mask = mask >> shift_width;
3536 		}
3537 	}
3538 
3539 	low_mask = ((uint64_t)1ULL << shift) - 1;
3540 	high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
3541 	*pa = (pte & high_mask) | (va & low_mask);
3542 
3543 	DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
3544 	    va, *pa);
3545 
3546 	return (0);
3547 }
3548 
3549 
3550 /*
3551  * vcpu_run_vmx
3552  *
3553  * VMX main loop used to run a VCPU.
3554  *
3555  * Parameters:
3556  *  vcpu: The VCPU to run
3557  *  vrp: run parameters
3558  *
3559  * Return values:
3560  *  0: The run loop exited and no help is needed from vmd
3561  *  EAGAIN: The run loop exited and help from vmd is needed
3562  *  EINVAL: an error occurred
3563  */
3564 int
vcpu_run_vmx(struct vcpu * vcpu,struct vm_run_params * vrp)3565 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
3566 {
3567 	int ret = 0, exitinfo;
3568 	struct region_descriptor gdt;
3569 	struct cpu_info *ci = NULL;
3570 	uint64_t exit_reason, cr3, msr, insn_error;
3571 	struct schedstate_percpu *spc;
3572 	struct vmx_msr_store *msr_store;
3573 	struct vmx_invvpid_descriptor vid;
3574 	struct vmx_invept_descriptor vid_ept;
3575 	uint64_t cr0, eii, procbased, int_st;
3576 	u_long s;
3577 
3578 	rw_assert_wrlock(&vcpu->vc_lock);
3579 
3580 	if (vcpu_reload_vmcs_vmx(vcpu)) {
3581 		printf("%s: failed (re)loading vmcs\n", __func__);
3582 		return (EINVAL);
3583 	}
3584 
3585 	/*
3586 	 * If we are returning from userspace (vmd) because we exited
3587 	 * last time, fix up any needed vcpu state first. Which state
3588 	 * needs to be fixed up depends on what vmd populated in the
3589 	 * exit data structure.
3590 	 */
3591 	if (vrp->vrp_intr_pending)
3592 		vcpu->vc_intr = 1;
3593 	else
3594 		vcpu->vc_intr = 0;
3595 
3596 	switch (vcpu->vc_gueststate.vg_exit_reason) {
3597 	case VMX_EXIT_IO:
3598 		if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN)
3599 			vcpu->vc_gueststate.vg_rax = vcpu->vc_exit.vei.vei_data;
3600 		vcpu->vc_gueststate.vg_rip =
3601 		    vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
3602 		if (vmwrite(VMCS_GUEST_IA32_RIP, vcpu->vc_gueststate.vg_rip)) {
3603 			printf("%s: failed to update rip\n", __func__);
3604 			return (EINVAL);
3605 		}
3606 		break;
3607 	case VMX_EXIT_EPT_VIOLATION:
3608 		ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
3609 		    &vcpu->vc_exit.vrs);
3610 		if (ret) {
3611 			printf("%s: vm %d vcpu %d failed to update registers\n",
3612 			    __func__, vcpu->vc_parent->vm_id, vcpu->vc_id);
3613 			return (EINVAL);
3614 		}
3615 		break;
3616 	}
3617 	memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
3618 
3619 	/* Handle vmd(8) injected interrupts */
3620 	/* Is there an interrupt pending injection? */
3621 	if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
3622 		if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
3623 			printf("%s: can't get interruptibility state\n",
3624 			    __func__);
3625 			return (EINVAL);
3626 		}
3627 
3628 		/* Interruptibility state 0x3 covers NMIs and STI */
3629 		if (!(int_st & 0x3) && vcpu->vc_irqready) {
3630 			eii = (uint64_t)vcpu->vc_inject.vie_vector;
3631 			eii |= (1ULL << 31);	/* Valid */
3632 			if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3633 				printf("vcpu_run_vmx: can't vector "
3634 				    "interrupt to guest\n");
3635 				return (EINVAL);
3636 			}
3637 
3638 			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3639 		}
3640 	} else if (!vcpu->vc_intr) {
3641 		/*
3642 		 * Disable window exiting
3643 		 */
3644 		if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3645 			printf("%s: can't read procbased ctls on exit\n",
3646 			    __func__);
3647 			return (EINVAL);
3648 		} else {
3649 			procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING;
3650 			if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3651 				printf("%s: can't write procbased ctls "
3652 				    "on exit\n", __func__);
3653 				return (EINVAL);
3654 			}
3655 		}
3656 	}
3657 
3658 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
3659 	while (ret == 0) {
3660 #ifdef VMM_DEBUG
3661 		paddr_t pa = 0ULL;
3662 		vmptrst(&pa);
3663 		KASSERT(pa == vcpu->vc_control_pa);
3664 #endif /* VMM_DEBUG */
3665 
3666 		vmm_update_pvclock(vcpu);
3667 
3668 		if (ci != curcpu()) {
3669 			ci = curcpu();
3670 			vcpu->vc_last_pcpu = ci;
3671 
3672 			/* We're now using this vcpu's EPT pmap on this cpu. */
3673 			atomic_swap_ptr(&ci->ci_ept_pmap,
3674 			    vcpu->vc_parent->vm_map->pmap);
3675 
3676 			/* Invalidate EPT cache. */
3677 			vid_ept.vid_reserved = 0;
3678 			vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
3679 			if (invept(ci->ci_vmm_cap.vcc_vmx.vmx_invept_mode,
3680 			    &vid_ept)) {
3681 				printf("%s: invept\n", __func__);
3682 				return (EINVAL);
3683 			}
3684 
3685 			/* Host CR3 */
3686 			cr3 = rcr3();
3687 			if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
3688 				printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
3689 				    VMCS_HOST_IA32_CR3, cr3);
3690 				return (EINVAL);
3691 			}
3692 
3693 			setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
3694 			if (gdt.rd_base == 0) {
3695 				printf("%s: setregion\n", __func__);
3696 				return (EINVAL);
3697 			}
3698 
3699 			/* Host GDTR base */
3700 			if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) {
3701 				printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3702 				    __func__, VMCS_HOST_IA32_GDTR_BASE,
3703 				    gdt.rd_base);
3704 				return (EINVAL);
3705 			}
3706 
3707 			/* Host TR base */
3708 			if (vmwrite(VMCS_HOST_IA32_TR_BASE,
3709 			    (uint64_t)ci->ci_tss)) {
3710 				printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3711 				    __func__, VMCS_HOST_IA32_TR_BASE,
3712 				    (uint64_t)ci->ci_tss);
3713 				return (EINVAL);
3714 			}
3715 
3716 			/* Host GS.base (aka curcpu) */
3717 			if (vmwrite(VMCS_HOST_IA32_GS_BASE, (uint64_t)ci)) {
3718 				printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3719 				    __func__, VMCS_HOST_IA32_GS_BASE,
3720 				    (uint64_t)ci);
3721 				return (EINVAL);
3722 			}
3723 
3724 			/* Host FS.base */
3725 			msr = rdmsr(MSR_FSBASE);
3726 			if (vmwrite(VMCS_HOST_IA32_FS_BASE, msr)) {
3727 				printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3728 				    __func__, VMCS_HOST_IA32_FS_BASE, msr);
3729 				return (EINVAL);
3730 			}
3731 
3732 			/* Host KernelGS.base (userspace GS.base here) */
3733 			msr_store[VCPU_HOST_REGS_KGSBASE].vms_data =
3734 			    rdmsr(MSR_KERNELGSBASE);
3735 		}
3736 
3737 		/* Inject event if present */
3738 		if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
3739 			eii = (uint64_t)vcpu->vc_inject.vie_vector;
3740 			eii |= (1ULL << 31);	/* Valid */
3741 
3742 			switch (vcpu->vc_inject.vie_vector) {
3743 			case VMM_EX_BP:
3744 			case VMM_EX_OF:
3745 				/* Software Exceptions */
3746 				eii |= (4ULL << 8);
3747 				break;
3748 			case VMM_EX_DF:
3749 			case VMM_EX_TS:
3750 			case VMM_EX_NP:
3751 			case VMM_EX_SS:
3752 			case VMM_EX_GP:
3753 			case VMM_EX_PF:
3754 			case VMM_EX_AC:
3755 				/* Hardware Exceptions */
3756 				eii |= (3ULL << 8);
3757 				cr0 = 0;
3758 				if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) {
3759 					printf("%s: vmread(VMCS_GUEST_IA32_CR0)"
3760 					    "\n", __func__);
3761 					ret = EINVAL;
3762 					break;
3763 				}
3764 
3765 				/* Don't set error codes if in real mode. */
3766 				if (ret == EINVAL || !(cr0 & CR0_PE))
3767 					break;
3768 				eii |= (1ULL << 11);
3769 
3770 				/* Enforce a 0 error code for #AC. */
3771 				if (vcpu->vc_inject.vie_vector == VMM_EX_AC)
3772 					vcpu->vc_inject.vie_errorcode = 0;
3773 				/*
3774 				 * XXX: Intel SDM says if IA32_VMX_BASIC[56] is
3775 				 * set, error codes can be injected for hw
3776 				 * exceptions with or without error code,
3777 				 * regardless of vector. See Vol 3D. A1. Ignore
3778 				 * this capability for now.
3779 				 */
3780 				if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
3781 				    vcpu->vc_inject.vie_errorcode)) {
3782 					printf("%s: can't write error code to "
3783 					    "guest\n", __func__);
3784 					ret = EINVAL;
3785 				}
3786 			} /* switch */
3787 			if (ret == EINVAL)
3788 				break;
3789 
3790 			if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3791 				printf("%s: can't vector event to guest\n",
3792 				    __func__);
3793 				ret = EINVAL;
3794 				break;
3795 			}
3796 			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3797 		}
3798 
3799 		if (vcpu->vc_vmx_vpid_enabled) {
3800 			/* Invalidate old TLB mappings */
3801 			vid.vid_vpid = vcpu->vc_vpid;
3802 			vid.vid_addr = 0;
3803 			invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
3804 		}
3805 
3806 		/* Start / resume the VCPU */
3807 
3808 		/* Disable interrupts and save the current host FPU state. */
3809 		s = intr_disable();
3810 		if ((ret = vmm_fpurestore(vcpu))) {
3811 			intr_restore(s);
3812 			break;
3813 		}
3814 
3815 		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
3816 
3817 		/*
3818 		 * If we're resuming to a different VCPU and have IBPB,
3819 		 * then use it to prevent cross-VM branch-target injection.
3820 		 */
3821 		if (ci->ci_guest_vcpu != vcpu &&
3822 		    (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS)) {
3823 			wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
3824 			ci->ci_guest_vcpu = vcpu;
3825 		}
3826 
3827 		/* Restore any guest PKRU state. */
3828 		if (vmm_softc->sc_md.pkru_enabled)
3829 			wrpkru(0, vcpu->vc_pkru);
3830 
3831 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
3832 		    &vcpu->vc_gueststate,
3833 		    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED),
3834 		    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
3835 
3836 		/* Restore host PKRU state. */
3837 		if (vmm_softc->sc_md.pkru_enabled) {
3838 			vcpu->vc_pkru = rdpkru(0);
3839 			wrpkru(0, PGK_VALUE);
3840 		}
3841 
3842 		/*
3843 		 * VM exit restores the GDT and IDT bases, but gives
3844 		 * them high limits.  Reload with the correct limits here.
3845 		 * 'gdt' is set above first time through and reset there
3846 		 * whenever this thread switches CPU.
3847 		 */
3848 		bare_lgdt(&gdt);
3849 		cpu_init_idt();
3850 
3851 		/*
3852 		 * On exit, interrupts are disabled, and we are running with
3853 		 * the guest FPU state still possibly on the CPU. Save the FPU
3854 		 * state before re-enabling interrupts.
3855 		 */
3856 		vmm_fpusave(vcpu);
3857 		intr_restore(s);
3858 
3859 		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED);
3860 		exit_reason = VM_EXIT_NONE;
3861 
3862 		/* If we exited successfully ... */
3863 		if (ret == 0) {
3864 			exitinfo = vmx_get_exit_info(
3865 			    &vcpu->vc_gueststate.vg_rip, &exit_reason);
3866 			if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) {
3867 				printf("%s: cannot read guest rip\n", __func__);
3868 				ret = EINVAL;
3869 				break;
3870 			}
3871 			if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) {
3872 				printf("%s: can't read exit reason\n",
3873 				    __func__);
3874 				ret = EINVAL;
3875 				break;
3876 			}
3877 			vcpu->vc_gueststate.vg_exit_reason = exit_reason;
3878 			TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
3879 
3880 			/* Update our state */
3881 			if (vmread(VMCS_GUEST_IA32_RFLAGS,
3882 			    &vcpu->vc_gueststate.vg_rflags)) {
3883 				printf("%s: can't read guest rflags during "
3884 				    "exit\n", __func__);
3885 				ret = EINVAL;
3886 				break;
3887                         }
3888 
3889 			/*
3890 			 * Handle the exit. This will alter "ret" to EAGAIN if
3891 			 * the exit handler determines help from vmd is needed.
3892 			 */
3893 			ret = vmx_handle_exit(vcpu);
3894 
3895 			if (vcpu->vc_gueststate.vg_rflags & PSL_I)
3896 				vcpu->vc_irqready = 1;
3897 			else
3898 				vcpu->vc_irqready = 0;
3899 
3900 			/*
3901 			 * If not ready for interrupts, but interrupts pending,
3902 			 * enable interrupt window exiting.
3903 			 */
3904 			if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
3905 				if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3906 					printf("%s: can't read procbased ctls "
3907 					    "on intwin exit\n", __func__);
3908 					ret = EINVAL;
3909 					break;
3910 				}
3911 
3912 				procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING;
3913 				if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3914 					printf("%s: can't write procbased ctls "
3915 					    "on intwin exit\n", __func__);
3916 					ret = EINVAL;
3917 					break;
3918 				}
3919 			}
3920 
3921 			/*
3922 			 * Exit to vmd if we are terminating, failed to enter,
3923 			 * or need help (device I/O)
3924 			 */
3925 			if (ret || vcpu_must_stop(vcpu))
3926 				break;
3927 
3928 			if (vcpu->vc_intr && vcpu->vc_irqready) {
3929 				ret = EAGAIN;
3930 				break;
3931 			}
3932 
3933 			/* Check if we should yield - don't hog the {p,v}pu */
3934 			spc = &ci->ci_schedstate;
3935 			if (spc->spc_schedflags & SPCF_SHOULDYIELD)
3936 				break;
3937 
3938 		} else {
3939 			/*
3940 			 * We failed vmresume or vmlaunch for some reason,
3941 			 * typically due to invalid vmcs state or other
3942 			 * reasons documented in SDM Vol 3C 30.4.
3943 			 */
3944 			switch (ret) {
3945 			case VMX_FAIL_LAUNCH_INVALID_VMCS:
3946 				printf("%s: failed %s with invalid vmcs\n",
3947 				    __func__,
3948 				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3949 					? "vmresume" : "vmlaunch"));
3950 				break;
3951 			case VMX_FAIL_LAUNCH_VALID_VMCS:
3952 				printf("%s: failed %s with valid vmcs\n",
3953 				    __func__,
3954 				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3955 					? "vmresume" : "vmlaunch"));
3956 				break;
3957 			default:
3958 				printf("%s: failed %s for unknown reason\n",
3959 				    __func__,
3960 				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3961 					? "vmresume" : "vmlaunch"));
3962 			}
3963 
3964 			ret = EINVAL;
3965 
3966 			/* Try to translate a vmfail error code, if possible. */
3967 			if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) {
3968 				printf("%s: can't read insn error field\n",
3969 				    __func__);
3970 			} else
3971 				printf("%s: error code = %lld, %s\n", __func__,
3972 				    insn_error,
3973 				    vmx_instruction_error_decode(insn_error));
3974 #ifdef VMM_DEBUG
3975 			vmx_vcpu_dump_regs(vcpu);
3976 			dump_vcpu(vcpu);
3977 #endif /* VMM_DEBUG */
3978 		}
3979 	}
3980 
3981 	vcpu->vc_last_pcpu = curcpu();
3982 
3983 	/* Copy the VCPU register state to the exit structure */
3984 	if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 0, &vcpu->vc_exit.vrs))
3985 		ret = EINVAL;
3986 	vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
3987 
3988 	return (ret);
3989 }
3990 
3991 /*
3992  * vmx_handle_intr
3993  *
3994  * Handle host (external) interrupts. We read which interrupt fired by
3995  * extracting the vector from the VMCS and dispatch the interrupt directly
3996  * to the host using vmm_dispatch_intr.
3997  */
3998 void
vmx_handle_intr(struct vcpu * vcpu)3999 vmx_handle_intr(struct vcpu *vcpu)
4000 {
4001 	uint8_t vec;
4002 	uint64_t eii;
4003 	struct gate_descriptor *idte;
4004 	vaddr_t handler;
4005 
4006 	if (vmread(VMCS_EXIT_INTERRUPTION_INFO, &eii)) {
4007 		printf("%s: can't obtain intr info\n", __func__);
4008 		return;
4009 	}
4010 
4011 	vec = eii & 0xFF;
4012 
4013 	/* XXX check "error valid" code in eii, abort if 0 */
4014 	idte=&idt[vec];
4015 	handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
4016 	vmm_dispatch_intr(handler);
4017 }
4018 
4019 /*
4020  * svm_handle_hlt
4021  *
4022  * Handle HLT exits
4023  *
4024  * Parameters
4025  *  vcpu: The VCPU that executed the HLT instruction
4026  *
4027  * Return Values:
4028  *  EIO: The guest halted with interrupts disabled
4029  *  EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4030  *   until a virtual interrupt is ready to inject
4031  */
4032 int
svm_handle_hlt(struct vcpu * vcpu)4033 svm_handle_hlt(struct vcpu *vcpu)
4034 {
4035 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4036 	uint64_t rflags = vmcb->v_rflags;
4037 
4038 	/* All HLT insns are 1 byte */
4039 	vcpu->vc_gueststate.vg_rip += 1;
4040 
4041 	if (!(rflags & PSL_I)) {
4042 		DPRINTF("%s: guest halted with interrupts disabled\n",
4043 		    __func__);
4044 		return (EIO);
4045 	}
4046 
4047 	return (EAGAIN);
4048 }
4049 
4050 /*
4051  * vmx_handle_hlt
4052  *
4053  * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
4054  * the guest (no NMIs handled) by returning EIO to vmd.
4055  *
4056  * Parameters:
4057  *  vcpu: The VCPU that executed the HLT instruction
4058  *
4059  * Return Values:
4060  *  EINVAL: An error occurred extracting information from the VMCS, or an
4061  *   invalid HLT instruction was encountered
4062  *  EIO: The guest halted with interrupts disabled
4063  *  EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4064  *   until a virtual interrupt is ready to inject
4065  *
4066  */
4067 int
vmx_handle_hlt(struct vcpu * vcpu)4068 vmx_handle_hlt(struct vcpu *vcpu)
4069 {
4070 	uint64_t insn_length, rflags;
4071 
4072 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4073 		printf("%s: can't obtain instruction length\n", __func__);
4074 		return (EINVAL);
4075 	}
4076 
4077 	if (vmread(VMCS_GUEST_IA32_RFLAGS, &rflags)) {
4078 		printf("%s: can't obtain guest rflags\n", __func__);
4079 		return (EINVAL);
4080 	}
4081 
4082 	if (insn_length != 1) {
4083 		DPRINTF("%s: HLT with instruction length %lld not supported\n",
4084 		    __func__, insn_length);
4085 		return (EINVAL);
4086 	}
4087 
4088 	if (!(rflags & PSL_I)) {
4089 		DPRINTF("%s: guest halted with interrupts disabled\n",
4090 		    __func__);
4091 		return (EIO);
4092 	}
4093 
4094 	vcpu->vc_gueststate.vg_rip += insn_length;
4095 	return (EAGAIN);
4096 }
4097 
4098 /*
4099  * vmx_get_exit_info
4100  *
4101  * Returns exit information containing the current guest RIP and exit reason
4102  * in rip and exit_reason. The return value is a bitmask indicating whether
4103  * reading the RIP and exit reason was successful.
4104  */
4105 int
vmx_get_exit_info(uint64_t * rip,uint64_t * exit_reason)4106 vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
4107 {
4108 	int rv = 0;
4109 
4110 	if (vmread(VMCS_GUEST_IA32_RIP, rip) == 0) {
4111 		rv |= VMX_EXIT_INFO_HAVE_RIP;
4112 		if (vmread(VMCS_EXIT_REASON, exit_reason) == 0)
4113 			rv |= VMX_EXIT_INFO_HAVE_REASON;
4114 	}
4115 	return (rv);
4116 }
4117 
4118 /*
4119  * svm_handle_exit
4120  *
4121  * Handle exits from the VM by decoding the exit reason and calling various
4122  * subhandlers as needed.
4123  */
4124 int
svm_handle_exit(struct vcpu * vcpu)4125 svm_handle_exit(struct vcpu *vcpu)
4126 {
4127 	uint64_t exit_reason, rflags;
4128 	int update_rip, ret = 0;
4129 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4130 
4131 	update_rip = 0;
4132 	exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4133 	rflags = vcpu->vc_gueststate.vg_rflags;
4134 
4135 	switch (exit_reason) {
4136 	case SVM_VMEXIT_VINTR:
4137 		if (!(rflags & PSL_I)) {
4138 			DPRINTF("%s: impossible interrupt window exit "
4139 			    "config\n", __func__);
4140 			ret = EINVAL;
4141 			break;
4142 		}
4143 
4144 		/*
4145 		 * Guest is now ready for interrupts, so disable interrupt
4146 		 * window exiting.
4147 		 */
4148 		vmcb->v_irq = 0;
4149 		vmcb->v_intr_vector = 0;
4150 		vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR;
4151 		svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | SVM_CLEANBITS_I);
4152 
4153 		update_rip = 0;
4154 		break;
4155 	case SVM_VMEXIT_INTR:
4156 		update_rip = 0;
4157 		break;
4158 	case SVM_VMEXIT_SHUTDOWN:
4159 		update_rip = 0;
4160 		ret = EAGAIN;
4161 		break;
4162 	case SVM_VMEXIT_NPF:
4163 		ret = svm_handle_np_fault(vcpu);
4164 		break;
4165 	case SVM_VMEXIT_CPUID:
4166 		ret = vmm_handle_cpuid(vcpu);
4167 		update_rip = 1;
4168 		break;
4169 	case SVM_VMEXIT_MSR:
4170 		ret = svm_handle_msr(vcpu);
4171 		update_rip = 1;
4172 		break;
4173 	case SVM_VMEXIT_XSETBV:
4174 		ret = svm_handle_xsetbv(vcpu);
4175 		update_rip = 1;
4176 		break;
4177 	case SVM_VMEXIT_IOIO:
4178 		if (svm_handle_inout(vcpu) == 0)
4179 			ret = EAGAIN;
4180 		break;
4181 	case SVM_VMEXIT_HLT:
4182 		ret = svm_handle_hlt(vcpu);
4183 		update_rip = 1;
4184 		break;
4185 	case SVM_VMEXIT_MWAIT:
4186 	case SVM_VMEXIT_MWAIT_CONDITIONAL:
4187 	case SVM_VMEXIT_MONITOR:
4188 	case SVM_VMEXIT_VMRUN:
4189 	case SVM_VMEXIT_VMMCALL:
4190 	case SVM_VMEXIT_VMLOAD:
4191 	case SVM_VMEXIT_VMSAVE:
4192 	case SVM_VMEXIT_STGI:
4193 	case SVM_VMEXIT_CLGI:
4194 	case SVM_VMEXIT_SKINIT:
4195 	case SVM_VMEXIT_RDTSCP:
4196 	case SVM_VMEXIT_ICEBP:
4197 	case SVM_VMEXIT_INVLPGA:
4198 		ret = vmm_inject_ud(vcpu);
4199 		update_rip = 0;
4200 		break;
4201 	default:
4202 		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
4203 		    exit_reason, (uint64_t)vcpu->vc_control_pa);
4204 		return (EINVAL);
4205 	}
4206 
4207 	if (update_rip) {
4208 		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
4209 
4210 		if (rflags & PSL_T) {
4211 			if (vmm_inject_db(vcpu)) {
4212 				printf("%s: can't inject #DB exception to "
4213 				    "guest", __func__);
4214 				return (EINVAL);
4215 			}
4216 		}
4217 	}
4218 
4219 	/* Enable SVME in EFER (must always be set) */
4220 	vmcb->v_efer |= EFER_SVME;
4221 	svm_set_dirty(vcpu, SVM_CLEANBITS_CR);
4222 
4223 	return (ret);
4224 }
4225 
4226 /*
4227  * vmx_handle_exit
4228  *
4229  * Handle exits from the VM by decoding the exit reason and calling various
4230  * subhandlers as needed.
4231  */
4232 int
vmx_handle_exit(struct vcpu * vcpu)4233 vmx_handle_exit(struct vcpu *vcpu)
4234 {
4235 	uint64_t exit_reason, rflags, istate;
4236 	int update_rip, ret = 0;
4237 
4238 	update_rip = 0;
4239 	exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4240 	rflags = vcpu->vc_gueststate.vg_rflags;
4241 
4242 	switch (exit_reason) {
4243 	case VMX_EXIT_INT_WINDOW:
4244 		if (!(rflags & PSL_I)) {
4245 			DPRINTF("%s: impossible interrupt window exit "
4246 			    "config\n", __func__);
4247 			ret = EINVAL;
4248 			break;
4249 		}
4250 
4251 		ret = EAGAIN;
4252 		update_rip = 0;
4253 		break;
4254 	case VMX_EXIT_EPT_VIOLATION:
4255 		ret = vmx_handle_np_fault(vcpu);
4256 		break;
4257 	case VMX_EXIT_CPUID:
4258 		ret = vmm_handle_cpuid(vcpu);
4259 		update_rip = 1;
4260 		break;
4261 	case VMX_EXIT_IO:
4262 		if (vmx_handle_inout(vcpu) == 0)
4263 			ret = EAGAIN;
4264 		break;
4265 	case VMX_EXIT_EXTINT:
4266 		vmx_handle_intr(vcpu);
4267 		update_rip = 0;
4268 		break;
4269 	case VMX_EXIT_CR_ACCESS:
4270 		ret = vmx_handle_cr(vcpu);
4271 		update_rip = 1;
4272 		break;
4273 	case VMX_EXIT_HLT:
4274 		ret = vmx_handle_hlt(vcpu);
4275 		update_rip = 1;
4276 		break;
4277 	case VMX_EXIT_RDMSR:
4278 		ret = vmx_handle_rdmsr(vcpu);
4279 		update_rip = 1;
4280 		break;
4281 	case VMX_EXIT_WRMSR:
4282 		ret = vmx_handle_wrmsr(vcpu);
4283 		update_rip = 1;
4284 		break;
4285 	case VMX_EXIT_XSETBV:
4286 		ret = vmx_handle_xsetbv(vcpu);
4287 		update_rip = 1;
4288 		break;
4289 	case VMX_EXIT_MWAIT:
4290 	case VMX_EXIT_MONITOR:
4291 	case VMX_EXIT_VMXON:
4292 	case VMX_EXIT_VMWRITE:
4293 	case VMX_EXIT_VMREAD:
4294 	case VMX_EXIT_VMLAUNCH:
4295 	case VMX_EXIT_VMRESUME:
4296 	case VMX_EXIT_VMPTRLD:
4297 	case VMX_EXIT_VMPTRST:
4298 	case VMX_EXIT_VMCLEAR:
4299 	case VMX_EXIT_VMCALL:
4300 	case VMX_EXIT_VMFUNC:
4301 	case VMX_EXIT_VMXOFF:
4302 	case VMX_EXIT_INVVPID:
4303 	case VMX_EXIT_INVEPT:
4304 		ret = vmm_inject_ud(vcpu);
4305 		update_rip = 0;
4306 		break;
4307 	case VMX_EXIT_TRIPLE_FAULT:
4308 #ifdef VMM_DEBUG
4309 		DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
4310 		    vcpu->vc_parent->vm_id, vcpu->vc_id);
4311 		vmx_vcpu_dump_regs(vcpu);
4312 		dump_vcpu(vcpu);
4313 		vmx_dump_vmcs(vcpu);
4314 #endif /* VMM_DEBUG */
4315 		ret = EAGAIN;
4316 		update_rip = 0;
4317 		break;
4318 	default:
4319 #ifdef VMM_DEBUG
4320 		DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
4321 		    exit_reason, vmx_exit_reason_decode(exit_reason));
4322 #endif /* VMM_DEBUG */
4323 		return (EINVAL);
4324 	}
4325 
4326 	if (update_rip) {
4327 		if (vmwrite(VMCS_GUEST_IA32_RIP,
4328 		    vcpu->vc_gueststate.vg_rip)) {
4329 			printf("%s: can't advance rip\n", __func__);
4330 			return (EINVAL);
4331 		}
4332 
4333 		if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST,
4334 		    &istate)) {
4335 			printf("%s: can't read interruptibility state\n",
4336 			    __func__);
4337 			return (EINVAL);
4338 		}
4339 
4340 		/* Interruptibility state 0x3 covers NMIs and STI */
4341 		istate &= ~0x3;
4342 
4343 		if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST,
4344 		    istate)) {
4345 			printf("%s: can't write interruptibility state\n",
4346 			    __func__);
4347 			return (EINVAL);
4348 		}
4349 
4350 		if (rflags & PSL_T) {
4351 			if (vmm_inject_db(vcpu)) {
4352 				printf("%s: can't inject #DB exception to "
4353 				    "guest", __func__);
4354 				return (EINVAL);
4355 			}
4356 		}
4357 	}
4358 
4359 	return (ret);
4360 }
4361 
4362 /*
4363  * vmm_inject_gp
4364  *
4365  * Injects an #GP exception into the guest VCPU.
4366  *
4367  * Parameters:
4368  *  vcpu: vcpu to inject into
4369  *
4370  * Return values:
4371  *  Always 0
4372  */
4373 int
vmm_inject_gp(struct vcpu * vcpu)4374 vmm_inject_gp(struct vcpu *vcpu)
4375 {
4376 	DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
4377 	    vcpu->vc_gueststate.vg_rip);
4378 	vcpu->vc_inject.vie_vector = VMM_EX_GP;
4379 	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4380 	vcpu->vc_inject.vie_errorcode = 0;
4381 
4382 	return (0);
4383 }
4384 
4385 /*
4386  * vmm_inject_ud
4387  *
4388  * Injects an #UD exception into the guest VCPU.
4389  *
4390  * Parameters:
4391  *  vcpu: vcpu to inject into
4392  *
4393  * Return values:
4394  *  Always 0
4395  */
4396 int
vmm_inject_ud(struct vcpu * vcpu)4397 vmm_inject_ud(struct vcpu *vcpu)
4398 {
4399 	DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
4400 	    vcpu->vc_gueststate.vg_rip);
4401 	vcpu->vc_inject.vie_vector = VMM_EX_UD;
4402 	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4403 	vcpu->vc_inject.vie_errorcode = 0;
4404 
4405 	return (0);
4406 }
4407 
4408 /*
4409  * vmm_inject_db
4410  *
4411  * Injects a #DB exception into the guest VCPU.
4412  *
4413  * Parameters:
4414  *  vcpu: vcpu to inject into
4415  *
4416  * Return values:
4417  *  Always 0
4418  */
4419 int
vmm_inject_db(struct vcpu * vcpu)4420 vmm_inject_db(struct vcpu *vcpu)
4421 {
4422 	DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
4423 	    vcpu->vc_gueststate.vg_rip);
4424 	vcpu->vc_inject.vie_vector = VMM_EX_DB;
4425 	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4426 	vcpu->vc_inject.vie_errorcode = 0;
4427 
4428 	return (0);
4429 }
4430 
4431 /*
4432  * vmm_get_guest_memtype
4433  *
4434  * Returns the type of memory 'gpa' refers to in the context of vm 'vm'
4435  */
4436 int
vmm_get_guest_memtype(struct vm * vm,paddr_t gpa)4437 vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
4438 {
4439 	int i;
4440 	struct vm_mem_range *vmr;
4441 
4442 	/* XXX Use binary search? */
4443 	for (i = 0; i < vm->vm_nmemranges; i++) {
4444 		vmr = &vm->vm_memranges[i];
4445 
4446 		/*
4447 		 * vm_memranges are ascending. gpa can no longer be in one of
4448 		 * the memranges
4449 		 */
4450 		if (gpa < vmr->vmr_gpa)
4451 			break;
4452 
4453 		if (gpa < vmr->vmr_gpa + vmr->vmr_size) {
4454 			if (vmr->vmr_type == VM_MEM_MMIO)
4455 				return (VMM_MEM_TYPE_MMIO);
4456 			return (VMM_MEM_TYPE_REGULAR);
4457 		}
4458 	}
4459 
4460 	DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
4461 	return (VMM_MEM_TYPE_UNKNOWN);
4462 }
4463 
4464 /*
4465  * vmx_get_exit_qualification
4466  *
4467  * Return the current VMCS' exit qualification information
4468  */
4469 int
vmx_get_exit_qualification(uint64_t * exit_qualification)4470 vmx_get_exit_qualification(uint64_t *exit_qualification)
4471 {
4472 	if (vmread(VMCS_GUEST_EXIT_QUALIFICATION, exit_qualification)) {
4473 		printf("%s: can't extract exit qual\n", __func__);
4474 		return (EINVAL);
4475 	}
4476 
4477 	return (0);
4478 }
4479 
4480 /*
4481  * vmx_get_guest_faulttype
4482  *
4483  * Determines the type (R/W/X) of the last fault on the VCPU last run on
4484  * this PCPU.
4485  */
4486 int
vmx_get_guest_faulttype(void)4487 vmx_get_guest_faulttype(void)
4488 {
4489 	uint64_t exit_qual;
4490 	uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
4491 	    IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
4492 	vm_prot_t prot, was_prot;
4493 
4494 	if (vmx_get_exit_qualification(&exit_qual))
4495 		return (-1);
4496 
4497 	if ((exit_qual & presentmask) == 0)
4498 		return VM_FAULT_INVALID;
4499 
4500 	was_prot = 0;
4501 	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
4502 		was_prot |= PROT_READ;
4503 	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
4504 		was_prot |= PROT_WRITE;
4505 	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
4506 		was_prot |= PROT_EXEC;
4507 
4508 	prot = 0;
4509 	if (exit_qual & IA32_VMX_EPT_FAULT_READ)
4510 		prot = PROT_READ;
4511 	else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
4512 		prot = PROT_WRITE;
4513 	else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
4514 		prot = PROT_EXEC;
4515 
4516 	if ((was_prot & prot) == 0)
4517 		return VM_FAULT_PROTECT;
4518 
4519 	return (-1);
4520 }
4521 
4522 /*
4523  * svm_get_guest_faulttype
4524  *
4525  * Determines the type (R/W/X) of the last fault on the VCPU last run on
4526  * this PCPU.
4527  */
4528 int
svm_get_guest_faulttype(struct vmcb * vmcb)4529 svm_get_guest_faulttype(struct vmcb *vmcb)
4530 {
4531 	if (!(vmcb->v_exitinfo1 & 0x1))
4532 		return VM_FAULT_INVALID;
4533 	return VM_FAULT_PROTECT;
4534 }
4535 
4536 /*
4537  * svm_fault_page
4538  *
4539  * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4540  * at address 'gpa'.
4541  */
4542 int
svm_fault_page(struct vcpu * vcpu,paddr_t gpa)4543 svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
4544 {
4545 	int ret;
4546 
4547 	ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
4548 	    PROT_READ | PROT_WRITE | PROT_EXEC);
4549 	if (ret)
4550 		printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4551 		    __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4552 
4553 	return (ret);
4554 }
4555 
4556 /*
4557  * svm_handle_np_fault
4558  *
4559  * High level nested paging handler for SVM. Verifies that a fault is for a
4560  * valid memory region, then faults a page, or aborts otherwise.
4561  */
4562 int
svm_handle_np_fault(struct vcpu * vcpu)4563 svm_handle_np_fault(struct vcpu *vcpu)
4564 {
4565 	uint64_t gpa;
4566 	int gpa_memtype, ret = 0;
4567 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4568 	struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4569 	struct cpu_info *ci = curcpu();
4570 
4571 	memset(vee, 0, sizeof(*vee));
4572 
4573 	gpa = vmcb->v_exitinfo2;
4574 
4575 	gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4576 	switch (gpa_memtype) {
4577 	case VMM_MEM_TYPE_REGULAR:
4578 		vee->vee_fault_type = VEE_FAULT_HANDLED;
4579 		ret = svm_fault_page(vcpu, gpa);
4580 		break;
4581 	case VMM_MEM_TYPE_MMIO:
4582 		vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4583 		if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
4584 			vee->vee_insn_len = vmcb->v_n_bytes_fetched;
4585 			memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
4586 			    sizeof(vee->vee_insn_bytes));
4587 			vee->vee_insn_info |= VEE_BYTES_VALID;
4588 		}
4589 		ret = EAGAIN;
4590 		break;
4591 	default:
4592 		printf("%s: unknown memory type %d for GPA 0x%llx\n",
4593 		    __func__, gpa_memtype, gpa);
4594 		return (EINVAL);
4595 	}
4596 
4597 	return (ret);
4598 }
4599 
4600 /*
4601  * vmx_fault_page
4602  *
4603  * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4604  * at address 'gpa'.
4605  *
4606  * Parameters:
4607  *  vcpu: guest VCPU requiring the page to be faulted into the UVM map
4608  *  gpa: guest physical address that triggered the fault
4609  *
4610  * Return Values:
4611  *  0: if successful
4612  *  EINVAL: if fault type could not be determined or VMCS reload fails
4613  *  EAGAIN: if a protection fault occurred, ie writing to a read-only page
4614  *  errno: if uvm_fault(9) fails to wire in the page
4615  */
4616 int
vmx_fault_page(struct vcpu * vcpu,paddr_t gpa)4617 vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
4618 {
4619 	int fault_type, ret;
4620 
4621 	fault_type = vmx_get_guest_faulttype();
4622 	switch (fault_type) {
4623 	case -1:
4624 		printf("%s: invalid fault type\n", __func__);
4625 		return (EINVAL);
4626 	case VM_FAULT_PROTECT:
4627 		vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
4628 		return (EAGAIN);
4629 	default:
4630 		vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED;
4631 		break;
4632 	}
4633 
4634 	/* We may sleep during uvm_fault(9), so reload VMCS. */
4635 	vcpu->vc_last_pcpu = curcpu();
4636 	ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
4637 	    PROT_READ | PROT_WRITE | PROT_EXEC);
4638 	if (vcpu_reload_vmcs_vmx(vcpu)) {
4639 		printf("%s: failed to reload vmcs\n", __func__);
4640 		return (EINVAL);
4641 	}
4642 
4643 	if (ret)
4644 		printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4645 		    __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4646 
4647 	return (ret);
4648 }
4649 
4650 /*
4651  * vmx_handle_np_fault
4652  *
4653  * High level nested paging handler for VMX. Verifies that a fault is for a
4654  * valid memory region, then faults a page, or aborts otherwise.
4655  */
4656 int
vmx_handle_np_fault(struct vcpu * vcpu)4657 vmx_handle_np_fault(struct vcpu *vcpu)
4658 {
4659 	uint64_t insn_len = 0, gpa;
4660 	int gpa_memtype, ret = 0;
4661 	struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4662 
4663 	memset(vee, 0, sizeof(*vee));
4664 
4665 	if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) {
4666 		printf("%s: cannot extract faulting pa\n", __func__);
4667 		return (EINVAL);
4668 	}
4669 
4670 	gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4671 	switch (gpa_memtype) {
4672 	case VMM_MEM_TYPE_REGULAR:
4673 		vee->vee_fault_type = VEE_FAULT_HANDLED;
4674 		ret = vmx_fault_page(vcpu, gpa);
4675 		break;
4676 	case VMM_MEM_TYPE_MMIO:
4677 		vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4678 		if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) ||
4679 		    insn_len == 0 || insn_len > 15) {
4680 			printf("%s: failed to extract instruction length\n",
4681 			    __func__);
4682 			ret = EINVAL;
4683 		} else {
4684 			vee->vee_insn_len = (uint32_t)insn_len;
4685 			vee->vee_insn_info |= VEE_LEN_VALID;
4686 			ret = EAGAIN;
4687 		}
4688 		break;
4689 	default:
4690 		printf("%s: unknown memory type %d for GPA 0x%llx\n",
4691 		    __func__, gpa_memtype, gpa);
4692 		return (EINVAL);
4693 	}
4694 
4695 	return (ret);
4696 }
4697 
4698 /*
4699  * vmm_get_guest_cpu_cpl
4700  *
4701  * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the
4702  * VMCS field for the DPL of SS (this seems odd, but is documented that way
4703  * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl'
4704  * field, as per the APM.
4705  *
4706  * Parameters:
4707  *  vcpu: guest VCPU for which CPL is to be checked
4708  *
4709  * Return Values:
4710  *  -1: the CPL could not be determined
4711  *  0-3 indicating the current CPL. For real mode operation, 0 is returned.
4712  */
4713 int
vmm_get_guest_cpu_cpl(struct vcpu * vcpu)4714 vmm_get_guest_cpu_cpl(struct vcpu *vcpu)
4715 {
4716 	int mode;
4717 	struct vmcb *vmcb;
4718 	uint64_t ss_ar;
4719 
4720 	mode = vmm_get_guest_cpu_mode(vcpu);
4721 
4722 	if (mode == VMM_CPU_MODE_UNKNOWN)
4723 		return (-1);
4724 
4725 	if (mode == VMM_CPU_MODE_REAL)
4726 		return (0);
4727 
4728 	if (vmm_softc->mode == VMM_MODE_RVI) {
4729 		vmcb = (struct vmcb *)vcpu->vc_control_va;
4730 		return (vmcb->v_cpl);
4731 	} else if (vmm_softc->mode == VMM_MODE_EPT) {
4732 		if (vmread(VMCS_GUEST_IA32_SS_AR, &ss_ar))
4733 			return (-1);
4734 		return ((ss_ar & 0x60) >> 5);
4735 	} else
4736 		return (-1);
4737 }
4738 
4739 /*
4740  * vmm_get_guest_cpu_mode
4741  *
4742  * Determines current CPU mode of 'vcpu'.
4743  *
4744  * Parameters:
4745  *  vcpu: guest VCPU for which mode is to be checked
4746  *
4747  * Return Values:
4748  *  One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be
4749  *   ascertained.
4750  */
4751 int
vmm_get_guest_cpu_mode(struct vcpu * vcpu)4752 vmm_get_guest_cpu_mode(struct vcpu *vcpu)
4753 {
4754 	uint64_t cr0, efer, cs_ar;
4755 	uint8_t l, dib;
4756 	struct vmcb *vmcb;
4757 	struct vmx_msr_store *msr_store;
4758 
4759 	if (vmm_softc->mode == VMM_MODE_RVI) {
4760 		vmcb = (struct vmcb *)vcpu->vc_control_va;
4761 		cr0 = vmcb->v_cr0;
4762 		efer = vmcb->v_efer;
4763 		cs_ar = vmcb->v_cs.vs_attr;
4764 		cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000);
4765 	} else if (vmm_softc->mode == VMM_MODE_EPT) {
4766 		if (vmread(VMCS_GUEST_IA32_CR0, &cr0))
4767 			return (VMM_CPU_MODE_UNKNOWN);
4768 		if (vmread(VMCS_GUEST_IA32_CS_AR, &cs_ar))
4769 			return (VMM_CPU_MODE_UNKNOWN);
4770 		msr_store =
4771 		    (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
4772 		efer = msr_store[VCPU_REGS_EFER].vms_data;
4773 	} else
4774 		return (VMM_CPU_MODE_UNKNOWN);
4775 
4776 	l = (cs_ar & 0x2000) >> 13;
4777 	dib = (cs_ar & 0x4000) >> 14;
4778 
4779 	/* Check CR0.PE */
4780 	if (!(cr0 & CR0_PE))
4781 		return (VMM_CPU_MODE_REAL);
4782 
4783 	/* Check EFER */
4784 	if (efer & EFER_LMA) {
4785 		/* Could be compat or long mode, check CS.L */
4786 		if (l)
4787 			return (VMM_CPU_MODE_LONG);
4788 		else
4789 			return (VMM_CPU_MODE_COMPAT);
4790 	}
4791 
4792 	/* Check prot vs prot32 */
4793 	if (dib)
4794 		return (VMM_CPU_MODE_PROT32);
4795 	else
4796 		return (VMM_CPU_MODE_PROT);
4797 }
4798 
4799 /*
4800  * svm_handle_inout
4801  *
4802  * Exit handler for IN/OUT instructions.
4803  *
4804  * Parameters:
4805  *  vcpu: The VCPU where the IN/OUT instruction occurred
4806  *
4807  * Return values:
4808  *  0: if successful
4809  *  EINVAL: an invalid IN/OUT instruction was encountered
4810  */
4811 int
svm_handle_inout(struct vcpu * vcpu)4812 svm_handle_inout(struct vcpu *vcpu)
4813 {
4814 	uint64_t insn_length, exit_qual;
4815 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4816 
4817 	insn_length = vmcb->v_exitinfo2 - vmcb->v_rip;
4818 	exit_qual = vmcb->v_exitinfo1;
4819 
4820 	/* Bit 0 - direction */
4821 	if (exit_qual & 0x1)
4822 		vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4823 	else
4824 		vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4825 	/* Bit 2 - string instruction? */
4826 	vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2;
4827 	/* Bit 3 - REP prefix? */
4828 	vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3;
4829 
4830 	/* Bits 4:6 - size of exit */
4831 	if (exit_qual & 0x10)
4832 		vcpu->vc_exit.vei.vei_size = 1;
4833 	else if (exit_qual & 0x20)
4834 		vcpu->vc_exit.vei.vei_size = 2;
4835 	else if (exit_qual & 0x40)
4836 		vcpu->vc_exit.vei.vei_size = 4;
4837 
4838 	/* Bit 16:31 - port */
4839 	vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4840 	/* Data */
4841 	vcpu->vc_exit.vei.vei_data = vmcb->v_rax;
4842 
4843 	vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4844 
4845 	TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4846 	    vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4847 
4848 	return (0);
4849 }
4850 
4851 /*
4852  * vmx_handle_inout
4853  *
4854  * Exit handler for IN/OUT instructions.
4855  *
4856  * Parameters:
4857  *  vcpu: The VCPU where the IN/OUT instruction occurred
4858  *
4859  * Return values:
4860  *  0: if successful
4861  *  EINVAL: invalid IN/OUT instruction or vmread failures occurred
4862  */
4863 int
vmx_handle_inout(struct vcpu * vcpu)4864 vmx_handle_inout(struct vcpu *vcpu)
4865 {
4866 	uint64_t insn_length, exit_qual;
4867 
4868 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4869 		printf("%s: can't obtain instruction length\n", __func__);
4870 		return (EINVAL);
4871 	}
4872 
4873 	if (vmx_get_exit_qualification(&exit_qual)) {
4874 		printf("%s: can't get exit qual\n", __func__);
4875 		return (EINVAL);
4876 	}
4877 
4878 	/* Bits 0:2 - size of exit */
4879 	vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1;
4880 	/* Bit 3 - direction */
4881 	if ((exit_qual & 0x8) >> 3)
4882 		vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4883 	else
4884 		vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4885 	/* Bit 4 - string instruction? */
4886 	vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4;
4887 	/* Bit 5 - REP prefix? */
4888 	vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5;
4889 	/* Bit 6 - Operand encoding */
4890 	vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6;
4891 	/* Bit 16:31 - port */
4892 	vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4893 	/* Data */
4894 	vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax;
4895 
4896 	vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4897 
4898 	TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4899 	    vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4900 
4901 	return (0);
4902 }
4903 
4904 /*
4905  * vmx_load_pdptes
4906  *
4907  * Update the PDPTEs in the VMCS with the values currently indicated by the
4908  * guest CR3. This is used for 32-bit PAE guests when enabling paging.
4909  *
4910  * Parameters
4911  *  vcpu: The vcpu whose PDPTEs should be loaded
4912  *
4913  * Return values:
4914  *  0: if successful
4915  *  EINVAL: if the PDPTEs could not be loaded
4916  *  ENOMEM: memory allocation failure
4917  */
4918 int
vmx_load_pdptes(struct vcpu * vcpu)4919 vmx_load_pdptes(struct vcpu *vcpu)
4920 {
4921 	uint64_t cr3, cr3_host_phys;
4922 	vaddr_t cr3_host_virt;
4923 	pd_entry_t *pdptes;
4924 	int ret;
4925 
4926 	if (vmread(VMCS_GUEST_IA32_CR3, &cr3)) {
4927 		printf("%s: can't read guest cr3\n", __func__);
4928 		return (EINVAL);
4929 	}
4930 
4931 	if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
4932 	    (paddr_t *)&cr3_host_phys)) {
4933 		DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
4934 		    __func__);
4935 		if (vmwrite(VMCS_GUEST_PDPTE0, 0)) {
4936 			printf("%s: can't write guest PDPTE0\n", __func__);
4937 			return (EINVAL);
4938 		}
4939 
4940 		if (vmwrite(VMCS_GUEST_PDPTE1, 0)) {
4941 			printf("%s: can't write guest PDPTE1\n", __func__);
4942 			return (EINVAL);
4943 		}
4944 
4945 		if (vmwrite(VMCS_GUEST_PDPTE2, 0)) {
4946 			printf("%s: can't write guest PDPTE2\n", __func__);
4947 			return (EINVAL);
4948 		}
4949 
4950 		if (vmwrite(VMCS_GUEST_PDPTE3, 0)) {
4951 			printf("%s: can't write guest PDPTE3\n", __func__);
4952 			return (EINVAL);
4953 		}
4954 		return (0);
4955 	}
4956 
4957 	ret = 0;
4958 
4959 	/* We may sleep during km_alloc(9), so reload VMCS. */
4960 	vcpu->vc_last_pcpu = curcpu();
4961 	cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none,
4962 	    &kd_waitok);
4963 	if (vcpu_reload_vmcs_vmx(vcpu)) {
4964 		printf("%s: failed to reload vmcs\n", __func__);
4965 		ret = EINVAL;
4966 		goto exit;
4967 	}
4968 
4969 	if (!cr3_host_virt) {
4970 		printf("%s: can't allocate address for guest CR3 mapping\n",
4971 		    __func__);
4972 		return (ENOMEM);
4973 	}
4974 
4975 	pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ);
4976 
4977 	pdptes = (pd_entry_t *)cr3_host_virt;
4978 	if (vmwrite(VMCS_GUEST_PDPTE0, pdptes[0])) {
4979 		printf("%s: can't write guest PDPTE0\n", __func__);
4980 		ret = EINVAL;
4981 		goto exit;
4982 	}
4983 
4984 	if (vmwrite(VMCS_GUEST_PDPTE1, pdptes[1])) {
4985 		printf("%s: can't write guest PDPTE1\n", __func__);
4986 		ret = EINVAL;
4987 		goto exit;
4988 	}
4989 
4990 	if (vmwrite(VMCS_GUEST_PDPTE2, pdptes[2])) {
4991 		printf("%s: can't write guest PDPTE2\n", __func__);
4992 		ret = EINVAL;
4993 		goto exit;
4994 	}
4995 
4996 	if (vmwrite(VMCS_GUEST_PDPTE3, pdptes[3])) {
4997 		printf("%s: can't write guest PDPTE3\n", __func__);
4998 		ret = EINVAL;
4999 		goto exit;
5000 	}
5001 
5002 exit:
5003 	pmap_kremove(cr3_host_virt, PAGE_SIZE);
5004 
5005 	/* km_free(9) might sleep, so we need to reload VMCS. */
5006 	vcpu->vc_last_pcpu = curcpu();
5007 	km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none);
5008 	if (vcpu_reload_vmcs_vmx(vcpu)) {
5009 		printf("%s: failed to reload vmcs after km_free\n", __func__);
5010 		ret = EINVAL;
5011 	}
5012 
5013 	return (ret);
5014 }
5015 
5016 /*
5017  * vmx_handle_cr0_write
5018  *
5019  * Write handler for CR0. This function ensures valid values are written into
5020  * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc).
5021  *
5022  * Parameters
5023  *  vcpu: The vcpu taking the cr0 write exit
5024  *     r: The guest's desired (incoming) cr0 value
5025  *
5026  * Return values:
5027  *  0: if successful
5028  *  EINVAL: if an error occurred
5029  */
5030 int
vmx_handle_cr0_write(struct vcpu * vcpu,uint64_t r)5031 vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r)
5032 {
5033 	struct vmx_msr_store *msr_store;
5034 	struct vmx_invvpid_descriptor vid;
5035 	uint64_t ectls, oldcr0, cr4, mask;
5036 	int ret;
5037 
5038 	/* Check must-be-0 bits */
5039 	mask = vcpu->vc_vmx_cr0_fixed1;
5040 	if (~r & mask) {
5041 		/* Inject #GP, let the guest handle it */
5042 		DPRINTF("%s: guest set invalid bits in %%cr0. Zeros "
5043 		    "mask=0x%llx, data=0x%llx\n", __func__,
5044 		    vcpu->vc_vmx_cr0_fixed1, r);
5045 		vmm_inject_gp(vcpu);
5046 		return (0);
5047 	}
5048 
5049 	/* Check must-be-1 bits */
5050 	mask = vcpu->vc_vmx_cr0_fixed0;
5051 	if ((r & mask) != mask) {
5052 		/* Inject #GP, let the guest handle it */
5053 		DPRINTF("%s: guest set invalid bits in %%cr0. Ones "
5054 		    "mask=0x%llx, data=0x%llx\n", __func__,
5055 		    vcpu->vc_vmx_cr0_fixed0, r);
5056 		vmm_inject_gp(vcpu);
5057 		return (0);
5058 	}
5059 
5060 	if (r & 0xFFFFFFFF00000000ULL) {
5061 		DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid,"
5062 		    " inject #GP, cr0=0x%llx\n", __func__, r);
5063 		vmm_inject_gp(vcpu);
5064 		return (0);
5065 	}
5066 
5067 	if ((r & CR0_PG) && (r & CR0_PE) == 0) {
5068 		DPRINTF("%s: PG flag set when the PE flag is clear,"
5069 		    " inject #GP, cr0=0x%llx\n", __func__, r);
5070 		vmm_inject_gp(vcpu);
5071 		return (0);
5072 	}
5073 
5074 	if ((r & CR0_NW) && (r & CR0_CD) == 0) {
5075 		DPRINTF("%s: NW flag set when the CD flag is clear,"
5076 		    " inject #GP, cr0=0x%llx\n", __func__, r);
5077 		vmm_inject_gp(vcpu);
5078 		return (0);
5079 	}
5080 
5081 	if (vmread(VMCS_GUEST_IA32_CR0, &oldcr0)) {
5082 		printf("%s: can't read guest cr0\n", __func__);
5083 		return (EINVAL);
5084 	}
5085 
5086 	/* CR0 must always have NE set */
5087 	r |= CR0_NE;
5088 
5089 	if (vmwrite(VMCS_GUEST_IA32_CR0, r)) {
5090 		printf("%s: can't write guest cr0\n", __func__);
5091 		return (EINVAL);
5092 	}
5093 
5094 	/* If the guest hasn't enabled paging ... */
5095 	if (!(r & CR0_PG) && (oldcr0 & CR0_PG)) {
5096 		/* Paging was disabled (prev. enabled) - Flush TLB */
5097 		if (vcpu->vc_vmx_vpid_enabled) {
5098 			vid.vid_vpid = vcpu->vc_vpid;
5099 			vid.vid_addr = 0;
5100 			invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
5101 		}
5102 	} else if (!(oldcr0 & CR0_PG) && (r & CR0_PG)) {
5103 		/*
5104 		 * Since the guest has enabled paging, then the IA32_VMX_IA32E_MODE_GUEST
5105 		 * control must be set to the same as EFER_LME.
5106 		 */
5107 		msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5108 
5109 		if (vmread(VMCS_ENTRY_CTLS, &ectls)) {
5110 			printf("%s: can't read entry controls", __func__);
5111 			return (EINVAL);
5112 		}
5113 
5114 		if (msr_store[VCPU_REGS_EFER].vms_data & EFER_LME)
5115 			ectls |= IA32_VMX_IA32E_MODE_GUEST;
5116 		else
5117 			ectls &= ~IA32_VMX_IA32E_MODE_GUEST;
5118 
5119 		if (vmwrite(VMCS_ENTRY_CTLS, ectls)) {
5120 			printf("%s: can't write entry controls", __func__);
5121 			return (EINVAL);
5122 		}
5123 
5124 		if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5125 			printf("%s: can't read guest cr4\n", __func__);
5126 			return (EINVAL);
5127 		}
5128 
5129 		/* Load PDPTEs if PAE guest enabling paging */
5130 		if (cr4 & CR4_PAE) {
5131 			ret = vmx_load_pdptes(vcpu);
5132 
5133 			if (ret) {
5134 				printf("%s: updating PDPTEs failed\n", __func__);
5135 				return (ret);
5136 			}
5137 		}
5138 	}
5139 
5140 	return (0);
5141 }
5142 
5143 /*
5144  * vmx_handle_cr4_write
5145  *
5146  * Write handler for CR4. This function ensures valid values are written into
5147  * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc).
5148  *
5149  * Parameters
5150  *  vcpu: The vcpu taking the cr4 write exit
5151  *     r: The guest's desired (incoming) cr4 value
5152  *
5153  * Return values:
5154  *  0: if successful
5155  *  EINVAL: if an error occurred
5156  */
5157 int
vmx_handle_cr4_write(struct vcpu * vcpu,uint64_t r)5158 vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
5159 {
5160 	uint64_t mask;
5161 
5162 	/* Check must-be-0 bits */
5163 	mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
5164 	if (r & mask) {
5165 		/* Inject #GP, let the guest handle it */
5166 		DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
5167 		    "mask=0x%llx, data=0x%llx\n", __func__,
5168 		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
5169 		    r);
5170 		vmm_inject_gp(vcpu);
5171 		return (0);
5172 	}
5173 
5174 	/* Check must-be-1 bits */
5175 	mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
5176 	if ((r & mask) != mask) {
5177 		/* Inject #GP, let the guest handle it */
5178 		DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
5179 		    "mask=0x%llx, data=0x%llx\n", __func__,
5180 		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
5181 		    r);
5182 		vmm_inject_gp(vcpu);
5183 		return (0);
5184 	}
5185 
5186 	/* CR4_VMXE must always be enabled */
5187 	r |= CR4_VMXE;
5188 
5189 	if (vmwrite(VMCS_GUEST_IA32_CR4, r)) {
5190 		printf("%s: can't write guest cr4\n", __func__);
5191 		return (EINVAL);
5192 	}
5193 
5194 	return (0);
5195 }
5196 
5197 /*
5198  * vmx_handle_cr
5199  *
5200  * Handle reads/writes to control registers (except CR3)
5201  */
5202 int
vmx_handle_cr(struct vcpu * vcpu)5203 vmx_handle_cr(struct vcpu *vcpu)
5204 {
5205 	uint64_t insn_length, exit_qual, r;
5206 	uint8_t crnum, dir, reg;
5207 
5208 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5209 		printf("%s: can't obtain instruction length\n", __func__);
5210 		return (EINVAL);
5211 	}
5212 
5213 	if (vmx_get_exit_qualification(&exit_qual)) {
5214 		printf("%s: can't get exit qual\n", __func__);
5215 		return (EINVAL);
5216 	}
5217 
5218 	/* Low 4 bits of exit_qual represent the CR number */
5219 	crnum = exit_qual & 0xf;
5220 
5221 	/*
5222 	 * Bits 5:4 indicate the direction of operation (or special CR-modifying
5223 	 * instruction)
5224 	 */
5225 	dir = (exit_qual & 0x30) >> 4;
5226 
5227 	/* Bits 11:8 encode the source/target register */
5228 	reg = (exit_qual & 0xf00) >> 8;
5229 
5230 	switch (dir) {
5231 	case CR_WRITE:
5232 		if (crnum == 0 || crnum == 4) {
5233 			switch (reg) {
5234 			case 0: r = vcpu->vc_gueststate.vg_rax; break;
5235 			case 1: r = vcpu->vc_gueststate.vg_rcx; break;
5236 			case 2: r = vcpu->vc_gueststate.vg_rdx; break;
5237 			case 3: r = vcpu->vc_gueststate.vg_rbx; break;
5238 			case 4: if (vmread(VMCS_GUEST_IA32_RSP, &r)) {
5239 					printf("%s: unable to read guest "
5240 					    "RSP\n", __func__);
5241 					return (EINVAL);
5242 				}
5243 				break;
5244 			case 5: r = vcpu->vc_gueststate.vg_rbp; break;
5245 			case 6: r = vcpu->vc_gueststate.vg_rsi; break;
5246 			case 7: r = vcpu->vc_gueststate.vg_rdi; break;
5247 			case 8: r = vcpu->vc_gueststate.vg_r8; break;
5248 			case 9: r = vcpu->vc_gueststate.vg_r9; break;
5249 			case 10: r = vcpu->vc_gueststate.vg_r10; break;
5250 			case 11: r = vcpu->vc_gueststate.vg_r11; break;
5251 			case 12: r = vcpu->vc_gueststate.vg_r12; break;
5252 			case 13: r = vcpu->vc_gueststate.vg_r13; break;
5253 			case 14: r = vcpu->vc_gueststate.vg_r14; break;
5254 			case 15: r = vcpu->vc_gueststate.vg_r15; break;
5255 			}
5256 			DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n",
5257 			    __func__, crnum, vcpu->vc_gueststate.vg_rip, r);
5258 		}
5259 
5260 		if (crnum == 0)
5261 			vmx_handle_cr0_write(vcpu, r);
5262 
5263 		if (crnum == 4)
5264 			vmx_handle_cr4_write(vcpu, r);
5265 
5266 		break;
5267 	case CR_READ:
5268 		DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum,
5269 		    vcpu->vc_gueststate.vg_rip);
5270 		break;
5271 	case CR_CLTS:
5272 		DPRINTF("%s: clts instruction @ %llx\n", __func__,
5273 		    vcpu->vc_gueststate.vg_rip);
5274 		break;
5275 	case CR_LMSW:
5276 		DPRINTF("%s: lmsw instruction @ %llx\n", __func__,
5277 		    vcpu->vc_gueststate.vg_rip);
5278 		break;
5279 	default:
5280 		DPRINTF("%s: unknown cr access @ %llx\n", __func__,
5281 		    vcpu->vc_gueststate.vg_rip);
5282 	}
5283 
5284 	vcpu->vc_gueststate.vg_rip += insn_length;
5285 
5286 	return (0);
5287 }
5288 
5289 /*
5290  * vmx_handle_rdmsr
5291  *
5292  * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access
5293  * and won't end up here. This handler is primarily intended to catch otherwise
5294  * unknown MSR access for possible later inclusion in the bitmap list. For
5295  * each MSR access that ends up here, we log the access (when VMM_DEBUG is
5296  * enabled)
5297  *
5298  * Parameters:
5299  *  vcpu: vcpu structure containing instruction info causing the exit
5300  *
5301  * Return value:
5302  *  0: The operation was successful
5303  *  EINVAL: An error occurred
5304  */
5305 int
vmx_handle_rdmsr(struct vcpu * vcpu)5306 vmx_handle_rdmsr(struct vcpu *vcpu)
5307 {
5308 	uint64_t insn_length;
5309 	uint64_t *rax, *rdx;
5310 	uint64_t *rcx;
5311 	int ret;
5312 
5313 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5314 		printf("%s: can't obtain instruction length\n", __func__);
5315 		return (EINVAL);
5316 	}
5317 
5318 	if (insn_length != 2) {
5319 		DPRINTF("%s: RDMSR with instruction length %lld not "
5320 		    "supported\n", __func__, insn_length);
5321 		return (EINVAL);
5322 	}
5323 
5324 	rax = &vcpu->vc_gueststate.vg_rax;
5325 	rcx = &vcpu->vc_gueststate.vg_rcx;
5326 	rdx = &vcpu->vc_gueststate.vg_rdx;
5327 
5328 	switch (*rcx) {
5329 	case MSR_BIOS_SIGN:
5330 	case MSR_PLATFORM_ID:
5331 		/* Ignored */
5332 		*rax = 0;
5333 		*rdx = 0;
5334 		break;
5335 	case MSR_CR_PAT:
5336 		*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5337 		*rdx = (vcpu->vc_shadow_pat >> 32);
5338 		break;
5339 	default:
5340 		/* Unsupported MSRs causes #GP exception, don't advance %rip */
5341 		DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
5342 		    __func__, *rcx);
5343 		ret = vmm_inject_gp(vcpu);
5344 		return (ret);
5345 	}
5346 
5347 	vcpu->vc_gueststate.vg_rip += insn_length;
5348 
5349 	return (0);
5350 }
5351 
5352 /*
5353  * vmx_handle_xsetbv
5354  *
5355  * VMX-specific part of the xsetbv instruction exit handler
5356  *
5357  * Parameters:
5358  *  vcpu: vcpu structure containing instruction info causing the exit
5359  *
5360  * Return value:
5361  *  0: The operation was successful
5362  *  EINVAL: An error occurred
5363  */
5364 int
vmx_handle_xsetbv(struct vcpu * vcpu)5365 vmx_handle_xsetbv(struct vcpu *vcpu)
5366 {
5367 	uint64_t insn_length, *rax;
5368 	int ret;
5369 
5370 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5371 		printf("%s: can't obtain instruction length\n", __func__);
5372 		return (EINVAL);
5373 	}
5374 
5375 	/* All XSETBV instructions are 3 bytes */
5376 	if (insn_length != 3) {
5377 		DPRINTF("%s: XSETBV with instruction length %lld not "
5378 		    "supported\n", __func__, insn_length);
5379 		return (EINVAL);
5380 	}
5381 
5382 	rax = &vcpu->vc_gueststate.vg_rax;
5383 
5384 	ret = vmm_handle_xsetbv(vcpu, rax);
5385 
5386 	vcpu->vc_gueststate.vg_rip += insn_length;
5387 
5388 	return ret;
5389 }
5390 
5391 /*
5392  * svm_handle_xsetbv
5393  *
5394  * SVM-specific part of the xsetbv instruction exit handler
5395  *
5396  * Parameters:
5397  *  vcpu: vcpu structure containing instruction info causing the exit
5398  *
5399  * Return value:
5400  *  0: The operation was successful
5401  *  EINVAL: An error occurred
5402  */
5403 int
svm_handle_xsetbv(struct vcpu * vcpu)5404 svm_handle_xsetbv(struct vcpu *vcpu)
5405 {
5406 	uint64_t insn_length, *rax;
5407 	int ret;
5408 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5409 
5410 	/* All XSETBV instructions are 3 bytes */
5411 	insn_length = 3;
5412 
5413 	rax = &vmcb->v_rax;
5414 
5415 	ret = vmm_handle_xsetbv(vcpu, rax);
5416 
5417 	vcpu->vc_gueststate.vg_rip += insn_length;
5418 
5419 	return ret;
5420 }
5421 
5422 /*
5423  * vmm_handle_xsetbv
5424  *
5425  * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
5426  * limited to the xsave_mask in use in the host.
5427  *
5428  * Parameters:
5429  *  vcpu: vcpu structure containing instruction info causing the exit
5430  *  rax: pointer to guest %rax
5431  *
5432  * Return value:
5433  *  0: The operation was successful
5434  *  EINVAL: An error occurred
5435  */
5436 int
vmm_handle_xsetbv(struct vcpu * vcpu,uint64_t * rax)5437 vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax)
5438 {
5439 	uint64_t *rdx, *rcx, val;
5440 
5441 	rcx = &vcpu->vc_gueststate.vg_rcx;
5442 	rdx = &vcpu->vc_gueststate.vg_rdx;
5443 
5444 	if (vmm_get_guest_cpu_cpl(vcpu) != 0) {
5445 		DPRINTF("%s: guest cpl not zero\n", __func__);
5446 		return (vmm_inject_gp(vcpu));
5447 	}
5448 
5449 	if (*rcx != 0) {
5450 		DPRINTF("%s: guest specified invalid xcr register number "
5451 		    "%lld\n", __func__, *rcx);
5452 		return (vmm_inject_gp(vcpu));
5453 	}
5454 
5455 	val = *rax + (*rdx << 32);
5456 	if (val & ~xsave_mask) {
5457 		DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n",
5458 		    __func__, val);
5459 		return (vmm_inject_gp(vcpu));
5460 	}
5461 
5462 	vcpu->vc_gueststate.vg_xcr0 = val;
5463 
5464 	return (0);
5465 }
5466 
5467 /*
5468  * vmx_handle_misc_enable_msr
5469  *
5470  * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We
5471  * limit what the guest can write to this MSR (certain hardware-related
5472  * settings like speedstep, etc).
5473  *
5474  * Parameters:
5475  *  vcpu: vcpu structure containing information about the wrmsr causing this
5476  *   exit
5477  */
5478 void
vmx_handle_misc_enable_msr(struct vcpu * vcpu)5479 vmx_handle_misc_enable_msr(struct vcpu *vcpu)
5480 {
5481 	uint64_t *rax, *rdx;
5482 	struct vmx_msr_store *msr_store;
5483 
5484 	rax = &vcpu->vc_gueststate.vg_rax;
5485 	rdx = &vcpu->vc_gueststate.vg_rdx;
5486 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5487 
5488 	/* Filter out guest writes to TCC, EIST, and xTPR */
5489 	*rax &= ~(MISC_ENABLE_TCC | MISC_ENABLE_EIST_ENABLED |
5490 	    MISC_ENABLE_xTPR_MESSAGE_DISABLE);
5491 
5492 	msr_store[VCPU_REGS_MISC_ENABLE].vms_data = *rax | (*rdx << 32);
5493 }
5494 
5495 /*
5496  * vmx_handle_wrmsr
5497  *
5498  * Handler for wrmsr instructions. This handler logs the access, and discards
5499  * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end
5500  * up here (it will be whitelisted in the MSR bitmap).
5501  *
5502  * Parameters:
5503  *  vcpu: vcpu structure containing instruction info causing the exit
5504  *
5505  * Return value:
5506  *  0: The operation was successful
5507  *  EINVAL: An error occurred
5508  */
5509 int
vmx_handle_wrmsr(struct vcpu * vcpu)5510 vmx_handle_wrmsr(struct vcpu *vcpu)
5511 {
5512 	uint64_t insn_length, val;
5513 	uint64_t *rax, *rdx, *rcx;
5514 	int ret;
5515 
5516 	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5517 		printf("%s: can't obtain instruction length\n", __func__);
5518 		return (EINVAL);
5519 	}
5520 
5521 	if (insn_length != 2) {
5522 		DPRINTF("%s: WRMSR with instruction length %lld not "
5523 		    "supported\n", __func__, insn_length);
5524 		return (EINVAL);
5525 	}
5526 
5527 	rax = &vcpu->vc_gueststate.vg_rax;
5528 	rcx = &vcpu->vc_gueststate.vg_rcx;
5529 	rdx = &vcpu->vc_gueststate.vg_rdx;
5530 	val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5531 
5532 	switch (*rcx) {
5533 	case MSR_CR_PAT:
5534 		if (!vmm_pat_is_valid(val)) {
5535 			ret = vmm_inject_gp(vcpu);
5536 			return (ret);
5537 		}
5538 		vcpu->vc_shadow_pat = val;
5539 		break;
5540 	case MSR_MISC_ENABLE:
5541 		vmx_handle_misc_enable_msr(vcpu);
5542 		break;
5543 	case MSR_SMM_MONITOR_CTL:
5544 		/*
5545 		 * 34.15.5 - Enabling dual monitor treatment
5546 		 *
5547 		 * Unsupported, so inject #GP and return without
5548 		 * advancing %rip.
5549 		 */
5550 		ret = vmm_inject_gp(vcpu);
5551 		return (ret);
5552 	case KVM_MSR_SYSTEM_TIME:
5553 		vmm_init_pvclock(vcpu,
5554 		    (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
5555 		break;
5556 #ifdef VMM_DEBUG
5557 	default:
5558 		/*
5559 		 * Log the access, to be able to identify unknown MSRs
5560 		 */
5561 		DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5562 		    "written from guest=0x%llx:0x%llx\n", __func__,
5563 		    *rcx, *rdx, *rax);
5564 #endif /* VMM_DEBUG */
5565 	}
5566 
5567 	vcpu->vc_gueststate.vg_rip += insn_length;
5568 
5569 	return (0);
5570 }
5571 
5572 /*
5573  * svm_handle_msr
5574  *
5575  * Handler for MSR instructions.
5576  *
5577  * Parameters:
5578  *  vcpu: vcpu structure containing instruction info causing the exit
5579  *
5580  * Return value:
5581  *  Always 0 (successful)
5582  */
5583 int
svm_handle_msr(struct vcpu * vcpu)5584 svm_handle_msr(struct vcpu *vcpu)
5585 {
5586 	uint64_t insn_length, val;
5587 	uint64_t *rax, *rcx, *rdx;
5588 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5589 	int ret;
5590 
5591 	/* XXX: Validate RDMSR / WRMSR insn_length */
5592 	insn_length = 2;
5593 
5594 	rax = &vmcb->v_rax;
5595 	rcx = &vcpu->vc_gueststate.vg_rcx;
5596 	rdx = &vcpu->vc_gueststate.vg_rdx;
5597 
5598 	if (vmcb->v_exitinfo1 == 1) {
5599 		/* WRMSR */
5600 		val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5601 
5602 		switch (*rcx) {
5603 		case MSR_CR_PAT:
5604 			if (!vmm_pat_is_valid(val)) {
5605 				ret = vmm_inject_gp(vcpu);
5606 				return (ret);
5607 			}
5608 			vcpu->vc_shadow_pat = val;
5609 			break;
5610 		case MSR_EFER:
5611 			vmcb->v_efer = *rax | EFER_SVME;
5612 			break;
5613 		case KVM_MSR_SYSTEM_TIME:
5614 			vmm_init_pvclock(vcpu,
5615 			    (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
5616 			break;
5617 		default:
5618 			/* Log the access, to be able to identify unknown MSRs */
5619 			DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5620 			    "written from guest=0x%llx:0x%llx\n", __func__,
5621 			    *rcx, *rdx, *rax);
5622 		}
5623 	} else {
5624 		/* RDMSR */
5625 		switch (*rcx) {
5626 		case MSR_BIOS_SIGN:
5627 		case MSR_INT_PEN_MSG:
5628 		case MSR_PLATFORM_ID:
5629 			/* Ignored */
5630 			*rax = 0;
5631 			*rdx = 0;
5632 			break;
5633 		case MSR_CR_PAT:
5634 			*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5635 			*rdx = (vcpu->vc_shadow_pat >> 32);
5636 			break;
5637 		case MSR_DE_CFG:
5638 			/* LFENCE serializing bit is set by host */
5639 			*rax = DE_CFG_SERIALIZE_LFENCE;
5640 			*rdx = 0;
5641 			break;
5642 		default:
5643 			/*
5644 			 * Unsupported MSRs causes #GP exception, don't advance
5645 			 * %rip
5646 			 */
5647 			DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
5648 			    "injecting #GP\n", __func__, *rcx);
5649 			ret = vmm_inject_gp(vcpu);
5650 			return (ret);
5651 		}
5652 	}
5653 
5654 	vcpu->vc_gueststate.vg_rip += insn_length;
5655 
5656 	return (0);
5657 }
5658 
5659 /* Handle cpuid(0xd) and its subleafs */
5660 static void
vmm_handle_cpuid_0xd(struct vcpu * vcpu,uint32_t subleaf,uint64_t * rax,uint32_t eax,uint32_t ebx,uint32_t ecx,uint32_t edx)5661 vmm_handle_cpuid_0xd(struct vcpu *vcpu, uint32_t subleaf, uint64_t *rax,
5662     uint32_t eax, uint32_t ebx, uint32_t ecx, uint32_t edx)
5663 {
5664 	if (subleaf == 0) {
5665 		/*
5666 		 * CPUID(0xd.0) depends on the value in XCR0 and MSR_XSS.  If
5667 		 * the guest XCR0 isn't the same as the host then set it, redo
5668 		 * the CPUID, and restore it.
5669 		 */
5670 		uint64_t xcr0 = vcpu->vc_gueststate.vg_xcr0;
5671 
5672 		/*
5673 		 * "ecx enumerates the size required ... for an area
5674 		 *  containing all the ... components supported by this
5675 		 *  processor"
5676 		 * "ebx enumerates the size required ... for an area
5677 		 *  containing all the ... components corresponding to bits
5678 		 *  currently set in xcr0"
5679 		 * So: since the VMM 'processor' is what our base kernel uses,
5680 		 * the VMM ecx is our ebx
5681 		 */
5682 		ecx = ebx;
5683 		if (xcr0 != (xsave_mask & XFEATURE_XCR0_MASK)) {
5684 			uint32_t dummy;
5685 			xsetbv(0, xcr0);
5686 			CPUID_LEAF(0xd, subleaf, eax, ebx, dummy, edx);
5687 			xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
5688 		}
5689 		eax = xsave_mask & XFEATURE_XCR0_MASK;
5690 		edx = (xsave_mask & XFEATURE_XCR0_MASK) >> 32;
5691 	} else if (subleaf == 1) {
5692 		/* mask out XSAVEC, XSAVES, and XFD support */
5693 		eax &= XSAVE_XSAVEOPT | XSAVE_XGETBV1;
5694 		ebx = 0;	/* no xsavec or xsaves for now */
5695 		ecx = edx = 0;	/* no xsaves for now */
5696 	} else if (subleaf >= 63 ||
5697 	    ((1ULL << subleaf) & xsave_mask & XFEATURE_XCR0_MASK) == 0) {
5698 		/* disclaim subleaves of features we don't expose */
5699 		eax = ebx = ecx = edx = 0;
5700 	} else {
5701 		/* disclaim compressed alignment or xfd support */
5702 		ecx = 0;
5703 	}
5704 
5705 	*rax = eax;
5706 	vcpu->vc_gueststate.vg_rbx = ebx;
5707 	vcpu->vc_gueststate.vg_rcx = ecx;
5708 	vcpu->vc_gueststate.vg_rdx = edx;
5709 }
5710 
5711 /*
5712  * vmm_handle_cpuid
5713  *
5714  * Exit handler for CPUID instruction
5715  *
5716  * Parameters:
5717  *  vcpu: vcpu causing the CPUID exit
5718  *
5719  * Return value:
5720  *  0: the exit was processed successfully
5721  *  EINVAL: error occurred validating the CPUID instruction arguments
5722  */
5723 int
vmm_handle_cpuid(struct vcpu * vcpu)5724 vmm_handle_cpuid(struct vcpu *vcpu)
5725 {
5726 	uint64_t insn_length, cr4;
5727 	uint64_t *rax, *rbx, *rcx, *rdx;
5728 	struct vmcb *vmcb;
5729 	uint32_t leaf, subleaf, eax, ebx, ecx, edx;
5730 	struct vmx_msr_store *msr_store;
5731 	int vmm_cpuid_level;
5732 
5733 	/* what's the cpuid level we support/advertise? */
5734 	vmm_cpuid_level = cpuid_level;
5735 	if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
5736 		vmm_cpuid_level = 0x15;
5737 
5738 	if (vmm_softc->mode == VMM_MODE_EPT) {
5739 		if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5740 			DPRINTF("%s: can't obtain instruction length\n",
5741 			    __func__);
5742 			return (EINVAL);
5743 		}
5744 
5745 		if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5746 			DPRINTF("%s: can't obtain cr4\n", __func__);
5747 			return (EINVAL);
5748 		}
5749 
5750 		rax = &vcpu->vc_gueststate.vg_rax;
5751 
5752 		/*
5753 		 * "CPUID leaves above 02H and below 80000000H are only
5754 		 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
5755 		 * default value 0"
5756 		 */
5757 		msr_store =
5758 		    (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5759 		if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
5760 		    MISC_ENABLE_LIMIT_CPUID_MAXVAL)
5761 			vmm_cpuid_level = 0x02;
5762 	} else {
5763 		/* XXX: validate insn_length 2 */
5764 		insn_length = 2;
5765 		vmcb = (struct vmcb *)vcpu->vc_control_va;
5766 		rax = &vmcb->v_rax;
5767 		cr4 = vmcb->v_cr4;
5768 	}
5769 
5770 	rbx = &vcpu->vc_gueststate.vg_rbx;
5771 	rcx = &vcpu->vc_gueststate.vg_rcx;
5772 	rdx = &vcpu->vc_gueststate.vg_rdx;
5773 	vcpu->vc_gueststate.vg_rip += insn_length;
5774 
5775 	leaf = *rax;
5776 	subleaf = *rcx;
5777 
5778 	/*
5779 	 * "If a value entered for CPUID.EAX is higher than the maximum input
5780 	 *  value for basic or extended function for that processor then the
5781 	 *  data for the highest basic information leaf is returned."
5782 	 *
5783 	 * "When CPUID returns the highest basic leaf information as a result
5784 	 *  of an invalid input EAX value, any dependence on input ECX value
5785 	 *  in the basic leaf is honored."
5786 	 *
5787 	 * This means if leaf is between vmm_cpuid_level and 0x40000000 (the start
5788 	 * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
5789 	 * altering subleaf.  Also, if leaf is greater than the extended function
5790 	 * info, clamp also to vmm_cpuid_level.
5791 	 */
5792 	if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
5793 	    (leaf > curcpu()->ci_pnfeatset)) {
5794 		DPRINTF("%s: invalid cpuid input leaf 0x%x, guest rip="
5795 		    "0x%llx - resetting to 0x%x\n", __func__, leaf,
5796 		    vcpu->vc_gueststate.vg_rip - insn_length,
5797 		    vmm_cpuid_level);
5798 		leaf = vmm_cpuid_level;
5799 	}
5800 
5801 	/* we fake up values in the range (cpuid_level, vmm_cpuid_level] */
5802 	if (leaf <= cpuid_level || leaf > 0x80000000)
5803 		CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
5804 	else
5805 		eax = ebx = ecx = edx = 0;
5806 
5807 	switch (leaf) {
5808 	case 0x00:	/* Max level and vendor ID */
5809 		*rax = vmm_cpuid_level;
5810 		*rbx = *((uint32_t *)&cpu_vendor);
5811 		*rdx = *((uint32_t *)&cpu_vendor + 1);
5812 		*rcx = *((uint32_t *)&cpu_vendor + 2);
5813 		break;
5814 	case 0x01:	/* Version, brand, feature info */
5815 		*rax = cpu_id;
5816 		/* mask off host's APIC ID, reset to vcpu id */
5817 		*rbx = cpu_ebxfeature & 0x0000FFFF;
5818 		*rbx |= (vcpu->vc_id & 0xFF) << 24;
5819 		*rcx = (cpu_ecxfeature | CPUIDECX_HV) & VMM_CPUIDECX_MASK;
5820 
5821 		/* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
5822 		if (cr4 & CR4_OSXSAVE)
5823 			*rcx |= CPUIDECX_OSXSAVE;
5824 		else
5825 			*rcx &= ~CPUIDECX_OSXSAVE;
5826 
5827 		*rdx = curcpu()->ci_feature_flags & VMM_CPUIDEDX_MASK;
5828 		break;
5829 	case 0x02:	/* Cache and TLB information */
5830 		*rax = eax;
5831 		*rbx = ebx;
5832 		*rcx = ecx;
5833 		*rdx = edx;
5834 		break;
5835 	case 0x03:	/* Processor serial number (not supported) */
5836 		DPRINTF("%s: function 0x03 (processor serial number) not "
5837 		"supported\n", __func__);
5838 		*rax = 0;
5839 		*rbx = 0;
5840 		*rcx = 0;
5841 		*rdx = 0;
5842 		break;
5843 	case 0x04: 	/* Deterministic cache info */
5844 		*rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
5845 		*rbx = ebx;
5846 		*rcx = ecx;
5847 		*rdx = edx;
5848 		break;
5849 	case 0x05:	/* MONITOR/MWAIT (not supported) */
5850 		DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
5851 		    __func__);
5852 		*rax = 0;
5853 		*rbx = 0;
5854 		*rcx = 0;
5855 		*rdx = 0;
5856 		break;
5857 	case 0x06:	/* Thermal / Power management (not supported) */
5858 		DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
5859 		    __func__);
5860 		*rax = 0;
5861 		*rbx = 0;
5862 		*rcx = 0;
5863 		*rdx = 0;
5864 		break;
5865 	case 0x07:	/* SEFF */
5866 		if (subleaf == 0) {
5867 			*rax = 0;	/* Highest subleaf supported */
5868 			*rbx = curcpu()->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK;
5869 			*rcx = curcpu()->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK;
5870 			*rdx = curcpu()->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK;
5871 			/*
5872 			 * Only expose PKU support if we've detected it in use
5873 			 * on the host.
5874 			 */
5875 			if (vmm_softc->sc_md.pkru_enabled)
5876 				*rcx |= SEFF0ECX_PKU;
5877 			else
5878 				*rcx &= ~SEFF0ECX_PKU;
5879 
5880 			/* Expose IBT bit if we've enabled CET on the host. */
5881 			if (rcr4() & CR4_CET)
5882 				*rdx |= SEFF0EDX_IBT;
5883 			else
5884 				*rdx &= ~SEFF0EDX_IBT;
5885 
5886 		} else {
5887 			/* Unsupported subleaf */
5888 			DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
5889 			    "0x%x not supported\n", __func__, subleaf);
5890 			*rax = 0;
5891 			*rbx = 0;
5892 			*rcx = 0;
5893 			*rdx = 0;
5894 		}
5895 		break;
5896 	case 0x09:	/* Direct Cache Access (not supported) */
5897 		DPRINTF("%s: function 0x09 (direct cache access) not "
5898 		    "supported\n", __func__);
5899 		*rax = 0;
5900 		*rbx = 0;
5901 		*rcx = 0;
5902 		*rdx = 0;
5903 		break;
5904 	case 0x0a:	/* Architectural perf monitoring (not supported) */
5905 		DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
5906 		    __func__);
5907 		*rax = 0;
5908 		*rbx = 0;
5909 		*rcx = 0;
5910 		*rdx = 0;
5911 		break;
5912 	case 0x0b:	/* Extended topology enumeration (not supported) */
5913 		DPRINTF("%s: function 0x0b (topology enumeration) not "
5914 		    "supported\n", __func__);
5915 		*rax = 0;
5916 		*rbx = 0;
5917 		*rcx = 0;
5918 		*rdx = 0;
5919 		break;
5920 	case 0x0d:	/* Processor ext. state information */
5921 		vmm_handle_cpuid_0xd(vcpu, subleaf, rax, eax, ebx, ecx, edx);
5922 		break;
5923 	case 0x0f:	/* QoS info (not supported) */
5924 		DPRINTF("%s: function 0x0f (QoS info) not supported\n",
5925 		    __func__);
5926 		*rax = 0;
5927 		*rbx = 0;
5928 		*rcx = 0;
5929 		*rdx = 0;
5930 		break;
5931 	case 0x14:	/* Processor Trace info (not supported) */
5932 		DPRINTF("%s: function 0x14 (processor trace info) not "
5933 		    "supported\n", __func__);
5934 		*rax = 0;
5935 		*rbx = 0;
5936 		*rcx = 0;
5937 		*rdx = 0;
5938 		break;
5939 	case 0x15:
5940 		if (cpuid_level >= 0x15) {
5941 			*rax = eax;
5942 			*rbx = ebx;
5943 			*rcx = ecx;
5944 			*rdx = edx;
5945 		} else {
5946 			KASSERT(tsc_is_invariant);
5947 			*rax = 1;
5948 			*rbx = 100;
5949 			*rcx = tsc_frequency / 100;
5950 			*rdx = 0;
5951 		}
5952 		break;
5953 	case 0x16:	/* Processor frequency info */
5954 		*rax = eax;
5955 		*rbx = ebx;
5956 		*rcx = ecx;
5957 		*rdx = edx;
5958 		break;
5959 	case 0x40000000:	/* Hypervisor information */
5960 		*rax = 0;
5961 		*rbx = *((uint32_t *)&vmm_hv_signature[0]);
5962 		*rcx = *((uint32_t *)&vmm_hv_signature[4]);
5963 		*rdx = *((uint32_t *)&vmm_hv_signature[8]);
5964 		break;
5965 	case 0x40000001:	/* KVM hypervisor features */
5966 		*rax = (1 << KVM_FEATURE_CLOCKSOURCE2) |
5967 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
5968 		*rbx = 0;
5969 		*rcx = 0;
5970 		*rdx = 0;
5971 		break;
5972 	case 0x80000000:	/* Extended function level */
5973 		/* We don't emulate past 0x8000001f currently. */
5974 		*rax = min(curcpu()->ci_pnfeatset, 0x8000001f);
5975 		*rbx = 0;
5976 		*rcx = 0;
5977 		*rdx = 0;
5978 		break;
5979 	case 0x80000001: 	/* Extended function info */
5980 		*rax = curcpu()->ci_efeature_eax;
5981 		*rbx = 0;	/* Reserved */
5982 		*rcx = curcpu()->ci_efeature_ecx & VMM_ECPUIDECX_MASK;
5983 		*rdx = curcpu()->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK;
5984 		break;
5985 	case 0x80000002:	/* Brand string */
5986 		*rax = curcpu()->ci_brand[0];
5987 		*rbx = curcpu()->ci_brand[1];
5988 		*rcx = curcpu()->ci_brand[2];
5989 		*rdx = curcpu()->ci_brand[3];
5990 		break;
5991 	case 0x80000003:	/* Brand string */
5992 		*rax = curcpu()->ci_brand[4];
5993 		*rbx = curcpu()->ci_brand[5];
5994 		*rcx = curcpu()->ci_brand[6];
5995 		*rdx = curcpu()->ci_brand[7];
5996 		break;
5997 	case 0x80000004:	/* Brand string */
5998 		*rax = curcpu()->ci_brand[8];
5999 		*rbx = curcpu()->ci_brand[9];
6000 		*rcx = curcpu()->ci_brand[10];
6001 		*rdx = curcpu()->ci_brand[11];
6002 		break;
6003 	case 0x80000005:	/* Reserved (Intel), cacheinfo (AMD) */
6004 		*rax = eax;
6005 		*rbx = ebx;
6006 		*rcx = ecx;
6007 		*rdx = edx;
6008 		break;
6009 	case 0x80000006:	/* ext. cache info */
6010 		*rax = eax;
6011 		*rbx = ebx;
6012 		*rcx = ecx;
6013 		*rdx = edx;
6014 		break;
6015 	case 0x80000007:	/* apmi */
6016 		*rax = eax;
6017 		*rbx = ebx;
6018 		*rcx = ecx;
6019 		*rdx = edx & VMM_APMI_EDX_INCLUDE_MASK;
6020 		break;
6021 	case 0x80000008:	/* Phys bits info and topology (AMD) */
6022 		*rax = eax;
6023 		*rbx = ebx & VMM_AMDSPEC_EBX_MASK;
6024 		/* Reset %rcx (topology) */
6025 		*rcx = 0;
6026 		*rdx = edx;
6027 		break;
6028 	case 0x8000001d:	/* cache topology (AMD) */
6029 		*rax = eax;
6030 		*rbx = ebx;
6031 		*rcx = ecx;
6032 		*rdx = edx;
6033 		break;
6034 	case 0x8000001f:	/* encryption features (AMD) */
6035 		*rax = eax;
6036 		*rbx = ebx;
6037 		*rcx = ecx;
6038 		*rdx = edx;
6039 		break;
6040 	default:
6041 		DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
6042 		*rax = 0;
6043 		*rbx = 0;
6044 		*rcx = 0;
6045 		*rdx = 0;
6046 	}
6047 
6048 
6049 	if (vmm_softc->mode == VMM_MODE_RVI) {
6050 		/*
6051 		 * update %rax. the rest of the registers get updated in
6052 		 * svm_enter_guest
6053 	 	 */
6054 		vmcb->v_rax = *rax;
6055 	}
6056 
6057 	return (0);
6058 }
6059 
6060 /*
6061  * vcpu_run_svm
6062  *
6063  * SVM main loop used to run a VCPU.
6064  *
6065  * Parameters:
6066  *  vcpu: The VCPU to run
6067  *  vrp: run parameters
6068  *
6069  * Return values:
6070  *  0: The run loop exited and no help is needed from vmd
6071  *  EAGAIN: The run loop exited and help from vmd is needed
6072  *  EINVAL: an error occurred
6073  */
6074 int
vcpu_run_svm(struct vcpu * vcpu,struct vm_run_params * vrp)6075 vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
6076 {
6077 	int ret = 0;
6078 	struct region_descriptor gdt;
6079 	struct cpu_info *ci = NULL;
6080 	uint64_t exit_reason;
6081 	struct schedstate_percpu *spc;
6082 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6083 
6084 	if (vrp->vrp_intr_pending)
6085 		vcpu->vc_intr = 1;
6086 	else
6087 		vcpu->vc_intr = 0;
6088 
6089 	/*
6090 	 * If we are returning from userspace (vmd) because we exited
6091 	 * last time, fix up any needed vcpu state first. Which state
6092 	 * needs to be fixed up depends on what vmd populated in the
6093 	 * exit data structure.
6094 	 */
6095 	switch (vcpu->vc_gueststate.vg_exit_reason) {
6096 	case SVM_VMEXIT_IOIO:
6097 		if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
6098 			vcpu->vc_gueststate.vg_rax =
6099 			    vcpu->vc_exit.vei.vei_data;
6100 			vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
6101 		}
6102 		vcpu->vc_gueststate.vg_rip =
6103 		    vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
6104 		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
6105 		break;
6106 	case SVM_VMEXIT_NPF:
6107 		ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
6108 		    &vcpu->vc_exit.vrs);
6109 		if (ret) {
6110 			printf("%s: vm %d vcpu %d failed to update "
6111 			    "registers\n", __func__,
6112 			    vcpu->vc_parent->vm_id, vcpu->vc_id);
6113 			return (EINVAL);
6114 		}
6115 		break;
6116 	}
6117 	memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
6118 
6119 	while (ret == 0) {
6120 		vmm_update_pvclock(vcpu);
6121 		if (ci != curcpu()) {
6122 			/*
6123 			 * We are launching for the first time, or we are
6124 			 * resuming from a different pcpu, so we need to
6125 			 * reset certain pcpu-specific values.
6126 			 */
6127 			ci = curcpu();
6128 			setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
6129 
6130 			if (ci != vcpu->vc_last_pcpu) {
6131 				/*
6132 				 * Flush TLB by guest ASID if feature
6133 				 * available, flush entire TLB if not.
6134 				 */
6135 				if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid)
6136 					vmcb->v_tlb_control =
6137 					    SVM_TLB_CONTROL_FLUSH_ASID;
6138 				else
6139 					vmcb->v_tlb_control =
6140 					    SVM_TLB_CONTROL_FLUSH_ALL;
6141 
6142 				svm_set_dirty(vcpu, SVM_CLEANBITS_ALL);
6143 			}
6144 
6145 			vcpu->vc_last_pcpu = ci;
6146 
6147 			if (gdt.rd_base == 0) {
6148 				ret = EINVAL;
6149 				break;
6150 			}
6151 		}
6152 
6153 		/* Handle vmd(8) injected interrupts */
6154 		/* Is there an interrupt pending injection? */
6155 		if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR &&
6156 		    vcpu->vc_irqready) {
6157 			vmcb->v_eventinj = vcpu->vc_inject.vie_vector |
6158 			    (1U << 31);
6159 			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6160 		}
6161 
6162 		/* Inject event if present */
6163 		if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
6164 			vmcb->v_eventinj = vcpu->vc_inject.vie_vector;
6165 
6166 			/* Set the "Event Valid" flag for certain vectors */
6167 			switch (vcpu->vc_inject.vie_vector) {
6168 			case VMM_EX_BP:
6169 			case VMM_EX_OF:
6170 			case VMM_EX_DB:
6171 				/*
6172 				 * Software exception.
6173 				 * XXX check nRIP support.
6174 				 */
6175 				vmcb->v_eventinj |= (4ULL << 8);
6176 				break;
6177 			case VMM_EX_AC:
6178 				vcpu->vc_inject.vie_errorcode = 0;
6179 				/* fallthrough */
6180 			case VMM_EX_DF:
6181 			case VMM_EX_TS:
6182 			case VMM_EX_NP:
6183 			case VMM_EX_SS:
6184 			case VMM_EX_GP:
6185 			case VMM_EX_PF:
6186 				/* Hardware exception. */
6187 				vmcb->v_eventinj |= (3ULL << 8);
6188 
6189 				if (vmcb->v_cr0 & CR0_PE) {
6190 					/* Error code valid. */
6191 					vmcb->v_eventinj |= (1ULL << 11);
6192 					vmcb->v_eventinj |= (uint64_t)
6193 					    vcpu->vc_inject.vie_errorcode << 32;
6194 				}
6195 				break;
6196 			default:
6197 				printf("%s: unsupported exception vector %u\n",
6198 				    __func__, vcpu->vc_inject.vie_vector);
6199 				ret = EINVAL;
6200 			} /* switch */
6201 			if (ret == EINVAL)
6202 				break;
6203 
6204 			/* Event is valid. */
6205 			vmcb->v_eventinj |= (1U << 31);
6206 			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6207 		}
6208 
6209 		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
6210 
6211 		/* Start / resume the VCPU */
6212 		/* Disable interrupts and save the current host FPU state. */
6213 		clgi();
6214 		if ((ret = vmm_fpurestore(vcpu))) {
6215 			stgi();
6216 			break;
6217 		}
6218 
6219 		/*
6220 		 * If we're resuming to a different VCPU and have IBPB,
6221 		 * then use it to prevent cross-VM branch-target injection.
6222 		 */
6223 		if (ci->ci_guest_vcpu != vcpu &&
6224 		    (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBPB)) {
6225 			wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
6226 			ci->ci_guest_vcpu = vcpu;
6227 		}
6228 
6229 		/* Restore any guest PKRU state. */
6230 		if (vmm_softc->sc_md.pkru_enabled)
6231 			wrpkru(0, vcpu->vc_pkru);
6232 
6233 		KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
6234 		wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);
6235 
6236 		ret = svm_enter_guest(vcpu->vc_control_pa,
6237 		    &vcpu->vc_gueststate, &gdt);
6238 
6239 		/* Restore host PKRU state. */
6240 		if (vmm_softc->sc_md.pkru_enabled) {
6241 			vcpu->vc_pkru = rdpkru(0);
6242 			wrpkru(0, PGK_VALUE);
6243 		}
6244 
6245 		/*
6246 		 * On exit, interrupts are disabled, and we are running with
6247 		 * the guest FPU state still possibly on the CPU. Save the FPU
6248 		 * state before re-enabling interrupts.
6249 		 */
6250 		vmm_fpusave(vcpu);
6251 
6252 		/*
6253 		 * Enable interrupts now. Note that if the exit was due to INTR
6254 		 * (external interrupt), the interrupt will be processed now.
6255 		 */
6256 		stgi();
6257 
6258 		vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
6259 		vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE;
6260 		svm_set_clean(vcpu, SVM_CLEANBITS_ALL);
6261 
6262 		/* If we exited successfully ... */
6263 		if (ret == 0) {
6264 			exit_reason = vmcb->v_exitcode;
6265 			vcpu->vc_gueststate.vg_exit_reason = exit_reason;
6266 			TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
6267 
6268 			vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags;
6269 
6270 			/*
6271 			 * Handle the exit. This will alter "ret" to EAGAIN if
6272 			 * the exit handler determines help from vmd is needed.
6273 			 */
6274 			ret = svm_handle_exit(vcpu);
6275 
6276 			if (vcpu->vc_gueststate.vg_rflags & PSL_I)
6277 				vcpu->vc_irqready = 1;
6278 			else
6279 				vcpu->vc_irqready = 0;
6280 
6281 			/*
6282 			 * If not ready for interrupts, but interrupts pending,
6283 			 * enable interrupt window exiting.
6284 			 */
6285 			if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
6286 				vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR;
6287 				vmcb->v_irq = 1;
6288 				vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR;
6289 				vmcb->v_intr_vector = 0;
6290 				svm_set_dirty(vcpu, SVM_CLEANBITS_TPR |
6291 				    SVM_CLEANBITS_I);
6292 			}
6293 
6294 			/*
6295 			 * Exit to vmd if we are terminating, failed to enter,
6296 			 * or need help (device I/O)
6297 			 */
6298 			if (ret || vcpu_must_stop(vcpu))
6299 				break;
6300 
6301 			if (vcpu->vc_intr && vcpu->vc_irqready) {
6302 				ret = EAGAIN;
6303 				break;
6304 			}
6305 
6306 			/* Check if we should yield - don't hog the cpu */
6307 			spc = &ci->ci_schedstate;
6308 			if (spc->spc_schedflags & SPCF_SHOULDYIELD)
6309 				break;
6310 		}
6311 	}
6312 
6313 	/*
6314 	 * We are heading back to userspace (vmd), either because we need help
6315 	 * handling an exit, a guest interrupt is pending, or we failed in some
6316 	 * way to enter the guest. Copy the guest registers to the exit struct
6317 	 * and return to vmd.
6318 	 */
6319 	if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs))
6320 		ret = EINVAL;
6321 
6322 	return (ret);
6323 }
6324 
6325 /*
6326  * vmm_alloc_vpid
6327  *
6328  * Sets the memory location pointed to by "vpid" to the next available VPID
6329  * or ASID.
6330  *
6331  * Parameters:
6332  *  vpid: Pointer to location to receive the next VPID/ASID
6333  *
6334  * Return Values:
6335  *  0: The operation completed successfully
6336  *  ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
6337  */
6338 int
vmm_alloc_vpid(uint16_t * vpid)6339 vmm_alloc_vpid(uint16_t *vpid)
6340 {
6341 	uint16_t i;
6342 	uint8_t idx, bit;
6343 	struct vmm_softc *sc = vmm_softc;
6344 
6345 	rw_enter_write(&vmm_softc->vpid_lock);
6346 	for (i = 1; i <= sc->max_vpid; i++) {
6347 		idx = i / 8;
6348 		bit = i - (idx * 8);
6349 
6350 		if (!(sc->vpids[idx] & (1 << bit))) {
6351 			sc->vpids[idx] |= (1 << bit);
6352 			*vpid = i;
6353 			DPRINTF("%s: allocated VPID/ASID %d\n", __func__,
6354 			    i);
6355 			rw_exit_write(&vmm_softc->vpid_lock);
6356 			return 0;
6357 		}
6358 	}
6359 
6360 	printf("%s: no available %ss\n", __func__,
6361 	    (sc->mode == VMM_MODE_EPT) ? "VPID" :
6362 	    "ASID");
6363 
6364 	rw_exit_write(&vmm_softc->vpid_lock);
6365 	return ENOMEM;
6366 }
6367 
6368 /*
6369  * vmm_free_vpid
6370  *
6371  * Frees the VPID/ASID id supplied in "vpid".
6372  *
6373  * Parameters:
6374  *  vpid: VPID/ASID to free.
6375  */
6376 void
vmm_free_vpid(uint16_t vpid)6377 vmm_free_vpid(uint16_t vpid)
6378 {
6379 	uint8_t idx, bit;
6380 	struct vmm_softc *sc = vmm_softc;
6381 
6382 	rw_enter_write(&vmm_softc->vpid_lock);
6383 	idx = vpid / 8;
6384 	bit = vpid - (idx * 8);
6385 	sc->vpids[idx] &= ~(1 << bit);
6386 
6387 	DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid);
6388 	rw_exit_write(&vmm_softc->vpid_lock);
6389 }
6390 
6391 
6392 /* vmm_gpa_is_valid
6393  *
6394  * Check if the given gpa is within guest memory space.
6395  *
6396  * Parameters:
6397  * 	vcpu: The virtual cpu we are running on.
6398  * 	gpa: The address to check.
6399  * 	obj_size: The size of the object assigned to gpa
6400  *
6401  * Return values:
6402  * 	1: gpa is within the memory ranges allocated for the vcpu
6403  * 	0: otherwise
6404  */
6405 int
vmm_gpa_is_valid(struct vcpu * vcpu,paddr_t gpa,size_t obj_size)6406 vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size)
6407 {
6408 	struct vm *vm = vcpu->vc_parent;
6409 	struct vm_mem_range *vmr;
6410 	size_t i;
6411 
6412 	for (i = 0; i < vm->vm_nmemranges; ++i) {
6413 		vmr = &vm->vm_memranges[i];
6414 		if (vmr->vmr_size >= obj_size &&
6415 		    vmr->vmr_gpa <= gpa &&
6416 		    gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) {
6417 		    return 1;
6418 		}
6419 	}
6420 	return 0;
6421 }
6422 
6423 void
vmm_init_pvclock(struct vcpu * vcpu,paddr_t gpa)6424 vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa)
6425 {
6426 	paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0;
6427 	if (!vmm_gpa_is_valid(vcpu, pvclock_gpa,
6428 	        sizeof(struct pvclock_time_info))) {
6429 		/* XXX: Kill guest? */
6430 		vmm_inject_gp(vcpu);
6431 		return;
6432 	}
6433 
6434 	/* XXX: handle case when this struct goes over page boundaries */
6435 	if ((pvclock_gpa & PAGE_MASK) + sizeof(struct pvclock_time_info) >
6436 	    PAGE_SIZE) {
6437 		vmm_inject_gp(vcpu);
6438 		return;
6439 	}
6440 
6441 	vcpu->vc_pvclock_system_gpa = gpa;
6442 	if (tsc_frequency > 0)
6443 		vcpu->vc_pvclock_system_tsc_mul =
6444 		    (int) ((1000000000L << 20) / tsc_frequency);
6445 	else
6446 		vcpu->vc_pvclock_system_tsc_mul = 0;
6447 	vmm_update_pvclock(vcpu);
6448 }
6449 
6450 int
vmm_update_pvclock(struct vcpu * vcpu)6451 vmm_update_pvclock(struct vcpu *vcpu)
6452 {
6453 	struct pvclock_time_info *pvclock_ti;
6454 	struct timespec tv;
6455 	struct vm *vm = vcpu->vc_parent;
6456 	paddr_t pvclock_hpa, pvclock_gpa;
6457 
6458 	if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE) {
6459 		pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
6460 		if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
6461 			return (EINVAL);
6462 		pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa);
6463 
6464 		/* START next cycle (must be odd) */
6465 		pvclock_ti->ti_version =
6466 		    (++vcpu->vc_pvclock_version << 1) | 0x1;
6467 
6468 		pvclock_ti->ti_tsc_timestamp = rdtsc();
6469 		nanotime(&tv);
6470 		pvclock_ti->ti_system_time =
6471 		    tv.tv_sec * 1000000000L + tv.tv_nsec;
6472 		pvclock_ti->ti_tsc_shift = 12;
6473 		pvclock_ti->ti_tsc_to_system_mul =
6474 		    vcpu->vc_pvclock_system_tsc_mul;
6475 		pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE;
6476 
6477 		/* END (must be even) */
6478 		pvclock_ti->ti_version &= ~0x1;
6479 	}
6480 	return (0);
6481 }
6482 
6483 int
vmm_pat_is_valid(uint64_t pat)6484 vmm_pat_is_valid(uint64_t pat)
6485 {
6486 	int i;
6487 	uint8_t *byte = (uint8_t *)&pat;
6488 
6489 	/* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
6490 	for (i = 0; i < 8; i++) {
6491 		if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
6492 			DPRINTF("%s: invalid pat %llx\n", __func__, pat);
6493 			return 0;
6494 		}
6495 	}
6496 
6497 	return 1;
6498 }
6499 
6500 /*
6501  * vmx_exit_reason_decode
6502  *
6503  * Returns a human readable string describing exit type 'code'
6504  */
6505 const char *
vmx_exit_reason_decode(uint32_t code)6506 vmx_exit_reason_decode(uint32_t code)
6507 {
6508 	switch (code) {
6509 	case VMX_EXIT_NMI: return "NMI";
6510 	case VMX_EXIT_EXTINT: return "External interrupt";
6511 	case VMX_EXIT_TRIPLE_FAULT: return "Triple fault";
6512 	case VMX_EXIT_INIT: return "INIT signal";
6513 	case VMX_EXIT_SIPI: return "SIPI signal";
6514 	case VMX_EXIT_IO_SMI: return "I/O SMI";
6515 	case VMX_EXIT_OTHER_SMI: return "other SMI";
6516 	case VMX_EXIT_INT_WINDOW: return "Interrupt window";
6517 	case VMX_EXIT_NMI_WINDOW: return "NMI window";
6518 	case VMX_EXIT_TASK_SWITCH: return "Task switch";
6519 	case VMX_EXIT_CPUID: return "CPUID instruction";
6520 	case VMX_EXIT_GETSEC: return "GETSEC instruction";
6521 	case VMX_EXIT_HLT: return "HLT instruction";
6522 	case VMX_EXIT_INVD: return "INVD instruction";
6523 	case VMX_EXIT_INVLPG: return "INVLPG instruction";
6524 	case VMX_EXIT_RDPMC: return "RDPMC instruction";
6525 	case VMX_EXIT_RDTSC: return "RDTSC instruction";
6526 	case VMX_EXIT_RSM: return "RSM instruction";
6527 	case VMX_EXIT_VMCALL: return "VMCALL instruction";
6528 	case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction";
6529 	case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction";
6530 	case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction";
6531 	case VMX_EXIT_VMPTRST: return "VMPTRST instruction";
6532 	case VMX_EXIT_VMREAD: return "VMREAD instruction";
6533 	case VMX_EXIT_VMRESUME: return "VMRESUME instruction";
6534 	case VMX_EXIT_VMWRITE: return "VMWRITE instruction";
6535 	case VMX_EXIT_VMXOFF: return "VMXOFF instruction";
6536 	case VMX_EXIT_VMXON: return "VMXON instruction";
6537 	case VMX_EXIT_CR_ACCESS: return "CR access";
6538 	case VMX_EXIT_MOV_DR: return "MOV DR instruction";
6539 	case VMX_EXIT_IO: return "I/O instruction";
6540 	case VMX_EXIT_RDMSR: return "RDMSR instruction";
6541 	case VMX_EXIT_WRMSR: return "WRMSR instruction";
6542 	case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid";
6543 	case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed";
6544 	case VMX_EXIT_MWAIT: return "MWAIT instruction";
6545 	case VMX_EXIT_MTF: return "monitor trap flag";
6546 	case VMX_EXIT_MONITOR: return "MONITOR instruction";
6547 	case VMX_EXIT_PAUSE: return "PAUSE instruction";
6548 	case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry";
6549 	case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold";
6550 	case VMX_EXIT_APIC_ACCESS: return "APIC access";
6551 	case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI";
6552 	case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access";
6553 	case VMX_EXIT_LDTR_TR: return "LDTR/TR access";
6554 	case VMX_EXIT_EPT_VIOLATION: return "EPT violation";
6555 	case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration";
6556 	case VMX_EXIT_INVEPT: return "INVEPT instruction";
6557 	case VMX_EXIT_RDTSCP: return "RDTSCP instruction";
6558 	case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED:
6559 	    return "preemption timer expired";
6560 	case VMX_EXIT_INVVPID: return "INVVPID instruction";
6561 	case VMX_EXIT_WBINVD: return "WBINVD instruction";
6562 	case VMX_EXIT_XSETBV: return "XSETBV instruction";
6563 	case VMX_EXIT_APIC_WRITE: return "APIC write";
6564 	case VMX_EXIT_RDRAND: return "RDRAND instruction";
6565 	case VMX_EXIT_INVPCID: return "INVPCID instruction";
6566 	case VMX_EXIT_VMFUNC: return "VMFUNC instruction";
6567 	case VMX_EXIT_RDSEED: return "RDSEED instruction";
6568 	case VMX_EXIT_XSAVES: return "XSAVES instruction";
6569 	case VMX_EXIT_XRSTORS: return "XRSTORS instruction";
6570 	default: return "unknown";
6571 	}
6572 }
6573 
6574 /*
6575  * svm_exit_reason_decode
6576  *
6577  * Returns a human readable string describing exit type 'code'
6578  */
6579 const char *
svm_exit_reason_decode(uint32_t code)6580 svm_exit_reason_decode(uint32_t code)
6581 {
6582 	switch (code) {
6583 	case SVM_VMEXIT_CR0_READ: return "CR0 read";		/* 0x00 */
6584 	case SVM_VMEXIT_CR1_READ: return "CR1 read";		/* 0x01 */
6585 	case SVM_VMEXIT_CR2_READ: return "CR2 read";		/* 0x02 */
6586 	case SVM_VMEXIT_CR3_READ: return "CR3 read";		/* 0x03 */
6587 	case SVM_VMEXIT_CR4_READ: return "CR4 read";		/* 0x04 */
6588 	case SVM_VMEXIT_CR5_READ: return "CR5 read";		/* 0x05 */
6589 	case SVM_VMEXIT_CR6_READ: return "CR6 read";		/* 0x06 */
6590 	case SVM_VMEXIT_CR7_READ: return "CR7 read";		/* 0x07 */
6591 	case SVM_VMEXIT_CR8_READ: return "CR8 read";		/* 0x08 */
6592 	case SVM_VMEXIT_CR9_READ: return "CR9 read";		/* 0x09 */
6593 	case SVM_VMEXIT_CR10_READ: return "CR10 read";		/* 0x0A */
6594 	case SVM_VMEXIT_CR11_READ: return "CR11 read";		/* 0x0B */
6595 	case SVM_VMEXIT_CR12_READ: return "CR12 read";		/* 0x0C */
6596 	case SVM_VMEXIT_CR13_READ: return "CR13 read";		/* 0x0D */
6597 	case SVM_VMEXIT_CR14_READ: return "CR14 read";		/* 0x0E */
6598 	case SVM_VMEXIT_CR15_READ: return "CR15 read";		/* 0x0F */
6599 	case SVM_VMEXIT_CR0_WRITE: return "CR0 write";		/* 0x10 */
6600 	case SVM_VMEXIT_CR1_WRITE: return "CR1 write";		/* 0x11 */
6601 	case SVM_VMEXIT_CR2_WRITE: return "CR2 write";		/* 0x12 */
6602 	case SVM_VMEXIT_CR3_WRITE: return "CR3 write";		/* 0x13 */
6603 	case SVM_VMEXIT_CR4_WRITE: return "CR4 write";		/* 0x14 */
6604 	case SVM_VMEXIT_CR5_WRITE: return "CR5 write";		/* 0x15 */
6605 	case SVM_VMEXIT_CR6_WRITE: return "CR6 write";		/* 0x16 */
6606 	case SVM_VMEXIT_CR7_WRITE: return "CR7 write";		/* 0x17 */
6607 	case SVM_VMEXIT_CR8_WRITE: return "CR8 write";		/* 0x18 */
6608 	case SVM_VMEXIT_CR9_WRITE: return "CR9 write";		/* 0x19 */
6609 	case SVM_VMEXIT_CR10_WRITE: return "CR10 write";	/* 0x1A */
6610 	case SVM_VMEXIT_CR11_WRITE: return "CR11 write";	/* 0x1B */
6611 	case SVM_VMEXIT_CR12_WRITE: return "CR12 write";	/* 0x1C */
6612 	case SVM_VMEXIT_CR13_WRITE: return "CR13 write";	/* 0x1D */
6613 	case SVM_VMEXIT_CR14_WRITE: return "CR14 write";	/* 0x1E */
6614 	case SVM_VMEXIT_CR15_WRITE: return "CR15 write";	/* 0x1F */
6615 	case SVM_VMEXIT_DR0_READ: return "DR0 read";	 	/* 0x20 */
6616 	case SVM_VMEXIT_DR1_READ: return "DR1 read";		/* 0x21 */
6617 	case SVM_VMEXIT_DR2_READ: return "DR2 read";		/* 0x22 */
6618 	case SVM_VMEXIT_DR3_READ: return "DR3 read";		/* 0x23 */
6619 	case SVM_VMEXIT_DR4_READ: return "DR4 read";		/* 0x24 */
6620 	case SVM_VMEXIT_DR5_READ: return "DR5 read";		/* 0x25 */
6621 	case SVM_VMEXIT_DR6_READ: return "DR6 read";		/* 0x26 */
6622 	case SVM_VMEXIT_DR7_READ: return "DR7 read";		/* 0x27 */
6623 	case SVM_VMEXIT_DR8_READ: return "DR8 read";		/* 0x28 */
6624 	case SVM_VMEXIT_DR9_READ: return "DR9 read";		/* 0x29 */
6625 	case SVM_VMEXIT_DR10_READ: return "DR10 read";		/* 0x2A */
6626 	case SVM_VMEXIT_DR11_READ: return "DR11 read";		/* 0x2B */
6627 	case SVM_VMEXIT_DR12_READ: return "DR12 read";		/* 0x2C */
6628 	case SVM_VMEXIT_DR13_READ: return "DR13 read";		/* 0x2D */
6629 	case SVM_VMEXIT_DR14_READ: return "DR14 read";		/* 0x2E */
6630 	case SVM_VMEXIT_DR15_READ: return "DR15 read";		/* 0x2F */
6631 	case SVM_VMEXIT_DR0_WRITE: return "DR0 write";		/* 0x30 */
6632 	case SVM_VMEXIT_DR1_WRITE: return "DR1 write";		/* 0x31 */
6633 	case SVM_VMEXIT_DR2_WRITE: return "DR2 write";		/* 0x32 */
6634 	case SVM_VMEXIT_DR3_WRITE: return "DR3 write";		/* 0x33 */
6635 	case SVM_VMEXIT_DR4_WRITE: return "DR4 write";		/* 0x34 */
6636 	case SVM_VMEXIT_DR5_WRITE: return "DR5 write";		/* 0x35 */
6637 	case SVM_VMEXIT_DR6_WRITE: return "DR6 write";		/* 0x36 */
6638 	case SVM_VMEXIT_DR7_WRITE: return "DR7 write";		/* 0x37 */
6639 	case SVM_VMEXIT_DR8_WRITE: return "DR8 write";		/* 0x38 */
6640 	case SVM_VMEXIT_DR9_WRITE: return "DR9 write";		/* 0x39 */
6641 	case SVM_VMEXIT_DR10_WRITE: return "DR10 write";	/* 0x3A */
6642 	case SVM_VMEXIT_DR11_WRITE: return "DR11 write";	/* 0x3B */
6643 	case SVM_VMEXIT_DR12_WRITE: return "DR12 write";	/* 0x3C */
6644 	case SVM_VMEXIT_DR13_WRITE: return "DR13 write";	/* 0x3D */
6645 	case SVM_VMEXIT_DR14_WRITE: return "DR14 write";	/* 0x3E */
6646 	case SVM_VMEXIT_DR15_WRITE: return "DR15 write";	/* 0x3F */
6647 	case SVM_VMEXIT_EXCP0: return "Exception 0x00";		/* 0x40 */
6648 	case SVM_VMEXIT_EXCP1: return "Exception 0x01";		/* 0x41 */
6649 	case SVM_VMEXIT_EXCP2: return "Exception 0x02";		/* 0x42 */
6650 	case SVM_VMEXIT_EXCP3: return "Exception 0x03";		/* 0x43 */
6651 	case SVM_VMEXIT_EXCP4: return "Exception 0x04";		/* 0x44 */
6652 	case SVM_VMEXIT_EXCP5: return "Exception 0x05";		/* 0x45 */
6653 	case SVM_VMEXIT_EXCP6: return "Exception 0x06";		/* 0x46 */
6654 	case SVM_VMEXIT_EXCP7: return "Exception 0x07";		/* 0x47 */
6655 	case SVM_VMEXIT_EXCP8: return "Exception 0x08";		/* 0x48 */
6656 	case SVM_VMEXIT_EXCP9: return "Exception 0x09";		/* 0x49 */
6657 	case SVM_VMEXIT_EXCP10: return "Exception 0x0A";	/* 0x4A */
6658 	case SVM_VMEXIT_EXCP11: return "Exception 0x0B";	/* 0x4B */
6659 	case SVM_VMEXIT_EXCP12: return "Exception 0x0C";	/* 0x4C */
6660 	case SVM_VMEXIT_EXCP13: return "Exception 0x0D";	/* 0x4D */
6661 	case SVM_VMEXIT_EXCP14: return "Exception 0x0E";	/* 0x4E */
6662 	case SVM_VMEXIT_EXCP15: return "Exception 0x0F";	/* 0x4F */
6663 	case SVM_VMEXIT_EXCP16: return "Exception 0x10";	/* 0x50 */
6664 	case SVM_VMEXIT_EXCP17: return "Exception 0x11";	/* 0x51 */
6665 	case SVM_VMEXIT_EXCP18: return "Exception 0x12";	/* 0x52 */
6666 	case SVM_VMEXIT_EXCP19: return "Exception 0x13";	/* 0x53 */
6667 	case SVM_VMEXIT_EXCP20: return "Exception 0x14";	/* 0x54 */
6668 	case SVM_VMEXIT_EXCP21: return "Exception 0x15";	/* 0x55 */
6669 	case SVM_VMEXIT_EXCP22: return "Exception 0x16";	/* 0x56 */
6670 	case SVM_VMEXIT_EXCP23: return "Exception 0x17";	/* 0x57 */
6671 	case SVM_VMEXIT_EXCP24: return "Exception 0x18";	/* 0x58 */
6672 	case SVM_VMEXIT_EXCP25: return "Exception 0x19";	/* 0x59 */
6673 	case SVM_VMEXIT_EXCP26: return "Exception 0x1A";	/* 0x5A */
6674 	case SVM_VMEXIT_EXCP27: return "Exception 0x1B";	/* 0x5B */
6675 	case SVM_VMEXIT_EXCP28: return "Exception 0x1C";	/* 0x5C */
6676 	case SVM_VMEXIT_EXCP29: return "Exception 0x1D";	/* 0x5D */
6677 	case SVM_VMEXIT_EXCP30: return "Exception 0x1E";	/* 0x5E */
6678 	case SVM_VMEXIT_EXCP31: return "Exception 0x1F";	/* 0x5F */
6679 	case SVM_VMEXIT_INTR: return "External interrupt";	/* 0x60 */
6680 	case SVM_VMEXIT_NMI: return "NMI";			/* 0x61 */
6681 	case SVM_VMEXIT_SMI: return "SMI";			/* 0x62 */
6682 	case SVM_VMEXIT_INIT: return "INIT";			/* 0x63 */
6683 	case SVM_VMEXIT_VINTR: return "Interrupt window";	/* 0x64 */
6684 	case SVM_VMEXIT_CR0_SEL_WRITE: return "Sel CR0 write";	/* 0x65 */
6685 	case SVM_VMEXIT_IDTR_READ: return "IDTR read";		/* 0x66 */
6686 	case SVM_VMEXIT_GDTR_READ: return "GDTR read";		/* 0x67 */
6687 	case SVM_VMEXIT_LDTR_READ: return "LDTR read";		/* 0x68 */
6688 	case SVM_VMEXIT_TR_READ: return "TR read";		/* 0x69 */
6689 	case SVM_VMEXIT_IDTR_WRITE: return "IDTR write";	/* 0x6A */
6690 	case SVM_VMEXIT_GDTR_WRITE: return "GDTR write";	/* 0x6B */
6691 	case SVM_VMEXIT_LDTR_WRITE: return "LDTR write";	/* 0x6C */
6692 	case SVM_VMEXIT_TR_WRITE: return "TR write";		/* 0x6D */
6693 	case SVM_VMEXIT_RDTSC: return "RDTSC instruction";	/* 0x6E */
6694 	case SVM_VMEXIT_RDPMC: return "RDPMC instruction";	/* 0x6F */
6695 	case SVM_VMEXIT_PUSHF: return "PUSHF instruction";	/* 0x70 */
6696 	case SVM_VMEXIT_POPF: return "POPF instruction";	/* 0x71 */
6697 	case SVM_VMEXIT_CPUID: return "CPUID instruction";	/* 0x72 */
6698 	case SVM_VMEXIT_RSM: return "RSM instruction";		/* 0x73 */
6699 	case SVM_VMEXIT_IRET: return "IRET instruction";	/* 0x74 */
6700 	case SVM_VMEXIT_SWINT: return "SWINT instruction";	/* 0x75 */
6701 	case SVM_VMEXIT_INVD: return "INVD instruction";	/* 0x76 */
6702 	case SVM_VMEXIT_PAUSE: return "PAUSE instruction";	/* 0x77 */
6703 	case SVM_VMEXIT_HLT: return "HLT instruction";		/* 0x78 */
6704 	case SVM_VMEXIT_INVLPG: return "INVLPG instruction";	/* 0x79 */
6705 	case SVM_VMEXIT_INVLPGA: return "INVLPGA instruction";	/* 0x7A */
6706 	case SVM_VMEXIT_IOIO: return "I/O instruction";		/* 0x7B */
6707 	case SVM_VMEXIT_MSR: return "RDMSR/WRMSR instruction";	/* 0x7C */
6708 	case SVM_VMEXIT_TASK_SWITCH: return "Task switch";	/* 0x7D */
6709 	case SVM_VMEXIT_FERR_FREEZE: return "FERR_FREEZE";	/* 0x7E */
6710 	case SVM_VMEXIT_SHUTDOWN: return "Triple fault";	/* 0x7F */
6711 	case SVM_VMEXIT_VMRUN: return "VMRUN instruction";	/* 0x80 */
6712 	case SVM_VMEXIT_VMMCALL: return "VMMCALL instruction";	/* 0x81 */
6713 	case SVM_VMEXIT_VMLOAD: return "VMLOAD instruction";	/* 0x82 */
6714 	case SVM_VMEXIT_VMSAVE: return "VMSAVE instruction";	/* 0x83 */
6715 	case SVM_VMEXIT_STGI: return "STGI instruction";	/* 0x84 */
6716 	case SVM_VMEXIT_CLGI: return "CLGI instruction";	/* 0x85 */
6717 	case SVM_VMEXIT_SKINIT: return "SKINIT instruction";	/* 0x86 */
6718 	case SVM_VMEXIT_RDTSCP: return "RDTSCP instruction";	/* 0x87 */
6719 	case SVM_VMEXIT_ICEBP: return "ICEBP instruction";	/* 0x88 */
6720 	case SVM_VMEXIT_WBINVD: return "WBINVD instruction";	/* 0x89 */
6721 	case SVM_VMEXIT_MONITOR: return "MONITOR instruction";	/* 0x8A */
6722 	case SVM_VMEXIT_MWAIT: return "MWAIT instruction";	/* 0x8B */
6723 	case SVM_VMEXIT_MWAIT_CONDITIONAL: return "Cond MWAIT";	/* 0x8C */
6724 	case SVM_VMEXIT_NPF: return "NPT violation";		/* 0x400 */
6725 	default: return "unknown";
6726 	}
6727 }
6728 
6729 /*
6730  * vmx_instruction_error_decode
6731  *
6732  * Returns a human readable string describing the instruction error in 'code'
6733  */
6734 const char *
vmx_instruction_error_decode(uint32_t code)6735 vmx_instruction_error_decode(uint32_t code)
6736 {
6737 	switch (code) {
6738 	case 1: return "VMCALL: unsupported in VMX root";
6739 	case 2: return "VMCLEAR: invalid paddr";
6740 	case 3: return "VMCLEAR: VMXON pointer";
6741 	case 4: return "VMLAUNCH: non-clear VMCS";
6742 	case 5: return "VMRESUME: non-launched VMCS";
6743 	case 6: return "VMRESUME: executed after VMXOFF";
6744 	case 7: return "VM entry: invalid control field(s)";
6745 	case 8: return "VM entry: invalid host state field(s)";
6746 	case 9: return "VMPTRLD: invalid paddr";
6747 	case 10: return "VMPTRLD: VMXON pointer";
6748 	case 11: return "VMPTRLD: incorrect VMCS revid";
6749 	case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
6750 	case 13: return "VMWRITE: RO VMCS field";
6751 	case 15: return "VMXON: unsupported in VMX root";
6752 	case 20: return "VMCALL: invalid VM exit control fields";
6753 	case 26: return "VM entry: blocked by MOV SS";
6754 	case 28: return "Invalid operand to INVEPT/INVVPID";
6755 	case 0x80000021: return "VM entry: invalid guest state";
6756 	case 0x80000022: return "VM entry: failure due to MSR loading";
6757 	case 0x80000029: return "VM entry: machine-check event";
6758 	default: return "unknown";
6759 	}
6760 }
6761 
6762 /*
6763  * vcpu_state_decode
6764  *
6765  * Returns a human readable string describing the vcpu state in 'state'.
6766  */
6767 const char *
vcpu_state_decode(u_int state)6768 vcpu_state_decode(u_int state)
6769 {
6770 	switch (state) {
6771 	case VCPU_STATE_STOPPED: return "stopped";
6772 	case VCPU_STATE_RUNNING: return "running";
6773 	case VCPU_STATE_REQTERM: return "requesting termination";
6774 	case VCPU_STATE_TERMINATED: return "terminated";
6775 	case VCPU_STATE_UNKNOWN: return "unknown";
6776 	default: return "invalid";
6777 	}
6778 }
6779 
6780 #ifdef VMM_DEBUG
6781 /*
6782  * dump_vcpu
6783  *
6784  * Dumps the VMX capabilities of vcpu 'vcpu'
6785  */
6786 void
dump_vcpu(struct vcpu * vcpu)6787 dump_vcpu(struct vcpu *vcpu)
6788 {
6789 	printf("vcpu @ %p\n", vcpu);
6790 	printf("    parent vm @ %p\n", vcpu->vc_parent);
6791 	printf("    mode: ");
6792 	if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
6793 		printf("VMX\n");
6794 		printf("    pinbased ctls: 0x%llx\n",
6795 		    vcpu->vc_vmx_pinbased_ctls);
6796 		printf("    true pinbased ctls: 0x%llx\n",
6797 		    vcpu->vc_vmx_true_pinbased_ctls);
6798 		CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING);
6799 		CTRL_DUMP(vcpu, PINBASED, NMI_EXITING);
6800 		CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS);
6801 		CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER);
6802 		CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS);
6803 		printf("    procbased ctls: 0x%llx\n",
6804 		    vcpu->vc_vmx_procbased_ctls);
6805 		printf("    true procbased ctls: 0x%llx\n",
6806 		    vcpu->vc_vmx_true_procbased_ctls);
6807 		CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING);
6808 		CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING);
6809 		CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING);
6810 		CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING);
6811 		CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING);
6812 		CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING);
6813 		CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING);
6814 		CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING);
6815 		CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING);
6816 		CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING);
6817 		CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING);
6818 		CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW);
6819 		CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING);
6820 		CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING);
6821 		CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING);
6822 		CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS);
6823 		CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG);
6824 		CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS);
6825 		CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING);
6826 		CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING);
6827 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6828 		    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
6829 			printf("    procbased2 ctls: 0x%llx\n",
6830 			    vcpu->vc_vmx_procbased2_ctls);
6831 			CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC);
6832 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT);
6833 			CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING);
6834 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP);
6835 			CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE);
6836 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID);
6837 			CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING);
6838 			CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST);
6839 			CTRL_DUMP(vcpu, PROCBASED2,
6840 			    APIC_REGISTER_VIRTUALIZATION);
6841 			CTRL_DUMP(vcpu, PROCBASED2,
6842 			    VIRTUAL_INTERRUPT_DELIVERY);
6843 			CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING);
6844 			CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING);
6845 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID);
6846 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS);
6847 			CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING);
6848 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING);
6849 			CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING);
6850 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML);
6851 			CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE);
6852 			CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT);
6853 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS);
6854 			CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING);
6855 		}
6856 		printf("    entry ctls: 0x%llx\n",
6857 		    vcpu->vc_vmx_entry_ctls);
6858 		printf("    true entry ctls: 0x%llx\n",
6859 		    vcpu->vc_vmx_true_entry_ctls);
6860 		CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS);
6861 		CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST);
6862 		CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM);
6863 		CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT);
6864 		CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY);
6865 		CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY);
6866 		CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY);
6867 		CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY);
6868 		CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT);
6869 		printf("    exit ctls: 0x%llx\n",
6870 		    vcpu->vc_vmx_exit_ctls);
6871 		printf("    true exit ctls: 0x%llx\n",
6872 		    vcpu->vc_vmx_true_exit_ctls);
6873 		CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS);
6874 		CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE);
6875 		CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT);
6876 		CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT);
6877 		CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT);
6878 		CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT);
6879 		CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT);
6880 		CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT);
6881 		CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER);
6882 		CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT);
6883 		CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT);
6884 	}
6885 }
6886 
6887 /*
6888  * vmx_dump_vmcs_field
6889  *
6890  * Debug function to dump the contents of a single VMCS field
6891  *
6892  * Parameters:
6893  *  fieldid: VMCS Field ID
6894  *  msg: string to display
6895  */
6896 void
vmx_dump_vmcs_field(uint16_t fieldid,const char * msg)6897 vmx_dump_vmcs_field(uint16_t fieldid, const char *msg)
6898 {
6899 	uint8_t width;
6900 	uint64_t val;
6901 
6902 
6903 	DPRINTF("%s (0x%04x): ", msg, fieldid);
6904 	if (vmread(fieldid, &val))
6905 		DPRINTF("???? ");
6906 	else {
6907 		/*
6908 		 * Field width encoding : bits 13:14
6909 		 *
6910 		 * 0: 16-bit
6911 		 * 1: 64-bit
6912 		 * 2: 32-bit
6913 		 * 3: natural width
6914 		 */
6915 		width = (fieldid >> 13) & 0x3;
6916 		switch (width) {
6917 			case 0: DPRINTF("0x%04llx ", val); break;
6918 			case 1:
6919 			case 3: DPRINTF("0x%016llx ", val); break;
6920 			case 2: DPRINTF("0x%08llx ", val);
6921 		}
6922 	}
6923 }
6924 
6925 /*
6926  * vmx_dump_vmcs
6927  *
6928  * Debug function to dump the contents of the current VMCS.
6929  */
6930 void
vmx_dump_vmcs(struct vcpu * vcpu)6931 vmx_dump_vmcs(struct vcpu *vcpu)
6932 {
6933 	int has_sec, i;
6934 	uint32_t cr3_tgt_ct;
6935 
6936 	/* XXX save and load new vmcs, restore at end */
6937 
6938 	DPRINTF("--CURRENT VMCS STATE--\n");
6939 	printf("VMCS launched: %s\n",
6940 	    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? "Yes" : "No");
6941 	DPRINTF("VMXON revision : 0x%x\n",
6942 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
6943 	DPRINTF("CR0 fixed0: 0x%llx\n",
6944 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0);
6945 	DPRINTF("CR0 fixed1: 0x%llx\n",
6946 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
6947 	DPRINTF("CR4 fixed0: 0x%llx\n",
6948 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0);
6949 	DPRINTF("CR4 fixed1: 0x%llx\n",
6950 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
6951 	DPRINTF("MSR table size: 0x%x\n",
6952 	    512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1));
6953 
6954 	has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6955 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1);
6956 
6957 	if (has_sec) {
6958 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6959 		    IA32_VMX_ENABLE_VPID, 1)) {
6960 			vmx_dump_vmcs_field(VMCS_GUEST_VPID, "VPID");
6961 		}
6962 	}
6963 
6964 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
6965 	    IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
6966 		vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR,
6967 		    "Posted Int Notif Vec");
6968 	}
6969 
6970 	if (has_sec) {
6971 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6972 		    IA32_VMX_EPT_VIOLATION_VE, 1)) {
6973 			vmx_dump_vmcs_field(VMCS_EPTP_INDEX, "EPTP idx");
6974 		}
6975 	}
6976 
6977 	DPRINTF("\n");
6978 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL, "G.ES");
6979 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL, "G.CS");
6980 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL, "G.SS");
6981 	DPRINTF("\n");
6982 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL, "G.DS");
6983 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL, "G.FS");
6984 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL, "G.GS");
6985 	DPRINTF("\n");
6986 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL, "LDTR");
6987 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL, "G.TR");
6988 
6989 	if (has_sec) {
6990 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6991 		    IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
6992 			vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS,
6993 			    "Int sts");
6994 		}
6995 
6996 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6997 		    IA32_VMX_ENABLE_PML, 1)) {
6998 			vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX, "PML Idx");
6999 		}
7000 	}
7001 
7002 	DPRINTF("\n");
7003 	vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL, "H.ES");
7004 	vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL, "H.CS");
7005 	vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL, "H.SS");
7006 	DPRINTF("\n");
7007 	vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL, "H.DS");
7008 	vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL, "H.FS");
7009 	vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL, "H.GS");
7010 	DPRINTF("\n");
7011 
7012 	vmx_dump_vmcs_field(VMCS_IO_BITMAP_A, "I/O Bitmap A");
7013 	DPRINTF("\n");
7014 	vmx_dump_vmcs_field(VMCS_IO_BITMAP_B, "I/O Bitmap B");
7015 	DPRINTF("\n");
7016 
7017 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7018 	    IA32_VMX_USE_MSR_BITMAPS, 1)) {
7019 		vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS, "MSR Bitmap");
7020 		DPRINTF("\n");
7021 	}
7022 
7023 	vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS, "Exit Store MSRs");
7024 	DPRINTF("\n");
7025 	vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS, "Exit Load MSRs");
7026 	DPRINTF("\n");
7027 	vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS, "Entry Load MSRs");
7028 	DPRINTF("\n");
7029 	vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER, "Exec VMCS Ptr");
7030 	DPRINTF("\n");
7031 
7032 	if (has_sec) {
7033 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7034 		    IA32_VMX_ENABLE_PML, 1)) {
7035 			vmx_dump_vmcs_field(VMCS_PML_ADDRESS, "PML Addr");
7036 			DPRINTF("\n");
7037 		}
7038 	}
7039 
7040 	vmx_dump_vmcs_field(VMCS_TSC_OFFSET, "TSC Offset");
7041 	DPRINTF("\n");
7042 
7043 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7044 	    IA32_VMX_USE_TPR_SHADOW, 1)) {
7045 		vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS,
7046 		    "Virtual APIC Addr");
7047 		DPRINTF("\n");
7048 	}
7049 
7050 	if (has_sec) {
7051 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7052 		    IA32_VMX_VIRTUALIZE_APIC, 1)) {
7053 			vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS,
7054 			    "APIC Access Addr");
7055 			DPRINTF("\n");
7056 		}
7057 	}
7058 
7059 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7060 	    IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
7061 		vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC,
7062 		    "Posted Int Desc Addr");
7063 		DPRINTF("\n");
7064 	}
7065 
7066 	if (has_sec) {
7067 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7068 		    IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) {
7069 			vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS,
7070 			    "VM Function Controls");
7071 			DPRINTF("\n");
7072 		}
7073 
7074 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7075 		    IA32_VMX_ENABLE_EPT, 1)) {
7076 			vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP,
7077 			    "EPT Pointer");
7078 			DPRINTF("\n");
7079 		}
7080 
7081 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7082 		    IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
7083 			vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_0,
7084 			    "EOI Exit Bitmap 0");
7085 			DPRINTF("\n");
7086 			vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_1,
7087 			    "EOI Exit Bitmap 1");
7088 			DPRINTF("\n");
7089 			vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_2,
7090 			    "EOI Exit Bitmap 2");
7091 			DPRINTF("\n");
7092 			vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_3,
7093 			    "EOI Exit Bitmap 3");
7094 			DPRINTF("\n");
7095 		}
7096 
7097 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7098 		    IA32_VMX_VMCS_SHADOWING, 1)) {
7099 			vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS,
7100 			    "VMREAD Bitmap Addr");
7101 			DPRINTF("\n");
7102 			vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS,
7103 			    "VMWRITE Bitmap Addr");
7104 			DPRINTF("\n");
7105 		}
7106 
7107 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7108 		    IA32_VMX_EPT_VIOLATION_VE, 1)) {
7109 			vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS,
7110 			    "#VE Addr");
7111 			DPRINTF("\n");
7112 		}
7113 
7114 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7115 		    IA32_VMX_ENABLE_XSAVES_XRSTORS, 1)) {
7116 			vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP,
7117 			    "XSS exiting bitmap addr");
7118 			DPRINTF("\n");
7119 		}
7120 
7121 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7122 		    IA32_VMX_ENABLE_ENCLS_EXITING, 1)) {
7123 			vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP,
7124 			    "Encls exiting bitmap addr");
7125 			DPRINTF("\n");
7126 		}
7127 
7128 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7129 		    IA32_VMX_ENABLE_TSC_SCALING, 1)) {
7130 			vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER,
7131 			    "TSC scaling factor");
7132 			DPRINTF("\n");
7133 		}
7134 
7135 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7136 		    IA32_VMX_ENABLE_EPT, 1)) {
7137 			vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS,
7138 			    "Guest PA");
7139 			DPRINTF("\n");
7140 		}
7141 	}
7142 
7143 	vmx_dump_vmcs_field(VMCS_LINK_POINTER, "VMCS Link Pointer");
7144 	DPRINTF("\n");
7145 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL, "Guest DEBUGCTL");
7146 	DPRINTF("\n");
7147 
7148 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7149 	    IA32_VMX_LOAD_IA32_PAT_ON_ENTRY, 1) ||
7150 	    vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7151 	    IA32_VMX_SAVE_IA32_PAT_ON_EXIT, 1)) {
7152 		vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT,
7153 		    "Guest PAT");
7154 		DPRINTF("\n");
7155 	}
7156 
7157 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7158 	    IA32_VMX_LOAD_IA32_EFER_ON_ENTRY, 1) ||
7159 	    vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7160 	    IA32_VMX_SAVE_IA32_EFER_ON_EXIT, 1)) {
7161 		vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER,
7162 		    "Guest EFER");
7163 		DPRINTF("\n");
7164 	}
7165 
7166 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7167 	    IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY, 1)) {
7168 		vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL,
7169 		    "Guest Perf Global Ctrl");
7170 		DPRINTF("\n");
7171 	}
7172 
7173 	if (has_sec) {
7174 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7175 		    IA32_VMX_ENABLE_EPT, 1)) {
7176 			vmx_dump_vmcs_field(VMCS_GUEST_PDPTE0, "Guest PDPTE0");
7177 			DPRINTF("\n");
7178 			vmx_dump_vmcs_field(VMCS_GUEST_PDPTE1, "Guest PDPTE1");
7179 			DPRINTF("\n");
7180 			vmx_dump_vmcs_field(VMCS_GUEST_PDPTE2, "Guest PDPTE2");
7181 			DPRINTF("\n");
7182 			vmx_dump_vmcs_field(VMCS_GUEST_PDPTE3, "Guest PDPTE3");
7183 			DPRINTF("\n");
7184 		}
7185 	}
7186 
7187 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7188 	    IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY, 1) ||
7189 	    vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7190 	    IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT, 1)) {
7191 		vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS,
7192 		    "Guest BNDCFGS");
7193 		DPRINTF("\n");
7194 	}
7195 
7196 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7197 	    IA32_VMX_LOAD_IA32_PAT_ON_EXIT, 1)) {
7198 		vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT,
7199 		    "Host PAT");
7200 		DPRINTF("\n");
7201 	}
7202 
7203 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7204 	    IA32_VMX_LOAD_IA32_EFER_ON_EXIT, 1)) {
7205 		vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER,
7206 		    "Host EFER");
7207 		DPRINTF("\n");
7208 	}
7209 
7210 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7211 	    IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT, 1)) {
7212 		vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL,
7213 		    "Host Perf Global Ctrl");
7214 		DPRINTF("\n");
7215 	}
7216 
7217 	vmx_dump_vmcs_field(VMCS_PINBASED_CTLS, "Pinbased Ctrls");
7218 	vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS, "Procbased Ctrls");
7219 	DPRINTF("\n");
7220 	vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP, "Exception Bitmap");
7221 	vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK, "#PF Err Code Mask");
7222 	DPRINTF("\n");
7223 	vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH, "#PF Err Code Match");
7224 	vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT, "CR3 Tgt Count");
7225 	DPRINTF("\n");
7226 	vmx_dump_vmcs_field(VMCS_EXIT_CTLS, "Exit Ctrls");
7227 	vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT, "Exit MSR Store Ct");
7228 	DPRINTF("\n");
7229 	vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT, "Exit MSR Load Ct");
7230 	vmx_dump_vmcs_field(VMCS_ENTRY_CTLS, "Entry Ctrls");
7231 	DPRINTF("\n");
7232 	vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT, "Entry MSR Load Ct");
7233 	vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO, "Entry Int. Info");
7234 	DPRINTF("\n");
7235 	vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
7236 	    "Entry Ex. Err Code");
7237 	vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH, "Entry Insn Len");
7238 	DPRINTF("\n");
7239 
7240 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7241 	    IA32_VMX_USE_TPR_SHADOW, 1)) {
7242 		vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD, "TPR Threshold");
7243 		DPRINTF("\n");
7244 	}
7245 
7246 	if (has_sec) {
7247 		vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS, "2ndary Ctrls");
7248 		DPRINTF("\n");
7249 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7250 		    IA32_VMX_PAUSE_LOOP_EXITING, 1)) {
7251 			vmx_dump_vmcs_field(VMCS_PLE_GAP, "PLE Gap");
7252 			vmx_dump_vmcs_field(VMCS_PLE_WINDOW, "PLE Window");
7253 		}
7254 		DPRINTF("\n");
7255 	}
7256 
7257 	vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR, "Insn Error");
7258 	vmx_dump_vmcs_field(VMCS_EXIT_REASON, "Exit Reason");
7259 	DPRINTF("\n");
7260 
7261 	vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO, "Exit Int. Info");
7262 	vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE,
7263 	    "Exit Int. Err Code");
7264 	DPRINTF("\n");
7265 
7266 	vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO, "IDT vect info");
7267 	vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE,
7268 	    "IDT vect err code");
7269 	DPRINTF("\n");
7270 
7271 	vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH, "Insn Len");
7272 	vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO, "Exit Insn Info");
7273 	DPRINTF("\n");
7274 
7275 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT, "G. ES Lim");
7276 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT, "G. CS Lim");
7277 	DPRINTF("\n");
7278 
7279 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT, "G. SS Lim");
7280 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT, "G. DS Lim");
7281 	DPRINTF("\n");
7282 
7283 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT, "G. FS Lim");
7284 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT, "G. GS Lim");
7285 	DPRINTF("\n");
7286 
7287 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT, "G. LDTR Lim");
7288 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT, "G. TR Lim");
7289 	DPRINTF("\n");
7290 
7291 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT, "G. GDTR Lim");
7292 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT, "G. IDTR Lim");
7293 	DPRINTF("\n");
7294 
7295 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR, "G. ES AR");
7296 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR, "G. CS AR");
7297 	DPRINTF("\n");
7298 
7299 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR, "G. SS AR");
7300 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR, "G. DS AR");
7301 	DPRINTF("\n");
7302 
7303 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR, "G. FS AR");
7304 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR, "G. GS AR");
7305 	DPRINTF("\n");
7306 
7307 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR, "G. LDTR AR");
7308 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR, "G. TR AR");
7309 	DPRINTF("\n");
7310 
7311 	vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST, "G. Int St.");
7312 	vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE, "G. Act St.");
7313 	DPRINTF("\n");
7314 
7315 	vmx_dump_vmcs_field(VMCS_GUEST_SMBASE, "G. SMBASE");
7316 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS, "G. SYSENTER CS");
7317 	DPRINTF("\n");
7318 
7319 	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7320 	    IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER, 1)) {
7321 		vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL,
7322 		    "VMX Preempt Timer");
7323 		DPRINTF("\n");
7324 	}
7325 
7326 	vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS, "H. SYSENTER CS");
7327 	DPRINTF("\n");
7328 
7329 	vmx_dump_vmcs_field(VMCS_CR0_MASK, "CR0 Mask");
7330 	DPRINTF("\n");
7331 	vmx_dump_vmcs_field(VMCS_CR4_MASK, "CR4 Mask");
7332 	DPRINTF("\n");
7333 
7334 	vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW, "CR0 RD Shadow");
7335 	DPRINTF("\n");
7336 	vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW, "CR4 RD Shadow");
7337 	DPRINTF("\n");
7338 
7339 	/* We assume all CPUs have the same max CR3 target ct */
7340 	cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count;
7341 	DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct);
7342 	if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) {
7343 		for (i = 0 ; i < cr3_tgt_ct; i++) {
7344 			vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i),
7345 			    "CR3 Target");
7346 			DPRINTF("\n");
7347 		}
7348 	} else {
7349 		DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS);
7350 	}
7351 
7352 	vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION, "G. Exit Qual");
7353 	DPRINTF("\n");
7354 	vmx_dump_vmcs_field(VMCS_IO_RCX, "I/O RCX");
7355 	DPRINTF("\n");
7356 	vmx_dump_vmcs_field(VMCS_IO_RSI, "I/O RSI");
7357 	DPRINTF("\n");
7358 	vmx_dump_vmcs_field(VMCS_IO_RDI, "I/O RDI");
7359 	DPRINTF("\n");
7360 	vmx_dump_vmcs_field(VMCS_IO_RIP, "I/O RIP");
7361 	DPRINTF("\n");
7362 	vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS, "G. Lin Addr");
7363 	DPRINTF("\n");
7364 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR0, "G. CR0");
7365 	DPRINTF("\n");
7366 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR3, "G. CR3");
7367 	DPRINTF("\n");
7368 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR4, "G. CR4");
7369 	DPRINTF("\n");
7370 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE, "G. ES Base");
7371 	DPRINTF("\n");
7372 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE, "G. CS Base");
7373 	DPRINTF("\n");
7374 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE, "G. SS Base");
7375 	DPRINTF("\n");
7376 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE, "G. DS Base");
7377 	DPRINTF("\n");
7378 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE, "G. FS Base");
7379 	DPRINTF("\n");
7380 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE, "G. GS Base");
7381 	DPRINTF("\n");
7382 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE, "G. LDTR Base");
7383 	DPRINTF("\n");
7384 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE, "G. TR Base");
7385 	DPRINTF("\n");
7386 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE, "G. GDTR Base");
7387 	DPRINTF("\n");
7388 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE, "G. IDTR Base");
7389 	DPRINTF("\n");
7390 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR7, "G. DR7");
7391 	DPRINTF("\n");
7392 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP, "G. RSP");
7393 	DPRINTF("\n");
7394 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP, "G. RIP");
7395 	DPRINTF("\n");
7396 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS, "G. RFLAGS");
7397 	DPRINTF("\n");
7398 	vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC, "G. Pend Dbg Exc");
7399 	DPRINTF("\n");
7400 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP, "G. SYSENTER ESP");
7401 	DPRINTF("\n");
7402 	vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP, "G. SYSENTER EIP");
7403 	DPRINTF("\n");
7404 	vmx_dump_vmcs_field(VMCS_HOST_IA32_CR0, "H. CR0");
7405 	DPRINTF("\n");
7406 	vmx_dump_vmcs_field(VMCS_HOST_IA32_CR3, "H. CR3");
7407 	DPRINTF("\n");
7408 	vmx_dump_vmcs_field(VMCS_HOST_IA32_CR4, "H. CR4");
7409 	DPRINTF("\n");
7410 	vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE, "H. FS Base");
7411 	DPRINTF("\n");
7412 	vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE, "H. GS Base");
7413 	DPRINTF("\n");
7414 	vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE, "H. TR Base");
7415 	DPRINTF("\n");
7416 	vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE, "H. GDTR Base");
7417 	DPRINTF("\n");
7418 	vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE, "H. IDTR Base");
7419 	DPRINTF("\n");
7420 	vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP, "H. SYSENTER ESP");
7421 	DPRINTF("\n");
7422 	vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP, "H. SYSENTER EIP");
7423 	DPRINTF("\n");
7424 	vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP, "H. RSP");
7425 	DPRINTF("\n");
7426 	vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP, "H. RIP");
7427 	DPRINTF("\n");
7428 }
7429 
7430 /*
7431  * vmx_vcpu_dump_regs
7432  *
7433  * Debug function to print vcpu regs from the current vcpu
7434  *  note - vmcs for 'vcpu' must be on this pcpu.
7435  *
7436  * Parameters:
7437  *  vcpu - vcpu whose registers should be dumped
7438  */
7439 void
vmx_vcpu_dump_regs(struct vcpu * vcpu)7440 vmx_vcpu_dump_regs(struct vcpu *vcpu)
7441 {
7442 	uint64_t r;
7443 	int i;
7444 	struct vmx_msr_store *msr_store;
7445 
7446 	/* XXX reformat this for 32 bit guest as needed */
7447 	DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu));
7448 	i = vmm_get_guest_cpu_cpl(vcpu);
7449 	if (i == -1)
7450 		DPRINTF(" CPL=unknown\n");
7451 	else
7452 		DPRINTF(" CPL=%d\n", i);
7453 	DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n",
7454 	    vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx,
7455 	    vcpu->vc_gueststate.vg_rcx);
7456 	DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n",
7457 	    vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp,
7458 	    vcpu->vc_gueststate.vg_rdi);
7459 	DPRINTF(" rsi=0x%016llx  r8=0x%016llx  r9=0x%016llx\n",
7460 	    vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8,
7461 	    vcpu->vc_gueststate.vg_r9);
7462 	DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n",
7463 	    vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11,
7464 	    vcpu->vc_gueststate.vg_r12);
7465 	DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n",
7466 	    vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14,
7467 	    vcpu->vc_gueststate.vg_r15);
7468 
7469 	DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip);
7470 	if (vmread(VMCS_GUEST_IA32_RSP, &r))
7471 		DPRINTF("(error reading)\n");
7472 	else
7473 		DPRINTF("0x%016llx\n", r);
7474 
7475 	DPRINTF(" rflags=");
7476 	if (vmread(VMCS_GUEST_IA32_RFLAGS, &r))
7477 		DPRINTF("(error reading)\n");
7478 	else {
7479 		DPRINTF("0x%016llx ", r);
7480 		vmm_decode_rflags(r);
7481 	}
7482 
7483 	DPRINTF(" cr0=");
7484 	if (vmread(VMCS_GUEST_IA32_CR0, &r))
7485 		DPRINTF("(error reading)\n");
7486 	else {
7487 		DPRINTF("0x%016llx ", r);
7488 		vmm_decode_cr0(r);
7489 	}
7490 
7491 	DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2);
7492 
7493 	DPRINTF(" cr3=");
7494 	if (vmread(VMCS_GUEST_IA32_CR3, &r))
7495 		DPRINTF("(error reading)\n");
7496 	else {
7497 		DPRINTF("0x%016llx ", r);
7498 		vmm_decode_cr3(r);
7499 	}
7500 
7501 	DPRINTF(" cr4=");
7502 	if (vmread(VMCS_GUEST_IA32_CR4, &r))
7503 		DPRINTF("(error reading)\n");
7504 	else {
7505 		DPRINTF("0x%016llx ", r);
7506 		vmm_decode_cr4(r);
7507 	}
7508 
7509 	DPRINTF(" --Guest Segment Info--\n");
7510 
7511 	DPRINTF(" cs=");
7512 	if (vmread(VMCS_GUEST_IA32_CS_SEL, &r))
7513 		DPRINTF("(error reading)");
7514 	else
7515 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7516 
7517 	DPRINTF(" base=");
7518 	if (vmread(VMCS_GUEST_IA32_CS_BASE, &r))
7519 		DPRINTF("(error reading)");
7520 	else
7521 		DPRINTF("0x%016llx", r);
7522 
7523 	DPRINTF(" limit=");
7524 	if (vmread(VMCS_GUEST_IA32_CS_LIMIT, &r))
7525 		DPRINTF("(error reading)");
7526 	else
7527 		DPRINTF("0x%016llx", r);
7528 
7529 	DPRINTF(" a/r=");
7530 	if (vmread(VMCS_GUEST_IA32_CS_AR, &r))
7531 		DPRINTF("(error reading)\n");
7532 	else {
7533 		DPRINTF("0x%04llx\n  ", r);
7534 		vmm_segment_desc_decode(r);
7535 	}
7536 
7537 	DPRINTF(" ds=");
7538 	if (vmread(VMCS_GUEST_IA32_DS_SEL, &r))
7539 		DPRINTF("(error reading)");
7540 	else
7541 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7542 
7543 	DPRINTF(" base=");
7544 	if (vmread(VMCS_GUEST_IA32_DS_BASE, &r))
7545 		DPRINTF("(error reading)");
7546 	else
7547 		DPRINTF("0x%016llx", r);
7548 
7549 	DPRINTF(" limit=");
7550 	if (vmread(VMCS_GUEST_IA32_DS_LIMIT, &r))
7551 		DPRINTF("(error reading)");
7552 	else
7553 		DPRINTF("0x%016llx", r);
7554 
7555 	DPRINTF(" a/r=");
7556 	if (vmread(VMCS_GUEST_IA32_DS_AR, &r))
7557 		DPRINTF("(error reading)\n");
7558 	else {
7559 		DPRINTF("0x%04llx\n  ", r);
7560 		vmm_segment_desc_decode(r);
7561 	}
7562 
7563 	DPRINTF(" es=");
7564 	if (vmread(VMCS_GUEST_IA32_ES_SEL, &r))
7565 		DPRINTF("(error reading)");
7566 	else
7567 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7568 
7569 	DPRINTF(" base=");
7570 	if (vmread(VMCS_GUEST_IA32_ES_BASE, &r))
7571 		DPRINTF("(error reading)");
7572 	else
7573 		DPRINTF("0x%016llx", r);
7574 
7575 	DPRINTF(" limit=");
7576 	if (vmread(VMCS_GUEST_IA32_ES_LIMIT, &r))
7577 		DPRINTF("(error reading)");
7578 	else
7579 		DPRINTF("0x%016llx", r);
7580 
7581 	DPRINTF(" a/r=");
7582 	if (vmread(VMCS_GUEST_IA32_ES_AR, &r))
7583 		DPRINTF("(error reading)\n");
7584 	else {
7585 		DPRINTF("0x%04llx\n  ", r);
7586 		vmm_segment_desc_decode(r);
7587 	}
7588 
7589 	DPRINTF(" fs=");
7590 	if (vmread(VMCS_GUEST_IA32_FS_SEL, &r))
7591 		DPRINTF("(error reading)");
7592 	else
7593 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7594 
7595 	DPRINTF(" base=");
7596 	if (vmread(VMCS_GUEST_IA32_FS_BASE, &r))
7597 		DPRINTF("(error reading)");
7598 	else
7599 		DPRINTF("0x%016llx", r);
7600 
7601 	DPRINTF(" limit=");
7602 	if (vmread(VMCS_GUEST_IA32_FS_LIMIT, &r))
7603 		DPRINTF("(error reading)");
7604 	else
7605 		DPRINTF("0x%016llx", r);
7606 
7607 	DPRINTF(" a/r=");
7608 	if (vmread(VMCS_GUEST_IA32_FS_AR, &r))
7609 		DPRINTF("(error reading)\n");
7610 	else {
7611 		DPRINTF("0x%04llx\n  ", r);
7612 		vmm_segment_desc_decode(r);
7613 	}
7614 
7615 	DPRINTF(" gs=");
7616 	if (vmread(VMCS_GUEST_IA32_GS_SEL, &r))
7617 		DPRINTF("(error reading)");
7618 	else
7619 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7620 
7621 	DPRINTF(" base=");
7622 	if (vmread(VMCS_GUEST_IA32_GS_BASE, &r))
7623 		DPRINTF("(error reading)");
7624 	else
7625 		DPRINTF("0x%016llx", r);
7626 
7627 	DPRINTF(" limit=");
7628 	if (vmread(VMCS_GUEST_IA32_GS_LIMIT, &r))
7629 		DPRINTF("(error reading)");
7630 	else
7631 		DPRINTF("0x%016llx", r);
7632 
7633 	DPRINTF(" a/r=");
7634 	if (vmread(VMCS_GUEST_IA32_GS_AR, &r))
7635 		DPRINTF("(error reading)\n");
7636 	else {
7637 		DPRINTF("0x%04llx\n  ", r);
7638 		vmm_segment_desc_decode(r);
7639 	}
7640 
7641 	DPRINTF(" ss=");
7642 	if (vmread(VMCS_GUEST_IA32_SS_SEL, &r))
7643 		DPRINTF("(error reading)");
7644 	else
7645 		DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7646 
7647 	DPRINTF(" base=");
7648 	if (vmread(VMCS_GUEST_IA32_SS_BASE, &r))
7649 		DPRINTF("(error reading)");
7650 	else
7651 		DPRINTF("0x%016llx", r);
7652 
7653 	DPRINTF(" limit=");
7654 	if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r))
7655 		DPRINTF("(error reading)");
7656 	else
7657 		DPRINTF("0x%016llx", r);
7658 
7659 	DPRINTF(" a/r=");
7660 	if (vmread(VMCS_GUEST_IA32_SS_AR, &r))
7661 		DPRINTF("(error reading)\n");
7662 	else {
7663 		DPRINTF("0x%04llx\n  ", r);
7664 		vmm_segment_desc_decode(r);
7665 	}
7666 
7667 	DPRINTF(" tr=");
7668 	if (vmread(VMCS_GUEST_IA32_TR_SEL, &r))
7669 		DPRINTF("(error reading)");
7670 	else
7671 		DPRINTF("0x%04llx", r);
7672 
7673 	DPRINTF(" base=");
7674 	if (vmread(VMCS_GUEST_IA32_TR_BASE, &r))
7675 		DPRINTF("(error reading)");
7676 	else
7677 		DPRINTF("0x%016llx", r);
7678 
7679 	DPRINTF(" limit=");
7680 	if (vmread(VMCS_GUEST_IA32_TR_LIMIT, &r))
7681 		DPRINTF("(error reading)");
7682 	else
7683 		DPRINTF("0x%016llx", r);
7684 
7685 	DPRINTF(" a/r=");
7686 	if (vmread(VMCS_GUEST_IA32_TR_AR, &r))
7687 		DPRINTF("(error reading)\n");
7688 	else {
7689 		DPRINTF("0x%04llx\n  ", r);
7690 		vmm_segment_desc_decode(r);
7691 	}
7692 
7693 	DPRINTF(" gdtr base=");
7694 	if (vmread(VMCS_GUEST_IA32_GDTR_BASE, &r))
7695 		DPRINTF("(error reading)   ");
7696 	else
7697 		DPRINTF("0x%016llx", r);
7698 
7699 	DPRINTF(" limit=");
7700 	if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &r))
7701 		DPRINTF("(error reading)\n");
7702 	else
7703 		DPRINTF("0x%016llx\n", r);
7704 
7705 	DPRINTF(" idtr base=");
7706 	if (vmread(VMCS_GUEST_IA32_IDTR_BASE, &r))
7707 		DPRINTF("(error reading)   ");
7708 	else
7709 		DPRINTF("0x%016llx", r);
7710 
7711 	DPRINTF(" limit=");
7712 	if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &r))
7713 		DPRINTF("(error reading)\n");
7714 	else
7715 		DPRINTF("0x%016llx\n", r);
7716 
7717 	DPRINTF(" ldtr=");
7718 	if (vmread(VMCS_GUEST_IA32_LDTR_SEL, &r))
7719 		DPRINTF("(error reading)");
7720 	else
7721 		DPRINTF("0x%04llx", r);
7722 
7723 	DPRINTF(" base=");
7724 	if (vmread(VMCS_GUEST_IA32_LDTR_BASE, &r))
7725 		DPRINTF("(error reading)");
7726 	else
7727 		DPRINTF("0x%016llx", r);
7728 
7729 	DPRINTF(" limit=");
7730 	if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT, &r))
7731 		DPRINTF("(error reading)");
7732 	else
7733 		DPRINTF("0x%016llx", r);
7734 
7735 	DPRINTF(" a/r=");
7736 	if (vmread(VMCS_GUEST_IA32_LDTR_AR, &r))
7737 		DPRINTF("(error reading)\n");
7738 	else {
7739 		DPRINTF("0x%04llx\n  ", r);
7740 		vmm_segment_desc_decode(r);
7741 	}
7742 
7743 	DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
7744 	    (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
7745 	    (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);
7746 
7747 	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
7748 
7749 	for (i = 0; i < VCPU_REGS_NMSRS; i++) {
7750 		DPRINTF("  MSR %d @ %p : 0x%08llx (%s), "
7751 		    "value=0x%016llx ",
7752 		    i, &msr_store[i], msr_store[i].vms_index,
7753 		    msr_name_decode(msr_store[i].vms_index),
7754 		    msr_store[i].vms_data);
7755 		vmm_decode_msr_value(msr_store[i].vms_index,
7756 		    msr_store[i].vms_data);
7757 	}
7758 }
7759 
7760 /*
7761  * msr_name_decode
7762  *
7763  * Returns a human-readable name for the MSR supplied in 'msr'.
7764  *
7765  * Parameters:
7766  *  msr - The MSR to decode
7767  *
7768  * Return value:
7769  *  NULL-terminated character string containing the name of the MSR requested
7770  */
7771 const char *
msr_name_decode(uint32_t msr)7772 msr_name_decode(uint32_t msr)
7773 {
7774 	/*
7775 	 * Add as needed. Also consider adding a decode function when
7776 	 * adding to this table.
7777 	 */
7778 
7779 	switch (msr) {
7780 	case MSR_TSC: return "TSC";
7781 	case MSR_APICBASE: return "APIC base";
7782 	case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control";
7783 	case MSR_PERFCTR0: return "perf counter 0";
7784 	case MSR_PERFCTR1: return "perf counter 1";
7785 	case MSR_TEMPERATURE_TARGET: return "temperature target";
7786 	case MSR_MTRRcap: return "MTRR cap";
7787 	case MSR_PERF_STATUS: return "perf status";
7788 	case MSR_PERF_CTL: return "perf control";
7789 	case MSR_MTRRvarBase: return "MTRR variable base";
7790 	case MSR_MTRRfix64K_00000: return "MTRR fixed 64K";
7791 	case MSR_MTRRfix16K_80000: return "MTRR fixed 16K";
7792 	case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K";
7793 	case MSR_CR_PAT: return "PAT";
7794 	case MSR_MTRRdefType: return "MTRR default type";
7795 	case MSR_EFER: return "EFER";
7796 	case MSR_STAR: return "STAR";
7797 	case MSR_LSTAR: return "LSTAR";
7798 	case MSR_CSTAR: return "CSTAR";
7799 	case MSR_SFMASK: return "SFMASK";
7800 	case MSR_FSBASE: return "FSBASE";
7801 	case MSR_GSBASE: return "GSBASE";
7802 	case MSR_KERNELGSBASE: return "KGSBASE";
7803 	case MSR_MISC_ENABLE: return "Misc Enable";
7804 	default: return "Unknown MSR";
7805 	}
7806 }
7807 
7808 /*
7809  * vmm_segment_desc_decode
7810  *
7811  * Debug function to print segment information for supplied descriptor
7812  *
7813  * Parameters:
7814  *  val - The A/R bytes for the segment descriptor to decode
7815  */
7816 void
vmm_segment_desc_decode(uint64_t val)7817 vmm_segment_desc_decode(uint64_t val)
7818 {
7819 	uint16_t ar;
7820 	uint8_t g, type, s, dpl, p, dib, l;
7821 	uint32_t unusable;
7822 
7823 	/* Exit early on unusable descriptors */
7824 	unusable = val & 0x10000;
7825 	if (unusable) {
7826 		DPRINTF("(unusable)\n");
7827 		return;
7828 	}
7829 
7830 	ar = (uint16_t)val;
7831 
7832 	g = (ar & 0x8000) >> 15;
7833 	dib = (ar & 0x4000) >> 14;
7834 	l = (ar & 0x2000) >> 13;
7835 	p = (ar & 0x80) >> 7;
7836 	dpl = (ar & 0x60) >> 5;
7837 	s = (ar & 0x10) >> 4;
7838 	type = (ar & 0xf);
7839 
7840 	DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ",
7841 	    g, dib, l, p, s);
7842 
7843 	DPRINTF("type=");
7844 	if (!s) {
7845 		switch (type) {
7846 		case SDT_SYSLDT: DPRINTF("ldt\n"); break;
7847 		case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break;
7848 		case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break;
7849 		case SDT_SYS386CGT: DPRINTF("call gate\n"); break;
7850 		case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break;
7851 		case SDT_SYS386TGT: DPRINTF("trap gate\n"); break;
7852 		/* XXX handle 32 bit segment types by inspecting mode */
7853 		default: DPRINTF("unknown");
7854 		}
7855 	} else {
7856 		switch (type + 16) {
7857 		case SDT_MEMRO: DPRINTF("data, r/o\n"); break;
7858 		case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break;
7859 		case SDT_MEMRW: DPRINTF("data, r/w\n"); break;
7860 		case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break;
7861 		case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break;
7862 		case SDT_MEMRODA: DPRINTF("data, r/o, expand down, "
7863 		    "accessed\n");
7864 			break;
7865 		case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break;
7866 		case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, "
7867 		    "accessed\n");
7868 			break;
7869 		case SDT_MEME: DPRINTF("code, x only\n"); break;
7870 		case SDT_MEMEA: DPRINTF("code, x only, accessed\n");
7871 		case SDT_MEMER: DPRINTF("code, r/x\n"); break;
7872 		case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break;
7873 		case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break;
7874 		case SDT_MEMEAC: DPRINTF("code, x only, conforming, "
7875 		    "accessed\n");
7876 			break;
7877 		case SDT_MEMERC: DPRINTF("code, r/x, conforming\n"); break;
7878 		case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n");
7879 			break;
7880 		}
7881 	}
7882 }
7883 
7884 void
vmm_decode_cr0(uint64_t cr0)7885 vmm_decode_cr0(uint64_t cr0)
7886 {
7887 	struct vmm_reg_debug_info cr0_info[11] = {
7888 		{ CR0_PG, "PG ", "pg " },
7889 		{ CR0_CD, "CD ", "cd " },
7890 		{ CR0_NW, "NW ", "nw " },
7891 		{ CR0_AM, "AM ", "am " },
7892 		{ CR0_WP, "WP ", "wp " },
7893 		{ CR0_NE, "NE ", "ne " },
7894 		{ CR0_ET, "ET ", "et " },
7895 		{ CR0_TS, "TS ", "ts " },
7896 		{ CR0_EM, "EM ", "em " },
7897 		{ CR0_MP, "MP ", "mp " },
7898 		{ CR0_PE, "PE", "pe" }
7899 	};
7900 
7901 	uint8_t i;
7902 
7903 	DPRINTF("(");
7904 	for (i = 0; i < nitems(cr0_info); i++)
7905 		if (cr0 & cr0_info[i].vrdi_bit)
7906 			DPRINTF("%s", cr0_info[i].vrdi_present);
7907 		else
7908 			DPRINTF("%s", cr0_info[i].vrdi_absent);
7909 
7910 	DPRINTF(")\n");
7911 }
7912 
7913 void
vmm_decode_cr3(uint64_t cr3)7914 vmm_decode_cr3(uint64_t cr3)
7915 {
7916 	struct vmm_reg_debug_info cr3_info[2] = {
7917 		{ CR3_PWT, "PWT ", "pwt "},
7918 		{ CR3_PCD, "PCD", "pcd"}
7919 	};
7920 
7921 	uint64_t cr4;
7922 	uint8_t i;
7923 
7924 	if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
7925 		DPRINTF("(error)\n");
7926 		return;
7927 	}
7928 
7929 	/* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
7930 	if ((cr4 & CR4_PCIDE) == 0) {
7931 		DPRINTF("(");
7932 		for (i = 0 ; i < nitems(cr3_info) ; i++)
7933 			if (cr3 & cr3_info[i].vrdi_bit)
7934 				DPRINTF("%s", cr3_info[i].vrdi_present);
7935 			else
7936 				DPRINTF("%s", cr3_info[i].vrdi_absent);
7937 
7938 		DPRINTF(")\n");
7939 	} else {
7940 		DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
7941 	}
7942 }
7943 
7944 void
vmm_decode_cr4(uint64_t cr4)7945 vmm_decode_cr4(uint64_t cr4)
7946 {
7947 	struct vmm_reg_debug_info cr4_info[19] = {
7948 		{ CR4_PKE, "PKE ", "pke "},
7949 		{ CR4_SMAP, "SMAP ", "smap "},
7950 		{ CR4_SMEP, "SMEP ", "smep "},
7951 		{ CR4_OSXSAVE, "OSXSAVE ", "osxsave "},
7952 		{ CR4_PCIDE, "PCIDE ", "pcide "},
7953 		{ CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "},
7954 		{ CR4_SMXE, "SMXE ", "smxe "},
7955 		{ CR4_VMXE, "VMXE ", "vmxe "},
7956 		{ CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "},
7957 		{ CR4_OSFXSR, "OSFXSR ", "osfxsr "},
7958 		{ CR4_PCE, "PCE ", "pce "},
7959 		{ CR4_PGE, "PGE ", "pge "},
7960 		{ CR4_MCE, "MCE ", "mce "},
7961 		{ CR4_PAE, "PAE ", "pae "},
7962 		{ CR4_PSE, "PSE ", "pse "},
7963 		{ CR4_DE, "DE ", "de "},
7964 		{ CR4_TSD, "TSD ", "tsd "},
7965 		{ CR4_PVI, "PVI ", "pvi "},
7966 		{ CR4_VME, "VME", "vme"}
7967 	};
7968 
7969 	uint8_t i;
7970 
7971 	DPRINTF("(");
7972 	for (i = 0; i < nitems(cr4_info); i++)
7973 		if (cr4 & cr4_info[i].vrdi_bit)
7974 			DPRINTF("%s", cr4_info[i].vrdi_present);
7975 		else
7976 			DPRINTF("%s", cr4_info[i].vrdi_absent);
7977 
7978 	DPRINTF(")\n");
7979 }
7980 
7981 void
vmm_decode_apicbase_msr_value(uint64_t apicbase)7982 vmm_decode_apicbase_msr_value(uint64_t apicbase)
7983 {
7984 	struct vmm_reg_debug_info apicbase_info[3] = {
7985 		{ APICBASE_BSP, "BSP ", "bsp "},
7986 		{ APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "},
7987 		{ APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"}
7988 	};
7989 
7990 	uint8_t i;
7991 
7992 	DPRINTF("(");
7993 	for (i = 0; i < nitems(apicbase_info); i++)
7994 		if (apicbase & apicbase_info[i].vrdi_bit)
7995 			DPRINTF("%s", apicbase_info[i].vrdi_present);
7996 		else
7997 			DPRINTF("%s", apicbase_info[i].vrdi_absent);
7998 
7999 	DPRINTF(")\n");
8000 }
8001 
8002 void
vmm_decode_ia32_fc_value(uint64_t fcr)8003 vmm_decode_ia32_fc_value(uint64_t fcr)
8004 {
8005 	struct vmm_reg_debug_info fcr_info[4] = {
8006 		{ IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "},
8007 		{ IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "},
8008 		{ IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "},
8009 		{ IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "}
8010 	};
8011 
8012 	uint8_t i;
8013 
8014 	DPRINTF("(");
8015 	for (i = 0; i < nitems(fcr_info); i++)
8016 		if (fcr & fcr_info[i].vrdi_bit)
8017 			DPRINTF("%s", fcr_info[i].vrdi_present);
8018 		else
8019 			DPRINTF("%s", fcr_info[i].vrdi_absent);
8020 
8021 	if (fcr & IA32_FEATURE_CONTROL_SENTER_EN)
8022 		DPRINTF(" [SENTER param = 0x%llx]",
8023 		    (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);
8024 
8025 	DPRINTF(")\n");
8026 }
8027 
8028 void
vmm_decode_mtrrcap_value(uint64_t val)8029 vmm_decode_mtrrcap_value(uint64_t val)
8030 {
8031 	struct vmm_reg_debug_info mtrrcap_info[3] = {
8032 		{ MTRRcap_FIXED, "FIXED ", "fixed "},
8033 		{ MTRRcap_WC, "WC ", "wc "},
8034 		{ MTRRcap_SMRR, "SMRR ", "smrr "}
8035 	};
8036 
8037 	uint8_t i;
8038 
8039 	DPRINTF("(");
8040 	for (i = 0; i < nitems(mtrrcap_info); i++)
8041 		if (val & mtrrcap_info[i].vrdi_bit)
8042 			DPRINTF("%s", mtrrcap_info[i].vrdi_present);
8043 		else
8044 			DPRINTF("%s", mtrrcap_info[i].vrdi_absent);
8045 
8046 	if (val & MTRRcap_FIXED)
8047 		DPRINTF(" [nr fixed ranges = 0x%llx]",
8048 		    (val & 0xff));
8049 
8050 	DPRINTF(")\n");
8051 }
8052 
8053 void
vmm_decode_perf_status_value(uint64_t val)8054 vmm_decode_perf_status_value(uint64_t val)
8055 {
8056 	DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
8057 }
8058 
8059 void
vmm_decode_perf_ctl_value(uint64_t val)8060 vmm_decode_perf_ctl_value(uint64_t val)
8061 {
8062 	DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
8063 	DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF));
8064 }
8065 
8066 void
vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)8067 vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
8068 {
8069 	struct vmm_reg_debug_info mtrrdeftype_info[2] = {
8070 		{ MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "},
8071 		{ MTRRdefType_ENABLE, "ENABLED ", "enabled "},
8072 	};
8073 
8074 	uint8_t i;
8075 	int type;
8076 
8077 	DPRINTF("(");
8078 	for (i = 0; i < nitems(mtrrdeftype_info); i++)
8079 		if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
8080 			DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
8081 		else
8082 			DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);
8083 
8084 	DPRINTF("type = ");
8085 	type = mtrr2mrt(mtrrdeftype & 0xff);
8086 	switch (type) {
8087 	case MDF_UNCACHEABLE: DPRINTF("UC"); break;
8088 	case MDF_WRITECOMBINE: DPRINTF("WC"); break;
8089 	case MDF_WRITETHROUGH: DPRINTF("WT"); break;
8090 	case MDF_WRITEPROTECT: DPRINTF("RO"); break;
8091 	case MDF_WRITEBACK: DPRINTF("WB"); break;
8092 	case MDF_UNKNOWN:
8093 	default:
8094 		DPRINTF("??");
8095 		break;
8096 	}
8097 
8098 	DPRINTF(")\n");
8099 }
8100 
8101 void
vmm_decode_efer_value(uint64_t efer)8102 vmm_decode_efer_value(uint64_t efer)
8103 {
8104 	struct vmm_reg_debug_info efer_info[4] = {
8105 		{ EFER_SCE, "SCE ", "sce "},
8106 		{ EFER_LME, "LME ", "lme "},
8107 		{ EFER_LMA, "LMA ", "lma "},
8108 		{ EFER_NXE, "NXE", "nxe"},
8109 	};
8110 
8111 	uint8_t i;
8112 
8113 	DPRINTF("(");
8114 	for (i = 0; i < nitems(efer_info); i++)
8115 		if (efer & efer_info[i].vrdi_bit)
8116 			DPRINTF("%s", efer_info[i].vrdi_present);
8117 		else
8118 			DPRINTF("%s", efer_info[i].vrdi_absent);
8119 
8120 	DPRINTF(")\n");
8121 }
8122 
8123 void
vmm_decode_msr_value(uint64_t msr,uint64_t val)8124 vmm_decode_msr_value(uint64_t msr, uint64_t val)
8125 {
8126 	switch (msr) {
8127 	case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break;
8128 	case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break;
8129 	case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break;
8130 	case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break;
8131 	case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break;
8132 	case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break;
8133 	case MSR_EFER: vmm_decode_efer_value(val); break;
8134 	case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
8135 	default: DPRINTF("\n");
8136 	}
8137 }
8138 
8139 void
vmm_decode_rflags(uint64_t rflags)8140 vmm_decode_rflags(uint64_t rflags)
8141 {
8142 	struct vmm_reg_debug_info rflags_info[16] = {
8143 		{ PSL_C,   "CF ", "cf "},
8144 		{ PSL_PF,  "PF ", "pf "},
8145 		{ PSL_AF,  "AF ", "af "},
8146 		{ PSL_Z,   "ZF ", "zf "},
8147 		{ PSL_N,   "SF ", "sf "},	/* sign flag */
8148 		{ PSL_T,   "TF ", "tf "},
8149 		{ PSL_I,   "IF ", "if "},
8150 		{ PSL_D,   "DF ", "df "},
8151 		{ PSL_V,   "OF ", "of "},	/* overflow flag */
8152 		{ PSL_NT,  "NT ", "nt "},
8153 		{ PSL_RF,  "RF ", "rf "},
8154 		{ PSL_VM,  "VM ", "vm "},
8155 		{ PSL_AC,  "AC ", "ac "},
8156 		{ PSL_VIF, "VIF ", "vif "},
8157 		{ PSL_VIP, "VIP ", "vip "},
8158 		{ PSL_ID,  "ID ", "id "},
8159 	};
8160 
8161 	uint8_t i, iopl;
8162 
8163 	DPRINTF("(");
8164 	for (i = 0; i < nitems(rflags_info); i++)
8165 		if (rflags & rflags_info[i].vrdi_bit)
8166 			DPRINTF("%s", rflags_info[i].vrdi_present);
8167 		else
8168 			DPRINTF("%s", rflags_info[i].vrdi_absent);
8169 
8170 	iopl = (rflags & PSL_IOPL) >> 12;
8171 	DPRINTF("IOPL=%d", iopl);
8172 
8173 	DPRINTF(")\n");
8174 }
8175 
8176 void
vmm_decode_misc_enable_value(uint64_t misc)8177 vmm_decode_misc_enable_value(uint64_t misc)
8178 {
8179 	struct vmm_reg_debug_info misc_info[10] = {
8180 		{ MISC_ENABLE_FAST_STRINGS,		"FSE ", "fse "},
8181 		{ MISC_ENABLE_TCC,			"TCC ", "tcc "},
8182 		{ MISC_ENABLE_PERF_MON_AVAILABLE,	"PERF ", "perf "},
8183 		{ MISC_ENABLE_BTS_UNAVAILABLE,		"BTSU ", "btsu "},
8184 		{ MISC_ENABLE_PEBS_UNAVAILABLE,		"PEBSU ", "pebsu "},
8185 		{ MISC_ENABLE_EIST_ENABLED,		"EIST ", "eist "},
8186 		{ MISC_ENABLE_ENABLE_MONITOR_FSM,	"MFSM ", "mfsm "},
8187 		{ MISC_ENABLE_LIMIT_CPUID_MAXVAL,	"CMAX ", "cmax "},
8188 		{ MISC_ENABLE_xTPR_MESSAGE_DISABLE,	"xTPRD ", "xtprd "},
8189 		{ MISC_ENABLE_XD_BIT_DISABLE,		"NXD", "nxd"},
8190 	};
8191 
8192 	uint8_t i;
8193 
8194 	DPRINTF("(");
8195 	for (i = 0; i < nitems(misc_info); i++)
8196 		if (misc & misc_info[i].vrdi_bit)
8197 			DPRINTF("%s", misc_info[i].vrdi_present);
8198 		else
8199 			DPRINTF("%s", misc_info[i].vrdi_absent);
8200 
8201 	DPRINTF(")\n");
8202 }
8203 
8204 const char *
vmm_decode_cpu_mode(struct vcpu * vcpu)8205 vmm_decode_cpu_mode(struct vcpu *vcpu)
8206 {
8207 	int mode = vmm_get_guest_cpu_mode(vcpu);
8208 
8209 	switch (mode) {
8210 	case VMM_CPU_MODE_REAL: return "real";
8211 	case VMM_CPU_MODE_PROT: return "16 bit protected";
8212 	case VMM_CPU_MODE_PROT32: return "32 bit protected";
8213 	case VMM_CPU_MODE_COMPAT: return "compatibility";
8214 	case VMM_CPU_MODE_LONG: return "long";
8215 	default: return "unknown";
8216 	}
8217 }
8218 #endif /* VMM_DEBUG */
8219