1 /* $OpenBSD: vmm_machdep.c,v 1.41 2024/11/27 10:09:51 mpi Exp $ */
2 /*
3 * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/malloc.h>
21 #include <sys/device.h>
22 #include <sys/pool.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/ioctl.h>
26 #include <sys/queue.h>
27 #include <sys/refcnt.h>
28 #include <sys/rwlock.h>
29 #include <sys/pledge.h>
30 #include <sys/memrange.h>
31 #include <sys/tracepoint.h>
32
33 #include <uvm/uvm_extern.h>
34
35 #include <machine/fpu.h>
36 #include <machine/pmap.h>
37 #include <machine/biosvar.h>
38 #include <machine/segments.h>
39 #include <machine/cpufunc.h>
40 #include <machine/vmmvar.h>
41
42 #include <dev/isa/isareg.h>
43 #include <dev/pv/pvreg.h>
44
45 #include <dev/vmm/vmm.h>
46
47 #ifdef MP_LOCKDEBUG
48 #include <ddb/db_output.h>
49 extern int __mp_lock_spinout;
50 #endif /* MP_LOCKDEBUG */
51
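/*
 * Backing store for the software L1TF mitigation: allocated and filled in
 * vmm_attach_machdep() when the CPU lacks the L1D flush MSR, and intended
 * to be read on VM entry to displace guest data from the L1D cache.
 */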
52 void *l1tf_flush_region;
53
54 #define DEVNAME(s) ((s)->sc_dev.dv_xname)
55
56 #define CTRL_DUMP(x,y,z) printf(" %s: Can set:%s Can clear:%s\n", #z , \
57 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
58 IA32_VMX_##z, 1) ? "Yes" : "No", \
59 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
60 IA32_VMX_##z, 0) ? "Yes" : "No");
61
62 #define VMX_EXIT_INFO_HAVE_RIP 0x1
63 #define VMX_EXIT_INFO_HAVE_REASON 0x2
64 #define VMX_EXIT_INFO_COMPLETE \
65 (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON)
66
67 void vmx_dump_vmcs_field(uint16_t, const char *);
68 int vmm_enabled(void);
69 void vmm_activate_machdep(struct device *, int);
70 int vmmioctl_machdep(dev_t, u_long, caddr_t, int, struct proc *);
71 int vmm_quiesce_vmx(void);
72 int vm_run(struct vm_run_params *);
73 int vm_intr_pending(struct vm_intr_params *);
74 int vm_rwregs(struct vm_rwregs_params *, int);
75 int vm_rwvmparams(struct vm_rwvmparams_params *, int);
76 int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
77 int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
78 int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
79 int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
80 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
81 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
82 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
83 int vcpu_reload_vmcs_vmx(struct vcpu *);
84 int vcpu_init(struct vcpu *, struct vm_create_params *);
85 int vcpu_init_vmx(struct vcpu *);
86 int vcpu_init_svm(struct vcpu *, struct vm_create_params *);
87 int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
88 int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
89 void vcpu_deinit(struct vcpu *);
90 void vcpu_deinit_svm(struct vcpu *);
91 void vcpu_deinit_vmx(struct vcpu *);
92 int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
93 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
94 int vmx_get_exit_info(uint64_t *, uint64_t *);
95 int vmx_load_pdptes(struct vcpu *);
96 int vmx_handle_exit(struct vcpu *);
97 int svm_handle_exit(struct vcpu *);
98 int svm_handle_msr(struct vcpu *);
99 int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
100 int vmx_handle_xsetbv(struct vcpu *);
101 int svm_handle_xsetbv(struct vcpu *);
102 int vmm_handle_cpuid(struct vcpu *);
103 int vmx_handle_rdmsr(struct vcpu *);
104 int vmx_handle_wrmsr(struct vcpu *);
105 int vmx_handle_cr0_write(struct vcpu *, uint64_t);
106 int vmx_handle_cr4_write(struct vcpu *, uint64_t);
107 int vmx_handle_cr(struct vcpu *);
108 int svm_handle_inout(struct vcpu *);
109 int vmx_handle_inout(struct vcpu *);
110 int svm_handle_hlt(struct vcpu *);
111 int vmx_handle_hlt(struct vcpu *);
112 int vmm_inject_ud(struct vcpu *);
113 int vmm_inject_gp(struct vcpu *);
114 int vmm_inject_db(struct vcpu *);
115 void vmx_handle_intr(struct vcpu *);
116 void vmx_handle_misc_enable_msr(struct vcpu *);
117 int vmm_get_guest_memtype(struct vm *, paddr_t);
118 int vmx_get_guest_faulttype(void);
119 int svm_get_guest_faulttype(struct vmcb *);
120 int vmx_get_exit_qualification(uint64_t *);
121 int vmm_get_guest_cpu_cpl(struct vcpu *);
122 int vmm_get_guest_cpu_mode(struct vcpu *);
123 int svm_fault_page(struct vcpu *, paddr_t);
124 int vmx_fault_page(struct vcpu *, paddr_t);
125 int vmx_handle_np_fault(struct vcpu *);
126 int svm_handle_np_fault(struct vcpu *);
127 int vmm_alloc_vpid(uint16_t *);
128 void vmm_free_vpid(uint16_t);
129 const char *vcpu_state_decode(u_int);
130 const char *vmx_exit_reason_decode(uint32_t);
131 const char *svm_exit_reason_decode(uint32_t);
132 const char *vmx_instruction_error_decode(uint32_t);
133 void svm_setmsrbr(struct vcpu *, uint32_t);
134 void svm_setmsrbw(struct vcpu *, uint32_t);
135 void svm_setmsrbrw(struct vcpu *, uint32_t);
136 void vmx_setmsrbr(struct vcpu *, uint32_t);
137 void vmx_setmsrbw(struct vcpu *, uint32_t);
138 void vmx_setmsrbrw(struct vcpu *, uint32_t);
139 void svm_set_clean(struct vcpu *, uint32_t);
140 void svm_set_dirty(struct vcpu *, uint32_t);
141
142 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
143 void vmm_init_pvclock(struct vcpu *, paddr_t);
144 int vmm_update_pvclock(struct vcpu *);
145 int vmm_pat_is_valid(uint64_t);
146
147 #ifdef MULTIPROCESSOR
148 static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
149 #endif
150
151 #ifdef VMM_DEBUG
152 void vmx_vcpu_dump_regs(struct vcpu *);
153 void vmx_dump_vmcs(struct vcpu *);
154 const char *msr_name_decode(uint32_t);
155 void vmm_segment_desc_decode(uint64_t);
156 void vmm_decode_cr0(uint64_t);
157 void vmm_decode_cr3(uint64_t);
158 void vmm_decode_cr4(uint64_t);
159 void vmm_decode_msr_value(uint64_t, uint64_t);
160 void vmm_decode_apicbase_msr_value(uint64_t);
161 void vmm_decode_ia32_fc_value(uint64_t);
162 void vmm_decode_mtrrcap_value(uint64_t);
163 void vmm_decode_perf_status_value(uint64_t);
164 void vmm_decode_perf_ctl_value(uint64_t);
165 void vmm_decode_mtrrdeftype_value(uint64_t);
166 void vmm_decode_efer_value(uint64_t);
167 void vmm_decode_rflags(uint64_t);
168 void vmm_decode_misc_enable_value(uint64_t);
169 const char *vmm_decode_cpu_mode(struct vcpu *);
170
171 extern int mtrr2mrt(int);
172
173 struct vmm_reg_debug_info {
174 uint64_t vrdi_bit;
175 const char *vrdi_present;
176 const char *vrdi_absent;
177 };
178 #endif /* VMM_DEBUG */
179
180 extern uint64_t tsc_frequency;
181 extern int tsc_is_invariant;
182
183 const char *vmm_hv_signature = VMM_HV_SIGNATURE;
184
185 const struct kmem_pa_mode vmm_kp_contig = {
186 .kp_constraint = &no_constraint,
187 .kp_maxseg = 1,
188 .kp_align = 4096,
189 .kp_zero = 1,
190 };
191
192 extern struct cfdriver vmm_cd;
193 extern const struct cfattach vmm_ca;
194
195 /*
196 * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
197 * to access the individual fields of the guest segment registers. This
198 * struct is indexed by VCPU_REGS_* id.
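 * The entries below appear in the order ES, CS, SS, DS, FS, GS, LDTR, TR,
 * matching the corresponding VCPU_REGS_* segment indices.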
199 */
200 const struct {
201 uint64_t selid;
202 uint64_t limitid;
203 uint64_t arid;
204 uint64_t baseid;
205 } vmm_vmx_sreg_vmcs_fields[] = {
206 { VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT,
207 VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE },
208 { VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT,
209 VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE },
210 { VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT,
211 VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE },
212 { VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT,
213 VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE },
214 { VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT,
215 VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE },
216 { VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT,
217 VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE },
218 { VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT,
219 VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE },
220 { VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT,
221 VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE }
222 };
223
224 /* Pools for VMs and VCPUs */
225 extern struct pool vm_pool;
226 extern struct pool vcpu_pool;
227
228 extern struct vmm_softc *vmm_softc;
229
230 /* IDT information used when populating host state area */
231 extern vaddr_t idt_vaddr;
232 extern struct gate_descriptor *idt;
233
234 /* Constants used in "CR access exit" */
235 #define CR_WRITE 0
236 #define CR_READ 1
237 #define CR_CLTS 2
238 #define CR_LMSW 3
239
240 /*
241 * vmm_enabled
242 *
 * Checks if we have at least one CPU with either VMX or SVM.
 * Returns 1 if at least one CPU supports one of them; returns 0 if no CPU
 * does, or if both VMX and SVM CPUs are present (mixed systems are not
 * supported).
245 */
246 int
vmm_enabled(void)
248 {
249 struct cpu_info *ci;
250 CPU_INFO_ITERATOR cii;
251 int found_vmx = 0, found_svm = 0;
252
253 /* Check if we have at least one CPU with either VMX or SVM */
254 CPU_INFO_FOREACH(cii, ci) {
255 if (ci->ci_vmm_flags & CI_VMM_VMX)
256 found_vmx = 1;
257 if (ci->ci_vmm_flags & CI_VMM_SVM)
258 found_svm = 1;
259 }
260
261 /* Don't support both SVM and VMX at the same time */
262 if (found_vmx && found_svm)
263 return (0);
264
265 if (found_vmx || found_svm)
266 return 1;
267
268 return 0;
269 }
270
271 void
vmm_attach_machdep(struct device *parent, struct device *self, void *aux)
273 {
274 struct vmm_softc *sc = (struct vmm_softc *)self;
275 struct cpu_info *ci;
276 CPU_INFO_ITERATOR cii;
277
278 sc->sc_md.nr_rvi_cpus = 0;
279 sc->sc_md.nr_ept_cpus = 0;
280
281 /* Calculate CPU features */
282 CPU_INFO_FOREACH(cii, ci) {
283 if (ci->ci_vmm_flags & CI_VMM_RVI)
284 sc->sc_md.nr_rvi_cpus++;
285 if (ci->ci_vmm_flags & CI_VMM_EPT)
286 sc->sc_md.nr_ept_cpus++;
287 }
288
289 sc->sc_md.pkru_enabled = 0;
290 if (rcr4() & CR4_PKE)
291 sc->sc_md.pkru_enabled = 1;
292
293 if (sc->sc_md.nr_ept_cpus) {
294 printf(": VMX/EPT");
295 sc->mode = VMM_MODE_EPT;
296 } else if (sc->sc_md.nr_rvi_cpus) {
297 printf(": SVM/RVI");
298 sc->mode = VMM_MODE_RVI;
299 } else {
300 printf(": unknown");
301 sc->mode = VMM_MODE_UNKNOWN;
302 }
303
304 if (sc->mode == VMM_MODE_EPT) {
305 if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
306 l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE,
307 &kv_any, &vmm_kp_contig, &kd_waitok);
308 if (!l1tf_flush_region) {
309 printf(" (failing, no memory)");
310 sc->mode = VMM_MODE_UNKNOWN;
311 } else {
312 printf(" (using slow L1TF mitigation)");
313 memset(l1tf_flush_region, 0xcc,
314 VMX_L1D_FLUSH_SIZE);
315 }
316 }
317 }
318
319 if (sc->mode == VMM_MODE_RVI) {
320 sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid;
321 } else {
322 sc->max_vpid = 0xFFF;
323 }
324
325 bzero(&sc->vpids, sizeof(sc->vpids));
326 rw_init(&sc->vpid_lock, "vpid");
327 }
328
329 /*
330 * vmm_quiesce_vmx
331 *
332 * Prepare the host for suspend by flushing all VMCS states.
333 */
334 int
vmm_quiesce_vmx(void)
336 {
337 struct vm *vm;
338 struct vcpu *vcpu;
339 int err;
340
341 /*
342 * We should be only called from a quiescing device state so we
343 * don't expect to sleep here. If we can't get all our locks,
344 * something is wrong.
345 */
346 if ((err = rw_enter(&vmm_softc->vm_lock, RW_WRITE | RW_NOSLEEP)))
347 return (err);
348
349 /* Iterate over each vm... */
350 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
351 /* Iterate over each vcpu... */
352 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
353 err = rw_enter(&vcpu->vc_lock, RW_WRITE | RW_NOSLEEP);
354 if (err)
355 break;
356
357 /* We can skip unlaunched VMCS. Nothing to flush. */
358 if (atomic_load_int(&vcpu->vc_vmx_vmcs_state)
359 != VMCS_LAUNCHED) {
360 DPRINTF("%s: skipping vcpu %d for vm %d\n",
361 __func__, vcpu->vc_id, vm->vm_id);
362 rw_exit_write(&vcpu->vc_lock);
363 continue;
364 }
365
366 #ifdef MULTIPROCESSOR
367 if (vcpu->vc_last_pcpu != curcpu()) {
368 /* Remote cpu vmclear via ipi. */
369 err = vmx_remote_vmclear(vcpu->vc_last_pcpu,
370 vcpu);
371 if (err)
372 printf("%s: failed to remote vmclear "
373 "vcpu %d of vm %d\n", __func__,
374 vcpu->vc_id, vm->vm_id);
375 } else
376 #endif
377 {
378 /* Local cpu vmclear instruction. */
379 if ((err = vmclear(&vcpu->vc_control_pa)))
380 printf("%s: failed to locally vmclear "
381 "vcpu %d of vm %d\n", __func__,
382 vcpu->vc_id, vm->vm_id);
383 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state,
384 VMCS_CLEARED);
385 }
386
387 rw_exit_write(&vcpu->vc_lock);
388 if (err)
389 break;
390 DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__,
391 vcpu->vc_id, vm->vm_id);
392 }
393 if (err)
394 break;
395 }
396 rw_exit_write(&vmm_softc->vm_lock);
397
398 if (err)
399 return (err);
400 return (0);
401 }
402
403 void
vmm_activate_machdep(struct device *self, int act)
405 {
406 struct cpu_info *ci = curcpu();
407
408 switch (act) {
409 case DVACT_QUIESCE:
410 /* If we're not in vmm mode, nothing to do. */
411 if ((ci->ci_flags & CPUF_VMM) == 0)
412 break;
413
414 /* Intel systems need extra steps to sync vcpu state. */
415 if (vmm_softc->mode == VMM_MODE_EPT)
416 if (vmm_quiesce_vmx())
417 DPRINTF("%s: vmx quiesce failed\n", __func__);
418
419 /* Stop virtualization mode on all cpus. */
420 vmm_stop();
421 break;
422
423 case DVACT_WAKEUP:
/* Restart virtualization mode on all cpus. */
425 if (vmm_softc->vm_ct > 0)
426 vmm_start();
427 break;
428 }
429 }
430
431 int
vmmioctl_machdep(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
433 {
434 int ret;
435
436 switch (cmd) {
437 case VMM_IOC_INTR:
438 ret = vm_intr_pending((struct vm_intr_params *)data);
439 break;
440 default:
441 DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
442 ret = ENOTTY;
443 }
444
445 return (ret);
446 }
447
448 int
pledge_ioctl_vmm_machdep(struct proc *p, long com)
450 {
451 switch (com) {
452 case VMM_IOC_INTR:
453 return (0);
454 }
455
456 return (EPERM);
457 }
458
459 /*
460 * vm_intr_pending
461 *
462 * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
463 * interrupt is pending and needs acknowledgment
464 *
465 * Parameters:
466 * vip: Describes the vm/vcpu for which the interrupt is pending
467 *
468 * Return values:
469 * 0: if successful
470 * ENOENT: if the VM/VCPU defined by 'vip' cannot be found
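 *
 * A minimal sketch of the userland side (hypothetical descriptor and IDs,
 * using the field names handled below):
 *
 *	struct vm_intr_params vip = { 0 };
 *	vip.vip_vm_id = vm_id;
 *	vip.vip_vcpu_id = 0;
 *	vip.vip_intr = 1;
 *	if (ioctl(vmm_fd, VMM_IOC_INTR, &vip) == -1)
 *		warn("VMM_IOC_INTR");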
471 */
472 int
vm_intr_pending(struct vm_intr_params *vip)
474 {
475 struct vm *vm;
476 struct vcpu *vcpu;
477 #ifdef MULTIPROCESSOR
478 struct cpu_info *ci;
479 #endif
480 int error, ret = 0;
481
482 /* Find the desired VM */
483 error = vm_find(vip->vip_vm_id, &vm);
484
485 /* Not found? exit. */
486 if (error != 0)
487 return (error);
488
489 vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
490
491 if (vcpu == NULL) {
492 ret = ENOENT;
493 goto out;
494 }
495
496 vcpu->vc_intr = vip->vip_intr;
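/*
 * If the vcpu is currently running in guest mode on another cpu, a no-op
 * IPI forces it to exit the guest so the run loop notices vc_intr.
 */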
497 #ifdef MULTIPROCESSOR
498 ci = READ_ONCE(vcpu->vc_curcpu);
499 if (ci != NULL)
500 x86_send_ipi(ci, X86_IPI_NOP);
501 #endif
502
503 out:
504 refcnt_rele_wake(&vm->vm_refcnt);
505 return (ret);
506 }
507
508 /*
509 * vm_rwvmparams
510 *
511 * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock
512 * version, etc.
513 *
514 * Parameters:
515 * vrwp: Describes the VM and VCPU to get/set the params from
516 * dir: 0 for reading, 1 for writing
517 *
518 * Return values:
519 * 0: if successful
520 * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found
521 * EINVAL: if an error occurred reading the registers of the guest
522 */
523 int
vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir)
525 {
526 struct vm *vm;
527 struct vcpu *vcpu;
528 int error, ret = 0;
529
530 /* Find the desired VM */
531 error = vm_find(vpp->vpp_vm_id, &vm);
532
533 /* Not found? exit. */
534 if (error != 0)
535 return (error);
536
537 vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
538
539 if (vcpu == NULL) {
540 ret = ENOENT;
541 goto out;
542 }
543
544 if (dir == 0) {
545 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
546 vpp->vpp_pvclock_version = vcpu->vc_pvclock_version;
547 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA)
548 vpp->vpp_pvclock_system_gpa = \
549 vcpu->vc_pvclock_system_gpa;
550 } else {
551 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
552 vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
553 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) {
554 vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
555 }
556 }
557 out:
558 refcnt_rele_wake(&vm->vm_refcnt);
559 return (ret);
560 }
561
562 /*
 * vm_rwregs
564 *
565 * IOCTL handler to read/write the current register values of a guest VCPU.
566 * The VCPU must not be running.
567 *
568 * Parameters:
569 * vrwp: Describes the VM and VCPU to get/set the registers from. The
570 * register values are returned here as well.
571 * dir: 0 for reading, 1 for writing
572 *
573 * Return values:
574 * 0: if successful
575 * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found
576 * EINVAL: if an error occurred accessing the registers of the guest
577 * EPERM: if the vm cannot be accessed from the calling process
578 */
579 int
vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
581 {
582 struct vm *vm;
583 struct vcpu *vcpu;
584 struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
585 int error, ret = 0;
586
587 /* Find the desired VM */
588 error = vm_find(vrwp->vrwp_vm_id, &vm);
589
590 /* Not found? exit. */
591 if (error != 0)
592 return (error);
593
594 vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
595
596 if (vcpu == NULL) {
597 ret = ENOENT;
598 goto out;
599 }
600
601 rw_enter_write(&vcpu->vc_lock);
602 if (vmm_softc->mode == VMM_MODE_EPT)
603 ret = (dir == 0) ?
604 vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) :
605 vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
606 else if (vmm_softc->mode == VMM_MODE_RVI)
607 ret = (dir == 0) ?
608 vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
609 vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
610 else {
611 DPRINTF("%s: unknown vmm mode", __func__);
612 ret = EINVAL;
613 }
614 rw_exit_write(&vcpu->vc_lock);
615 out:
616 refcnt_rele_wake(&vm->vm_refcnt);
617 return (ret);
618 }
619
620 /*
621 * vmm_start
622 *
623 * Starts VMM mode on the system
624 */
625 int
vmm_start(void)
627 {
628 int rv = 0;
629 struct cpu_info *self = curcpu();
630 #ifdef MULTIPROCESSOR
631 struct cpu_info *ci;
632 CPU_INFO_ITERATOR cii;
633 #ifdef MP_LOCKDEBUG
634 int nticks;
635 #endif /* MP_LOCKDEBUG */
636 #endif /* MULTIPROCESSOR */
637
638 rw_enter_write(&vmm_softc->sc_slock);
639
640 /* VMM is already running */
641 if (self->ci_flags & CPUF_VMM)
642 goto unlock;
643
644 /* Start VMM on this CPU */
645 start_vmm_on_cpu(self);
646 if (!(self->ci_flags & CPUF_VMM)) {
647 printf("%s: failed to enter VMM mode\n",
648 self->ci_dev->dv_xname);
649 rv = EIO;
650 goto unlock;
651 }
652
653 #ifdef MULTIPROCESSOR
654 /* Broadcast start VMM IPI */
655 x86_broadcast_ipi(X86_IPI_START_VMM);
656
657 CPU_INFO_FOREACH(cii, ci) {
658 if (ci == self)
659 continue;
660 #ifdef MP_LOCKDEBUG
661 nticks = __mp_lock_spinout;
662 #endif /* MP_LOCKDEBUG */
663 while (!(ci->ci_flags & CPUF_VMM)) {
664 CPU_BUSY_CYCLE();
665 #ifdef MP_LOCKDEBUG
666 if (--nticks <= 0) {
667 db_printf("%s: spun out", __func__);
668 db_enter();
669 nticks = __mp_lock_spinout;
670 }
671 #endif /* MP_LOCKDEBUG */
672 }
673 }
674 #endif /* MULTIPROCESSOR */
675 unlock:
676 rw_exit_write(&vmm_softc->sc_slock);
677 return (rv);
678 }
679
680 /*
681 * vmm_stop
682 *
683 * Stops VMM mode on the system
684 */
685 int
vmm_stop(void)
687 {
688 int rv = 0;
689 struct cpu_info *self = curcpu();
690 #ifdef MULTIPROCESSOR
691 struct cpu_info *ci;
692 CPU_INFO_ITERATOR cii;
693 #ifdef MP_LOCKDEBUG
694 int nticks;
695 #endif /* MP_LOCKDEBUG */
696 #endif /* MULTIPROCESSOR */
697
698 rw_enter_write(&vmm_softc->sc_slock);
699
700 /* VMM is not running */
701 if (!(self->ci_flags & CPUF_VMM))
702 goto unlock;
703
704 /* Stop VMM on this CPU */
705 stop_vmm_on_cpu(self);
706 if (self->ci_flags & CPUF_VMM) {
707 printf("%s: failed to exit VMM mode\n",
708 self->ci_dev->dv_xname);
709 rv = EIO;
710 goto unlock;
711 }
712
713 #ifdef MULTIPROCESSOR
714 /* Stop VMM on other CPUs */
715 x86_broadcast_ipi(X86_IPI_STOP_VMM);
716
717 CPU_INFO_FOREACH(cii, ci) {
718 if (ci == self)
719 continue;
720 #ifdef MP_LOCKDEBUG
721 nticks = __mp_lock_spinout;
722 #endif /* MP_LOCKDEBUG */
723 while ((ci->ci_flags & CPUF_VMM)) {
724 CPU_BUSY_CYCLE();
725 #ifdef MP_LOCKDEBUG
726 if (--nticks <= 0) {
db_printf("%s: spun out", __func__);
728 db_enter();
729 nticks = __mp_lock_spinout;
730 }
731 #endif /* MP_LOCKDEBUG */
732 }
733 }
734 #endif /* MULTIPROCESSOR */
735 unlock:
736 rw_exit_write(&vmm_softc->sc_slock);
737 return (0);
738 }
739
740 /*
741 * start_vmm_on_cpu
742 *
743 * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
744 * sequence to enter VMM mode (eg, VMXON)
745 */
746 void
start_vmm_on_cpu(struct cpu_info *ci)
748 {
749 uint64_t msr;
750 uint32_t cr4;
751 struct vmx_invept_descriptor vid;
752
753 /* No VMM mode? exit. */
754 if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 &&
755 (ci->ci_vmm_flags & CI_VMM_SVM) == 0)
756 return;
757
758 /*
759 * AMD SVM
760 */
761 if (ci->ci_vmm_flags & CI_VMM_SVM) {
762 msr = rdmsr(MSR_EFER);
763 msr |= EFER_SVME;
764 wrmsr(MSR_EFER, msr);
765 }
766
767 /*
768 * Intel VMX
769 */
770 if (ci->ci_vmm_flags & CI_VMM_VMX) {
771 if (ci->ci_vmxon_region == 0)
772 return;
773 else {
774 bzero(ci->ci_vmxon_region, PAGE_SIZE);
775 ci->ci_vmxon_region->vr_revision =
776 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
777
778 /* Enable VMX */
779 msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
780 if (msr & IA32_FEATURE_CONTROL_LOCK) {
781 if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
782 return;
783 } else {
784 msr |= IA32_FEATURE_CONTROL_VMX_EN |
785 IA32_FEATURE_CONTROL_LOCK;
786 wrmsr(MSR_IA32_FEATURE_CONTROL, msr);
787 }
788
789 /* Set CR4.VMXE */
790 cr4 = rcr4();
791 cr4 |= CR4_VMXE;
792 lcr4(cr4);
793
794 /* Enter VMX mode and clear EPTs on this cpu */
795 if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa))
796 panic("vmxon failed");
797
798 memset(&vid, 0, sizeof(vid));
799 if (invept(IA32_VMX_INVEPT_GLOBAL_CTX, &vid))
800 panic("invept failed");
801 }
802 }
803
804 atomic_setbits_int(&ci->ci_flags, CPUF_VMM);
805 }
806
807 /*
808 * stop_vmm_on_cpu
809 *
810 * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
811 * sequence to exit VMM mode (eg, VMXOFF)
812 */
813 void
stop_vmm_on_cpu(struct cpu_info *ci)
815 {
816 uint64_t msr;
817 uint32_t cr4;
818
819 if (!(ci->ci_flags & CPUF_VMM))
820 return;
821
822 /*
823 * AMD SVM
824 */
825 if (ci->ci_vmm_flags & CI_VMM_SVM) {
826 msr = rdmsr(MSR_EFER);
827 msr &= ~EFER_SVME;
828 wrmsr(MSR_EFER, msr);
829 }
830
831 /*
832 * Intel VMX
833 */
834 if (ci->ci_vmm_flags & CI_VMM_VMX) {
835 if (vmxoff())
836 panic("VMXOFF failed");
837
838 cr4 = rcr4();
839 cr4 &= ~CR4_VMXE;
840 lcr4(cr4);
841 }
842
843 atomic_clearbits_int(&ci->ci_flags, CPUF_VMM);
844 }
845
846 /*
847 * vmclear_on_cpu
848 *
849 * Flush and clear VMCS on 'ci' by executing vmclear.
850 *
851 */
852 void
vmclear_on_cpu(struct cpu_info *ci)
854 {
855 if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) {
856 if (vmclear(&ci->ci_vmcs_pa))
857 panic("VMCLEAR ipi failed");
858 atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR);
859 }
860 }
861
862 #ifdef MULTIPROCESSOR
863 static int
vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
865 {
866 #ifdef MP_LOCKDEBUG
867 int nticks = __mp_lock_spinout;
868 #endif /* MP_LOCKDEBUG */
869
870 rw_enter_write(&ci->ci_vmcs_lock);
871 atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa);
872 x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM);
873
874 while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) {
875 CPU_BUSY_CYCLE();
876 #ifdef MP_LOCKDEBUG
877 if (--nticks <= 0) {
878 db_printf("%s: spun out\n", __func__);
879 db_enter();
880 nticks = __mp_lock_spinout;
881 }
882 #endif /* MP_LOCKDEBUG */
883 }
884 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
885 rw_exit_write(&ci->ci_vmcs_lock);
886
887 return (0);
888 }
889 #endif /* MULTIPROCESSOR */
890
891 /*
892 * vm_impl_init
893 *
894 * VM address space initialization routine
895 *
896 * Parameters:
897 * vm: the VM being initialized
898 * p: vmd process owning the VM
899 *
900 * Return values:
901 * 0: the initialization was successful
902 * EINVAL: unsupported vmm mode
903 * ENOMEM: the initialization failed (lack of resources)
904 */
905 int
vm_impl_init(struct vm *vm, struct proc *p)
907 {
908 int i, mode, ret;
909 vaddr_t mingpa, maxgpa;
910 struct vm_mem_range *vmr;
911
/* Only EPT and RVI modes are supported */
913 switch (vmm_softc->mode) {
914 case VMM_MODE_EPT:
915 mode = PMAP_TYPE_EPT;
916 break;
917 case VMM_MODE_RVI:
918 mode = PMAP_TYPE_RVI;
919 break;
920 default:
921 printf("%s: invalid vmm mode %d\n", __func__, vmm_softc->mode);
922 return (EINVAL);
923 }
924
925 vmr = &vm->vm_memranges[0];
926 mingpa = vmr->vmr_gpa;
927 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
928 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
929
930 /*
931 * uvmspace_alloc (currently) always returns a valid vmspace
932 */
933 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE, FALSE);
934 vm->vm_map = &vm->vm_vmspace->vm_map;
935
936 /* Map the new map with an anon */
937 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
938 for (i = 0; i < vm->vm_nmemranges; i++) {
939 vmr = &vm->vm_memranges[i];
940 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
941 PROT_READ | PROT_WRITE | PROT_EXEC,
942 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
943 if (ret) {
944 printf("%s: uvm_share failed (%d)\n", __func__, ret);
945 /* uvmspace_free calls pmap_destroy for us */
946 KERNEL_LOCK();
947 uvmspace_free(vm->vm_vmspace);
948 vm->vm_vmspace = NULL;
949 KERNEL_UNLOCK();
950 return (ENOMEM);
951 }
952 }
953
954 pmap_convert(vm->vm_map->pmap, mode);
955
956 return (0);
957 }
958
959 void
vm_impl_deinit(struct vm *vm)
961 {
962 /* unused */
963 }
964
965 /*
966 * vcpu_reload_vmcs_vmx
967 *
968 * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
969 * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
970 * ipi will be used to remotely flush it before loading the VMCS locally.
971 *
972 * Parameters:
973 * vcpu: Pointer to the vcpu needing its VMCS
974 *
975 * Return values:
976 * 0: if successful
977 * EINVAL: an error occurred during flush or reload
978 */
979 int
vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
981 {
982 struct cpu_info *ci, *last_ci;
983
984 rw_assert_wrlock(&vcpu->vc_lock);
985
986 ci = curcpu();
987 last_ci = vcpu->vc_last_pcpu;
988
989 if (last_ci == NULL) {
990 /* First launch */
991 if (vmclear(&vcpu->vc_control_pa))
992 return (EINVAL);
993 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
994 #ifdef MULTIPROCESSOR
995 } else if (last_ci != ci) {
996 /* We've moved CPUs at some point, so remote VMCLEAR */
997 if (vmx_remote_vmclear(last_ci, vcpu))
998 return (EINVAL);
999 KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED);
1000 #endif /* MULTIPROCESSOR */
1001 }
1002
1003 if (vmptrld(&vcpu->vc_control_pa)) {
1004 printf("%s: vmptrld\n", __func__);
1005 return (EINVAL);
1006 }
1007
1008 return (0);
1009 }
1010
1011 /*
1012 * vcpu_readregs_vmx
1013 *
1014 * Reads 'vcpu's registers
1015 *
1016 * Parameters:
1017 * vcpu: the vcpu to read register values from
1018 * regmask: the types of registers to read
1019 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1020 * vrs: output parameter where register values are stored
1021 *
1022 * Return values:
1023 * 0: if successful
1024 * EINVAL: an error reading registers occurred
1025 */
1026 int
vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1028 struct vcpu_reg_state *vrs)
1029 {
1030 int i, ret = 0;
1031 uint64_t sel, limit, ar;
1032 uint64_t *gprs = vrs->vrs_gprs;
1033 uint64_t *crs = vrs->vrs_crs;
1034 uint64_t *msrs = vrs->vrs_msrs;
1035 uint64_t *drs = vrs->vrs_drs;
1036 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1037 struct vmx_msr_store *msr_store;
1038
1039 if (loadvmcs) {
1040 if (vcpu_reload_vmcs_vmx(vcpu))
1041 return (EINVAL);
1042 }
1043
1044 #ifdef VMM_DEBUG
1045 /* VMCS should be loaded... */
1046 paddr_t pa = 0ULL;
1047 if (vmptrst(&pa))
1048 panic("%s: vmptrst", __func__);
1049 KASSERT(pa == vcpu->vc_control_pa);
1050 #endif /* VMM_DEBUG */
1051
1052 if (regmask & VM_RWREGS_GPRS) {
1053 gprs[VCPU_REGS_RAX] = vcpu->vc_gueststate.vg_rax;
1054 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1055 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1056 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1057 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1058 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1059 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1060 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1061 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1062 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1063 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1064 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1065 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1066 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1067 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1068 gprs[VCPU_REGS_RIP] = vcpu->vc_gueststate.vg_rip;
1069 if (vmread(VMCS_GUEST_IA32_RSP, &gprs[VCPU_REGS_RSP]))
1070 goto errout;
1071 if (vmread(VMCS_GUEST_IA32_RFLAGS, &gprs[VCPU_REGS_RFLAGS]))
1072 goto errout;
1073 }
1074
1075 if (regmask & VM_RWREGS_SREGS) {
1076 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1077 if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
1078 goto errout;
1079 if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
1080 goto errout;
1081 if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
1082 goto errout;
1083 if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
1084 &sregs[i].vsi_base))
1085 goto errout;
1086
1087 sregs[i].vsi_sel = sel;
1088 sregs[i].vsi_limit = limit;
1089 sregs[i].vsi_ar = ar;
1090 }
1091
1092 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &limit))
1093 goto errout;
1094 if (vmread(VMCS_GUEST_IA32_GDTR_BASE,
1095 &vrs->vrs_gdtr.vsi_base))
1096 goto errout;
1097 vrs->vrs_gdtr.vsi_limit = limit;
1098
1099 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &limit))
1100 goto errout;
1101 if (vmread(VMCS_GUEST_IA32_IDTR_BASE,
1102 &vrs->vrs_idtr.vsi_base))
1103 goto errout;
1104 vrs->vrs_idtr.vsi_limit = limit;
1105 }
1106
1107 if (regmask & VM_RWREGS_CRS) {
1108 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1109 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1110 if (vmread(VMCS_GUEST_IA32_CR0, &crs[VCPU_REGS_CR0]))
1111 goto errout;
1112 if (vmread(VMCS_GUEST_IA32_CR3, &crs[VCPU_REGS_CR3]))
1113 goto errout;
1114 if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4]))
1115 goto errout;
1116 if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0]))
1117 goto errout;
1118 if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1]))
1119 goto errout;
1120 if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2]))
1121 goto errout;
1122 if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3]))
1123 goto errout;
1124 }
1125
1126 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1127
1128 if (regmask & VM_RWREGS_MSRS) {
1129 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1130 msrs[i] = msr_store[i].vms_data;
1131 }
1132 }
1133
1134 if (regmask & VM_RWREGS_DRS) {
1135 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1136 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1137 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1138 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1139 drs[VCPU_REGS_DR6] = vcpu->vc_gueststate.vg_dr6;
1140 if (vmread(VMCS_GUEST_IA32_DR7, &drs[VCPU_REGS_DR7]))
1141 goto errout;
1142 }
1143
1144 goto out;
1145
1146 errout:
1147 ret = EINVAL;
1148 out:
1149 return (ret);
1150 }
1151
1152 /*
1153 * vcpu_readregs_svm
1154 *
1155 * Reads 'vcpu's registers
1156 *
1157 * Parameters:
1158 * vcpu: the vcpu to read register values from
1159 * regmask: the types of registers to read
1160 * vrs: output parameter where register values are stored
1161 *
1162 * Return values:
1163 * 0: if successful
1164 */
1165 int
vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
1167 struct vcpu_reg_state *vrs)
1168 {
1169 uint64_t *gprs = vrs->vrs_gprs;
1170 uint64_t *crs = vrs->vrs_crs;
1171 uint64_t *msrs = vrs->vrs_msrs;
1172 uint64_t *drs = vrs->vrs_drs;
1173 uint32_t attr;
1174 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1175 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1176
1177 if (regmask & VM_RWREGS_GPRS) {
1178 gprs[VCPU_REGS_RAX] = vmcb->v_rax;
1179 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1180 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1181 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1182 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1183 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1184 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1185 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1186 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1187 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1188 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1189 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1190 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1191 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1192 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1193 gprs[VCPU_REGS_RIP] = vmcb->v_rip;
1194 gprs[VCPU_REGS_RSP] = vmcb->v_rsp;
1195 gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags;
1196 }
1197
1198 if (regmask & VM_RWREGS_SREGS) {
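/*
 * The VMCB stores segment attributes as a packed 12-bit field; the
 * conversions below move bits 11:8 up to bits 15:12 so vsi_ar matches
 * the access-rights layout used in vcpu_segment_info.
 */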
1199 sregs[VCPU_REGS_CS].vsi_sel = vmcb->v_cs.vs_sel;
1200 sregs[VCPU_REGS_CS].vsi_limit = vmcb->v_cs.vs_lim;
1201 attr = vmcb->v_cs.vs_attr;
1202 sregs[VCPU_REGS_CS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1203 0xf000);
1204 sregs[VCPU_REGS_CS].vsi_base = vmcb->v_cs.vs_base;
1205
1206 sregs[VCPU_REGS_DS].vsi_sel = vmcb->v_ds.vs_sel;
1207 sregs[VCPU_REGS_DS].vsi_limit = vmcb->v_ds.vs_lim;
1208 attr = vmcb->v_ds.vs_attr;
1209 sregs[VCPU_REGS_DS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1210 0xf000);
1211 sregs[VCPU_REGS_DS].vsi_base = vmcb->v_ds.vs_base;
1212
1213 sregs[VCPU_REGS_ES].vsi_sel = vmcb->v_es.vs_sel;
1214 sregs[VCPU_REGS_ES].vsi_limit = vmcb->v_es.vs_lim;
1215 attr = vmcb->v_es.vs_attr;
1216 sregs[VCPU_REGS_ES].vsi_ar = (attr & 0xff) | ((attr << 4) &
1217 0xf000);
1218 sregs[VCPU_REGS_ES].vsi_base = vmcb->v_es.vs_base;
1219
1220 sregs[VCPU_REGS_FS].vsi_sel = vmcb->v_fs.vs_sel;
1221 sregs[VCPU_REGS_FS].vsi_limit = vmcb->v_fs.vs_lim;
1222 attr = vmcb->v_fs.vs_attr;
1223 sregs[VCPU_REGS_FS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1224 0xf000);
1225 sregs[VCPU_REGS_FS].vsi_base = vmcb->v_fs.vs_base;
1226
1227 sregs[VCPU_REGS_GS].vsi_sel = vmcb->v_gs.vs_sel;
1228 sregs[VCPU_REGS_GS].vsi_limit = vmcb->v_gs.vs_lim;
1229 attr = vmcb->v_gs.vs_attr;
1230 sregs[VCPU_REGS_GS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1231 0xf000);
1232 sregs[VCPU_REGS_GS].vsi_base = vmcb->v_gs.vs_base;
1233
1234 sregs[VCPU_REGS_SS].vsi_sel = vmcb->v_ss.vs_sel;
1235 sregs[VCPU_REGS_SS].vsi_limit = vmcb->v_ss.vs_lim;
1236 attr = vmcb->v_ss.vs_attr;
1237 sregs[VCPU_REGS_SS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1238 0xf000);
1239 sregs[VCPU_REGS_SS].vsi_base = vmcb->v_ss.vs_base;
1240
1241 sregs[VCPU_REGS_LDTR].vsi_sel = vmcb->v_ldtr.vs_sel;
1242 sregs[VCPU_REGS_LDTR].vsi_limit = vmcb->v_ldtr.vs_lim;
1243 attr = vmcb->v_ldtr.vs_attr;
1244 sregs[VCPU_REGS_LDTR].vsi_ar = (attr & 0xff) | ((attr << 4)
1245 & 0xf000);
1246 sregs[VCPU_REGS_LDTR].vsi_base = vmcb->v_ldtr.vs_base;
1247
1248 sregs[VCPU_REGS_TR].vsi_sel = vmcb->v_tr.vs_sel;
1249 sregs[VCPU_REGS_TR].vsi_limit = vmcb->v_tr.vs_lim;
1250 attr = vmcb->v_tr.vs_attr;
1251 sregs[VCPU_REGS_TR].vsi_ar = (attr & 0xff) | ((attr << 4) &
1252 0xf000);
1253 sregs[VCPU_REGS_TR].vsi_base = vmcb->v_tr.vs_base;
1254
1255 vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
1256 vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
1257 vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
1258 vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
1259 }
1260
1261 if (regmask & VM_RWREGS_CRS) {
1262 crs[VCPU_REGS_CR0] = vmcb->v_cr0;
1263 crs[VCPU_REGS_CR3] = vmcb->v_cr3;
1264 crs[VCPU_REGS_CR4] = vmcb->v_cr4;
1265 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1266 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1267 }
1268
1269 if (regmask & VM_RWREGS_MSRS) {
1270 msrs[VCPU_REGS_EFER] = vmcb->v_efer;
1271 msrs[VCPU_REGS_STAR] = vmcb->v_star;
1272 msrs[VCPU_REGS_LSTAR] = vmcb->v_lstar;
1273 msrs[VCPU_REGS_CSTAR] = vmcb->v_cstar;
1274 msrs[VCPU_REGS_SFMASK] = vmcb->v_sfmask;
1275 msrs[VCPU_REGS_KGSBASE] = vmcb->v_kgsbase;
1276 }
1277
1278 if (regmask & VM_RWREGS_DRS) {
1279 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1280 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1281 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1282 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1283 drs[VCPU_REGS_DR6] = vmcb->v_dr6;
1284 drs[VCPU_REGS_DR7] = vmcb->v_dr7;
1285 }
1286
1287 return (0);
1288 }
1289
1290 /*
1291 * vcpu_writeregs_vmx
1292 *
1293 * Writes VCPU registers
1294 *
1295 * Parameters:
1296 * vcpu: the vcpu that has to get its registers written to
1297 * regmask: the types of registers to write
1298 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1299 * vrs: the register values to write
1300 *
1301 * Return values:
1302 * 0: if successful
 * EINVAL: an error writing registers occurred
1304 */
1305 int
vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1307 struct vcpu_reg_state *vrs)
1308 {
1309 int i, ret = 0;
1310 uint16_t sel;
1311 uint64_t limit, ar;
1312 uint64_t *gprs = vrs->vrs_gprs;
1313 uint64_t *crs = vrs->vrs_crs;
1314 uint64_t *msrs = vrs->vrs_msrs;
1315 uint64_t *drs = vrs->vrs_drs;
1316 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1317 struct vmx_msr_store *msr_store;
1318
1319 if (loadvmcs) {
1320 if (vcpu_reload_vmcs_vmx(vcpu))
1321 return (EINVAL);
1322 }
1323
1324 #ifdef VMM_DEBUG
1325 /* VMCS should be loaded... */
1326 paddr_t pa = 0ULL;
1327 if (vmptrst(&pa))
1328 panic("%s: vmptrst", __func__);
1329 KASSERT(pa == vcpu->vc_control_pa);
1330 #endif /* VMM_DEBUG */
1331
1332 if (regmask & VM_RWREGS_GPRS) {
1333 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1334 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1335 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1336 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1337 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1338 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1339 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1340 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1341 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1342 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1343 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1344 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1345 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1346 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1347 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1348 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1349 if (vmwrite(VMCS_GUEST_IA32_RIP, gprs[VCPU_REGS_RIP]))
1350 goto errout;
1351 if (vmwrite(VMCS_GUEST_IA32_RSP, gprs[VCPU_REGS_RSP]))
1352 goto errout;
1353 if (vmwrite(VMCS_GUEST_IA32_RFLAGS, gprs[VCPU_REGS_RFLAGS]))
1354 goto errout;
1355 }
1356
1357 if (regmask & VM_RWREGS_SREGS) {
1358 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1359 sel = sregs[i].vsi_sel;
1360 limit = sregs[i].vsi_limit;
1361 ar = sregs[i].vsi_ar;
1362
1363 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
1364 goto errout;
1365 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
1366 goto errout;
1367 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
1368 goto errout;
1369 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
1370 sregs[i].vsi_base))
1371 goto errout;
1372 }
1373
1374 if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT,
1375 vrs->vrs_gdtr.vsi_limit))
1376 goto errout;
1377 if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE,
1378 vrs->vrs_gdtr.vsi_base))
1379 goto errout;
1380 if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT,
1381 vrs->vrs_idtr.vsi_limit))
1382 goto errout;
1383 if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE,
1384 vrs->vrs_idtr.vsi_base))
1385 goto errout;
1386 }
1387
1388 if (regmask & VM_RWREGS_CRS) {
1389 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1390 if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0]))
1391 goto errout;
1392 if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3]))
1393 goto errout;
1394 if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4]))
1395 goto errout;
1396 if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0]))
1397 goto errout;
1398 if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1]))
1399 goto errout;
1400 if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2]))
1401 goto errout;
1402 if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3]))
1403 goto errout;
1404 }
1405
1406 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1407
1408 if (regmask & VM_RWREGS_MSRS) {
1409 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1410 msr_store[i].vms_data = msrs[i];
1411 }
1412 }
1413
1414 if (regmask & VM_RWREGS_DRS) {
1415 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1416 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1417 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1418 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1419 vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR6];
1420 if (vmwrite(VMCS_GUEST_IA32_DR7, drs[VCPU_REGS_DR7]))
1421 goto errout;
1422 }
1423
1424 goto out;
1425
1426 errout:
1427 ret = EINVAL;
1428 out:
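/*
 * If we loaded the VMCS above, vmclear it again so the cached state is
 * flushed back to memory and the VMCS is left cleared for the next load.
 */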
1429 if (loadvmcs) {
1430 if (vmclear(&vcpu->vc_control_pa))
1431 ret = EINVAL;
1432 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
1433 }
1434 return (ret);
1435 }
1436
1437 /*
1438 * vcpu_writeregs_svm
1439 *
1440 * Writes 'vcpu's registers
1441 *
1442 * Parameters:
1443 * vcpu: the vcpu that has to get its registers written to
1444 * regmask: the types of registers to write
1445 * vrs: the register values to write
1446 *
1447 * Return values:
1448 * 0: if successful
 * EINVAL: an error writing registers occurred
1450 */
1451 int
vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
1453 struct vcpu_reg_state *vrs)
1454 {
1455 uint64_t *gprs = vrs->vrs_gprs;
1456 uint64_t *crs = vrs->vrs_crs;
1457 uint16_t attr;
1458 uint64_t *msrs = vrs->vrs_msrs;
1459 uint64_t *drs = vrs->vrs_drs;
1460 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1461 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1462
1463 if (regmask & VM_RWREGS_GPRS) {
1464 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1465 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1466 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1467 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1468 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1469 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1470 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1471 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1472 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1473 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1474 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1475 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1476 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1477 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1478 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1479 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1480
1481 vmcb->v_rax = gprs[VCPU_REGS_RAX];
1482 vmcb->v_rip = gprs[VCPU_REGS_RIP];
1483 vmcb->v_rsp = gprs[VCPU_REGS_RSP];
1484 vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS];
1485 }
1486
1487 if (regmask & VM_RWREGS_SREGS) {
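/*
 * Inverse of the repacking in vcpu_readregs_svm(): fold access-rights
 * bits 15:12 back down to bits 11:8 of the packed VMCB attribute field.
 */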
1488 vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS].vsi_sel;
1489 vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS].vsi_limit;
1490 attr = sregs[VCPU_REGS_CS].vsi_ar;
1491 vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1492 vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS].vsi_base;
1493 vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS].vsi_sel;
1494 vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS].vsi_limit;
1495 attr = sregs[VCPU_REGS_DS].vsi_ar;
1496 vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1497 vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS].vsi_base;
1498 vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES].vsi_sel;
1499 vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES].vsi_limit;
1500 attr = sregs[VCPU_REGS_ES].vsi_ar;
1501 vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1502 vmcb->v_es.vs_base = sregs[VCPU_REGS_ES].vsi_base;
1503 vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS].vsi_sel;
1504 vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS].vsi_limit;
1505 attr = sregs[VCPU_REGS_FS].vsi_ar;
1506 vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1507 vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS].vsi_base;
1508 vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS].vsi_sel;
1509 vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS].vsi_limit;
1510 attr = sregs[VCPU_REGS_GS].vsi_ar;
1511 vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1512 vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS].vsi_base;
1513 vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS].vsi_sel;
1514 vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS].vsi_limit;
1515 attr = sregs[VCPU_REGS_SS].vsi_ar;
1516 vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1517 vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS].vsi_base;
1518 vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR].vsi_sel;
1519 vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR].vsi_limit;
1520 attr = sregs[VCPU_REGS_LDTR].vsi_ar;
1521 vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1522 vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR].vsi_base;
1523 vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR].vsi_sel;
1524 vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR].vsi_limit;
1525 attr = sregs[VCPU_REGS_TR].vsi_ar;
1526 vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1527 vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR].vsi_base;
1528 vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
1529 vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
1530 vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
1531 vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
1532 }
1533
1534 if (regmask & VM_RWREGS_CRS) {
1535 vmcb->v_cr0 = crs[VCPU_REGS_CR0];
1536 vmcb->v_cr3 = crs[VCPU_REGS_CR3];
1537 vmcb->v_cr4 = crs[VCPU_REGS_CR4];
1538 vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR2];
1539 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1540 }
1541
1542 if (regmask & VM_RWREGS_MSRS) {
1543 vmcb->v_efer |= msrs[VCPU_REGS_EFER];
1544 vmcb->v_star = msrs[VCPU_REGS_STAR];
1545 vmcb->v_lstar = msrs[VCPU_REGS_LSTAR];
1546 vmcb->v_cstar = msrs[VCPU_REGS_CSTAR];
1547 vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK];
1548 vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE];
1549 }
1550
1551 if (regmask & VM_RWREGS_DRS) {
1552 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1553 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1554 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1555 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1556 vmcb->v_dr6 = drs[VCPU_REGS_DR6];
1557 vmcb->v_dr7 = drs[VCPU_REGS_DR7];
1558 }
1559
1560 return (0);
1561 }
1562
1563 /*
1564 * vcpu_reset_regs_svm
1565 *
1566 * Initializes 'vcpu's registers to supplied state
1567 *
1568 * Parameters:
1569 * vcpu: the vcpu whose register state is to be initialized
1570 * vrs: the register state to set
1571 *
1572 * Return values:
1573 * 0: registers init'ed successfully
1574 * EINVAL: an error occurred setting register state
1575 */
1576 int
vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1578 {
1579 struct vmcb *vmcb;
1580 int ret;
1581
1582 vmcb = (struct vmcb *)vcpu->vc_control_va;
1583
1584 /*
1585 * Intercept controls
1586 *
1587 * External Interrupt exiting (SVM_INTERCEPT_INTR)
1588 * External NMI exiting (SVM_INTERCEPT_NMI)
1589 * CPUID instruction (SVM_INTERCEPT_CPUID)
1590 * HLT instruction (SVM_INTERCEPT_HLT)
1591 * I/O instructions (SVM_INTERCEPT_INOUT)
1592 * MSR access (SVM_INTERCEPT_MSR)
1593 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
1594 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
1595 *
1596 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
1597 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
1598 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
1599 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
1600 * STGI instruction (SVM_INTERCEPT_STGI)
1601 * CLGI instruction (SVM_INTERCEPT_CLGI)
1602 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
1603 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
1604 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
1605 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
1606 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
1607 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
1608 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
1609 */
1610 vmcb->v_intercept1 = SVM_INTERCEPT_INTR | SVM_INTERCEPT_NMI |
1611 SVM_INTERCEPT_CPUID | SVM_INTERCEPT_HLT | SVM_INTERCEPT_INOUT |
1612 SVM_INTERCEPT_MSR | SVM_INTERCEPT_SHUTDOWN | SVM_INTERCEPT_INVLPGA;
1613
1614 vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN | SVM_INTERCEPT_VMMCALL |
1615 SVM_INTERCEPT_VMLOAD | SVM_INTERCEPT_VMSAVE | SVM_INTERCEPT_STGI |
1616 SVM_INTERCEPT_CLGI | SVM_INTERCEPT_SKINIT | SVM_INTERCEPT_ICEBP |
1617 SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
1618 SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
1619
1620 if (xsave_mask)
1621 vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
1622
1623 /* Setup I/O bitmap */
1624 memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
1625 vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
1626
1627 /* Setup MSR bitmap */
1628 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE);
1629 vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
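/*
 * The all-ones bitmap above intercepts every MSR access; the calls below
 * clear individual bits to pass selected MSRs through to the guest.
 */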
1630 svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
1631 svm_setmsrbrw(vcpu, MSR_SYSENTER_CS);
1632 svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
1633 svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
1634 svm_setmsrbrw(vcpu, MSR_STAR);
1635 svm_setmsrbrw(vcpu, MSR_LSTAR);
1636 svm_setmsrbrw(vcpu, MSR_CSTAR);
1637 svm_setmsrbrw(vcpu, MSR_SFMASK);
1638 svm_setmsrbrw(vcpu, MSR_FSBASE);
1639 svm_setmsrbrw(vcpu, MSR_GSBASE);
1640 svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
1641
1642 /* EFER is R/O so we can ensure the guest always has SVME */
1643 svm_setmsrbr(vcpu, MSR_EFER);
1644
1645 /* allow reading TSC */
1646 svm_setmsrbr(vcpu, MSR_TSC);
1647
1648 /* allow reading HWCR and PSTATEDEF to determine TSC frequency */
1649 svm_setmsrbr(vcpu, MSR_HWCR);
1650 svm_setmsrbr(vcpu, MSR_PSTATEDEF(0));
1651
1652 /* Guest VCPU ASID */
1653 vmcb->v_asid = vcpu->vc_vpid;
1654
/* TLB Control - First time in, flush all */
1656 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL;
1657
1658 /* INTR masking */
1659 vmcb->v_intr_masking = 1;
1660
1661 /* PAT */
1662 vmcb->v_g_pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
1663 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
1664 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
1665 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
1666
1667 /* NPT */
1668 vmcb->v_np_enable = SVM_ENABLE_NP;
1669 vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
1670
1671 /* SEV */
1672 if (vcpu->vc_sev)
1673 vmcb->v_np_enable |= SVM_ENABLE_SEV;
1674
1675 /* Enable SVME in EFER (must always be set) */
1676 vmcb->v_efer |= EFER_SVME;
1677
1678 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
1679
1680 /* xcr0 power on default sets bit 0 (x87 state) */
1681 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
1682
1683 vcpu->vc_parent->vm_map->pmap->eptp = 0;
1684
1685 return ret;
1686 }
1687
1688 /*
1689 * svm_setmsrbr
1690 *
1691 * Allow read access to the specified msr on the supplied vcpu.
1692 *
1693 * Parameters:
1694 * vcpu: the VCPU to allow access
1695 * msr: the MSR number to allow access to
1696 */
1697 void
svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1699 {
1700 uint8_t *msrs;
1701 uint16_t idx;
1702
1703 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1704
1705 /*
1706 * MSR Read bitmap layout:
1707 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1708 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1709 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1710 *
1711 * Read enable bit is low order bit of 2-bit pair
 * per MSR (eg, MSR 0x0 read bit is at bit 0 @ 0x0)
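 *
 * For example, EFER (0xc0000080) falls in the second range, so its read
 * bit is SVM_MSRBIT_R(0xc0000080 - 0xc0000000) in byte
 * SVM_MSRIDX(0xc0000080 - 0xc0000000) + 0x800 of the bitmap.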
1713 */
1714 if (msr <= 0x1fff) {
1715 idx = SVM_MSRIDX(msr);
1716 msrs[idx] &= ~(SVM_MSRBIT_R(msr));
1717 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1718 idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1719 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000));
1720 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1721 idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1722 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000));
1723 } else {
1724 printf("%s: invalid msr 0x%x\n", __func__, msr);
1725 return;
1726 }
1727 }
1728
1729 /*
1730 * svm_setmsrbw
1731 *
1732 * Allow write access to the specified msr on the supplied vcpu
1733 *
1734 * Parameters:
1735 * vcpu: the VCPU to allow access
1736 * msr: the MSR number to allow access to
1737 */
1738 void
svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1740 {
1741 uint8_t *msrs;
1742 uint16_t idx;
1743
1744 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1745
1746 /*
1747 * MSR Write bitmap layout:
1748 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1749 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1750 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1751 *
1752 * Write enable bit is high order bit of 2-bit pair
1753 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
1754 */
1755 if (msr <= 0x1fff) {
1756 idx = SVM_MSRIDX(msr);
1757 msrs[idx] &= ~(SVM_MSRBIT_W(msr));
1758 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1759 idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1760 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000));
1761 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1762 idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1763 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000));
1764 } else {
1765 printf("%s: invalid msr 0x%x\n", __func__, msr);
1766 return;
1767 }
1768 }
1769
1770 /*
1771 * svm_setmsrbrw
1772 *
1773 * Allow read/write access to the specified msr on the supplied vcpu
1774 *
1775 * Parameters:
1776 * vcpu: the VCPU to allow access
1777 * msr: the MSR number to allow access to
1778 */
1779 void
svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1781 {
1782 svm_setmsrbr(vcpu, msr);
1783 svm_setmsrbw(vcpu, msr);
1784 }
1785
1786 /*
1787 * vmx_setmsrbr
1788 *
1789 * Allow read access to the specified msr on the supplied vcpu.
1790 *
1791 * Parameters:
1792 * vcpu: the VCPU to allow access
1793 * msr: the MSR number to allow access to
1794 */
1795 void
vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1797 {
1798 uint8_t *msrs;
1799 uint16_t idx;
1800
1801 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1802
1803 /*
1804 * MSR Read bitmap layout:
1805 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
1806 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
1807 */
1808 if (msr <= 0x1fff) {
1809 idx = VMX_MSRIDX(msr);
1810 msrs[idx] &= ~(VMX_MSRBIT(msr));
1811 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1812 idx = VMX_MSRIDX(msr - 0xc0000000) + 0x400;
1813 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1814 } else
1815 printf("%s: invalid msr 0x%x\n", __func__, msr);
1816 }
1817
1818 /*
1819 * vmx_setmsrbw
1820 *
1821 * Allow write access to the specified msr on the supplied vcpu
1822 *
1823 * Parameters:
1824 * vcpu: the VCPU to allow access
1825 * msr: the MSR number to allow access to
1826 */
1827 void
1828 vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1829 {
1830 uint8_t *msrs;
1831 uint16_t idx;
1832
1833 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1834
1835 /*
1836 * MSR Write bitmap layout:
1837 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
1838 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
1839 */
1840 if (msr <= 0x1fff) {
1841 idx = VMX_MSRIDX(msr) + 0x800;
1842 msrs[idx] &= ~(VMX_MSRBIT(msr));
1843 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1844 idx = VMX_MSRIDX(msr - 0xc0000000) + 0xc00;
1845 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1846 } else
1847 printf("%s: invalid msr 0x%x\n", __func__, msr);
1848 }
1849
1850 /*
1851 * vmx_setmsrbrw
1852 *
1853 * Allow read/write access to the specified msr on the supplied vcpu
1854 *
1855 * Parameters:
1856 * vcpu: the VCPU to allow access
1857 * msr: the MSR number to allow access to
1858 */
1859 void
1860 vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1861 {
1862 vmx_setmsrbr(vcpu, msr);
1863 vmx_setmsrbw(vcpu, msr);
1864 }
1865
1866 /*
1867 * svm_set_clean
1868 *
1869 * Sets (marks as unmodified) the VMCB clean bits set in 'value'.
1870 * For example, to set the clean bit for the VMCB intercepts (bit position 0),
1871 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1872 * Multiple cleanbits can be provided in 'value' at the same time (eg,
1873 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1874 *
1875 * Note that this function does not clear any bits; to clear bits in the
1876 * vmcb cleanbits bitfield, use 'svm_set_dirty'.
1877 *
1878 * Parameters:
1879 * vcpu: the VCPU whose VMCB clean value should be set
1880 * value: the value(s) to enable in the cleanbits mask
1881 */
1882 void
1883 svm_set_clean(struct vcpu *vcpu, uint32_t value)
1884 {
1885 struct vmcb *vmcb;
1886
1887 /* If no cleanbits support, do nothing */
1888 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1889 return;
1890
1891 vmcb = (struct vmcb *)vcpu->vc_control_va;
1892
1893 vmcb->v_vmcb_clean_bits |= value;
1894 }
1895
1896 /*
1897 * svm_set_dirty
1898 *
1899 * Clears (marks as modified) the VMCB clean bits set in 'value'.
1900 * For example, to clear the bit for the VMCB intercepts (bit position 0)
1901 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1902 * Multiple dirty bits can be provided in 'value' at the same time (eg,
1903 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1904 *
1905 * Parameters:
1906 * vcpu: the VCPU whose VMCB dirty value should be set
1907 * value: the value(s) to dirty in the cleanbits mask
1908 */
1909 void
1910 svm_set_dirty(struct vcpu *vcpu, uint32_t value)
1911 {
1912 struct vmcb *vmcb;
1913
1914 /* If no cleanbits support, do nothing */
1915 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1916 return;
1917
1918 vmcb = (struct vmcb *)vcpu->vc_control_va;
1919
1920 vmcb->v_vmcb_clean_bits &= ~value;
1921 }
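
/*
 * Usage sketch (illustrative, not part of the original source): after the
 * run loop modifies a VMCB field it marks the corresponding group dirty,
 * e.g. svm_set_dirty(vcpu, SVM_CLEANBITS_I) after changing intercepts;
 * once the hardware has consumed the VMCB the groups can be marked clean
 * again with svm_set_clean(). The exact call sites and the SVM_CLEANBITS_*
 * names used here are assumptions based on the comments above.
 */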
1922
1923 /*
1924 * vcpu_reset_regs_vmx
1925 *
1926 * Initializes 'vcpu's registers to supplied state
1927 *
1928 * Parameters:
1929 * vcpu: the vcpu whose register state is to be initialized
1930 * vrs: the register state to set
1931 *
1932 * Return values:
1933 * 0: registers init'ed successfully
1934 * EINVAL: an error occurred setting register state
1935 */
1936 int
1937 vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1938 {
1939 int ret = 0, ug = 0;
1940 uint32_t cr0, cr4;
1941 uint32_t pinbased, procbased, procbased2, exit, entry;
1942 uint32_t want1, want0;
1943 uint64_t ctrlval, cr3, msr_misc_enable;
1944 uint16_t ctrl;
1945 struct vmx_msr_store *msr_store;
1946
1947 rw_assert_wrlock(&vcpu->vc_lock);
1948
1949 cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
1950
1951 if (vcpu_reload_vmcs_vmx(vcpu)) {
1952 DPRINTF("%s: error reloading VMCS\n", __func__);
1953 ret = EINVAL;
1954 goto exit;
1955 }
1956
1957 #ifdef VMM_DEBUG
1958 /* VMCS should be loaded... */
1959 paddr_t pa = 0ULL;
1960 if (vmptrst(&pa))
1961 panic("%s: vmptrst", __func__);
1962 KASSERT(pa == vcpu->vc_control_pa);
1963 #endif /* VMM_DEBUG */
1964
1965 /* Compute Basic Entry / Exit Controls */
1966 vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC);
1967 vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS);
1968 vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS);
1969 vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS);
1970 vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS);
1971
1972 /* Compute True Entry / Exit Controls (if applicable) */
1973 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1974 vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS);
1975 vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS);
1976 vcpu->vc_vmx_true_pinbased_ctls =
1977 rdmsr(IA32_VMX_TRUE_PINBASED_CTLS);
1978 vcpu->vc_vmx_true_procbased_ctls =
1979 rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS);
1980 }
1981
1982 /* Compute Secondary Procbased Controls (if applicable) */
1983 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
1984 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1))
1985 vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS);
1986
1987 /*
1988 * Pinbased ctrls
1989 *
1990 * We must be able to set the following:
1991 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
1992 * IA32_VMX_NMI_EXITING - exit on host NMI
1993 */
1994 want1 = IA32_VMX_EXTERNAL_INT_EXITING |
1995 IA32_VMX_NMI_EXITING;
1996 want0 = 0;
1997
1998 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1999 ctrl = IA32_VMX_TRUE_PINBASED_CTLS;
2000 ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2001 } else {
2002 ctrl = IA32_VMX_PINBASED_CTLS;
2003 ctrlval = vcpu->vc_vmx_pinbased_ctls;
2004 }
2005
2006 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2007 DPRINTF("%s: error computing pinbased controls\n", __func__);
2008 ret = EINVAL;
2009 goto exit;
2010 }
2011
2012 if (vmwrite(VMCS_PINBASED_CTLS, pinbased)) {
2013 DPRINTF("%s: error setting pinbased controls\n", __func__);
2014 ret = EINVAL;
2015 goto exit;
2016 }
2017
2018 /*
2019 * Procbased ctrls
2020 *
2021 * We must be able to set the following:
2022 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2023 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2024 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2025 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2026 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2027 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2028 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2029 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2030 *
2031 * If we have EPT, we must be able to clear the following
2032 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2033 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2034 */
2035 want1 = IA32_VMX_HLT_EXITING |
2036 IA32_VMX_MWAIT_EXITING |
2037 IA32_VMX_UNCONDITIONAL_IO_EXITING |
2038 IA32_VMX_USE_MSR_BITMAPS |
2039 IA32_VMX_CR8_LOAD_EXITING |
2040 IA32_VMX_CR8_STORE_EXITING |
2041 IA32_VMX_MONITOR_EXITING |
2042 IA32_VMX_USE_TPR_SHADOW;
2043 want0 = 0;
2044
2045 want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS;
2046 want0 |= IA32_VMX_CR3_LOAD_EXITING | IA32_VMX_CR3_STORE_EXITING;
2047
2048 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2049 ctrl = IA32_VMX_TRUE_PROCBASED_CTLS;
2050 ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2051 } else {
2052 ctrl = IA32_VMX_PROCBASED_CTLS;
2053 ctrlval = vcpu->vc_vmx_procbased_ctls;
2054 }
2055
2056 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2057 DPRINTF("%s: error computing procbased controls\n", __func__);
2058 ret = EINVAL;
2059 goto exit;
2060 }
2061
2062 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
2063 DPRINTF("%s: error setting procbased controls\n", __func__);
2064 ret = EINVAL;
2065 goto exit;
2066 }
2067
2068 /*
2069 * Secondary Procbased ctrls
2070 *
2071 * We want to be able to set the following, if available:
2072 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2073 *
2074 * If we have EPT, we must be able to set the following:
2075 * IA32_VMX_ENABLE_EPT - enable EPT
2076 *
2077 * If we have unrestricted guest capability, we must be able to set
2078 * the following:
2079 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if the caller
2080 * left CR0_PG | CR0_PE clear in %cr0 in the 'vrs' parameter)
2081 */
2082 want1 = IA32_VMX_ENABLE_EPT;
2083
2084 /* XXX checking for 2ndary controls can be combined here */
2085 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2086 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2087 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2088 IA32_VMX_ENABLE_VPID, 1)) {
2089 want1 |= IA32_VMX_ENABLE_VPID;
2090 vcpu->vc_vmx_vpid_enabled = 1;
2091 }
2092 }
2093
2094 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2095 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2096 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2097 IA32_VMX_UNRESTRICTED_GUEST, 1)) {
2098 if ((cr0 & (CR0_PE | CR0_PG)) == 0) {
2099 want1 |= IA32_VMX_UNRESTRICTED_GUEST;
2100 ug = 1;
2101 }
2102 }
2103 }
2104
2105 want0 = ~want1;
2106 ctrlval = vcpu->vc_vmx_procbased2_ctls;
2107 ctrl = IA32_VMX_PROCBASED2_CTLS;
2108
2109 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2110 DPRINTF("%s: error computing secondary procbased controls\n",
2111 __func__);
2112 ret = EINVAL;
2113 goto exit;
2114 }
2115
2116 if (vmwrite(VMCS_PROCBASED2_CTLS, procbased2)) {
2117 DPRINTF("%s: error setting secondary procbased controls\n",
2118 __func__);
2119 ret = EINVAL;
2120 goto exit;
2121 }
2122
2123 /*
2124 * Exit ctrls
2125 *
2126 * We must be able to set the following:
2127 * IA32_VMX_SAVE_DEBUG_CONTROLS
2128 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2129 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2130 */
2131 want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE |
2132 IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT |
2133 IA32_VMX_SAVE_DEBUG_CONTROLS;
2134 want0 = 0;
2135
2136 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2137 ctrl = IA32_VMX_TRUE_EXIT_CTLS;
2138 ctrlval = vcpu->vc_vmx_true_exit_ctls;
2139 } else {
2140 ctrl = IA32_VMX_EXIT_CTLS;
2141 ctrlval = vcpu->vc_vmx_exit_ctls;
2142 }
2143
2144 if (rcr4() & CR4_CET)
2145 want1 |= IA32_VMX_LOAD_HOST_CET_STATE;
2146 else
2147 want0 |= IA32_VMX_LOAD_HOST_CET_STATE;
2148
2149 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2150 DPRINTF("%s: error computing exit controls\n", __func__);
2151 ret = EINVAL;
2152 goto exit;
2153 }
2154
2155 if (vmwrite(VMCS_EXIT_CTLS, exit)) {
2156 DPRINTF("%s: error setting exit controls\n", __func__);
2157 ret = EINVAL;
2158 goto exit;
2159 }
2160
2161 /*
2162 * Entry ctrls
2163 *
2164 * We must be able to set the following:
2165 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2166 * IA32_VMX_LOAD_DEBUG_CONTROLS
2167 * We must be able to clear the following:
2168 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2169 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2170 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2171 */
2172 want1 = IA32_VMX_LOAD_DEBUG_CONTROLS;
2173 if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)
2174 want1 |= IA32_VMX_IA32E_MODE_GUEST;
2175
2176 want0 = IA32_VMX_ENTRY_TO_SMM |
2177 IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT |
2178 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY;
2179
2180 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2181 ctrl = IA32_VMX_TRUE_ENTRY_CTLS;
2182 ctrlval = vcpu->vc_vmx_true_entry_ctls;
2183 } else {
2184 ctrl = IA32_VMX_ENTRY_CTLS;
2185 ctrlval = vcpu->vc_vmx_entry_ctls;
2186 }
2187
2188 if (rcr4() & CR4_CET)
2189 want1 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2190 else
2191 want0 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2192
2193 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2194 ret = EINVAL;
2195 goto exit;
2196 }
2197
2198 if (vmwrite(VMCS_ENTRY_CTLS, entry)) {
2199 ret = EINVAL;
2200 goto exit;
2201 }
2202
2203 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2204 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2205 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2206 IA32_VMX_ENABLE_VPID, 1)) {
2207 if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) {
2208 DPRINTF("%s: error setting guest VPID\n",
2209 __func__);
2210 ret = EINVAL;
2211 goto exit;
2212 }
2213 }
2214 }
2215
2216 /*
2217 * Determine which bits in CR0 have to be set to a fixed
2218 * value as per Intel SDM A.7.
2219 * CR0 bits in the vrs parameter must match these.
2220 */
2221 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2222 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2223 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2224 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2225
2226 /*
2227 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
2228 * fixed to 1 even if the CPU supports the unrestricted guest
2229 * feature. Update want1 and want0 accordingly to allow
2230 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
2231 * the CPU has the unrestricted guest capability.
2232 */
2233 if (ug) {
2234 want1 &= ~(CR0_PG | CR0_PE);
2235 want0 &= ~(CR0_PG | CR0_PE);
2236 }
2237
2238 /*
2239 * VMX may require some bits to be set that userland should not have
2240 * to care about. Set those here.
2241 */
2242 if (want1 & CR0_NE)
2243 cr0 |= CR0_NE;
2244
2245 if ((cr0 & want1) != want1) {
2246 ret = EINVAL;
2247 goto exit;
2248 }
2249
2250 if ((~cr0 & want0) != want0) {
2251 ret = EINVAL;
2252 goto exit;
2253 }
2254
2255 vcpu->vc_vmx_cr0_fixed1 = want1;
2256 vcpu->vc_vmx_cr0_fixed0 = want0;
2257 /*
2258 * Determine which bits in CR4 have to be set to a fixed
2259 * value as per Intel SDM A.8.
2260 * CR4 bits in the vrs parameter must match these, except
2261 * CR4_VMXE - we add that here since it must always be set.
2262 */
2263 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2264 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2265 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2266 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2267
2268 cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE;
2269
2270 if ((cr4 & want1) != want1) {
2271 ret = EINVAL;
2272 goto exit;
2273 }
2274
2275 if ((~cr4 & want0) != want0) {
2276 ret = EINVAL;
2277 goto exit;
2278 }
2279
2280 cr3 = vrs->vrs_crs[VCPU_REGS_CR3];
2281
2282 /* Restore PDPTEs if 32-bit PAE paging is being used */
2283 if (cr3 && (cr4 & CR4_PAE) &&
2284 !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) {
2285 if (vmwrite(VMCS_GUEST_PDPTE0,
2286 vrs->vrs_crs[VCPU_REGS_PDPTE0])) {
2287 ret = EINVAL;
2288 goto exit;
2289 }
2290
2291 if (vmwrite(VMCS_GUEST_PDPTE1,
2292 vrs->vrs_crs[VCPU_REGS_PDPTE1])) {
2293 ret = EINVAL;
2294 goto exit;
2295 }
2296
2297 if (vmwrite(VMCS_GUEST_PDPTE2,
2298 vrs->vrs_crs[VCPU_REGS_PDPTE2])) {
2299 ret = EINVAL;
2300 goto exit;
2301 }
2302
2303 if (vmwrite(VMCS_GUEST_PDPTE3,
2304 vrs->vrs_crs[VCPU_REGS_PDPTE3])) {
2305 ret = EINVAL;
2306 goto exit;
2307 }
2308 }
2309
2310 vrs->vrs_crs[VCPU_REGS_CR0] = cr0;
2311 vrs->vrs_crs[VCPU_REGS_CR4] = cr4;
2312
2313 msr_misc_enable = rdmsr(MSR_MISC_ENABLE);
2314
2315 /*
2316 * Select host MSRs to be loaded on exit
2317 */
2318 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
2319 msr_store[VCPU_HOST_REGS_EFER].vms_index = MSR_EFER;
2320 msr_store[VCPU_HOST_REGS_EFER].vms_data = rdmsr(MSR_EFER);
2321 msr_store[VCPU_HOST_REGS_STAR].vms_index = MSR_STAR;
2322 msr_store[VCPU_HOST_REGS_STAR].vms_data = rdmsr(MSR_STAR);
2323 msr_store[VCPU_HOST_REGS_LSTAR].vms_index = MSR_LSTAR;
2324 msr_store[VCPU_HOST_REGS_LSTAR].vms_data = rdmsr(MSR_LSTAR);
2325 msr_store[VCPU_HOST_REGS_CSTAR].vms_index = MSR_CSTAR;
2326 msr_store[VCPU_HOST_REGS_CSTAR].vms_data = 0;
2327 msr_store[VCPU_HOST_REGS_SFMASK].vms_index = MSR_SFMASK;
2328 msr_store[VCPU_HOST_REGS_SFMASK].vms_data = rdmsr(MSR_SFMASK);
2329 msr_store[VCPU_HOST_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2330 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data = 0;
2331 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2332 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2333
2334 /*
2335 * Select guest MSRs to be loaded on entry / saved on exit
2336 */
2337 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
2338
2339 msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
2340 msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
2341 msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
2342 msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR;
2343 msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK;
2344 msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2345 msr_store[VCPU_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2346
2347 /*
2348 * Initialize MSR_MISC_ENABLE since it can't be read and populated from
2349 * vmd(8), and some of its content is based on the host.
2350 */
2351 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2352 msr_store[VCPU_REGS_MISC_ENABLE].vms_data &=
2353 ~(MISC_ENABLE_TCC | MISC_ENABLE_PERF_MON_AVAILABLE |
2354 MISC_ENABLE_EIST_ENABLED | MISC_ENABLE_ENABLE_MONITOR_FSM |
2355 MISC_ENABLE_xTPR_MESSAGE_DISABLE);
2356 msr_store[VCPU_REGS_MISC_ENABLE].vms_data |=
2357 MISC_ENABLE_BTS_UNAVAILABLE | MISC_ENABLE_PEBS_UNAVAILABLE;
2358
2359 /*
2360 * Currently we use the same memory for guest MSRs (entry-load and
2361 * exit-store) so they have the same count. We exit-load the same
2362 * host MSRs, so same count but different memory. Those are just
2363 * our current choices, not architectural requirements.
2364 */
2365 if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VCPU_REGS_NMSRS)) {
2366 DPRINTF("%s: error setting guest MSR exit store count\n",
2367 __func__);
2368 ret = EINVAL;
2369 goto exit;
2370 }
2371
2372 if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, VCPU_HOST_REGS_NMSRS)) {
2373 DPRINTF("%s: error setting guest MSR exit load count\n",
2374 __func__);
2375 ret = EINVAL;
2376 goto exit;
2377 }
2378
2379 if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, VCPU_REGS_NMSRS)) {
2380 DPRINTF("%s: error setting guest MSR entry load count\n",
2381 __func__);
2382 ret = EINVAL;
2383 goto exit;
2384 }
2385
2386 if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS,
2387 vcpu->vc_vmx_msr_exit_save_pa)) {
2388 DPRINTF("%s: error setting guest MSR exit store address\n",
2389 __func__);
2390 ret = EINVAL;
2391 goto exit;
2392 }
2393
2394 if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS,
2395 vcpu->vc_vmx_msr_exit_load_pa)) {
2396 DPRINTF("%s: error setting guest MSR exit load address\n",
2397 __func__);
2398 ret = EINVAL;
2399 goto exit;
2400 }
2401
2402 if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS,
2403 vcpu->vc_vmx_msr_exit_save_pa)) {
2404 DPRINTF("%s: error setting guest MSR entry load address\n",
2405 __func__);
2406 ret = EINVAL;
2407 goto exit;
2408 }
2409
2410 if (vmwrite(VMCS_MSR_BITMAP_ADDRESS,
2411 vcpu->vc_msr_bitmap_pa)) {
2412 DPRINTF("%s: error setting guest MSR bitmap address\n",
2413 __func__);
2414 ret = EINVAL;
2415 goto exit;
2416 }
2417
2418 if (vmwrite(VMCS_CR4_MASK, CR4_VMXE)) {
2419 DPRINTF("%s: error setting guest CR4 mask\n", __func__);
2420 ret = EINVAL;
2421 goto exit;
2422 }
2423
2424 if (vmwrite(VMCS_CR0_MASK, CR0_NE)) {
2425 DPRINTF("%s: error setting guest CR0 mask\n", __func__);
2426 ret = EINVAL;
2427 goto exit;
2428 }
2429
2430 /*
2431 * Set up the VMCS for the register state we want during VCPU start.
2432 * This matches what the CPU state would be after a bootloader
2433 * transition to 'start'.
2434 */
2435 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);
2436
2437 /*
2438 * Set up the MSR bitmap
2439 */
2440 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE);
2441 vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
2442 vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS);
2443 vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
2444 vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
2445 vmx_setmsrbrw(vcpu, MSR_EFER);
2446 vmx_setmsrbrw(vcpu, MSR_STAR);
2447 vmx_setmsrbrw(vcpu, MSR_LSTAR);
2448 vmx_setmsrbrw(vcpu, MSR_CSTAR);
2449 vmx_setmsrbrw(vcpu, MSR_SFMASK);
2450 vmx_setmsrbrw(vcpu, MSR_FSBASE);
2451 vmx_setmsrbrw(vcpu, MSR_GSBASE);
2452 vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE);
2453
2454 vmx_setmsrbr(vcpu, MSR_MISC_ENABLE);
2455 vmx_setmsrbr(vcpu, MSR_TSC);
2456
2457 /* If host supports CET, pass through access to the guest. */
2458 if (rcr4() & CR4_CET)
2459 vmx_setmsrbrw(vcpu, MSR_S_CET);
2460
2461 /* XXX CR0 shadow */
2462 /* XXX CR4 shadow */
2463
2464 /* xcr0 power on default sets bit 0 (x87 state) */
2465 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
2466
2467 /* XXX PAT shadow */
2468 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2469
2470 /* Flush the VMCS */
2471 if (vmclear(&vcpu->vc_control_pa)) {
2472 DPRINTF("%s: vmclear failed\n", __func__);
2473 ret = EINVAL;
2474 }
2475 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
2476
2477 exit:
2478 return (ret);
2479 }
2480
2481 /*
2482 * vcpu_init_vmx
2483 *
2484 * Intel VMX specific VCPU initialization routine.
2485 *
2486 * This function allocates various per-VCPU memory regions, sets up initial
2487 * VCPU VMCS controls, and sets initial register values.
2488 *
2489 * Parameters:
2490 * vcpu: the VCPU structure being initialized
2491 *
2492 * Return values:
2493 * 0: the VCPU was initialized successfully
2494 * ENOMEM: insufficient resources
2495 * EINVAL: an error occurred during VCPU initialization
2496 */
2497 int
2498 vcpu_init_vmx(struct vcpu *vcpu)
2499 {
2500 struct vmcs *vmcs;
2501 uint64_t msr, eptp;
2502 uint32_t cr0, cr4;
2503 int ret = 0;
2504
2505 /* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */
2506 if (vmm_alloc_vpid(&vcpu->vc_vpid))
2507 return (ENOMEM);
2508
2509 /* Allocate VMCS VA */
2510 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2511 &kd_waitok);
2512 vcpu->vc_vmx_vmcs_state = VMCS_CLEARED;
2513
2514 if (!vcpu->vc_control_va) {
2515 ret = ENOMEM;
2516 goto exit;
2517 }
2518
2519 /* Compute VMCS PA */
2520 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2521 (paddr_t *)&vcpu->vc_control_pa)) {
2522 ret = ENOMEM;
2523 goto exit;
2524 }
2525
2526 /* Allocate MSR bitmap VA */
2527 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2528 &kd_waitok);
2529
2530 if (!vcpu->vc_msr_bitmap_va) {
2531 ret = ENOMEM;
2532 goto exit;
2533 }
2534
2535 /* Compute MSR bitmap PA */
2536 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2537 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2538 ret = ENOMEM;
2539 goto exit;
2540 }
2541
2542 /* Allocate MSR exit load area VA */
2543 vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2544 &kp_zero, &kd_waitok);
2545
2546 if (!vcpu->vc_vmx_msr_exit_load_va) {
2547 ret = ENOMEM;
2548 goto exit;
2549 }
2550
2551 /* Compute MSR exit load area PA */
2552 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_load_va,
2553 &vcpu->vc_vmx_msr_exit_load_pa)) {
2554 ret = ENOMEM;
2555 goto exit;
2556 }
2557
2558 /* Allocate MSR exit save area VA */
2559 vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2560 &kp_zero, &kd_waitok);
2561
2562 if (!vcpu->vc_vmx_msr_exit_save_va) {
2563 ret = ENOMEM;
2564 goto exit;
2565 }
2566
2567 /* Compute MSR exit save area PA */
2568 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_save_va,
2569 &vcpu->vc_vmx_msr_exit_save_pa)) {
2570 ret = ENOMEM;
2571 goto exit;
2572 }
2573
2574 #if 0 /* XXX currently use msr_exit_save for msr_entry_load too */
2575 /* Allocate MSR entry load area VA */
2576 vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2577 &kp_zero, &kd_waitok);
2578
2579 if (!vcpu->vc_vmx_msr_entry_load_va) {
2580 ret = ENOMEM;
2581 goto exit;
2582 }
2583
2584 /* Compute MSR entry load area PA */
2585 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_entry_load_va,
2586 &vcpu->vc_vmx_msr_entry_load_pa)) {
2587 ret = ENOMEM;
2588 goto exit;
2589 }
2590 #endif
2591
2592 vmcs = (struct vmcs *)vcpu->vc_control_va;
2593 vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
2594
2595 /*
2596 * Load the VMCS onto this PCPU so we can write registers
2597 */
2598 if (vmptrld(&vcpu->vc_control_pa)) {
2599 ret = EINVAL;
2600 goto exit;
2601 }
2602
2603 /* Configure EPT Pointer */
2604 eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
2605 msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
2606 if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) {
2607 /* Page walk length 4 supported */
2608 eptp |= ((IA32_EPT_PAGE_WALK_LENGTH - 1) << 3);
2609 } else {
2610 DPRINTF("EPT page walk length 4 not supported\n");
2611 ret = EINVAL;
2612 goto exit;
2613 }
2614 if (msr & IA32_EPT_VPID_CAP_WB) {
2615 /* WB cache type supported */
2616 eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB;
2617 } else
2618 DPRINTF("%s: no WB cache type available, guest VM will run "
2619 "uncached\n", __func__);
2620
2621 DPRINTF("Guest EPTP = 0x%llx\n", eptp);
2622 if (vmwrite(VMCS_GUEST_IA32_EPTP, eptp)) {
2623 DPRINTF("%s: error setting guest EPTP\n", __func__);
2624 ret = EINVAL;
2625 goto exit;
2626 }
2627
2628 vcpu->vc_parent->vm_map->pmap->eptp = eptp;
2629
2630 /* Host CR0 */
2631 cr0 = rcr0() & ~CR0_TS;
2632 if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
2633 DPRINTF("%s: error writing host CR0\n", __func__);
2634 ret = EINVAL;
2635 goto exit;
2636 }
2637
2638 /* Host CR4 */
2639 cr4 = rcr4();
2640 if (vmwrite(VMCS_HOST_IA32_CR4, cr4)) {
2641 DPRINTF("%s: error writing host CR4\n", __func__);
2642 ret = EINVAL;
2643 goto exit;
2644 }
2645
2646 /* Host Segment Selectors */
2647 if (vmwrite(VMCS_HOST_IA32_CS_SEL, GSEL(GCODE_SEL, SEL_KPL))) {
2648 DPRINTF("%s: error writing host CS selector\n", __func__);
2649 ret = EINVAL;
2650 goto exit;
2651 }
2652
2653 if (vmwrite(VMCS_HOST_IA32_DS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2654 DPRINTF("%s: error writing host DS selector\n", __func__);
2655 ret = EINVAL;
2656 goto exit;
2657 }
2658
2659 if (vmwrite(VMCS_HOST_IA32_ES_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2660 DPRINTF("%s: error writing host ES selector\n", __func__);
2661 ret = EINVAL;
2662 goto exit;
2663 }
2664
2665 if (vmwrite(VMCS_HOST_IA32_FS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2666 DPRINTF("%s: error writing host FS selector\n", __func__);
2667 ret = EINVAL;
2668 goto exit;
2669 }
2670
2671 if (vmwrite(VMCS_HOST_IA32_GS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2672 DPRINTF("%s: error writing host GS selector\n", __func__);
2673 ret = EINVAL;
2674 goto exit;
2675 }
2676
2677 if (vmwrite(VMCS_HOST_IA32_SS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2678 DPRINTF("%s: error writing host SS selector\n", __func__);
2679 ret = EINVAL;
2680 goto exit;
2681 }
2682
2683 if (vmwrite(VMCS_HOST_IA32_TR_SEL, GSYSSEL(GPROC0_SEL, SEL_KPL))) {
2684 DPRINTF("%s: error writing host TR selector\n", __func__);
2685 ret = EINVAL;
2686 goto exit;
2687 }
2688
2689 /* Host IDTR base */
2690 if (vmwrite(VMCS_HOST_IA32_IDTR_BASE, idt_vaddr)) {
2691 DPRINTF("%s: error writing host IDTR base\n", __func__);
2692 ret = EINVAL;
2693 goto exit;
2694 }
2695
2696 /* VMCS link */
2697 if (vmwrite(VMCS_LINK_POINTER, VMX_VMCS_PA_CLEAR)) {
2698 DPRINTF("%s: error writing VMCS link pointer\n", __func__);
2699 ret = EINVAL;
2700 goto exit;
2701 }
2702
2703 /* Flush the initial VMCS */
2704 if (vmclear(&vcpu->vc_control_pa)) {
2705 DPRINTF("%s: vmclear failed\n", __func__);
2706 ret = EINVAL;
2707 }
2708
2709 exit:
2710 if (ret)
2711 vcpu_deinit_vmx(vcpu);
2712
2713 return (ret);
2714 }
2715
2716 /*
2717 * vcpu_reset_regs
2718 *
2719 * Resets a vcpu's registers to the provided state
2720 *
2721 * Parameters:
2722 * vcpu: the vcpu whose registers shall be reset
2723 * vrs: the desired register state
2724 *
2725 * Return values:
2726 * 0: the vcpu's registers were successfully reset
2727 * !0: the vcpu's registers could not be reset (see arch-specific reset
2728 * function for various values that can be returned here)
2729 */
2730 int
2731 vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2732 {
2733 int ret;
2734
2735 if (vmm_softc->mode == VMM_MODE_EPT)
2736 ret = vcpu_reset_regs_vmx(vcpu, vrs);
2737 else if (vmm_softc->mode == VMM_MODE_RVI)
2738 ret = vcpu_reset_regs_svm(vcpu, vrs);
2739 else
2740 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2741
2742 return (ret);
2743 }
2744
2745 /*
2746 * vcpu_init_svm
2747 *
2748 * AMD SVM specific VCPU initialization routine.
2749 *
2750 * This function allocates various per-VCPU memory regions, sets up initial
2751 * VCPU VMCB controls, and sets initial register values.
2752 *
2753 * Parameters:
2754 * vcpu: the VCPU structure being initialized
2755 * vcp: parameters provided by vmd(8)
2756 *
2757 * Return values:
2758 * 0: the VCPU was initialized successfully
2759 * ENOMEM: insufficient resources
2760 * EINVAL: an error occurred during VCPU initialization
2761 */
2762 int
2763 vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
2764 {
2765 int ret = 0;
2766
2767 /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
2768 if (vmm_alloc_vpid(&vcpu->vc_vpid))
2769 return (ENOMEM);
2770
2771 /* Allocate VMCB VA */
2772 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2773 &kd_waitok);
2774
2775 if (!vcpu->vc_control_va) {
2776 ret = ENOMEM;
2777 goto exit;
2778 }
2779
2780 /* Compute VMCB PA */
2781 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2782 (paddr_t *)&vcpu->vc_control_pa)) {
2783 ret = ENOMEM;
2784 goto exit;
2785 }
2786
2787 DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__,
2788 (uint64_t)vcpu->vc_control_va,
2789 (uint64_t)vcpu->vc_control_pa);
2790
2791
2792 /* Allocate MSR bitmap VA (2 pages) */
2793 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any,
2794 &vmm_kp_contig, &kd_waitok);
2795
2796 if (!vcpu->vc_msr_bitmap_va) {
2797 ret = ENOMEM;
2798 goto exit;
2799 }
2800
2801 /* Compute MSR bitmap PA */
2802 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2803 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2804 ret = ENOMEM;
2805 goto exit;
2806 }
2807
2808 DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__,
2809 (uint64_t)vcpu->vc_msr_bitmap_va,
2810 (uint64_t)vcpu->vc_msr_bitmap_pa);
2811
2812 /* Allocate host state area VA */
2813 vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2814 &kp_zero, &kd_waitok);
2815
2816 if (!vcpu->vc_svm_hsa_va) {
2817 ret = ENOMEM;
2818 goto exit;
2819 }
2820
2821 /* Compute host state area PA */
2822 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_hsa_va,
2823 &vcpu->vc_svm_hsa_pa)) {
2824 ret = ENOMEM;
2825 goto exit;
2826 }
2827
2828 DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
2829 (uint64_t)vcpu->vc_svm_hsa_va,
2830 (uint64_t)vcpu->vc_svm_hsa_pa);
2831
2832 /* Allocate IOIO area VA (3 pages) */
2833 vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any,
2834 &vmm_kp_contig, &kd_waitok);
2835
2836 if (!vcpu->vc_svm_ioio_va) {
2837 ret = ENOMEM;
2838 goto exit;
2839 }
2840
2841 /* Compute IOIO area PA */
2842 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_ioio_va,
2843 &vcpu->vc_svm_ioio_pa)) {
2844 ret = ENOMEM;
2845 goto exit;
2846 }
2847
2848 DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__,
2849 (uint64_t)vcpu->vc_svm_ioio_va,
2850 (uint64_t)vcpu->vc_svm_ioio_pa);
2851
2852 /* Shall we enable SEV? */
2853 vcpu->vc_sev = vcp->vcp_sev;
2854
2855 /* Inform vmd(8) about ASID and C bit position. */
2856 vcp->vcp_poscbit = amd64_pos_cbit;
2857 vcp->vcp_asid[vcpu->vc_id] = vcpu->vc_vpid;
2858
2859 exit:
2860 if (ret)
2861 vcpu_deinit_svm(vcpu);
2862
2863 return (ret);
2864 }
2865
2866 /*
2867 * vcpu_init
2868 *
2869 * Calls the architecture-specific VCPU init routine
2870 */
2871 int
2872 vcpu_init(struct vcpu *vcpu, struct vm_create_params *vcp)
2873 {
2874 int ret = 0;
2875
2876 vcpu->vc_virt_mode = vmm_softc->mode;
2877 vcpu->vc_state = VCPU_STATE_STOPPED;
2878 vcpu->vc_vpid = 0;
2879 vcpu->vc_pvclock_system_gpa = 0;
2880 vcpu->vc_last_pcpu = NULL;
2881
2882 rw_init(&vcpu->vc_lock, "vcpu");
2883
2884 /* Shadow PAT MSR, starting with host's value. */
2885 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2886
2887 if (vmm_softc->mode == VMM_MODE_EPT)
2888 ret = vcpu_init_vmx(vcpu);
2889 else if (vmm_softc->mode == VMM_MODE_RVI)
2890 ret = vcpu_init_svm(vcpu, vcp);
2891 else
2892 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2893
2894 return (ret);
2895 }
2896
2897 /*
2898 * vcpu_deinit_vmx
2899 *
2900 * Deinitializes the vcpu described by 'vcpu'
2901 *
2902 * Parameters:
2903 * vcpu: the vcpu to be deinited
2904 */
2905 void
2906 vcpu_deinit_vmx(struct vcpu *vcpu)
2907 {
2908 if (vcpu->vc_control_va) {
2909 km_free((void *)vcpu->vc_control_va, PAGE_SIZE,
2910 &kv_page, &kp_zero);
2911 vcpu->vc_control_va = 0;
2912 }
2913 if (vcpu->vc_vmx_msr_exit_save_va) {
2914 km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
2915 PAGE_SIZE, &kv_page, &kp_zero);
2916 vcpu->vc_vmx_msr_exit_save_va = 0;
2917 }
2918 if (vcpu->vc_vmx_msr_exit_load_va) {
2919 km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
2920 PAGE_SIZE, &kv_page, &kp_zero);
2921 vcpu->vc_vmx_msr_exit_load_va = 0;
2922 }
2923 #if 0
2924 if (vcpu->vc_vmx_msr_entry_load_va) {
2925 km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
2926 PAGE_SIZE, &kv_page, &kp_zero);
2927 vcpu->vc_vmx_msr_entry_load_va = 0;
2928 }
2929 #endif
2930
2931 vmm_free_vpid(vcpu->vc_vpid);
2932 }
2933
2934 /*
2935 * vcpu_deinit_svm
2936 *
2937 * Deinitializes the vcpu described by 'vcpu'
2938 *
2939 * Parameters:
2940 * vcpu: the vcpu to be deinited
2941 */
2942 void
2943 vcpu_deinit_svm(struct vcpu *vcpu)
2944 {
2945 if (vcpu->vc_control_va) {
2946 km_free((void *)vcpu->vc_control_va, PAGE_SIZE, &kv_page,
2947 &kp_zero);
2948 vcpu->vc_control_va = 0;
2949 }
2950 if (vcpu->vc_msr_bitmap_va) {
2951 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, &kv_any,
2952 &vmm_kp_contig);
2953 vcpu->vc_msr_bitmap_va = 0;
2954 }
2955 if (vcpu->vc_svm_hsa_va) {
2956 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, &kv_page,
2957 &kp_zero);
2958 vcpu->vc_svm_hsa_va = 0;
2959 }
2960 if (vcpu->vc_svm_ioio_va) {
2961 km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any,
2962 &vmm_kp_contig);
2963 vcpu->vc_svm_ioio_va = 0;
2964 }
2965
2966 vmm_free_vpid(vcpu->vc_vpid);
2967 }
2968
2969 /*
2970 * vcpu_deinit
2971 *
2972 * Calls the architecture-specific VCPU deinit routine
2973 *
2974 * Parameters:
2975 * vcpu: the vcpu to be deinited
2976 */
2977 void
2978 vcpu_deinit(struct vcpu *vcpu)
2979 {
2980 if (vmm_softc->mode == VMM_MODE_EPT)
2981 vcpu_deinit_vmx(vcpu);
2982 else if (vmm_softc->mode == VMM_MODE_RVI)
2983 vcpu_deinit_svm(vcpu);
2984 else
2985 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2986 }
2987
2988 /*
2989 * vcpu_vmx_check_cap
2990 *
2991 * Checks if the 'cap' bit in the 'msr' MSR can be set ('set' == 1) or
2992 * cleared ('set' == 0).
2993 *
2994 * When considering 'msr', we check to see if true controls are available,
2995 * and use those if so.
2996 *
2997 * Returns 1 if 'cap' can be set/cleared as requested, 0 otherwise.
2998 */
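/*
 * Usage sketch (illustrative, not part of the original source):
 *
 *	if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
 *	    IA32_VMX_ENABLE_VPID, 1))
 *		... the "enable VPID" secondary control can be set to 1 ...
 *
 * mirroring the capability checks made in vcpu_reset_regs_vmx() above.
 */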
2999 int
3000 vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set)
3001 {
3002 uint64_t ctl;
3003
3004 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
3005 switch (msr) {
3006 case IA32_VMX_PINBASED_CTLS:
3007 ctl = vcpu->vc_vmx_true_pinbased_ctls;
3008 break;
3009 case IA32_VMX_PROCBASED_CTLS:
3010 ctl = vcpu->vc_vmx_true_procbased_ctls;
3011 break;
3012 case IA32_VMX_PROCBASED2_CTLS:
3013 ctl = vcpu->vc_vmx_procbased2_ctls;
3014 break;
3015 case IA32_VMX_ENTRY_CTLS:
3016 ctl = vcpu->vc_vmx_true_entry_ctls;
3017 break;
3018 case IA32_VMX_EXIT_CTLS:
3019 ctl = vcpu->vc_vmx_true_exit_ctls;
3020 break;
3021 default:
3022 return (0);
3023 }
3024 } else {
3025 switch (msr) {
3026 case IA32_VMX_PINBASED_CTLS:
3027 ctl = vcpu->vc_vmx_pinbased_ctls;
3028 break;
3029 case IA32_VMX_PROCBASED_CTLS:
3030 ctl = vcpu->vc_vmx_procbased_ctls;
3031 break;
3032 case IA32_VMX_PROCBASED2_CTLS:
3033 ctl = vcpu->vc_vmx_procbased2_ctls;
3034 break;
3035 case IA32_VMX_ENTRY_CTLS:
3036 ctl = vcpu->vc_vmx_entry_ctls;
3037 break;
3038 case IA32_VMX_EXIT_CTLS:
3039 ctl = vcpu->vc_vmx_exit_ctls;
3040 break;
3041 default:
3042 return (0);
3043 }
3044 }
3045
3046 if (set) {
3047 /* Check bit 'cap << 32', must be !0 */
3048 return (ctl & ((uint64_t)cap << 32)) != 0;
3049 } else {
3050 /* Check bit 'cap', must be 0 */
3051 return (ctl & cap) == 0;
3052 }
3053 }
3054
3055 /*
3056 * vcpu_vmx_compute_ctrl
3057 *
3058 * Computes the appropriate control value, given the supplied parameters
3059 * and CPU capabilities.
3060 *
3061 * Intel has made somewhat of a mess of this computation - it is described
3062 * using no fewer than three different approaches, spread across many
3063 * pages of the SDM. Further compounding the problem is the fact that now
3064 * we have "true controls" for each type of "control", and each needs to
3065 * be examined to get the calculation right, but only if "true" controls
3066 * are present on the CPU we're on.
3067 *
3068 * Parameters:
3069 * ctrlval: the control value, as read from the CPU MSR
3070 * ctrl: which control is being set (eg, pinbased, procbased, etc)
3071 * want1: the set of desired 1 bits
3072 * want0: the set of desired 0 bits
3073 * out: (out) the correct value to write into the VMCS for this VCPU,
3074 * for the 'ctrl' desired.
3075 *
3076 * Returns 0 if successful, or EINVAL if the supplied parameters define
3077 * an unworkable control setup.
3078 */
3079 int
3080 vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1,
3081 uint32_t want0, uint32_t *out)
3082 {
3083 int i, set, clear;
3084
3085 *out = 0;
3086
3087 /*
3088 * The Intel SDM gives three formulae for determining which bits to
3089 * set/clear for a given control and desired functionality. Formula
3090 * 1 is the simplest but disallows use of newer features that are
3091 * enabled by functionality in later CPUs.
3092 *
3093 * Formulas 2 and 3 allow such extra functionality. We use formula
3094 * 2 - this requires us to know the identity of controls in the
3095 * "default1" class for each control register, but allows us to not
3096 * have to pass along and/or query both sets of capability MSRs for
3097 * each control lookup. This makes the code slightly longer,
3098 * however.
3099 */
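/*
 * Worked example (illustrative, not part of the original source): consider
 * pinbased control bit 2, which is in the default1 class per SDM A.3.1.
 * If the capability MSR says the bit can be either 0 or 1 and neither
 * want1 nor want0 names it, the loop below forces it to 1 (2.c.iv); an
 * unknown flexible bit outside the default1 class is forced to 0 (2.c.iii).
 */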
3100 for (i = 0; i < 32; i++) {
3101 /* Figure out if we can set and / or clear this bit */
3102 set = (ctrlval & (1ULL << (i + 32))) != 0;
3103 clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0;
3104
3105 /* If the bit can't be set nor cleared, something's wrong */
3106 if (!set && !clear)
3107 return (EINVAL);
3108
3109 /*
3110 * Formula 2.c.i - "If the relevant VMX capability MSR
3111 * reports that a control has a single setting, use that
3112 * setting."
3113 */
3114 if (set && !clear) {
3115 if (want0 & (1ULL << i))
3116 return (EINVAL);
3117 else
3118 *out |= (1ULL << i);
3119 } else if (clear && !set) {
3120 if (want1 & (1ULL << i))
3121 return (EINVAL);
3122 else
3123 *out &= ~(1ULL << i);
3124 } else {
3125 /*
3126 * 2.c.ii - "If the relevant VMX capability MSR
3127 * reports that a control can be set to 0 or 1
3128 * and that control's meaning is known to the VMM,
3129 * set the control based on the functionality desired."
3130 */
3131 if (want1 & (1ULL << i))
3132 *out |= (1ULL << i);
3133 else if (want0 & (1ULL << i))
3134 *out &= ~(1ULL << i);
3135 else {
3136 /*
3137 * ... assuming the control's meaning is not
3138 * known to the VMM ...
3139 *
3140 * 2.c.iii - "If the relevant VMX capability
3141 * MSR reports that a control can be set to 0
3142 * or 1 and the control is not in the default1
3143 * class, set the control to 0."
3144 *
3145 * 2.c.iv - "If the relevant VMX capability
3146 * MSR reports that a control can be set to 0
3147 * or 1 and the control is in the default1
3148 * class, set the control to 1."
3149 */
3150 switch (ctrl) {
3151 case IA32_VMX_PINBASED_CTLS:
3152 case IA32_VMX_TRUE_PINBASED_CTLS:
3153 /*
3154 * A.3.1 - default1 class of pinbased
3155 * controls comprises bits 1,2,4
3156 */
3157 switch (i) {
3158 case 1:
3159 case 2:
3160 case 4:
3161 *out |= (1ULL << i);
3162 break;
3163 default:
3164 *out &= ~(1ULL << i);
3165 break;
3166 }
3167 break;
3168 case IA32_VMX_PROCBASED_CTLS:
3169 case IA32_VMX_TRUE_PROCBASED_CTLS:
3170 /*
3171 * A.3.2 - default1 class of procbased
3172 * controls comprises bits 1, 4-6, 8,
3173 * 13-16, 26
3174 */
3175 switch (i) {
3176 case 1:
3177 case 4 ... 6:
3178 case 8:
3179 case 13 ... 16:
3180 case 26:
3181 *out |= (1ULL << i);
3182 break;
3183 default:
3184 *out &= ~(1ULL << i);
3185 break;
3186 }
3187 break;
3188 /*
3189 * Unknown secondary procbased controls
3190 * can always be set to 0
3191 */
3192 case IA32_VMX_PROCBASED2_CTLS:
3193 *out &= ~(1ULL << i);
3194 break;
3195 case IA32_VMX_EXIT_CTLS:
3196 case IA32_VMX_TRUE_EXIT_CTLS:
3197 /*
3198 * A.4 - default1 class of exit
3199 * controls comprises bits 0-8, 10,
3200 * 11, 13, 14, 16, 17
3201 */
3202 switch (i) {
3203 case 0 ... 8:
3204 case 10 ... 11:
3205 case 13 ... 14:
3206 case 16 ... 17:
3207 *out |= (1ULL << i);
3208 break;
3209 default:
3210 *out &= ~(1ULL << i);
3211 break;
3212 }
3213 break;
3214 case IA32_VMX_ENTRY_CTLS:
3215 case IA32_VMX_TRUE_ENTRY_CTLS:
3216 /*
3217 * A.5 - default1 class of entry
3218 * controls comprises bits 0-8, 12
3219 */
3220 switch (i) {
3221 case 0 ... 8:
3222 case 12:
3223 *out |= (1ULL << i);
3224 break;
3225 default:
3226 *out &= ~(1ULL << i);
3227 break;
3228 }
3229 break;
3230 }
3231 }
3232 }
3233 }
3234
3235 return (0);
3236 }
3237
3238 /*
3239 * vm_run
3240 *
3241 * Run the vm / vcpu specified by 'vrp'
3242 *
3243 * Parameters:
3244 * vrp: structure defining the VM to run
3245 *
3246 * Return value:
3247 * ENOENT: the VM defined in 'vrp' could not be located
3248 * EBUSY: the VM defined in 'vrp' is already running
3249 * EFAULT: error copying data from userspace (vmd) on return from previous
3250 * exit.
3251 * EAGAIN: help is needed from vmd(8) (device I/O or an exit vmm(4)
3252 * cannot handle in-kernel)
3253 * 0: the run loop exited and no help is needed from vmd(8)
3254 */
3255 int
3256 vm_run(struct vm_run_params *vrp)
3257 {
3258 struct vm *vm;
3259 struct vcpu *vcpu;
3260 int ret = 0;
3261 u_int old, next;
3262
3263 /*
3264 * Find desired VM
3265 */
3266 ret = vm_find(vrp->vrp_vm_id, &vm);
3267 if (ret)
3268 return (ret);
3269
3270 vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
3271 if (vcpu == NULL) {
3272 ret = ENOENT;
3273 goto out;
3274 }
3275
3276 /*
3277 * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
3278 * Failure to make the transition indicates the VCPU is busy.
3279 */
3280 rw_enter_write(&vcpu->vc_lock);
3281 old = VCPU_STATE_STOPPED;
3282 next = VCPU_STATE_RUNNING;
3283 if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) {
3284 ret = EBUSY;
3285 goto out_unlock;
3286 }
3287
3288 /*
3289 * We may be returning from userland, where vmd(8) was helping
3290 * us with the last exit. Copy in the exit data from vmd; it will
3291 * be consumed before the next entry (this typically comprises
3292 * VCPU register changes resulting from vmd(8)'s actions).
3293 */
3294 ret = copyin(vrp->vrp_exit, &vcpu->vc_exit, sizeof(struct vm_exit));
3295 if (ret)
3296 goto out_unlock;
3297
3298 vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type;
3299 vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector;
3300 vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode;
3301
3302 WRITE_ONCE(vcpu->vc_curcpu, curcpu());
3303 /* Run the VCPU specified in vrp */
3304 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
3305 ret = vcpu_run_vmx(vcpu, vrp);
3306 } else if (vcpu->vc_virt_mode == VMM_MODE_RVI) {
3307 ret = vcpu_run_svm(vcpu, vrp);
3308 }
3309 WRITE_ONCE(vcpu->vc_curcpu, NULL);
3310
3311 if (ret == 0 || ret == EAGAIN) {
3312 /* If we are exiting, populate exit data so vmd can help. */
3313 vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE
3314 : vcpu->vc_gueststate.vg_exit_reason;
3315 vrp->vrp_irqready = vcpu->vc_irqready;
3316 vcpu->vc_state = VCPU_STATE_STOPPED;
3317
3318 if (copyout(&vcpu->vc_exit, vrp->vrp_exit,
3319 sizeof(struct vm_exit)) == EFAULT) {
3320 ret = EFAULT;
3321 } else
3322 ret = 0;
3323 } else {
3324 vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
3325 vcpu->vc_state = VCPU_STATE_TERMINATED;
3326 }
3327 out_unlock:
3328 rw_exit_write(&vcpu->vc_lock);
3329 out:
3330 refcnt_rele_wake(&vm->vm_refcnt);
3331 return (ret);
3332 }
3333
3334 /*
3335 * vmm_fpurestore
3336 *
3337 * Restore the guest's FPU state, saving the existing userland thread's
3338 * FPU context if necessary. Must be called with interrupts disabled.
3339 */
3340 int
3341 vmm_fpurestore(struct vcpu *vcpu)
3342 {
3343 struct cpu_info *ci = curcpu();
3344
3345 rw_assert_wrlock(&vcpu->vc_lock);
3346
3347 /* save vmm's FPU state if we haven't already */
3348 if (ci->ci_pflags & CPUPF_USERXSTATE) {
3349 ci->ci_pflags &= ~CPUPF_USERXSTATE;
3350 fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
3351 }
3352
3353 if (vcpu->vc_fpuinited)
3354 xrstor_kern(&vcpu->vc_g_fpu, xsave_mask);
3355
3356 if (xsave_mask) {
3357 /* Restore guest %xcr0 */
3358 if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
3359 DPRINTF("%s: guest attempted to set invalid bits in "
3360 "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
3361 __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
3362 return EINVAL;
3363 }
3364 }
3365
3366 return 0;
3367 }
3368
3369 /*
3370 * vmm_fpusave
3371 *
3372 * Save the guest's FPU state. Must be called with interrupts disabled.
3373 */
3374 void
3375 vmm_fpusave(struct vcpu *vcpu)
3376 {
3377 rw_assert_wrlock(&vcpu->vc_lock);
3378
3379 if (xsave_mask) {
3380 /* Save guest %xcr0 */
3381 vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
3382
3383 /* Restore host %xcr0 */
3384 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
3385 }
3386
3387 /*
3388 * Save full copy of FPU state - guest content is always
3389 * a subset of host's save area (see xsetbv exit handler)
3390 */
3391 fpusavereset(&vcpu->vc_g_fpu);
3392 vcpu->vc_fpuinited = 1;
3393 }
3394
3395 /*
3396 * vmm_translate_gva
3397 *
3398 * Translates a guest virtual address to a guest physical address by walking
3399 * the currently active page table (if needed).
3400 *
3401 * Note - this function can possibly alter the supplied VCPU state.
3402 * Specifically, it may inject exceptions depending on the current VCPU
3403 * configuration, and may alter %cr2 on #PF. Consequently, this function
3404 * should only be used as part of instruction emulation.
3405 *
3406 * Parameters:
3407 * vcpu: The VCPU this translation should be performed for (guest MMU settings
3408 * are gathered from this VCPU)
3409 * va: virtual address to translate
3410 * pa: pointer to paddr_t variable that will receive the translated physical
3411 * address. 'pa' is unchanged on error.
3412 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
3413 * the address should be translated
3414 *
3415 * Return values:
3416 * 0: the address was successfully translated - 'pa' contains the physical
3417 * address currently mapped by 'va'.
3418 * EFAULT: the PTE for 'va' is unmapped. A #PF will be injected in this case
3419 * and %cr2 set in the vcpu structure.
3420 * EINVAL: an error occurred reading page table structures
3421 */
3422 int
3423 vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
3424 {
3425 int level, shift, pdidx;
3426 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
3427 uint64_t shift_width, pte_size, *hva;
3428 paddr_t hpa;
3429 struct vcpu_reg_state vrs;
3430
3431 level = 0;
3432
3433 if (vmm_softc->mode == VMM_MODE_EPT) {
3434 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 1, &vrs))
3435 return (EINVAL);
3436 } else if (vmm_softc->mode == VMM_MODE_RVI) {
3437 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs))
3438 return (EINVAL);
3439 } else {
3440 printf("%s: unknown vmm mode", __func__);
3441 return (EINVAL);
3442 }
3443
3444 DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
3445 vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);
3446
3447 if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
3448 DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
3449 va);
3450 *pa = va;
3451 return (0);
3452 }
3453
3454 pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3];
3455
3456 if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
3457 if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
3458 pte_size = sizeof(uint64_t);
3459 shift_width = 9;
3460
3461 if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
3462 level = 4;
3463 mask = L4_MASK;
3464 shift = L4_SHIFT;
3465 } else {
3466 level = 3;
3467 mask = L3_MASK;
3468 shift = L3_SHIFT;
3469 }
3470 } else {
3471 level = 2;
3472 shift_width = 10;
3473 mask = 0xFFC00000;
3474 shift = 22;
3475 pte_size = sizeof(uint32_t);
3476 }
3477 } else {
3478 return (EINVAL);
3479 }
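
/*
 * Illustrative note (not part of the original source): in 4-level long
 * mode the walk starts at level 4 with a 9-bit index at L4_SHIFT (39 on
 * amd64); each iteration below subtracts shift_width, giving shifts of
 * 39, 30, 21 and 12 for the PML4, PDPT, PD and PT levels respectively.
 */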
3480
3481 DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
3482 "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
3483 shift_width);
3484
3485 /* XXX: Check for R bit in segment selector and set A bit */
3486
3487 for (;level > 0; level--) {
3488 pdidx = (va & mask) >> shift;
3489 pte_paddr = (pt_paddr) + (pdidx * pte_size);
3490
3491 DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
3492 level, pte_paddr);
3493 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
3494 &hpa)) {
3495 DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
3496 __func__, pte_paddr);
3497 return (EINVAL);
3498 }
3499
3500 hpa = hpa | (pte_paddr & 0xFFF);
3501 hva = (uint64_t *)PMAP_DIRECT_MAP(hpa);
3502 DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
3503 __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
3504 if (pte_size == 8)
3505 pte = *hva;
3506 else
3507 pte = *(uint32_t *)hva;
3508
3509 DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
3510 pte);
3511
3512 /* XXX: Set CR2 */
3513 if (!(pte & PG_V))
3514 return (EFAULT);
3515
3516 /* XXX: Check for SMAP */
3517 if ((mode == PROT_WRITE) && !(pte & PG_RW))
3518 return (EPERM);
3519
3520 if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u))
3521 return (EPERM);
3522
3523 pte = pte | PG_U;
3524 if (mode == PROT_WRITE)
3525 pte = pte | PG_M;
3526 *hva = pte;
3527
3528 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
3529 if (pte & PG_PS)
3530 break;
3531
3532 if (level > 1) {
3533 pt_paddr = pte & PG_FRAME;
3534 shift -= shift_width;
3535 mask = mask >> shift_width;
3536 }
3537 }
3538
3539 low_mask = ((uint64_t)1ULL << shift) - 1;
3540 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
3541 *pa = (pte & high_mask) | (va & low_mask);
3542
3543 DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
3544 va, *pa);
3545
3546 return (0);
3547 }
3548
3549
3550 /*
3551 * vcpu_run_vmx
3552 *
3553 * VMX main loop used to run a VCPU.
3554 *
3555 * Parameters:
3556 * vcpu: The VCPU to run
3557 * vrp: run parameters
3558 *
3559 * Return values:
3560 * 0: The run loop exited and no help is needed from vmd
3561 * EAGAIN: The run loop exited and help from vmd is needed
3562 * EINVAL: an error occurred
3563 */
3564 int
3565 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
3566 {
3567 int ret = 0, exitinfo;
3568 struct region_descriptor gdt;
3569 struct cpu_info *ci = NULL;
3570 uint64_t exit_reason, cr3, msr, insn_error;
3571 struct schedstate_percpu *spc;
3572 struct vmx_msr_store *msr_store;
3573 struct vmx_invvpid_descriptor vid;
3574 struct vmx_invept_descriptor vid_ept;
3575 uint64_t cr0, eii, procbased, int_st;
3576 u_long s;
3577
3578 rw_assert_wrlock(&vcpu->vc_lock);
3579
3580 if (vcpu_reload_vmcs_vmx(vcpu)) {
3581 printf("%s: failed (re)loading vmcs\n", __func__);
3582 return (EINVAL);
3583 }
3584
3585 /*
3586 * If we are returning from userspace (vmd) because we exited
3587 * last time, fix up any needed vcpu state first. Which state
3588 * needs to be fixed up depends on what vmd populated in the
3589 * exit data structure.
3590 */
3591 if (vrp->vrp_intr_pending)
3592 vcpu->vc_intr = 1;
3593 else
3594 vcpu->vc_intr = 0;
3595
3596 switch (vcpu->vc_gueststate.vg_exit_reason) {
3597 case VMX_EXIT_IO:
3598 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN)
3599 vcpu->vc_gueststate.vg_rax = vcpu->vc_exit.vei.vei_data;
3600 vcpu->vc_gueststate.vg_rip =
3601 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
3602 if (vmwrite(VMCS_GUEST_IA32_RIP, vcpu->vc_gueststate.vg_rip)) {
3603 printf("%s: failed to update rip\n", __func__);
3604 return (EINVAL);
3605 }
3606 break;
3607 case VMX_EXIT_EPT_VIOLATION:
3608 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
3609 &vcpu->vc_exit.vrs);
3610 if (ret) {
3611 printf("%s: vm %d vcpu %d failed to update registers\n",
3612 __func__, vcpu->vc_parent->vm_id, vcpu->vc_id);
3613 return (EINVAL);
3614 }
3615 break;
3616 }
3617 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
3618
3619 /* Handle vmd(8) injected interrupts */
3620 /* Is there an interrupt pending injection? */
3621 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
3622 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
3623 printf("%s: can't get interruptibility state\n",
3624 __func__);
3625 return (EINVAL);
3626 }
3627
3628 /* Interruptibility state 0x3 covers NMIs and STI */
3629 if (!(int_st & 0x3) && vcpu->vc_irqready) {
3630 eii = (uint64_t)vcpu->vc_inject.vie_vector;
3631 eii |= (1ULL << 31); /* Valid */
3632 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3633 printf("vcpu_run_vmx: can't vector "
3634 "interrupt to guest\n");
3635 return (EINVAL);
3636 }
3637
3638 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3639 }
3640 } else if (!vcpu->vc_intr) {
3641 /*
3642 * Disable window exiting
3643 */
3644 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3645 printf("%s: can't read procbased ctls on exit\n",
3646 __func__);
3647 return (EINVAL);
3648 } else {
3649 procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING;
3650 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3651 printf("%s: can't write procbased ctls "
3652 "on exit\n", __func__);
3653 return (EINVAL);
3654 }
3655 }
3656 }
3657
3658 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
3659 while (ret == 0) {
3660 #ifdef VMM_DEBUG
3661 paddr_t pa = 0ULL;
3662 vmptrst(&pa);
3663 KASSERT(pa == vcpu->vc_control_pa);
3664 #endif /* VMM_DEBUG */
3665
3666 vmm_update_pvclock(vcpu);
3667
3668 if (ci != curcpu()) {
3669 ci = curcpu();
3670 vcpu->vc_last_pcpu = ci;
3671
3672 /* We're now using this vcpu's EPT pmap on this cpu. */
3673 atomic_swap_ptr(&ci->ci_ept_pmap,
3674 vcpu->vc_parent->vm_map->pmap);
3675
3676 /* Invalidate EPT cache. */
3677 vid_ept.vid_reserved = 0;
3678 vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
3679 if (invept(ci->ci_vmm_cap.vcc_vmx.vmx_invept_mode,
3680 &vid_ept)) {
3681 printf("%s: invept\n", __func__);
3682 return (EINVAL);
3683 }
3684
3685 /* Host CR3 */
3686 cr3 = rcr3();
3687 if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
3688 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
3689 VMCS_HOST_IA32_CR3, cr3);
3690 return (EINVAL);
3691 }
3692
3693 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
3694 if (gdt.rd_base == 0) {
3695 printf("%s: setregion\n", __func__);
3696 return (EINVAL);
3697 }
3698
3699 /* Host GDTR base */
3700 if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) {
3701 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3702 __func__, VMCS_HOST_IA32_GDTR_BASE,
3703 gdt.rd_base);
3704 return (EINVAL);
3705 }
3706
3707 /* Host TR base */
3708 if (vmwrite(VMCS_HOST_IA32_TR_BASE,
3709 (uint64_t)ci->ci_tss)) {
3710 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3711 __func__, VMCS_HOST_IA32_TR_BASE,
3712 (uint64_t)ci->ci_tss);
3713 return (EINVAL);
3714 }
3715
3716 /* Host GS.base (aka curcpu) */
3717 if (vmwrite(VMCS_HOST_IA32_GS_BASE, (uint64_t)ci)) {
3718 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3719 __func__, VMCS_HOST_IA32_GS_BASE,
3720 (uint64_t)ci);
3721 return (EINVAL);
3722 }
3723
3724 /* Host FS.base */
3725 msr = rdmsr(MSR_FSBASE);
3726 if (vmwrite(VMCS_HOST_IA32_FS_BASE, msr)) {
3727 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3728 __func__, VMCS_HOST_IA32_FS_BASE, msr);
3729 return (EINVAL);
3730 }
3731
3732 /* Host KernelGS.base (userspace GS.base here) */
3733 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data =
3734 rdmsr(MSR_KERNELGSBASE);
3735 }
3736
3737 /* Inject event if present */
3738 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
3739 eii = (uint64_t)vcpu->vc_inject.vie_vector;
3740 eii |= (1ULL << 31); /* Valid */
3741
3742 switch (vcpu->vc_inject.vie_vector) {
3743 case VMM_EX_BP:
3744 case VMM_EX_OF:
3745 /* Software Exceptions */
3746 eii |= (4ULL << 8);
3747 break;
3748 case VMM_EX_DF:
3749 case VMM_EX_TS:
3750 case VMM_EX_NP:
3751 case VMM_EX_SS:
3752 case VMM_EX_GP:
3753 case VMM_EX_PF:
3754 case VMM_EX_AC:
3755 /* Hardware Exceptions */
3756 eii |= (3ULL << 8);
3757 cr0 = 0;
3758 if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) {
3759 printf("%s: vmread(VMCS_GUEST_IA32_CR0)"
3760 "\n", __func__);
3761 ret = EINVAL;
3762 break;
3763 }
3764
3765 /* Don't set error codes if in real mode. */
3766 if (ret == EINVAL || !(cr0 & CR0_PE))
3767 break;
3768 eii |= (1ULL << 11);
3769
3770 /* Enforce a 0 error code for #AC. */
3771 if (vcpu->vc_inject.vie_vector == VMM_EX_AC)
3772 vcpu->vc_inject.vie_errorcode = 0;
3773 /*
3774 * XXX: Intel SDM says if IA32_VMX_BASIC[56] is
3775 * set, error codes can be injected for hw
3776 * exceptions with or without error code,
3777 * regardless of vector. See Vol 3D. A1. Ignore
3778 * this capability for now.
3779 */
3780 if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
3781 vcpu->vc_inject.vie_errorcode)) {
3782 printf("%s: can't write error code to "
3783 "guest\n", __func__);
3784 ret = EINVAL;
3785 }
3786 } /* switch */
3787 if (ret == EINVAL)
3788 break;
3789
3790 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3791 printf("%s: can't vector event to guest\n",
3792 __func__);
3793 ret = EINVAL;
3794 break;
3795 }
3796 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3797 }
3798
3799 if (vcpu->vc_vmx_vpid_enabled) {
3800 /* Invalidate old TLB mappings */
3801 vid.vid_vpid = vcpu->vc_vpid;
3802 vid.vid_addr = 0;
3803 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
3804 }
3805
3806 /* Start / resume the VCPU */
3807
3808 /* Disable interrupts and save the current host FPU state. */
3809 s = intr_disable();
3810 if ((ret = vmm_fpurestore(vcpu))) {
3811 intr_restore(s);
3812 break;
3813 }
3814
3815 TRACEPOINT(vmm, guest_enter, vcpu, vrp);
3816
3817 /*
3818 * If we're resuming to a different VCPU and have IBPB,
3819 * then use it to prevent cross-VM branch-target injection.
3820 */
3821 if (ci->ci_guest_vcpu != vcpu &&
3822 (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS)) {
3823 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
3824 ci->ci_guest_vcpu = vcpu;
3825 }
3826
3827 /* Restore any guest PKRU state. */
3828 if (vmm_softc->sc_md.pkru_enabled)
3829 wrpkru(0, vcpu->vc_pkru);
3830
3831 ret = vmx_enter_guest(&vcpu->vc_control_pa,
3832 &vcpu->vc_gueststate,
3833 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED),
3834 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
3835
3836 /* Restore host PKRU state. */
3837 if (vmm_softc->sc_md.pkru_enabled) {
3838 vcpu->vc_pkru = rdpkru(0);
3839 wrpkru(0, PGK_VALUE);
3840 }
3841
3842 /*
3843 * VM exit restores the GDT and IDT bases, but gives
3844 * them high limits. Reload with the correct limits here.
3845 * 'gdt' is set above on the first pass through the loop and reset
3846 * there whenever this thread switches CPUs.
3847 */
3848 bare_lgdt(&gdt);
3849 cpu_init_idt();
3850
3851 /*
3852 * On exit, interrupts are disabled, and we are running with
3853 * the guest FPU state still possibly on the CPU. Save the FPU
3854 * state before re-enabling interrupts.
3855 */
3856 vmm_fpusave(vcpu);
3857 intr_restore(s);
3858
3859 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED);
3860 exit_reason = VM_EXIT_NONE;
3861
3862 /* If we exited successfully ... */
3863 if (ret == 0) {
3864 exitinfo = vmx_get_exit_info(
3865 &vcpu->vc_gueststate.vg_rip, &exit_reason);
3866 if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) {
3867 printf("%s: cannot read guest rip\n", __func__);
3868 ret = EINVAL;
3869 break;
3870 }
3871 if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) {
3872 printf("%s: can't read exit reason\n",
3873 __func__);
3874 ret = EINVAL;
3875 break;
3876 }
3877 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
3878 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
3879
3880 /* Update our state */
3881 if (vmread(VMCS_GUEST_IA32_RFLAGS,
3882 &vcpu->vc_gueststate.vg_rflags)) {
3883 printf("%s: can't read guest rflags during "
3884 "exit\n", __func__);
3885 ret = EINVAL;
3886 break;
3887 }
3888
3889 /*
3890 * Handle the exit. This will alter "ret" to EAGAIN if
3891 * the exit handler determines help from vmd is needed.
3892 */
3893 ret = vmx_handle_exit(vcpu);
3894
3895 if (vcpu->vc_gueststate.vg_rflags & PSL_I)
3896 vcpu->vc_irqready = 1;
3897 else
3898 vcpu->vc_irqready = 0;
3899
3900 /*
3901 * If not ready for interrupts, but interrupts pending,
3902 * enable interrupt window exiting.
3903 */
3904 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
3905 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3906 printf("%s: can't read procbased ctls "
3907 "on intwin exit\n", __func__);
3908 ret = EINVAL;
3909 break;
3910 }
3911
3912 procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING;
3913 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3914 printf("%s: can't write procbased ctls "
3915 "on intwin exit\n", __func__);
3916 ret = EINVAL;
3917 break;
3918 }
3919 }
3920
3921 /*
3922 * Exit to vmd if we are terminating, failed to enter,
3923 * or need help (device I/O)
3924 */
3925 if (ret || vcpu_must_stop(vcpu))
3926 break;
3927
3928 if (vcpu->vc_intr && vcpu->vc_irqready) {
3929 ret = EAGAIN;
3930 break;
3931 }
3932
3933 /* Check if we should yield - don't hog the {p,v}pu */
3934 spc = &ci->ci_schedstate;
3935 if (spc->spc_schedflags & SPCF_SHOULDYIELD)
3936 break;
3937
3938 } else {
3939 /*
3940 * We failed vmresume or vmlaunch for some reason,
3941 * typically due to invalid vmcs state or other
3942 * reasons documented in SDM Vol 3C 30.4.
3943 */
3944 switch (ret) {
3945 case VMX_FAIL_LAUNCH_INVALID_VMCS:
3946 printf("%s: failed %s with invalid vmcs\n",
3947 __func__,
3948 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3949 ? "vmresume" : "vmlaunch"));
3950 break;
3951 case VMX_FAIL_LAUNCH_VALID_VMCS:
3952 printf("%s: failed %s with valid vmcs\n",
3953 __func__,
3954 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3955 ? "vmresume" : "vmlaunch"));
3956 break;
3957 default:
3958 printf("%s: failed %s for unknown reason\n",
3959 __func__,
3960 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3961 ? "vmresume" : "vmlaunch"));
3962 }
3963
3964 ret = EINVAL;
3965
3966 /* Try to translate a vmfail error code, if possible. */
3967 if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) {
3968 printf("%s: can't read insn error field\n",
3969 __func__);
3970 } else
3971 printf("%s: error code = %lld, %s\n", __func__,
3972 insn_error,
3973 vmx_instruction_error_decode(insn_error));
3974 #ifdef VMM_DEBUG
3975 vmx_vcpu_dump_regs(vcpu);
3976 dump_vcpu(vcpu);
3977 #endif /* VMM_DEBUG */
3978 }
3979 }
3980
3981 vcpu->vc_last_pcpu = curcpu();
3982
3983 /* Copy the VCPU register state to the exit structure */
3984 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 0, &vcpu->vc_exit.vrs))
3985 ret = EINVAL;
3986 vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
3987
3988 return (ret);
3989 }
3990
3991 /*
3992 * vmx_handle_intr
3993 *
3994 * Handle host (external) interrupts. We read which interrupt fired by
3995 * extracting the vector from the VMCS and dispatch the interrupt directly
3996 * to the host using vmm_dispatch_intr.
3997 */
3998 void
3999 vmx_handle_intr(struct vcpu *vcpu)
4000 {
4001 uint8_t vec;
4002 uint64_t eii;
4003 struct gate_descriptor *idte;
4004 vaddr_t handler;
4005
4006 if (vmread(VMCS_EXIT_INTERRUPTION_INFO, &eii)) {
4007 printf("%s: can't obtain intr info\n", __func__);
4008 return;
4009 }
4010
4011 vec = eii & 0xFF;
4012
4013 /* XXX check "error valid" code in eii, abort if 0 */
4014 idte = &idt[vec];
4015 handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
4016 vmm_dispatch_intr(handler);
4017 }
4018
4019 /*
4020 * svm_handle_hlt
4021 *
4022 * Handle HLT exits
4023 *
4024 * Parameters
4025 * vcpu: The VCPU that executed the HLT instruction
4026 *
4027 * Return Values:
4028 * EIO: The guest halted with interrupts disabled
4029 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4030 * until a virtual interrupt is ready to inject
4031 */
4032 int
4033 svm_handle_hlt(struct vcpu *vcpu)
4034 {
4035 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4036 uint64_t rflags = vmcb->v_rflags;
4037
4038 /* All HLT insns are 1 byte */
4039 vcpu->vc_gueststate.vg_rip += 1;
4040
4041 if (!(rflags & PSL_I)) {
4042 DPRINTF("%s: guest halted with interrupts disabled\n",
4043 __func__);
4044 return (EIO);
4045 }
4046
4047 return (EAGAIN);
4048 }
4049
4050 /*
4051 * vmx_handle_hlt
4052 *
4053 * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
4054 * the guest (no NMIs handled) by returning EIO to vmd.
4055 *
4056 * Parameters:
4057 * vcpu: The VCPU that executed the HLT instruction
4058 *
4059 * Return Values:
4060 * EINVAL: An error occurred extracting information from the VMCS, or an
4061 * invalid HLT instruction was encountered
4062 * EIO: The guest halted with interrupts disabled
4063 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4064 * until a virtual interrupt is ready to inject
4065 *
4066 */
4067 int
4068 vmx_handle_hlt(struct vcpu *vcpu)
4069 {
4070 uint64_t insn_length, rflags;
4071
4072 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4073 printf("%s: can't obtain instruction length\n", __func__);
4074 return (EINVAL);
4075 }
4076
4077 if (vmread(VMCS_GUEST_IA32_RFLAGS, &rflags)) {
4078 printf("%s: can't obtain guest rflags\n", __func__);
4079 return (EINVAL);
4080 }
4081
4082 if (insn_length != 1) {
4083 DPRINTF("%s: HLT with instruction length %lld not supported\n",
4084 __func__, insn_length);
4085 return (EINVAL);
4086 }
4087
4088 if (!(rflags & PSL_I)) {
4089 DPRINTF("%s: guest halted with interrupts disabled\n",
4090 __func__);
4091 return (EIO);
4092 }
4093
4094 vcpu->vc_gueststate.vg_rip += insn_length;
4095 return (EAGAIN);
4096 }
4097
4098 /*
4099 * vmx_get_exit_info
4100 *
4101 * Returns exit information containing the current guest RIP and exit reason
4102 * in rip and exit_reason. The return value is a bitmask indicating whether
4103 * reading the RIP and exit reason was successful.
4104 */
4105 int
4106 vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
4107 {
4108 int rv = 0;
4109
4110 if (vmread(VMCS_GUEST_IA32_RIP, rip) == 0) {
4111 rv |= VMX_EXIT_INFO_HAVE_RIP;
4112 if (vmread(VMCS_EXIT_REASON, exit_reason) == 0)
4113 rv |= VMX_EXIT_INFO_HAVE_REASON;
4114 }
4115 return (rv);
4116 }
4117
4118 /*
4119 * svm_handle_exit
4120 *
4121 * Handle exits from the VM by decoding the exit reason and calling various
4122 * subhandlers as needed.
4123 */
4124 int
4125 svm_handle_exit(struct vcpu *vcpu)
4126 {
4127 uint64_t exit_reason, rflags;
4128 int update_rip, ret = 0;
4129 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4130
4131 update_rip = 0;
4132 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4133 rflags = vcpu->vc_gueststate.vg_rflags;
4134
4135 switch (exit_reason) {
4136 case SVM_VMEXIT_VINTR:
4137 if (!(rflags & PSL_I)) {
4138 DPRINTF("%s: impossible interrupt window exit "
4139 "config\n", __func__);
4140 ret = EINVAL;
4141 break;
4142 }
4143
4144 /*
4145 * Guest is now ready for interrupts, so disable interrupt
4146 * window exiting.
4147 */
4148 vmcb->v_irq = 0;
4149 vmcb->v_intr_vector = 0;
4150 vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR;
4151 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | SVM_CLEANBITS_I);
4152
4153 update_rip = 0;
4154 break;
4155 case SVM_VMEXIT_INTR:
4156 update_rip = 0;
4157 break;
4158 case SVM_VMEXIT_SHUTDOWN:
4159 update_rip = 0;
4160 ret = EAGAIN;
4161 break;
4162 case SVM_VMEXIT_NPF:
4163 ret = svm_handle_np_fault(vcpu);
4164 break;
4165 case SVM_VMEXIT_CPUID:
4166 ret = vmm_handle_cpuid(vcpu);
4167 update_rip = 1;
4168 break;
4169 case SVM_VMEXIT_MSR:
4170 ret = svm_handle_msr(vcpu);
4171 update_rip = 1;
4172 break;
4173 case SVM_VMEXIT_XSETBV:
4174 ret = svm_handle_xsetbv(vcpu);
4175 update_rip = 1;
4176 break;
4177 case SVM_VMEXIT_IOIO:
4178 if (svm_handle_inout(vcpu) == 0)
4179 ret = EAGAIN;
4180 break;
4181 case SVM_VMEXIT_HLT:
4182 ret = svm_handle_hlt(vcpu);
4183 update_rip = 1;
4184 break;
4185 case SVM_VMEXIT_MWAIT:
4186 case SVM_VMEXIT_MWAIT_CONDITIONAL:
4187 case SVM_VMEXIT_MONITOR:
4188 case SVM_VMEXIT_VMRUN:
4189 case SVM_VMEXIT_VMMCALL:
4190 case SVM_VMEXIT_VMLOAD:
4191 case SVM_VMEXIT_VMSAVE:
4192 case SVM_VMEXIT_STGI:
4193 case SVM_VMEXIT_CLGI:
4194 case SVM_VMEXIT_SKINIT:
4195 case SVM_VMEXIT_RDTSCP:
4196 case SVM_VMEXIT_ICEBP:
4197 case SVM_VMEXIT_INVLPGA:
4198 ret = vmm_inject_ud(vcpu);
4199 update_rip = 0;
4200 break;
4201 default:
4202 DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
4203 exit_reason, (uint64_t)vcpu->vc_control_pa);
4204 return (EINVAL);
4205 }
4206
4207 if (update_rip) {
4208 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
4209
4210 if (rflags & PSL_T) {
4211 if (vmm_inject_db(vcpu)) {
4212 printf("%s: can't inject #DB exception to "
4213 "guest", __func__);
4214 return (EINVAL);
4215 }
4216 }
4217 }
4218
4219 /* Enable SVME in EFER (must always be set) */
4220 vmcb->v_efer |= EFER_SVME;
4221 svm_set_dirty(vcpu, SVM_CLEANBITS_CR);
4222
4223 return (ret);
4224 }
4225
4226 /*
4227 * vmx_handle_exit
4228 *
4229 * Handle exits from the VM by decoding the exit reason and calling various
4230 * subhandlers as needed.
4231 */
4232 int
4233 vmx_handle_exit(struct vcpu *vcpu)
4234 {
4235 uint64_t exit_reason, rflags, istate;
4236 int update_rip, ret = 0;
4237
4238 update_rip = 0;
4239 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4240 rflags = vcpu->vc_gueststate.vg_rflags;
4241
4242 switch (exit_reason) {
4243 case VMX_EXIT_INT_WINDOW:
4244 if (!(rflags & PSL_I)) {
4245 DPRINTF("%s: impossible interrupt window exit "
4246 "config\n", __func__);
4247 ret = EINVAL;
4248 break;
4249 }
4250
4251 ret = EAGAIN;
4252 update_rip = 0;
4253 break;
4254 case VMX_EXIT_EPT_VIOLATION:
4255 ret = vmx_handle_np_fault(vcpu);
4256 break;
4257 case VMX_EXIT_CPUID:
4258 ret = vmm_handle_cpuid(vcpu);
4259 update_rip = 1;
4260 break;
4261 case VMX_EXIT_IO:
4262 if (vmx_handle_inout(vcpu) == 0)
4263 ret = EAGAIN;
4264 break;
4265 case VMX_EXIT_EXTINT:
4266 vmx_handle_intr(vcpu);
4267 update_rip = 0;
4268 break;
4269 case VMX_EXIT_CR_ACCESS:
4270 ret = vmx_handle_cr(vcpu);
4271 update_rip = 1;
4272 break;
4273 case VMX_EXIT_HLT:
4274 ret = vmx_handle_hlt(vcpu);
4275 update_rip = 1;
4276 break;
4277 case VMX_EXIT_RDMSR:
4278 ret = vmx_handle_rdmsr(vcpu);
4279 update_rip = 1;
4280 break;
4281 case VMX_EXIT_WRMSR:
4282 ret = vmx_handle_wrmsr(vcpu);
4283 update_rip = 1;
4284 break;
4285 case VMX_EXIT_XSETBV:
4286 ret = vmx_handle_xsetbv(vcpu);
4287 update_rip = 1;
4288 break;
4289 case VMX_EXIT_MWAIT:
4290 case VMX_EXIT_MONITOR:
4291 case VMX_EXIT_VMXON:
4292 case VMX_EXIT_VMWRITE:
4293 case VMX_EXIT_VMREAD:
4294 case VMX_EXIT_VMLAUNCH:
4295 case VMX_EXIT_VMRESUME:
4296 case VMX_EXIT_VMPTRLD:
4297 case VMX_EXIT_VMPTRST:
4298 case VMX_EXIT_VMCLEAR:
4299 case VMX_EXIT_VMCALL:
4300 case VMX_EXIT_VMFUNC:
4301 case VMX_EXIT_VMXOFF:
4302 case VMX_EXIT_INVVPID:
4303 case VMX_EXIT_INVEPT:
4304 ret = vmm_inject_ud(vcpu);
4305 update_rip = 0;
4306 break;
4307 case VMX_EXIT_TRIPLE_FAULT:
4308 #ifdef VMM_DEBUG
4309 DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
4310 vcpu->vc_parent->vm_id, vcpu->vc_id);
4311 vmx_vcpu_dump_regs(vcpu);
4312 dump_vcpu(vcpu);
4313 vmx_dump_vmcs(vcpu);
4314 #endif /* VMM_DEBUG */
4315 ret = EAGAIN;
4316 update_rip = 0;
4317 break;
4318 default:
4319 #ifdef VMM_DEBUG
4320 DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
4321 exit_reason, vmx_exit_reason_decode(exit_reason));
4322 #endif /* VMM_DEBUG */
4323 return (EINVAL);
4324 }
4325
4326 if (update_rip) {
4327 if (vmwrite(VMCS_GUEST_IA32_RIP,
4328 vcpu->vc_gueststate.vg_rip)) {
4329 printf("%s: can't advance rip\n", __func__);
4330 return (EINVAL);
4331 }
4332
4333 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST,
4334 &istate)) {
4335 printf("%s: can't read interruptibility state\n",
4336 __func__);
4337 return (EINVAL);
4338 }
4339
4340 /* Interruptibility state 0x3 covers blocking by STI and by MOV SS */
4341 istate &= ~0x3;
4342
4343 if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST,
4344 istate)) {
4345 printf("%s: can't write interruptibility state\n",
4346 __func__);
4347 return (EINVAL);
4348 }
4349
4350 if (rflags & PSL_T) {
4351 if (vmm_inject_db(vcpu)) {
4352 printf("%s: can't inject #DB exception to "
4353 "guest", __func__);
4354 return (EINVAL);
4355 }
4356 }
4357 }
4358
4359 return (ret);
4360 }
4361
4362 /*
4363 * vmm_inject_gp
4364 *
4365 * Injects an #GP exception into the guest VCPU.
4366 *
4367 * Parameters:
4368 * vcpu: vcpu to inject into
4369 *
4370 * Return values:
4371 * Always 0
4372 */
4373 int
4374 vmm_inject_gp(struct vcpu *vcpu)
4375 {
4376 DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
4377 vcpu->vc_gueststate.vg_rip);
4378 vcpu->vc_inject.vie_vector = VMM_EX_GP;
4379 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4380 vcpu->vc_inject.vie_errorcode = 0;
4381
4382 return (0);
4383 }
4384
4385 /*
4386 * vmm_inject_ud
4387 *
4388 * Injects an #UD exception into the guest VCPU.
4389 *
4390 * Parameters:
4391 * vcpu: vcpu to inject into
4392 *
4393 * Return values:
4394 * Always 0
4395 */
4396 int
4397 vmm_inject_ud(struct vcpu *vcpu)
4398 {
4399 DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
4400 vcpu->vc_gueststate.vg_rip);
4401 vcpu->vc_inject.vie_vector = VMM_EX_UD;
4402 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4403 vcpu->vc_inject.vie_errorcode = 0;
4404
4405 return (0);
4406 }
4407
4408 /*
4409 * vmm_inject_db
4410 *
4411 * Injects a #DB exception into the guest VCPU.
4412 *
4413 * Parameters:
4414 * vcpu: vcpu to inject into
4415 *
4416 * Return values:
4417 * Always 0
4418 */
4419 int
4420 vmm_inject_db(struct vcpu *vcpu)
4421 {
4422 DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
4423 vcpu->vc_gueststate.vg_rip);
4424 vcpu->vc_inject.vie_vector = VMM_EX_DB;
4425 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4426 vcpu->vc_inject.vie_errorcode = 0;
4427
4428 return (0);
4429 }
4430
4431 /*
4432 * vmm_get_guest_memtype
4433 *
4434 * Returns the type of memory 'gpa' refers to in the context of vm 'vm'
4435 */
4436 int
4437 vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
4438 {
4439 int i;
4440 struct vm_mem_range *vmr;
4441
4442 /* XXX Use binary search? */
4443 for (i = 0; i < vm->vm_nmemranges; i++) {
4444 vmr = &vm->vm_memranges[i];
4445
4446 /*
4447 * vm_memranges are sorted in ascending order, so once gpa falls below
4448 * this range it cannot be in any of the remaining ranges.
4449 */
4450 if (gpa < vmr->vmr_gpa)
4451 break;
4452
4453 if (gpa < vmr->vmr_gpa + vmr->vmr_size) {
4454 if (vmr->vmr_type == VM_MEM_MMIO)
4455 return (VMM_MEM_TYPE_MMIO);
4456 return (VMM_MEM_TYPE_REGULAR);
4457 }
4458 }
4459
4460 DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
4461 return (VMM_MEM_TYPE_UNKNOWN);
4462 }
4463
4464 /*
4465 * vmx_get_exit_qualification
4466 *
4467 * Return the current VMCS' exit qualification information
4468 */
4469 int
4470 vmx_get_exit_qualification(uint64_t *exit_qualification)
4471 {
4472 if (vmread(VMCS_GUEST_EXIT_QUALIFICATION, exit_qualification)) {
4473 printf("%s: can't extract exit qual\n", __func__);
4474 return (EINVAL);
4475 }
4476
4477 return (0);
4478 }
4479
4480 /*
4481 * vmx_get_guest_faulttype
4482 *
4483 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4484 * this PCPU.
4485 */
4486 int
4487 vmx_get_guest_faulttype(void)
4488 {
4489 uint64_t exit_qual;
4490 uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
4491 IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
4492 vm_prot_t prot, was_prot;
4493
4494 if (vmx_get_exit_qualification(&exit_qual))
4495 return (-1);
4496
4497 if ((exit_qual & presentmask) == 0)
4498 return VM_FAULT_INVALID;
4499
4500 was_prot = 0;
4501 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
4502 was_prot |= PROT_READ;
4503 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
4504 was_prot |= PROT_WRITE;
4505 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
4506 was_prot |= PROT_EXEC;
4507
4508 prot = 0;
4509 if (exit_qual & IA32_VMX_EPT_FAULT_READ)
4510 prot = PROT_READ;
4511 else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
4512 prot = PROT_WRITE;
4513 else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
4514 prot = PROT_EXEC;
4515
4516 if ((was_prot & prot) == 0)
4517 return VM_FAULT_PROTECT;
4518
4519 return (-1);
4520 }
4521
4522 /*
4523 * svm_get_guest_faulttype
4524 *
4525 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4526 * this PCPU.
4527 */
4528 int
4529 svm_get_guest_faulttype(struct vmcb *vmcb)
4530 {
4531 if (!(vmcb->v_exitinfo1 & 0x1))
4532 return VM_FAULT_INVALID;
4533 return VM_FAULT_PROTECT;
4534 }
4535
4536 /*
4537 * svm_fault_page
4538 *
4539 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4540 * at address 'gpa'.
4541 */
4542 int
4543 svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
4544 {
4545 paddr_t pa = trunc_page(gpa);
4546 int ret;
4547
4548 ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE,
4549 PROT_READ | PROT_WRITE | PROT_EXEC);
4550 if (ret)
4551 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4552 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4553
4554 return (ret);
4555 }
4556
4557 /*
4558 * svm_handle_np_fault
4559 *
4560 * High level nested paging handler for SVM. Verifies that a fault is for a
4561 * valid memory region, then faults a page, or aborts otherwise.
4562 */
4563 int
4564 svm_handle_np_fault(struct vcpu *vcpu)
4565 {
4566 uint64_t gpa;
4567 int gpa_memtype, ret = 0;
4568 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4569 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4570 struct cpu_info *ci = curcpu();
4571
4572 memset(vee, 0, sizeof(*vee));
4573
4574 gpa = vmcb->v_exitinfo2;
4575
4576 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4577 switch (gpa_memtype) {
4578 case VMM_MEM_TYPE_REGULAR:
4579 vee->vee_fault_type = VEE_FAULT_HANDLED;
4580 ret = svm_fault_page(vcpu, gpa);
4581 break;
4582 case VMM_MEM_TYPE_MMIO:
4583 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4584 if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
4585 vee->vee_insn_len = vmcb->v_n_bytes_fetched;
4586 memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
4587 sizeof(vee->vee_insn_bytes));
4588 vee->vee_insn_info |= VEE_BYTES_VALID;
4589 }
4590 ret = EAGAIN;
4591 break;
4592 default:
4593 printf("%s: unknown memory type %d for GPA 0x%llx\n",
4594 __func__, gpa_memtype, gpa);
4595 return (EINVAL);
4596 }
4597
4598 return (ret);
4599 }
4600
4601 /*
4602 * vmx_fault_page
4603 *
4604 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4605 * at address 'gpa'.
4606 *
4607 * Parameters:
4608 * vcpu: guest VCPU requiring the page to be faulted into the UVM map
4609 * gpa: guest physical address that triggered the fault
4610 *
4611 * Return Values:
4612 * 0: if successful
4613 * EINVAL: if fault type could not be determined or VMCS reload fails
4614 * EAGAIN: if a protection fault occurred, ie writing to a read-only page
4615 * errno: if uvm_fault_wire() fails to wire in the page
4616 */
4617 int
4618 vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
4619 {
4620 int fault_type, ret;
4621 paddr_t pa = trunc_page(gpa);
4622
4623 fault_type = vmx_get_guest_faulttype();
4624 switch (fault_type) {
4625 case -1:
4626 printf("%s: invalid fault type\n", __func__);
4627 return (EINVAL);
4628 case VM_FAULT_PROTECT:
4629 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
4630 return (EAGAIN);
4631 default:
4632 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED;
4633 break;
4634 }
4635
4636 /* We may sleep during uvm_fault_wire(), so reload VMCS. */
4637 vcpu->vc_last_pcpu = curcpu();
4638 ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE,
4639 PROT_READ | PROT_WRITE | PROT_EXEC);
4640 if (vcpu_reload_vmcs_vmx(vcpu)) {
4641 printf("%s: failed to reload vmcs\n", __func__);
4642 return (EINVAL);
4643 }
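/*
 * Sleeping in uvm_fault_wire() may have moved us to another CPU or
 * allowed another VMCS to become current on this one; the reload above
 * makes this vcpu's VMCS current again before any further
 * vmread/vmwrite.
 */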
4644
4645 if (ret)
4646 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4647 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4648
4649 return (ret);
4650 }
4651
4652 /*
4653 * vmx_handle_np_fault
4654 *
4655 * High level nested paging handler for VMX. Verifies that a fault is for a
4656 * valid memory region, then faults a page, or aborts otherwise.
4657 */
4658 int
4659 vmx_handle_np_fault(struct vcpu *vcpu)
4660 {
4661 uint64_t insn_len = 0, gpa;
4662 int gpa_memtype, ret = 0;
4663 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4664
4665 memset(vee, 0, sizeof(*vee));
4666
4667 if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) {
4668 printf("%s: cannot extract faulting pa\n", __func__);
4669 return (EINVAL);
4670 }
4671
4672 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4673 switch (gpa_memtype) {
4674 case VMM_MEM_TYPE_REGULAR:
4675 vee->vee_fault_type = VEE_FAULT_HANDLED;
4676 ret = vmx_fault_page(vcpu, gpa);
4677 break;
4678 case VMM_MEM_TYPE_MMIO:
4679 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4680 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) ||
4681 insn_len == 0 || insn_len > 15) {
4682 printf("%s: failed to extract instruction length\n",
4683 __func__);
4684 ret = EINVAL;
4685 } else {
4686 vee->vee_insn_len = (uint32_t)insn_len;
4687 vee->vee_insn_info |= VEE_LEN_VALID;
4688 ret = EAGAIN;
4689 }
4690 break;
4691 default:
4692 printf("%s: unknown memory type %d for GPA 0x%llx\n",
4693 __func__, gpa_memtype, gpa);
4694 return (EINVAL);
4695 }
4696
4697 return (ret);
4698 }
4699
4700 /*
4701 * vmm_get_guest_cpu_cpl
4702 *
4703 * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the
4704 * VMCS field for the DPL of SS (this seems odd, but is documented that way
4705 * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl'
4706 * field, as per the APM.
4707 *
4708 * Parameters:
4709 * vcpu: guest VCPU for which CPL is to be checked
4710 *
4711 * Return Values:
4712 * -1: the CPL could not be determined
4713 * 0-3 indicating the current CPL. For real mode operation, 0 is returned.
4714 */
4715 int
4716 vmm_get_guest_cpu_cpl(struct vcpu *vcpu)
4717 {
4718 int mode;
4719 struct vmcb *vmcb;
4720 uint64_t ss_ar;
4721
4722 mode = vmm_get_guest_cpu_mode(vcpu);
4723
4724 if (mode == VMM_CPU_MODE_UNKNOWN)
4725 return (-1);
4726
4727 if (mode == VMM_CPU_MODE_REAL)
4728 return (0);
4729
4730 if (vmm_softc->mode == VMM_MODE_RVI) {
4731 vmcb = (struct vmcb *)vcpu->vc_control_va;
4732 return (vmcb->v_cpl);
4733 } else if (vmm_softc->mode == VMM_MODE_EPT) {
4734 if (vmread(VMCS_GUEST_IA32_SS_AR, &ss_ar))
4735 return (-1);
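/* DPL occupies bits 6:5 of the segment access-rights field. */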
4736 return ((ss_ar & 0x60) >> 5);
4737 } else
4738 return (-1);
4739 }
4740
4741 /*
4742 * vmm_get_guest_cpu_mode
4743 *
4744 * Determines current CPU mode of 'vcpu'.
4745 *
4746 * Parameters:
4747 * vcpu: guest VCPU for which mode is to be checked
4748 *
4749 * Return Values:
4750 * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be
4751 * ascertained.
4752 */
4753 int
4754 vmm_get_guest_cpu_mode(struct vcpu *vcpu)
4755 {
4756 uint64_t cr0, efer, cs_ar;
4757 uint8_t l, dib;
4758 struct vmcb *vmcb;
4759 struct vmx_msr_store *msr_store;
4760
4761 if (vmm_softc->mode == VMM_MODE_RVI) {
4762 vmcb = (struct vmcb *)vcpu->vc_control_va;
4763 cr0 = vmcb->v_cr0;
4764 efer = vmcb->v_efer;
4765 cs_ar = vmcb->v_cs.vs_attr;
4766 cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000);
4767 } else if (vmm_softc->mode == VMM_MODE_EPT) {
4768 if (vmread(VMCS_GUEST_IA32_CR0, &cr0))
4769 return (VMM_CPU_MODE_UNKNOWN);
4770 if (vmread(VMCS_GUEST_IA32_CS_AR, &cs_ar))
4771 return (VMM_CPU_MODE_UNKNOWN);
4772 msr_store =
4773 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
4774 efer = msr_store[VCPU_REGS_EFER].vms_data;
4775 } else
4776 return (VMM_CPU_MODE_UNKNOWN);
4777
4778 l = (cs_ar & 0x2000) >> 13;
4779 dib = (cs_ar & 0x4000) >> 14;
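/* In this access-rights layout, bit 13 is CS.L and bit 14 is CS.D/B. */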
4780
4781 /* Check CR0.PE */
4782 if (!(cr0 & CR0_PE))
4783 return (VMM_CPU_MODE_REAL);
4784
4785 /* Check EFER */
4786 if (efer & EFER_LMA) {
4787 /* Could be compat or long mode, check CS.L */
4788 if (l)
4789 return (VMM_CPU_MODE_LONG);
4790 else
4791 return (VMM_CPU_MODE_COMPAT);
4792 }
4793
4794 /* Check prot vs prot32 */
4795 if (dib)
4796 return (VMM_CPU_MODE_PROT32);
4797 else
4798 return (VMM_CPU_MODE_PROT);
4799 }
4800
4801 /*
4802 * svm_handle_inout
4803 *
4804 * Exit handler for IN/OUT instructions.
4805 *
4806 * Parameters:
4807 * vcpu: The VCPU where the IN/OUT instruction occurred
4808 *
4809 * Return values:
4810 * 0: if successful
4811 * EINVAL: an invalid IN/OUT instruction was encountered
4812 */
4813 int
4814 svm_handle_inout(struct vcpu *vcpu)
4815 {
4816 uint64_t insn_length, exit_qual;
4817 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4818
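/*
 * For IOIO intercepts the APM defines EXITINFO2 as the rIP of the
 * instruction following the IN/OUT, so subtracting the saved rIP yields
 * the instruction length; EXITINFO1 holds the decoded direction, size,
 * and port information parsed below.
 */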
4819 insn_length = vmcb->v_exitinfo2 - vmcb->v_rip;
4820 exit_qual = vmcb->v_exitinfo1;
4821
4822 /* Bit 0 - direction */
4823 if (exit_qual & 0x1)
4824 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4825 else
4826 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4827 /* Bit 2 - string instruction? */
4828 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2;
4829 /* Bit 3 - REP prefix? */
4830 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3;
4831
4832 /* Bits 4:6 - size of exit */
4833 if (exit_qual & 0x10)
4834 vcpu->vc_exit.vei.vei_size = 1;
4835 else if (exit_qual & 0x20)
4836 vcpu->vc_exit.vei.vei_size = 2;
4837 else if (exit_qual & 0x40)
4838 vcpu->vc_exit.vei.vei_size = 4;
4839
4840 /* Bits 16:31 - port */
4841 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4842 /* Data */
4843 vcpu->vc_exit.vei.vei_data = vmcb->v_rax;
4844
4845 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4846
4847 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4848 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4849
4850 return (0);
4851 }
4852
4853 /*
4854 * vmx_handle_inout
4855 *
4856 * Exit handler for IN/OUT instructions.
4857 *
4858 * Parameters:
4859 * vcpu: The VCPU where the IN/OUT instruction occurred
4860 *
4861 * Return values:
4862 * 0: if successful
4863 * EINVAL: invalid IN/OUT instruction or vmread failures occurred
4864 */
4865 int
4866 vmx_handle_inout(struct vcpu *vcpu)
4867 {
4868 uint64_t insn_length, exit_qual;
4869
4870 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4871 printf("%s: can't obtain instruction length\n", __func__);
4872 return (EINVAL);
4873 }
4874
4875 if (vmx_get_exit_qualification(&exit_qual)) {
4876 printf("%s: can't get exit qual\n", __func__);
4877 return (EINVAL);
4878 }
4879
4880 /* Bits 0:2 - size of exit */
4881 vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1;
4882 /* Bit 3 - direction */
4883 if ((exit_qual & 0x8) >> 3)
4884 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4885 else
4886 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4887 /* Bit 4 - string instruction? */
4888 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4;
4889 /* Bit 5 - REP prefix? */
4890 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5;
4891 /* Bit 6 - Operand encoding */
4892 vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6;
4893 /* Bits 16:31 - port */
4894 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4895 /* Data */
4896 vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax;
4897
4898 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4899
4900 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4901 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4902
4903 return (0);
4904 }
4905
4906 /*
4907 * vmx_load_pdptes
4908 *
4909 * Update the PDPTEs in the VMCS with the values currently indicated by the
4910 * guest CR3. This is used for 32-bit PAE guests when enabling paging.
4911 *
4912 * Parameters
4913 * vcpu: The vcpu whose PDPTEs should be loaded
4914 *
4915 * Return values:
4916 * 0: if successful
4917 * EINVAL: if the PDPTEs could not be loaded
4918 * ENOMEM: memory allocation failure
4919 */
4920 int
4921 vmx_load_pdptes(struct vcpu *vcpu)
4922 {
4923 uint64_t cr3, cr3_host_phys;
4924 vaddr_t cr3_host_virt;
4925 pd_entry_t *pdptes;
4926 int ret;
4927
4928 if (vmread(VMCS_GUEST_IA32_CR3, &cr3)) {
4929 printf("%s: can't read guest cr3\n", __func__);
4930 return (EINVAL);
4931 }
4932
4933 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
4934 (paddr_t *)&cr3_host_phys)) {
4935 DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
4936 __func__);
4937 if (vmwrite(VMCS_GUEST_PDPTE0, 0)) {
4938 printf("%s: can't write guest PDPTE0\n", __func__);
4939 return (EINVAL);
4940 }
4941
4942 if (vmwrite(VMCS_GUEST_PDPTE1, 0)) {
4943 printf("%s: can't write guest PDPTE1\n", __func__);
4944 return (EINVAL);
4945 }
4946
4947 if (vmwrite(VMCS_GUEST_PDPTE2, 0)) {
4948 printf("%s: can't write guest PDPTE2\n", __func__);
4949 return (EINVAL);
4950 }
4951
4952 if (vmwrite(VMCS_GUEST_PDPTE3, 0)) {
4953 printf("%s: can't write guest PDPTE3\n", __func__);
4954 return (EINVAL);
4955 }
4956 return (0);
4957 }
4958
4959 ret = 0;
4960
4961 /* We may sleep during km_alloc(9), so reload VMCS. */
4962 vcpu->vc_last_pcpu = curcpu();
4963 cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none,
4964 &kd_waitok);
4965 if (vcpu_reload_vmcs_vmx(vcpu)) {
4966 printf("%s: failed to reload vmcs\n", __func__);
4967 ret = EINVAL;
4968 goto exit;
4969 }
4970
4971 if (!cr3_host_virt) {
4972 printf("%s: can't allocate address for guest CR3 mapping\n",
4973 __func__);
4974 return (ENOMEM);
4975 }
4976
4977 pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ);
4978
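/*
 * In PAE mode %cr3 points at a 32-byte-aligned table of four 64-bit
 * PDPTEs; copy them from the mapped guest page into the VMCS.
 */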
4979 pdptes = (pd_entry_t *)cr3_host_virt;
4980 if (vmwrite(VMCS_GUEST_PDPTE0, pdptes[0])) {
4981 printf("%s: can't write guest PDPTE0\n", __func__);
4982 ret = EINVAL;
4983 goto exit;
4984 }
4985
4986 if (vmwrite(VMCS_GUEST_PDPTE1, pdptes[1])) {
4987 printf("%s: can't write guest PDPTE1\n", __func__);
4988 ret = EINVAL;
4989 goto exit;
4990 }
4991
4992 if (vmwrite(VMCS_GUEST_PDPTE2, pdptes[2])) {
4993 printf("%s: can't write guest PDPTE2\n", __func__);
4994 ret = EINVAL;
4995 goto exit;
4996 }
4997
4998 if (vmwrite(VMCS_GUEST_PDPTE3, pdptes[3])) {
4999 printf("%s: can't write guest PDPTE3\n", __func__);
5000 ret = EINVAL;
5001 goto exit;
5002 }
5003
5004 exit:
5005 pmap_kremove(cr3_host_virt, PAGE_SIZE);
5006
5007 /* km_free(9) might sleep, so we need to reload VMCS. */
5008 vcpu->vc_last_pcpu = curcpu();
5009 km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none);
5010 if (vcpu_reload_vmcs_vmx(vcpu)) {
5011 printf("%s: failed to reload vmcs after km_free\n", __func__);
5012 ret = EINVAL;
5013 }
5014
5015 return (ret);
5016 }
5017
5018 /*
5019 * vmx_handle_cr0_write
5020 *
5021 * Write handler for CR0. This function ensures valid values are written into
5022 * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc).
5023 *
5024 * Parameters
5025 * vcpu: The vcpu taking the cr0 write exit
5026 * r: The guest's desired (incoming) cr0 value
5027 *
5028 * Return values:
5029 * 0: if successful
5030 * EINVAL: if an error occurred
5031 */
5032 int
5033 vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r)
5034 {
5035 struct vmx_msr_store *msr_store;
5036 struct vmx_invvpid_descriptor vid;
5037 uint64_t ectls, oldcr0, cr4, mask;
5038 int ret;
5039
5040 /* Check must-be-0 bits */
5041 mask = vcpu->vc_vmx_cr0_fixed1;
5042 if (~r & mask) {
5043 /* Inject #GP, let the guest handle it */
5044 DPRINTF("%s: guest set invalid bits in %%cr0. Zeros "
5045 "mask=0x%llx, data=0x%llx\n", __func__,
5046 vcpu->vc_vmx_cr0_fixed1, r);
5047 vmm_inject_gp(vcpu);
5048 return (0);
5049 }
5050
5051 /* Check must-be-1 bits */
5052 mask = vcpu->vc_vmx_cr0_fixed0;
5053 if ((r & mask) != mask) {
5054 /* Inject #GP, let the guest handle it */
5055 DPRINTF("%s: guest set invalid bits in %%cr0. Ones "
5056 "mask=0x%llx, data=0x%llx\n", __func__,
5057 vcpu->vc_vmx_cr0_fixed0, r);
5058 vmm_inject_gp(vcpu);
5059 return (0);
5060 }
5061
5062 if (r & 0xFFFFFFFF00000000ULL) {
5063 DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid,"
5064 " inject #GP, cr0=0x%llx\n", __func__, r);
5065 vmm_inject_gp(vcpu);
5066 return (0);
5067 }
5068
5069 if ((r & CR0_PG) && (r & CR0_PE) == 0) {
5070 DPRINTF("%s: PG flag set when the PE flag is clear,"
5071 " inject #GP, cr0=0x%llx\n", __func__, r);
5072 vmm_inject_gp(vcpu);
5073 return (0);
5074 }
5075
5076 if ((r & CR0_NW) && (r & CR0_CD) == 0) {
5077 DPRINTF("%s: NW flag set when the CD flag is clear,"
5078 " inject #GP, cr0=0x%llx\n", __func__, r);
5079 vmm_inject_gp(vcpu);
5080 return (0);
5081 }
5082
5083 if (vmread(VMCS_GUEST_IA32_CR0, &oldcr0)) {
5084 printf("%s: can't read guest cr0\n", __func__);
5085 return (EINVAL);
5086 }
5087
5088 /* CR0 must always have NE set */
5089 r |= CR0_NE;
5090
5091 if (vmwrite(VMCS_GUEST_IA32_CR0, r)) {
5092 printf("%s: can't write guest cr0\n", __func__);
5093 return (EINVAL);
5094 }
5095
5096 /* If the guest hasn't enabled paging ... */
5097 if (!(r & CR0_PG) && (oldcr0 & CR0_PG)) {
5098 /* Paging was disabled (prev. enabled) - Flush TLB */
5099 if (vcpu->vc_vmx_vpid_enabled) {
5100 vid.vid_vpid = vcpu->vc_vpid;
5101 vid.vid_addr = 0;
5102 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
5103 }
5104 } else if (!(oldcr0 & CR0_PG) && (r & CR0_PG)) {
5105 /*
5106 * The guest has enabled paging, so the IA32_VMX_IA32E_MODE_GUEST entry
5107 * control must be set to match EFER_LME.
5108 */
5109 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5110
5111 if (vmread(VMCS_ENTRY_CTLS, &ectls)) {
5112 printf("%s: can't read entry controls", __func__);
5113 return (EINVAL);
5114 }
5115
5116 if (msr_store[VCPU_REGS_EFER].vms_data & EFER_LME)
5117 ectls |= IA32_VMX_IA32E_MODE_GUEST;
5118 else
5119 ectls &= ~IA32_VMX_IA32E_MODE_GUEST;
5120
5121 if (vmwrite(VMCS_ENTRY_CTLS, ectls)) {
5122 printf("%s: can't write entry controls", __func__);
5123 return (EINVAL);
5124 }
5125
5126 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5127 printf("%s: can't read guest cr4\n", __func__);
5128 return (EINVAL);
5129 }
5130
5131 /* Load PDPTEs if PAE guest enabling paging */
5132 if (cr4 & CR4_PAE) {
5133 ret = vmx_load_pdptes(vcpu);
5134
5135 if (ret) {
5136 printf("%s: updating PDPTEs failed\n", __func__);
5137 return (ret);
5138 }
5139 }
5140 }
5141
5142 return (0);
5143 }
5144
5145 /*
5146 * vmx_handle_cr4_write
5147 *
5148 * Write handler for CR4. This function ensures valid values are written into
5149 * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc).
5150 *
5151 * Parameters
5152 * vcpu: The vcpu taking the cr4 write exit
5153 * r: The guest's desired (incoming) cr4 value
5154 *
5155 * Return values:
5156 * 0: if successful
5157 * EINVAL: if an error occurred
5158 */
5159 int
5160 vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
5161 {
5162 uint64_t mask;
5163
5164 /* Check must-be-0 bits */
5165 mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
5166 if (r & mask) {
5167 /* Inject #GP, let the guest handle it */
5168 DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
5169 "mask=0x%llx, data=0x%llx\n", __func__,
5170 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
5171 r);
5172 vmm_inject_gp(vcpu);
5173 return (0);
5174 }
5175
5176 /* Check must-be-1 bits */
5177 mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
5178 if ((r & mask) != mask) {
5179 /* Inject #GP, let the guest handle it */
5180 DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
5181 "mask=0x%llx, data=0x%llx\n", __func__,
5182 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
5183 r);
5184 vmm_inject_gp(vcpu);
5185 return (0);
5186 }
5187
5188 /* CR4_VMXE must always be enabled */
5189 r |= CR4_VMXE;
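/*
 * CR4.VMXE is one of the VMX fixed bits, so the value loaded into the
 * guest's %cr4 must keep it set for VM entry to succeed.
 */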
5190
5191 if (vmwrite(VMCS_GUEST_IA32_CR4, r)) {
5192 printf("%s: can't write guest cr4\n", __func__);
5193 return (EINVAL);
5194 }
5195
5196 return (0);
5197 }
5198
5199 /*
5200 * vmx_handle_cr
5201 *
5202 * Handle reads/writes to control registers (except CR3)
5203 */
5204 int
5205 vmx_handle_cr(struct vcpu *vcpu)
5206 {
5207 uint64_t insn_length, exit_qual, r;
5208 uint8_t crnum, dir, reg;
5209
5210 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5211 printf("%s: can't obtain instruction length\n", __func__);
5212 return (EINVAL);
5213 }
5214
5215 if (vmx_get_exit_qualification(&exit_qual)) {
5216 printf("%s: can't get exit qual\n", __func__);
5217 return (EINVAL);
5218 }
5219
5220 /* Low 4 bits of exit_qual represent the CR number */
5221 crnum = exit_qual & 0xf;
5222
5223 /*
5224 * Bits 5:4 indicate the direction of operation (or special CR-modifying
5225 * instruction)
5226 */
5227 dir = (exit_qual & 0x30) >> 4;
5228
5229 /* Bits 11:8 encode the source/target register */
5230 reg = (exit_qual & 0xf00) >> 8;
5231
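/*
 * The register field uses the standard x86-64 encoding
 * (0 = %rax ... 15 = %r15); %rsp (4) is not cached in vc_gueststate,
 * so it is read from the VMCS instead.
 */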
5232 switch (dir) {
5233 case CR_WRITE:
5234 if (crnum == 0 || crnum == 4) {
5235 switch (reg) {
5236 case 0: r = vcpu->vc_gueststate.vg_rax; break;
5237 case 1: r = vcpu->vc_gueststate.vg_rcx; break;
5238 case 2: r = vcpu->vc_gueststate.vg_rdx; break;
5239 case 3: r = vcpu->vc_gueststate.vg_rbx; break;
5240 case 4: if (vmread(VMCS_GUEST_IA32_RSP, &r)) {
5241 printf("%s: unable to read guest "
5242 "RSP\n", __func__);
5243 return (EINVAL);
5244 }
5245 break;
5246 case 5: r = vcpu->vc_gueststate.vg_rbp; break;
5247 case 6: r = vcpu->vc_gueststate.vg_rsi; break;
5248 case 7: r = vcpu->vc_gueststate.vg_rdi; break;
5249 case 8: r = vcpu->vc_gueststate.vg_r8; break;
5250 case 9: r = vcpu->vc_gueststate.vg_r9; break;
5251 case 10: r = vcpu->vc_gueststate.vg_r10; break;
5252 case 11: r = vcpu->vc_gueststate.vg_r11; break;
5253 case 12: r = vcpu->vc_gueststate.vg_r12; break;
5254 case 13: r = vcpu->vc_gueststate.vg_r13; break;
5255 case 14: r = vcpu->vc_gueststate.vg_r14; break;
5256 case 15: r = vcpu->vc_gueststate.vg_r15; break;
5257 }
5258 DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n",
5259 __func__, crnum, vcpu->vc_gueststate.vg_rip, r);
5260 }
5261
5262 if (crnum == 0)
5263 vmx_handle_cr0_write(vcpu, r);
5264
5265 if (crnum == 4)
5266 vmx_handle_cr4_write(vcpu, r);
5267
5268 break;
5269 case CR_READ:
5270 DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum,
5271 vcpu->vc_gueststate.vg_rip);
5272 break;
5273 case CR_CLTS:
5274 DPRINTF("%s: clts instruction @ %llx\n", __func__,
5275 vcpu->vc_gueststate.vg_rip);
5276 break;
5277 case CR_LMSW:
5278 DPRINTF("%s: lmsw instruction @ %llx\n", __func__,
5279 vcpu->vc_gueststate.vg_rip);
5280 break;
5281 default:
5282 DPRINTF("%s: unknown cr access @ %llx\n", __func__,
5283 vcpu->vc_gueststate.vg_rip);
5284 }
5285
5286 vcpu->vc_gueststate.vg_rip += insn_length;
5287
5288 return (0);
5289 }
5290
5291 /*
5292 * vmx_handle_rdmsr
5293 *
5294 * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access
5295 * and won't end up here. This handler is primarily intended to catch otherwise
5296 * unknown MSR access for possible later inclusion in the bitmap list. For
5297 * each MSR access that ends up here, we log the access (when VMM_DEBUG is
5298 * enabled).
5299 *
5300 * Parameters:
5301 * vcpu: vcpu structure containing instruction info causing the exit
5302 *
5303 * Return value:
5304 * 0: The operation was successful
5305 * EINVAL: An error occurred
5306 */
5307 int
5308 vmx_handle_rdmsr(struct vcpu *vcpu)
5309 {
5310 uint64_t insn_length;
5311 uint64_t *rax, *rdx;
5312 uint64_t *rcx;
5313 int ret;
5314
5315 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5316 printf("%s: can't obtain instruction length\n", __func__);
5317 return (EINVAL);
5318 }
5319
5320 if (insn_length != 2) {
5321 DPRINTF("%s: RDMSR with instruction length %lld not "
5322 "supported\n", __func__, insn_length);
5323 return (EINVAL);
5324 }
5325
5326 rax = &vcpu->vc_gueststate.vg_rax;
5327 rcx = &vcpu->vc_gueststate.vg_rcx;
5328 rdx = &vcpu->vc_gueststate.vg_rdx;
5329
5330 switch (*rcx) {
5331 case MSR_BIOS_SIGN:
5332 case MSR_PLATFORM_ID:
5333 /* Ignored */
5334 *rax = 0;
5335 *rdx = 0;
5336 break;
5337 case MSR_CR_PAT:
5338 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5339 *rdx = (vcpu->vc_shadow_pat >> 32);
5340 break;
5341 default:
5342 /* Unsupported MSRs causes #GP exception, don't advance %rip */
5343 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
5344 __func__, *rcx);
5345 ret = vmm_inject_gp(vcpu);
5346 return (ret);
5347 }
5348
5349 vcpu->vc_gueststate.vg_rip += insn_length;
5350
5351 return (0);
5352 }
5353
5354 /*
5355 * vmx_handle_xsetbv
5356 *
5357 * VMX-specific part of the xsetbv instruction exit handler
5358 *
5359 * Parameters:
5360 * vcpu: vcpu structure containing instruction info causing the exit
5361 *
5362 * Return value:
5363 * 0: The operation was successful
5364 * EINVAL: An error occurred
5365 */
5366 int
5367 vmx_handle_xsetbv(struct vcpu *vcpu)
5368 {
5369 uint64_t insn_length, *rax;
5370 int ret;
5371
5372 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5373 printf("%s: can't obtain instruction length\n", __func__);
5374 return (EINVAL);
5375 }
5376
5377 /* All XSETBV instructions are 3 bytes */
5378 if (insn_length != 3) {
5379 DPRINTF("%s: XSETBV with instruction length %lld not "
5380 "supported\n", __func__, insn_length);
5381 return (EINVAL);
5382 }
5383
5384 rax = &vcpu->vc_gueststate.vg_rax;
5385
5386 ret = vmm_handle_xsetbv(vcpu, rax);
5387
5388 vcpu->vc_gueststate.vg_rip += insn_length;
5389
5390 return ret;
5391 }
5392
5393 /*
5394 * svm_handle_xsetbv
5395 *
5396 * SVM-specific part of the xsetbv instruction exit handler
5397 *
5398 * Parameters:
5399 * vcpu: vcpu structure containing instruction info causing the exit
5400 *
5401 * Return value:
5402 * 0: The operation was successful
5403 * EINVAL: An error occurred
5404 */
5405 int
5406 svm_handle_xsetbv(struct vcpu *vcpu)
5407 {
5408 uint64_t insn_length, *rax;
5409 int ret;
5410 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5411
5412 /* All XSETBV instructions are 3 bytes */
5413 insn_length = 3;
5414
5415 rax = &vmcb->v_rax;
5416
5417 ret = vmm_handle_xsetbv(vcpu, rax);
5418
5419 vcpu->vc_gueststate.vg_rip += insn_length;
5420
5421 return ret;
5422 }
5423
5424 /*
5425 * vmm_handle_xsetbv
5426 *
5427 * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
5428 * limited to the xsave_mask in use in the host.
5429 *
5430 * Parameters:
5431 * vcpu: vcpu structure containing instruction info causing the exit
5432 * rax: pointer to guest %rax
5433 *
5434 * Return value:
5435 * 0: The operation was successful
5436 * EINVAL: An error occurred
5437 */
5438 int
5439 vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax)
5440 {
5441 uint64_t *rdx, *rcx, val;
5442
5443 rcx = &vcpu->vc_gueststate.vg_rcx;
5444 rdx = &vcpu->vc_gueststate.vg_rdx;
5445
5446 if (vmm_get_guest_cpu_cpl(vcpu) != 0) {
5447 DPRINTF("%s: guest cpl not zero\n", __func__);
5448 return (vmm_inject_gp(vcpu));
5449 }
5450
5451 if (*rcx != 0) {
5452 DPRINTF("%s: guest specified invalid xcr register number "
5453 "%lld\n", __func__, *rcx);
5454 return (vmm_inject_gp(vcpu));
5455 }
5456
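/* XSETBV takes the XCR number in %ecx and the new value in %edx:%eax. */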
5457 val = *rax + (*rdx << 32);
5458 if (val & ~xsave_mask) {
5459 DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n",
5460 __func__, val);
5461 return (vmm_inject_gp(vcpu));
5462 }
5463
5464 vcpu->vc_gueststate.vg_xcr0 = val;
5465
5466 return (0);
5467 }
5468
5469 /*
5470 * vmx_handle_misc_enable_msr
5471 *
5472 * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We
5473 * limit what the guest can write to this MSR (certain hardware-related
5474 * settings like speedstep, etc).
5475 *
5476 * Parameters:
5477 * vcpu: vcpu structure containing information about the wrmsr causing this
5478 * exit
5479 */
5480 void
5481 vmx_handle_misc_enable_msr(struct vcpu *vcpu)
5482 {
5483 uint64_t *rax, *rdx;
5484 struct vmx_msr_store *msr_store;
5485
5486 rax = &vcpu->vc_gueststate.vg_rax;
5487 rdx = &vcpu->vc_gueststate.vg_rdx;
5488 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5489
5490 /* Filter out guest writes to TCC, EIST, and xTPR */
5491 *rax &= ~(MISC_ENABLE_TCC | MISC_ENABLE_EIST_ENABLED |
5492 MISC_ENABLE_xTPR_MESSAGE_DISABLE);
5493
5494 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = *rax | (*rdx << 32);
5495 }
5496
5497 /*
5498 * vmx_handle_wrmsr
5499 *
5500 * Handler for wrmsr instructions. This handler logs the access, and discards
5501 * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end
5502 * up here (it will be whitelisted in the MSR bitmap).
5503 *
5504 * Parameters:
5505 * vcpu: vcpu structure containing instruction info causing the exit
5506 *
5507 * Return value:
5508 * 0: The operation was successful
5509 * EINVAL: An error occurred
5510 */
5511 int
5512 vmx_handle_wrmsr(struct vcpu *vcpu)
5513 {
5514 uint64_t insn_length, val;
5515 uint64_t *rax, *rdx, *rcx;
5516 int ret;
5517
5518 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5519 printf("%s: can't obtain instruction length\n", __func__);
5520 return (EINVAL);
5521 }
5522
5523 if (insn_length != 2) {
5524 DPRINTF("%s: WRMSR with instruction length %lld not "
5525 "supported\n", __func__, insn_length);
5526 return (EINVAL);
5527 }
5528
5529 rax = &vcpu->vc_gueststate.vg_rax;
5530 rcx = &vcpu->vc_gueststate.vg_rcx;
5531 rdx = &vcpu->vc_gueststate.vg_rdx;
5532 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5533
5534 switch (*rcx) {
5535 case MSR_CR_PAT:
5536 if (!vmm_pat_is_valid(val)) {
5537 ret = vmm_inject_gp(vcpu);
5538 return (ret);
5539 }
5540 vcpu->vc_shadow_pat = val;
5541 break;
5542 case MSR_MISC_ENABLE:
5543 vmx_handle_misc_enable_msr(vcpu);
5544 break;
5545 case MSR_SMM_MONITOR_CTL:
5546 /*
5547 * 34.15.5 - Enabling dual monitor treatment
5548 *
5549 * Unsupported, so inject #GP and return without
5550 * advancing %rip.
5551 */
5552 ret = vmm_inject_gp(vcpu);
5553 return (ret);
5554 case KVM_MSR_SYSTEM_TIME:
5555 vmm_init_pvclock(vcpu,
5556 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5557 break;
5558 #ifdef VMM_DEBUG
5559 default:
5560 /*
5561 * Log the access, to be able to identify unknown MSRs
5562 */
5563 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5564 "written from guest=0x%llx:0x%llx\n", __func__,
5565 *rcx, *rdx, *rax);
5566 #endif /* VMM_DEBUG */
5567 }
5568
5569 vcpu->vc_gueststate.vg_rip += insn_length;
5570
5571 return (0);
5572 }
5573
5574 /*
5575 * svm_handle_msr
5576 *
5577 * Handler for MSR instructions.
5578 *
5579 * Parameters:
5580 * vcpu: vcpu structure containing instruction info causing the exit
5581 *
5582 * Return value:
5583 * Always 0 (successful)
5584 */
5585 int
5586 svm_handle_msr(struct vcpu *vcpu)
5587 {
5588 uint64_t insn_length, val;
5589 uint64_t *rax, *rcx, *rdx;
5590 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5591 int ret;
5592
5593 /* XXX: Validate RDMSR / WRMSR insn_length */
5594 insn_length = 2;
5595
5596 rax = &vmcb->v_rax;
5597 rcx = &vcpu->vc_gueststate.vg_rcx;
5598 rdx = &vcpu->vc_gueststate.vg_rdx;
5599
5600 if (vmcb->v_exitinfo1 == 1) {
5601 /* WRMSR */
5602 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5603
5604 switch (*rcx) {
5605 case MSR_CR_PAT:
5606 if (!vmm_pat_is_valid(val)) {
5607 ret = vmm_inject_gp(vcpu);
5608 return (ret);
5609 }
5610 vcpu->vc_shadow_pat = val;
5611 break;
5612 case MSR_EFER:
5613 vmcb->v_efer = *rax | EFER_SVME;
5614 break;
5615 case KVM_MSR_SYSTEM_TIME:
5616 vmm_init_pvclock(vcpu,
5617 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5618 break;
5619 default:
5620 /* Log the access, to be able to identify unknown MSRs */
5621 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5622 "written from guest=0x%llx:0x%llx\n", __func__,
5623 *rcx, *rdx, *rax);
5624 }
5625 } else {
5626 /* RDMSR */
5627 switch (*rcx) {
5628 case MSR_BIOS_SIGN:
5629 case MSR_INT_PEN_MSG:
5630 case MSR_PLATFORM_ID:
5631 /* Ignored */
5632 *rax = 0;
5633 *rdx = 0;
5634 break;
5635 case MSR_CR_PAT:
5636 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5637 *rdx = (vcpu->vc_shadow_pat >> 32);
5638 break;
5639 case MSR_DE_CFG:
5640 /* LFENCE serializing bit is set by host */
5641 *rax = DE_CFG_SERIALIZE_LFENCE;
5642 *rdx = 0;
5643 break;
5644 default:
5645 /*
5646 * Unsupported MSRs causes #GP exception, don't advance
5647 * %rip
5648 */
5649 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
5650 "injecting #GP\n", __func__, *rcx);
5651 ret = vmm_inject_gp(vcpu);
5652 return (ret);
5653 }
5654 }
5655
5656 vcpu->vc_gueststate.vg_rip += insn_length;
5657
5658 return (0);
5659 }
5660
5661 /* Handle cpuid(0xd) and its subleafs */
5662 static void
5663 vmm_handle_cpuid_0xd(struct vcpu *vcpu, uint32_t subleaf, uint64_t *rax,
5664 uint32_t eax, uint32_t ebx, uint32_t ecx, uint32_t edx)
5665 {
5666 if (subleaf == 0) {
5667 /*
5668 * CPUID(0xd.0) depends on the value in XCR0 and MSR_XSS. If
5669 * the guest XCR0 isn't the same as the host then set it, redo
5670 * the CPUID, and restore it.
5671 */
5672 uint64_t xcr0 = vcpu->vc_gueststate.vg_xcr0;
5673
5674 /*
5675 * "ecx enumerates the size required ... for an area
5676 * containing all the ... components supported by this
5677 * processor"
5678 * "ebx enumerates the size required ... for an area
5679 * containing all the ... components corresponding to bits
5680 * currently set in xcr0"
5681 * So: since the VMM 'processor' is what our base kernel uses,
5682 * the VMM ecx is our ebx
5683 */
5684 ecx = ebx;
5685 if (xcr0 != (xsave_mask & XFEATURE_XCR0_MASK)) {
5686 uint32_t dummy;
5687 xsetbv(0, xcr0);
5688 CPUID_LEAF(0xd, subleaf, eax, ebx, dummy, edx);
5689 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
5690 }
5691 eax = xsave_mask & XFEATURE_XCR0_MASK;
5692 edx = (xsave_mask & XFEATURE_XCR0_MASK) >> 32;
5693 } else if (subleaf == 1) {
5694 /* mask out XSAVEC, XSAVES, and XFD support */
5695 eax &= XSAVE_XSAVEOPT | XSAVE_XGETBV1;
5696 ebx = 0; /* no xsavec or xsaves for now */
5697 ecx = edx = 0; /* no xsaves for now */
5698 } else if (subleaf >= 63 ||
5699 ((1ULL << subleaf) & xsave_mask & XFEATURE_XCR0_MASK) == 0) {
5700 /* disclaim subleaves of features we don't expose */
5701 eax = ebx = ecx = edx = 0;
5702 } else {
5703 /* disclaim compressed alignment or xfd support */
5704 ecx = 0;
5705 }
5706
5707 *rax = eax;
5708 vcpu->vc_gueststate.vg_rbx = ebx;
5709 vcpu->vc_gueststate.vg_rcx = ecx;
5710 vcpu->vc_gueststate.vg_rdx = edx;
5711 }
5712
5713 /*
5714 * vmm_handle_cpuid
5715 *
5716 * Exit handler for CPUID instruction
5717 *
5718 * Parameters:
5719 * vcpu: vcpu causing the CPUID exit
5720 *
5721 * Return value:
5722 * 0: the exit was processed successfully
5723 * EINVAL: error occurred validating the CPUID instruction arguments
5724 */
5725 int
5726 vmm_handle_cpuid(struct vcpu *vcpu)
5727 {
5728 uint64_t insn_length, cr4;
5729 uint64_t *rax, *rbx, *rcx, *rdx;
5730 struct vmcb *vmcb;
5731 uint32_t leaf, subleaf, eax, ebx, ecx, edx;
5732 struct vmx_msr_store *msr_store;
5733 int vmm_cpuid_level;
5734
5735 /* what's the cpuid level we support/advertise? */
5736 vmm_cpuid_level = cpuid_level;
5737 if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
5738 vmm_cpuid_level = 0x15;
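/*
 * Leaf 0x15 is the TSC/core crystal clock information leaf; on
 * invariant-TSC hosts the advertised cpuid level is raised so this
 * leaf is visible to the guest.
 */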
5739
5740 if (vmm_softc->mode == VMM_MODE_EPT) {
5741 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5742 DPRINTF("%s: can't obtain instruction length\n",
5743 __func__);
5744 return (EINVAL);
5745 }
5746
5747 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5748 DPRINTF("%s: can't obtain cr4\n", __func__);
5749 return (EINVAL);
5750 }
5751
5752 rax = &vcpu->vc_gueststate.vg_rax;
5753
5754 /*
5755 * "CPUID leaves above 02H and below 80000000H are only
5756 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
5757 * default value 0"
5758 */
5759 msr_store =
5760 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5761 if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
5762 MISC_ENABLE_LIMIT_CPUID_MAXVAL)
5763 vmm_cpuid_level = 0x02;
5764 } else {
5765 /* XXX: validate insn_length 2 */
5766 insn_length = 2;
5767 vmcb = (struct vmcb *)vcpu->vc_control_va;
5768 rax = &vmcb->v_rax;
5769 cr4 = vmcb->v_cr4;
5770 }
5771
5772 rbx = &vcpu->vc_gueststate.vg_rbx;
5773 rcx = &vcpu->vc_gueststate.vg_rcx;
5774 rdx = &vcpu->vc_gueststate.vg_rdx;
5775 vcpu->vc_gueststate.vg_rip += insn_length;
5776
5777 leaf = *rax;
5778 subleaf = *rcx;
5779
5780 /*
5781 * "If a value entered for CPUID.EAX is higher than the maximum input
5782 * value for basic or extended function for that processor then the
5783 * data for the highest basic information leaf is returned."
5784 *
5785 * "When CPUID returns the highest basic leaf information as a result
5786 * of an invalid input EAX value, any dependence on input ECX value
5787 * in the basic leaf is honored."
5788 *
5789 * This means that if leaf is between vmm_cpuid_level and 0x40000000 (the
5790 * start of the hypervisor info leaves), clamp to vmm_cpuid_level without
5791 * altering subleaf. Likewise, if leaf is greater than the highest extended
5792 * function leaf, clamp to vmm_cpuid_level.
5793 */
5794 if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
5795 (leaf > curcpu()->ci_pnfeatset)) {
5796 DPRINTF("%s: invalid cpuid input leaf 0x%x, guest rip="
5797 "0x%llx - resetting to 0x%x\n", __func__, leaf,
5798 vcpu->vc_gueststate.vg_rip - insn_length,
5799 vmm_cpuid_level);
5800 leaf = vmm_cpuid_level;
5801 }
5802
5803 /* we fake up values in the range (cpuid_level, vmm_cpuid_level] */
5804 if (leaf <= cpuid_level || leaf > 0x80000000)
5805 CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
5806 else
5807 eax = ebx = ecx = edx = 0;
5808
5809 switch (leaf) {
5810 case 0x00: /* Max level and vendor ID */
5811 *rax = vmm_cpuid_level;
5812 *rbx = *((uint32_t *)&cpu_vendor);
5813 *rdx = *((uint32_t *)&cpu_vendor + 1);
5814 *rcx = *((uint32_t *)&cpu_vendor + 2);
5815 break;
5816 case 0x01: /* Version, brand, feature info */
5817 *rax = cpu_id;
5818 /* mask off host's APIC ID, reset to vcpu id */
5819 *rbx = cpu_ebxfeature & 0x0000FFFF;
5820 *rbx |= (vcpu->vc_id & 0xFF) << 24;
5821 *rcx = (cpu_ecxfeature | CPUIDECX_HV) & VMM_CPUIDECX_MASK;
5822
5823 /* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
5824 if (cr4 & CR4_OSXSAVE)
5825 *rcx |= CPUIDECX_OSXSAVE;
5826 else
5827 *rcx &= ~CPUIDECX_OSXSAVE;
5828
5829 *rdx = curcpu()->ci_feature_flags & VMM_CPUIDEDX_MASK;
5830 break;
5831 case 0x02: /* Cache and TLB information */
5832 *rax = eax;
5833 *rbx = ebx;
5834 *rcx = ecx;
5835 *rdx = edx;
5836 break;
5837 case 0x03: /* Processor serial number (not supported) */
5838 DPRINTF("%s: function 0x03 (processor serial number) not "
5839 "supported\n", __func__);
5840 *rax = 0;
5841 *rbx = 0;
5842 *rcx = 0;
5843 *rdx = 0;
5844 break;
5845 case 0x04: /* Deterministic cache info */
5846 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
5847 *rbx = ebx;
5848 *rcx = ecx;
5849 *rdx = edx;
5850 break;
5851 case 0x05: /* MONITOR/MWAIT (not supported) */
5852 DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
5853 __func__);
5854 *rax = 0;
5855 *rbx = 0;
5856 *rcx = 0;
5857 *rdx = 0;
5858 break;
5859 case 0x06: /* Thermal / Power management (not supported) */
5860 DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
5861 __func__);
5862 *rax = 0;
5863 *rbx = 0;
5864 *rcx = 0;
5865 *rdx = 0;
5866 break;
5867 case 0x07: /* SEFF */
5868 if (subleaf == 0) {
5869 *rax = 0; /* Highest subleaf supported */
5870 *rbx = curcpu()->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK;
5871 *rcx = curcpu()->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK;
5872 *rdx = curcpu()->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK;
5873 /*
5874 * Only expose PKU support if we've detected it in use
5875 * on the host.
5876 */
5877 if (vmm_softc->sc_md.pkru_enabled)
5878 *rcx |= SEFF0ECX_PKU;
5879 else
5880 *rcx &= ~SEFF0ECX_PKU;
5881
5882 /* Expose IBT bit if we've enabled CET on the host. */
5883 if (rcr4() & CR4_CET)
5884 *rdx |= SEFF0EDX_IBT;
5885 else
5886 *rdx &= ~SEFF0EDX_IBT;
5887
5888 } else {
5889 /* Unsupported subleaf */
5890 DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
5891 "0x%x not supported\n", __func__, subleaf);
5892 *rax = 0;
5893 *rbx = 0;
5894 *rcx = 0;
5895 *rdx = 0;
5896 }
5897 break;
5898 case 0x09: /* Direct Cache Access (not supported) */
5899 DPRINTF("%s: function 0x09 (direct cache access) not "
5900 "supported\n", __func__);
5901 *rax = 0;
5902 *rbx = 0;
5903 *rcx = 0;
5904 *rdx = 0;
5905 break;
5906 case 0x0a: /* Architectural perf monitoring (not supported) */
5907 DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
5908 __func__);
5909 *rax = 0;
5910 *rbx = 0;
5911 *rcx = 0;
5912 *rdx = 0;
5913 break;
5914 case 0x0b: /* Extended topology enumeration (not supported) */
5915 DPRINTF("%s: function 0x0b (topology enumeration) not "
5916 "supported\n", __func__);
5917 *rax = 0;
5918 *rbx = 0;
5919 *rcx = 0;
5920 *rdx = 0;
5921 break;
5922 case 0x0d: /* Processor ext. state information */
5923 vmm_handle_cpuid_0xd(vcpu, subleaf, rax, eax, ebx, ecx, edx);
5924 break;
5925 case 0x0f: /* QoS info (not supported) */
5926 DPRINTF("%s: function 0x0f (QoS info) not supported\n",
5927 __func__);
5928 *rax = 0;
5929 *rbx = 0;
5930 *rcx = 0;
5931 *rdx = 0;
5932 break;
5933 case 0x14: /* Processor Trace info (not supported) */
5934 DPRINTF("%s: function 0x14 (processor trace info) not "
5935 "supported\n", __func__);
5936 *rax = 0;
5937 *rbx = 0;
5938 *rcx = 0;
5939 *rdx = 0;
5940 break;
5941 case 0x15:
5942 if (cpuid_level >= 0x15) {
5943 *rax = eax;
5944 *rbx = ebx;
5945 *rcx = ecx;
5946 *rdx = edx;
5947 } else {
5948 KASSERT(tsc_is_invariant);
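/*
 * Leaf 0x15 encoding: EBX/EAX is the TSC to core crystal clock
 * ratio and ECX the crystal frequency in Hz, so the guest computes
 * TSC Hz = ECX * EBX / EAX = (tsc_frequency / 100) * 100 / 1,
 * i.e. tsc_frequency rounded down to a multiple of 100 Hz.
 */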
5949 *rax = 1;
5950 *rbx = 100;
5951 *rcx = tsc_frequency / 100;
5952 *rdx = 0;
5953 }
5954 break;
5955 case 0x16: /* Processor frequency info */
5956 *rax = eax;
5957 *rbx = ebx;
5958 *rcx = ecx;
5959 *rdx = edx;
5960 break;
5961 case 0x40000000: /* Hypervisor information */
5962 *rax = 0;
5963 *rbx = *((uint32_t *)&vmm_hv_signature[0]);
5964 *rcx = *((uint32_t *)&vmm_hv_signature[4]);
5965 *rdx = *((uint32_t *)&vmm_hv_signature[8]);
5966 break;
5967 case 0x40000001: /* KVM hypervisor features */
5968 *rax = (1 << KVM_FEATURE_CLOCKSOURCE2) |
5969 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
5970 *rbx = 0;
5971 *rcx = 0;
5972 *rdx = 0;
5973 break;
5974 case 0x80000000: /* Extended function level */
5975 /* We don't emulate past 0x8000001f currently. */
5976 *rax = min(curcpu()->ci_pnfeatset, 0x8000001f);
5977 *rbx = 0;
5978 *rcx = 0;
5979 *rdx = 0;
5980 break;
5981 case 0x80000001: /* Extended function info */
5982 *rax = curcpu()->ci_efeature_eax;
5983 *rbx = 0; /* Reserved */
5984 *rcx = curcpu()->ci_efeature_ecx & VMM_ECPUIDECX_MASK;
5985 *rdx = curcpu()->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK;
5986 break;
5987 case 0x80000002: /* Brand string */
5988 *rax = curcpu()->ci_brand[0];
5989 *rbx = curcpu()->ci_brand[1];
5990 *rcx = curcpu()->ci_brand[2];
5991 *rdx = curcpu()->ci_brand[3];
5992 break;
5993 case 0x80000003: /* Brand string */
5994 *rax = curcpu()->ci_brand[4];
5995 *rbx = curcpu()->ci_brand[5];
5996 *rcx = curcpu()->ci_brand[6];
5997 *rdx = curcpu()->ci_brand[7];
5998 break;
5999 case 0x80000004: /* Brand string */
6000 *rax = curcpu()->ci_brand[8];
6001 *rbx = curcpu()->ci_brand[9];
6002 *rcx = curcpu()->ci_brand[10];
6003 *rdx = curcpu()->ci_brand[11];
6004 break;
6005 case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */
6006 *rax = eax;
6007 *rbx = ebx;
6008 *rcx = ecx;
6009 *rdx = edx;
6010 break;
6011 case 0x80000006: /* ext. cache info */
6012 *rax = eax;
6013 *rbx = ebx;
6014 *rcx = ecx;
6015 *rdx = edx;
6016 break;
6017 case 0x80000007: /* apmi */
6018 *rax = eax;
6019 *rbx = ebx;
6020 *rcx = ecx;
6021 *rdx = edx & VMM_APMI_EDX_INCLUDE_MASK;
6022 break;
6023 case 0x80000008: /* Phys bits info and topology (AMD) */
6024 *rax = eax;
6025 *rbx = ebx & VMM_AMDSPEC_EBX_MASK;
6026 /* Reset %rcx (topology) */
6027 *rcx = 0;
6028 *rdx = edx;
6029 break;
6030 case 0x8000001d: /* cache topology (AMD) */
6031 *rax = eax;
6032 *rbx = ebx;
6033 *rcx = ecx;
6034 *rdx = edx;
6035 break;
6036 case 0x8000001f: /* encryption features (AMD) */
6037 *rax = eax;
6038 *rbx = ebx;
6039 *rcx = ecx;
6040 *rdx = edx;
6041 break;
6042 default:
6043 DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
6044 *rax = 0;
6045 *rbx = 0;
6046 *rcx = 0;
6047 *rdx = 0;
6048 }
6049
6050
6051 if (vmm_softc->mode == VMM_MODE_RVI) {
6052 /*
6053 * Update %rax. The rest of the registers are updated in
6054 * svm_enter_guest.
6055 */
6056 vmcb->v_rax = *rax;
6057 }
6058
6059 return (0);
6060 }
6061
6062 /*
6063 * vcpu_run_svm
6064 *
6065 * SVM main loop used to run a VCPU.
6066 *
6067 * Parameters:
6068 * vcpu: The VCPU to run
6069 * vrp: run parameters
6070 *
6071 * Return values:
6072 * 0: The run loop exited and no help is needed from vmd
6073 * EAGAIN: The run loop exited and help from vmd is needed
6074 * EINVAL: an error occurred
6075 */
6076 int
6077 vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
6078 {
6079 int ret = 0;
6080 struct region_descriptor gdt;
6081 struct cpu_info *ci = NULL;
6082 uint64_t exit_reason;
6083 struct schedstate_percpu *spc;
6084 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6085
6086 if (vrp->vrp_intr_pending)
6087 vcpu->vc_intr = 1;
6088 else
6089 vcpu->vc_intr = 0;
6090
6091 /*
6092 * If we are returning from userspace (vmd) because we exited
6093 * last time, fix up any needed vcpu state first. Which state
6094 * needs to be fixed up depends on what vmd populated in the
6095 * exit data structure.
6096 */
6097 switch (vcpu->vc_gueststate.vg_exit_reason) {
6098 case SVM_VMEXIT_IOIO:
6099 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
6100 vcpu->vc_gueststate.vg_rax =
6101 vcpu->vc_exit.vei.vei_data;
6102 vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
6103 }
6104 vcpu->vc_gueststate.vg_rip =
6105 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
6106 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
6107 break;
6108 case SVM_VMEXIT_NPF:
6109 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
6110 &vcpu->vc_exit.vrs);
6111 if (ret) {
6112 printf("%s: vm %d vcpu %d failed to update "
6113 "registers\n", __func__,
6114 vcpu->vc_parent->vm_id, vcpu->vc_id);
6115 return (EINVAL);
6116 }
6117 break;
6118 }
6119 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
6120
6121 while (ret == 0) {
6122 vmm_update_pvclock(vcpu);
6123 if (ci != curcpu()) {
6124 /*
6125 * We are launching for the first time, or we are
6126 * resuming from a different pcpu, so we need to
6127 * reset certain pcpu-specific values.
6128 */
6129 ci = curcpu();
6130 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
6131
6132 if (ci != vcpu->vc_last_pcpu) {
6133 /*
6134 * Flush TLB by guest ASID if feature
6135 * available, flush entire TLB if not.
6136 */
6137 if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid)
6138 vmcb->v_tlb_control =
6139 SVM_TLB_CONTROL_FLUSH_ASID;
6140 else
6141 vmcb->v_tlb_control =
6142 SVM_TLB_CONTROL_FLUSH_ALL;
6143
6144 svm_set_dirty(vcpu, SVM_CLEANBITS_ALL);
6145 }
6146
6147 vcpu->vc_last_pcpu = ci;
6148
6149 if (gdt.rd_base == 0) {
6150 ret = EINVAL;
6151 break;
6152 }
6153 }
6154
6155 /* Handle vmd(8) injected interrupts */
6156 /* Is there an interrupt pending injection? */
6157 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR &&
6158 vcpu->vc_irqready) {
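/*
 * EVENTINJ layout (AMD APM vol. 2): vector in bits 7:0, type in
 * bits 10:8 (0 = external interrupt), bit 31 = event valid.
 */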
6159 vmcb->v_eventinj = vcpu->vc_inject.vie_vector |
6160 (1U << 31);
6161 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6162 }
6163
6164 /* Inject event if present */
6165 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
6166 vmcb->v_eventinj = vcpu->vc_inject.vie_vector;
6167
6168 /* Set the "Event Valid" flag for certain vectors */
6169 switch (vcpu->vc_inject.vie_vector) {
6170 case VMM_EX_BP:
6171 case VMM_EX_OF:
6172 case VMM_EX_DB:
6173 /*
6174 * Software exception.
6175 * XXX check nRIP support.
6176 */
6177 vmcb->v_eventinj |= (4ULL << 8);
6178 break;
6179 case VMM_EX_AC:
6180 vcpu->vc_inject.vie_errorcode = 0;
6181 /* fallthrough */
6182 case VMM_EX_DF:
6183 case VMM_EX_TS:
6184 case VMM_EX_NP:
6185 case VMM_EX_SS:
6186 case VMM_EX_GP:
6187 case VMM_EX_PF:
6188 /* Hardware exception. */
6189 vmcb->v_eventinj |= (3ULL << 8);
6190
6191 if (vmcb->v_cr0 & CR0_PE) {
6192 /* Error code valid. */
6193 vmcb->v_eventinj |= (1ULL << 11);
6194 vmcb->v_eventinj |= (uint64_t)
6195 vcpu->vc_inject.vie_errorcode << 32;
6196 }
6197 break;
6198 default:
6199 printf("%s: unsupported exception vector %u\n",
6200 __func__, vcpu->vc_inject.vie_vector);
6201 ret = EINVAL;
6202 } /* switch */
6203 if (ret == EINVAL)
6204 break;
6205
6206 /* Event is valid. */
6207 vmcb->v_eventinj |= (1U << 31);
6208 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6209 }
6210
6211 TRACEPOINT(vmm, guest_enter, vcpu, vrp);
6212
6213 /* Start / resume the VCPU */
6214 /* Disable interrupts and save the current host FPU state. */
6215 clgi();
6216 if ((ret = vmm_fpurestore(vcpu))) {
6217 stgi();
6218 break;
6219 }
6220
6221 /*
6222 * If we're resuming to a different VCPU and have IBPB,
6223 * then use it to prevent cross-VM branch-target injection.
6224 */
6225 if (ci->ci_guest_vcpu != vcpu &&
6226 (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBPB)) {
6227 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
6228 ci->ci_guest_vcpu = vcpu;
6229 }
6230
6231 /* Restore any guest PKRU state. */
6232 if (vmm_softc->sc_md.pkru_enabled)
6233 wrpkru(0, vcpu->vc_pkru);
6234
6235 KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
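/* Tell VMRUN where to save/restore host state across the guest run. */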
6236 wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);
6237
6238 ret = svm_enter_guest(vcpu->vc_control_pa,
6239 &vcpu->vc_gueststate, &gdt);
6240
6241 /* Restore host PKRU state. */
6242 if (vmm_softc->sc_md.pkru_enabled) {
6243 vcpu->vc_pkru = rdpkru(0);
6244 wrpkru(0, PGK_VALUE);
6245 }
6246
6247 /*
6248 * On exit, interrupts are disabled, and we are running with
6249 * the guest FPU state still possibly on the CPU. Save the FPU
6250 * state before re-enabling interrupts.
6251 */
6252 vmm_fpusave(vcpu);
6253
6254 /*
6255 * Enable interrupts now. Note that if the exit was due to INTR
6256 * (external interrupt), the interrupt will be processed now.
6257 */
6258 stgi();
6259
6260 vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
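/*
 * The requested TLB flush was consumed by the VMRUN that just
 * completed, so reset it. Mark the whole VMCB clean; the next
 * VMRUN may then use cached state for any field we don't dirty
 * again below.
 */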
6261 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE;
6262 svm_set_clean(vcpu, SVM_CLEANBITS_ALL);
6263
6264 /* If we exited successfully ... */
6265 if (ret == 0) {
6266 exit_reason = vmcb->v_exitcode;
6267 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
6268 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
6269
6270 vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags;
6271
6272 /*
6273 * Handle the exit. This will alter "ret" to EAGAIN if
6274 * the exit handler determines help from vmd is needed.
6275 */
6276 ret = svm_handle_exit(vcpu);
6277
6278 if (vcpu->vc_gueststate.vg_rflags & PSL_I)
6279 vcpu->vc_irqready = 1;
6280 else
6281 vcpu->vc_irqready = 0;
6282
6283 /*
6284 * If not ready for interrupts, but interrupts pending,
6285 * enable interrupt window exiting.
6286 */
6287 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
6288 vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR;
6289 vmcb->v_irq = 1;
6290 vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR;
6291 vmcb->v_intr_vector = 0;
6292 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR |
6293 SVM_CLEANBITS_I);
6294 }
6295
6296 /*
6297 * Exit to vmd if we are terminating, failed to enter,
6298 * or need help (device I/O)
6299 */
6300 if (ret || vcpu_must_stop(vcpu))
6301 break;
6302
6303 if (vcpu->vc_intr && vcpu->vc_irqready) {
6304 ret = EAGAIN;
6305 break;
6306 }
6307
6308 /* Check if we should yield - don't hog the cpu */
6309 spc = &ci->ci_schedstate;
6310 if (spc->spc_schedflags & SPCF_SHOULDYIELD)
6311 break;
6312 }
6313 }
6314
6315 /*
6316 * We are heading back to userspace (vmd), either because we need help
6317 * handling an exit, a guest interrupt is pending, or we failed in some
6318 * way to enter the guest. Copy the guest registers to the exit struct
6319 * and return to vmd.
6320 */
6321 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs))
6322 ret = EINVAL;
6323
6324 return (ret);
6325 }
6326
6327 /*
6328 * vmm_alloc_vpid
6329 *
6330 * Sets the memory location pointed to by "vpid" to the next available VPID
6331 * or ASID.
6332 *
6333 * Parameters:
6334 * vpid: Pointer to location to receive the next VPID/ASID
6335 *
6336 * Return Values:
6337 * 0: The operation completed successfully
6338 * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
6339 */
6340 int
6341 vmm_alloc_vpid(uint16_t *vpid)
6342 {
6343 uint16_t i;
6344 uint8_t idx, bit;
6345 struct vmm_softc *sc = vmm_softc;
6346
6347 rw_enter_write(&vmm_softc->vpid_lock);
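/*
 * The vpids bitmap tracks one bit per VPID/ASID, eight per byte.
 * ID 0 is reserved (it refers to the host), so allocation starts
 * at 1.
 */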
6348 for (i = 1; i <= sc->max_vpid; i++) {
6349 idx = i / 8;
6350 bit = i - (idx * 8);
6351
6352 if (!(sc->vpids[idx] & (1 << bit))) {
6353 sc->vpids[idx] |= (1 << bit);
6354 *vpid = i;
6355 DPRINTF("%s: allocated VPID/ASID %d\n", __func__,
6356 i);
6357 rw_exit_write(&vmm_softc->vpid_lock);
6358 return 0;
6359 }
6360 }
6361
6362 printf("%s: no available %ss\n", __func__,
6363 (sc->mode == VMM_MODE_EPT) ? "VPID" :
6364 "ASID");
6365
6366 rw_exit_write(&vmm_softc->vpid_lock);
6367 return ENOMEM;
6368 }
6369
6370 /*
6371 * vmm_free_vpid
6372 *
6373 * Frees the VPID/ASID id supplied in "vpid".
6374 *
6375 * Parameters:
6376 * vpid: VPID/ASID to free.
6377 */
6378 void
6379 vmm_free_vpid(uint16_t vpid)
6380 {
6381 uint8_t idx, bit;
6382 struct vmm_softc *sc = vmm_softc;
6383
6384 rw_enter_write(&vmm_softc->vpid_lock);
6385 idx = vpid / 8;
6386 bit = vpid - (idx * 8);
6387 sc->vpids[idx] &= ~(1 << bit);
6388
6389 DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid);
6390 rw_exit_write(&vmm_softc->vpid_lock);
6391 }
6392
6393
6394 /* vmm_gpa_is_valid
6395 *
6396 * Check if the given gpa is within guest memory space.
6397 *
6398 * Parameters:
6399 * vcpu: The virtual cpu we are running on.
6400 * gpa: The address to check.
6401 * obj_size: The size of the object assigned to gpa
6402 *
6403 * Return values:
6404 * 1: gpa is within the memory ranges allocated for the vcpu
6405 * 0: otherwise
6406 */
6407 int
6408 vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size)
6409 {
6410 struct vm *vm = vcpu->vc_parent;
6411 struct vm_mem_range *vmr;
6412 size_t i;
6413
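/* The object must lie entirely within a single guest memory range. */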
6414 for (i = 0; i < vm->vm_nmemranges; ++i) {
6415 vmr = &vm->vm_memranges[i];
6416 if (vmr->vmr_size >= obj_size &&
6417 vmr->vmr_gpa <= gpa &&
6418 gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) {
6419 return 1;
6420 }
6421 }
6422 return 0;
6423 }
6424
6425 void
6426 vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa)
6427 {
6428 paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0;
6429 if (!vmm_gpa_is_valid(vcpu, pvclock_gpa,
6430 sizeof(struct pvclock_time_info))) {
6431 /* XXX: Kill guest? */
6432 vmm_inject_gp(vcpu);
6433 return;
6434 }
6435
6436 /* XXX: handle case when this struct goes over page boundaries */
6437 if ((pvclock_gpa & PAGE_MASK) + sizeof(struct pvclock_time_info) >
6438 PAGE_SIZE) {
6439 vmm_inject_gp(vcpu);
6440 return;
6441 }
6442
6443 vcpu->vc_pvclock_system_gpa = gpa;
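/*
 * Per the pvclock ABI the guest computes
 * ns = ((tsc_delta << ti_tsc_shift) * ti_tsc_to_system_mul) >> 32.
 * With ti_tsc_shift fixed at 12 (see vmm_update_pvclock), a
 * multiplier of (10^9 << 20) / tsc_frequency yields
 * tsc_delta * 10^9 / tsc_frequency nanoseconds.
 */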
6444 if (tsc_frequency > 0)
6445 vcpu->vc_pvclock_system_tsc_mul =
6446 (int) ((1000000000L << 20) / tsc_frequency);
6447 else
6448 vcpu->vc_pvclock_system_tsc_mul = 0;
6449 vmm_update_pvclock(vcpu);
6450 }
6451
6452 int
6453 vmm_update_pvclock(struct vcpu *vcpu)
6454 {
6455 struct pvclock_time_info *pvclock_ti;
6456 struct timespec tv;
6457 struct vm *vm = vcpu->vc_parent;
6458 paddr_t pvclock_hpa, pvclock_gpa;
6459
6460 if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE) {
6461 pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
6462 if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
6463 return (EINVAL);
6464 pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa);
6465
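/*
 * ti_version acts as a seqlock: the guest must retry its read
 * while the version is odd or changes between reads.
 */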
6466 /* START next cycle (must be odd) */
6467 pvclock_ti->ti_version =
6468 (++vcpu->vc_pvclock_version << 1) | 0x1;
6469
6470 pvclock_ti->ti_tsc_timestamp = rdtsc();
6471 nanotime(&tv);
6472 pvclock_ti->ti_system_time =
6473 tv.tv_sec * 1000000000L + tv.tv_nsec;
6474 pvclock_ti->ti_tsc_shift = 12;
6475 pvclock_ti->ti_tsc_to_system_mul =
6476 vcpu->vc_pvclock_system_tsc_mul;
6477 pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE;
6478
6479 /* END (must be even) */
6480 pvclock_ti->ti_version &= ~0x1;
6481 }
6482 return (0);
6483 }
6484
6485 int
6486 vmm_pat_is_valid(uint64_t pat)
6487 {
6488 int i;
6489 uint8_t *byte = (uint8_t *)&pat;
6490
6491 /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
6492 for (i = 0; i < 8; i++) {
6493 if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
6494 DPRINTF("%s: invalid pat %llx\n", __func__, pat);
6495 return 0;
6496 }
6497 }
6498
6499 return 1;
6500 }
6501
6502 /*
6503 * vmx_exit_reason_decode
6504 *
6505 * Returns a human readable string describing exit type 'code'
6506 */
6507 const char *
6508 vmx_exit_reason_decode(uint32_t code)
6509 {
6510 switch (code) {
6511 case VMX_EXIT_NMI: return "NMI";
6512 case VMX_EXIT_EXTINT: return "External interrupt";
6513 case VMX_EXIT_TRIPLE_FAULT: return "Triple fault";
6514 case VMX_EXIT_INIT: return "INIT signal";
6515 case VMX_EXIT_SIPI: return "SIPI signal";
6516 case VMX_EXIT_IO_SMI: return "I/O SMI";
6517 case VMX_EXIT_OTHER_SMI: return "other SMI";
6518 case VMX_EXIT_INT_WINDOW: return "Interrupt window";
6519 case VMX_EXIT_NMI_WINDOW: return "NMI window";
6520 case VMX_EXIT_TASK_SWITCH: return "Task switch";
6521 case VMX_EXIT_CPUID: return "CPUID instruction";
6522 case VMX_EXIT_GETSEC: return "GETSEC instruction";
6523 case VMX_EXIT_HLT: return "HLT instruction";
6524 case VMX_EXIT_INVD: return "INVD instruction";
6525 case VMX_EXIT_INVLPG: return "INVLPG instruction";
6526 case VMX_EXIT_RDPMC: return "RDPMC instruction";
6527 case VMX_EXIT_RDTSC: return "RDTSC instruction";
6528 case VMX_EXIT_RSM: return "RSM instruction";
6529 case VMX_EXIT_VMCALL: return "VMCALL instruction";
6530 case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction";
6531 case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction";
6532 case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction";
6533 case VMX_EXIT_VMPTRST: return "VMPTRST instruction";
6534 case VMX_EXIT_VMREAD: return "VMREAD instruction";
6535 case VMX_EXIT_VMRESUME: return "VMRESUME instruction";
6536 case VMX_EXIT_VMWRITE: return "VMWRITE instruction";
6537 case VMX_EXIT_VMXOFF: return "VMXOFF instruction";
6538 case VMX_EXIT_VMXON: return "VMXON instruction";
6539 case VMX_EXIT_CR_ACCESS: return "CR access";
6540 case VMX_EXIT_MOV_DR: return "MOV DR instruction";
6541 case VMX_EXIT_IO: return "I/O instruction";
6542 case VMX_EXIT_RDMSR: return "RDMSR instruction";
6543 case VMX_EXIT_WRMSR: return "WRMSR instruction";
6544 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid";
6545 case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed";
6546 case VMX_EXIT_MWAIT: return "MWAIT instruction";
6547 case VMX_EXIT_MTF: return "monitor trap flag";
6548 case VMX_EXIT_MONITOR: return "MONITOR instruction";
6549 case VMX_EXIT_PAUSE: return "PAUSE instruction";
6550 case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry";
6551 case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold";
6552 case VMX_EXIT_APIC_ACCESS: return "APIC access";
6553 case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI";
6554 case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access";
6555 case VMX_EXIT_LDTR_TR: return "LDTR/TR access";
6556 case VMX_EXIT_EPT_VIOLATION: return "EPT violation";
6557 case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration";
6558 case VMX_EXIT_INVEPT: return "INVEPT instruction";
6559 case VMX_EXIT_RDTSCP: return "RDTSCP instruction";
6560 case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED:
6561 return "preemption timer expired";
6562 case VMX_EXIT_INVVPID: return "INVVPID instruction";
6563 case VMX_EXIT_WBINVD: return "WBINVD instruction";
6564 case VMX_EXIT_XSETBV: return "XSETBV instruction";
6565 case VMX_EXIT_APIC_WRITE: return "APIC write";
6566 case VMX_EXIT_RDRAND: return "RDRAND instruction";
6567 case VMX_EXIT_INVPCID: return "INVPCID instruction";
6568 case VMX_EXIT_VMFUNC: return "VMFUNC instruction";
6569 case VMX_EXIT_RDSEED: return "RDSEED instruction";
6570 case VMX_EXIT_XSAVES: return "XSAVES instruction";
6571 case VMX_EXIT_XRSTORS: return "XRSTORS instruction";
6572 default: return "unknown";
6573 }
6574 }
6575
6576 /*
6577 * svm_exit_reason_decode
6578 *
6579 * Returns a human readable string describing exit type 'code'
6580 */
6581 const char *
6582 svm_exit_reason_decode(uint32_t code)
6583 {
6584 switch (code) {
6585 case SVM_VMEXIT_CR0_READ: return "CR0 read"; /* 0x00 */
6586 case SVM_VMEXIT_CR1_READ: return "CR1 read"; /* 0x01 */
6587 case SVM_VMEXIT_CR2_READ: return "CR2 read"; /* 0x02 */
6588 case SVM_VMEXIT_CR3_READ: return "CR3 read"; /* 0x03 */
6589 case SVM_VMEXIT_CR4_READ: return "CR4 read"; /* 0x04 */
6590 case SVM_VMEXIT_CR5_READ: return "CR5 read"; /* 0x05 */
6591 case SVM_VMEXIT_CR6_READ: return "CR6 read"; /* 0x06 */
6592 case SVM_VMEXIT_CR7_READ: return "CR7 read"; /* 0x07 */
6593 case SVM_VMEXIT_CR8_READ: return "CR8 read"; /* 0x08 */
6594 case SVM_VMEXIT_CR9_READ: return "CR9 read"; /* 0x09 */
6595 case SVM_VMEXIT_CR10_READ: return "CR10 read"; /* 0x0A */
6596 case SVM_VMEXIT_CR11_READ: return "CR11 read"; /* 0x0B */
6597 case SVM_VMEXIT_CR12_READ: return "CR12 read"; /* 0x0C */
6598 case SVM_VMEXIT_CR13_READ: return "CR13 read"; /* 0x0D */
6599 case SVM_VMEXIT_CR14_READ: return "CR14 read"; /* 0x0E */
6600 case SVM_VMEXIT_CR15_READ: return "CR15 read"; /* 0x0F */
6601 case SVM_VMEXIT_CR0_WRITE: return "CR0 write"; /* 0x10 */
6602 case SVM_VMEXIT_CR1_WRITE: return "CR1 write"; /* 0x11 */
6603 case SVM_VMEXIT_CR2_WRITE: return "CR2 write"; /* 0x12 */
6604 case SVM_VMEXIT_CR3_WRITE: return "CR3 write"; /* 0x13 */
6605 case SVM_VMEXIT_CR4_WRITE: return "CR4 write"; /* 0x14 */
6606 case SVM_VMEXIT_CR5_WRITE: return "CR5 write"; /* 0x15 */
6607 case SVM_VMEXIT_CR6_WRITE: return "CR6 write"; /* 0x16 */
6608 case SVM_VMEXIT_CR7_WRITE: return "CR7 write"; /* 0x17 */
6609 case SVM_VMEXIT_CR8_WRITE: return "CR8 write"; /* 0x18 */
6610 case SVM_VMEXIT_CR9_WRITE: return "CR9 write"; /* 0x19 */
6611 case SVM_VMEXIT_CR10_WRITE: return "CR10 write"; /* 0x1A */
6612 case SVM_VMEXIT_CR11_WRITE: return "CR11 write"; /* 0x1B */
6613 case SVM_VMEXIT_CR12_WRITE: return "CR12 write"; /* 0x1C */
6614 case SVM_VMEXIT_CR13_WRITE: return "CR13 write"; /* 0x1D */
6615 case SVM_VMEXIT_CR14_WRITE: return "CR14 write"; /* 0x1E */
6616 case SVM_VMEXIT_CR15_WRITE: return "CR15 write"; /* 0x1F */
6617 case SVM_VMEXIT_DR0_READ: return "DR0 read"; /* 0x20 */
6618 case SVM_VMEXIT_DR1_READ: return "DR1 read"; /* 0x21 */
6619 case SVM_VMEXIT_DR2_READ: return "DR2 read"; /* 0x22 */
6620 case SVM_VMEXIT_DR3_READ: return "DR3 read"; /* 0x23 */
6621 case SVM_VMEXIT_DR4_READ: return "DR4 read"; /* 0x24 */
6622 case SVM_VMEXIT_DR5_READ: return "DR5 read"; /* 0x25 */
6623 case SVM_VMEXIT_DR6_READ: return "DR6 read"; /* 0x26 */
6624 case SVM_VMEXIT_DR7_READ: return "DR7 read"; /* 0x27 */
6625 case SVM_VMEXIT_DR8_READ: return "DR8 read"; /* 0x28 */
6626 case SVM_VMEXIT_DR9_READ: return "DR9 read"; /* 0x29 */
6627 case SVM_VMEXIT_DR10_READ: return "DR10 read"; /* 0x2A */
6628 case SVM_VMEXIT_DR11_READ: return "DR11 read"; /* 0x2B */
6629 case SVM_VMEXIT_DR12_READ: return "DR12 read"; /* 0x2C */
6630 case SVM_VMEXIT_DR13_READ: return "DR13 read"; /* 0x2D */
6631 case SVM_VMEXIT_DR14_READ: return "DR14 read"; /* 0x2E */
6632 case SVM_VMEXIT_DR15_READ: return "DR15 read"; /* 0x2F */
6633 case SVM_VMEXIT_DR0_WRITE: return "DR0 write"; /* 0x30 */
6634 case SVM_VMEXIT_DR1_WRITE: return "DR1 write"; /* 0x31 */
6635 case SVM_VMEXIT_DR2_WRITE: return "DR2 write"; /* 0x32 */
6636 case SVM_VMEXIT_DR3_WRITE: return "DR3 write"; /* 0x33 */
6637 case SVM_VMEXIT_DR4_WRITE: return "DR4 write"; /* 0x34 */
6638 case SVM_VMEXIT_DR5_WRITE: return "DR5 write"; /* 0x35 */
6639 case SVM_VMEXIT_DR6_WRITE: return "DR6 write"; /* 0x36 */
6640 case SVM_VMEXIT_DR7_WRITE: return "DR7 write"; /* 0x37 */
6641 case SVM_VMEXIT_DR8_WRITE: return "DR8 write"; /* 0x38 */
6642 case SVM_VMEXIT_DR9_WRITE: return "DR9 write"; /* 0x39 */
6643 case SVM_VMEXIT_DR10_WRITE: return "DR10 write"; /* 0x3A */
6644 case SVM_VMEXIT_DR11_WRITE: return "DR11 write"; /* 0x3B */
6645 case SVM_VMEXIT_DR12_WRITE: return "DR12 write"; /* 0x3C */
6646 case SVM_VMEXIT_DR13_WRITE: return "DR13 write"; /* 0x3D */
6647 case SVM_VMEXIT_DR14_WRITE: return "DR14 write"; /* 0x3E */
6648 case SVM_VMEXIT_DR15_WRITE: return "DR15 write"; /* 0x3F */
6649 case SVM_VMEXIT_EXCP0: return "Exception 0x00"; /* 0x40 */
6650 case SVM_VMEXIT_EXCP1: return "Exception 0x01"; /* 0x41 */
6651 case SVM_VMEXIT_EXCP2: return "Exception 0x02"; /* 0x42 */
6652 case SVM_VMEXIT_EXCP3: return "Exception 0x03"; /* 0x43 */
6653 case SVM_VMEXIT_EXCP4: return "Exception 0x04"; /* 0x44 */
6654 case SVM_VMEXIT_EXCP5: return "Exception 0x05"; /* 0x45 */
6655 case SVM_VMEXIT_EXCP6: return "Exception 0x06"; /* 0x46 */
6656 case SVM_VMEXIT_EXCP7: return "Exception 0x07"; /* 0x47 */
6657 case SVM_VMEXIT_EXCP8: return "Exception 0x08"; /* 0x48 */
6658 case SVM_VMEXIT_EXCP9: return "Exception 0x09"; /* 0x49 */
6659 case SVM_VMEXIT_EXCP10: return "Exception 0x0A"; /* 0x4A */
6660 case SVM_VMEXIT_EXCP11: return "Exception 0x0B"; /* 0x4B */
6661 case SVM_VMEXIT_EXCP12: return "Exception 0x0C"; /* 0x4C */
6662 case SVM_VMEXIT_EXCP13: return "Exception 0x0D"; /* 0x4D */
6663 case SVM_VMEXIT_EXCP14: return "Exception 0x0E"; /* 0x4E */
6664 case SVM_VMEXIT_EXCP15: return "Exception 0x0F"; /* 0x4F */
6665 case SVM_VMEXIT_EXCP16: return "Exception 0x10"; /* 0x50 */
6666 case SVM_VMEXIT_EXCP17: return "Exception 0x11"; /* 0x51 */
6667 case SVM_VMEXIT_EXCP18: return "Exception 0x12"; /* 0x52 */
6668 case SVM_VMEXIT_EXCP19: return "Exception 0x13"; /* 0x53 */
6669 case SVM_VMEXIT_EXCP20: return "Exception 0x14"; /* 0x54 */
6670 case SVM_VMEXIT_EXCP21: return "Exception 0x15"; /* 0x55 */
6671 case SVM_VMEXIT_EXCP22: return "Exception 0x16"; /* 0x56 */
6672 case SVM_VMEXIT_EXCP23: return "Exception 0x17"; /* 0x57 */
6673 case SVM_VMEXIT_EXCP24: return "Exception 0x18"; /* 0x58 */
6674 case SVM_VMEXIT_EXCP25: return "Exception 0x19"; /* 0x59 */
6675 case SVM_VMEXIT_EXCP26: return "Exception 0x1A"; /* 0x5A */
6676 case SVM_VMEXIT_EXCP27: return "Exception 0x1B"; /* 0x5B */
6677 case SVM_VMEXIT_EXCP28: return "Exception 0x1C"; /* 0x5C */
6678 case SVM_VMEXIT_EXCP29: return "Exception 0x1D"; /* 0x5D */
6679 case SVM_VMEXIT_EXCP30: return "Exception 0x1E"; /* 0x5E */
6680 case SVM_VMEXIT_EXCP31: return "Exception 0x1F"; /* 0x5F */
6681 case SVM_VMEXIT_INTR: return "External interrupt"; /* 0x60 */
6682 case SVM_VMEXIT_NMI: return "NMI"; /* 0x61 */
6683 case SVM_VMEXIT_SMI: return "SMI"; /* 0x62 */
6684 case SVM_VMEXIT_INIT: return "INIT"; /* 0x63 */
6685 case SVM_VMEXIT_VINTR: return "Interrupt window"; /* 0x64 */
6686 case SVM_VMEXIT_CR0_SEL_WRITE: return "Sel CR0 write"; /* 0x65 */
6687 case SVM_VMEXIT_IDTR_READ: return "IDTR read"; /* 0x66 */
6688 case SVM_VMEXIT_GDTR_READ: return "GDTR read"; /* 0x67 */
6689 case SVM_VMEXIT_LDTR_READ: return "LDTR read"; /* 0x68 */
6690 case SVM_VMEXIT_TR_READ: return "TR read"; /* 0x69 */
6691 case SVM_VMEXIT_IDTR_WRITE: return "IDTR write"; /* 0x6A */
6692 case SVM_VMEXIT_GDTR_WRITE: return "GDTR write"; /* 0x6B */
6693 case SVM_VMEXIT_LDTR_WRITE: return "LDTR write"; /* 0x6C */
6694 case SVM_VMEXIT_TR_WRITE: return "TR write"; /* 0x6D */
6695 case SVM_VMEXIT_RDTSC: return "RDTSC instruction"; /* 0x6E */
6696 case SVM_VMEXIT_RDPMC: return "RDPMC instruction"; /* 0x6F */
6697 case SVM_VMEXIT_PUSHF: return "PUSHF instruction"; /* 0x70 */
6698 case SVM_VMEXIT_POPF: return "POPF instruction"; /* 0x71 */
6699 case SVM_VMEXIT_CPUID: return "CPUID instruction"; /* 0x72 */
6700 case SVM_VMEXIT_RSM: return "RSM instruction"; /* 0x73 */
6701 case SVM_VMEXIT_IRET: return "IRET instruction"; /* 0x74 */
6702 case SVM_VMEXIT_SWINT: return "SWINT instruction"; /* 0x75 */
6703 case SVM_VMEXIT_INVD: return "INVD instruction"; /* 0x76 */
6704 case SVM_VMEXIT_PAUSE: return "PAUSE instruction"; /* 0x77 */
6705 case SVM_VMEXIT_HLT: return "HLT instruction"; /* 0x78 */
6706 case SVM_VMEXIT_INVLPG: return "INVLPG instruction"; /* 0x79 */
6707 case SVM_VMEXIT_INVLPGA: return "INVLPGA instruction"; /* 0x7A */
6708 case SVM_VMEXIT_IOIO: return "I/O instruction"; /* 0x7B */
6709 case SVM_VMEXIT_MSR: return "RDMSR/WRMSR instruction"; /* 0x7C */
6710 case SVM_VMEXIT_TASK_SWITCH: return "Task switch"; /* 0x7D */
6711 case SVM_VMEXIT_FERR_FREEZE: return "FERR_FREEZE"; /* 0x7E */
6712 case SVM_VMEXIT_SHUTDOWN: return "Triple fault"; /* 0x7F */
6713 case SVM_VMEXIT_VMRUN: return "VMRUN instruction"; /* 0x80 */
6714 case SVM_VMEXIT_VMMCALL: return "VMMCALL instruction"; /* 0x81 */
6715 case SVM_VMEXIT_VMLOAD: return "VMLOAD instruction"; /* 0x82 */
6716 case SVM_VMEXIT_VMSAVE: return "VMSAVE instruction"; /* 0x83 */
6717 case SVM_VMEXIT_STGI: return "STGI instruction"; /* 0x84 */
6718 case SVM_VMEXIT_CLGI: return "CLGI instruction"; /* 0x85 */
6719 case SVM_VMEXIT_SKINIT: return "SKINIT instruction"; /* 0x86 */
6720 case SVM_VMEXIT_RDTSCP: return "RDTSCP instruction"; /* 0x87 */
6721 case SVM_VMEXIT_ICEBP: return "ICEBP instruction"; /* 0x88 */
6722 case SVM_VMEXIT_WBINVD: return "WBINVD instruction"; /* 0x89 */
6723 case SVM_VMEXIT_MONITOR: return "MONITOR instruction"; /* 0x8A */
6724 case SVM_VMEXIT_MWAIT: return "MWAIT instruction"; /* 0x8B */
6725 case SVM_VMEXIT_MWAIT_CONDITIONAL: return "Cond MWAIT"; /* 0x8C */
6726 case SVM_VMEXIT_NPF: return "NPT violation"; /* 0x400 */
6727 default: return "unknown";
6728 }
6729 }
6730
6731 /*
6732 * vmx_instruction_error_decode
6733 *
6734 * Returns a human readable string describing the instruction error in 'code'
6735 */
6736 const char *
6737 vmx_instruction_error_decode(uint32_t code)
6738 {
6739 switch (code) {
6740 case 1: return "VMCALL: unsupported in VMX root";
6741 case 2: return "VMCLEAR: invalid paddr";
6742 case 3: return "VMCLEAR: VMXON pointer";
6743 case 4: return "VMLAUNCH: non-clear VMCS";
6744 case 5: return "VMRESUME: non-launched VMCS";
6745 case 6: return "VMRESUME: executed after VMXOFF";
6746 case 7: return "VM entry: invalid control field(s)";
6747 case 8: return "VM entry: invalid host state field(s)";
6748 case 9: return "VMPTRLD: invalid paddr";
6749 case 10: return "VMPTRLD: VMXON pointer";
6750 case 11: return "VMPTRLD: incorrect VMCS revid";
6751 case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
6752 case 13: return "VMWRITE: RO VMCS field";
6753 case 15: return "VMXON: unsupported in VMX root";
6754 case 20: return "VMCALL: invalid VM exit control fields";
6755 case 26: return "VM entry: blocked by MOV SS";
6756 case 28: return "Invalid operand to INVEPT/INVVPID";
6757 case 0x80000021: return "VM entry: invalid guest state";
6758 case 0x80000022: return "VM entry: failure due to MSR loading";
6759 case 0x80000029: return "VM entry: machine-check event";
6760 default: return "unknown";
6761 }
6762 }
6763
6764 /*
6765 * vcpu_state_decode
6766 *
6767 * Returns a human readable string describing the vcpu state in 'state'.
6768 */
6769 const char *
6770 vcpu_state_decode(u_int state)
6771 {
6772 switch (state) {
6773 case VCPU_STATE_STOPPED: return "stopped";
6774 case VCPU_STATE_RUNNING: return "running";
6775 case VCPU_STATE_REQTERM: return "requesting termination";
6776 case VCPU_STATE_TERMINATED: return "terminated";
6777 case VCPU_STATE_UNKNOWN: return "unknown";
6778 default: return "invalid";
6779 }
6780 }
6781
6782 #ifdef VMM_DEBUG
6783 /*
6784 * dump_vcpu
6785 *
6786 * Dumps the VMX capabilities of vcpu 'vcpu'
6787 */
6788 void
6789 dump_vcpu(struct vcpu *vcpu)
6790 {
6791 printf("vcpu @ %p\n", vcpu);
6792 printf(" parent vm @ %p\n", vcpu->vc_parent);
6793 printf(" mode: ");
6794 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
6795 printf("VMX\n");
6796 printf(" pinbased ctls: 0x%llx\n",
6797 vcpu->vc_vmx_pinbased_ctls);
6798 printf(" true pinbased ctls: 0x%llx\n",
6799 vcpu->vc_vmx_true_pinbased_ctls);
6800 CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING);
6801 CTRL_DUMP(vcpu, PINBASED, NMI_EXITING);
6802 CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS);
6803 CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER);
6804 CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS);
6805 printf(" procbased ctls: 0x%llx\n",
6806 vcpu->vc_vmx_procbased_ctls);
6807 printf(" true procbased ctls: 0x%llx\n",
6808 vcpu->vc_vmx_true_procbased_ctls);
6809 CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING);
6810 CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING);
6811 CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING);
6812 CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING);
6813 CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING);
6814 CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING);
6815 CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING);
6816 CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING);
6817 CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING);
6818 CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING);
6819 CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING);
6820 CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW);
6821 CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING);
6822 CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING);
6823 CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING);
6824 CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS);
6825 CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG);
6826 CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS);
6827 CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING);
6828 CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING);
6829 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6830 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
6831 printf(" procbased2 ctls: 0x%llx\n",
6832 vcpu->vc_vmx_procbased2_ctls);
6833 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC);
6834 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT);
6835 CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING);
6836 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP);
6837 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE);
6838 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID);
6839 CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING);
6840 CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST);
6841 CTRL_DUMP(vcpu, PROCBASED2,
6842 APIC_REGISTER_VIRTUALIZATION);
6843 CTRL_DUMP(vcpu, PROCBASED2,
6844 VIRTUAL_INTERRUPT_DELIVERY);
6845 CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING);
6846 CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING);
6847 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID);
6848 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS);
6849 CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING);
6850 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING);
6851 CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING);
6852 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML);
6853 CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE);
6854 CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT);
6855 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS);
6856 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING);
6857 }
6858 printf(" entry ctls: 0x%llx\n",
6859 vcpu->vc_vmx_entry_ctls);
6860 printf(" true entry ctls: 0x%llx\n",
6861 vcpu->vc_vmx_true_entry_ctls);
6862 CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS);
6863 CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST);
6864 CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM);
6865 CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT);
6866 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY);
6867 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY);
6868 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY);
6869 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY);
6870 CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT);
6871 printf(" exit ctls: 0x%llx\n",
6872 vcpu->vc_vmx_exit_ctls);
6873 printf(" true exit ctls: 0x%llx\n",
6874 vcpu->vc_vmx_true_exit_ctls);
6875 CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS);
6876 CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE);
6877 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT);
6878 CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT);
6879 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT);
6880 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT);
6881 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT);
6882 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT);
6883 CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER);
6884 CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT);
6885 CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT);
6886 }
6887 }
6888
6889 /*
6890 * vmx_dump_vmcs_field
6891 *
6892 * Debug function to dump the contents of a single VMCS field
6893 *
6894 * Parameters:
6895 * fieldid: VMCS Field ID
6896 * msg: string to display
6897 */
6898 void
6899 vmx_dump_vmcs_field(uint16_t fieldid, const char *msg)
6900 {
6901 uint8_t width;
6902 uint64_t val;
6903
6904
6905 DPRINTF("%s (0x%04x): ", msg, fieldid);
6906 if (vmread(fieldid, &val))
6907 DPRINTF("???? ");
6908 else {
6909 /*
6910 * Field width encoding: bits 14:13
6911 *
6912 * 0: 16-bit
6913 * 1: 64-bit
6914 * 2: 32-bit
6915 * 3: natural width
6916 */
6917 width = (fieldid >> 13) & 0x3;
6918 switch (width) {
6919 case 0: DPRINTF("0x%04llx ", val); break;
6920 case 1:
6921 case 3: DPRINTF("0x%016llx ", val); break;
6922 case 2: DPRINTF("0x%08llx ", val);
6923 }
6924 }
6925 }
6926
6927 /*
6928 * vmx_dump_vmcs
6929 *
6930 * Debug function to dump the contents of the current VMCS.
6931 */
6932 void
6933 vmx_dump_vmcs(struct vcpu *vcpu)
6934 {
6935 int has_sec, i;
6936 uint32_t cr3_tgt_ct;
6937
6938 /* XXX save and load new vmcs, restore at end */
6939
6940 DPRINTF("--CURRENT VMCS STATE--\n");
6941 printf("VMCS launched: %s\n",
6942 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? "Yes" : "No");
6943 DPRINTF("VMXON revision : 0x%x\n",
6944 curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
6945 DPRINTF("CR0 fixed0: 0x%llx\n",
6946 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0);
6947 DPRINTF("CR0 fixed1: 0x%llx\n",
6948 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
6949 DPRINTF("CR4 fixed0: 0x%llx\n",
6950 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0);
6951 DPRINTF("CR4 fixed1: 0x%llx\n",
6952 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
6953 DPRINTF("MSR table size: 0x%x\n",
6954 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1));
6955
6956 has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6957 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1);
6958
6959 if (has_sec) {
6960 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6961 IA32_VMX_ENABLE_VPID, 1)) {
6962 vmx_dump_vmcs_field(VMCS_GUEST_VPID, "VPID");
6963 }
6964 }
6965
6966 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
6967 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
6968 vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR,
6969 "Posted Int Notif Vec");
6970 }
6971
6972 if (has_sec) {
6973 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6974 IA32_VMX_EPT_VIOLATION_VE, 1)) {
6975 vmx_dump_vmcs_field(VMCS_EPTP_INDEX, "EPTP idx");
6976 }
6977 }
6978
6979 DPRINTF("\n");
6980 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL, "G.ES");
6981 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL, "G.CS");
6982 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL, "G.SS");
6983 DPRINTF("\n");
6984 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL, "G.DS");
6985 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL, "G.FS");
6986 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL, "G.GS");
6987 DPRINTF("\n");
6988 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL, "LDTR");
6989 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL, "G.TR");
6990
6991 if (has_sec) {
6992 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6993 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
6994 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS,
6995 "Int sts");
6996 }
6997
6998 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6999 IA32_VMX_ENABLE_PML, 1)) {
7000 vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX, "PML Idx");
7001 }
7002 }
7003
7004 DPRINTF("\n");
7005 vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL, "H.ES");
7006 vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL, "H.CS");
7007 vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL, "H.SS");
7008 DPRINTF("\n");
7009 vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL, "H.DS");
7010 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL, "H.FS");
7011 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL, "H.GS");
7012 DPRINTF("\n");
7013
7014 vmx_dump_vmcs_field(VMCS_IO_BITMAP_A, "I/O Bitmap A");
7015 DPRINTF("\n");
7016 vmx_dump_vmcs_field(VMCS_IO_BITMAP_B, "I/O Bitmap B");
7017 DPRINTF("\n");
7018
7019 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7020 IA32_VMX_USE_MSR_BITMAPS, 1)) {
7021 vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS, "MSR Bitmap");
7022 DPRINTF("\n");
7023 }
7024
7025 vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS, "Exit Store MSRs");
7026 DPRINTF("\n");
7027 vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS, "Exit Load MSRs");
7028 DPRINTF("\n");
7029 vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS, "Entry Load MSRs");
7030 DPRINTF("\n");
7031 vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER, "Exec VMCS Ptr");
7032 DPRINTF("\n");
7033
7034 if (has_sec) {
7035 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7036 IA32_VMX_ENABLE_PML, 1)) {
7037 vmx_dump_vmcs_field(VMCS_PML_ADDRESS, "PML Addr");
7038 DPRINTF("\n");
7039 }
7040 }
7041
7042 vmx_dump_vmcs_field(VMCS_TSC_OFFSET, "TSC Offset");
7043 DPRINTF("\n");
7044
7045 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7046 IA32_VMX_USE_TPR_SHADOW, 1)) {
7047 vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS,
7048 "Virtual APIC Addr");
7049 DPRINTF("\n");
7050 }
7051
7052 if (has_sec) {
7053 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7054 IA32_VMX_VIRTUALIZE_APIC, 1)) {
7055 vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS,
7056 "APIC Access Addr");
7057 DPRINTF("\n");
7058 }
7059 }
7060
7061 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7062 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
7063 vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC,
7064 "Posted Int Desc Addr");
7065 DPRINTF("\n");
7066 }
7067
7068 if (has_sec) {
7069 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7070 IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) {
7071 vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS,
7072 "VM Function Controls");
7073 DPRINTF("\n");
7074 }
7075
7076 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7077 IA32_VMX_ENABLE_EPT, 1)) {
7078 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP,
7079 "EPT Pointer");
7080 DPRINTF("\n");
7081 }
7082
7083 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7084 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
7085 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_0,
7086 "EOI Exit Bitmap 0");
7087 DPRINTF("\n");
7088 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_1,
7089 "EOI Exit Bitmap 1");
7090 DPRINTF("\n");
7091 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_2,
7092 "EOI Exit Bitmap 2");
7093 DPRINTF("\n");
7094 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_3,
7095 "EOI Exit Bitmap 3");
7096 DPRINTF("\n");
7097 }
7098
7099 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7100 IA32_VMX_VMCS_SHADOWING, 1)) {
7101 vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS,
7102 "VMREAD Bitmap Addr");
7103 DPRINTF("\n");
7104 vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS,
7105 "VMWRITE Bitmap Addr");
7106 DPRINTF("\n");
7107 }
7108
7109 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7110 IA32_VMX_EPT_VIOLATION_VE, 1)) {
7111 vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS,
7112 "#VE Addr");
7113 DPRINTF("\n");
7114 }
7115
7116 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7117 IA32_VMX_ENABLE_XSAVES_XRSTORS, 1)) {
7118 vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP,
7119 "XSS exiting bitmap addr");
7120 DPRINTF("\n");
7121 }
7122
7123 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7124 IA32_VMX_ENABLE_ENCLS_EXITING, 1)) {
7125 vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP,
7126 "Encls exiting bitmap addr");
7127 DPRINTF("\n");
7128 }
7129
7130 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7131 IA32_VMX_ENABLE_TSC_SCALING, 1)) {
7132 vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER,
7133 "TSC scaling factor");
7134 DPRINTF("\n");
7135 }
7136
7137 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7138 IA32_VMX_ENABLE_EPT, 1)) {
7139 vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS,
7140 "Guest PA");
7141 DPRINTF("\n");
7142 }
7143 }
7144
7145 vmx_dump_vmcs_field(VMCS_LINK_POINTER, "VMCS Link Pointer");
7146 DPRINTF("\n");
7147 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL, "Guest DEBUGCTL");
7148 DPRINTF("\n");
7149
7150 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7151 IA32_VMX_LOAD_IA32_PAT_ON_ENTRY, 1) ||
7152 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7153 IA32_VMX_SAVE_IA32_PAT_ON_EXIT, 1)) {
7154 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT,
7155 "Guest PAT");
7156 DPRINTF("\n");
7157 }
7158
7159 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7160 IA32_VMX_LOAD_IA32_EFER_ON_ENTRY, 1) ||
7161 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7162 IA32_VMX_SAVE_IA32_EFER_ON_EXIT, 1)) {
7163 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER,
7164 "Guest EFER");
7165 DPRINTF("\n");
7166 }
7167
7168 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7169 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY, 1)) {
7170 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL,
7171 "Guest Perf Global Ctrl");
7172 DPRINTF("\n");
7173 }
7174
7175 if (has_sec) {
7176 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7177 IA32_VMX_ENABLE_EPT, 1)) {
7178 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE0, "Guest PDPTE0");
7179 DPRINTF("\n");
7180 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE1, "Guest PDPTE1");
7181 DPRINTF("\n");
7182 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE2, "Guest PDPTE2");
7183 DPRINTF("\n");
7184 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE3, "Guest PDPTE3");
7185 DPRINTF("\n");
7186 }
7187 }
7188
7189 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7190 IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY, 1) ||
7191 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7192 IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT, 1)) {
7193 vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS,
7194 "Guest BNDCFGS");
7195 DPRINTF("\n");
7196 }
7197
7198 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7199 IA32_VMX_LOAD_IA32_PAT_ON_EXIT, 1)) {
7200 vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT,
7201 "Host PAT");
7202 DPRINTF("\n");
7203 }
7204
7205 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7206 IA32_VMX_LOAD_IA32_EFER_ON_EXIT, 1)) {
7207 vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER,
7208 "Host EFER");
7209 DPRINTF("\n");
7210 }
7211
7212 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7213 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT, 1)) {
7214 vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL,
7215 "Host Perf Global Ctrl");
7216 DPRINTF("\n");
7217 }
7218
7219 vmx_dump_vmcs_field(VMCS_PINBASED_CTLS, "Pinbased Ctrls");
7220 vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS, "Procbased Ctrls");
7221 DPRINTF("\n");
7222 vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP, "Exception Bitmap");
7223 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK, "#PF Err Code Mask");
7224 DPRINTF("\n");
7225 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH, "#PF Err Code Match");
7226 vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT, "CR3 Tgt Count");
7227 DPRINTF("\n");
7228 vmx_dump_vmcs_field(VMCS_EXIT_CTLS, "Exit Ctrls");
7229 vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT, "Exit MSR Store Ct");
7230 DPRINTF("\n");
7231 vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT, "Exit MSR Load Ct");
7232 vmx_dump_vmcs_field(VMCS_ENTRY_CTLS, "Entry Ctrls");
7233 DPRINTF("\n");
7234 vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT, "Entry MSR Load Ct");
7235 vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO, "Entry Int. Info");
7236 DPRINTF("\n");
7237 vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
7238 "Entry Ex. Err Code");
7239 vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH, "Entry Insn Len");
7240 DPRINTF("\n");
7241
7242 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7243 IA32_VMX_USE_TPR_SHADOW, 1)) {
7244 vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD, "TPR Threshold");
7245 DPRINTF("\n");
7246 }
7247
7248 if (has_sec) {
7249 vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS, "2ndary Ctrls");
7250 DPRINTF("\n");
7251 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7252 IA32_VMX_PAUSE_LOOP_EXITING, 1)) {
7253 vmx_dump_vmcs_field(VMCS_PLE_GAP, "PLE Gap");
7254 vmx_dump_vmcs_field(VMCS_PLE_WINDOW, "PLE Window");
7255 }
7256 DPRINTF("\n");
7257 }
7258
7259 vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR, "Insn Error");
7260 vmx_dump_vmcs_field(VMCS_EXIT_REASON, "Exit Reason");
7261 DPRINTF("\n");
7262
7263 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO, "Exit Int. Info");
7264 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE,
7265 "Exit Int. Err Code");
7266 DPRINTF("\n");
7267
7268 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO, "IDT vect info");
7269 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE,
7270 "IDT vect err code");
7271 DPRINTF("\n");
7272
7273 vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH, "Insn Len");
7274 vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO, "Exit Insn Info");
7275 DPRINTF("\n");
7276
7277 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT, "G. ES Lim");
7278 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT, "G. CS Lim");
7279 DPRINTF("\n");
7280
7281 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT, "G. SS Lim");
7282 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT, "G. DS Lim");
7283 DPRINTF("\n");
7284
7285 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT, "G. FS Lim");
7286 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT, "G. GS Lim");
7287 DPRINTF("\n");
7288
7289 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT, "G. LDTR Lim");
7290 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT, "G. TR Lim");
7291 DPRINTF("\n");
7292
7293 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT, "G. GDTR Lim");
7294 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT, "G. IDTR Lim");
7295 DPRINTF("\n");
7296
7297 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR, "G. ES AR");
7298 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR, "G. CS AR");
7299 DPRINTF("\n");
7300
7301 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR, "G. SS AR");
7302 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR, "G. DS AR");
7303 DPRINTF("\n");
7304
7305 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR, "G. FS AR");
7306 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR, "G. GS AR");
7307 DPRINTF("\n");
7308
7309 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR, "G. LDTR AR");
7310 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR, "G. TR AR");
7311 DPRINTF("\n");
7312
7313 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST, "G. Int St.");
7314 vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE, "G. Act St.");
7315 DPRINTF("\n");
7316
7317 vmx_dump_vmcs_field(VMCS_GUEST_SMBASE, "G. SMBASE");
7318 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS, "G. SYSENTER CS");
7319 DPRINTF("\n");
7320
7321 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7322 IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER, 1)) {
7323 vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL,
7324 "VMX Preempt Timer");
7325 DPRINTF("\n");
7326 }
7327
7328 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS, "H. SYSENTER CS");
7329 DPRINTF("\n");
7330
7331 vmx_dump_vmcs_field(VMCS_CR0_MASK, "CR0 Mask");
7332 DPRINTF("\n");
7333 vmx_dump_vmcs_field(VMCS_CR4_MASK, "CR4 Mask");
7334 DPRINTF("\n");
7335
7336 vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW, "CR0 RD Shadow");
7337 DPRINTF("\n");
7338 vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW, "CR4 RD Shadow");
7339 DPRINTF("\n");
7340
7341 /* We assume all CPUs have the same max CR3 target ct */
7342 cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count;
7343 DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct);
7344 if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) {
7345 for (i = 0 ; i < cr3_tgt_ct; i++) {
7346 vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i),
7347 "CR3 Target");
7348 DPRINTF("\n");
7349 }
7350 } else {
7351 DPRINTF("(Bogus CR3 Target Count > %d)\n", VMX_MAX_CR3_TARGETS);
7352 }
7353
7354 vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION, "G. Exit Qual");
7355 DPRINTF("\n");
7356 vmx_dump_vmcs_field(VMCS_IO_RCX, "I/O RCX");
7357 DPRINTF("\n");
7358 vmx_dump_vmcs_field(VMCS_IO_RSI, "I/O RSI");
7359 DPRINTF("\n");
7360 vmx_dump_vmcs_field(VMCS_IO_RDI, "I/O RDI");
7361 DPRINTF("\n");
7362 vmx_dump_vmcs_field(VMCS_IO_RIP, "I/O RIP");
7363 DPRINTF("\n");
7364 vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS, "G. Lin Addr");
7365 DPRINTF("\n");
7366 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR0, "G. CR0");
7367 DPRINTF("\n");
7368 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR3, "G. CR3");
7369 DPRINTF("\n");
7370 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR4, "G. CR4");
7371 DPRINTF("\n");
7372 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE, "G. ES Base");
7373 DPRINTF("\n");
7374 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE, "G. CS Base");
7375 DPRINTF("\n");
7376 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE, "G. SS Base");
7377 DPRINTF("\n");
7378 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE, "G. DS Base");
7379 DPRINTF("\n");
7380 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE, "G. FS Base");
7381 DPRINTF("\n");
7382 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE, "G. GS Base");
7383 DPRINTF("\n");
7384 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE, "G. LDTR Base");
7385 DPRINTF("\n");
7386 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE, "G. TR Base");
7387 DPRINTF("\n");
7388 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE, "G. GDTR Base");
7389 DPRINTF("\n");
7390 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE, "G. IDTR Base");
7391 DPRINTF("\n");
7392 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR7, "G. DR7");
7393 DPRINTF("\n");
7394 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP, "G. RSP");
7395 DPRINTF("\n");
7396 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP, "G. RIP");
7397 DPRINTF("\n");
7398 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS, "G. RFLAGS");
7399 DPRINTF("\n");
7400 vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC, "G. Pend Dbg Exc");
7401 DPRINTF("\n");
7402 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP, "G. SYSENTER ESP");
7403 DPRINTF("\n");
7404 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP, "G. SYSENTER EIP");
7405 DPRINTF("\n");
7406 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR0, "H. CR0");
7407 DPRINTF("\n");
7408 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR3, "H. CR3");
7409 DPRINTF("\n");
7410 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR4, "H. CR4");
7411 DPRINTF("\n");
7412 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE, "H. FS Base");
7413 DPRINTF("\n");
7414 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE, "H. GS Base");
7415 DPRINTF("\n");
7416 vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE, "H. TR Base");
7417 DPRINTF("\n");
7418 vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE, "H. GDTR Base");
7419 DPRINTF("\n");
7420 vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE, "H. IDTR Base");
7421 DPRINTF("\n");
7422 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP, "H. SYSENTER ESP");
7423 DPRINTF("\n");
7424 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP, "H. SYSENTER EIP");
7425 DPRINTF("\n");
7426 vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP, "H. RSP");
7427 DPRINTF("\n");
7428 vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP, "H. RIP");
7429 DPRINTF("\n");
7430 }
7431
7432 /*
7433 * vmx_vcpu_dump_regs
7434 *
7435 * Debug function to print the register state of the supplied vcpu.
7436 * Note: the VMCS for 'vcpu' must be loaded on this physical CPU.
7437 *
7438 * Parameters:
7439 * vcpu - vcpu whose registers should be dumped
7440 */
7441 void
7442 vmx_vcpu_dump_regs(struct vcpu *vcpu)
7443 {
7444 uint64_t r;
7445 int i;
7446 struct vmx_msr_store *msr_store;
7447
7448 /* XXX reformat this for 32 bit guest as needed */
7449 DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu));
7450 i = vmm_get_guest_cpu_cpl(vcpu);
7451 if (i == -1)
7452 DPRINTF(" CPL=unknown\n");
7453 else
7454 DPRINTF(" CPL=%d\n", i);
7455 DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n",
7456 vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx,
7457 vcpu->vc_gueststate.vg_rcx);
7458 DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n",
7459 vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp,
7460 vcpu->vc_gueststate.vg_rdi);
7461 DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n",
7462 vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8,
7463 vcpu->vc_gueststate.vg_r9);
7464 DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n",
7465 vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11,
7466 vcpu->vc_gueststate.vg_r12);
7467 DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n",
7468 vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14,
7469 vcpu->vc_gueststate.vg_r15);
7470
7471 DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip);
7472 if (vmread(VMCS_GUEST_IA32_RSP, &r))
7473 DPRINTF("(error reading)\n");
7474 else
7475 DPRINTF("0x%016llx\n", r);
7476
7477 DPRINTF(" rflags=");
7478 if (vmread(VMCS_GUEST_IA32_RFLAGS, &r))
7479 DPRINTF("(error reading)\n");
7480 else {
7481 DPRINTF("0x%016llx ", r);
7482 vmm_decode_rflags(r);
7483 }
7484
7485 DPRINTF(" cr0=");
7486 if (vmread(VMCS_GUEST_IA32_CR0, &r))
7487 DPRINTF("(error reading)\n");
7488 else {
7489 DPRINTF("0x%016llx ", r);
7490 vmm_decode_cr0(r);
7491 }
7492
7493 DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2);
7494
7495 DPRINTF(" cr3=");
7496 if (vmread(VMCS_GUEST_IA32_CR3, &r))
7497 DPRINTF("(error reading)\n");
7498 else {
7499 DPRINTF("0x%016llx ", r);
7500 vmm_decode_cr3(r);
7501 }
7502
7503 DPRINTF(" cr4=");
7504 if (vmread(VMCS_GUEST_IA32_CR4, &r))
7505 DPRINTF("(error reading)\n");
7506 else {
7507 DPRINTF("0x%016llx ", r);
7508 vmm_decode_cr4(r);
7509 }
7510
7511 DPRINTF(" --Guest Segment Info--\n");
7512
7513 DPRINTF(" cs=");
7514 if (vmread(VMCS_GUEST_IA32_CS_SEL, &r))
7515 DPRINTF("(error reading)");
7516 else
7517 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7518
7519 DPRINTF(" base=");
7520 if (vmread(VMCS_GUEST_IA32_CS_BASE, &r))
7521 DPRINTF("(error reading)");
7522 else
7523 DPRINTF("0x%016llx", r);
7524
7525 DPRINTF(" limit=");
7526 if (vmread(VMCS_GUEST_IA32_CS_LIMIT, &r))
7527 DPRINTF("(error reading)");
7528 else
7529 DPRINTF("0x%016llx", r);
7530
7531 DPRINTF(" a/r=");
7532 if (vmread(VMCS_GUEST_IA32_CS_AR, &r))
7533 DPRINTF("(error reading)\n");
7534 else {
7535 DPRINTF("0x%04llx\n ", r);
7536 vmm_segment_desc_decode(r);
7537 }
7538
7539 DPRINTF(" ds=");
7540 if (vmread(VMCS_GUEST_IA32_DS_SEL, &r))
7541 DPRINTF("(error reading)");
7542 else
7543 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7544
7545 DPRINTF(" base=");
7546 if (vmread(VMCS_GUEST_IA32_DS_BASE, &r))
7547 DPRINTF("(error reading)");
7548 else
7549 DPRINTF("0x%016llx", r);
7550
7551 DPRINTF(" limit=");
7552 if (vmread(VMCS_GUEST_IA32_DS_LIMIT, &r))
7553 DPRINTF("(error reading)");
7554 else
7555 DPRINTF("0x%016llx", r);
7556
7557 DPRINTF(" a/r=");
7558 if (vmread(VMCS_GUEST_IA32_DS_AR, &r))
7559 DPRINTF("(error reading)\n");
7560 else {
7561 DPRINTF("0x%04llx\n ", r);
7562 vmm_segment_desc_decode(r);
7563 }
7564
7565 DPRINTF(" es=");
7566 if (vmread(VMCS_GUEST_IA32_ES_SEL, &r))
7567 DPRINTF("(error reading)");
7568 else
7569 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7570
7571 DPRINTF(" base=");
7572 if (vmread(VMCS_GUEST_IA32_ES_BASE, &r))
7573 DPRINTF("(error reading)");
7574 else
7575 DPRINTF("0x%016llx", r);
7576
7577 DPRINTF(" limit=");
7578 if (vmread(VMCS_GUEST_IA32_ES_LIMIT, &r))
7579 DPRINTF("(error reading)");
7580 else
7581 DPRINTF("0x%016llx", r);
7582
7583 DPRINTF(" a/r=");
7584 if (vmread(VMCS_GUEST_IA32_ES_AR, &r))
7585 DPRINTF("(error reading)\n");
7586 else {
7587 DPRINTF("0x%04llx\n ", r);
7588 vmm_segment_desc_decode(r);
7589 }
7590
7591 DPRINTF(" fs=");
7592 if (vmread(VMCS_GUEST_IA32_FS_SEL, &r))
7593 DPRINTF("(error reading)");
7594 else
7595 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7596
7597 DPRINTF(" base=");
7598 if (vmread(VMCS_GUEST_IA32_FS_BASE, &r))
7599 DPRINTF("(error reading)");
7600 else
7601 DPRINTF("0x%016llx", r);
7602
7603 DPRINTF(" limit=");
7604 if (vmread(VMCS_GUEST_IA32_FS_LIMIT, &r))
7605 DPRINTF("(error reading)");
7606 else
7607 DPRINTF("0x%016llx", r);
7608
7609 DPRINTF(" a/r=");
7610 if (vmread(VMCS_GUEST_IA32_FS_AR, &r))
7611 DPRINTF("(error reading)\n");
7612 else {
7613 DPRINTF("0x%04llx\n ", r);
7614 vmm_segment_desc_decode(r);
7615 }
7616
7617 DPRINTF(" gs=");
7618 if (vmread(VMCS_GUEST_IA32_GS_SEL, &r))
7619 DPRINTF("(error reading)");
7620 else
7621 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7622
7623 DPRINTF(" base=");
7624 if (vmread(VMCS_GUEST_IA32_GS_BASE, &r))
7625 DPRINTF("(error reading)");
7626 else
7627 DPRINTF("0x%016llx", r);
7628
7629 DPRINTF(" limit=");
7630 if (vmread(VMCS_GUEST_IA32_GS_LIMIT, &r))
7631 DPRINTF("(error reading)");
7632 else
7633 DPRINTF("0x%016llx", r);
7634
7635 DPRINTF(" a/r=");
7636 if (vmread(VMCS_GUEST_IA32_GS_AR, &r))
7637 DPRINTF("(error reading)\n");
7638 else {
7639 DPRINTF("0x%04llx\n ", r);
7640 vmm_segment_desc_decode(r);
7641 }
7642
7643 DPRINTF(" ss=");
7644 if (vmread(VMCS_GUEST_IA32_SS_SEL, &r))
7645 DPRINTF("(error reading)");
7646 else
7647 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7648
7649 DPRINTF(" base=");
7650 if (vmread(VMCS_GUEST_IA32_SS_BASE, &r))
7651 DPRINTF("(error reading)");
7652 else
7653 DPRINTF("0x%016llx", r);
7654
7655 DPRINTF(" limit=");
7656 if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r))
7657 DPRINTF("(error reading)");
7658 else
7659 DPRINTF("0x%016llx", r);
7660
7661 DPRINTF(" a/r=");
7662 if (vmread(VMCS_GUEST_IA32_SS_AR, &r))
7663 DPRINTF("(error reading)\n");
7664 else {
7665 DPRINTF("0x%04llx\n ", r);
7666 vmm_segment_desc_decode(r);
7667 }
7668
7669 DPRINTF(" tr=");
7670 if (vmread(VMCS_GUEST_IA32_TR_SEL, &r))
7671 DPRINTF("(error reading)");
7672 else
7673 DPRINTF("0x%04llx", r);
7674
7675 DPRINTF(" base=");
7676 if (vmread(VMCS_GUEST_IA32_TR_BASE, &r))
7677 DPRINTF("(error reading)");
7678 else
7679 DPRINTF("0x%016llx", r);
7680
7681 DPRINTF(" limit=");
7682 if (vmread(VMCS_GUEST_IA32_TR_LIMIT, &r))
7683 DPRINTF("(error reading)");
7684 else
7685 DPRINTF("0x%016llx", r);
7686
7687 DPRINTF(" a/r=");
7688 if (vmread(VMCS_GUEST_IA32_TR_AR, &r))
7689 DPRINTF("(error reading)\n");
7690 else {
7691 DPRINTF("0x%04llx\n ", r);
7692 vmm_segment_desc_decode(r);
7693 }
7694
7695 DPRINTF(" gdtr base=");
7696 if (vmread(VMCS_GUEST_IA32_GDTR_BASE, &r))
7697 DPRINTF("(error reading) ");
7698 else
7699 DPRINTF("0x%016llx", r);
7700
7701 DPRINTF(" limit=");
7702 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &r))
7703 DPRINTF("(error reading)\n");
7704 else
7705 DPRINTF("0x%016llx\n", r);
7706
7707 DPRINTF(" idtr base=");
7708 if (vmread(VMCS_GUEST_IA32_IDTR_BASE, &r))
7709 DPRINTF("(error reading) ");
7710 else
7711 DPRINTF("0x%016llx", r);
7712
7713 DPRINTF(" limit=");
7714 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &r))
7715 DPRINTF("(error reading)\n");
7716 else
7717 DPRINTF("0x%016llx\n", r);
7718
7719 DPRINTF(" ldtr=");
7720 if (vmread(VMCS_GUEST_IA32_LDTR_SEL, &r))
7721 DPRINTF("(error reading)");
7722 else
7723 DPRINTF("0x%04llx", r);
7724
7725 DPRINTF(" base=");
7726 if (vmread(VMCS_GUEST_IA32_LDTR_BASE, &r))
7727 DPRINTF("(error reading)");
7728 else
7729 DPRINTF("0x%016llx", r);
7730
7731 DPRINTF(" limit=");
7732 if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT, &r))
7733 DPRINTF("(error reading)");
7734 else
7735 DPRINTF("0x%016llx", r);
7736
7737 DPRINTF(" a/r=");
7738 if (vmread(VMCS_GUEST_IA32_LDTR_AR, &r))
7739 DPRINTF("(error reading)\n");
7740 else {
7741 DPRINTF("0x%04llx\n ", r);
7742 vmm_segment_desc_decode(r);
7743 }
7744
7745 DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
7746 (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
7747 (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);
7748
7749 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
7750
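/*
 * Each slot in the exit-time MSR save area is an index/data pair
 * (vms_index, vms_data); the loop below prints and decodes each one.
 */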
7751 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
7752 DPRINTF(" MSR %d @ %p : 0x%08llx (%s), "
7753 "value=0x%016llx ",
7754 i, &msr_store[i], msr_store[i].vms_index,
7755 msr_name_decode(msr_store[i].vms_index),
7756 msr_store[i].vms_data);
7757 vmm_decode_msr_value(msr_store[i].vms_index,
7758 msr_store[i].vms_data);
7759 }
7760 }
7761
7762 /*
7763 * msr_name_decode
7764 *
7765 * Returns a human-readable name for the MSR supplied in 'msr'.
7766 *
7767 * Parameters:
7768 * msr - The MSR to decode
7769 *
7770 * Return value:
7771 * NULL-terminated character string containing the name of the MSR requested
7772 */
7773 const char *
7774 msr_name_decode(uint32_t msr)
7775 {
7776 /*
7777 * Add as needed. Also consider adding a decode function when
7778 * adding to this table.
7779 */
7780
7781 switch (msr) {
7782 case MSR_TSC: return "TSC";
7783 case MSR_APICBASE: return "APIC base";
7784 case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control";
7785 case MSR_PERFCTR0: return "perf counter 0";
7786 case MSR_PERFCTR1: return "perf counter 1";
7787 case MSR_TEMPERATURE_TARGET: return "temperature target";
7788 case MSR_MTRRcap: return "MTRR cap";
7789 case MSR_PERF_STATUS: return "perf status";
7790 case MSR_PERF_CTL: return "perf control";
7791 case MSR_MTRRvarBase: return "MTRR variable base";
7792 case MSR_MTRRfix64K_00000: return "MTRR fixed 64K";
7793 case MSR_MTRRfix16K_80000: return "MTRR fixed 16K";
7794 case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K";
7795 case MSR_CR_PAT: return "PAT";
7796 case MSR_MTRRdefType: return "MTRR default type";
7797 case MSR_EFER: return "EFER";
7798 case MSR_STAR: return "STAR";
7799 case MSR_LSTAR: return "LSTAR";
7800 case MSR_CSTAR: return "CSTAR";
7801 case MSR_SFMASK: return "SFMASK";
7802 case MSR_FSBASE: return "FSBASE";
7803 case MSR_GSBASE: return "GSBASE";
7804 case MSR_KERNELGSBASE: return "KGSBASE";
7805 case MSR_MISC_ENABLE: return "Misc Enable";
7806 default: return "Unknown MSR";
7807 }
7808 }
7809
7810 /*
7811 * vmm_segment_desc_decode
7812 *
7813 * Debug function to print segment information for the supplied descriptor.
7814 *
7815 * Parameters:
7816 * val - The A/R bytes for the segment descriptor to decode
7817 */
7818 void
7819 vmm_segment_desc_decode(uint64_t val)
7820 {
7821 uint16_t ar;
7822 uint8_t g, type, s, dpl, p, dib, l;
7823 uint32_t unusable;
7824
7825 /* Exit early on unusable descriptors */
7826 unusable = val & 0x10000;
7827 if (unusable) {
7828 DPRINTF("(unusable)\n");
7829 return;
7830 }
7831
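/*
 * VMX access-rights layout, as extracted below: bit 15 = granularity,
 * bit 14 = D/B, bit 13 = L (64-bit code), bit 7 = present,
 * bits 6:5 = DPL, bit 4 = S (code/data vs. system), bits 3:0 = type.
 */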
7832 ar = (uint16_t)val;
7833
7834 g = (ar & 0x8000) >> 15;
7835 dib = (ar & 0x4000) >> 14;
7836 l = (ar & 0x2000) >> 13;
7837 p = (ar & 0x80) >> 7;
7838 dpl = (ar & 0x60) >> 5;
7839 s = (ar & 0x10) >> 4;
7840 type = (ar & 0xf);
7841
7842 DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ",
7843 g, dib, l, p, s);
7844
7845 DPRINTF("type=");
7846 if (!s) {
7847 switch (type) {
7848 case SDT_SYSLDT: DPRINTF("ldt\n"); break;
7849 case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break;
7850 case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break;
7851 case SDT_SYS386CGT: DPRINTF("call gate\n"); break;
7852 case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break;
7853 case SDT_SYS386TGT: DPRINTF("trap gate\n"); break;
7854 /* XXX handle 32 bit segment types by inspecting mode */
7855 default: DPRINTF("unknown\n");
7856 }
7857 } else {
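/*
 * Code/data descriptor types in the SDT_* namespace start at 16,
 * so add 16 to the 4-bit type field before comparing.
 */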
7858 switch (type + 16) {
7859 case SDT_MEMRO: DPRINTF("data, r/o\n"); break;
7860 case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break;
7861 case SDT_MEMRW: DPRINTF("data, r/w\n"); break;
7862 case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break;
7863 case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break;
7864 case SDT_MEMRODA: DPRINTF("data, r/o, expand down, "
7865 "accessed\n");
7866 break;
7867 case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break;
7868 case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, "
7869 "accessed\n");
7870 break;
7871 case SDT_MEME: DPRINTF("code, x only\n"); break;
7872 case SDT_MEMEA: DPRINTF("code, x only, accessed\n"); break;
7873 case SDT_MEMER: DPRINTF("code, r/x\n"); break;
7874 case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break;
7875 case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break;
7876 case SDT_MEMEAC: DPRINTF("code, x only, conforming, "
7877 "accessed\n");
7878 break;
7879 case SDT_MEMERC: DPRINTF("code, r/x, conforming\n"); break;
7880 case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n");
7881 break;
7882 }
7883 }
7884 }
7885
7886 void
7887 vmm_decode_cr0(uint64_t cr0)
7888 {
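/*
 * Decode table convention (shared by the vmm_decode_* helpers below):
 * a set bit prints its upper-case name, a clear bit the lower-case one.
 */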
7889 struct vmm_reg_debug_info cr0_info[11] = {
7890 { CR0_PG, "PG ", "pg " },
7891 { CR0_CD, "CD ", "cd " },
7892 { CR0_NW, "NW ", "nw " },
7893 { CR0_AM, "AM ", "am " },
7894 { CR0_WP, "WP ", "wp " },
7895 { CR0_NE, "NE ", "ne " },
7896 { CR0_ET, "ET ", "et " },
7897 { CR0_TS, "TS ", "ts " },
7898 { CR0_EM, "EM ", "em " },
7899 { CR0_MP, "MP ", "mp " },
7900 { CR0_PE, "PE", "pe" }
7901 };
7902
7903 uint8_t i;
7904
7905 DPRINTF("(");
7906 for (i = 0; i < nitems(cr0_info); i++)
7907 if (cr0 & cr0_info[i].vrdi_bit)
7908 DPRINTF("%s", cr0_info[i].vrdi_present);
7909 else
7910 DPRINTF("%s", cr0_info[i].vrdi_absent);
7911
7912 DPRINTF(")\n");
7913 }
7914
7915 void
7916 vmm_decode_cr3(uint64_t cr3)
7917 {
7918 struct vmm_reg_debug_info cr3_info[2] = {
7919 { CR3_PWT, "PWT ", "pwt "},
7920 { CR3_PCD, "PCD", "pcd"}
7921 };
7922
7923 uint64_t cr4;
7924 uint8_t i;
7925
7926 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
7927 DPRINTF("(error)\n");
7928 return;
7929 }
7930
7931 /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
7932 if ((cr4 & CR4_PCIDE) == 0) {
7933 DPRINTF("(");
7934 for (i = 0 ; i < nitems(cr3_info) ; i++)
7935 if (cr3 & cr3_info[i].vrdi_bit)
7936 DPRINTF("%s", cr3_info[i].vrdi_present);
7937 else
7938 DPRINTF("%s", cr3_info[i].vrdi_absent);
7939
7940 DPRINTF(")\n");
7941 } else {
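/* With CR4.PCIDE set, CR3 bits 11:0 hold the current PCID. */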
7942 DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
7943 }
7944 }
7945
7946 void
7947 vmm_decode_cr4(uint64_t cr4)
7948 {
7949 struct vmm_reg_debug_info cr4_info[19] = {
7950 { CR4_PKE, "PKE ", "pke "},
7951 { CR4_SMAP, "SMAP ", "smap "},
7952 { CR4_SMEP, "SMEP ", "smep "},
7953 { CR4_OSXSAVE, "OSXSAVE ", "osxsave "},
7954 { CR4_PCIDE, "PCIDE ", "pcide "},
7955 { CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "},
7956 { CR4_SMXE, "SMXE ", "smxe "},
7957 { CR4_VMXE, "VMXE ", "vmxe "},
7958 { CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "},
7959 { CR4_OSFXSR, "OSFXSR ", "osfxsr "},
7960 { CR4_PCE, "PCE ", "pce "},
7961 { CR4_PGE, "PGE ", "pge "},
7962 { CR4_MCE, "MCE ", "mce "},
7963 { CR4_PAE, "PAE ", "pae "},
7964 { CR4_PSE, "PSE ", "pse "},
7965 { CR4_DE, "DE ", "de "},
7966 { CR4_TSD, "TSD ", "tsd "},
7967 { CR4_PVI, "PVI ", "pvi "},
7968 { CR4_VME, "VME", "vme"}
7969 };
7970
7971 uint8_t i;
7972
7973 DPRINTF("(");
7974 for (i = 0; i < nitems(cr4_info); i++)
7975 if (cr4 & cr4_info[i].vrdi_bit)
7976 DPRINTF("%s", cr4_info[i].vrdi_present);
7977 else
7978 DPRINTF("%s", cr4_info[i].vrdi_absent);
7979
7980 DPRINTF(")\n");
7981 }
7982
7983 void
7984 vmm_decode_apicbase_msr_value(uint64_t apicbase)
7985 {
7986 struct vmm_reg_debug_info apicbase_info[3] = {
7987 { APICBASE_BSP, "BSP ", "bsp "},
7988 { APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "},
7989 { APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"}
7990 };
7991
7992 uint8_t i;
7993
7994 DPRINTF("(");
7995 for (i = 0; i < nitems(apicbase_info); i++)
7996 if (apicbase & apicbase_info[i].vrdi_bit)
7997 DPRINTF("%s", apicbase_info[i].vrdi_present);
7998 else
7999 DPRINTF("%s", apicbase_info[i].vrdi_absent);
8000
8001 DPRINTF(")\n");
8002 }
8003
8004 void
8005 vmm_decode_ia32_fc_value(uint64_t fcr)
8006 {
8007 struct vmm_reg_debug_info fcr_info[4] = {
8008 { IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "},
8009 { IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "},
8010 { IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "},
8011 { IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "}
8012 };
8013
8014 uint8_t i;
8015
8016 DPRINTF("(");
8017 for (i = 0; i < nitems(fcr_info); i++)
8018 if (fcr & fcr_info[i].vrdi_bit)
8019 DPRINTF("%s", fcr_info[i].vrdi_present);
8020 else
8021 DPRINTF("%s", fcr_info[i].vrdi_absent);
8022
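/*
 * The SENTER parameter (local function enable) bits sit at bit 8 and
 * above in IA32_FEATURE_CONTROL, hence the shift below.
 */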
8023 if (fcr & IA32_FEATURE_CONTROL_SENTER_EN)
8024 DPRINTF(" [SENTER param = 0x%llx]",
8025 (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);
8026
8027 DPRINTF(")\n");
8028 }
8029
8030 void
8031 vmm_decode_mtrrcap_value(uint64_t val)
8032 {
8033 struct vmm_reg_debug_info mtrrcap_info[3] = {
8034 { MTRRcap_FIXED, "FIXED ", "fixed "},
8035 { MTRRcap_WC, "WC ", "wc "},
8036 { MTRRcap_SMRR, "SMRR ", "smrr "}
8037 };
8038
8039 uint8_t i;
8040
8041 DPRINTF("(");
8042 for (i = 0; i < nitems(mtrrcap_info); i++)
8043 if (val & mtrrcap_info[i].vrdi_bit)
8044 DPRINTF("%s", mtrrcap_info[i].vrdi_present);
8045 else
8046 DPRINTF("%s", mtrrcap_info[i].vrdi_absent);
8047
8048 if (val & MTRRcap_FIXED)
8049 DPRINTF(" [nr fixed ranges = 0x%llx]",
8050 (val & 0xff));
8051
8052 DPRINTF(")\n");
8053 }
8054
8055 void
8056 vmm_decode_perf_status_value(uint64_t val)
8057 {
8058 DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
8059 }
8060
8061 void
8062 vmm_decode_perf_ctl_value(uint64_t val)
8063 {
8064 DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
8065 DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF));
8066 }
8067
8068 void
8069 vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
8070 {
8071 struct vmm_reg_debug_info mtrrdeftype_info[2] = {
8072 { MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "},
8073 { MTRRdefType_ENABLE, "ENABLED ", "enabled "},
8074 };
8075
8076 uint8_t i;
8077 int type;
8078
8079 DPRINTF("(");
8080 for (i = 0; i < nitems(mtrrdeftype_info); i++)
8081 if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
8082 DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
8083 else
8084 DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);
8085
8086 DPRINTF("type = ");
8087 type = mtrr2mrt(mtrrdeftype & 0xff);
8088 switch (type) {
8089 case MDF_UNCACHEABLE: DPRINTF("UC"); break;
8090 case MDF_WRITECOMBINE: DPRINTF("WC"); break;
8091 case MDF_WRITETHROUGH: DPRINTF("WT"); break;
8092 case MDF_WRITEPROTECT: DPRINTF("RO"); break;
8093 case MDF_WRITEBACK: DPRINTF("WB"); break;
8094 case MDF_UNKNOWN:
8095 default:
8096 DPRINTF("??");
8097 break;
8098 }
8099
8100 DPRINTF(")\n");
8101 }
8102
8103 void
8104 vmm_decode_efer_value(uint64_t efer)
8105 {
8106 struct vmm_reg_debug_info efer_info[4] = {
8107 { EFER_SCE, "SCE ", "sce "},
8108 { EFER_LME, "LME ", "lme "},
8109 { EFER_LMA, "LMA ", "lma "},
8110 { EFER_NXE, "NXE", "nxe"},
8111 };
8112
8113 uint8_t i;
8114
8115 DPRINTF("(");
8116 for (i = 0; i < nitems(efer_info); i++)
8117 if (efer & efer_info[i].vrdi_bit)
8118 DPRINTF("%s", efer_info[i].vrdi_present);
8119 else
8120 DPRINTF("%s", efer_info[i].vrdi_absent);
8121
8122 DPRINTF(")\n");
8123 }
8124
8125 void
8126 vmm_decode_msr_value(uint64_t msr, uint64_t val)
8127 {
8128 switch (msr) {
8129 case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break;
8130 case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break;
8131 case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break;
8132 case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break;
8133 case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break;
8134 case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break;
8135 case MSR_EFER: vmm_decode_efer_value(val); break;
8136 case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
8137 default: DPRINTF("\n");
8138 }
8139 }
8140
8141 void
8142 vmm_decode_rflags(uint64_t rflags)
8143 {
8144 struct vmm_reg_debug_info rflags_info[16] = {
8145 { PSL_C, "CF ", "cf "},
8146 { PSL_PF, "PF ", "pf "},
8147 { PSL_AF, "AF ", "af "},
8148 { PSL_Z, "ZF ", "zf "},
8149 { PSL_N, "SF ", "sf "}, /* sign flag */
8150 { PSL_T, "TF ", "tf "},
8151 { PSL_I, "IF ", "if "},
8152 { PSL_D, "DF ", "df "},
8153 { PSL_V, "OF ", "of "}, /* overflow flag */
8154 { PSL_NT, "NT ", "nt "},
8155 { PSL_RF, "RF ", "rf "},
8156 { PSL_VM, "VM ", "vm "},
8157 { PSL_AC, "AC ", "ac "},
8158 { PSL_VIF, "VIF ", "vif "},
8159 { PSL_VIP, "VIP ", "vip "},
8160 { PSL_ID, "ID ", "id "},
8161 };
8162
8163 uint8_t i, iopl;
8164
8165 DPRINTF("(");
8166 for (i = 0; i < nitems(rflags_info); i++)
8167 if (rflags & rflags_info[i].vrdi_bit)
8168 DPRINTF("%s", rflags_info[i].vrdi_present);
8169 else
8170 DPRINTF("%s", rflags_info[i].vrdi_absent);
8171
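/* IOPL is the two-bit field at RFLAGS bits 13:12. */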
8172 iopl = (rflags & PSL_IOPL) >> 12;
8173 DPRINTF("IOPL=%d", iopl);
8174
8175 DPRINTF(")\n");
8176 }
8177
8178 void
8179 vmm_decode_misc_enable_value(uint64_t misc)
8180 {
8181 struct vmm_reg_debug_info misc_info[10] = {
8182 { MISC_ENABLE_FAST_STRINGS, "FSE ", "fse "},
8183 { MISC_ENABLE_TCC, "TCC ", "tcc "},
8184 { MISC_ENABLE_PERF_MON_AVAILABLE, "PERF ", "perf "},
8185 { MISC_ENABLE_BTS_UNAVAILABLE, "BTSU ", "btsu "},
8186 { MISC_ENABLE_PEBS_UNAVAILABLE, "PEBSU ", "pebsu "},
8187 { MISC_ENABLE_EIST_ENABLED, "EIST ", "eist "},
8188 { MISC_ENABLE_ENABLE_MONITOR_FSM, "MFSM ", "mfsm "},
8189 { MISC_ENABLE_LIMIT_CPUID_MAXVAL, "CMAX ", "cmax "},
8190 { MISC_ENABLE_xTPR_MESSAGE_DISABLE, "xTPRD ", "xtprd "},
8191 { MISC_ENABLE_XD_BIT_DISABLE, "NXD", "nxd"},
8192 };
8193
8194 uint8_t i;
8195
8196 DPRINTF("(");
8197 for (i = 0; i < nitems(misc_info); i++)
8198 if (misc & misc_info[i].vrdi_bit)
8199 DPRINTF("%s", misc_info[i].vrdi_present);
8200 else
8201 DPRINTF("%s", misc_info[i].vrdi_absent);
8202
8203 DPRINTF(")\n");
8204 }
8205
8206 const char *
8207 vmm_decode_cpu_mode(struct vcpu *vcpu)
8208 {
8209 int mode = vmm_get_guest_cpu_mode(vcpu);
8210
8211 switch (mode) {
8212 case VMM_CPU_MODE_REAL: return "real";
8213 case VMM_CPU_MODE_PROT: return "16 bit protected";
8214 case VMM_CPU_MODE_PROT32: return "32 bit protected";
8215 case VMM_CPU_MODE_COMPAT: return "compatibility";
8216 case VMM_CPU_MODE_LONG: return "long";
8217 default: return "unknown";
8218 }
8219 }
8220 #endif /* VMM_DEBUG */
8221