1 /* $OpenBSD: vmm_machdep.c,v 1.40 2024/11/10 22:35:31 jsg Exp $ */
2 /*
3 * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/malloc.h>
21 #include <sys/device.h>
22 #include <sys/pool.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/ioctl.h>
26 #include <sys/queue.h>
27 #include <sys/refcnt.h>
28 #include <sys/rwlock.h>
29 #include <sys/pledge.h>
30 #include <sys/memrange.h>
31 #include <sys/tracepoint.h>
32
33 #include <uvm/uvm_extern.h>
34
35 #include <machine/fpu.h>
36 #include <machine/pmap.h>
37 #include <machine/biosvar.h>
38 #include <machine/segments.h>
39 #include <machine/cpufunc.h>
40 #include <machine/vmmvar.h>
41
42 #include <dev/isa/isareg.h>
43 #include <dev/pv/pvreg.h>
44
45 #include <dev/vmm/vmm.h>
46
47 #ifdef MP_LOCKDEBUG
48 #include <ddb/db_output.h>
49 extern int __mp_lock_spinout;
50 #endif /* MP_LOCKDEBUG */
51
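/*
 * Buffer used by the software L1TF mitigation to displace the L1D cache
 * on cpus that lack the IA32_FLUSH_CMD (L1D flush) MSR.
 */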
52 void *l1tf_flush_region;
53
54 #define DEVNAME(s) ((s)->sc_dev.dv_xname)
55
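/*
 * Print whether VMX control 'z' of capability MSR class 'y' can be set
 * and cleared on vcpu 'x', as reported by vcpu_vmx_check_cap().
 */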
56 #define CTRL_DUMP(x,y,z) printf(" %s: Can set:%s Can clear:%s\n", #z , \
57 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
58 IA32_VMX_##z, 1) ? "Yes" : "No", \
59 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
60 IA32_VMX_##z, 0) ? "Yes" : "No");
61
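/* Bits returned by vmx_get_exit_info() for exit fields read successfully. */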
62 #define VMX_EXIT_INFO_HAVE_RIP 0x1
63 #define VMX_EXIT_INFO_HAVE_REASON 0x2
64 #define VMX_EXIT_INFO_COMPLETE \
65 (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON)
66
67 void vmx_dump_vmcs_field(uint16_t, const char *);
68 int vmm_enabled(void);
69 void vmm_activate_machdep(struct device *, int);
70 int vmmioctl_machdep(dev_t, u_long, caddr_t, int, struct proc *);
71 int vmm_quiesce_vmx(void);
72 int vm_run(struct vm_run_params *);
73 int vm_intr_pending(struct vm_intr_params *);
74 int vm_rwregs(struct vm_rwregs_params *, int);
75 int vm_rwvmparams(struct vm_rwvmparams_params *, int);
76 int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
77 int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
78 int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
79 int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
80 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
81 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
82 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
83 int vcpu_reload_vmcs_vmx(struct vcpu *);
84 int vcpu_init(struct vcpu *, struct vm_create_params *);
85 int vcpu_init_vmx(struct vcpu *);
86 int vcpu_init_svm(struct vcpu *, struct vm_create_params *);
87 int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
88 int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
89 void vcpu_deinit(struct vcpu *);
90 void vcpu_deinit_svm(struct vcpu *);
91 void vcpu_deinit_vmx(struct vcpu *);
92 int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
93 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
94 int vmx_get_exit_info(uint64_t *, uint64_t *);
95 int vmx_load_pdptes(struct vcpu *);
96 int vmx_handle_exit(struct vcpu *);
97 int svm_handle_exit(struct vcpu *);
98 int svm_handle_msr(struct vcpu *);
99 int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
100 int vmx_handle_xsetbv(struct vcpu *);
101 int svm_handle_xsetbv(struct vcpu *);
102 int vmm_handle_cpuid(struct vcpu *);
103 int vmx_handle_rdmsr(struct vcpu *);
104 int vmx_handle_wrmsr(struct vcpu *);
105 int vmx_handle_cr0_write(struct vcpu *, uint64_t);
106 int vmx_handle_cr4_write(struct vcpu *, uint64_t);
107 int vmx_handle_cr(struct vcpu *);
108 int svm_handle_inout(struct vcpu *);
109 int vmx_handle_inout(struct vcpu *);
110 int svm_handle_hlt(struct vcpu *);
111 int vmx_handle_hlt(struct vcpu *);
112 int vmm_inject_ud(struct vcpu *);
113 int vmm_inject_gp(struct vcpu *);
114 int vmm_inject_db(struct vcpu *);
115 void vmx_handle_intr(struct vcpu *);
116 void vmx_handle_misc_enable_msr(struct vcpu *);
117 int vmm_get_guest_memtype(struct vm *, paddr_t);
118 int vmx_get_guest_faulttype(void);
119 int svm_get_guest_faulttype(struct vmcb *);
120 int vmx_get_exit_qualification(uint64_t *);
121 int vmm_get_guest_cpu_cpl(struct vcpu *);
122 int vmm_get_guest_cpu_mode(struct vcpu *);
123 int svm_fault_page(struct vcpu *, paddr_t);
124 int vmx_fault_page(struct vcpu *, paddr_t);
125 int vmx_handle_np_fault(struct vcpu *);
126 int svm_handle_np_fault(struct vcpu *);
127 int vmm_alloc_vpid(uint16_t *);
128 void vmm_free_vpid(uint16_t);
129 const char *vcpu_state_decode(u_int);
130 const char *vmx_exit_reason_decode(uint32_t);
131 const char *svm_exit_reason_decode(uint32_t);
132 const char *vmx_instruction_error_decode(uint32_t);
133 void svm_setmsrbr(struct vcpu *, uint32_t);
134 void svm_setmsrbw(struct vcpu *, uint32_t);
135 void svm_setmsrbrw(struct vcpu *, uint32_t);
136 void vmx_setmsrbr(struct vcpu *, uint32_t);
137 void vmx_setmsrbw(struct vcpu *, uint32_t);
138 void vmx_setmsrbrw(struct vcpu *, uint32_t);
139 void svm_set_clean(struct vcpu *, uint32_t);
140 void svm_set_dirty(struct vcpu *, uint32_t);
141
142 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
143 void vmm_init_pvclock(struct vcpu *, paddr_t);
144 int vmm_update_pvclock(struct vcpu *);
145 int vmm_pat_is_valid(uint64_t);
146
147 #ifdef MULTIPROCESSOR
148 static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
149 #endif
150
151 #ifdef VMM_DEBUG
152 void vmx_vcpu_dump_regs(struct vcpu *);
153 void vmx_dump_vmcs(struct vcpu *);
154 const char *msr_name_decode(uint32_t);
155 void vmm_segment_desc_decode(uint64_t);
156 void vmm_decode_cr0(uint64_t);
157 void vmm_decode_cr3(uint64_t);
158 void vmm_decode_cr4(uint64_t);
159 void vmm_decode_msr_value(uint64_t, uint64_t);
160 void vmm_decode_apicbase_msr_value(uint64_t);
161 void vmm_decode_ia32_fc_value(uint64_t);
162 void vmm_decode_mtrrcap_value(uint64_t);
163 void vmm_decode_perf_status_value(uint64_t);
164 void vmm_decode_perf_ctl_value(uint64_t);
165 void vmm_decode_mtrrdeftype_value(uint64_t);
166 void vmm_decode_efer_value(uint64_t);
167 void vmm_decode_rflags(uint64_t);
168 void vmm_decode_misc_enable_value(uint64_t);
169 const char *vmm_decode_cpu_mode(struct vcpu *);
170
171 extern int mtrr2mrt(int);
172
173 struct vmm_reg_debug_info {
174 uint64_t vrdi_bit;
175 const char *vrdi_present;
176 const char *vrdi_absent;
177 };
178 #endif /* VMM_DEBUG */
179
180 extern uint64_t tsc_frequency;
181 extern int tsc_is_invariant;
182
183 const char *vmm_hv_signature = VMM_HV_SIGNATURE;
184
185 const struct kmem_pa_mode vmm_kp_contig = {
186 .kp_constraint = &no_constraint,
187 .kp_maxseg = 1,
188 .kp_align = 4096,
189 .kp_zero = 1,
190 };
191
192 extern struct cfdriver vmm_cd;
193 extern const struct cfattach vmm_ca;
194
195 /*
196 * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
197 * to access the individual fields of the guest segment registers. This
198 * struct is indexed by VCPU_REGS_* id.
199 */
200 const struct {
201 uint64_t selid;
202 uint64_t limitid;
203 uint64_t arid;
204 uint64_t baseid;
205 } vmm_vmx_sreg_vmcs_fields[] = {
206 { VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT,
207 VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE },
208 { VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT,
209 VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE },
210 { VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT,
211 VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE },
212 { VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT,
213 VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE },
214 { VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT,
215 VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE },
216 { VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT,
217 VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE },
218 { VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT,
219 VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE },
220 { VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT,
221 VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE }
222 };
223
224 /* Pools for VMs and VCPUs */
225 extern struct pool vm_pool;
226 extern struct pool vcpu_pool;
227
228 extern struct vmm_softc *vmm_softc;
229
230 /* IDT information used when populating host state area */
231 extern vaddr_t idt_vaddr;
232 extern struct gate_descriptor *idt;
233
234 /* Constants used in "CR access exit" */
235 #define CR_WRITE 0
236 #define CR_READ 1
237 #define CR_CLTS 2
238 #define CR_LMSW 3
239
240 /*
241 * vmm_enabled
242 *
243 * Checks if we have at least one CPU with either VMX or SVM.
244 * Returns 1 if we have at least one of either type, but not both, 0 otherwise.
245 */
246 int
vmm_enabled(void)
248 {
249 struct cpu_info *ci;
250 CPU_INFO_ITERATOR cii;
251 int found_vmx = 0, found_svm = 0;
252
253 /* Check if we have at least one CPU with either VMX or SVM */
254 CPU_INFO_FOREACH(cii, ci) {
255 if (ci->ci_vmm_flags & CI_VMM_VMX)
256 found_vmx = 1;
257 if (ci->ci_vmm_flags & CI_VMM_SVM)
258 found_svm = 1;
259 }
260
261 /* Don't support both SVM and VMX at the same time */
262 if (found_vmx && found_svm)
263 return (0);
264
265 if (found_vmx || found_svm)
266 return 1;
267
268 return 0;
269 }
270
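/*
 * vmm_attach_machdep
 *
 * Machine-dependent attach routine: counts EPT/RVI capable cpus, selects
 * the vmm mode, sets up the L1TF mitigation flush region if needed and
 * initializes the vpid/asid pool.
 */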
271 void
vmm_attach_machdep(struct device *parent, struct device *self, void *aux)
273 {
274 struct vmm_softc *sc = (struct vmm_softc *)self;
275 struct cpu_info *ci;
276 CPU_INFO_ITERATOR cii;
277
278 sc->sc_md.nr_rvi_cpus = 0;
279 sc->sc_md.nr_ept_cpus = 0;
280
281 /* Calculate CPU features */
282 CPU_INFO_FOREACH(cii, ci) {
283 if (ci->ci_vmm_flags & CI_VMM_RVI)
284 sc->sc_md.nr_rvi_cpus++;
285 if (ci->ci_vmm_flags & CI_VMM_EPT)
286 sc->sc_md.nr_ept_cpus++;
287 }
288
289 sc->sc_md.pkru_enabled = 0;
290 if (rcr4() & CR4_PKE)
291 sc->sc_md.pkru_enabled = 1;
292
293 if (sc->sc_md.nr_ept_cpus) {
294 printf(": VMX/EPT");
295 sc->mode = VMM_MODE_EPT;
296 } else if (sc->sc_md.nr_rvi_cpus) {
297 printf(": SVM/RVI");
298 sc->mode = VMM_MODE_RVI;
299 } else {
300 printf(": unknown");
301 sc->mode = VMM_MODE_UNKNOWN;
302 }
303
304 if (sc->mode == VMM_MODE_EPT) {
305 if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
306 l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE,
307 &kv_any, &vmm_kp_contig, &kd_waitok);
308 if (!l1tf_flush_region) {
309 printf(" (failing, no memory)");
310 sc->mode = VMM_MODE_UNKNOWN;
311 } else {
312 printf(" (using slow L1TF mitigation)");
313 memset(l1tf_flush_region, 0xcc,
314 VMX_L1D_FLUSH_SIZE);
315 }
316 }
317 }
318
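/* Size the vpid/asid pool: SVM reports its max ASID, VMX uses a fixed pool. */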
319 if (sc->mode == VMM_MODE_RVI) {
320 sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid;
321 } else {
322 sc->max_vpid = 0xFFF;
323 }
324
325 bzero(&sc->vpids, sizeof(sc->vpids));
326 rw_init(&sc->vpid_lock, "vpid");
327 }
328
329 /*
330 * vmm_quiesce_vmx
331 *
332 * Prepare the host for suspend by flushing all VMCS states.
333 */
334 int
vmm_quiesce_vmx(void)
336 {
337 struct vm *vm;
338 struct vcpu *vcpu;
339 int err;
340
341 /*
342 * We should be only called from a quiescing device state so we
343 * don't expect to sleep here. If we can't get all our locks,
344 * something is wrong.
345 */
346 if ((err = rw_enter(&vmm_softc->vm_lock, RW_WRITE | RW_NOSLEEP)))
347 return (err);
348
349 /* Iterate over each vm... */
350 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
351 /* Iterate over each vcpu... */
352 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
353 err = rw_enter(&vcpu->vc_lock, RW_WRITE | RW_NOSLEEP);
354 if (err)
355 break;
356
357 /* We can skip unlaunched VMCS. Nothing to flush. */
358 if (atomic_load_int(&vcpu->vc_vmx_vmcs_state)
359 != VMCS_LAUNCHED) {
360 DPRINTF("%s: skipping vcpu %d for vm %d\n",
361 __func__, vcpu->vc_id, vm->vm_id);
362 rw_exit_write(&vcpu->vc_lock);
363 continue;
364 }
365
366 #ifdef MULTIPROCESSOR
367 if (vcpu->vc_last_pcpu != curcpu()) {
368 /* Remote cpu vmclear via ipi. */
369 err = vmx_remote_vmclear(vcpu->vc_last_pcpu,
370 vcpu);
371 if (err)
372 printf("%s: failed to remote vmclear "
373 "vcpu %d of vm %d\n", __func__,
374 vcpu->vc_id, vm->vm_id);
375 } else
376 #endif
377 {
378 /* Local cpu vmclear instruction. */
379 if ((err = vmclear(&vcpu->vc_control_pa)))
380 printf("%s: failed to locally vmclear "
381 "vcpu %d of vm %d\n", __func__,
382 vcpu->vc_id, vm->vm_id);
383 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state,
384 VMCS_CLEARED);
385 }
386
387 rw_exit_write(&vcpu->vc_lock);
388 if (err)
389 break;
390 DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__,
391 vcpu->vc_id, vm->vm_id);
392 }
393 if (err)
394 break;
395 }
396 rw_exit_write(&vmm_softc->vm_lock);
397
398 if (err)
399 return (err);
400 return (0);
401 }
402
403 void
vmm_activate_machdep(struct device *self, int act)
405 {
406 struct cpu_info *ci = curcpu();
407
408 switch (act) {
409 case DVACT_QUIESCE:
410 /* If we're not in vmm mode, nothing to do. */
411 if ((ci->ci_flags & CPUF_VMM) == 0)
412 break;
413
414 /* Intel systems need extra steps to sync vcpu state. */
415 if (vmm_softc->mode == VMM_MODE_EPT)
416 if (vmm_quiesce_vmx())
417 DPRINTF("%s: vmx quiesce failed\n", __func__);
418
419 /* Stop virtualization mode on all cpus. */
420 vmm_stop();
421 break;
422
423 case DVACT_WAKEUP:
/* Restart virtualization mode on all cpus. */
425 if (vmm_softc->vm_ct > 0)
426 vmm_start();
427 break;
428 }
429 }
430
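/*
 * vmmioctl_machdep
 *
 * Machine-dependent ioctl handler; currently only handles VMM_IOC_INTR.
 */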
431 int
vmmioctl_machdep(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
433 {
434 int ret;
435
436 switch (cmd) {
437 case VMM_IOC_INTR:
438 ret = vm_intr_pending((struct vm_intr_params *)data);
439 break;
440 default:
441 DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
442 ret = ENOTTY;
443 }
444
445 return (ret);
446 }
447
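/*
 * pledge_ioctl_vmm_machdep
 *
 * Restricts the machine-dependent vmm ioctls available to pledged
 * processes; only VMM_IOC_INTR is permitted.
 */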
448 int
pledge_ioctl_vmm_machdep(struct proc *p, long com)
450 {
451 switch (com) {
452 case VMM_IOC_INTR:
453 return (0);
454 }
455
456 return (EPERM);
457 }
458
459 /*
460 * vm_intr_pending
461 *
462 * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
463 * interrupt is pending and needs acknowledgment
464 *
465 * Parameters:
466 * vip: Describes the vm/vcpu for which the interrupt is pending
467 *
468 * Return values:
469 * 0: if successful
470 * ENOENT: if the VM/VCPU defined by 'vip' cannot be found
471 */
472 int
vm_intr_pending(struct vm_intr_params *vip)
474 {
475 struct vm *vm;
476 struct vcpu *vcpu;
477 #ifdef MULTIPROCESSOR
478 struct cpu_info *ci;
479 #endif
480 int error, ret = 0;
481
482 /* Find the desired VM */
483 error = vm_find(vip->vip_vm_id, &vm);
484
485 /* Not found? exit. */
486 if (error != 0)
487 return (error);
488
489 vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
490
491 if (vcpu == NULL) {
492 ret = ENOENT;
493 goto out;
494 }
495
496 vcpu->vc_intr = vip->vip_intr;
497 #ifdef MULTIPROCESSOR
498 ci = READ_ONCE(vcpu->vc_curcpu);
499 if (ci != NULL)
500 x86_send_ipi(ci, X86_IPI_NOP);
501 #endif
502
503 out:
504 refcnt_rele_wake(&vm->vm_refcnt);
505 return (ret);
506 }
507
508 /*
509 * vm_rwvmparams
510 *
511 * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock
512 * version, etc.
513 *
514 * Parameters:
* vpp: Describes the VM and VCPU to get/set the params from
516 * dir: 0 for reading, 1 for writing
517 *
518 * Return values:
519 * 0: if successful
520 * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found
521 * EINVAL: if an error occurred reading the registers of the guest
522 */
523 int
vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir)
525 {
526 struct vm *vm;
527 struct vcpu *vcpu;
528 int error, ret = 0;
529
530 /* Find the desired VM */
531 error = vm_find(vpp->vpp_vm_id, &vm);
532
533 /* Not found? exit. */
534 if (error != 0)
535 return (error);
536
537 vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
538
539 if (vcpu == NULL) {
540 ret = ENOENT;
541 goto out;
542 }
543
544 if (dir == 0) {
545 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
546 vpp->vpp_pvclock_version = vcpu->vc_pvclock_version;
547 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA)
548 vpp->vpp_pvclock_system_gpa = \
549 vcpu->vc_pvclock_system_gpa;
550 } else {
551 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
552 vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
553 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) {
554 vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
555 }
556 }
557 out:
558 refcnt_rele_wake(&vm->vm_refcnt);
559 return (ret);
560 }
561
562 /*
* vm_rwregs
564 *
565 * IOCTL handler to read/write the current register values of a guest VCPU.
566 * The VCPU must not be running.
567 *
568 * Parameters:
569 * vrwp: Describes the VM and VCPU to get/set the registers from. The
570 * register values are returned here as well.
571 * dir: 0 for reading, 1 for writing
572 *
573 * Return values:
574 * 0: if successful
575 * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found
576 * EINVAL: if an error occurred accessing the registers of the guest
577 * EPERM: if the vm cannot be accessed from the calling process
578 */
579 int
vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
581 {
582 struct vm *vm;
583 struct vcpu *vcpu;
584 struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
585 int error, ret = 0;
586
587 /* Find the desired VM */
588 error = vm_find(vrwp->vrwp_vm_id, &vm);
589
590 /* Not found? exit. */
591 if (error != 0)
592 return (error);
593
594 vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
595
596 if (vcpu == NULL) {
597 ret = ENOENT;
598 goto out;
599 }
600
601 rw_enter_write(&vcpu->vc_lock);
602 if (vmm_softc->mode == VMM_MODE_EPT)
603 ret = (dir == 0) ?
604 vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) :
605 vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
606 else if (vmm_softc->mode == VMM_MODE_RVI)
607 ret = (dir == 0) ?
608 vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
609 vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
610 else {
611 DPRINTF("%s: unknown vmm mode", __func__);
612 ret = EINVAL;
613 }
614 rw_exit_write(&vcpu->vc_lock);
615 out:
616 refcnt_rele_wake(&vm->vm_refcnt);
617 return (ret);
618 }
619
620 /*
621 * vmm_start
622 *
623 * Starts VMM mode on the system
624 */
625 int
vmm_start(void)
627 {
628 int rv = 0;
629 struct cpu_info *self = curcpu();
630 #ifdef MULTIPROCESSOR
631 struct cpu_info *ci;
632 CPU_INFO_ITERATOR cii;
633 #ifdef MP_LOCKDEBUG
634 int nticks;
635 #endif /* MP_LOCKDEBUG */
636 #endif /* MULTIPROCESSOR */
637
638 rw_enter_write(&vmm_softc->sc_slock);
639
640 /* VMM is already running */
641 if (self->ci_flags & CPUF_VMM)
642 goto unlock;
643
644 /* Start VMM on this CPU */
645 start_vmm_on_cpu(self);
646 if (!(self->ci_flags & CPUF_VMM)) {
647 printf("%s: failed to enter VMM mode\n",
648 self->ci_dev->dv_xname);
649 rv = EIO;
650 goto unlock;
651 }
652
653 #ifdef MULTIPROCESSOR
654 /* Broadcast start VMM IPI */
655 x86_broadcast_ipi(X86_IPI_START_VMM);
656
657 CPU_INFO_FOREACH(cii, ci) {
658 if (ci == self)
659 continue;
660 #ifdef MP_LOCKDEBUG
661 nticks = __mp_lock_spinout;
662 #endif /* MP_LOCKDEBUG */
663 while (!(ci->ci_flags & CPUF_VMM)) {
664 CPU_BUSY_CYCLE();
665 #ifdef MP_LOCKDEBUG
666 if (--nticks <= 0) {
667 db_printf("%s: spun out", __func__);
668 db_enter();
669 nticks = __mp_lock_spinout;
670 }
671 #endif /* MP_LOCKDEBUG */
672 }
673 }
674 #endif /* MULTIPROCESSOR */
675 unlock:
676 rw_exit_write(&vmm_softc->sc_slock);
677 return (rv);
678 }
679
680 /*
681 * vmm_stop
682 *
683 * Stops VMM mode on the system
684 */
685 int
vmm_stop(void)
687 {
688 int rv = 0;
689 struct cpu_info *self = curcpu();
690 #ifdef MULTIPROCESSOR
691 struct cpu_info *ci;
692 CPU_INFO_ITERATOR cii;
693 #ifdef MP_LOCKDEBUG
694 int nticks;
695 #endif /* MP_LOCKDEBUG */
696 #endif /* MULTIPROCESSOR */
697
698 rw_enter_write(&vmm_softc->sc_slock);
699
700 /* VMM is not running */
701 if (!(self->ci_flags & CPUF_VMM))
702 goto unlock;
703
704 /* Stop VMM on this CPU */
705 stop_vmm_on_cpu(self);
706 if (self->ci_flags & CPUF_VMM) {
707 printf("%s: failed to exit VMM mode\n",
708 self->ci_dev->dv_xname);
709 rv = EIO;
710 goto unlock;
711 }
712
713 #ifdef MULTIPROCESSOR
714 /* Stop VMM on other CPUs */
715 x86_broadcast_ipi(X86_IPI_STOP_VMM);
716
717 CPU_INFO_FOREACH(cii, ci) {
718 if (ci == self)
719 continue;
720 #ifdef MP_LOCKDEBUG
721 nticks = __mp_lock_spinout;
722 #endif /* MP_LOCKDEBUG */
723 while ((ci->ci_flags & CPUF_VMM)) {
724 CPU_BUSY_CYCLE();
725 #ifdef MP_LOCKDEBUG
726 if (--nticks <= 0) {
db_printf("%s: spun out", __func__);
728 db_enter();
729 nticks = __mp_lock_spinout;
730 }
731 #endif /* MP_LOCKDEBUG */
732 }
733 }
734 #endif /* MULTIPROCESSOR */
735 unlock:
736 rw_exit_write(&vmm_softc->sc_slock);
737 return (0);
738 }
739
740 /*
741 * start_vmm_on_cpu
742 *
743 * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
744 * sequence to enter VMM mode (eg, VMXON)
745 */
746 void
start_vmm_on_cpu(struct cpu_info *ci)
748 {
749 uint64_t msr;
750 uint32_t cr4;
751 struct vmx_invept_descriptor vid;
752
753 /* No VMM mode? exit. */
754 if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 &&
755 (ci->ci_vmm_flags & CI_VMM_SVM) == 0)
756 return;
757
758 /*
759 * AMD SVM
760 */
761 if (ci->ci_vmm_flags & CI_VMM_SVM) {
762 msr = rdmsr(MSR_EFER);
763 msr |= EFER_SVME;
764 wrmsr(MSR_EFER, msr);
765 }
766
767 /*
768 * Intel VMX
769 */
770 if (ci->ci_vmm_flags & CI_VMM_VMX) {
771 if (ci->ci_vmxon_region == 0)
772 return;
773 else {
774 bzero(ci->ci_vmxon_region, PAGE_SIZE);
775 ci->ci_vmxon_region->vr_revision =
776 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
777
778 /* Enable VMX */
779 msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
780 if (msr & IA32_FEATURE_CONTROL_LOCK) {
781 if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
782 return;
783 } else {
784 msr |= IA32_FEATURE_CONTROL_VMX_EN |
785 IA32_FEATURE_CONTROL_LOCK;
786 wrmsr(MSR_IA32_FEATURE_CONTROL, msr);
787 }
788
789 /* Set CR4.VMXE */
790 cr4 = rcr4();
791 cr4 |= CR4_VMXE;
792 lcr4(cr4);
793
794 /* Enter VMX mode and clear EPTs on this cpu */
795 if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa))
796 panic("vmxon failed");
797
798 memset(&vid, 0, sizeof(vid));
799 if (invept(IA32_VMX_INVEPT_GLOBAL_CTX, &vid))
800 panic("invept failed");
801 }
802 }
803
804 atomic_setbits_int(&ci->ci_flags, CPUF_VMM);
805 }
806
807 /*
808 * stop_vmm_on_cpu
809 *
810 * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
811 * sequence to exit VMM mode (eg, VMXOFF)
812 */
813 void
stop_vmm_on_cpu(struct cpu_info *ci)
815 {
816 uint64_t msr;
817 uint32_t cr4;
818
819 if (!(ci->ci_flags & CPUF_VMM))
820 return;
821
822 /*
823 * AMD SVM
824 */
825 if (ci->ci_vmm_flags & CI_VMM_SVM) {
826 msr = rdmsr(MSR_EFER);
827 msr &= ~EFER_SVME;
828 wrmsr(MSR_EFER, msr);
829 }
830
831 /*
832 * Intel VMX
833 */
834 if (ci->ci_vmm_flags & CI_VMM_VMX) {
835 if (vmxoff())
836 panic("VMXOFF failed");
837
838 cr4 = rcr4();
839 cr4 &= ~CR4_VMXE;
840 lcr4(cr4);
841 }
842
843 atomic_clearbits_int(&ci->ci_flags, CPUF_VMM);
844 }
845
846 /*
847 * vmclear_on_cpu
848 *
849 * Flush and clear VMCS on 'ci' by executing vmclear.
850 *
851 */
852 void
vmclear_on_cpu(struct cpu_info *ci)
854 {
855 if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) {
856 if (vmclear(&ci->ci_vmcs_pa))
857 panic("VMCLEAR ipi failed");
858 atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR);
859 }
860 }
861
862 #ifdef MULTIPROCESSOR
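/*
 * vmx_remote_vmclear
 *
 * Requests a VMCLEAR of 'vcpu's VMCS on remote cpu 'ci' via ipi and spins
 * until the remote cpu signals completion.
 */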
863 static int
vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
865 {
866 #ifdef MP_LOCKDEBUG
867 int nticks = __mp_lock_spinout;
868 #endif /* MP_LOCKDEBUG */
869
870 rw_enter_write(&ci->ci_vmcs_lock);
871 atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa);
872 x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM);
873
874 while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) {
875 CPU_BUSY_CYCLE();
876 #ifdef MP_LOCKDEBUG
877 if (--nticks <= 0) {
878 db_printf("%s: spun out\n", __func__);
879 db_enter();
880 nticks = __mp_lock_spinout;
881 }
882 #endif /* MP_LOCKDEBUG */
883 }
884 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
885 rw_exit_write(&ci->ci_vmcs_lock);
886
887 return (0);
888 }
889 #endif /* MULTIPROCESSOR */
890
891 /*
892 * vm_impl_init
893 *
894 * VM address space initialization routine
895 *
896 * Parameters:
897 * vm: the VM being initialized
898 * p: vmd process owning the VM
899 *
900 * Return values:
901 * 0: the initialization was successful
902 * EINVAL: unsupported vmm mode
903 * ENOMEM: the initialization failed (lack of resources)
904 */
905 int
vm_impl_init(struct vm *vm, struct proc *p)
907 {
908 int i, mode, ret;
909 vaddr_t mingpa, maxgpa;
910 struct vm_mem_range *vmr;
911
912 /* If not EPT or RVI, nothing to do here */
913 switch (vmm_softc->mode) {
914 case VMM_MODE_EPT:
915 mode = PMAP_TYPE_EPT;
916 break;
917 case VMM_MODE_RVI:
918 mode = PMAP_TYPE_RVI;
919 break;
920 default:
921 printf("%s: invalid vmm mode %d\n", __func__, vmm_softc->mode);
922 return (EINVAL);
923 }
924
925 vmr = &vm->vm_memranges[0];
926 mingpa = vmr->vmr_gpa;
927 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
928 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
929
930 /*
931 * uvmspace_alloc (currently) always returns a valid vmspace
932 */
933 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE, FALSE);
934 vm->vm_map = &vm->vm_vmspace->vm_map;
935
936 /* Map the new map with an anon */
937 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
938 for (i = 0; i < vm->vm_nmemranges; i++) {
939 vmr = &vm->vm_memranges[i];
940 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
941 PROT_READ | PROT_WRITE | PROT_EXEC,
942 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
943 if (ret) {
944 printf("%s: uvm_share failed (%d)\n", __func__, ret);
945 /* uvmspace_free calls pmap_destroy for us */
946 KERNEL_LOCK();
947 uvmspace_free(vm->vm_vmspace);
948 vm->vm_vmspace = NULL;
949 KERNEL_UNLOCK();
950 return (ENOMEM);
951 }
952 }
953
954 pmap_convert(vm->vm_map->pmap, mode);
955
956 return (0);
957 }
958
959 void
vm_impl_deinit(struct vm *vm)
961 {
962 /* unused */
963 }
964
965 /*
966 * vcpu_reload_vmcs_vmx
967 *
968 * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
969 * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
970 * ipi will be used to remotely flush it before loading the VMCS locally.
971 *
972 * Parameters:
973 * vcpu: Pointer to the vcpu needing its VMCS
974 *
975 * Return values:
976 * 0: if successful
977 * EINVAL: an error occurred during flush or reload
978 */
979 int
vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
981 {
982 struct cpu_info *ci, *last_ci;
983
984 rw_assert_wrlock(&vcpu->vc_lock);
985
986 ci = curcpu();
987 last_ci = vcpu->vc_last_pcpu;
988
989 if (last_ci == NULL) {
990 /* First launch */
991 if (vmclear(&vcpu->vc_control_pa))
992 return (EINVAL);
993 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
994 #ifdef MULTIPROCESSOR
995 } else if (last_ci != ci) {
996 /* We've moved CPUs at some point, so remote VMCLEAR */
997 if (vmx_remote_vmclear(last_ci, vcpu))
998 return (EINVAL);
999 KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED);
1000 #endif /* MULTIPROCESSOR */
1001 }
1002
1003 if (vmptrld(&vcpu->vc_control_pa)) {
1004 printf("%s: vmptrld\n", __func__);
1005 return (EINVAL);
1006 }
1007
1008 return (0);
1009 }
1010
1011 /*
1012 * vcpu_readregs_vmx
1013 *
1014 * Reads 'vcpu's registers
1015 *
1016 * Parameters:
1017 * vcpu: the vcpu to read register values from
1018 * regmask: the types of registers to read
1019 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1020 * vrs: output parameter where register values are stored
1021 *
1022 * Return values:
1023 * 0: if successful
1024 * EINVAL: an error reading registers occurred
1025 */
1026 int
vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
    struct vcpu_reg_state *vrs)
1029 {
1030 int i, ret = 0;
1031 uint64_t sel, limit, ar;
1032 uint64_t *gprs = vrs->vrs_gprs;
1033 uint64_t *crs = vrs->vrs_crs;
1034 uint64_t *msrs = vrs->vrs_msrs;
1035 uint64_t *drs = vrs->vrs_drs;
1036 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1037 struct vmx_msr_store *msr_store;
1038
1039 if (loadvmcs) {
1040 if (vcpu_reload_vmcs_vmx(vcpu))
1041 return (EINVAL);
1042 }
1043
1044 #ifdef VMM_DEBUG
1045 /* VMCS should be loaded... */
1046 paddr_t pa = 0ULL;
1047 if (vmptrst(&pa))
1048 panic("%s: vmptrst", __func__);
1049 KASSERT(pa == vcpu->vc_control_pa);
1050 #endif /* VMM_DEBUG */
1051
1052 if (regmask & VM_RWREGS_GPRS) {
1053 gprs[VCPU_REGS_RAX] = vcpu->vc_gueststate.vg_rax;
1054 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1055 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1056 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1057 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1058 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1059 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1060 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1061 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1062 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1063 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1064 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1065 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1066 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1067 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1068 gprs[VCPU_REGS_RIP] = vcpu->vc_gueststate.vg_rip;
1069 if (vmread(VMCS_GUEST_IA32_RSP, &gprs[VCPU_REGS_RSP]))
1070 goto errout;
1071 if (vmread(VMCS_GUEST_IA32_RFLAGS, &gprs[VCPU_REGS_RFLAGS]))
1072 goto errout;
1073 }
1074
1075 if (regmask & VM_RWREGS_SREGS) {
1076 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1077 if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
1078 goto errout;
1079 if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
1080 goto errout;
1081 if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
1082 goto errout;
1083 if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
1084 &sregs[i].vsi_base))
1085 goto errout;
1086
1087 sregs[i].vsi_sel = sel;
1088 sregs[i].vsi_limit = limit;
1089 sregs[i].vsi_ar = ar;
1090 }
1091
1092 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &limit))
1093 goto errout;
1094 if (vmread(VMCS_GUEST_IA32_GDTR_BASE,
1095 &vrs->vrs_gdtr.vsi_base))
1096 goto errout;
1097 vrs->vrs_gdtr.vsi_limit = limit;
1098
1099 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &limit))
1100 goto errout;
1101 if (vmread(VMCS_GUEST_IA32_IDTR_BASE,
1102 &vrs->vrs_idtr.vsi_base))
1103 goto errout;
1104 vrs->vrs_idtr.vsi_limit = limit;
1105 }
1106
1107 if (regmask & VM_RWREGS_CRS) {
1108 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1109 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1110 if (vmread(VMCS_GUEST_IA32_CR0, &crs[VCPU_REGS_CR0]))
1111 goto errout;
1112 if (vmread(VMCS_GUEST_IA32_CR3, &crs[VCPU_REGS_CR3]))
1113 goto errout;
1114 if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4]))
1115 goto errout;
1116 if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0]))
1117 goto errout;
1118 if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1]))
1119 goto errout;
1120 if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2]))
1121 goto errout;
1122 if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3]))
1123 goto errout;
1124 }
1125
1126 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1127
1128 if (regmask & VM_RWREGS_MSRS) {
1129 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1130 msrs[i] = msr_store[i].vms_data;
1131 }
1132 }
1133
1134 if (regmask & VM_RWREGS_DRS) {
1135 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1136 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1137 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1138 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1139 drs[VCPU_REGS_DR6] = vcpu->vc_gueststate.vg_dr6;
1140 if (vmread(VMCS_GUEST_IA32_DR7, &drs[VCPU_REGS_DR7]))
1141 goto errout;
1142 }
1143
1144 goto out;
1145
1146 errout:
1147 ret = EINVAL;
1148 out:
1149 return (ret);
1150 }
1151
1152 /*
1153 * vcpu_readregs_svm
1154 *
1155 * Reads 'vcpu's registers
1156 *
1157 * Parameters:
1158 * vcpu: the vcpu to read register values from
1159 * regmask: the types of registers to read
1160 * vrs: output parameter where register values are stored
1161 *
1162 * Return values:
1163 * 0: if successful
1164 */
1165 int
vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
    struct vcpu_reg_state *vrs)
1168 {
1169 uint64_t *gprs = vrs->vrs_gprs;
1170 uint64_t *crs = vrs->vrs_crs;
1171 uint64_t *msrs = vrs->vrs_msrs;
1172 uint64_t *drs = vrs->vrs_drs;
1173 uint32_t attr;
1174 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1175 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1176
1177 if (regmask & VM_RWREGS_GPRS) {
1178 gprs[VCPU_REGS_RAX] = vmcb->v_rax;
1179 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx;
1180 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx;
1181 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx;
1182 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi;
1183 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi;
1184 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8;
1185 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9;
1186 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10;
1187 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11;
1188 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12;
1189 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13;
1190 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
1191 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
1192 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
1193 gprs[VCPU_REGS_RIP] = vmcb->v_rip;
1194 gprs[VCPU_REGS_RSP] = vmcb->v_rsp;
1195 gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags;
1196 }
1197
1198 if (regmask & VM_RWREGS_SREGS) {
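/*
 * struct vcpu_segment_info stores access rights in the VMX layout;
 * convert the packed SVM attribute bits (AVL/L/DB/G at bits 8-11)
 * up to bits 12-15.
 */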
1199 sregs[VCPU_REGS_CS].vsi_sel = vmcb->v_cs.vs_sel;
1200 sregs[VCPU_REGS_CS].vsi_limit = vmcb->v_cs.vs_lim;
1201 attr = vmcb->v_cs.vs_attr;
1202 sregs[VCPU_REGS_CS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1203 0xf000);
1204 sregs[VCPU_REGS_CS].vsi_base = vmcb->v_cs.vs_base;
1205
1206 sregs[VCPU_REGS_DS].vsi_sel = vmcb->v_ds.vs_sel;
1207 sregs[VCPU_REGS_DS].vsi_limit = vmcb->v_ds.vs_lim;
1208 attr = vmcb->v_ds.vs_attr;
1209 sregs[VCPU_REGS_DS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1210 0xf000);
1211 sregs[VCPU_REGS_DS].vsi_base = vmcb->v_ds.vs_base;
1212
1213 sregs[VCPU_REGS_ES].vsi_sel = vmcb->v_es.vs_sel;
1214 sregs[VCPU_REGS_ES].vsi_limit = vmcb->v_es.vs_lim;
1215 attr = vmcb->v_es.vs_attr;
1216 sregs[VCPU_REGS_ES].vsi_ar = (attr & 0xff) | ((attr << 4) &
1217 0xf000);
1218 sregs[VCPU_REGS_ES].vsi_base = vmcb->v_es.vs_base;
1219
1220 sregs[VCPU_REGS_FS].vsi_sel = vmcb->v_fs.vs_sel;
1221 sregs[VCPU_REGS_FS].vsi_limit = vmcb->v_fs.vs_lim;
1222 attr = vmcb->v_fs.vs_attr;
1223 sregs[VCPU_REGS_FS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1224 0xf000);
1225 sregs[VCPU_REGS_FS].vsi_base = vmcb->v_fs.vs_base;
1226
1227 sregs[VCPU_REGS_GS].vsi_sel = vmcb->v_gs.vs_sel;
1228 sregs[VCPU_REGS_GS].vsi_limit = vmcb->v_gs.vs_lim;
1229 attr = vmcb->v_gs.vs_attr;
1230 sregs[VCPU_REGS_GS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1231 0xf000);
1232 sregs[VCPU_REGS_GS].vsi_base = vmcb->v_gs.vs_base;
1233
1234 sregs[VCPU_REGS_SS].vsi_sel = vmcb->v_ss.vs_sel;
1235 sregs[VCPU_REGS_SS].vsi_limit = vmcb->v_ss.vs_lim;
1236 attr = vmcb->v_ss.vs_attr;
1237 sregs[VCPU_REGS_SS].vsi_ar = (attr & 0xff) | ((attr << 4) &
1238 0xf000);
1239 sregs[VCPU_REGS_SS].vsi_base = vmcb->v_ss.vs_base;
1240
1241 sregs[VCPU_REGS_LDTR].vsi_sel = vmcb->v_ldtr.vs_sel;
1242 sregs[VCPU_REGS_LDTR].vsi_limit = vmcb->v_ldtr.vs_lim;
1243 attr = vmcb->v_ldtr.vs_attr;
1244 sregs[VCPU_REGS_LDTR].vsi_ar = (attr & 0xff) | ((attr << 4)
1245 & 0xf000);
1246 sregs[VCPU_REGS_LDTR].vsi_base = vmcb->v_ldtr.vs_base;
1247
1248 sregs[VCPU_REGS_TR].vsi_sel = vmcb->v_tr.vs_sel;
1249 sregs[VCPU_REGS_TR].vsi_limit = vmcb->v_tr.vs_lim;
1250 attr = vmcb->v_tr.vs_attr;
1251 sregs[VCPU_REGS_TR].vsi_ar = (attr & 0xff) | ((attr << 4) &
1252 0xf000);
1253 sregs[VCPU_REGS_TR].vsi_base = vmcb->v_tr.vs_base;
1254
1255 vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
1256 vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
1257 vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
1258 vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
1259 }
1260
1261 if (regmask & VM_RWREGS_CRS) {
1262 crs[VCPU_REGS_CR0] = vmcb->v_cr0;
1263 crs[VCPU_REGS_CR3] = vmcb->v_cr3;
1264 crs[VCPU_REGS_CR4] = vmcb->v_cr4;
1265 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
1266 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
1267 }
1268
1269 if (regmask & VM_RWREGS_MSRS) {
1270 msrs[VCPU_REGS_EFER] = vmcb->v_efer;
1271 msrs[VCPU_REGS_STAR] = vmcb->v_star;
1272 msrs[VCPU_REGS_LSTAR] = vmcb->v_lstar;
1273 msrs[VCPU_REGS_CSTAR] = vmcb->v_cstar;
1274 msrs[VCPU_REGS_SFMASK] = vmcb->v_sfmask;
1275 msrs[VCPU_REGS_KGSBASE] = vmcb->v_kgsbase;
1276 }
1277
1278 if (regmask & VM_RWREGS_DRS) {
1279 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0;
1280 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1;
1281 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2;
1282 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3;
1283 drs[VCPU_REGS_DR6] = vmcb->v_dr6;
1284 drs[VCPU_REGS_DR7] = vmcb->v_dr7;
1285 }
1286
1287 return (0);
1288 }
1289
1290 /*
1291 * vcpu_writeregs_vmx
1292 *
1293 * Writes VCPU registers
1294 *
1295 * Parameters:
1296 * vcpu: the vcpu that has to get its registers written to
1297 * regmask: the types of registers to write
1298 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1299 * vrs: the register values to write
1300 *
1301 * Return values:
1302 * 0: if successful
* EINVAL: an error writing registers occurred
1304 */
1305 int
vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
    struct vcpu_reg_state *vrs)
1308 {
1309 int i, ret = 0;
1310 uint16_t sel;
1311 uint64_t limit, ar;
1312 uint64_t *gprs = vrs->vrs_gprs;
1313 uint64_t *crs = vrs->vrs_crs;
1314 uint64_t *msrs = vrs->vrs_msrs;
1315 uint64_t *drs = vrs->vrs_drs;
1316 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1317 struct vmx_msr_store *msr_store;
1318
1319 if (loadvmcs) {
1320 if (vcpu_reload_vmcs_vmx(vcpu))
1321 return (EINVAL);
1322 }
1323
1324 #ifdef VMM_DEBUG
1325 /* VMCS should be loaded... */
1326 paddr_t pa = 0ULL;
1327 if (vmptrst(&pa))
1328 panic("%s: vmptrst", __func__);
1329 KASSERT(pa == vcpu->vc_control_pa);
1330 #endif /* VMM_DEBUG */
1331
1332 if (regmask & VM_RWREGS_GPRS) {
1333 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1334 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1335 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1336 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1337 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1338 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1339 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1340 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1341 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1342 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1343 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1344 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1345 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1346 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1347 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1348 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1349 if (vmwrite(VMCS_GUEST_IA32_RIP, gprs[VCPU_REGS_RIP]))
1350 goto errout;
1351 if (vmwrite(VMCS_GUEST_IA32_RSP, gprs[VCPU_REGS_RSP]))
1352 goto errout;
1353 if (vmwrite(VMCS_GUEST_IA32_RFLAGS, gprs[VCPU_REGS_RFLAGS]))
1354 goto errout;
1355 }
1356
1357 if (regmask & VM_RWREGS_SREGS) {
1358 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1359 sel = sregs[i].vsi_sel;
1360 limit = sregs[i].vsi_limit;
1361 ar = sregs[i].vsi_ar;
1362
1363 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
1364 goto errout;
1365 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
1366 goto errout;
1367 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
1368 goto errout;
1369 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
1370 sregs[i].vsi_base))
1371 goto errout;
1372 }
1373
1374 if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT,
1375 vrs->vrs_gdtr.vsi_limit))
1376 goto errout;
1377 if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE,
1378 vrs->vrs_gdtr.vsi_base))
1379 goto errout;
1380 if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT,
1381 vrs->vrs_idtr.vsi_limit))
1382 goto errout;
1383 if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE,
1384 vrs->vrs_idtr.vsi_base))
1385 goto errout;
1386 }
1387
1388 if (regmask & VM_RWREGS_CRS) {
1389 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1390 if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0]))
1391 goto errout;
1392 if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3]))
1393 goto errout;
1394 if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4]))
1395 goto errout;
1396 if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0]))
1397 goto errout;
1398 if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1]))
1399 goto errout;
1400 if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2]))
1401 goto errout;
1402 if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3]))
1403 goto errout;
1404 }
1405
1406 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1407
1408 if (regmask & VM_RWREGS_MSRS) {
1409 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
1410 msr_store[i].vms_data = msrs[i];
1411 }
1412 }
1413
1414 if (regmask & VM_RWREGS_DRS) {
1415 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1416 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1417 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1418 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1419 vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR6];
1420 if (vmwrite(VMCS_GUEST_IA32_DR7, drs[VCPU_REGS_DR7]))
1421 goto errout;
1422 }
1423
1424 goto out;
1425
1426 errout:
1427 ret = EINVAL;
1428 out:
1429 if (loadvmcs) {
1430 if (vmclear(&vcpu->vc_control_pa))
1431 ret = EINVAL;
1432 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
1433 }
1434 return (ret);
1435 }
1436
1437 /*
1438 * vcpu_writeregs_svm
1439 *
1440 * Writes 'vcpu's registers
1441 *
1442 * Parameters:
1443 * vcpu: the vcpu that has to get its registers written to
1444 * regmask: the types of registers to write
1445 * vrs: the register values to write
1446 *
1447 * Return values:
1448 * 0: if successful
* EINVAL: an error writing registers occurred
1450 */
1451 int
vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
    struct vcpu_reg_state *vrs)
1454 {
1455 uint64_t *gprs = vrs->vrs_gprs;
1456 uint64_t *crs = vrs->vrs_crs;
1457 uint16_t attr;
1458 uint64_t *msrs = vrs->vrs_msrs;
1459 uint64_t *drs = vrs->vrs_drs;
1460 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1461 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1462
1463 if (regmask & VM_RWREGS_GPRS) {
1464 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
1465 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
1466 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX];
1467 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX];
1468 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI];
1469 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI];
1470 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8];
1471 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9];
1472 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10];
1473 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11];
1474 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12];
1475 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13];
1476 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14];
1477 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15];
1478 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
1479 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
1480
1481 vmcb->v_rax = gprs[VCPU_REGS_RAX];
1482 vmcb->v_rip = gprs[VCPU_REGS_RIP];
1483 vmcb->v_rsp = gprs[VCPU_REGS_RSP];
1484 vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS];
1485 }
1486
1487 if (regmask & VM_RWREGS_SREGS) {
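/*
 * Convert the VMX-style access rights in vcpu_segment_info (AVL/L/DB/G
 * at bits 12-15) back down to the packed SVM attribute format (bits 8-11).
 */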
1488 vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS].vsi_sel;
1489 vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS].vsi_limit;
1490 attr = sregs[VCPU_REGS_CS].vsi_ar;
1491 vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1492 vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS].vsi_base;
1493 vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS].vsi_sel;
1494 vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS].vsi_limit;
1495 attr = sregs[VCPU_REGS_DS].vsi_ar;
1496 vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1497 vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS].vsi_base;
1498 vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES].vsi_sel;
1499 vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES].vsi_limit;
1500 attr = sregs[VCPU_REGS_ES].vsi_ar;
1501 vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1502 vmcb->v_es.vs_base = sregs[VCPU_REGS_ES].vsi_base;
1503 vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS].vsi_sel;
1504 vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS].vsi_limit;
1505 attr = sregs[VCPU_REGS_FS].vsi_ar;
1506 vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1507 vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS].vsi_base;
1508 vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS].vsi_sel;
1509 vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS].vsi_limit;
1510 attr = sregs[VCPU_REGS_GS].vsi_ar;
1511 vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1512 vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS].vsi_base;
1513 vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS].vsi_sel;
1514 vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS].vsi_limit;
1515 attr = sregs[VCPU_REGS_SS].vsi_ar;
1516 vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1517 vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS].vsi_base;
1518 vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR].vsi_sel;
1519 vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR].vsi_limit;
1520 attr = sregs[VCPU_REGS_LDTR].vsi_ar;
1521 vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1522 vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR].vsi_base;
1523 vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR].vsi_sel;
1524 vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR].vsi_limit;
1525 attr = sregs[VCPU_REGS_TR].vsi_ar;
1526 vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1527 vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR].vsi_base;
1528 vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
1529 vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
1530 vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
1531 vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
1532 }
1533
1534 if (regmask & VM_RWREGS_CRS) {
1535 vmcb->v_cr0 = crs[VCPU_REGS_CR0];
1536 vmcb->v_cr3 = crs[VCPU_REGS_CR3];
1537 vmcb->v_cr4 = crs[VCPU_REGS_CR4];
1538 vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR2];
1539 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
1540 }
1541
1542 if (regmask & VM_RWREGS_MSRS) {
1543 vmcb->v_efer |= msrs[VCPU_REGS_EFER];
1544 vmcb->v_star = msrs[VCPU_REGS_STAR];
1545 vmcb->v_lstar = msrs[VCPU_REGS_LSTAR];
1546 vmcb->v_cstar = msrs[VCPU_REGS_CSTAR];
1547 vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK];
1548 vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE];
1549 }
1550
1551 if (regmask & VM_RWREGS_DRS) {
1552 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0];
1553 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1];
1554 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2];
1555 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3];
1556 vmcb->v_dr6 = drs[VCPU_REGS_DR6];
1557 vmcb->v_dr7 = drs[VCPU_REGS_DR7];
1558 }
1559
1560 return (0);
1561 }
1562
1563 /*
1564 * vcpu_reset_regs_svm
1565 *
1566 * Initializes 'vcpu's registers to supplied state
1567 *
1568 * Parameters:
1569 * vcpu: the vcpu whose register state is to be initialized
1570 * vrs: the register state to set
1571 *
1572 * Return values:
1573 * 0: registers init'ed successfully
1574 * EINVAL: an error occurred setting register state
1575 */
1576 int
vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1578 {
1579 struct vmcb *vmcb;
1580 int ret;
1581
1582 vmcb = (struct vmcb *)vcpu->vc_control_va;
1583
1584 /*
1585 * Intercept controls
1586 *
1587 * External Interrupt exiting (SVM_INTERCEPT_INTR)
1588 * External NMI exiting (SVM_INTERCEPT_NMI)
1589 * CPUID instruction (SVM_INTERCEPT_CPUID)
1590 * HLT instruction (SVM_INTERCEPT_HLT)
1591 * I/O instructions (SVM_INTERCEPT_INOUT)
1592 * MSR access (SVM_INTERCEPT_MSR)
1593 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
1594 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
1595 *
1596 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
1597 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
1598 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
1599 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
1600 * STGI instruction (SVM_INTERCEPT_STGI)
1601 * CLGI instruction (SVM_INTERCEPT_CLGI)
1602 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
1603 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
1604 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
1605 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
1606 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
1607 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
1608 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
1609 */
1610 vmcb->v_intercept1 = SVM_INTERCEPT_INTR | SVM_INTERCEPT_NMI |
1611 SVM_INTERCEPT_CPUID | SVM_INTERCEPT_HLT | SVM_INTERCEPT_INOUT |
1612 SVM_INTERCEPT_MSR | SVM_INTERCEPT_SHUTDOWN | SVM_INTERCEPT_INVLPGA;
1613
1614 vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN | SVM_INTERCEPT_VMMCALL |
1615 SVM_INTERCEPT_VMLOAD | SVM_INTERCEPT_VMSAVE | SVM_INTERCEPT_STGI |
1616 SVM_INTERCEPT_CLGI | SVM_INTERCEPT_SKINIT | SVM_INTERCEPT_ICEBP |
1617 SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
1618 SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
1619
1620 if (xsave_mask)
1621 vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
1622
1623 /* Setup I/O bitmap */
1624 memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
1625 vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
1626
1627 /* Setup MSR bitmap */
1628 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE);
1629 vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
1630 svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
1631 svm_setmsrbrw(vcpu, MSR_SYSENTER_CS);
1632 svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
1633 svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
1634 svm_setmsrbrw(vcpu, MSR_STAR);
1635 svm_setmsrbrw(vcpu, MSR_LSTAR);
1636 svm_setmsrbrw(vcpu, MSR_CSTAR);
1637 svm_setmsrbrw(vcpu, MSR_SFMASK);
1638 svm_setmsrbrw(vcpu, MSR_FSBASE);
1639 svm_setmsrbrw(vcpu, MSR_GSBASE);
1640 svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
1641
1642 /* EFER is R/O so we can ensure the guest always has SVME */
1643 svm_setmsrbr(vcpu, MSR_EFER);
1644
1645 /* allow reading TSC */
1646 svm_setmsrbr(vcpu, MSR_TSC);
1647
1648 /* allow reading HWCR and PSTATEDEF to determine TSC frequency */
1649 svm_setmsrbr(vcpu, MSR_HWCR);
1650 svm_setmsrbr(vcpu, MSR_PSTATEDEF(0));
1651
1652 /* Guest VCPU ASID */
1653 vmcb->v_asid = vcpu->vc_vpid;
1654
/* TLB Control - First time in, flush all */
1656 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL;
1657
1658 /* INTR masking */
1659 vmcb->v_intr_masking = 1;
1660
1661 /* PAT */
1662 vmcb->v_g_pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
1663 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
1664 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
1665 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
1666
1667 /* NPT */
1668 vmcb->v_np_enable = SVM_ENABLE_NP;
1669 vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
1670
1671 /* SEV */
1672 if (vcpu->vc_sev)
1673 vmcb->v_np_enable |= SVM_ENABLE_SEV;
1674
1675 /* Enable SVME in EFER (must always be set) */
1676 vmcb->v_efer |= EFER_SVME;
1677
1678 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
1679
1680 /* xcr0 power on default sets bit 0 (x87 state) */
1681 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
1682
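/* Nested paging on SVM uses v_n_cr3 above; the pmap's EPT pointer is unused. */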
1683 vcpu->vc_parent->vm_map->pmap->eptp = 0;
1684
1685 return ret;
1686 }
1687
1688 /*
1689 * svm_setmsrbr
1690 *
1691 * Allow read access to the specified msr on the supplied vcpu.
1692 *
1693 * Parameters:
1694 * vcpu: the VCPU to allow access
1695 * msr: the MSR number to allow access to
1696 */
1697 void
svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1699 {
1700 uint8_t *msrs;
1701 uint16_t idx;
1702
1703 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1704
1705 /*
1706 * MSR Read bitmap layout:
1707 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1708 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1709 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1710 *
1711 * Read enable bit is low order bit of 2-bit pair
1712 * per MSR (eg, MSR 0x0 write bit is at bit 0 @ 0x0)
1713 */
1714 if (msr <= 0x1fff) {
1715 idx = SVM_MSRIDX(msr);
1716 msrs[idx] &= ~(SVM_MSRBIT_R(msr));
1717 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1718 idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1719 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000));
1720 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1721 idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1722 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000));
1723 } else {
1724 printf("%s: invalid msr 0x%x\n", __func__, msr);
1725 return;
1726 }
1727 }
1728
1729 /*
1730 * svm_setmsrbw
1731 *
1732 * Allow write access to the specified msr on the supplied vcpu
1733 *
1734 * Parameters:
1735 * vcpu: the VCPU to allow access
1736 * msr: the MSR number to allow access to
1737 */
1738 void
svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1740 {
1741 uint8_t *msrs;
1742 uint16_t idx;
1743
1744 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1745
1746 /*
1747 * MSR Write bitmap layout:
1748 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
1749 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
1750 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
1751 *
1752 * Write enable bit is high order bit of 2-bit pair
1753 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
1754 */
1755 if (msr <= 0x1fff) {
1756 idx = SVM_MSRIDX(msr);
1757 msrs[idx] &= ~(SVM_MSRBIT_W(msr));
1758 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1759 idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
1760 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000));
1761 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
1762 idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
1763 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000));
1764 } else {
1765 printf("%s: invalid msr 0x%x\n", __func__, msr);
1766 return;
1767 }
1768 }
1769
1770 /*
1771 * svm_setmsrbrw
1772 *
1773 * Allow read/write access to the specified msr on the supplied vcpu
1774 *
1775 * Parameters:
1776 * vcpu: the VCPU to allow access
1777 * msr: the MSR number to allow access to
1778 */
1779 void
svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1781 {
1782 svm_setmsrbr(vcpu, msr);
1783 svm_setmsrbw(vcpu, msr);
1784 }
1785
1786 /*
1787 * vmx_setmsrbr
1788 *
1789 * Allow read access to the specified msr on the supplied vcpu.
1790 *
1791 * Parameters:
1792 * vcpu: the VCPU to allow access
1793 * msr: the MSR number to allow access to
1794 */
1795 void
vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
1797 {
1798 uint8_t *msrs;
1799 uint16_t idx;
1800
1801 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1802
1803 /*
1804 * MSR Read bitmap layout:
1805 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
1806 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
1807 */
1808 if (msr <= 0x1fff) {
1809 idx = VMX_MSRIDX(msr);
1810 msrs[idx] &= ~(VMX_MSRBIT(msr));
1811 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1812 idx = VMX_MSRIDX(msr - 0xc0000000) + 0x400;
1813 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1814 } else
1815 printf("%s: invalid msr 0x%x\n", __func__, msr);
1816 }
1817
1818 /*
1819 * vmx_setmsrbw
1820 *
1821 * Allow write access to the specified msr on the supplied vcpu
1822 *
1823 * Parameters:
1824 * vcpu: the VCPU to allow access
1825 * msr: the MSR number to allow access to
1826 */
1827 void
1828 vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
1829 {
1830 uint8_t *msrs;
1831 uint16_t idx;
1832
1833 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
1834
1835 /*
1836 * MSR Write bitmap layout:
1837 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
1838 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
1839 */
1840 if (msr <= 0x1fff) {
1841 idx = VMX_MSRIDX(msr) + 0x800;
1842 msrs[idx] &= ~(VMX_MSRBIT(msr));
1843 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
1844 idx = VMX_MSRIDX(msr - 0xc0000000) + 0xc00;
1845 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
1846 } else
1847 printf("%s: invalid msr 0x%x\n", __func__, msr);
1848 }
1849
1850 /*
1851 * vmx_setmsrbrw
1852 *
1853 * Allow read/write access to the specified msr on the supplied vcpu
1854 *
1855 * Parameters:
1856 * vcpu: the VCPU to allow access
1857 * msr: the MSR number to allow access to
1858 */
1859 void
1860 vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
1861 {
1862 vmx_setmsrbr(vcpu, msr);
1863 vmx_setmsrbw(vcpu, msr);
1864 }
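
/*
 * Illustrative sketch (not part of the driver): the VMX bitmap uses one
 * bit per MSR, so the macros above presumably reduce to something like
 * (the authoritative definitions live in vmmvar.h):
 *
 *	VMX_MSRIDX(m) -> (m) / 8
 *	VMX_MSRBIT(m) -> 1 << ((m) % 8)
 *
 * e.g. vmx_setmsrbr(vcpu, MSR_EFER) clears bit 0 of byte 0x400 + 0x10 in
 * the read half of the bitmap page.
 */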
1865
1866 /*
1867 * svm_set_clean
1868 *
1869 * Sets (marks as unmodified) the VMCB clean bits set in 'value'.
1870 * For example, to set the clean bit for the VMCB intercepts (bit position 0),
1871 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1872 * Multiple cleanbits can be provided in 'value' at the same time (eg,
1873 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1874 *
1875 * Note that this function does not clear any bits; to clear bits in the
1876 * vmcb cleanbits bitfield, use 'svm_set_dirty'.
1877 *
1878 * Parameters:
1879 * vcpu: the VCPU whose VMCB clean bits should be set
1880 * value: the value(s) to enable in the cleanbits mask
1881 */
1882 void
1883 svm_set_clean(struct vcpu *vcpu, uint32_t value)
1884 {
1885 struct vmcb *vmcb;
1886
1887 /* If no cleanbits support, do nothing */
1888 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1889 return;
1890
1891 vmcb = (struct vmcb *)vcpu->vc_control_va;
1892
1893 vmcb->v_vmcb_clean_bits |= value;
1894 }
1895
1896 /*
1897 * svm_set_dirty
1898 *
1899 * Clears (marks as modified) the VMCB clean bits set in 'value'.
1900 * For example, to clear the bit for the VMCB intercepts (bit position 0)
1901 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
1902 * Multiple dirty bits can be provided in 'value' at the same time (eg,
1903 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
1904 *
1905 * Parameters:
1906 * vcpu: the VCPU whose VMCB clean bits should be marked dirty
1907 * value: the value(s) to dirty in the cleanbits mask
1908 */
1909 void
1910 svm_set_dirty(struct vcpu *vcpu, uint32_t value)
1911 {
1912 struct vmcb *vmcb;
1913
1914 /* If no cleanbits support, do nothing */
1915 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
1916 return;
1917
1918 vmcb = (struct vmcb *)vcpu->vc_control_va;
1919
1920 vmcb->v_vmcb_clean_bits &= ~value;
1921 }
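
/*
 * A hedged usage sketch: code that modifies a cached VMCB area flags it as
 * dirty so the CPU reloads that state on the next VMRUN, and marks it
 * clean again once the hardware has consumed it. Roughly (v_intercept1,
 * SVM_INTERCEPT_INTR and SVM_CLEANBITS_ALL are assumed names here; only
 * SVM_CLEANBITS_I and SVM_CLEANBITS_TPR are documented above):
 *
 *	vmcb->v_intercept1 |= SVM_INTERCEPT_INTR;
 *	svm_set_dirty(vcpu, SVM_CLEANBITS_I);
 *	...
 *	svm_set_clean(vcpu, SVM_CLEANBITS_ALL);
 */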
1922
1923 /*
1924 * vcpu_reset_regs_vmx
1925 *
1926 * Initializes 'vcpu's registers to supplied state
1927 *
1928 * Parameters:
1929 * vcpu: the vcpu whose register state is to be initialized
1930 * vrs: the register state to set
1931 *
1932 * Return values:
1933 * 0: registers init'ed successfully
1934 * EINVAL: an error occurred setting register state
1935 */
1936 int
1937 vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1938 {
1939 int ret = 0, ug = 0;
1940 uint32_t cr0, cr4;
1941 uint32_t pinbased, procbased, procbased2, exit, entry;
1942 uint32_t want1, want0;
1943 uint64_t ctrlval, cr3, msr_misc_enable;
1944 uint16_t ctrl;
1945 struct vmx_msr_store *msr_store;
1946
1947 rw_assert_wrlock(&vcpu->vc_lock);
1948
1949 cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
1950
1951 if (vcpu_reload_vmcs_vmx(vcpu)) {
1952 DPRINTF("%s: error reloading VMCS\n", __func__);
1953 ret = EINVAL;
1954 goto exit;
1955 }
1956
1957 #ifdef VMM_DEBUG
1958 /* VMCS should be loaded... */
1959 paddr_t pa = 0ULL;
1960 if (vmptrst(&pa))
1961 panic("%s: vmptrst", __func__);
1962 KASSERT(pa == vcpu->vc_control_pa);
1963 #endif /* VMM_DEBUG */
1964
1965 /* Compute Basic Entry / Exit Controls */
1966 vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC);
1967 vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS);
1968 vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS);
1969 vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS);
1970 vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS);
1971
1972 /* Compute True Entry / Exit Controls (if applicable) */
1973 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1974 vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS);
1975 vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS);
1976 vcpu->vc_vmx_true_pinbased_ctls =
1977 rdmsr(IA32_VMX_TRUE_PINBASED_CTLS);
1978 vcpu->vc_vmx_true_procbased_ctls =
1979 rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS);
1980 }
1981
1982 /* Compute Secondary Procbased Controls (if applicable) */
1983 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
1984 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1))
1985 vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS);
1986
1987 /*
1988 * Pinbased ctrls
1989 *
1990 * We must be able to set the following:
1991 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
1992 * IA32_VMX_NMI_EXITING - exit on host NMI
1993 */
1994 want1 = IA32_VMX_EXTERNAL_INT_EXITING |
1995 IA32_VMX_NMI_EXITING;
1996 want0 = 0;
1997
1998 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
1999 ctrl = IA32_VMX_TRUE_PINBASED_CTLS;
2000 ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2001 } else {
2002 ctrl = IA32_VMX_PINBASED_CTLS;
2003 ctrlval = vcpu->vc_vmx_pinbased_ctls;
2004 }
2005
2006 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2007 DPRINTF("%s: error computing pinbased controls\n", __func__);
2008 ret = EINVAL;
2009 goto exit;
2010 }
2011
2012 if (vmwrite(VMCS_PINBASED_CTLS, pinbased)) {
2013 DPRINTF("%s: error setting pinbased controls\n", __func__);
2014 ret = EINVAL;
2015 goto exit;
2016 }
2017
2018 /*
2019 * Procbased ctrls
2020 *
2021 * We must be able to set the following:
2022 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2023 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2024 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2025 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2026 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2027 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2028 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2029 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2030 *
2031 * If we have EPT, we must be able to clear the following
2032 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2033 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2034 */
2035 want1 = IA32_VMX_HLT_EXITING |
2036 IA32_VMX_MWAIT_EXITING |
2037 IA32_VMX_UNCONDITIONAL_IO_EXITING |
2038 IA32_VMX_USE_MSR_BITMAPS |
2039 IA32_VMX_CR8_LOAD_EXITING |
2040 IA32_VMX_CR8_STORE_EXITING |
2041 IA32_VMX_MONITOR_EXITING |
2042 IA32_VMX_USE_TPR_SHADOW;
2043 want0 = 0;
2044
2045 want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS;
2046 want0 |= IA32_VMX_CR3_LOAD_EXITING | IA32_VMX_CR3_STORE_EXITING;
2047
2048 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2049 ctrl = IA32_VMX_TRUE_PROCBASED_CTLS;
2050 ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2051 } else {
2052 ctrl = IA32_VMX_PROCBASED_CTLS;
2053 ctrlval = vcpu->vc_vmx_procbased_ctls;
2054 }
2055
2056 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2057 DPRINTF("%s: error computing procbased controls\n", __func__);
2058 ret = EINVAL;
2059 goto exit;
2060 }
2061
2062 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
2063 DPRINTF("%s: error setting procbased controls\n", __func__);
2064 ret = EINVAL;
2065 goto exit;
2066 }
2067
2068 /*
2069 * Secondary Procbased ctrls
2070 *
2071 * We want to be able to set the following, if available:
2072 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2073 *
2074 * If we have EPT, we must be able to set the following:
2075 * IA32_VMX_ENABLE_EPT - enable EPT
2076 *
2077 * If we have unrestricted guest capability, we must be able to set
2078 * the following:
2079 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if the caller
2080 * left CR0_PG | CR0_PE clear in %cr0 in the 'vrs' parameter)
2081 */
2082 want1 = IA32_VMX_ENABLE_EPT;
2083
2084 /* XXX checking for 2ndary controls can be combined here */
2085 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2086 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2087 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2088 IA32_VMX_ENABLE_VPID, 1)) {
2089 want1 |= IA32_VMX_ENABLE_VPID;
2090 vcpu->vc_vmx_vpid_enabled = 1;
2091 }
2092 }
2093
2094 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2095 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2096 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2097 IA32_VMX_UNRESTRICTED_GUEST, 1)) {
2098 if ((cr0 & (CR0_PE | CR0_PG)) == 0) {
2099 want1 |= IA32_VMX_UNRESTRICTED_GUEST;
2100 ug = 1;
2101 }
2102 }
2103 }
2104
2105 want0 = ~want1;
2106 ctrlval = vcpu->vc_vmx_procbased2_ctls;
2107 ctrl = IA32_VMX_PROCBASED2_CTLS;
2108
2109 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2110 DPRINTF("%s: error computing secondary procbased controls\n",
2111 __func__);
2112 ret = EINVAL;
2113 goto exit;
2114 }
2115
2116 if (vmwrite(VMCS_PROCBASED2_CTLS, procbased2)) {
2117 DPRINTF("%s: error setting secondary procbased controls\n",
2118 __func__);
2119 ret = EINVAL;
2120 goto exit;
2121 }
2122
2123 /*
2124 * Exit ctrls
2125 *
2126 * We must be able to set the following:
2127 * IA32_VMX_SAVE_DEBUG_CONTROLS
2128 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2129 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2130 */
2131 want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE |
2132 IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT |
2133 IA32_VMX_SAVE_DEBUG_CONTROLS;
2134 want0 = 0;
2135
2136 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2137 ctrl = IA32_VMX_TRUE_EXIT_CTLS;
2138 ctrlval = vcpu->vc_vmx_true_exit_ctls;
2139 } else {
2140 ctrl = IA32_VMX_EXIT_CTLS;
2141 ctrlval = vcpu->vc_vmx_exit_ctls;
2142 }
2143
2144 if (rcr4() & CR4_CET)
2145 want1 |= IA32_VMX_LOAD_HOST_CET_STATE;
2146 else
2147 want0 |= IA32_VMX_LOAD_HOST_CET_STATE;
2148
2149 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2150 DPRINTF("%s: error computing exit controls\n", __func__);
2151 ret = EINVAL;
2152 goto exit;
2153 }
2154
2155 if (vmwrite(VMCS_EXIT_CTLS, exit)) {
2156 DPRINTF("%s: error setting exit controls\n", __func__);
2157 ret = EINVAL;
2158 goto exit;
2159 }
2160
2161 /*
2162 * Entry ctrls
2163 *
2164 * We must be able to set the following:
2165 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2166 * IA32_VMX_LOAD_DEBUG_CONTROLS
2167 * We must be able to clear the following:
2168 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2169 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2170 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2171 */
2172 want1 = IA32_VMX_LOAD_DEBUG_CONTROLS;
2173 if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)
2174 want1 |= IA32_VMX_IA32E_MODE_GUEST;
2175
2176 want0 = IA32_VMX_ENTRY_TO_SMM |
2177 IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT |
2178 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY;
2179
2180 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
2181 ctrl = IA32_VMX_TRUE_ENTRY_CTLS;
2182 ctrlval = vcpu->vc_vmx_true_entry_ctls;
2183 } else {
2184 ctrl = IA32_VMX_ENTRY_CTLS;
2185 ctrlval = vcpu->vc_vmx_entry_ctls;
2186 }
2187
2188 if (rcr4() & CR4_CET)
2189 want1 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2190 else
2191 want0 |= IA32_VMX_LOAD_GUEST_CET_STATE;
2192
2193 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2194 ret = EINVAL;
2195 goto exit;
2196 }
2197
2198 if (vmwrite(VMCS_ENTRY_CTLS, entry)) {
2199 ret = EINVAL;
2200 goto exit;
2201 }
2202
2203 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
2204 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
2205 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
2206 IA32_VMX_ENABLE_VPID, 1)) {
2207 if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) {
2208 DPRINTF("%s: error setting guest VPID\n",
2209 __func__);
2210 ret = EINVAL;
2211 goto exit;
2212 }
2213 }
2214 }
2215
2216 /*
2217 * Determine which bits in CR0 have to be set to a fixed
2218 * value as per Intel SDM A.7.
2219 * CR0 bits in the vrs parameter must match these.
2220 */
2221 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2222 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2223 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2224 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2225
2226 /*
2227 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
2228 * fixed to 1 even if the CPU supports the unrestricted guest
2229 * feature. Update want1 and want0 accordingly to allow
2230 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
2231 * the CPU has the unrestricted guest capability.
2232 */
2233 if (ug) {
2234 want1 &= ~(CR0_PG | CR0_PE);
2235 want0 &= ~(CR0_PG | CR0_PE);
2236 }
2237
2238 /*
2239 * VMX may require some bits to be set that userland should not have
2240 * to care about. Set those here.
2241 */
2242 if (want1 & CR0_NE)
2243 cr0 |= CR0_NE;
2244
2245 if ((cr0 & want1) != want1) {
2246 ret = EINVAL;
2247 goto exit;
2248 }
2249
2250 if ((~cr0 & want0) != want0) {
2251 ret = EINVAL;
2252 goto exit;
2253 }
2254
2255 vcpu->vc_vmx_cr0_fixed1 = want1;
2256 vcpu->vc_vmx_cr0_fixed0 = want0;
2257 /*
2258 * Determine which bits in CR4 have to be set to a fixed
2259 * value as per Intel SDM A.8.
2260 * CR4 bits in the vrs parameter must match these, except
2261 * CR4_VMXE - we add that here since it must always be set.
2262 */
2263 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2264 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2265 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2266 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2267
2268 cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE;
2269
2270 if ((cr4 & want1) != want1) {
2271 ret = EINVAL;
2272 goto exit;
2273 }
2274
2275 if ((~cr4 & want0) != want0) {
2276 ret = EINVAL;
2277 goto exit;
2278 }
2279
2280 cr3 = vrs->vrs_crs[VCPU_REGS_CR3];
2281
2282 /* Restore PDPTEs if 32-bit PAE paging is being used */
2283 if (cr3 && (cr4 & CR4_PAE) &&
2284 !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) {
2285 if (vmwrite(VMCS_GUEST_PDPTE0,
2286 vrs->vrs_crs[VCPU_REGS_PDPTE0])) {
2287 ret = EINVAL;
2288 goto exit;
2289 }
2290
2291 if (vmwrite(VMCS_GUEST_PDPTE1,
2292 vrs->vrs_crs[VCPU_REGS_PDPTE1])) {
2293 ret = EINVAL;
2294 goto exit;
2295 }
2296
2297 if (vmwrite(VMCS_GUEST_PDPTE2,
2298 vrs->vrs_crs[VCPU_REGS_PDPTE2])) {
2299 ret = EINVAL;
2300 goto exit;
2301 }
2302
2303 if (vmwrite(VMCS_GUEST_PDPTE3,
2304 vrs->vrs_crs[VCPU_REGS_PDPTE3])) {
2305 ret = EINVAL;
2306 goto exit;
2307 }
2308 }
2309
2310 vrs->vrs_crs[VCPU_REGS_CR0] = cr0;
2311 vrs->vrs_crs[VCPU_REGS_CR4] = cr4;
2312
2313 msr_misc_enable = rdmsr(MSR_MISC_ENABLE);
2314
2315 /*
2316 * Select host MSRs to be loaded on exit
2317 */
2318 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
2319 msr_store[VCPU_HOST_REGS_EFER].vms_index = MSR_EFER;
2320 msr_store[VCPU_HOST_REGS_EFER].vms_data = rdmsr(MSR_EFER);
2321 msr_store[VCPU_HOST_REGS_STAR].vms_index = MSR_STAR;
2322 msr_store[VCPU_HOST_REGS_STAR].vms_data = rdmsr(MSR_STAR);
2323 msr_store[VCPU_HOST_REGS_LSTAR].vms_index = MSR_LSTAR;
2324 msr_store[VCPU_HOST_REGS_LSTAR].vms_data = rdmsr(MSR_LSTAR);
2325 msr_store[VCPU_HOST_REGS_CSTAR].vms_index = MSR_CSTAR;
2326 msr_store[VCPU_HOST_REGS_CSTAR].vms_data = 0;
2327 msr_store[VCPU_HOST_REGS_SFMASK].vms_index = MSR_SFMASK;
2328 msr_store[VCPU_HOST_REGS_SFMASK].vms_data = rdmsr(MSR_SFMASK);
2329 msr_store[VCPU_HOST_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2330 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data = 0;
2331 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2332 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2333
2334 /*
2335 * Select guest MSRs to be loaded on entry / saved on exit
2336 */
2337 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
2338
2339 msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
2340 msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
2341 msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
2342 msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR;
2343 msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK;
2344 msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
2345 msr_store[VCPU_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE;
2346
2347 /*
2348 * Initialize MSR_MISC_ENABLE here, as it can't be read and populated
2349 * from vmd(8) and some of its content is based on the host.
2350 */
2351 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = msr_misc_enable;
2352 msr_store[VCPU_REGS_MISC_ENABLE].vms_data &=
2353 ~(MISC_ENABLE_TCC | MISC_ENABLE_PERF_MON_AVAILABLE |
2354 MISC_ENABLE_EIST_ENABLED | MISC_ENABLE_ENABLE_MONITOR_FSM |
2355 MISC_ENABLE_xTPR_MESSAGE_DISABLE);
2356 msr_store[VCPU_REGS_MISC_ENABLE].vms_data |=
2357 MISC_ENABLE_BTS_UNAVAILABLE | MISC_ENABLE_PEBS_UNAVAILABLE;
2358
2359 /*
2360 * Currently we use the same memory for guest MSRs (entry-load and
2361 * exit-store) so they have the same count. We exit-load the same
2362 * host MSRs, so same count but different memory. Those are just
2363 * our current choices, not architectural requirements.
2364 */
2365 if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VCPU_REGS_NMSRS)) {
2366 DPRINTF("%s: error setting guest MSR exit store count\n",
2367 __func__);
2368 ret = EINVAL;
2369 goto exit;
2370 }
2371
2372 if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, VCPU_HOST_REGS_NMSRS)) {
2373 DPRINTF("%s: error setting guest MSR exit load count\n",
2374 __func__);
2375 ret = EINVAL;
2376 goto exit;
2377 }
2378
2379 if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, VCPU_REGS_NMSRS)) {
2380 DPRINTF("%s: error setting guest MSR entry load count\n",
2381 __func__);
2382 ret = EINVAL;
2383 goto exit;
2384 }
2385
2386 if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS,
2387 vcpu->vc_vmx_msr_exit_save_pa)) {
2388 DPRINTF("%s: error setting guest MSR exit store address\n",
2389 __func__);
2390 ret = EINVAL;
2391 goto exit;
2392 }
2393
2394 if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS,
2395 vcpu->vc_vmx_msr_exit_load_pa)) {
2396 DPRINTF("%s: error setting guest MSR exit load address\n",
2397 __func__);
2398 ret = EINVAL;
2399 goto exit;
2400 }
2401
2402 if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS,
2403 vcpu->vc_vmx_msr_exit_save_pa)) {
2404 DPRINTF("%s: error setting guest MSR entry load address\n",
2405 __func__);
2406 ret = EINVAL;
2407 goto exit;
2408 }
2409
2410 if (vmwrite(VMCS_MSR_BITMAP_ADDRESS,
2411 vcpu->vc_msr_bitmap_pa)) {
2412 DPRINTF("%s: error setting guest MSR bitmap address\n",
2413 __func__);
2414 ret = EINVAL;
2415 goto exit;
2416 }
2417
2418 if (vmwrite(VMCS_CR4_MASK, CR4_VMXE)) {
2419 DPRINTF("%s: error setting guest CR4 mask\n", __func__);
2420 ret = EINVAL;
2421 goto exit;
2422 }
2423
2424 if (vmwrite(VMCS_CR0_MASK, CR0_NE)) {
2425 DPRINTF("%s: error setting guest CR0 mask\n", __func__);
2426 ret = EINVAL;
2427 goto exit;
2428 }
2429
2430 /*
2431 * Set up the VMCS for the register state we want during VCPU start.
2432 * This matches what the CPU state would be after a bootloader
2433 * transition to 'start'.
2434 */
2435 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);
2436
2437 /*
2438 * Set up the MSR bitmap
2439 */
2440 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE);
2441 vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL);
2442 vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS);
2443 vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP);
2444 vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP);
2445 vmx_setmsrbrw(vcpu, MSR_EFER);
2446 vmx_setmsrbrw(vcpu, MSR_STAR);
2447 vmx_setmsrbrw(vcpu, MSR_LSTAR);
2448 vmx_setmsrbrw(vcpu, MSR_CSTAR);
2449 vmx_setmsrbrw(vcpu, MSR_SFMASK);
2450 vmx_setmsrbrw(vcpu, MSR_FSBASE);
2451 vmx_setmsrbrw(vcpu, MSR_GSBASE);
2452 vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE);
2453
2454 vmx_setmsrbr(vcpu, MSR_MISC_ENABLE);
2455 vmx_setmsrbr(vcpu, MSR_TSC);
2456
2457 /* If host supports CET, pass through access to the guest. */
2458 if (rcr4() & CR4_CET)
2459 vmx_setmsrbrw(vcpu, MSR_S_CET);
2460
2461 /* XXX CR0 shadow */
2462 /* XXX CR4 shadow */
2463
2464 /* xcr0 power on default sets bit 0 (x87 state) */
2465 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
2466
2467 /* XXX PAT shadow */
2468 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2469
2470 /* Flush the VMCS */
2471 if (vmclear(&vcpu->vc_control_pa)) {
2472 DPRINTF("%s: vmclear failed\n", __func__);
2473 ret = EINVAL;
2474 }
2475 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
2476
2477 exit:
2478 return (ret);
2479 }
2480
2481 /*
2482 * vcpu_init_vmx
2483 *
2484 * Intel VMX specific VCPU initialization routine.
2485 *
2486 * This function allocates various per-VCPU memory regions, sets up initial
2487 * VCPU VMCS controls, and sets initial register values.
2488 *
2489 * Parameters:
2490 * vcpu: the VCPU structure being initialized
2491 *
2492 * Return values:
2493 * 0: the VCPU was initialized successfully
2494 * ENOMEM: insufficient resources
2495 * EINVAL: an error occurred during VCPU initialization
2496 */
2497 int
2498 vcpu_init_vmx(struct vcpu *vcpu)
2499 {
2500 struct vmcs *vmcs;
2501 uint64_t msr, eptp;
2502 uint32_t cr0, cr4;
2503 int ret = 0;
2504
2505 /* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */
2506 if (vmm_alloc_vpid(&vcpu->vc_vpid))
2507 return (ENOMEM);
2508
2509 /* Allocate VMCS VA */
2510 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2511 &kd_waitok);
2512 vcpu->vc_vmx_vmcs_state = VMCS_CLEARED;
2513
2514 if (!vcpu->vc_control_va) {
2515 ret = ENOMEM;
2516 goto exit;
2517 }
2518
2519 /* Compute VMCS PA */
2520 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2521 (paddr_t *)&vcpu->vc_control_pa)) {
2522 ret = ENOMEM;
2523 goto exit;
2524 }
2525
2526 /* Allocate MSR bitmap VA */
2527 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2528 &kd_waitok);
2529
2530 if (!vcpu->vc_msr_bitmap_va) {
2531 ret = ENOMEM;
2532 goto exit;
2533 }
2534
2535 /* Compute MSR bitmap PA */
2536 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2537 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2538 ret = ENOMEM;
2539 goto exit;
2540 }
2541
2542 /* Allocate MSR exit load area VA */
2543 vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2544 &kp_zero, &kd_waitok);
2545
2546 if (!vcpu->vc_vmx_msr_exit_load_va) {
2547 ret = ENOMEM;
2548 goto exit;
2549 }
2550
2551 /* Compute MSR exit load area PA */
2552 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_load_va,
2553 &vcpu->vc_vmx_msr_exit_load_pa)) {
2554 ret = ENOMEM;
2555 goto exit;
2556 }
2557
2558 /* Allocate MSR exit save area VA */
2559 vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2560 &kp_zero, &kd_waitok);
2561
2562 if (!vcpu->vc_vmx_msr_exit_save_va) {
2563 ret = ENOMEM;
2564 goto exit;
2565 }
2566
2567 /* Compute MSR exit save area PA */
2568 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_save_va,
2569 &vcpu->vc_vmx_msr_exit_save_pa)) {
2570 ret = ENOMEM;
2571 goto exit;
2572 }
2573
2574 #if 0 /* XXX currently use msr_exit_save for msr_entry_load too */
2575 /* Allocate MSR entry load area VA */
2576 vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2577 &kp_zero, &kd_waitok);
2578
2579 if (!vcpu->vc_vmx_msr_entry_load_va) {
2580 ret = ENOMEM;
2581 goto exit;
2582 }
2583
2584 /* Compute MSR entry load area PA */
2585 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_entry_load_va,
2586 &vcpu->vc_vmx_msr_entry_load_pa)) {
2587 ret = ENOMEM;
2588 goto exit;
2589 }
2590 #endif
2591
2592 vmcs = (struct vmcs *)vcpu->vc_control_va;
2593 vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
2594
2595 /*
2596 * Load the VMCS onto this PCPU so we can write registers
2597 */
2598 if (vmptrld(&vcpu->vc_control_pa)) {
2599 ret = EINVAL;
2600 goto exit;
2601 }
2602
2603 /* Configure EPT Pointer */
2604 eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
2605 msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
2606 if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) {
2607 /* Page walk length 4 supported */
2608 eptp |= ((IA32_EPT_PAGE_WALK_LENGTH - 1) << 3);
2609 } else {
2610 DPRINTF("EPT page walk length 4 not supported\n");
2611 ret = EINVAL;
2612 goto exit;
2613 }
2614 if (msr & IA32_EPT_VPID_CAP_WB) {
2615 /* WB cache type supported */
2616 eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB;
2617 } else
2618 DPRINTF("%s: no WB cache type available, guest VM will run "
2619 "uncached\n", __func__);
2620
2621 DPRINTF("Guest EPTP = 0x%llx\n", eptp);
2622 if (vmwrite(VMCS_GUEST_IA32_EPTP, eptp)) {
2623 DPRINTF("%s: error setting guest EPTP\n", __func__);
2624 ret = EINVAL;
2625 goto exit;
2626 }
2627
2628 vcpu->vc_parent->vm_map->pmap->eptp = eptp;
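
/*
 * Worked example (illustrative numbers, assuming IA32_EPT_PAGE_WALK_LENGTH
 * is 4 and the WB type encodes as 6 per the SDM): for a top-level EPT page
 * table at physical address 0x1234000, the EPTP built above would be
 * 0x1234000 | ((4 - 1) << 3) | 6 = 0x123401e, i.e. page-walk length minus
 * one in bits 5:3 and the EPT paging-structure memory type in bits 2:0.
 */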
2629
2630 /* Host CR0 */
2631 cr0 = rcr0() & ~CR0_TS;
2632 if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
2633 DPRINTF("%s: error writing host CR0\n", __func__);
2634 ret = EINVAL;
2635 goto exit;
2636 }
2637
2638 /* Host CR4 */
2639 cr4 = rcr4();
2640 if (vmwrite(VMCS_HOST_IA32_CR4, cr4)) {
2641 DPRINTF("%s: error writing host CR4\n", __func__);
2642 ret = EINVAL;
2643 goto exit;
2644 }
2645
2646 /* Host Segment Selectors */
2647 if (vmwrite(VMCS_HOST_IA32_CS_SEL, GSEL(GCODE_SEL, SEL_KPL))) {
2648 DPRINTF("%s: error writing host CS selector\n", __func__);
2649 ret = EINVAL;
2650 goto exit;
2651 }
2652
2653 if (vmwrite(VMCS_HOST_IA32_DS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2654 DPRINTF("%s: error writing host DS selector\n", __func__);
2655 ret = EINVAL;
2656 goto exit;
2657 }
2658
2659 if (vmwrite(VMCS_HOST_IA32_ES_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2660 DPRINTF("%s: error writing host ES selector\n", __func__);
2661 ret = EINVAL;
2662 goto exit;
2663 }
2664
2665 if (vmwrite(VMCS_HOST_IA32_FS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2666 DPRINTF("%s: error writing host FS selector\n", __func__);
2667 ret = EINVAL;
2668 goto exit;
2669 }
2670
2671 if (vmwrite(VMCS_HOST_IA32_GS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2672 DPRINTF("%s: error writing host GS selector\n", __func__);
2673 ret = EINVAL;
2674 goto exit;
2675 }
2676
2677 if (vmwrite(VMCS_HOST_IA32_SS_SEL, GSEL(GDATA_SEL, SEL_KPL))) {
2678 DPRINTF("%s: error writing host SS selector\n", __func__);
2679 ret = EINVAL;
2680 goto exit;
2681 }
2682
2683 if (vmwrite(VMCS_HOST_IA32_TR_SEL, GSYSSEL(GPROC0_SEL, SEL_KPL))) {
2684 DPRINTF("%s: error writing host TR selector\n", __func__);
2685 ret = EINVAL;
2686 goto exit;
2687 }
2688
2689 /* Host IDTR base */
2690 if (vmwrite(VMCS_HOST_IA32_IDTR_BASE, idt_vaddr)) {
2691 DPRINTF("%s: error writing host IDTR base\n", __func__);
2692 ret = EINVAL;
2693 goto exit;
2694 }
2695
2696 /* VMCS link */
2697 if (vmwrite(VMCS_LINK_POINTER, VMX_VMCS_PA_CLEAR)) {
2698 DPRINTF("%s: error writing VMCS link pointer\n", __func__);
2699 ret = EINVAL;
2700 goto exit;
2701 }
2702
2703 /* Flush the initial VMCS */
2704 if (vmclear(&vcpu->vc_control_pa)) {
2705 DPRINTF("%s: vmclear failed\n", __func__);
2706 ret = EINVAL;
2707 }
2708
2709 exit:
2710 if (ret)
2711 vcpu_deinit_vmx(vcpu);
2712
2713 return (ret);
2714 }
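
/*
 * The allocations above all follow the same pattern: take a zeroed, wired
 * kernel page and resolve its physical address for use by the hardware. A
 * hypothetical helper capturing that pattern (a sketch only, not code used
 * by vmm(4)):
 *
 *	static int
 *	vmm_alloc_vmm_page(vaddr_t *va, paddr_t *pa)
 *	{
 *		*va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
 *		    &kd_waitok);
 *		if (*va == 0)
 *			return (ENOMEM);
 *		if (!pmap_extract(pmap_kernel(), *va, pa)) {
 *			km_free((void *)*va, PAGE_SIZE, &kv_page, &kp_zero);
 *			*va = 0;
 *			return (ENOMEM);
 *		}
 *		return (0);
 *	}
 */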
2715
2716 /*
2717 * vcpu_reset_regs
2718 *
2719 * Resets a vcpu's registers to the provided state
2720 *
2721 * Parameters:
2722 * vcpu: the vcpu whose registers shall be reset
2723 * vrs: the desired register state
2724 *
2725 * Return values:
2726 * 0: the vcpu's registers were successfully reset
2727 * !0: the vcpu's registers could not be reset (see arch-specific reset
2728 * function for various values that can be returned here)
2729 */
2730 int
2731 vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2732 {
2733 int ret;
2734
2735 if (vmm_softc->mode == VMM_MODE_EPT)
2736 ret = vcpu_reset_regs_vmx(vcpu, vrs);
2737 else if (vmm_softc->mode == VMM_MODE_RVI)
2738 ret = vcpu_reset_regs_svm(vcpu, vrs);
2739 else
2740 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2741
2742 return (ret);
2743 }
2744
2745 /*
2746 * vcpu_init_svm
2747 *
2748 * AMD SVM specific VCPU initialization routine.
2749 *
2750 * This function allocates various per-VCPU memory regions, sets up initial
2751 * VCPU VMCB controls, and sets initial register values.
2752 *
2753 * Parameters:
2754 * vcpu: the VCPU structure being initialized
2755 * vcp: parameters provided by vmd(8)
2756 *
2757 * Return values:
2758 * 0: the VCPU was initialized successfully
2759 * ENOMEM: insufficient resources
2760 * EINVAL: an error occurred during VCPU initialization
2761 */
2762 int
2763 vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
2764 {
2765 int ret = 0;
2766
2767 /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
2768 if (vmm_alloc_vpid(&vcpu->vc_vpid))
2769 return (ENOMEM);
2770
2771 /* Allocate VMCB VA */
2772 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
2773 &kd_waitok);
2774
2775 if (!vcpu->vc_control_va) {
2776 ret = ENOMEM;
2777 goto exit;
2778 }
2779
2780 /* Compute VMCB PA */
2781 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
2782 (paddr_t *)&vcpu->vc_control_pa)) {
2783 ret = ENOMEM;
2784 goto exit;
2785 }
2786
2787 DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__,
2788 (uint64_t)vcpu->vc_control_va,
2789 (uint64_t)vcpu->vc_control_pa);
2790
2791
2792 /* Allocate MSR bitmap VA (2 pages) */
2793 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any,
2794 &vmm_kp_contig, &kd_waitok);
2795
2796 if (!vcpu->vc_msr_bitmap_va) {
2797 ret = ENOMEM;
2798 goto exit;
2799 }
2800
2801 /* Compute MSR bitmap PA */
2802 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va,
2803 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2804 ret = ENOMEM;
2805 goto exit;
2806 }
2807
2808 DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__,
2809 (uint64_t)vcpu->vc_msr_bitmap_va,
2810 (uint64_t)vcpu->vc_msr_bitmap_pa);
2811
2812 /* Allocate host state area VA */
2813 vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
2814 &kp_zero, &kd_waitok);
2815
2816 if (!vcpu->vc_svm_hsa_va) {
2817 ret = ENOMEM;
2818 goto exit;
2819 }
2820
2821 /* Compute host state area PA */
2822 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_hsa_va,
2823 &vcpu->vc_svm_hsa_pa)) {
2824 ret = ENOMEM;
2825 goto exit;
2826 }
2827
2828 DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
2829 (uint64_t)vcpu->vc_svm_hsa_va,
2830 (uint64_t)vcpu->vc_svm_hsa_pa);
2831
2832 /* Allocate IOIO area VA (3 pages) */
2833 vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any,
2834 &vmm_kp_contig, &kd_waitok);
2835
2836 if (!vcpu->vc_svm_ioio_va) {
2837 ret = ENOMEM;
2838 goto exit;
2839 }
2840
2841 /* Compute IOIO area PA */
2842 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_ioio_va,
2843 &vcpu->vc_svm_ioio_pa)) {
2844 ret = ENOMEM;
2845 goto exit;
2846 }
2847
2848 DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__,
2849 (uint64_t)vcpu->vc_svm_ioio_va,
2850 (uint64_t)vcpu->vc_svm_ioio_pa);
2851
2852 /* Shall we enable SEV? */
2853 vcpu->vc_sev = vcp->vcp_sev;
2854
2855 /* Inform vmd(8) about ASID and C bit position. */
2856 vcp->vcp_poscbit = amd64_pos_cbit;
2857 vcp->vcp_asid[vcpu->vc_id] = vcpu->vc_vpid;
2858
2859 exit:
2860 if (ret)
2861 vcpu_deinit_svm(vcpu);
2862
2863 return (ret);
2864 }
2865
2866 /*
2867 * vcpu_init
2868 *
2869 * Calls the architecture-specific VCPU init routine
2870 */
2871 int
2872 vcpu_init(struct vcpu *vcpu, struct vm_create_params *vcp)
2873 {
2874 int ret = 0;
2875
2876 vcpu->vc_virt_mode = vmm_softc->mode;
2877 vcpu->vc_state = VCPU_STATE_STOPPED;
2878 vcpu->vc_vpid = 0;
2879 vcpu->vc_pvclock_system_gpa = 0;
2880 vcpu->vc_last_pcpu = NULL;
2881
2882 rw_init(&vcpu->vc_lock, "vcpu");
2883
2884 /* Shadow PAT MSR, starting with host's value. */
2885 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
2886
2887 if (vmm_softc->mode == VMM_MODE_EPT)
2888 ret = vcpu_init_vmx(vcpu);
2889 else if (vmm_softc->mode == VMM_MODE_RVI)
2890 ret = vcpu_init_svm(vcpu, vcp);
2891 else
2892 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2893
2894 return (ret);
2895 }
2896
2897 /*
2898 * vcpu_deinit_vmx
2899 *
2900 * Deinitializes the vcpu described by 'vcpu'
2901 *
2902 * Parameters:
2903 * vcpu: the vcpu to be deinited
2904 */
2905 void
2906 vcpu_deinit_vmx(struct vcpu *vcpu)
2907 {
2908 if (vcpu->vc_control_va) {
2909 km_free((void *)vcpu->vc_control_va, PAGE_SIZE,
2910 &kv_page, &kp_zero);
2911 vcpu->vc_control_va = 0;
2912 }
2913 if (vcpu->vc_vmx_msr_exit_save_va) {
2914 km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
2915 PAGE_SIZE, &kv_page, &kp_zero);
2916 vcpu->vc_vmx_msr_exit_save_va = 0;
2917 }
2918 if (vcpu->vc_vmx_msr_exit_load_va) {
2919 km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
2920 PAGE_SIZE, &kv_page, &kp_zero);
2921 vcpu->vc_vmx_msr_exit_load_va = 0;
2922 }
2923 #if 0
2924 if (vcpu->vc_vmx_msr_entry_load_va) {
2925 km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
2926 PAGE_SIZE, &kv_page, &kp_zero);
2927 vcpu->vc_vmx_msr_entry_load_va = 0;
2928 }
2929 #endif
2930
2931 vmm_free_vpid(vcpu->vc_vpid);
2932 }
2933
2934 /*
2935 * vcpu_deinit_svm
2936 *
2937 * Deinitializes the vcpu described by 'vcpu'
2938 *
2939 * Parameters:
2940 * vcpu: the vcpu to be deinited
2941 */
2942 void
2943 vcpu_deinit_svm(struct vcpu *vcpu)
2944 {
2945 if (vcpu->vc_control_va) {
2946 km_free((void *)vcpu->vc_control_va, PAGE_SIZE, &kv_page,
2947 &kp_zero);
2948 vcpu->vc_control_va = 0;
2949 }
2950 if (vcpu->vc_msr_bitmap_va) {
2951 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, &kv_any,
2952 &vmm_kp_contig);
2953 vcpu->vc_msr_bitmap_va = 0;
2954 }
2955 if (vcpu->vc_svm_hsa_va) {
2956 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, &kv_page,
2957 &kp_zero);
2958 vcpu->vc_svm_hsa_va = 0;
2959 }
2960 if (vcpu->vc_svm_ioio_va) {
2961 km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any,
2962 &vmm_kp_contig);
2963 vcpu->vc_svm_ioio_va = 0;
2964 }
2965
2966 vmm_free_vpid(vcpu->vc_vpid);
2967 }
2968
2969 /*
2970 * vcpu_deinit
2971 *
2972 * Calls the architecture-specific VCPU deinit routine
2973 *
2974 * Parameters:
2975 * vcpu: the vcpu to be deinited
2976 */
2977 void
2978 vcpu_deinit(struct vcpu *vcpu)
2979 {
2980 if (vmm_softc->mode == VMM_MODE_EPT)
2981 vcpu_deinit_vmx(vcpu);
2982 else if (vmm_softc->mode == VMM_MODE_RVI)
2983 vcpu_deinit_svm(vcpu);
2984 else
2985 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
2986 }
2987
2988 /*
2989 * vcpu_vmx_check_cap
2990 *
2991 * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1
2992 * or set = 0, respectively).
2993 *
2994 * When considering 'msr', we check to see if true controls are available,
2995 * and use those if so.
2996 *
2997 * Returns 1 if 'cap' can be set/cleared as requested, 0 otherwise.
2998 */
2999 int
3000 vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set)
3001 {
3002 uint64_t ctl;
3003
3004 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) {
3005 switch (msr) {
3006 case IA32_VMX_PINBASED_CTLS:
3007 ctl = vcpu->vc_vmx_true_pinbased_ctls;
3008 break;
3009 case IA32_VMX_PROCBASED_CTLS:
3010 ctl = vcpu->vc_vmx_true_procbased_ctls;
3011 break;
3012 case IA32_VMX_PROCBASED2_CTLS:
3013 ctl = vcpu->vc_vmx_procbased2_ctls;
3014 break;
3015 case IA32_VMX_ENTRY_CTLS:
3016 ctl = vcpu->vc_vmx_true_entry_ctls;
3017 break;
3018 case IA32_VMX_EXIT_CTLS:
3019 ctl = vcpu->vc_vmx_true_exit_ctls;
3020 break;
3021 default:
3022 return (0);
3023 }
3024 } else {
3025 switch (msr) {
3026 case IA32_VMX_PINBASED_CTLS:
3027 ctl = vcpu->vc_vmx_pinbased_ctls;
3028 break;
3029 case IA32_VMX_PROCBASED_CTLS:
3030 ctl = vcpu->vc_vmx_procbased_ctls;
3031 break;
3032 case IA32_VMX_PROCBASED2_CTLS:
3033 ctl = vcpu->vc_vmx_procbased2_ctls;
3034 break;
3035 case IA32_VMX_ENTRY_CTLS:
3036 ctl = vcpu->vc_vmx_entry_ctls;
3037 break;
3038 case IA32_VMX_EXIT_CTLS:
3039 ctl = vcpu->vc_vmx_exit_ctls;
3040 break;
3041 default:
3042 return (0);
3043 }
3044 }
3045
3046 if (set) {
3047 /* Check bit 'cap << 32', must be !0 */
3048 return (ctl & ((uint64_t)cap << 32)) != 0;
3049 } else {
3050 /* Check bit 'cap', must be 0 */
3051 return (ctl & cap) == 0;
3052 }
3053 }
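
/*
 * Worked example (hypothetical numbers): the capability MSR encodes
 * "allowed-0" settings in its low 32 bits and "allowed-1" settings in its
 * high 32 bits. With ctl = 0x0000008100000002 and cap = 0x80 (bit 7):
 *
 *	set = 1: ctl & ((uint64_t)0x80 << 32) != 0 -> true, bit may be set
 *	set = 0: ctl & 0x80 == 0                   -> true, bit may be cleared
 *
 * so this control is flexible and vcpu_vmx_compute_ctrl() below may pick
 * either setting for it.
 */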
3054
3055 /*
3056 * vcpu_vmx_compute_ctrl
3057 *
3058 * Computes the appropriate control value, given the supplied parameters
3059 * and CPU capabilities.
3060 *
3061 * Intel has made somewhat of a mess of this computation - it is described
3062 * using no fewer than three different approaches, spread across many
3063 * pages of the SDM. Further compounding the problem is the fact that now
3064 * we have "true controls" for each type of "control", and each needs to
3065 * be examined to get the calculation right, but only if "true" controls
3066 * are present on the CPU we're on.
3067 *
3068 * Parameters:
3069 * ctrlval: the control value, as read from the CPU MSR
3070 * ctrl: which control is being set (eg, pinbased, procbased, etc)
3071 * want1: the set of desired 1 bits
3072 * want0: the set of desired 0 bits
3073 * out: (out) the correct value to write into the VMCS for this VCPU,
3074 * for the 'ctrl' desired.
3075 *
3076 * Returns 0 if successful, or EINVAL if the supplied parameters define
3077 * an unworkable control setup.
3078 */
3079 int
3080 vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1,
3081 uint32_t want0, uint32_t *out)
3082 {
3083 int i, set, clear;
3084
3085 *out = 0;
3086
3087 /*
3088 * The Intel SDM gives three formulae for determining which bits to
3089 * set/clear for a given control and desired functionality. Formula
3090 * 1 is the simplest but disallows use of newer features that are
3091 * enabled by functionality in later CPUs.
3092 *
3093 * Formulas 2 and 3 allow such extra functionality. We use formula
3094 * 2 - this requires us to know the identity of controls in the
3095 * "default1" class for each control register, but allows us to not
3096 * have to pass along and/or query both sets of capability MSRs for
3097 * each control lookup. This makes the code slightly longer,
3098 * however.
3099 */
3100 for (i = 0; i < 32; i++) {
3101 /* Figure out if we can set and / or clear this bit */
3102 set = (ctrlval & (1ULL << (i + 32))) != 0;
3103 clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0;
3104
3105 /* If the bit can't be set nor cleared, something's wrong */
3106 if (!set && !clear)
3107 return (EINVAL);
3108
3109 /*
3110 * Formula 2.c.i - "If the relevant VMX capability MSR
3111 * reports that a control has a single setting, use that
3112 * setting."
3113 */
3114 if (set && !clear) {
3115 if (want0 & (1ULL << i))
3116 return (EINVAL);
3117 else
3118 *out |= (1ULL << i);
3119 } else if (clear && !set) {
3120 if (want1 & (1ULL << i))
3121 return (EINVAL);
3122 else
3123 *out &= ~(1ULL << i);
3124 } else {
3125 /*
3126 * 2.c.ii - "If the relevant VMX capability MSR
3127 * reports that a control can be set to 0 or 1
3128 * and that control's meaning is known to the VMM,
3129 * set the control based on the functionality desired."
3130 */
3131 if (want1 & (1ULL << i))
3132 *out |= (1ULL << i);
3133 else if (want0 & (1ULL << i))
3134 *out &= ~(1ULL << i);
3135 else {
3136 /*
3137 * ... assuming the control's meaning is not
3138 * known to the VMM ...
3139 *
3140 * 2.c.iii - "If the relevant VMX capability
3141 * MSR reports that a control can be set to 0
3142 * or 1 and the control is not in the default1
3143 * class, set the control to 0."
3144 *
3145 * 2.c.iv - "If the relevant VMX capability
3146 * MSR reports that a control can be set to 0
3147 * or 1 and the control is in the default1
3148 * class, set the control to 1."
3149 */
3150 switch (ctrl) {
3151 case IA32_VMX_PINBASED_CTLS:
3152 case IA32_VMX_TRUE_PINBASED_CTLS:
3153 /*
3154 * A.3.1 - default1 class of pinbased
3155 * controls comprises bits 1,2,4
3156 */
3157 switch (i) {
3158 case 1:
3159 case 2:
3160 case 4:
3161 *out |= (1ULL << i);
3162 break;
3163 default:
3164 *out &= ~(1ULL << i);
3165 break;
3166 }
3167 break;
3168 case IA32_VMX_PROCBASED_CTLS:
3169 case IA32_VMX_TRUE_PROCBASED_CTLS:
3170 /*
3171 * A.3.2 - default1 class of procbased
3172 * controls comprises bits 1, 4-6, 8,
3173 * 13-16, 26
3174 */
3175 switch (i) {
3176 case 1:
3177 case 4 ... 6:
3178 case 8:
3179 case 13 ... 16:
3180 case 26:
3181 *out |= (1ULL << i);
3182 break;
3183 default:
3184 *out &= ~(1ULL << i);
3185 break;
3186 }
3187 break;
3188 /*
3189 * Unknown secondary procbased controls
3190 * can always be set to 0
3191 */
3192 case IA32_VMX_PROCBASED2_CTLS:
3193 *out &= ~(1ULL << i);
3194 break;
3195 case IA32_VMX_EXIT_CTLS:
3196 case IA32_VMX_TRUE_EXIT_CTLS:
3197 /*
3198 * A.4 - default1 class of exit
3199 * controls comprises bits 0-8, 10,
3200 * 11, 13, 14, 16, 17
3201 */
3202 switch (i) {
3203 case 0 ... 8:
3204 case 10 ... 11:
3205 case 13 ... 14:
3206 case 16 ... 17:
3207 *out |= (1ULL << i);
3208 break;
3209 default:
3210 *out &= ~(1ULL << i);
3211 break;
3212 }
3213 break;
3214 case IA32_VMX_ENTRY_CTLS:
3215 case IA32_VMX_TRUE_ENTRY_CTLS:
3216 /*
3217 * A.5 - default1 class of entry
3218 * controls comprises bits 0-8, 12
3219 */
3220 switch (i) {
3221 case 0 ... 8:
3222 case 12:
3223 *out |= (1ULL << i);
3224 break;
3225 default:
3226 *out &= ~(1ULL << i);
3227 break;
3228 }
3229 break;
3230 }
3231 }
3232 }
3233 }
3234
3235 return (0);
3236 }
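
/*
 * Hedged usage sketch, mirroring how vcpu_reset_regs_vmx() above computes
 * and installs the pinbased controls (pinbased is a hypothetical local):
 *
 *	uint32_t pinbased;
 *
 *	if (vcpu_vmx_compute_ctrl(vcpu->vc_vmx_true_pinbased_ctls,
 *	    IA32_VMX_TRUE_PINBASED_CTLS,
 *	    IA32_VMX_EXTERNAL_INT_EXITING | IA32_VMX_NMI_EXITING, 0,
 *	    &pinbased) == 0)
 *		(void)vmwrite(VMCS_PINBASED_CTLS, pinbased);
 */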
3237
3238 /*
3239 * vm_run
3240 *
3241 * Run the vm / vcpu specified by 'vrp'
3242 *
3243 * Parameters:
3244 * vrp: structure defining the VM to run
3245 *
3246 * Return value:
3247 * ENOENT: the VM defined in 'vrp' could not be located
3248 * EBUSY: the VM defined in 'vrp' is already running
3249 * EFAULT: error copying data from userspace (vmd) on return from previous
3250 * exit.
3251 * EAGAIN: help is needed from vmd(8) (device I/O or an exit that vmm(4)
3252 * cannot handle in-kernel)
3253 * 0: the run loop exited and no help is needed from vmd(8)
3254 */
3255 int
3256 vm_run(struct vm_run_params *vrp)
3257 {
3258 struct vm *vm;
3259 struct vcpu *vcpu;
3260 int ret = 0;
3261 u_int old, next;
3262
3263 /*
3264 * Find desired VM
3265 */
3266 ret = vm_find(vrp->vrp_vm_id, &vm);
3267 if (ret)
3268 return (ret);
3269
3270 vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
3271 if (vcpu == NULL) {
3272 ret = ENOENT;
3273 goto out;
3274 }
3275
3276 /*
3277 * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
3278 * Failure to make the transition indicates the VCPU is busy.
3279 */
3280 rw_enter_write(&vcpu->vc_lock);
3281 old = VCPU_STATE_STOPPED;
3282 next = VCPU_STATE_RUNNING;
3283 if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) {
3284 ret = EBUSY;
3285 goto out_unlock;
3286 }
3287
3288 /*
3289 * We may be returning from userland (vmd), which was helping us handle
3290 * the last exit. Copy in the exit data from vmd. The exit data will be
3291 * consumed before the next entry (this typically comprises
3292 * VCPU register changes as the result of vmd(8)'s actions).
3293 */
3294 ret = copyin(vrp->vrp_exit, &vcpu->vc_exit, sizeof(struct vm_exit));
3295 if (ret)
3296 goto out_unlock;
3297
3298 vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type;
3299 vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector;
3300 vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode;
3301
3302 WRITE_ONCE(vcpu->vc_curcpu, curcpu());
3303 /* Run the VCPU specified in vrp */
3304 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
3305 ret = vcpu_run_vmx(vcpu, vrp);
3306 } else if (vcpu->vc_virt_mode == VMM_MODE_RVI) {
3307 ret = vcpu_run_svm(vcpu, vrp);
3308 }
3309 WRITE_ONCE(vcpu->vc_curcpu, NULL);
3310
3311 if (ret == 0 || ret == EAGAIN) {
3312 /* If we are exiting, populate exit data so vmd can help. */
3313 vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE
3314 : vcpu->vc_gueststate.vg_exit_reason;
3315 vrp->vrp_irqready = vcpu->vc_irqready;
3316 vcpu->vc_state = VCPU_STATE_STOPPED;
3317
3318 if (copyout(&vcpu->vc_exit, vrp->vrp_exit,
3319 sizeof(struct vm_exit)) == EFAULT) {
3320 ret = EFAULT;
3321 } else
3322 ret = 0;
3323 } else {
3324 vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
3325 vcpu->vc_state = VCPU_STATE_TERMINATED;
3326 }
3327 out_unlock:
3328 rw_exit_write(&vcpu->vc_lock);
3329 out:
3330 refcnt_rele_wake(&vm->vm_refcnt);
3331 return (ret);
3332 }
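
/*
 * Rough sketch of the userland half of this interface (an assumption,
 * modeled on how vmd(8) drives a vcpu; handle_exit() and vmm_fd are
 * hypothetical, the fields come from vm_run_params above):
 *
 *	struct vm_run_params vrp;
 *	struct vm_exit exit;
 *
 *	memset(&vrp, 0, sizeof(vrp));
 *	vrp.vrp_vm_id = vm_id;
 *	vrp.vrp_vcpu_id = 0;
 *	vrp.vrp_exit = &exit;
 *	for (;;) {
 *		if (ioctl(vmm_fd, VMM_IOC_RUN, &vrp) == -1)
 *			break;
 *		if (vrp.vrp_exit_reason != VM_EXIT_NONE)
 *			handle_exit(&vrp);
 *	}
 */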
3333
3334 /*
3335 * vmm_fpurestore
3336 *
3337 * Restore the guest's FPU state, saving the existing userland thread's
3338 * FPU context if necessary. Must be called with interrupts disabled.
3339 */
3340 int
3341 vmm_fpurestore(struct vcpu *vcpu)
3342 {
3343 struct cpu_info *ci = curcpu();
3344
3345 rw_assert_wrlock(&vcpu->vc_lock);
3346
3347 /* save the current user thread's FPU state if we haven't already */
3348 if (ci->ci_pflags & CPUPF_USERXSTATE) {
3349 ci->ci_pflags &= ~CPUPF_USERXSTATE;
3350 fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
3351 }
3352
3353 if (vcpu->vc_fpuinited)
3354 xrstor_kern(&vcpu->vc_g_fpu, xsave_mask);
3355
3356 if (xsave_mask) {
3357 /* Restore guest %xcr0 */
3358 if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
3359 DPRINTF("%s: guest attempted to set invalid bits in "
3360 "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
3361 __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
3362 return EINVAL;
3363 }
3364 }
3365
3366 return 0;
3367 }
3368
3369 /*
3370 * vmm_fpusave
3371 *
3372 * Save the guest's FPU state. Must be called with interrupts disabled.
3373 */
3374 void
3375 vmm_fpusave(struct vcpu *vcpu)
3376 {
3377 rw_assert_wrlock(&vcpu->vc_lock);
3378
3379 if (xsave_mask) {
3380 /* Save guest %xcr0 */
3381 vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
3382
3383 /* Restore host %xcr0 */
3384 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
3385 }
3386
3387 /*
3388 * Save full copy of FPU state - guest content is always
3389 * a subset of host's save area (see xsetbv exit handler)
3390 */
3391 fpusavereset(&vcpu->vc_g_fpu);
3392 vcpu->vc_fpuinited = 1;
3393 }
3394
3395 /*
3396 * vmm_translate_gva
3397 *
3398 * Translates a guest virtual address to a guest physical address by walking
3399 * the currently active page table (if needed).
3400 *
3401 * Note - this function can possibly alter the supplied VCPU state.
3402 * Specifically, it may inject exceptions depending on the current VCPU
3403 * configuration, and may alter %cr2 on #PF. Consequently, this function
3404 * should only be used as part of instruction emulation.
3405 *
3406 * Parameters:
3407 * vcpu: The VCPU this translation should be performed for (guest MMU settings
3408 * are gathered from this VCPU)
3409 * va: virtual address to translate
3410 * pa: pointer to paddr_t variable that will receive the translated physical
3411 * address. 'pa' is unchanged on error.
3412 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
3413 * the address should be translated
3414 *
3415 * Return values:
3416 * 0: the address was successfully translated - 'pa' contains the physical
3417 * address currently mapped by 'va'.
3418 * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
3419 * and %cr2 set in the vcpu structure.
3420 * EINVAL: an error occurred reading paging table structures
3421 */
3422 int
3423 vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
3424 {
3425 int level, shift, pdidx;
3426 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
3427 uint64_t shift_width, pte_size, *hva;
3428 paddr_t hpa;
3429 struct vcpu_reg_state vrs;
3430
3431 level = 0;
3432
3433 if (vmm_softc->mode == VMM_MODE_EPT) {
3434 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 1, &vrs))
3435 return (EINVAL);
3436 } else if (vmm_softc->mode == VMM_MODE_RVI) {
3437 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs))
3438 return (EINVAL);
3439 } else {
3440 printf("%s: unknown vmm mode", __func__);
3441 return (EINVAL);
3442 }
3443
3444 DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
3445 vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);
3446
3447 if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
3448 DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
3449 va);
3450 *pa = va;
3451 return (0);
3452 }
3453
3454 pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3];
3455
3456 if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
3457 if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
3458 pte_size = sizeof(uint64_t);
3459 shift_width = 9;
3460
3461 if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
3462 level = 4;
3463 mask = L4_MASK;
3464 shift = L4_SHIFT;
3465 } else {
3466 level = 3;
3467 mask = L3_MASK;
3468 shift = L3_SHIFT;
3469 }
3470 } else {
3471 level = 2;
3472 shift_width = 10;
3473 mask = 0xFFC00000;
3474 shift = 22;
3475 pte_size = sizeof(uint32_t);
3476 }
3477 } else {
3478 return (EINVAL);
3479 }
3480
3481 DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
3482 "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
3483 shift_width);
3484
3485 /* XXX: Check for R bit in segment selector and set A bit */
3486
3487 for (;level > 0; level--) {
3488 pdidx = (va & mask) >> shift;
3489 pte_paddr = (pt_paddr) + (pdidx * pte_size);
3490
3491 DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
3492 level, pte_paddr);
3493 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
3494 &hpa)) {
3495 DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
3496 __func__, pte_paddr);
3497 return (EINVAL);
3498 }
3499
3500 hpa = hpa | (pte_paddr & 0xFFF);
3501 hva = (uint64_t *)PMAP_DIRECT_MAP(hpa);
3502 DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
3503 __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
3504 if (pte_size == 8)
3505 pte = *hva;
3506 else
3507 pte = *(uint32_t *)hva;
3508
3509 DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
3510 pte);
3511
3512 /* XXX: Set CR2 */
3513 if (!(pte & PG_V))
3514 return (EFAULT);
3515
3516 /* XXX: Check for SMAP */
3517 if ((mode == PROT_WRITE) && !(pte & PG_RW))
3518 return (EPERM);
3519
3520 if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u))
3521 return (EPERM);
3522
3523 pte = pte | PG_U;
3524 if (mode == PROT_WRITE)
3525 pte = pte | PG_M;
3526 *hva = pte;
3527
3528 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
3529 if (pte & PG_PS)
3530 break;
3531
3532 if (level > 1) {
3533 pt_paddr = pte & PG_FRAME;
3534 shift -= shift_width;
3535 mask = mask >> shift_width;
3536 }
3537 }
3538
3539 low_mask = ((uint64_t)1ULL << shift) - 1;
3540 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
3541 *pa = (pte & high_mask) | (va & low_mask);
3542
3543 DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
3544 va, *pa);
3545
3546 return (0);
3547 }
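
/*
 * Worked example for the final address assembly above (illustrative): a
 * 4-level 4K walk ends with shift == 12, so low_mask == 0xfff and the
 * result is (pte frame bits) | (va & 0xfff). For a 2M large page (PG_PS
 * found at level 2) the loop breaks with shift == 21, so the low 21 bits
 * of the GVA are carried over instead.
 */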
3548
3549
3550 /*
3551 * vcpu_run_vmx
3552 *
3553 * VMX main loop used to run a VCPU.
3554 *
3555 * Parameters:
3556 * vcpu: The VCPU to run
3557 * vrp: run parameters
3558 *
3559 * Return values:
3560 * 0: The run loop exited and no help is needed from vmd
3561 * EAGAIN: The run loop exited and help from vmd is needed
3562 * EINVAL: an error occurred
3563 */
3564 int
3565 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
3566 {
3567 int ret = 0, exitinfo;
3568 struct region_descriptor gdt;
3569 struct cpu_info *ci = NULL;
3570 uint64_t exit_reason, cr3, msr, insn_error;
3571 struct schedstate_percpu *spc;
3572 struct vmx_msr_store *msr_store;
3573 struct vmx_invvpid_descriptor vid;
3574 struct vmx_invept_descriptor vid_ept;
3575 uint64_t cr0, eii, procbased, int_st;
3576 u_long s;
3577
3578 rw_assert_wrlock(&vcpu->vc_lock);
3579
3580 if (vcpu_reload_vmcs_vmx(vcpu)) {
3581 printf("%s: failed (re)loading vmcs\n", __func__);
3582 return (EINVAL);
3583 }
3584
3585 /*
3586 * If we are returning from userspace (vmd) because we exited
3587 * last time, fix up any needed vcpu state first. Which state
3588 * needs to be fixed up depends on what vmd populated in the
3589 * exit data structure.
3590 */
3591 if (vrp->vrp_intr_pending)
3592 vcpu->vc_intr = 1;
3593 else
3594 vcpu->vc_intr = 0;
3595
3596 switch (vcpu->vc_gueststate.vg_exit_reason) {
3597 case VMX_EXIT_IO:
3598 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN)
3599 vcpu->vc_gueststate.vg_rax = vcpu->vc_exit.vei.vei_data;
3600 vcpu->vc_gueststate.vg_rip =
3601 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
3602 if (vmwrite(VMCS_GUEST_IA32_RIP, vcpu->vc_gueststate.vg_rip)) {
3603 printf("%s: failed to update rip\n", __func__);
3604 return (EINVAL);
3605 }
3606 break;
3607 case VMX_EXIT_EPT_VIOLATION:
3608 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
3609 &vcpu->vc_exit.vrs);
3610 if (ret) {
3611 printf("%s: vm %d vcpu %d failed to update registers\n",
3612 __func__, vcpu->vc_parent->vm_id, vcpu->vc_id);
3613 return (EINVAL);
3614 }
3615 break;
3616 }
3617 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
3618
3619 /* Handle vmd(8) injected interrupts */
3620 /* Is there an interrupt pending injection? */
3621 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
3622 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
3623 printf("%s: can't get interruptibility state\n",
3624 __func__);
3625 return (EINVAL);
3626 }
3627
3628 /* Interruptibility bits 0:1 cover blocking by STI and by MOV SS */
3629 if (!(int_st & 0x3) && vcpu->vc_irqready) {
3630 eii = (uint64_t)vcpu->vc_inject.vie_vector;
3631 eii |= (1ULL << 31); /* Valid */
3632 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3633 printf("vcpu_run_vmx: can't vector "
3634 "interrupt to guest\n");
3635 return (EINVAL);
3636 }
3637
3638 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3639 }
3640 } else if (!vcpu->vc_intr) {
3641 /*
3642 * Disable window exiting
3643 */
3644 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3645 printf("%s: can't read procbased ctls on exit\n",
3646 __func__);
3647 return (EINVAL);
3648 } else {
3649 procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING;
3650 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3651 printf("%s: can't write procbased ctls "
3652 "on exit\n", __func__);
3653 return (EINVAL);
3654 }
3655 }
3656 }
3657
3658 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
3659 while (ret == 0) {
3660 #ifdef VMM_DEBUG
3661 paddr_t pa = 0ULL;
3662 vmptrst(&pa);
3663 KASSERT(pa == vcpu->vc_control_pa);
3664 #endif /* VMM_DEBUG */
3665
3666 vmm_update_pvclock(vcpu);
3667
3668 if (ci != curcpu()) {
3669 ci = curcpu();
3670 vcpu->vc_last_pcpu = ci;
3671
3672 /* We're now using this vcpu's EPT pmap on this cpu. */
3673 atomic_swap_ptr(&ci->ci_ept_pmap,
3674 vcpu->vc_parent->vm_map->pmap);
3675
3676 /* Invalidate EPT cache. */
3677 vid_ept.vid_reserved = 0;
3678 vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
3679 if (invept(ci->ci_vmm_cap.vcc_vmx.vmx_invept_mode,
3680 &vid_ept)) {
3681 printf("%s: invept\n", __func__);
3682 return (EINVAL);
3683 }
3684
3685 /* Host CR3 */
3686 cr3 = rcr3();
3687 if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
3688 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
3689 VMCS_HOST_IA32_CR3, cr3);
3690 return (EINVAL);
3691 }
3692
3693 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
3694 if (gdt.rd_base == 0) {
3695 printf("%s: setregion\n", __func__);
3696 return (EINVAL);
3697 }
3698
3699 /* Host GDTR base */
3700 if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) {
3701 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3702 __func__, VMCS_HOST_IA32_GDTR_BASE,
3703 gdt.rd_base);
3704 return (EINVAL);
3705 }
3706
3707 /* Host TR base */
3708 if (vmwrite(VMCS_HOST_IA32_TR_BASE,
3709 (uint64_t)ci->ci_tss)) {
3710 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3711 __func__, VMCS_HOST_IA32_TR_BASE,
3712 (uint64_t)ci->ci_tss);
3713 return (EINVAL);
3714 }
3715
3716 /* Host GS.base (aka curcpu) */
3717 if (vmwrite(VMCS_HOST_IA32_GS_BASE, (uint64_t)ci)) {
3718 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3719 __func__, VMCS_HOST_IA32_GS_BASE,
3720 (uint64_t)ci);
3721 return (EINVAL);
3722 }
3723
3724 /* Host FS.base */
3725 msr = rdmsr(MSR_FSBASE);
3726 if (vmwrite(VMCS_HOST_IA32_FS_BASE, msr)) {
3727 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
3728 __func__, VMCS_HOST_IA32_FS_BASE, msr);
3729 return (EINVAL);
3730 }
3731
3732 /* Host KernelGS.base (userspace GS.base here) */
3733 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data =
3734 rdmsr(MSR_KERNELGSBASE);
3735 }
3736
3737 /* Inject event if present */
3738 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
3739 eii = (uint64_t)vcpu->vc_inject.vie_vector;
3740 eii |= (1ULL << 31); /* Valid */
3741
3742 switch (vcpu->vc_inject.vie_vector) {
3743 case VMM_EX_BP:
3744 case VMM_EX_OF:
3745 /* Software Exceptions */
3746 eii |= (4ULL << 8);
3747 break;
3748 case VMM_EX_DF:
3749 case VMM_EX_TS:
3750 case VMM_EX_NP:
3751 case VMM_EX_SS:
3752 case VMM_EX_GP:
3753 case VMM_EX_PF:
3754 case VMM_EX_AC:
3755 /* Hardware Exceptions */
3756 eii |= (3ULL << 8);
3757 cr0 = 0;
3758 if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) {
3759 printf("%s: vmread(VMCS_GUEST_IA32_CR0)"
3760 "\n", __func__);
3761 ret = EINVAL;
3762 break;
3763 }
3764
3765 /* Don't set error codes if in real mode. */
3766 if (ret == EINVAL || !(cr0 & CR0_PE))
3767 break;
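/* Bit 11 requests delivery of an error code with this event. */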
3768 eii |= (1ULL << 11);
3769
3770 /* Enforce a 0 error code for #AC. */
3771 if (vcpu->vc_inject.vie_vector == VMM_EX_AC)
3772 vcpu->vc_inject.vie_errorcode = 0;
3773 /*
3774 * XXX: Intel SDM says if IA32_VMX_BASIC[56] is
3775 * set, error codes can be injected for hw
3776 * exceptions with or without error code,
3777 * regardless of vector. See Vol 3D. A1. Ignore
3778 * this capability for now.
3779 */
3780 if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
3781 vcpu->vc_inject.vie_errorcode)) {
3782 printf("%s: can't write error code to "
3783 "guest\n", __func__);
3784 ret = EINVAL;
3785 }
3786 } /* switch */
3787 if (ret == EINVAL)
3788 break;
3789
3790 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
3791 printf("%s: can't vector event to guest\n",
3792 __func__);
3793 ret = EINVAL;
3794 break;
3795 }
3796 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
3797 }
3798
3799 if (vcpu->vc_vmx_vpid_enabled) {
3800 /* Invalidate old TLB mappings */
3801 vid.vid_vpid = vcpu->vc_vpid;
3802 vid.vid_addr = 0;
3803 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
3804 }
3805
3806 /* Start / resume the VCPU */
3807
3808 /* Disable interrupts and save the current host FPU state. */
3809 s = intr_disable();
3810 if ((ret = vmm_fpurestore(vcpu))) {
3811 intr_restore(s);
3812 break;
3813 }
3814
3815 TRACEPOINT(vmm, guest_enter, vcpu, vrp);
3816
3817 /*
3818 * If we're resuming to a different VCPU and have IBPB,
3819 * then use it to prevent cross-VM branch-target injection.
3820 */
3821 if (ci->ci_guest_vcpu != vcpu &&
3822 (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS)) {
3823 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
3824 ci->ci_guest_vcpu = vcpu;
3825 }
3826
3827 /* Restore any guest PKRU state. */
3828 if (vmm_softc->sc_md.pkru_enabled)
3829 wrpkru(0, vcpu->vc_pkru);
3830
3831 ret = vmx_enter_guest(&vcpu->vc_control_pa,
3832 &vcpu->vc_gueststate,
3833 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED),
3834 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
3835
3836 /* Restore host PKRU state. */
3837 if (vmm_softc->sc_md.pkru_enabled) {
3838 vcpu->vc_pkru = rdpkru(0);
3839 wrpkru(0, PGK_VALUE);
3840 }
3841
3842 /*
3843 * VM exit restores the GDT and IDT bases, but gives
3844 * them high limits. Reload with the correct limits here.
3845 * 'gdt' is set above the first time through and reset there
3846 * whenever this thread switches CPUs.
3847 */
3848 bare_lgdt(&gdt);
3849 cpu_init_idt();
3850
3851 /*
3852 * On exit, interrupts are disabled, and we are running with
3853 * the guest FPU state still possibly on the CPU. Save the FPU
3854 * state before re-enabling interrupts.
3855 */
3856 vmm_fpusave(vcpu);
3857 intr_restore(s);
3858
3859 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED);
3860 exit_reason = VM_EXIT_NONE;
3861
3862 /* If we exited successfully ... */
3863 if (ret == 0) {
3864 exitinfo = vmx_get_exit_info(
3865 &vcpu->vc_gueststate.vg_rip, &exit_reason);
3866 if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) {
3867 printf("%s: cannot read guest rip\n", __func__);
3868 ret = EINVAL;
3869 break;
3870 }
3871 if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) {
3872 printf("%s: can't read exit reason\n",
3873 __func__);
3874 ret = EINVAL;
3875 break;
3876 }
3877 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
3878 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
3879
3880 /* Update our state */
3881 if (vmread(VMCS_GUEST_IA32_RFLAGS,
3882 &vcpu->vc_gueststate.vg_rflags)) {
3883 printf("%s: can't read guest rflags during "
3884 "exit\n", __func__);
3885 ret = EINVAL;
3886 break;
3887 }
3888
3889 /*
3890 * Handle the exit. This will alter "ret" to EAGAIN if
3891 * the exit handler determines help from vmd is needed.
3892 */
3893 ret = vmx_handle_exit(vcpu);
3894
3895 if (vcpu->vc_gueststate.vg_rflags & PSL_I)
3896 vcpu->vc_irqready = 1;
3897 else
3898 vcpu->vc_irqready = 0;
3899
3900 /*
3901 * If not ready for interrupts, but interrupts pending,
3902 * enable interrupt window exiting.
3903 */
3904 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
3905 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
3906 printf("%s: can't read procbased ctls "
3907 "on intwin exit\n", __func__);
3908 ret = EINVAL;
3909 break;
3910 }
3911
3912 procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING;
3913 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
3914 printf("%s: can't write procbased ctls "
3915 "on intwin exit\n", __func__);
3916 ret = EINVAL;
3917 break;
3918 }
3919 }
3920
3921 /*
3922 * Exit to vmd if we are terminating, failed to enter,
3923 * or need help (device I/O)
3924 */
3925 if (ret || vcpu_must_stop(vcpu))
3926 break;
3927
3928 if (vcpu->vc_intr && vcpu->vc_irqready) {
3929 ret = EAGAIN;
3930 break;
3931 }
3932
3933 /* Check if we should yield - don't hog the {p,v}pu */
3934 spc = &ci->ci_schedstate;
3935 if (spc->spc_schedflags & SPCF_SHOULDYIELD)
3936 break;
3937
3938 } else {
3939 /*
3940 * We failed vmresume or vmlaunch for some reason,
3941 * typically due to invalid vmcs state or other
3942 * reasons documented in SDM Vol 3C 30.4.
3943 */
3944 switch (ret) {
3945 case VMX_FAIL_LAUNCH_INVALID_VMCS:
3946 printf("%s: failed %s with invalid vmcs\n",
3947 __func__,
3948 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3949 ? "vmresume" : "vmlaunch"));
3950 break;
3951 case VMX_FAIL_LAUNCH_VALID_VMCS:
3952 printf("%s: failed %s with valid vmcs\n",
3953 __func__,
3954 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3955 ? "vmresume" : "vmlaunch"));
3956 break;
3957 default:
3958 printf("%s: failed %s for unknown reason\n",
3959 __func__,
3960 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
3961 ? "vmresume" : "vmlaunch"));
3962 }
3963
3964 ret = EINVAL;
3965
3966 /* Try to translate a vmfail error code, if possible. */
3967 if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) {
3968 printf("%s: can't read insn error field\n",
3969 __func__);
3970 } else
3971 printf("%s: error code = %lld, %s\n", __func__,
3972 insn_error,
3973 vmx_instruction_error_decode(insn_error));
3974 #ifdef VMM_DEBUG
3975 vmx_vcpu_dump_regs(vcpu);
3976 dump_vcpu(vcpu);
3977 #endif /* VMM_DEBUG */
3978 }
3979 }
3980
3981 vcpu->vc_last_pcpu = curcpu();
3982
3983 /* Copy the VCPU register state to the exit structure */
3984 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 0, &vcpu->vc_exit.vrs))
3985 ret = EINVAL;
3986 vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
3987
3988 return (ret);
3989 }
3990
3991 /*
3992 * vmx_handle_intr
3993 *
3994 * Handle host (external) interrupts. We read which interrupt fired by
3995 * extracting the vector from the VMCS and dispatch the interrupt directly
3996 * to the host using vmm_dispatch_intr.
3997 */
3998 void
3999 vmx_handle_intr(struct vcpu *vcpu)
4000 {
4001 uint8_t vec;
4002 uint64_t eii;
4003 struct gate_descriptor *idte;
4004 vaddr_t handler;
4005
4006 if (vmread(VMCS_EXIT_INTERRUPTION_INFO, &eii)) {
4007 printf("%s: can't obtain intr info\n", __func__);
4008 return;
4009 }
4010
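/* The interrupting vector is in bits 7:0 of the exit interruption info. */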
4011 vec = eii & 0xFF;
4012
4013 /* XXX check "error valid" code in eii, abort if 0 */
4014 idte = &idt[vec];
4015 handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
4016 vmm_dispatch_intr(handler);
4017 }
4018
4019 /*
4020 * svm_handle_hlt
4021 *
4022 * Handle HLT exits
4023 *
4024 * Parameters
4025 * vcpu: The VCPU that executed the HLT instruction
4026 *
4027 * Return Values:
4028 * EIO: The guest halted with interrupts disabled
4029 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4030 * until a virtual interrupt is ready to inject
4031 */
4032 int
4033 svm_handle_hlt(struct vcpu *vcpu)
4034 {
4035 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4036 uint64_t rflags = vmcb->v_rflags;
4037
4038 /* All HLT insns are 1 byte */
4039 vcpu->vc_gueststate.vg_rip += 1;
4040
4041 if (!(rflags & PSL_I)) {
4042 DPRINTF("%s: guest halted with interrupts disabled\n",
4043 __func__);
4044 return (EIO);
4045 }
4046
4047 return (EAGAIN);
4048 }
4049
4050 /*
4051 * vmx_handle_hlt
4052 *
4053 * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
4054 * the guest (no NMIs handled) by returning EIO to vmd.
4055 *
4056 * Parameters:
4057 * vcpu: The VCPU that executed the HLT instruction
4058 *
4059 * Return Values:
4060 * EINVAL: An error occurred extracting information from the VMCS, or an
4061 * invalid HLT instruction was encountered
4062 * EIO: The guest halted with interrupts disabled
4063 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4064 * until a virtual interrupt is ready to inject
4065 *
4066 */
4067 int
4068 vmx_handle_hlt(struct vcpu *vcpu)
4069 {
4070 uint64_t insn_length, rflags;
4071
4072 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4073 printf("%s: can't obtain instruction length\n", __func__);
4074 return (EINVAL);
4075 }
4076
4077 if (vmread(VMCS_GUEST_IA32_RFLAGS, &rflags)) {
4078 printf("%s: can't obtain guest rflags\n", __func__);
4079 return (EINVAL);
4080 }
4081
4082 if (insn_length != 1) {
4083 DPRINTF("%s: HLT with instruction length %lld not supported\n",
4084 __func__, insn_length);
4085 return (EINVAL);
4086 }
4087
4088 if (!(rflags & PSL_I)) {
4089 DPRINTF("%s: guest halted with interrupts disabled\n",
4090 __func__);
4091 return (EIO);
4092 }
4093
4094 vcpu->vc_gueststate.vg_rip += insn_length;
4095 return (EAGAIN);
4096 }
4097
4098 /*
4099 * vmx_get_exit_info
4100 *
4101 * Returns exit information containing the current guest RIP and exit reason
4102 * in rip and exit_reason. The return value is a bitmask indicating whether
4103 * reading the RIP and exit reason was successful.
4104 */
4105 int
4106 vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
4107 {
4108 int rv = 0;
4109
4110 if (vmread(VMCS_GUEST_IA32_RIP, rip) == 0) {
4111 rv |= VMX_EXIT_INFO_HAVE_RIP;
4112 if (vmread(VMCS_EXIT_REASON, exit_reason) == 0)
4113 rv |= VMX_EXIT_INFO_HAVE_REASON;
4114 }
4115 return (rv);
4116 }
4117
4118 /*
4119 * svm_handle_exit
4120 *
4121 * Handle exits from the VM by decoding the exit reason and calling various
4122 * subhandlers as needed.
4123 */
4124 int
4125 svm_handle_exit(struct vcpu *vcpu)
4126 {
4127 uint64_t exit_reason, rflags;
4128 int update_rip, ret = 0;
4129 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4130
4131 update_rip = 0;
4132 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4133 rflags = vcpu->vc_gueststate.vg_rflags;
4134
4135 switch (exit_reason) {
4136 case SVM_VMEXIT_VINTR:
4137 if (!(rflags & PSL_I)) {
4138 DPRINTF("%s: impossible interrupt window exit "
4139 "config\n", __func__);
4140 ret = EINVAL;
4141 break;
4142 }
4143
4144 /*
4145 * Guest is now ready for interrupts, so disable interrupt
4146 * window exiting.
4147 */
4148 vmcb->v_irq = 0;
4149 vmcb->v_intr_vector = 0;
4150 vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR;
4151 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | SVM_CLEANBITS_I);
4152
4153 update_rip = 0;
4154 break;
4155 case SVM_VMEXIT_INTR:
4156 update_rip = 0;
4157 break;
4158 case SVM_VMEXIT_SHUTDOWN:
4159 update_rip = 0;
4160 ret = EAGAIN;
4161 break;
4162 case SVM_VMEXIT_NPF:
4163 ret = svm_handle_np_fault(vcpu);
4164 break;
4165 case SVM_VMEXIT_CPUID:
4166 ret = vmm_handle_cpuid(vcpu);
4167 update_rip = 1;
4168 break;
4169 case SVM_VMEXIT_MSR:
4170 ret = svm_handle_msr(vcpu);
4171 update_rip = 1;
4172 break;
4173 case SVM_VMEXIT_XSETBV:
4174 ret = svm_handle_xsetbv(vcpu);
4175 update_rip = 1;
4176 break;
4177 case SVM_VMEXIT_IOIO:
4178 if (svm_handle_inout(vcpu) == 0)
4179 ret = EAGAIN;
4180 break;
4181 case SVM_VMEXIT_HLT:
4182 ret = svm_handle_hlt(vcpu);
4183 update_rip = 1;
4184 break;
4185 case SVM_VMEXIT_MWAIT:
4186 case SVM_VMEXIT_MWAIT_CONDITIONAL:
4187 case SVM_VMEXIT_MONITOR:
4188 case SVM_VMEXIT_VMRUN:
4189 case SVM_VMEXIT_VMMCALL:
4190 case SVM_VMEXIT_VMLOAD:
4191 case SVM_VMEXIT_VMSAVE:
4192 case SVM_VMEXIT_STGI:
4193 case SVM_VMEXIT_CLGI:
4194 case SVM_VMEXIT_SKINIT:
4195 case SVM_VMEXIT_RDTSCP:
4196 case SVM_VMEXIT_ICEBP:
4197 case SVM_VMEXIT_INVLPGA:
4198 ret = vmm_inject_ud(vcpu);
4199 update_rip = 0;
4200 break;
4201 default:
4202 DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
4203 exit_reason, (uint64_t)vcpu->vc_control_pa);
4204 return (EINVAL);
4205 }
4206
4207 if (update_rip) {
4208 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
4209
4210 if (rflags & PSL_T) {
4211 if (vmm_inject_db(vcpu)) {
4212 printf("%s: can't inject #DB exception to "
4213 "guest", __func__);
4214 return (EINVAL);
4215 }
4216 }
4217 }
4218
4219 /* Enable SVME in EFER (must always be set) */
4220 vmcb->v_efer |= EFER_SVME;
4221 svm_set_dirty(vcpu, SVM_CLEANBITS_CR);
4222
4223 return (ret);
4224 }
4225
4226 /*
4227 * vmx_handle_exit
4228 *
4229 * Handle exits from the VM by decoding the exit reason and calling various
4230 * subhandlers as needed.
4231 */
4232 int
4233 vmx_handle_exit(struct vcpu *vcpu)
4234 {
4235 uint64_t exit_reason, rflags, istate;
4236 int update_rip, ret = 0;
4237
4238 update_rip = 0;
4239 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4240 rflags = vcpu->vc_gueststate.vg_rflags;
4241
4242 switch (exit_reason) {
4243 case VMX_EXIT_INT_WINDOW:
4244 if (!(rflags & PSL_I)) {
4245 DPRINTF("%s: impossible interrupt window exit "
4246 "config\n", __func__);
4247 ret = EINVAL;
4248 break;
4249 }
4250
4251 ret = EAGAIN;
4252 update_rip = 0;
4253 break;
4254 case VMX_EXIT_EPT_VIOLATION:
4255 ret = vmx_handle_np_fault(vcpu);
4256 break;
4257 case VMX_EXIT_CPUID:
4258 ret = vmm_handle_cpuid(vcpu);
4259 update_rip = 1;
4260 break;
4261 case VMX_EXIT_IO:
4262 if (vmx_handle_inout(vcpu) == 0)
4263 ret = EAGAIN;
4264 break;
4265 case VMX_EXIT_EXTINT:
4266 vmx_handle_intr(vcpu);
4267 update_rip = 0;
4268 break;
4269 case VMX_EXIT_CR_ACCESS:
4270 ret = vmx_handle_cr(vcpu);
4271 update_rip = 1;
4272 break;
4273 case VMX_EXIT_HLT:
4274 ret = vmx_handle_hlt(vcpu);
4275 update_rip = 1;
4276 break;
4277 case VMX_EXIT_RDMSR:
4278 ret = vmx_handle_rdmsr(vcpu);
4279 update_rip = 1;
4280 break;
4281 case VMX_EXIT_WRMSR:
4282 ret = vmx_handle_wrmsr(vcpu);
4283 update_rip = 1;
4284 break;
4285 case VMX_EXIT_XSETBV:
4286 ret = vmx_handle_xsetbv(vcpu);
4287 update_rip = 1;
4288 break;
4289 case VMX_EXIT_MWAIT:
4290 case VMX_EXIT_MONITOR:
4291 case VMX_EXIT_VMXON:
4292 case VMX_EXIT_VMWRITE:
4293 case VMX_EXIT_VMREAD:
4294 case VMX_EXIT_VMLAUNCH:
4295 case VMX_EXIT_VMRESUME:
4296 case VMX_EXIT_VMPTRLD:
4297 case VMX_EXIT_VMPTRST:
4298 case VMX_EXIT_VMCLEAR:
4299 case VMX_EXIT_VMCALL:
4300 case VMX_EXIT_VMFUNC:
4301 case VMX_EXIT_VMXOFF:
4302 case VMX_EXIT_INVVPID:
4303 case VMX_EXIT_INVEPT:
4304 ret = vmm_inject_ud(vcpu);
4305 update_rip = 0;
4306 break;
4307 case VMX_EXIT_TRIPLE_FAULT:
4308 #ifdef VMM_DEBUG
4309 DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
4310 vcpu->vc_parent->vm_id, vcpu->vc_id);
4311 vmx_vcpu_dump_regs(vcpu);
4312 dump_vcpu(vcpu);
4313 vmx_dump_vmcs(vcpu);
4314 #endif /* VMM_DEBUG */
4315 ret = EAGAIN;
4316 update_rip = 0;
4317 break;
4318 default:
4319 #ifdef VMM_DEBUG
4320 DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
4321 exit_reason, vmx_exit_reason_decode(exit_reason));
4322 #endif /* VMM_DEBUG */
4323 return (EINVAL);
4324 }
4325
4326 if (update_rip) {
4327 if (vmwrite(VMCS_GUEST_IA32_RIP,
4328 vcpu->vc_gueststate.vg_rip)) {
4329 printf("%s: can't advance rip\n", __func__);
4330 return (EINVAL);
4331 }
4332
4333 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST,
4334 &istate)) {
4335 printf("%s: can't read interruptibility state\n",
4336 __func__);
4337 return (EINVAL);
4338 }
4339
4340 /* Interruptibility state 0x3 covers blocking by STI and MOV SS */
4341 istate &= ~0x3;
4342
4343 if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST,
4344 istate)) {
4345 printf("%s: can't write interruptibility state\n",
4346 __func__);
4347 return (EINVAL);
4348 }
4349
4350 if (rflags & PSL_T) {
4351 if (vmm_inject_db(vcpu)) {
4352 printf("%s: can't inject #DB exception to "
4353 "guest", __func__);
4354 return (EINVAL);
4355 }
4356 }
4357 }
4358
4359 return (ret);
4360 }
4361
4362 /*
4363 * vmm_inject_gp
4364 *
4365 * Injects an #GP exception into the guest VCPU.
4366 *
4367 * Parameters:
4368 * vcpu: vcpu to inject into
4369 *
4370 * Return values:
4371 * Always 0
4372 */
4373 int
4374 vmm_inject_gp(struct vcpu *vcpu)
4375 {
4376 DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
4377 vcpu->vc_gueststate.vg_rip);
4378 vcpu->vc_inject.vie_vector = VMM_EX_GP;
4379 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4380 vcpu->vc_inject.vie_errorcode = 0;
4381
4382 return (0);
4383 }
4384
4385 /*
4386 * vmm_inject_ud
4387 *
4388 * Injects an #UD exception into the guest VCPU.
4389 *
4390 * Parameters:
4391 * vcpu: vcpu to inject into
4392 *
4393 * Return values:
4394 * Always 0
4395 */
4396 int
4397 vmm_inject_ud(struct vcpu *vcpu)
4398 {
4399 DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
4400 vcpu->vc_gueststate.vg_rip);
4401 vcpu->vc_inject.vie_vector = VMM_EX_UD;
4402 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4403 vcpu->vc_inject.vie_errorcode = 0;
4404
4405 return (0);
4406 }
4407
4408 /*
4409 * vmm_inject_db
4410 *
4411 * Injects a #DB exception into the guest VCPU.
4412 *
4413 * Parameters:
4414 * vcpu: vcpu to inject into
4415 *
4416 * Return values:
4417 * Always 0
4418 */
4419 int
4420 vmm_inject_db(struct vcpu *vcpu)
4421 {
4422 DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
4423 vcpu->vc_gueststate.vg_rip);
4424 vcpu->vc_inject.vie_vector = VMM_EX_DB;
4425 vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
4426 vcpu->vc_inject.vie_errorcode = 0;
4427
4428 return (0);
4429 }
4430
4431 /*
4432 * vmm_get_guest_memtype
4433 *
4434 * Returns the type of memory 'gpa' refers to in the context of vm 'vm'
4435 */
4436 int
4437 vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
4438 {
4439 int i;
4440 struct vm_mem_range *vmr;
4441
4442 /* XXX Use binary search? */
4443 for (i = 0; i < vm->vm_nmemranges; i++) {
4444 vmr = &vm->vm_memranges[i];
4445
4446 /*
4447 * vm_memranges are sorted in ascending order, so once gpa is below
4448 * this range's start it cannot be in this or any later range.
4449 */
4450 if (gpa < vmr->vmr_gpa)
4451 break;
4452
4453 if (gpa < vmr->vmr_gpa + vmr->vmr_size) {
4454 if (vmr->vmr_type == VM_MEM_MMIO)
4455 return (VMM_MEM_TYPE_MMIO);
4456 return (VMM_MEM_TYPE_REGULAR);
4457 }
4458 }
4459
4460 DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
4461 return (VMM_MEM_TYPE_UNKNOWN);
4462 }
4463
4464 /*
4465 * vmx_get_exit_qualification
4466 *
4467 * Return the current VMCS' exit qualification information
4468 */
4469 int
4470 vmx_get_exit_qualification(uint64_t *exit_qualification)
4471 {
4472 if (vmread(VMCS_GUEST_EXIT_QUALIFICATION, exit_qualification)) {
4473 printf("%s: can't extract exit qual\n", __func__);
4474 return (EINVAL);
4475 }
4476
4477 return (0);
4478 }
4479
4480 /*
4481 * vmx_get_guest_faulttype
4482 *
4483 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4484 * this PCPU.
4485 */
4486 int
4487 vmx_get_guest_faulttype(void)
4488 {
4489 uint64_t exit_qual;
4490 uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
4491 IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
4492 vm_prot_t prot, was_prot;
4493
4494 if (vmx_get_exit_qualification(&exit_qual))
4495 return (-1);
4496
4497 if ((exit_qual & presentmask) == 0)
4498 return VM_FAULT_INVALID;
4499
4500 was_prot = 0;
4501 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
4502 was_prot |= PROT_READ;
4503 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
4504 was_prot |= PROT_WRITE;
4505 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
4506 was_prot |= PROT_EXEC;
4507
4508 prot = 0;
4509 if (exit_qual & IA32_VMX_EPT_FAULT_READ)
4510 prot = PROT_READ;
4511 else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
4512 prot = PROT_WRITE;
4513 else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
4514 prot = PROT_EXEC;
4515
4516 if ((was_prot & prot) == 0)
4517 return VM_FAULT_PROTECT;
4518
4519 return (-1);
4520 }
4521
4522 /*
4523 * svm_get_guest_faulttype
4524 *
4525 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4526 * this PCPU.
4527 */
4528 int
4529 svm_get_guest_faulttype(struct vmcb *vmcb)
4530 {
4531 if (!(vmcb->v_exitinfo1 & 0x1))
4532 return VM_FAULT_INVALID;
4533 return VM_FAULT_PROTECT;
4534 }
4535
4536 /*
4537 * svm_fault_page
4538 *
4539 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4540 * at address 'gpa'.
4541 */
4542 int
4543 svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
4544 {
4545 int ret;
4546
4547 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
4548 PROT_READ | PROT_WRITE | PROT_EXEC);
4549 if (ret)
4550 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4551 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4552
4553 return (ret);
4554 }
4555
4556 /*
4557 * svm_handle_np_fault
4558 *
4559 * High level nested paging handler for SVM. Verifies that a fault is for a
4560 * valid memory region, then faults a page, or aborts otherwise.
4561 */
4562 int
4563 svm_handle_np_fault(struct vcpu *vcpu)
4564 {
4565 uint64_t gpa;
4566 int gpa_memtype, ret = 0;
4567 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4568 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4569 struct cpu_info *ci = curcpu();
4570
4571 memset(vee, 0, sizeof(*vee));
4572
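/* For nested page faults, EXITINFO2 holds the faulting guest physical address. */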
4573 gpa = vmcb->v_exitinfo2;
4574
4575 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4576 switch (gpa_memtype) {
4577 case VMM_MEM_TYPE_REGULAR:
4578 vee->vee_fault_type = VEE_FAULT_HANDLED;
4579 ret = svm_fault_page(vcpu, gpa);
4580 break;
4581 case VMM_MEM_TYPE_MMIO:
4582 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4583 if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
4584 vee->vee_insn_len = vmcb->v_n_bytes_fetched;
4585 memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
4586 sizeof(vee->vee_insn_bytes));
4587 vee->vee_insn_info |= VEE_BYTES_VALID;
4588 }
4589 ret = EAGAIN;
4590 break;
4591 default:
4592 printf("%s: unknown memory type %d for GPA 0x%llx\n",
4593 __func__, gpa_memtype, gpa);
4594 return (EINVAL);
4595 }
4596
4597 return (ret);
4598 }
4599
4600 /*
4601 * vmx_fault_page
4602 *
4603 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4604 * at address 'gpa'.
4605 *
4606 * Parameters:
4607 * vcpu: guest VCPU requiring the page to be faulted into the UVM map
4608 * gpa: guest physical address that triggered the fault
4609 *
4610 * Return Values:
4611 * 0: if successful
4612 * EINVAL: if fault type could not be determined or VMCS reload fails
4613 * EAGAIN: if a protection fault occurred, e.g. writing to a read-only page
4614 * errno: if uvm_fault(9) fails to wire in the page
4615 */
4616 int
4617 vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
4618 {
4619 int fault_type, ret;
4620
4621 fault_type = vmx_get_guest_faulttype();
4622 switch (fault_type) {
4623 case -1:
4624 printf("%s: invalid fault type\n", __func__);
4625 return (EINVAL);
4626 case VM_FAULT_PROTECT:
4627 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
4628 return (EAGAIN);
4629 default:
4630 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED;
4631 break;
4632 }
4633
4634 /* We may sleep during uvm_fault(9), so reload VMCS. */
4635 vcpu->vc_last_pcpu = curcpu();
4636 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
4637 PROT_READ | PROT_WRITE | PROT_EXEC);
4638 if (vcpu_reload_vmcs_vmx(vcpu)) {
4639 printf("%s: failed to reload vmcs\n", __func__);
4640 return (EINVAL);
4641 }
4642
4643 if (ret)
4644 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4645 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4646
4647 return (ret);
4648 }
4649
4650 /*
4651 * vmx_handle_np_fault
4652 *
4653 * High level nested paging handler for VMX. Verifies that a fault is for a
4654 * valid memory region, then faults a page, or aborts otherwise.
4655 */
4656 int
4657 vmx_handle_np_fault(struct vcpu *vcpu)
4658 {
4659 uint64_t insn_len = 0, gpa;
4660 int gpa_memtype, ret = 0;
4661 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4662
4663 memset(vee, 0, sizeof(*vee));
4664
4665 if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) {
4666 printf("%s: cannot extract faulting pa\n", __func__);
4667 return (EINVAL);
4668 }
4669
4670 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4671 switch (gpa_memtype) {
4672 case VMM_MEM_TYPE_REGULAR:
4673 vee->vee_fault_type = VEE_FAULT_HANDLED;
4674 ret = vmx_fault_page(vcpu, gpa);
4675 break;
4676 case VMM_MEM_TYPE_MMIO:
4677 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4678 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) ||
4679 insn_len == 0 || insn_len > 15) {
4680 printf("%s: failed to extract instruction length\n",
4681 __func__);
4682 ret = EINVAL;
4683 } else {
4684 vee->vee_insn_len = (uint32_t)insn_len;
4685 vee->vee_insn_info |= VEE_LEN_VALID;
4686 ret = EAGAIN;
4687 }
4688 break;
4689 default:
4690 printf("%s: unknown memory type %d for GPA 0x%llx\n",
4691 __func__, gpa_memtype, gpa);
4692 return (EINVAL);
4693 }
4694
4695 return (ret);
4696 }
4697
4698 /*
4699 * vmm_get_guest_cpu_cpl
4700 *
4701 * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the
4702 * VMCS field for the DPL of SS (this seems odd, but is documented that way
4703 * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl'
4704 * field, as per the APM.
4705 *
4706 * Parameters:
4707 * vcpu: guest VCPU for which CPL is to be checked
4708 *
4709 * Return Values:
4710 * -1: the CPL could not be determined
4711 * 0-3 indicating the current CPL. For real mode operation, 0 is returned.
4712 */
4713 int
4714 vmm_get_guest_cpu_cpl(struct vcpu *vcpu)
4715 {
4716 int mode;
4717 struct vmcb *vmcb;
4718 uint64_t ss_ar;
4719
4720 mode = vmm_get_guest_cpu_mode(vcpu);
4721
4722 if (mode == VMM_CPU_MODE_UNKNOWN)
4723 return (-1);
4724
4725 if (mode == VMM_CPU_MODE_REAL)
4726 return (0);
4727
4728 if (vmm_softc->mode == VMM_MODE_RVI) {
4729 vmcb = (struct vmcb *)vcpu->vc_control_va;
4730 return (vmcb->v_cpl);
4731 } else if (vmm_softc->mode == VMM_MODE_EPT) {
4732 if (vmread(VMCS_GUEST_IA32_SS_AR, &ss_ar))
4733 return (-1);
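/* The DPL lives in bits 6:5 of the segment access-rights field. */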
4734 return ((ss_ar & 0x60) >> 5);
4735 } else
4736 return (-1);
4737 }
4738
4739 /*
4740 * vmm_get_guest_cpu_mode
4741 *
4742 * Determines current CPU mode of 'vcpu'.
4743 *
4744 * Parameters:
4745 * vcpu: guest VCPU for which mode is to be checked
4746 *
4747 * Return Values:
4748 * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be
4749 * ascertained.
4750 */
4751 int
4752 vmm_get_guest_cpu_mode(struct vcpu *vcpu)
4753 {
4754 uint64_t cr0, efer, cs_ar;
4755 uint8_t l, dib;
4756 struct vmcb *vmcb;
4757 struct vmx_msr_store *msr_store;
4758
4759 if (vmm_softc->mode == VMM_MODE_RVI) {
4760 vmcb = (struct vmcb *)vcpu->vc_control_va;
4761 cr0 = vmcb->v_cr0;
4762 efer = vmcb->v_efer;
4763 cs_ar = vmcb->v_cs.vs_attr;
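/*
 * Expand the VMCB's packed 12-bit segment attributes into the
 * VMX-style access-rights layout used by the CS.L/CS.D checks below.
 */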
4764 cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000);
4765 } else if (vmm_softc->mode == VMM_MODE_EPT) {
4766 if (vmread(VMCS_GUEST_IA32_CR0, &cr0))
4767 return (VMM_CPU_MODE_UNKNOWN);
4768 if (vmread(VMCS_GUEST_IA32_CS_AR, &cs_ar))
4769 return (VMM_CPU_MODE_UNKNOWN);
4770 msr_store =
4771 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
4772 efer = msr_store[VCPU_REGS_EFER].vms_data;
4773 } else
4774 return (VMM_CPU_MODE_UNKNOWN);
4775
4776 l = (cs_ar & 0x2000) >> 13;
4777 dib = (cs_ar & 0x4000) >> 14;
4778
4779 /* Check CR0.PE */
4780 if (!(cr0 & CR0_PE))
4781 return (VMM_CPU_MODE_REAL);
4782
4783 /* Check EFER */
4784 if (efer & EFER_LMA) {
4785 /* Could be compat or long mode, check CS.L */
4786 if (l)
4787 return (VMM_CPU_MODE_LONG);
4788 else
4789 return (VMM_CPU_MODE_COMPAT);
4790 }
4791
4792 /* Check prot vs prot32 */
4793 if (dib)
4794 return (VMM_CPU_MODE_PROT32);
4795 else
4796 return (VMM_CPU_MODE_PROT);
4797 }
4798
4799 /*
4800 * svm_handle_inout
4801 *
4802 * Exit handler for IN/OUT instructions.
4803 *
4804 * Parameters:
4805 * vcpu: The VCPU where the IN/OUT instruction occurred
4806 *
4807 * Return values:
4808 * 0: if successful
4809 * EINVAL: an invalid IN/OUT instruction was encountered
4810 */
4811 int
4812 svm_handle_inout(struct vcpu *vcpu)
4813 {
4814 uint64_t insn_length, exit_qual;
4815 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4816
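/*
 * For IOIO exits, EXITINFO2 holds the address of the instruction
 * following the IN/OUT, so the difference from the saved rip is the
 * instruction length.
 */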
4817 insn_length = vmcb->v_exitinfo2 - vmcb->v_rip;
4818 exit_qual = vmcb->v_exitinfo1;
4819
4820 /* Bit 0 - direction */
4821 if (exit_qual & 0x1)
4822 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4823 else
4824 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4825 /* Bit 2 - string instruction? */
4826 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2;
4827 /* Bit 3 - REP prefix? */
4828 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3;
4829
4830 /* Bits 4:6 - size of exit */
4831 if (exit_qual & 0x10)
4832 vcpu->vc_exit.vei.vei_size = 1;
4833 else if (exit_qual & 0x20)
4834 vcpu->vc_exit.vei.vei_size = 2;
4835 else if (exit_qual & 0x40)
4836 vcpu->vc_exit.vei.vei_size = 4;
4837
4838 /* Bits 16:31 - port */
4839 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4840 /* Data */
4841 vcpu->vc_exit.vei.vei_data = vmcb->v_rax;
4842
4843 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4844
4845 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4846 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4847
4848 return (0);
4849 }
4850
4851 /*
4852 * vmx_handle_inout
4853 *
4854 * Exit handler for IN/OUT instructions.
4855 *
4856 * Parameters:
4857 * vcpu: The VCPU where the IN/OUT instruction occurred
4858 *
4859 * Return values:
4860 * 0: if successful
4861 * EINVAL: invalid IN/OUT instruction or vmread failures occurred
4862 */
4863 int
4864 vmx_handle_inout(struct vcpu *vcpu)
4865 {
4866 uint64_t insn_length, exit_qual;
4867
4868 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
4869 printf("%s: can't obtain instruction length\n", __func__);
4870 return (EINVAL);
4871 }
4872
4873 if (vmx_get_exit_qualification(&exit_qual)) {
4874 printf("%s: can't get exit qual\n", __func__);
4875 return (EINVAL);
4876 }
4877
4878 /* Bits 0:2 - size of exit */
4879 vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1;
4880 /* Bit 3 - direction */
4881 if ((exit_qual & 0x8) >> 3)
4882 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
4883 else
4884 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
4885 /* Bit 4 - string instruction? */
4886 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4;
4887 /* Bit 5 - REP prefix? */
4888 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5;
4889 /* Bit 6 - Operand encoding */
4890 vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6;
4891 /* Bits 16:31 - port */
4892 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
4893 /* Data */
4894 vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax;
4895
4896 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
4897
4898 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
4899 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
4900
4901 return (0);
4902 }
4903
4904 /*
4905 * vmx_load_pdptes
4906 *
4907 * Update the PDPTEs in the VMCS with the values currently indicated by the
4908 * guest CR3. This is used for 32-bit PAE guests when enabling paging.
4909 *
4910 * Parameters
4911 * vcpu: The vcpu whose PDPTEs should be loaded
4912 *
4913 * Return values:
4914 * 0: if successful
4915 * EINVAL: if the PDPTEs could not be loaded
4916 * ENOMEM: memory allocation failure
4917 */
4918 int
4919 vmx_load_pdptes(struct vcpu *vcpu)
4920 {
4921 uint64_t cr3, cr3_host_phys;
4922 vaddr_t cr3_host_virt;
4923 pd_entry_t *pdptes;
4924 int ret;
4925
4926 if (vmread(VMCS_GUEST_IA32_CR3, &cr3)) {
4927 printf("%s: can't read guest cr3\n", __func__);
4928 return (EINVAL);
4929 }
4930
4931 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
4932 (paddr_t *)&cr3_host_phys)) {
4933 DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
4934 __func__);
4935 if (vmwrite(VMCS_GUEST_PDPTE0, 0)) {
4936 printf("%s: can't write guest PDPTE0\n", __func__);
4937 return (EINVAL);
4938 }
4939
4940 if (vmwrite(VMCS_GUEST_PDPTE1, 0)) {
4941 printf("%s: can't write guest PDPTE1\n", __func__);
4942 return (EINVAL);
4943 }
4944
4945 if (vmwrite(VMCS_GUEST_PDPTE2, 0)) {
4946 printf("%s: can't write guest PDPTE2\n", __func__);
4947 return (EINVAL);
4948 }
4949
4950 if (vmwrite(VMCS_GUEST_PDPTE3, 0)) {
4951 printf("%s: can't write guest PDPTE3\n", __func__);
4952 return (EINVAL);
4953 }
4954 return (0);
4955 }
4956
4957 ret = 0;
4958
4959 /* We may sleep during km_alloc(9), so reload VMCS. */
4960 vcpu->vc_last_pcpu = curcpu();
4961 cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none,
4962 &kd_waitok);
4963 if (vcpu_reload_vmcs_vmx(vcpu)) {
4964 printf("%s: failed to reload vmcs\n", __func__);
4965 ret = EINVAL;
4966 goto exit;
4967 }
4968
4969 if (!cr3_host_virt) {
4970 printf("%s: can't allocate address for guest CR3 mapping\n",
4971 __func__);
4972 return (ENOMEM);
4973 }
4974
4975 pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ);
4976
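/* A PAE page-directory-pointer table holds four 64-bit entries. */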
4977 pdptes = (pd_entry_t *)cr3_host_virt;
4978 if (vmwrite(VMCS_GUEST_PDPTE0, pdptes[0])) {
4979 printf("%s: can't write guest PDPTE0\n", __func__);
4980 ret = EINVAL;
4981 goto exit;
4982 }
4983
4984 if (vmwrite(VMCS_GUEST_PDPTE1, pdptes[1])) {
4985 printf("%s: can't write guest PDPTE1\n", __func__);
4986 ret = EINVAL;
4987 goto exit;
4988 }
4989
4990 if (vmwrite(VMCS_GUEST_PDPTE2, pdptes[2])) {
4991 printf("%s: can't write guest PDPTE2\n", __func__);
4992 ret = EINVAL;
4993 goto exit;
4994 }
4995
4996 if (vmwrite(VMCS_GUEST_PDPTE3, pdptes[3])) {
4997 printf("%s: can't write guest PDPTE3\n", __func__);
4998 ret = EINVAL;
4999 goto exit;
5000 }
5001
5002 exit:
5003 pmap_kremove(cr3_host_virt, PAGE_SIZE);
5004
5005 /* km_free(9) might sleep, so we need to reload VMCS. */
5006 vcpu->vc_last_pcpu = curcpu();
5007 km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none);
5008 if (vcpu_reload_vmcs_vmx(vcpu)) {
5009 printf("%s: failed to reload vmcs after km_free\n", __func__);
5010 ret = EINVAL;
5011 }
5012
5013 return (ret);
5014 }
5015
5016 /*
5017 * vmx_handle_cr0_write
5018 *
5019 * Write handler for CR0. This function ensures valid values are written into
5020 * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc).
5021 *
5022 * Parameters
5023 * vcpu: The vcpu taking the cr0 write exit
5024 * r: The guest's desired (incoming) cr0 value
5025 *
5026 * Return values:
5027 * 0: if successful
5028 * EINVAL: if an error occurred
5029 */
5030 int
5031 vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r)
5032 {
5033 struct vmx_msr_store *msr_store;
5034 struct vmx_invvpid_descriptor vid;
5035 uint64_t ectls, oldcr0, cr4, mask;
5036 int ret;
5037
5038 /* Check must-be-0 bits */
5039 mask = vcpu->vc_vmx_cr0_fixed1;
5040 if (~r & mask) {
5041 /* Inject #GP, let the guest handle it */
5042 DPRINTF("%s: guest set invalid bits in %%cr0. Zeros "
5043 "mask=0x%llx, data=0x%llx\n", __func__,
5044 vcpu->vc_vmx_cr0_fixed1, r);
5045 vmm_inject_gp(vcpu);
5046 return (0);
5047 }
5048
5049 /* Check must-be-1 bits */
5050 mask = vcpu->vc_vmx_cr0_fixed0;
5051 if ((r & mask) != mask) {
5052 /* Inject #GP, let the guest handle it */
5053 DPRINTF("%s: guest set invalid bits in %%cr0. Ones "
5054 "mask=0x%llx, data=0x%llx\n", __func__,
5055 vcpu->vc_vmx_cr0_fixed0, r);
5056 vmm_inject_gp(vcpu);
5057 return (0);
5058 }
5059
5060 if (r & 0xFFFFFFFF00000000ULL) {
5061 DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid,"
5062 " inject #GP, cr0=0x%llx\n", __func__, r);
5063 vmm_inject_gp(vcpu);
5064 return (0);
5065 }
5066
5067 if ((r & CR0_PG) && (r & CR0_PE) == 0) {
5068 DPRINTF("%s: PG flag set when the PE flag is clear,"
5069 " inject #GP, cr0=0x%llx\n", __func__, r);
5070 vmm_inject_gp(vcpu);
5071 return (0);
5072 }
5073
5074 if ((r & CR0_NW) && (r & CR0_CD) == 0) {
5075 DPRINTF("%s: NW flag set when the CD flag is clear,"
5076 " inject #GP, cr0=0x%llx\n", __func__, r);
5077 vmm_inject_gp(vcpu);
5078 return (0);
5079 }
5080
5081 if (vmread(VMCS_GUEST_IA32_CR0, &oldcr0)) {
5082 printf("%s: can't read guest cr0\n", __func__);
5083 return (EINVAL);
5084 }
5085
5086 /* CR0 must always have NE set */
5087 r |= CR0_NE;
5088
5089 if (vmwrite(VMCS_GUEST_IA32_CR0, r)) {
5090 printf("%s: can't write guest cr0\n", __func__);
5091 return (EINVAL);
5092 }
5093
5094 /* If the guest hasn't enabled paging ... */
5095 if (!(r & CR0_PG) && (oldcr0 & CR0_PG)) {
5096 /* Paging was disabled (prev. enabled) - Flush TLB */
5097 if (vcpu->vc_vmx_vpid_enabled) {
5098 vid.vid_vpid = vcpu->vc_vpid;
5099 vid.vid_addr = 0;
5100 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
5101 }
5102 } else if (!(oldcr0 & CR0_PG) && (r & CR0_PG)) {
5103 /*
5104 * The guest has enabled paging, so the IA32_VMX_IA32E_MODE_GUEST
5105 * entry control must be set to match EFER_LME.
5106 */
5107 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5108
5109 if (vmread(VMCS_ENTRY_CTLS, &ectls)) {
5110 printf("%s: can't read entry controls\n", __func__);
5111 return (EINVAL);
5112 }
5113
5114 if (msr_store[VCPU_REGS_EFER].vms_data & EFER_LME)
5115 ectls |= IA32_VMX_IA32E_MODE_GUEST;
5116 else
5117 ectls &= ~IA32_VMX_IA32E_MODE_GUEST;
5118
5119 if (vmwrite(VMCS_ENTRY_CTLS, ectls)) {
5120 printf("%s: can't write entry controls\n", __func__);
5121 return (EINVAL);
5122 }
5123
5124 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5125 printf("%s: can't read guest cr4\n", __func__);
5126 return (EINVAL);
5127 }
5128
5129 /* Load PDPTEs if PAE guest enabling paging */
5130 if (cr4 & CR4_PAE) {
5131 ret = vmx_load_pdptes(vcpu);
5132
5133 if (ret) {
5134 printf("%s: updating PDPTEs failed\n", __func__);
5135 return (ret);
5136 }
5137 }
5138 }
5139
5140 return (0);
5141 }
5142
5143 /*
5144 * vmx_handle_cr4_write
5145 *
5146 * Write handler for CR4. This function ensures valid values are written into
5147 * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc).
5148 *
5149 * Parameters
5150 * vcpu: The vcpu taking the cr4 write exit
5151 * r: The guest's desired (incoming) cr4 value
5152 *
5153 * Return values:
5154 * 0: if successful
5155 * EINVAL: if an error occurred
5156 */
5157 int
5158 vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
5159 {
5160 uint64_t mask;
5161
5162 /* Check must-be-0 bits */
5163 mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
5164 if (r & mask) {
5165 /* Inject #GP, let the guest handle it */
5166 DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
5167 "mask=0x%llx, data=0x%llx\n", __func__,
5168 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
5169 r);
5170 vmm_inject_gp(vcpu);
5171 return (0);
5172 }
5173
5174 /* Check must-be-1 bits */
5175 mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
5176 if ((r & mask) != mask) {
5177 /* Inject #GP, let the guest handle it */
5178 DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
5179 "mask=0x%llx, data=0x%llx\n", __func__,
5180 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
5181 r);
5182 vmm_inject_gp(vcpu);
5183 return (0);
5184 }
5185
5186 /* CR4_VMXE must always be enabled */
5187 r |= CR4_VMXE;
5188
5189 if (vmwrite(VMCS_GUEST_IA32_CR4, r)) {
5190 printf("%s: can't write guest cr4\n", __func__);
5191 return (EINVAL);
5192 }
5193
5194 return (0);
5195 }
5196
5197 /*
5198 * vmx_handle_cr
5199 *
5200 * Handle reads/writes to control registers (except CR3)
5201 */
5202 int
5203 vmx_handle_cr(struct vcpu *vcpu)
5204 {
5205 uint64_t insn_length, exit_qual, r;
5206 uint8_t crnum, dir, reg;
5207
5208 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5209 printf("%s: can't obtain instruction length\n", __func__);
5210 return (EINVAL);
5211 }
5212
5213 if (vmx_get_exit_qualification(&exit_qual)) {
5214 printf("%s: can't get exit qual\n", __func__);
5215 return (EINVAL);
5216 }
5217
5218 /* Low 4 bits of exit_qual represent the CR number */
5219 crnum = exit_qual & 0xf;
5220
5221 /*
5222 * Bits 5:4 indicate the direction of operation (or special CR-modifying
5223 * instruction)
5224 */
5225 dir = (exit_qual & 0x30) >> 4;
5226
5227 /* Bits 11:8 encode the source/target register */
5228 reg = (exit_qual & 0xf00) >> 8;
5229
5230 switch (dir) {
5231 case CR_WRITE:
5232 if (crnum == 0 || crnum == 4) {
5233 switch (reg) {
5234 case 0: r = vcpu->vc_gueststate.vg_rax; break;
5235 case 1: r = vcpu->vc_gueststate.vg_rcx; break;
5236 case 2: r = vcpu->vc_gueststate.vg_rdx; break;
5237 case 3: r = vcpu->vc_gueststate.vg_rbx; break;
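/* %rsp (reg 4) is kept in the VMCS, not in the gueststate. */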
5238 case 4: if (vmread(VMCS_GUEST_IA32_RSP, &r)) {
5239 printf("%s: unable to read guest "
5240 "RSP\n", __func__);
5241 return (EINVAL);
5242 }
5243 break;
5244 case 5: r = vcpu->vc_gueststate.vg_rbp; break;
5245 case 6: r = vcpu->vc_gueststate.vg_rsi; break;
5246 case 7: r = vcpu->vc_gueststate.vg_rdi; break;
5247 case 8: r = vcpu->vc_gueststate.vg_r8; break;
5248 case 9: r = vcpu->vc_gueststate.vg_r9; break;
5249 case 10: r = vcpu->vc_gueststate.vg_r10; break;
5250 case 11: r = vcpu->vc_gueststate.vg_r11; break;
5251 case 12: r = vcpu->vc_gueststate.vg_r12; break;
5252 case 13: r = vcpu->vc_gueststate.vg_r13; break;
5253 case 14: r = vcpu->vc_gueststate.vg_r14; break;
5254 case 15: r = vcpu->vc_gueststate.vg_r15; break;
5255 }
5256 DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n",
5257 __func__, crnum, vcpu->vc_gueststate.vg_rip, r);
5258 }
5259
5260 if (crnum == 0)
5261 vmx_handle_cr0_write(vcpu, r);
5262
5263 if (crnum == 4)
5264 vmx_handle_cr4_write(vcpu, r);
5265
5266 break;
5267 case CR_READ:
5268 DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum,
5269 vcpu->vc_gueststate.vg_rip);
5270 break;
5271 case CR_CLTS:
5272 DPRINTF("%s: clts instruction @ %llx\n", __func__,
5273 vcpu->vc_gueststate.vg_rip);
5274 break;
5275 case CR_LMSW:
5276 DPRINTF("%s: lmsw instruction @ %llx\n", __func__,
5277 vcpu->vc_gueststate.vg_rip);
5278 break;
5279 default:
5280 DPRINTF("%s: unknown cr access @ %llx\n", __func__,
5281 vcpu->vc_gueststate.vg_rip);
5282 }
5283
5284 vcpu->vc_gueststate.vg_rip += insn_length;
5285
5286 return (0);
5287 }
5288
5289 /*
5290 * vmx_handle_rdmsr
5291 *
5292 * Handler for rdmsr instructions. MSRs permitted by the MSR bitmap are
5293 * given implicit access and won't end up here. This handler is primarily
5294 * intended to catch otherwise unknown MSR accesses for possible later
5295 * inclusion in the bitmap list. Each MSR access that ends up here is
5296 * logged (when VMM_DEBUG is enabled).
5297 *
5298 * Parameters:
5299 * vcpu: vcpu structure containing instruction info causing the exit
5300 *
5301 * Return value:
5302 * 0: The operation was successful
5303 * EINVAL: An error occurred
5304 */
5305 int
5306 vmx_handle_rdmsr(struct vcpu *vcpu)
5307 {
5308 uint64_t insn_length;
5309 uint64_t *rax, *rdx;
5310 uint64_t *rcx;
5311 int ret;
5312
5313 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5314 printf("%s: can't obtain instruction length\n", __func__);
5315 return (EINVAL);
5316 }
5317
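/* RDMSR is always a 2-byte instruction (0x0F 0x32). */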
5318 if (insn_length != 2) {
5319 DPRINTF("%s: RDMSR with instruction length %lld not "
5320 "supported\n", __func__, insn_length);
5321 return (EINVAL);
5322 }
5323
5324 rax = &vcpu->vc_gueststate.vg_rax;
5325 rcx = &vcpu->vc_gueststate.vg_rcx;
5326 rdx = &vcpu->vc_gueststate.vg_rdx;
5327
5328 switch (*rcx) {
5329 case MSR_BIOS_SIGN:
5330 case MSR_PLATFORM_ID:
5331 /* Ignored */
5332 *rax = 0;
5333 *rdx = 0;
5334 break;
5335 case MSR_CR_PAT:
5336 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5337 *rdx = (vcpu->vc_shadow_pat >> 32);
5338 break;
5339 default:
5340 /* An unsupported MSR causes a #GP exception; don't advance %rip */
5341 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
5342 __func__, *rcx);
5343 ret = vmm_inject_gp(vcpu);
5344 return (ret);
5345 }
5346
5347 vcpu->vc_gueststate.vg_rip += insn_length;
5348
5349 return (0);
5350 }
5351
5352 /*
5353 * vmx_handle_xsetbv
5354 *
5355 * VMX-specific part of the xsetbv instruction exit handler
5356 *
5357 * Parameters:
5358 * vcpu: vcpu structure containing instruction info causing the exit
5359 *
5360 * Return value:
5361 * 0: The operation was successful
5362 * EINVAL: An error occurred
5363 */
5364 int
5365 vmx_handle_xsetbv(struct vcpu *vcpu)
5366 {
5367 uint64_t insn_length, *rax;
5368 int ret;
5369
5370 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5371 printf("%s: can't obtain instruction length\n", __func__);
5372 return (EINVAL);
5373 }
5374
5375 /* All XSETBV instructions are 3 bytes */
5376 if (insn_length != 3) {
5377 DPRINTF("%s: XSETBV with instruction length %lld not "
5378 "supported\n", __func__, insn_length);
5379 return (EINVAL);
5380 }
5381
5382 rax = &vcpu->vc_gueststate.vg_rax;
5383
5384 ret = vmm_handle_xsetbv(vcpu, rax);
5385
5386 vcpu->vc_gueststate.vg_rip += insn_length;
5387
5388 return ret;
5389 }
5390
5391 /*
5392 * svm_handle_xsetbv
5393 *
5394 * SVM-specific part of the xsetbv instruction exit handler
5395 *
5396 * Parameters:
5397 * vcpu: vcpu structure containing instruction info causing the exit
5398 *
5399 * Return value:
5400 * 0: The operation was successful
5401 * EINVAL: An error occurred
5402 */
5403 int
5404 svm_handle_xsetbv(struct vcpu *vcpu)
5405 {
5406 uint64_t insn_length, *rax;
5407 int ret;
5408 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5409
5410 /* All XSETBV instructions are 3 bytes */
5411 insn_length = 3;
5412
5413 rax = &vmcb->v_rax;
5414
5415 ret = vmm_handle_xsetbv(vcpu, rax);
5416
5417 vcpu->vc_gueststate.vg_rip += insn_length;
5418
5419 return ret;
5420 }
5421
5422 /*
5423 * vmm_handle_xsetbv
5424 *
5425 * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
5426 * limited to the xsave_mask in use in the host.
5427 *
5428 * Parameters:
5429 * vcpu: vcpu structure containing instruction info causing the exit
5430 * rax: pointer to guest %rax
5431 *
5432 * Return value:
5433 * 0: The operation was successful
5434 * EINVAL: An error occurred
5435 */
5436 int
5437 vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax)
5438 {
5439 uint64_t *rdx, *rcx, val;
5440
5441 rcx = &vcpu->vc_gueststate.vg_rcx;
5442 rdx = &vcpu->vc_gueststate.vg_rdx;
5443
5444 if (vmm_get_guest_cpu_cpl(vcpu) != 0) {
5445 DPRINTF("%s: guest cpl not zero\n", __func__);
5446 return (vmm_inject_gp(vcpu));
5447 }
5448
5449 if (*rcx != 0) {
5450 DPRINTF("%s: guest specified invalid xcr register number "
5451 "%lld\n", __func__, *rcx);
5452 return (vmm_inject_gp(vcpu));
5453 }
5454
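/* XSETBV supplies the new XCR value in %edx:%eax. */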
5455 val = *rax + (*rdx << 32);
5456 if (val & ~xsave_mask) {
5457 DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n",
5458 __func__, val);
5459 return (vmm_inject_gp(vcpu));
5460 }
5461
5462 vcpu->vc_gueststate.vg_xcr0 = val;
5463
5464 return (0);
5465 }
5466
5467 /*
5468 * vmx_handle_misc_enable_msr
5469 *
5470 * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We
5471 * limit what the guest can write to this MSR (certain hardware-related
5472 * settings like speedstep, etc).
5473 *
5474 * Parameters:
5475 * vcpu: vcpu structure containing information about the wrmsr causing this
5476 * exit
5477 */
5478 void
5479 vmx_handle_misc_enable_msr(struct vcpu *vcpu)
5480 {
5481 uint64_t *rax, *rdx;
5482 struct vmx_msr_store *msr_store;
5483
5484 rax = &vcpu->vc_gueststate.vg_rax;
5485 rdx = &vcpu->vc_gueststate.vg_rdx;
5486 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5487
5488 /* Filter out guest writes to TCC, EIST, and xTPR */
5489 *rax &= ~(MISC_ENABLE_TCC | MISC_ENABLE_EIST_ENABLED |
5490 MISC_ENABLE_xTPR_MESSAGE_DISABLE);
5491
5492 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = *rax | (*rdx << 32);
5493 }
5494
5495 /*
5496 * vmx_handle_wrmsr
5497 *
5498 * Handler for wrmsr instructions. This handler logs the access and discards
5499 * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end
5500 * up here (it will be whitelisted in the MSR bitmap).
5501 *
5502 * Parameters:
5503 * vcpu: vcpu structure containing instruction info causing the exit
5504 *
5505 * Return value:
5506 * 0: The operation was successful
5507 * EINVAL: An error occurred
5508 */
5509 int
5510 vmx_handle_wrmsr(struct vcpu *vcpu)
5511 {
5512 uint64_t insn_length, val;
5513 uint64_t *rax, *rdx, *rcx;
5514 int ret;
5515
5516 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5517 printf("%s: can't obtain instruction length\n", __func__);
5518 return (EINVAL);
5519 }
5520
5521 if (insn_length != 2) {
5522 DPRINTF("%s: WRMSR with instruction length %lld not "
5523 "supported\n", __func__, insn_length);
5524 return (EINVAL);
5525 }
5526
5527 rax = &vcpu->vc_gueststate.vg_rax;
5528 rcx = &vcpu->vc_gueststate.vg_rcx;
5529 rdx = &vcpu->vc_gueststate.vg_rdx;
5530 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5531
5532 switch (*rcx) {
5533 case MSR_CR_PAT:
5534 if (!vmm_pat_is_valid(val)) {
5535 ret = vmm_inject_gp(vcpu);
5536 return (ret);
5537 }
5538 vcpu->vc_shadow_pat = val;
5539 break;
5540 case MSR_MISC_ENABLE:
5541 vmx_handle_misc_enable_msr(vcpu);
5542 break;
5543 case MSR_SMM_MONITOR_CTL:
5544 /*
5545 * 34.15.5 - Enabling dual monitor treatment
5546 *
5547 * Unsupported, so inject #GP and return without
5548 * advancing %rip.
5549 */
5550 ret = vmm_inject_gp(vcpu);
5551 return (ret);
5552 case KVM_MSR_SYSTEM_TIME:
5553 vmm_init_pvclock(vcpu,
5554 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5555 break;
5556 #ifdef VMM_DEBUG
5557 default:
5558 /*
5559 * Log the access, to be able to identify unknown MSRs
5560 */
5561 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5562 "written from guest=0x%llx:0x%llx\n", __func__,
5563 *rcx, *rdx, *rax);
5564 #endif /* VMM_DEBUG */
5565 }
5566
5567 vcpu->vc_gueststate.vg_rip += insn_length;
5568
5569 return (0);
5570 }
5571
5572 /*
5573 * svm_handle_msr
5574 *
5575 * Handler for MSR instructions.
5576 *
5577 * Parameters:
5578 * vcpu: vcpu structure containing instruction info causing the exit
5579 *
5580 * Return value:
5581 * Always 0 (successful)
5582 */
5583 int
5584 svm_handle_msr(struct vcpu *vcpu)
5585 {
5586 uint64_t insn_length, val;
5587 uint64_t *rax, *rcx, *rdx;
5588 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5589 int ret;
5590
5591 /* XXX: Validate RDMSR / WRMSR insn_length */
5592 insn_length = 2;
5593
5594 rax = &vmcb->v_rax;
5595 rcx = &vcpu->vc_gueststate.vg_rcx;
5596 rdx = &vcpu->vc_gueststate.vg_rdx;
5597
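/* EXITINFO1 is 1 for WRMSR intercepts and 0 for RDMSR. */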
5598 if (vmcb->v_exitinfo1 == 1) {
5599 /* WRMSR */
5600 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5601
5602 switch (*rcx) {
5603 case MSR_CR_PAT:
5604 if (!vmm_pat_is_valid(val)) {
5605 ret = vmm_inject_gp(vcpu);
5606 return (ret);
5607 }
5608 vcpu->vc_shadow_pat = val;
5609 break;
5610 case MSR_EFER:
5611 vmcb->v_efer = *rax | EFER_SVME;
5612 break;
5613 case KVM_MSR_SYSTEM_TIME:
5614 vmm_init_pvclock(vcpu,
5615 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5616 break;
5617 default:
5618 /* Log the access, to be able to identify unknown MSRs */
5619 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5620 "written from guest=0x%llx:0x%llx\n", __func__,
5621 *rcx, *rdx, *rax);
5622 }
5623 } else {
5624 /* RDMSR */
5625 switch (*rcx) {
5626 case MSR_BIOS_SIGN:
5627 case MSR_INT_PEN_MSG:
5628 case MSR_PLATFORM_ID:
5629 /* Ignored */
5630 *rax = 0;
5631 *rdx = 0;
5632 break;
5633 case MSR_CR_PAT:
5634 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5635 *rdx = (vcpu->vc_shadow_pat >> 32);
5636 break;
5637 case MSR_DE_CFG:
5638 /* LFENCE serializing bit is set by host */
5639 *rax = DE_CFG_SERIALIZE_LFENCE;
5640 *rdx = 0;
5641 break;
5642 default:
5643 /*
5644 * An unsupported MSR causes a #GP exception; don't advance
5645 * %rip
5646 */
5647 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
5648 "injecting #GP\n", __func__, *rcx);
5649 ret = vmm_inject_gp(vcpu);
5650 return (ret);
5651 }
5652 }
5653
5654 vcpu->vc_gueststate.vg_rip += insn_length;
5655
5656 return (0);
5657 }
5658
5659 /* Handle cpuid(0xd) and its subleafs */
5660 static void
5661 vmm_handle_cpuid_0xd(struct vcpu *vcpu, uint32_t subleaf, uint64_t *rax,
5662 uint32_t eax, uint32_t ebx, uint32_t ecx, uint32_t edx)
5663 {
5664 if (subleaf == 0) {
5665 /*
5666 * CPUID(0xd.0) depends on the value in XCR0 and MSR_XSS. If
5667 * the guest XCR0 isn't the same as the host then set it, redo
5668 * the CPUID, and restore it.
5669 */
5670 uint64_t xcr0 = vcpu->vc_gueststate.vg_xcr0;
5671
5672 /*
5673 * "ecx enumerates the size required ... for an area
5674 * containing all the ... components supported by this
5675 * processor"
5676 * "ebx enumerates the size required ... for an area
5677 * containing all the ... components corresponding to bits
5678 * currently set in xcr0"
5679 * So: since the VMM 'processor' is what our base kernel uses,
5680 * the VMM ecx is our ebx
5681 */
5682 ecx = ebx;
5683 if (xcr0 != (xsave_mask & XFEATURE_XCR0_MASK)) {
5684 uint32_t dummy;
5685 xsetbv(0, xcr0);
5686 CPUID_LEAF(0xd, subleaf, eax, ebx, dummy, edx);
5687 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
5688 }
5689 eax = xsave_mask & XFEATURE_XCR0_MASK;
5690 edx = (xsave_mask & XFEATURE_XCR0_MASK) >> 32;
5691 } else if (subleaf == 1) {
5692 /* mask out XSAVEC, XSAVES, and XFD support */
5693 eax &= XSAVE_XSAVEOPT | XSAVE_XGETBV1;
5694 ebx = 0; /* no xsavec or xsaves for now */
5695 ecx = edx = 0; /* no xsaves for now */
5696 } else if (subleaf >= 63 ||
5697 ((1ULL << subleaf) & xsave_mask & XFEATURE_XCR0_MASK) == 0) {
5698 /* disclaim subleaves of features we don't expose */
5699 eax = ebx = ecx = edx = 0;
5700 } else {
5701 /* disclaim compressed alignment or xfd support */
5702 ecx = 0;
5703 }
5704
5705 *rax = eax;
5706 vcpu->vc_gueststate.vg_rbx = ebx;
5707 vcpu->vc_gueststate.vg_rcx = ecx;
5708 vcpu->vc_gueststate.vg_rdx = edx;
5709 }
5710
5711 /*
5712 * vmm_handle_cpuid
5713 *
5714 * Exit handler for CPUID instruction
5715 *
5716 * Parameters:
5717 * vcpu: vcpu causing the CPUID exit
5718 *
5719 * Return value:
5720 * 0: the exit was processed successfully
5721 * EINVAL: error occurred validating the CPUID instruction arguments
5722 */
5723 int
5724 vmm_handle_cpuid(struct vcpu *vcpu)
5725 {
5726 uint64_t insn_length, cr4;
5727 uint64_t *rax, *rbx, *rcx, *rdx;
5728 struct vmcb *vmcb;
5729 uint32_t leaf, subleaf, eax, ebx, ecx, edx;
5730 struct vmx_msr_store *msr_store;
5731 int vmm_cpuid_level;
5732
5733 /* what's the cpuid level we support/advertise? */
5734 vmm_cpuid_level = cpuid_level;
5735 if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
5736 vmm_cpuid_level = 0x15;
5737
5738 if (vmm_softc->mode == VMM_MODE_EPT) {
5739 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
5740 DPRINTF("%s: can't obtain instruction length\n",
5741 __func__);
5742 return (EINVAL);
5743 }
5744
5745 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
5746 DPRINTF("%s: can't obtain cr4\n", __func__);
5747 return (EINVAL);
5748 }
5749
5750 rax = &vcpu->vc_gueststate.vg_rax;
5751
5752 /*
5753 * "CPUID leaves above 02H and below 80000000H are only
5754 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
5755 * default value 0"
5756 */
5757 msr_store =
5758 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5759 if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
5760 MISC_ENABLE_LIMIT_CPUID_MAXVAL)
5761 vmm_cpuid_level = 0x02;
5762 } else {
5763 /* XXX: validate insn_length 2 */
5764 insn_length = 2;
5765 vmcb = (struct vmcb *)vcpu->vc_control_va;
5766 rax = &vmcb->v_rax;
5767 cr4 = vmcb->v_cr4;
5768 }
5769
5770 rbx = &vcpu->vc_gueststate.vg_rbx;
5771 rcx = &vcpu->vc_gueststate.vg_rcx;
5772 rdx = &vcpu->vc_gueststate.vg_rdx;
5773 vcpu->vc_gueststate.vg_rip += insn_length;
5774
5775 leaf = *rax;
5776 subleaf = *rcx;
5777
5778 /*
5779 * "If a value entered for CPUID.EAX is higher than the maximum input
5780 * value for basic or extended function for that processor then the
5781 * data for the highest basic information leaf is returned."
5782 *
5783 * "When CPUID returns the highest basic leaf information as a result
5784 * of an invalid input EAX value, any dependence on input ECX value
5785 * in the basic leaf is honored."
5786 *
 * This means that if the leaf is between vmm_cpuid_level and 0x40000000
 * (the start of the hypervisor info leaves), we clamp it to
 * vmm_cpuid_level without altering the subleaf. We also clamp to
 * vmm_cpuid_level if the leaf is above the highest extended function
 * leaf.
5791 */
5792 if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
5793 (leaf > curcpu()->ci_pnfeatset)) {
5794 DPRINTF("%s: invalid cpuid input leaf 0x%x, guest rip="
5795 "0x%llx - resetting to 0x%x\n", __func__, leaf,
5796 vcpu->vc_gueststate.vg_rip - insn_length,
5797 vmm_cpuid_level);
5798 leaf = vmm_cpuid_level;
5799 }
5800
5801 /* we fake up values in the range (cpuid_level, vmm_cpuid_level] */
5802 if (leaf <= cpuid_level || leaf > 0x80000000)
5803 CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
5804 else
5805 eax = ebx = ecx = edx = 0;
5806
5807 switch (leaf) {
5808 case 0x00: /* Max level and vendor ID */
5809 *rax = vmm_cpuid_level;
5810 *rbx = *((uint32_t *)&cpu_vendor);
5811 *rdx = *((uint32_t *)&cpu_vendor + 1);
5812 *rcx = *((uint32_t *)&cpu_vendor + 2);
5813 break;
5814 case 0x01: /* Version, brand, feature info */
5815 *rax = cpu_id;
5816 /* mask off host's APIC ID, reset to vcpu id */
5817 *rbx = cpu_ebxfeature & 0x0000FFFF;
5818 *rbx |= (vcpu->vc_id & 0xFF) << 24;
5819 *rcx = (cpu_ecxfeature | CPUIDECX_HV) & VMM_CPUIDECX_MASK;
5820
5821 /* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
5822 if (cr4 & CR4_OSXSAVE)
5823 *rcx |= CPUIDECX_OSXSAVE;
5824 else
5825 *rcx &= ~CPUIDECX_OSXSAVE;
5826
5827 *rdx = curcpu()->ci_feature_flags & VMM_CPUIDEDX_MASK;
5828 break;
5829 case 0x02: /* Cache and TLB information */
5830 *rax = eax;
5831 *rbx = ebx;
5832 *rcx = ecx;
5833 *rdx = edx;
5834 break;
5835 case 0x03: /* Processor serial number (not supported) */
5836 DPRINTF("%s: function 0x03 (processor serial number) not "
5837 "supported\n", __func__);
5838 *rax = 0;
5839 *rbx = 0;
5840 *rcx = 0;
5841 *rdx = 0;
5842 break;
5843 case 0x04: /* Deterministic cache info */
5844 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
5845 *rbx = ebx;
5846 *rcx = ecx;
5847 *rdx = edx;
5848 break;
5849 case 0x05: /* MONITOR/MWAIT (not supported) */
5850 DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
5851 __func__);
5852 *rax = 0;
5853 *rbx = 0;
5854 *rcx = 0;
5855 *rdx = 0;
5856 break;
5857 case 0x06: /* Thermal / Power management (not supported) */
5858 DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
5859 __func__);
5860 *rax = 0;
5861 *rbx = 0;
5862 *rcx = 0;
5863 *rdx = 0;
5864 break;
5865 case 0x07: /* SEFF */
5866 if (subleaf == 0) {
5867 *rax = 0; /* Highest subleaf supported */
5868 *rbx = curcpu()->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK;
5869 *rcx = curcpu()->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK;
5870 *rdx = curcpu()->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK;
5871 /*
5872 * Only expose PKU support if we've detected it in use
5873 * on the host.
5874 */
5875 if (vmm_softc->sc_md.pkru_enabled)
5876 *rcx |= SEFF0ECX_PKU;
5877 else
5878 *rcx &= ~SEFF0ECX_PKU;
5879
5880 /* Expose IBT bit if we've enabled CET on the host. */
5881 if (rcr4() & CR4_CET)
5882 *rdx |= SEFF0EDX_IBT;
5883 else
5884 *rdx &= ~SEFF0EDX_IBT;
5885
5886 } else {
5887 /* Unsupported subleaf */
5888 DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
5889 "0x%x not supported\n", __func__, subleaf);
5890 *rax = 0;
5891 *rbx = 0;
5892 *rcx = 0;
5893 *rdx = 0;
5894 }
5895 break;
5896 case 0x09: /* Direct Cache Access (not supported) */
5897 DPRINTF("%s: function 0x09 (direct cache access) not "
5898 "supported\n", __func__);
5899 *rax = 0;
5900 *rbx = 0;
5901 *rcx = 0;
5902 *rdx = 0;
5903 break;
5904 case 0x0a: /* Architectural perf monitoring (not supported) */
5905 DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
5906 __func__);
5907 *rax = 0;
5908 *rbx = 0;
5909 *rcx = 0;
5910 *rdx = 0;
5911 break;
5912 case 0x0b: /* Extended topology enumeration (not supported) */
5913 DPRINTF("%s: function 0x0b (topology enumeration) not "
5914 "supported\n", __func__);
5915 *rax = 0;
5916 *rbx = 0;
5917 *rcx = 0;
5918 *rdx = 0;
5919 break;
5920 case 0x0d: /* Processor ext. state information */
5921 vmm_handle_cpuid_0xd(vcpu, subleaf, rax, eax, ebx, ecx, edx);
5922 break;
5923 case 0x0f: /* QoS info (not supported) */
5924 DPRINTF("%s: function 0x0f (QoS info) not supported\n",
5925 __func__);
5926 *rax = 0;
5927 *rbx = 0;
5928 *rcx = 0;
5929 *rdx = 0;
5930 break;
5931 case 0x14: /* Processor Trace info (not supported) */
5932 DPRINTF("%s: function 0x14 (processor trace info) not "
5933 "supported\n", __func__);
5934 *rax = 0;
5935 *rbx = 0;
5936 *rcx = 0;
5937 *rdx = 0;
5938 break;
5939 case 0x15:
5940 if (cpuid_level >= 0x15) {
5941 *rax = eax;
5942 *rbx = ebx;
5943 *rcx = ecx;
5944 *rdx = edx;
5945 } else {
5946 KASSERT(tsc_is_invariant);
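			/*
			 * Leaf 0x15 reports the TSC/core-crystal clock ratio
			 * in %ebx/%eax and the crystal frequency in %ecx, so
			 * the guest computes TSC freq = ecx * ebx / eax.
			 * With eax = 1, ebx = 100 and ecx = tsc_frequency /
			 * 100 that works out to tsc_frequency (modulo
			 * rounding).
			 */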
5947 *rax = 1;
5948 *rbx = 100;
5949 *rcx = tsc_frequency / 100;
5950 *rdx = 0;
5951 }
5952 break;
5953 case 0x16: /* Processor frequency info */
5954 *rax = eax;
5955 *rbx = ebx;
5956 *rcx = ecx;
5957 *rdx = edx;
5958 break;
5959 case 0x40000000: /* Hypervisor information */
5960 *rax = 0;
5961 *rbx = *((uint32_t *)&vmm_hv_signature[0]);
5962 *rcx = *((uint32_t *)&vmm_hv_signature[4]);
5963 *rdx = *((uint32_t *)&vmm_hv_signature[8]);
5964 break;
5965 case 0x40000001: /* KVM hypervisor features */
5966 *rax = (1 << KVM_FEATURE_CLOCKSOURCE2) |
5967 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
5968 *rbx = 0;
5969 *rcx = 0;
5970 *rdx = 0;
5971 break;
5972 case 0x80000000: /* Extended function level */
5973 /* We don't emulate past 0x8000001f currently. */
5974 *rax = min(curcpu()->ci_pnfeatset, 0x8000001f);
5975 *rbx = 0;
5976 *rcx = 0;
5977 *rdx = 0;
5978 break;
5979 case 0x80000001: /* Extended function info */
5980 *rax = curcpu()->ci_efeature_eax;
5981 *rbx = 0; /* Reserved */
5982 *rcx = curcpu()->ci_efeature_ecx & VMM_ECPUIDECX_MASK;
5983 *rdx = curcpu()->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK;
5984 break;
5985 case 0x80000002: /* Brand string */
5986 *rax = curcpu()->ci_brand[0];
5987 *rbx = curcpu()->ci_brand[1];
5988 *rcx = curcpu()->ci_brand[2];
5989 *rdx = curcpu()->ci_brand[3];
5990 break;
5991 case 0x80000003: /* Brand string */
5992 *rax = curcpu()->ci_brand[4];
5993 *rbx = curcpu()->ci_brand[5];
5994 *rcx = curcpu()->ci_brand[6];
5995 *rdx = curcpu()->ci_brand[7];
5996 break;
5997 case 0x80000004: /* Brand string */
5998 *rax = curcpu()->ci_brand[8];
5999 *rbx = curcpu()->ci_brand[9];
6000 *rcx = curcpu()->ci_brand[10];
6001 *rdx = curcpu()->ci_brand[11];
6002 break;
6003 case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */
6004 *rax = eax;
6005 *rbx = ebx;
6006 *rcx = ecx;
6007 *rdx = edx;
6008 break;
6009 case 0x80000006: /* ext. cache info */
6010 *rax = eax;
6011 *rbx = ebx;
6012 *rcx = ecx;
6013 *rdx = edx;
6014 break;
6015 case 0x80000007: /* apmi */
6016 *rax = eax;
6017 *rbx = ebx;
6018 *rcx = ecx;
6019 *rdx = edx & VMM_APMI_EDX_INCLUDE_MASK;
6020 break;
6021 case 0x80000008: /* Phys bits info and topology (AMD) */
6022 *rax = eax;
6023 *rbx = ebx & VMM_AMDSPEC_EBX_MASK;
6024 /* Reset %rcx (topology) */
6025 *rcx = 0;
6026 *rdx = edx;
6027 break;
6028 case 0x8000001d: /* cache topology (AMD) */
6029 *rax = eax;
6030 *rbx = ebx;
6031 *rcx = ecx;
6032 *rdx = edx;
6033 break;
6034 case 0x8000001f: /* encryption features (AMD) */
6035 *rax = eax;
6036 *rbx = ebx;
6037 *rcx = ecx;
6038 *rdx = edx;
6039 break;
6040 default:
6041 DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
6042 *rax = 0;
6043 *rbx = 0;
6044 *rcx = 0;
6045 *rdx = 0;
6046 }
6047
6048
6049 if (vmm_softc->mode == VMM_MODE_RVI) {
		/*
		 * Update %rax; the rest of the registers get updated in
		 * svm_enter_guest().
		 */
6054 vmcb->v_rax = *rax;
6055 }
6056
6057 return (0);
6058 }
6059
6060 /*
6061 * vcpu_run_svm
6062 *
6063 * SVM main loop used to run a VCPU.
6064 *
6065 * Parameters:
6066 * vcpu: The VCPU to run
6067 * vrp: run parameters
6068 *
6069 * Return values:
6070 * 0: The run loop exited and no help is needed from vmd
6071 * EAGAIN: The run loop exited and help from vmd is needed
6072 * EINVAL: an error occurred
6073 */
6074 int
vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
6076 {
6077 int ret = 0;
6078 struct region_descriptor gdt;
6079 struct cpu_info *ci = NULL;
6080 uint64_t exit_reason;
6081 struct schedstate_percpu *spc;
6082 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6083
6084 if (vrp->vrp_intr_pending)
6085 vcpu->vc_intr = 1;
6086 else
6087 vcpu->vc_intr = 0;
6088
6089 /*
6090 * If we are returning from userspace (vmd) because we exited
6091 * last time, fix up any needed vcpu state first. Which state
6092 * needs to be fixed up depends on what vmd populated in the
6093 * exit data structure.
6094 */
6095 switch (vcpu->vc_gueststate.vg_exit_reason) {
6096 case SVM_VMEXIT_IOIO:
6097 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
6098 vcpu->vc_gueststate.vg_rax =
6099 vcpu->vc_exit.vei.vei_data;
6100 vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
6101 }
6102 vcpu->vc_gueststate.vg_rip =
6103 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
6104 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
6105 break;
6106 case SVM_VMEXIT_NPF:
6107 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
6108 &vcpu->vc_exit.vrs);
6109 if (ret) {
6110 printf("%s: vm %d vcpu %d failed to update "
6111 "registers\n", __func__,
6112 vcpu->vc_parent->vm_id, vcpu->vc_id);
6113 return (EINVAL);
6114 }
6115 break;
6116 }
6117 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
6118
6119 while (ret == 0) {
6120 vmm_update_pvclock(vcpu);
6121 if (ci != curcpu()) {
6122 /*
6123 * We are launching for the first time, or we are
6124 * resuming from a different pcpu, so we need to
6125 * reset certain pcpu-specific values.
6126 */
6127 ci = curcpu();
6128 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
6129
6130 if (ci != vcpu->vc_last_pcpu) {
6131 /*
6132 * Flush TLB by guest ASID if feature
6133 * available, flush entire TLB if not.
6134 */
6135 if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid)
6136 vmcb->v_tlb_control =
6137 SVM_TLB_CONTROL_FLUSH_ASID;
6138 else
6139 vmcb->v_tlb_control =
6140 SVM_TLB_CONTROL_FLUSH_ALL;
6141
6142 svm_set_dirty(vcpu, SVM_CLEANBITS_ALL);
6143 }
6144
6145 vcpu->vc_last_pcpu = ci;
6146
6147 if (gdt.rd_base == 0) {
6148 ret = EINVAL;
6149 break;
6150 }
6151 }
6152
6153 /* Handle vmd(8) injected interrupts */
6154 /* Is there an interrupt pending injection? */
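		/*
		 * VMCB EVENTINJ layout (AMD APM vol. 2): bits 7:0 hold the
		 * vector, bits 10:8 the event type (0 = external interrupt,
		 * 3 = exception, 4 = software interrupt), bit 11 marks the
		 * error code as valid, bits 63:32 hold the error code and
		 * bit 31 marks the whole field as valid.
		 */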
6155 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR &&
6156 vcpu->vc_irqready) {
6157 vmcb->v_eventinj = vcpu->vc_inject.vie_vector |
6158 (1U << 31);
6159 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6160 }
6161
6162 /* Inject event if present */
6163 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
6164 vmcb->v_eventinj = vcpu->vc_inject.vie_vector;
6165
6166 /* Set the "Event Valid" flag for certain vectors */
6167 switch (vcpu->vc_inject.vie_vector) {
6168 case VMM_EX_BP:
6169 case VMM_EX_OF:
6170 case VMM_EX_DB:
6171 /*
6172 * Software exception.
6173 * XXX check nRIP support.
6174 */
6175 vmcb->v_eventinj |= (4ULL << 8);
6176 break;
6177 case VMM_EX_AC:
6178 vcpu->vc_inject.vie_errorcode = 0;
6179 /* fallthrough */
6180 case VMM_EX_DF:
6181 case VMM_EX_TS:
6182 case VMM_EX_NP:
6183 case VMM_EX_SS:
6184 case VMM_EX_GP:
6185 case VMM_EX_PF:
6186 /* Hardware exception. */
6187 vmcb->v_eventinj |= (3ULL << 8);
6188
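				/*
				 * An error code is only delivered if the
				 * guest is in protected mode; real-mode
				 * exceptions never push one.
				 */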
6189 if (vmcb->v_cr0 & CR0_PE) {
6190 /* Error code valid. */
6191 vmcb->v_eventinj |= (1ULL << 11);
6192 vmcb->v_eventinj |= (uint64_t)
6193 vcpu->vc_inject.vie_errorcode << 32;
6194 }
6195 break;
6196 default:
6197 printf("%s: unsupported exception vector %u\n",
6198 __func__, vcpu->vc_inject.vie_vector);
6199 ret = EINVAL;
6200 } /* switch */
6201 if (ret == EINVAL)
6202 break;
6203
6204 /* Event is valid. */
6205 vmcb->v_eventinj |= (1U << 31);
6206 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
6207 }
6208
6209 TRACEPOINT(vmm, guest_enter, vcpu, vrp);
6210
6211 /* Start / resume the VCPU */
6212 /* Disable interrupts and save the current host FPU state. */
6213 clgi();
6214 if ((ret = vmm_fpurestore(vcpu))) {
6215 stgi();
6216 break;
6217 }
6218
6219 /*
6220 * If we're resuming to a different VCPU and have IBPB,
6221 * then use it to prevent cross-VM branch-target injection.
6222 */
6223 if (ci->ci_guest_vcpu != vcpu &&
6224 (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBPB)) {
6225 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
6226 ci->ci_guest_vcpu = vcpu;
6227 }
6228
6229 /* Restore any guest PKRU state. */
6230 if (vmm_softc->sc_md.pkru_enabled)
6231 wrpkru(0, vcpu->vc_pkru);
6232
6233 KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
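		/*
		 * VMRUN saves host state to (and #VMEXIT restores it from)
		 * the host save area whose physical address is held in
		 * MSR_AMD_VM_HSAVE_PA.
		 */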
6234 wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);
6235
6236 ret = svm_enter_guest(vcpu->vc_control_pa,
6237 &vcpu->vc_gueststate, &gdt);
6238
6239 /* Restore host PKRU state. */
6240 if (vmm_softc->sc_md.pkru_enabled) {
6241 vcpu->vc_pkru = rdpkru(0);
6242 wrpkru(0, PGK_VALUE);
6243 }
6244
6245 /*
6246 * On exit, interrupts are disabled, and we are running with
6247 * the guest FPU state still possibly on the CPU. Save the FPU
6248 * state before re-enabling interrupts.
6249 */
6250 vmm_fpusave(vcpu);
6251
6252 /*
6253 * Enable interrupts now. Note that if the exit was due to INTR
6254 * (external interrupt), the interrupt will be processed now.
6255 */
6256 stgi();
6257
6258 vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
6259 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE;
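		/*
		 * The clean bits tell the CPU which VMCB fields it may keep
		 * cached across VMRUN; svm_set_dirty() invalidates the ones
		 * we modify before the next entry.
		 */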
6260 svm_set_clean(vcpu, SVM_CLEANBITS_ALL);
6261
6262 /* If we exited successfully ... */
6263 if (ret == 0) {
6264 exit_reason = vmcb->v_exitcode;
6265 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
6266 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
6267
6268 vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags;
6269
6270 /*
6271 * Handle the exit. This will alter "ret" to EAGAIN if
6272 * the exit handler determines help from vmd is needed.
6273 */
6274 ret = svm_handle_exit(vcpu);
6275
6276 if (vcpu->vc_gueststate.vg_rflags & PSL_I)
6277 vcpu->vc_irqready = 1;
6278 else
6279 vcpu->vc_irqready = 0;
6280
6281 /*
6282 * If not ready for interrupts, but interrupts pending,
6283 * enable interrupt window exiting.
6284 */
6285 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
6286 vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR;
6287 vmcb->v_irq = 1;
6288 vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR;
6289 vmcb->v_intr_vector = 0;
6290 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR |
6291 SVM_CLEANBITS_I);
6292 }
6293
6294 /*
6295 * Exit to vmd if we are terminating, failed to enter,
6296 * or need help (device I/O)
6297 */
6298 if (ret || vcpu_must_stop(vcpu))
6299 break;
6300
6301 if (vcpu->vc_intr && vcpu->vc_irqready) {
6302 ret = EAGAIN;
6303 break;
6304 }
6305
6306 /* Check if we should yield - don't hog the cpu */
6307 spc = &ci->ci_schedstate;
6308 if (spc->spc_schedflags & SPCF_SHOULDYIELD)
6309 break;
6310 }
6311 }
6312
6313 /*
6314 * We are heading back to userspace (vmd), either because we need help
6315 * handling an exit, a guest interrupt is pending, or we failed in some
6316 * way to enter the guest. Copy the guest registers to the exit struct
6317 * and return to vmd.
6318 */
6319 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs))
6320 ret = EINVAL;
6321
6322 return (ret);
6323 }
6324
6325 /*
6326 * vmm_alloc_vpid
6327 *
6328 * Sets the memory location pointed to by "vpid" to the next available VPID
6329 * or ASID.
6330 *
6331 * Parameters:
6332 * vpid: Pointer to location to receive the next VPID/ASID
6333 *
6334 * Return Values:
6335 * 0: The operation completed successfully
6336 * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
6337 */
6338 int
vmm_alloc_vpid(uint16_t *vpid)
6340 {
6341 uint16_t i;
6342 uint8_t idx, bit;
6343 struct vmm_softc *sc = vmm_softc;
6344
6345 rw_enter_write(&vmm_softc->vpid_lock);
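	/*
	 * VPID/ASID 0 is reserved for the host, so start scanning at 1.
	 * The allocation bitmap packs eight IDs per byte of sc->vpids.
	 */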
6346 for (i = 1; i <= sc->max_vpid; i++) {
6347 idx = i / 8;
6348 bit = i - (idx * 8);
6349
6350 if (!(sc->vpids[idx] & (1 << bit))) {
6351 sc->vpids[idx] |= (1 << bit);
6352 *vpid = i;
6353 DPRINTF("%s: allocated VPID/ASID %d\n", __func__,
6354 i);
6355 rw_exit_write(&vmm_softc->vpid_lock);
6356 return 0;
6357 }
6358 }
6359
6360 printf("%s: no available %ss\n", __func__,
6361 (sc->mode == VMM_MODE_EPT) ? "VPID" :
6362 "ASID");
6363
6364 rw_exit_write(&vmm_softc->vpid_lock);
6365 return ENOMEM;
6366 }
6367
6368 /*
6369 * vmm_free_vpid
6370 *
6371 * Frees the VPID/ASID id supplied in "vpid".
6372 *
6373 * Parameters:
6374 * vpid: VPID/ASID to free.
6375 */
6376 void
vmm_free_vpid(uint16_t vpid)
6378 {
6379 uint8_t idx, bit;
6380 struct vmm_softc *sc = vmm_softc;
6381
6382 rw_enter_write(&vmm_softc->vpid_lock);
6383 idx = vpid / 8;
6384 bit = vpid - (idx * 8);
6385 sc->vpids[idx] &= ~(1 << bit);
6386
6387 DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid);
6388 rw_exit_write(&vmm_softc->vpid_lock);
6389 }
6390
6391
/*
 * vmm_gpa_is_valid
6393 *
6394 * Check if the given gpa is within guest memory space.
6395 *
6396 * Parameters:
6397 * vcpu: The virtual cpu we are running on.
6398 * gpa: The address to check.
6399 * obj_size: The size of the object assigned to gpa
6400 *
6401 * Return values:
6402 * 1: gpa is within the memory ranges allocated for the vcpu
6403 * 0: otherwise
6404 */
6405 int
vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size)
6407 {
6408 struct vm *vm = vcpu->vc_parent;
6409 struct vm_mem_range *vmr;
6410 size_t i;
6411
6412 for (i = 0; i < vm->vm_nmemranges; ++i) {
6413 vmr = &vm->vm_memranges[i];
6414 if (vmr->vmr_size >= obj_size &&
6415 vmr->vmr_gpa <= gpa &&
6416 gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) {
6417 return 1;
6418 }
6419 }
6420 return 0;
6421 }
6422
6423 void
vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa)
6425 {
6426 paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0;
6427 if (!vmm_gpa_is_valid(vcpu, pvclock_gpa,
6428 sizeof(struct pvclock_time_info))) {
6429 /* XXX: Kill guest? */
6430 vmm_inject_gp(vcpu);
6431 return;
6432 }
6433
6434 /* XXX: handle case when this struct goes over page boundaries */
6435 if ((pvclock_gpa & PAGE_MASK) + sizeof(struct pvclock_time_info) >
6436 PAGE_SIZE) {
6437 vmm_inject_gp(vcpu);
6438 return;
6439 }
6440
6441 vcpu->vc_pvclock_system_gpa = gpa;
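	/*
	 * Precompute the pvclock scaling factor. The guest converts a TSC
	 * delta to nanoseconds as
	 *   ns = ((tsc_delta << ti_tsc_shift) * ti_tsc_to_system_mul) >> 32
	 * so with ti_tsc_shift = 12 and mul = (10^9 << 20) / tsc_frequency
	 * this reduces to tsc_delta * 10^9 / tsc_frequency.
	 */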
6442 if (tsc_frequency > 0)
6443 vcpu->vc_pvclock_system_tsc_mul =
6444 (int) ((1000000000L << 20) / tsc_frequency);
6445 else
6446 vcpu->vc_pvclock_system_tsc_mul = 0;
6447 vmm_update_pvclock(vcpu);
6448 }
6449
6450 int
vmm_update_pvclock(struct vcpu *vcpu)
6452 {
6453 struct pvclock_time_info *pvclock_ti;
6454 struct timespec tv;
6455 struct vm *vm = vcpu->vc_parent;
6456 paddr_t pvclock_hpa, pvclock_gpa;
6457
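	/*
	 * ti_version acts as a seqlock for the guest: an odd value means an
	 * update is in progress and the other fields must be re-read.
	 */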
6458 if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE) {
6459 pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
6460 if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
6461 return (EINVAL);
6462 pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa);
6463
6464 /* START next cycle (must be odd) */
6465 pvclock_ti->ti_version =
6466 (++vcpu->vc_pvclock_version << 1) | 0x1;
6467
6468 pvclock_ti->ti_tsc_timestamp = rdtsc();
6469 nanotime(&tv);
6470 pvclock_ti->ti_system_time =
6471 tv.tv_sec * 1000000000L + tv.tv_nsec;
6472 pvclock_ti->ti_tsc_shift = 12;
6473 pvclock_ti->ti_tsc_to_system_mul =
6474 vcpu->vc_pvclock_system_tsc_mul;
6475 pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE;
6476
6477 /* END (must be even) */
6478 pvclock_ti->ti_version &= ~0x1;
6479 }
6480 return (0);
6481 }
6482
6483 int
vmm_pat_is_valid(uint64_t pat)
6485 {
6486 int i;
6487 uint8_t *byte = (uint8_t *)&pat;
6488
6489 /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
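	/*
	 * The valid PAT memory types are 0x00 (UC), 0x01 (WC), 0x04 (WT),
	 * 0x05 (WP), 0x06 (WB) and 0x07 (UC-); 0x02, 0x03 and anything
	 * above 0x07 are reserved.
	 */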
6490 for (i = 0; i < 8; i++) {
6491 if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
6492 DPRINTF("%s: invalid pat %llx\n", __func__, pat);
6493 return 0;
6494 }
6495 }
6496
6497 return 1;
6498 }
6499
6500 /*
6501 * vmx_exit_reason_decode
6502 *
6503 * Returns a human readable string describing exit type 'code'
6504 */
6505 const char *
vmx_exit_reason_decode(uint32_t code)
6507 {
6508 switch (code) {
6509 case VMX_EXIT_NMI: return "NMI";
6510 case VMX_EXIT_EXTINT: return "External interrupt";
6511 case VMX_EXIT_TRIPLE_FAULT: return "Triple fault";
6512 case VMX_EXIT_INIT: return "INIT signal";
6513 case VMX_EXIT_SIPI: return "SIPI signal";
6514 case VMX_EXIT_IO_SMI: return "I/O SMI";
6515 case VMX_EXIT_OTHER_SMI: return "other SMI";
6516 case VMX_EXIT_INT_WINDOW: return "Interrupt window";
6517 case VMX_EXIT_NMI_WINDOW: return "NMI window";
6518 case VMX_EXIT_TASK_SWITCH: return "Task switch";
6519 case VMX_EXIT_CPUID: return "CPUID instruction";
6520 case VMX_EXIT_GETSEC: return "GETSEC instruction";
6521 case VMX_EXIT_HLT: return "HLT instruction";
6522 case VMX_EXIT_INVD: return "INVD instruction";
6523 case VMX_EXIT_INVLPG: return "INVLPG instruction";
6524 case VMX_EXIT_RDPMC: return "RDPMC instruction";
6525 case VMX_EXIT_RDTSC: return "RDTSC instruction";
6526 case VMX_EXIT_RSM: return "RSM instruction";
6527 case VMX_EXIT_VMCALL: return "VMCALL instruction";
6528 case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction";
6529 case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction";
6530 case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction";
6531 case VMX_EXIT_VMPTRST: return "VMPTRST instruction";
6532 case VMX_EXIT_VMREAD: return "VMREAD instruction";
6533 case VMX_EXIT_VMRESUME: return "VMRESUME instruction";
6534 case VMX_EXIT_VMWRITE: return "VMWRITE instruction";
6535 case VMX_EXIT_VMXOFF: return "VMXOFF instruction";
6536 case VMX_EXIT_VMXON: return "VMXON instruction";
6537 case VMX_EXIT_CR_ACCESS: return "CR access";
6538 case VMX_EXIT_MOV_DR: return "MOV DR instruction";
6539 case VMX_EXIT_IO: return "I/O instruction";
6540 case VMX_EXIT_RDMSR: return "RDMSR instruction";
6541 case VMX_EXIT_WRMSR: return "WRMSR instruction";
6542 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid";
6543 case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed";
6544 case VMX_EXIT_MWAIT: return "MWAIT instruction";
6545 case VMX_EXIT_MTF: return "monitor trap flag";
6546 case VMX_EXIT_MONITOR: return "MONITOR instruction";
6547 case VMX_EXIT_PAUSE: return "PAUSE instruction";
6548 case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry";
6549 case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold";
6550 case VMX_EXIT_APIC_ACCESS: return "APIC access";
6551 case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI";
6552 case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access";
6553 case VMX_EXIT_LDTR_TR: return "LDTR/TR access";
6554 case VMX_EXIT_EPT_VIOLATION: return "EPT violation";
6555 case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration";
6556 case VMX_EXIT_INVEPT: return "INVEPT instruction";
6557 case VMX_EXIT_RDTSCP: return "RDTSCP instruction";
6558 case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED:
6559 return "preemption timer expired";
6560 case VMX_EXIT_INVVPID: return "INVVPID instruction";
6561 case VMX_EXIT_WBINVD: return "WBINVD instruction";
6562 case VMX_EXIT_XSETBV: return "XSETBV instruction";
6563 case VMX_EXIT_APIC_WRITE: return "APIC write";
6564 case VMX_EXIT_RDRAND: return "RDRAND instruction";
6565 case VMX_EXIT_INVPCID: return "INVPCID instruction";
6566 case VMX_EXIT_VMFUNC: return "VMFUNC instruction";
6567 case VMX_EXIT_RDSEED: return "RDSEED instruction";
6568 case VMX_EXIT_XSAVES: return "XSAVES instruction";
6569 case VMX_EXIT_XRSTORS: return "XRSTORS instruction";
6570 default: return "unknown";
6571 }
6572 }
6573
6574 /*
6575 * svm_exit_reason_decode
6576 *
6577 * Returns a human readable string describing exit type 'code'
6578 */
6579 const char *
svm_exit_reason_decode(uint32_t code)
6581 {
6582 switch (code) {
6583 case SVM_VMEXIT_CR0_READ: return "CR0 read"; /* 0x00 */
6584 case SVM_VMEXIT_CR1_READ: return "CR1 read"; /* 0x01 */
6585 case SVM_VMEXIT_CR2_READ: return "CR2 read"; /* 0x02 */
6586 case SVM_VMEXIT_CR3_READ: return "CR3 read"; /* 0x03 */
6587 case SVM_VMEXIT_CR4_READ: return "CR4 read"; /* 0x04 */
6588 case SVM_VMEXIT_CR5_READ: return "CR5 read"; /* 0x05 */
6589 case SVM_VMEXIT_CR6_READ: return "CR6 read"; /* 0x06 */
6590 case SVM_VMEXIT_CR7_READ: return "CR7 read"; /* 0x07 */
6591 case SVM_VMEXIT_CR8_READ: return "CR8 read"; /* 0x08 */
6592 case SVM_VMEXIT_CR9_READ: return "CR9 read"; /* 0x09 */
6593 case SVM_VMEXIT_CR10_READ: return "CR10 read"; /* 0x0A */
6594 case SVM_VMEXIT_CR11_READ: return "CR11 read"; /* 0x0B */
6595 case SVM_VMEXIT_CR12_READ: return "CR12 read"; /* 0x0C */
6596 case SVM_VMEXIT_CR13_READ: return "CR13 read"; /* 0x0D */
6597 case SVM_VMEXIT_CR14_READ: return "CR14 read"; /* 0x0E */
6598 case SVM_VMEXIT_CR15_READ: return "CR15 read"; /* 0x0F */
6599 case SVM_VMEXIT_CR0_WRITE: return "CR0 write"; /* 0x10 */
6600 case SVM_VMEXIT_CR1_WRITE: return "CR1 write"; /* 0x11 */
6601 case SVM_VMEXIT_CR2_WRITE: return "CR2 write"; /* 0x12 */
6602 case SVM_VMEXIT_CR3_WRITE: return "CR3 write"; /* 0x13 */
6603 case SVM_VMEXIT_CR4_WRITE: return "CR4 write"; /* 0x14 */
6604 case SVM_VMEXIT_CR5_WRITE: return "CR5 write"; /* 0x15 */
6605 case SVM_VMEXIT_CR6_WRITE: return "CR6 write"; /* 0x16 */
6606 case SVM_VMEXIT_CR7_WRITE: return "CR7 write"; /* 0x17 */
6607 case SVM_VMEXIT_CR8_WRITE: return "CR8 write"; /* 0x18 */
6608 case SVM_VMEXIT_CR9_WRITE: return "CR9 write"; /* 0x19 */
6609 case SVM_VMEXIT_CR10_WRITE: return "CR10 write"; /* 0x1A */
6610 case SVM_VMEXIT_CR11_WRITE: return "CR11 write"; /* 0x1B */
6611 case SVM_VMEXIT_CR12_WRITE: return "CR12 write"; /* 0x1C */
6612 case SVM_VMEXIT_CR13_WRITE: return "CR13 write"; /* 0x1D */
6613 case SVM_VMEXIT_CR14_WRITE: return "CR14 write"; /* 0x1E */
6614 case SVM_VMEXIT_CR15_WRITE: return "CR15 write"; /* 0x1F */
6615 case SVM_VMEXIT_DR0_READ: return "DR0 read"; /* 0x20 */
6616 case SVM_VMEXIT_DR1_READ: return "DR1 read"; /* 0x21 */
6617 case SVM_VMEXIT_DR2_READ: return "DR2 read"; /* 0x22 */
6618 case SVM_VMEXIT_DR3_READ: return "DR3 read"; /* 0x23 */
6619 case SVM_VMEXIT_DR4_READ: return "DR4 read"; /* 0x24 */
6620 case SVM_VMEXIT_DR5_READ: return "DR5 read"; /* 0x25 */
6621 case SVM_VMEXIT_DR6_READ: return "DR6 read"; /* 0x26 */
6622 case SVM_VMEXIT_DR7_READ: return "DR7 read"; /* 0x27 */
6623 case SVM_VMEXIT_DR8_READ: return "DR8 read"; /* 0x28 */
6624 case SVM_VMEXIT_DR9_READ: return "DR9 read"; /* 0x29 */
6625 case SVM_VMEXIT_DR10_READ: return "DR10 read"; /* 0x2A */
6626 case SVM_VMEXIT_DR11_READ: return "DR11 read"; /* 0x2B */
6627 case SVM_VMEXIT_DR12_READ: return "DR12 read"; /* 0x2C */
6628 case SVM_VMEXIT_DR13_READ: return "DR13 read"; /* 0x2D */
6629 case SVM_VMEXIT_DR14_READ: return "DR14 read"; /* 0x2E */
6630 case SVM_VMEXIT_DR15_READ: return "DR15 read"; /* 0x2F */
6631 case SVM_VMEXIT_DR0_WRITE: return "DR0 write"; /* 0x30 */
6632 case SVM_VMEXIT_DR1_WRITE: return "DR1 write"; /* 0x31 */
6633 case SVM_VMEXIT_DR2_WRITE: return "DR2 write"; /* 0x32 */
6634 case SVM_VMEXIT_DR3_WRITE: return "DR3 write"; /* 0x33 */
6635 case SVM_VMEXIT_DR4_WRITE: return "DR4 write"; /* 0x34 */
6636 case SVM_VMEXIT_DR5_WRITE: return "DR5 write"; /* 0x35 */
6637 case SVM_VMEXIT_DR6_WRITE: return "DR6 write"; /* 0x36 */
6638 case SVM_VMEXIT_DR7_WRITE: return "DR7 write"; /* 0x37 */
6639 case SVM_VMEXIT_DR8_WRITE: return "DR8 write"; /* 0x38 */
6640 case SVM_VMEXIT_DR9_WRITE: return "DR9 write"; /* 0x39 */
6641 case SVM_VMEXIT_DR10_WRITE: return "DR10 write"; /* 0x3A */
6642 case SVM_VMEXIT_DR11_WRITE: return "DR11 write"; /* 0x3B */
6643 case SVM_VMEXIT_DR12_WRITE: return "DR12 write"; /* 0x3C */
6644 case SVM_VMEXIT_DR13_WRITE: return "DR13 write"; /* 0x3D */
6645 case SVM_VMEXIT_DR14_WRITE: return "DR14 write"; /* 0x3E */
6646 case SVM_VMEXIT_DR15_WRITE: return "DR15 write"; /* 0x3F */
6647 case SVM_VMEXIT_EXCP0: return "Exception 0x00"; /* 0x40 */
6648 case SVM_VMEXIT_EXCP1: return "Exception 0x01"; /* 0x41 */
6649 case SVM_VMEXIT_EXCP2: return "Exception 0x02"; /* 0x42 */
6650 case SVM_VMEXIT_EXCP3: return "Exception 0x03"; /* 0x43 */
6651 case SVM_VMEXIT_EXCP4: return "Exception 0x04"; /* 0x44 */
6652 case SVM_VMEXIT_EXCP5: return "Exception 0x05"; /* 0x45 */
6653 case SVM_VMEXIT_EXCP6: return "Exception 0x06"; /* 0x46 */
6654 case SVM_VMEXIT_EXCP7: return "Exception 0x07"; /* 0x47 */
6655 case SVM_VMEXIT_EXCP8: return "Exception 0x08"; /* 0x48 */
6656 case SVM_VMEXIT_EXCP9: return "Exception 0x09"; /* 0x49 */
6657 case SVM_VMEXIT_EXCP10: return "Exception 0x0A"; /* 0x4A */
6658 case SVM_VMEXIT_EXCP11: return "Exception 0x0B"; /* 0x4B */
6659 case SVM_VMEXIT_EXCP12: return "Exception 0x0C"; /* 0x4C */
6660 case SVM_VMEXIT_EXCP13: return "Exception 0x0D"; /* 0x4D */
6661 case SVM_VMEXIT_EXCP14: return "Exception 0x0E"; /* 0x4E */
6662 case SVM_VMEXIT_EXCP15: return "Exception 0x0F"; /* 0x4F */
6663 case SVM_VMEXIT_EXCP16: return "Exception 0x10"; /* 0x50 */
6664 case SVM_VMEXIT_EXCP17: return "Exception 0x11"; /* 0x51 */
6665 case SVM_VMEXIT_EXCP18: return "Exception 0x12"; /* 0x52 */
6666 case SVM_VMEXIT_EXCP19: return "Exception 0x13"; /* 0x53 */
6667 case SVM_VMEXIT_EXCP20: return "Exception 0x14"; /* 0x54 */
6668 case SVM_VMEXIT_EXCP21: return "Exception 0x15"; /* 0x55 */
6669 case SVM_VMEXIT_EXCP22: return "Exception 0x16"; /* 0x56 */
6670 case SVM_VMEXIT_EXCP23: return "Exception 0x17"; /* 0x57 */
6671 case SVM_VMEXIT_EXCP24: return "Exception 0x18"; /* 0x58 */
6672 case SVM_VMEXIT_EXCP25: return "Exception 0x19"; /* 0x59 */
6673 case SVM_VMEXIT_EXCP26: return "Exception 0x1A"; /* 0x5A */
6674 case SVM_VMEXIT_EXCP27: return "Exception 0x1B"; /* 0x5B */
6675 case SVM_VMEXIT_EXCP28: return "Exception 0x1C"; /* 0x5C */
6676 case SVM_VMEXIT_EXCP29: return "Exception 0x1D"; /* 0x5D */
6677 case SVM_VMEXIT_EXCP30: return "Exception 0x1E"; /* 0x5E */
6678 case SVM_VMEXIT_EXCP31: return "Exception 0x1F"; /* 0x5F */
6679 case SVM_VMEXIT_INTR: return "External interrupt"; /* 0x60 */
6680 case SVM_VMEXIT_NMI: return "NMI"; /* 0x61 */
6681 case SVM_VMEXIT_SMI: return "SMI"; /* 0x62 */
6682 case SVM_VMEXIT_INIT: return "INIT"; /* 0x63 */
6683 case SVM_VMEXIT_VINTR: return "Interrupt window"; /* 0x64 */
6684 case SVM_VMEXIT_CR0_SEL_WRITE: return "Sel CR0 write"; /* 0x65 */
6685 case SVM_VMEXIT_IDTR_READ: return "IDTR read"; /* 0x66 */
6686 case SVM_VMEXIT_GDTR_READ: return "GDTR read"; /* 0x67 */
6687 case SVM_VMEXIT_LDTR_READ: return "LDTR read"; /* 0x68 */
6688 case SVM_VMEXIT_TR_READ: return "TR read"; /* 0x69 */
6689 case SVM_VMEXIT_IDTR_WRITE: return "IDTR write"; /* 0x6A */
6690 case SVM_VMEXIT_GDTR_WRITE: return "GDTR write"; /* 0x6B */
6691 case SVM_VMEXIT_LDTR_WRITE: return "LDTR write"; /* 0x6C */
6692 case SVM_VMEXIT_TR_WRITE: return "TR write"; /* 0x6D */
6693 case SVM_VMEXIT_RDTSC: return "RDTSC instruction"; /* 0x6E */
6694 case SVM_VMEXIT_RDPMC: return "RDPMC instruction"; /* 0x6F */
6695 case SVM_VMEXIT_PUSHF: return "PUSHF instruction"; /* 0x70 */
6696 case SVM_VMEXIT_POPF: return "POPF instruction"; /* 0x71 */
6697 case SVM_VMEXIT_CPUID: return "CPUID instruction"; /* 0x72 */
6698 case SVM_VMEXIT_RSM: return "RSM instruction"; /* 0x73 */
6699 case SVM_VMEXIT_IRET: return "IRET instruction"; /* 0x74 */
6700 case SVM_VMEXIT_SWINT: return "SWINT instruction"; /* 0x75 */
6701 case SVM_VMEXIT_INVD: return "INVD instruction"; /* 0x76 */
6702 case SVM_VMEXIT_PAUSE: return "PAUSE instruction"; /* 0x77 */
6703 case SVM_VMEXIT_HLT: return "HLT instruction"; /* 0x78 */
6704 case SVM_VMEXIT_INVLPG: return "INVLPG instruction"; /* 0x79 */
6705 case SVM_VMEXIT_INVLPGA: return "INVLPGA instruction"; /* 0x7A */
6706 case SVM_VMEXIT_IOIO: return "I/O instruction"; /* 0x7B */
6707 case SVM_VMEXIT_MSR: return "RDMSR/WRMSR instruction"; /* 0x7C */
6708 case SVM_VMEXIT_TASK_SWITCH: return "Task switch"; /* 0x7D */
6709 case SVM_VMEXIT_FERR_FREEZE: return "FERR_FREEZE"; /* 0x7E */
6710 case SVM_VMEXIT_SHUTDOWN: return "Triple fault"; /* 0x7F */
6711 case SVM_VMEXIT_VMRUN: return "VMRUN instruction"; /* 0x80 */
6712 case SVM_VMEXIT_VMMCALL: return "VMMCALL instruction"; /* 0x81 */
6713 case SVM_VMEXIT_VMLOAD: return "VMLOAD instruction"; /* 0x82 */
6714 case SVM_VMEXIT_VMSAVE: return "VMSAVE instruction"; /* 0x83 */
6715 case SVM_VMEXIT_STGI: return "STGI instruction"; /* 0x84 */
6716 case SVM_VMEXIT_CLGI: return "CLGI instruction"; /* 0x85 */
6717 case SVM_VMEXIT_SKINIT: return "SKINIT instruction"; /* 0x86 */
6718 case SVM_VMEXIT_RDTSCP: return "RDTSCP instruction"; /* 0x87 */
6719 case SVM_VMEXIT_ICEBP: return "ICEBP instruction"; /* 0x88 */
6720 case SVM_VMEXIT_WBINVD: return "WBINVD instruction"; /* 0x89 */
6721 case SVM_VMEXIT_MONITOR: return "MONITOR instruction"; /* 0x8A */
6722 case SVM_VMEXIT_MWAIT: return "MWAIT instruction"; /* 0x8B */
6723 case SVM_VMEXIT_MWAIT_CONDITIONAL: return "Cond MWAIT"; /* 0x8C */
6724 case SVM_VMEXIT_NPF: return "NPT violation"; /* 0x400 */
6725 default: return "unknown";
6726 }
6727 }
6728
6729 /*
6730 * vmx_instruction_error_decode
6731 *
6732 * Returns a human readable string describing the instruction error in 'code'
6733 */
6734 const char *
vmx_instruction_error_decode(uint32_t code)
6736 {
6737 switch (code) {
6738 case 1: return "VMCALL: unsupported in VMX root";
6739 case 2: return "VMCLEAR: invalid paddr";
6740 case 3: return "VMCLEAR: VMXON pointer";
6741 case 4: return "VMLAUNCH: non-clear VMCS";
6742 case 5: return "VMRESUME: non-launched VMCS";
6743 case 6: return "VMRESUME: executed after VMXOFF";
6744 case 7: return "VM entry: invalid control field(s)";
6745 case 8: return "VM entry: invalid host state field(s)";
6746 case 9: return "VMPTRLD: invalid paddr";
6747 case 10: return "VMPTRLD: VMXON pointer";
6748 case 11: return "VMPTRLD: incorrect VMCS revid";
6749 case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
6750 case 13: return "VMWRITE: RO VMCS field";
6751 case 15: return "VMXON: unsupported in VMX root";
6752 case 20: return "VMCALL: invalid VM exit control fields";
6753 case 26: return "VM entry: blocked by MOV SS";
6754 case 28: return "Invalid operand to INVEPT/INVVPID";
6755 case 0x80000021: return "VM entry: invalid guest state";
6756 case 0x80000022: return "VM entry: failure due to MSR loading";
6757 case 0x80000029: return "VM entry: machine-check event";
6758 default: return "unknown";
6759 }
6760 }
6761
6762 /*
6763 * vcpu_state_decode
6764 *
6765 * Returns a human readable string describing the vcpu state in 'state'.
6766 */
6767 const char *
vcpu_state_decode(u_int state)
6769 {
6770 switch (state) {
6771 case VCPU_STATE_STOPPED: return "stopped";
6772 case VCPU_STATE_RUNNING: return "running";
6773 case VCPU_STATE_REQTERM: return "requesting termination";
6774 case VCPU_STATE_TERMINATED: return "terminated";
6775 case VCPU_STATE_UNKNOWN: return "unknown";
6776 default: return "invalid";
6777 }
6778 }
6779
6780 #ifdef VMM_DEBUG
6781 /*
6782 * dump_vcpu
6783 *
6784 * Dumps the VMX capabilities of vcpu 'vcpu'
6785 */
6786 void
dump_vcpu(struct vcpu *vcpu)
6788 {
6789 printf("vcpu @ %p\n", vcpu);
6790 printf(" parent vm @ %p\n", vcpu->vc_parent);
6791 printf(" mode: ");
6792 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
6793 printf("VMX\n");
6794 printf(" pinbased ctls: 0x%llx\n",
6795 vcpu->vc_vmx_pinbased_ctls);
6796 printf(" true pinbased ctls: 0x%llx\n",
6797 vcpu->vc_vmx_true_pinbased_ctls);
6798 CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING);
6799 CTRL_DUMP(vcpu, PINBASED, NMI_EXITING);
6800 CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS);
6801 CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER);
6802 CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS);
6803 printf(" procbased ctls: 0x%llx\n",
6804 vcpu->vc_vmx_procbased_ctls);
6805 printf(" true procbased ctls: 0x%llx\n",
6806 vcpu->vc_vmx_true_procbased_ctls);
6807 CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING);
6808 CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING);
6809 CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING);
6810 CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING);
6811 CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING);
6812 CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING);
6813 CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING);
6814 CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING);
6815 CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING);
6816 CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING);
6817 CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING);
6818 CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW);
6819 CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING);
6820 CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING);
6821 CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING);
6822 CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS);
6823 CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG);
6824 CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS);
6825 CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING);
6826 CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING);
6827 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6828 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
6829 printf(" procbased2 ctls: 0x%llx\n",
6830 vcpu->vc_vmx_procbased2_ctls);
6831 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC);
6832 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT);
6833 CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING);
6834 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP);
6835 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE);
6836 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID);
6837 CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING);
6838 CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST);
6839 CTRL_DUMP(vcpu, PROCBASED2,
6840 APIC_REGISTER_VIRTUALIZATION);
6841 CTRL_DUMP(vcpu, PROCBASED2,
6842 VIRTUAL_INTERRUPT_DELIVERY);
6843 CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING);
6844 CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING);
6845 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID);
6846 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS);
6847 CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING);
6848 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING);
6849 CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING);
6850 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML);
6851 CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE);
6852 CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT);
6853 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS);
6854 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING);
6855 }
6856 printf(" entry ctls: 0x%llx\n",
6857 vcpu->vc_vmx_entry_ctls);
6858 printf(" true entry ctls: 0x%llx\n",
6859 vcpu->vc_vmx_true_entry_ctls);
6860 CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS);
6861 CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST);
6862 CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM);
6863 CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT);
6864 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY);
6865 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY);
6866 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY);
6867 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY);
6868 CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT);
6869 printf(" exit ctls: 0x%llx\n",
6870 vcpu->vc_vmx_exit_ctls);
6871 printf(" true exit ctls: 0x%llx\n",
6872 vcpu->vc_vmx_true_exit_ctls);
6873 CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS);
6874 CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE);
6875 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT);
6876 CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT);
6877 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT);
6878 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT);
6879 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT);
6880 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT);
6881 CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER);
6882 CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT);
6883 CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT);
6884 }
6885 }
6886
6887 /*
6888 * vmx_dump_vmcs_field
6889 *
6890 * Debug function to dump the contents of a single VMCS field
6891 *
6892 * Parameters:
6893 * fieldid: VMCS Field ID
6894 * msg: string to display
6895 */
6896 void
vmx_dump_vmcs_field(uint16_t fieldid, const char *msg)
6898 {
6899 uint8_t width;
6900 uint64_t val;
6901
6902
6903 DPRINTF("%s (0x%04x): ", msg, fieldid);
6904 if (vmread(fieldid, &val))
6905 DPRINTF("???? ");
6906 else {
6907 /*
 * Field width encoding: bits 14:13
6909 *
6910 * 0: 16-bit
6911 * 1: 64-bit
6912 * 2: 32-bit
6913 * 3: natural width
6914 */
6915 width = (fieldid >> 13) & 0x3;
6916 switch (width) {
6917 case 0: DPRINTF("0x%04llx ", val); break;
6918 case 1:
6919 case 3: DPRINTF("0x%016llx ", val); break;
6920 case 2: DPRINTF("0x%08llx ", val);
6921 }
6922 }
6923 }
6924
6925 /*
6926 * vmx_dump_vmcs
6927 *
6928 * Debug function to dump the contents of the current VMCS.
6929 */
6930 void
vmx_dump_vmcs(struct vcpu *vcpu)
6932 {
6933 int has_sec, i;
6934 uint32_t cr3_tgt_ct;
6935
6936 /* XXX save and load new vmcs, restore at end */
6937
6938 DPRINTF("--CURRENT VMCS STATE--\n");
6939 printf("VMCS launched: %s\n",
6940 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? "Yes" : "No");
6941 DPRINTF("VMXON revision : 0x%x\n",
6942 curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
6943 DPRINTF("CR0 fixed0: 0x%llx\n",
6944 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0);
6945 DPRINTF("CR0 fixed1: 0x%llx\n",
6946 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
6947 DPRINTF("CR4 fixed0: 0x%llx\n",
6948 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0);
6949 DPRINTF("CR4 fixed1: 0x%llx\n",
6950 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
6951 DPRINTF("MSR table size: 0x%x\n",
6952 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1));
6953
6954 has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
6955 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1);
6956
6957 if (has_sec) {
6958 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6959 IA32_VMX_ENABLE_VPID, 1)) {
6960 vmx_dump_vmcs_field(VMCS_GUEST_VPID, "VPID");
6961 }
6962 }
6963
6964 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
6965 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
6966 vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR,
6967 "Posted Int Notif Vec");
6968 }
6969
6970 if (has_sec) {
6971 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6972 IA32_VMX_EPT_VIOLATION_VE, 1)) {
6973 vmx_dump_vmcs_field(VMCS_EPTP_INDEX, "EPTP idx");
6974 }
6975 }
6976
6977 DPRINTF("\n");
6978 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL, "G.ES");
6979 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL, "G.CS");
6980 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL, "G.SS");
6981 DPRINTF("\n");
6982 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL, "G.DS");
6983 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL, "G.FS");
6984 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL, "G.GS");
6985 DPRINTF("\n");
6986 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL, "LDTR");
6987 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL, "G.TR");
6988
6989 if (has_sec) {
6990 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6991 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
6992 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS,
6993 "Int sts");
6994 }
6995
6996 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
6997 IA32_VMX_ENABLE_PML, 1)) {
6998 vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX, "PML Idx");
6999 }
7000 }
7001
7002 DPRINTF("\n");
7003 vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL, "H.ES");
7004 vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL, "H.CS");
7005 vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL, "H.SS");
7006 DPRINTF("\n");
7007 vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL, "H.DS");
7008 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL, "H.FS");
7009 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL, "H.GS");
7010 DPRINTF("\n");
7011
7012 vmx_dump_vmcs_field(VMCS_IO_BITMAP_A, "I/O Bitmap A");
7013 DPRINTF("\n");
7014 vmx_dump_vmcs_field(VMCS_IO_BITMAP_B, "I/O Bitmap B");
7015 DPRINTF("\n");
7016
7017 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7018 IA32_VMX_USE_MSR_BITMAPS, 1)) {
7019 vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS, "MSR Bitmap");
7020 DPRINTF("\n");
7021 }
7022
7023 vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS, "Exit Store MSRs");
7024 DPRINTF("\n");
7025 vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS, "Exit Load MSRs");
7026 DPRINTF("\n");
7027 vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS, "Entry Load MSRs");
7028 DPRINTF("\n");
7029 vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER, "Exec VMCS Ptr");
7030 DPRINTF("\n");
7031
7032 if (has_sec) {
7033 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7034 IA32_VMX_ENABLE_PML, 1)) {
7035 vmx_dump_vmcs_field(VMCS_PML_ADDRESS, "PML Addr");
7036 DPRINTF("\n");
7037 }
7038 }
7039
7040 vmx_dump_vmcs_field(VMCS_TSC_OFFSET, "TSC Offset");
7041 DPRINTF("\n");
7042
7043 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7044 IA32_VMX_USE_TPR_SHADOW, 1)) {
7045 vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS,
7046 "Virtual APIC Addr");
7047 DPRINTF("\n");
7048 }
7049
7050 if (has_sec) {
7051 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7052 IA32_VMX_VIRTUALIZE_APIC, 1)) {
7053 vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS,
7054 "APIC Access Addr");
7055 DPRINTF("\n");
7056 }
7057 }
7058
7059 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7060 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) {
7061 vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC,
7062 "Posted Int Desc Addr");
7063 DPRINTF("\n");
7064 }
7065
7066 if (has_sec) {
7067 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7068 IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) {
7069 vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS,
7070 "VM Function Controls");
7071 DPRINTF("\n");
7072 }
7073
7074 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7075 IA32_VMX_ENABLE_EPT, 1)) {
7076 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP,
7077 "EPT Pointer");
7078 DPRINTF("\n");
7079 }
7080
7081 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7082 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) {
7083 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_0,
7084 "EOI Exit Bitmap 0");
7085 DPRINTF("\n");
7086 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_1,
7087 "EOI Exit Bitmap 1");
7088 DPRINTF("\n");
7089 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_2,
7090 "EOI Exit Bitmap 2");
7091 DPRINTF("\n");
7092 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_3,
7093 "EOI Exit Bitmap 3");
7094 DPRINTF("\n");
7095 }
7096
7097 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7098 IA32_VMX_VMCS_SHADOWING, 1)) {
7099 vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS,
7100 "VMREAD Bitmap Addr");
7101 DPRINTF("\n");
7102 vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS,
7103 "VMWRITE Bitmap Addr");
7104 DPRINTF("\n");
7105 }
7106
7107 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7108 IA32_VMX_EPT_VIOLATION_VE, 1)) {
7109 vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS,
7110 "#VE Addr");
7111 DPRINTF("\n");
7112 }
7113
7114 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7115 IA32_VMX_ENABLE_XSAVES_XRSTORS, 1)) {
7116 vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP,
7117 "XSS exiting bitmap addr");
7118 DPRINTF("\n");
7119 }
7120
7121 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7122 IA32_VMX_ENABLE_ENCLS_EXITING, 1)) {
7123 vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP,
7124 "Encls exiting bitmap addr");
7125 DPRINTF("\n");
7126 }
7127
7128 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7129 IA32_VMX_ENABLE_TSC_SCALING, 1)) {
7130 vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER,
7131 "TSC scaling factor");
7132 DPRINTF("\n");
7133 }
7134
7135 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7136 IA32_VMX_ENABLE_EPT, 1)) {
7137 vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS,
7138 "Guest PA");
7139 DPRINTF("\n");
7140 }
7141 }
7142
7143 vmx_dump_vmcs_field(VMCS_LINK_POINTER, "VMCS Link Pointer");
7144 DPRINTF("\n");
7145 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL, "Guest DEBUGCTL");
7146 DPRINTF("\n");
7147
7148 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7149 IA32_VMX_LOAD_IA32_PAT_ON_ENTRY, 1) ||
7150 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7151 IA32_VMX_SAVE_IA32_PAT_ON_EXIT, 1)) {
7152 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT,
7153 "Guest PAT");
7154 DPRINTF("\n");
7155 }
7156
7157 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7158 IA32_VMX_LOAD_IA32_EFER_ON_ENTRY, 1) ||
7159 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7160 IA32_VMX_SAVE_IA32_EFER_ON_EXIT, 1)) {
7161 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER,
7162 "Guest EFER");
7163 DPRINTF("\n");
7164 }
7165
7166 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7167 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY, 1)) {
7168 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL,
7169 "Guest Perf Global Ctrl");
7170 DPRINTF("\n");
7171 }
7172
7173 if (has_sec) {
7174 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7175 IA32_VMX_ENABLE_EPT, 1)) {
7176 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE0, "Guest PDPTE0");
7177 DPRINTF("\n");
7178 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE1, "Guest PDPTE1");
7179 DPRINTF("\n");
7180 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE2, "Guest PDPTE2");
7181 DPRINTF("\n");
7182 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE3, "Guest PDPTE3");
7183 DPRINTF("\n");
7184 }
7185 }
7186
7187 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS,
7188 IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY, 1) ||
7189 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7190 IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT, 1)) {
7191 vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS,
7192 "Guest BNDCFGS");
7193 DPRINTF("\n");
7194 }
7195
7196 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7197 IA32_VMX_LOAD_IA32_PAT_ON_EXIT, 1)) {
7198 vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT,
7199 "Host PAT");
7200 DPRINTF("\n");
7201 }
7202
7203 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7204 IA32_VMX_LOAD_IA32_EFER_ON_EXIT, 1)) {
7205 vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER,
7206 "Host EFER");
7207 DPRINTF("\n");
7208 }
7209
7210 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS,
7211 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT, 1)) {
7212 vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL,
7213 "Host Perf Global Ctrl");
7214 DPRINTF("\n");
7215 }
7216
7217 vmx_dump_vmcs_field(VMCS_PINBASED_CTLS, "Pinbased Ctrls");
7218 vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS, "Procbased Ctrls");
7219 DPRINTF("\n");
7220 vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP, "Exception Bitmap");
7221 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK, "#PF Err Code Mask");
7222 DPRINTF("\n");
7223 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH, "#PF Err Code Match");
7224 vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT, "CR3 Tgt Count");
7225 DPRINTF("\n");
7226 vmx_dump_vmcs_field(VMCS_EXIT_CTLS, "Exit Ctrls");
7227 vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT, "Exit MSR Store Ct");
7228 DPRINTF("\n");
7229 vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT, "Exit MSR Load Ct");
7230 vmx_dump_vmcs_field(VMCS_ENTRY_CTLS, "Entry Ctrls");
7231 DPRINTF("\n");
7232 vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT, "Entry MSR Load Ct");
7233 vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO, "Entry Int. Info");
7234 DPRINTF("\n");
7235 vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
7236 "Entry Ex. Err Code");
7237 vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH, "Entry Insn Len");
7238 DPRINTF("\n");
7239
7240 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7241 IA32_VMX_USE_TPR_SHADOW, 1)) {
7242 vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD, "TPR Threshold");
7243 DPRINTF("\n");
7244 }
7245
7246 if (has_sec) {
7247 vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS, "2ndary Ctrls");
7248 DPRINTF("\n");
7249 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
7250 IA32_VMX_PAUSE_LOOP_EXITING, 1)) {
7251 vmx_dump_vmcs_field(VMCS_PLE_GAP, "PLE Gap");
7252 vmx_dump_vmcs_field(VMCS_PLE_WINDOW, "PLE Window");
7253 }
7254 DPRINTF("\n");
7255 }
7256
7257 vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR, "Insn Error");
7258 vmx_dump_vmcs_field(VMCS_EXIT_REASON, "Exit Reason");
7259 DPRINTF("\n");
7260
7261 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO, "Exit Int. Info");
7262 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE,
7263 "Exit Int. Err Code");
7264 DPRINTF("\n");
7265
7266 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO, "IDT vect info");
7267 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE,
7268 "IDT vect err code");
7269 DPRINTF("\n");
7270
7271 vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH, "Insn Len");
7272 vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO, "Exit Insn Info");
7273 DPRINTF("\n");
7274
7275 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT, "G. ES Lim");
7276 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT, "G. CS Lim");
7277 DPRINTF("\n");
7278
7279 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT, "G. SS Lim");
7280 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT, "G. DS Lim");
7281 DPRINTF("\n");
7282
7283 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT, "G. FS Lim");
7284 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT, "G. GS Lim");
7285 DPRINTF("\n");
7286
7287 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT, "G. LDTR Lim");
7288 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT, "G. TR Lim");
7289 DPRINTF("\n");
7290
7291 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT, "G. GDTR Lim");
7292 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT, "G. IDTR Lim");
7293 DPRINTF("\n");
7294
7295 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR, "G. ES AR");
7296 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR, "G. CS AR");
7297 DPRINTF("\n");
7298
7299 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR, "G. SS AR");
7300 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR, "G. DS AR");
7301 DPRINTF("\n");
7302
7303 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR, "G. FS AR");
7304 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR, "G. GS AR");
7305 DPRINTF("\n");
7306
7307 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR, "G. LDTR AR");
7308 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR, "G. TR AR");
7309 DPRINTF("\n");
7310
7311 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST, "G. Int St.");
7312 vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE, "G. Act St.");
7313 DPRINTF("\n");
7314
7315 vmx_dump_vmcs_field(VMCS_GUEST_SMBASE, "G. SMBASE");
7316 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS, "G. SYSENTER CS");
7317 DPRINTF("\n");
7318
7319 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS,
7320 IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER, 1)) {
7321 vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL,
7322 "VMX Preempt Timer");
7323 DPRINTF("\n");
7324 }
7325
7326 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS, "H. SYSENTER CS");
7327 DPRINTF("\n");
7328
7329 vmx_dump_vmcs_field(VMCS_CR0_MASK, "CR0 Mask");
7330 DPRINTF("\n");
7331 vmx_dump_vmcs_field(VMCS_CR4_MASK, "CR4 Mask");
7332 DPRINTF("\n");
7333
7334 vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW, "CR0 RD Shadow");
7335 DPRINTF("\n");
7336 vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW, "CR4 RD Shadow");
7337 DPRINTF("\n");
7338
7339 /* We assume all CPUs have the same max CR3 target ct */
7340 cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count;
7341 DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct);
7342 if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) {
7343 for (i = 0 ; i < cr3_tgt_ct; i++) {
7344 vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i),
7345 "CR3 Target");
7346 DPRINTF("\n");
7347 }
7348 } else {
		DPRINTF("(Bogus CR3 Target Count > %d)\n", VMX_MAX_CR3_TARGETS);
7350 }
7351
7352 vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION, "G. Exit Qual");
7353 DPRINTF("\n");
7354 vmx_dump_vmcs_field(VMCS_IO_RCX, "I/O RCX");
7355 DPRINTF("\n");
7356 vmx_dump_vmcs_field(VMCS_IO_RSI, "I/O RSI");
7357 DPRINTF("\n");
7358 vmx_dump_vmcs_field(VMCS_IO_RDI, "I/O RDI");
7359 DPRINTF("\n");
7360 vmx_dump_vmcs_field(VMCS_IO_RIP, "I/O RIP");
7361 DPRINTF("\n");
7362 vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS, "G. Lin Addr");
7363 DPRINTF("\n");
7364 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR0, "G. CR0");
7365 DPRINTF("\n");
7366 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR3, "G. CR3");
7367 DPRINTF("\n");
7368 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR4, "G. CR4");
7369 DPRINTF("\n");
7370 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE, "G. ES Base");
7371 DPRINTF("\n");
7372 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE, "G. CS Base");
7373 DPRINTF("\n");
7374 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE, "G. SS Base");
7375 DPRINTF("\n");
7376 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE, "G. DS Base");
7377 DPRINTF("\n");
7378 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE, "G. FS Base");
7379 DPRINTF("\n");
7380 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE, "G. GS Base");
7381 DPRINTF("\n");
7382 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE, "G. LDTR Base");
7383 DPRINTF("\n");
7384 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE, "G. TR Base");
7385 DPRINTF("\n");
7386 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE, "G. GDTR Base");
7387 DPRINTF("\n");
7388 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE, "G. IDTR Base");
7389 DPRINTF("\n");
7390 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR7, "G. DR7");
7391 DPRINTF("\n");
7392 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP, "G. RSP");
7393 DPRINTF("\n");
7394 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP, "G. RIP");
7395 DPRINTF("\n");
7396 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS, "G. RFLAGS");
7397 DPRINTF("\n");
7398 vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC, "G. Pend Dbg Exc");
7399 DPRINTF("\n");
7400 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP, "G. SYSENTER ESP");
7401 DPRINTF("\n");
7402 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP, "G. SYSENTER EIP");
7403 DPRINTF("\n");
7404 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR0, "H. CR0");
7405 DPRINTF("\n");
7406 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR3, "H. CR3");
7407 DPRINTF("\n");
7408 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR4, "H. CR4");
7409 DPRINTF("\n");
7410 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE, "H. FS Base");
7411 DPRINTF("\n");
7412 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE, "H. GS Base");
7413 DPRINTF("\n");
7414 vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE, "H. TR Base");
7415 DPRINTF("\n");
7416 vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE, "H. GDTR Base");
7417 DPRINTF("\n");
7418 vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE, "H. IDTR Base");
7419 DPRINTF("\n");
7420 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP, "H. SYSENTER ESP");
7421 DPRINTF("\n");
7422 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP, "H. SYSENTER EIP");
7423 DPRINTF("\n");
7424 vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP, "H. RSP");
7425 DPRINTF("\n");
7426 vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP, "H. RIP");
7427 DPRINTF("\n");
7428 }
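
/*
 * Illustrative sketch (not part of the original file): the CR3 target
 * loop above relies on the VMCS field encodings for the CR3 target
 * registers being consecutive even values, i.e. target i lives at
 * VMCS_CR3_TARGET_0 + 2 * i.  A standalone reader of one target could
 * look like this; vmx_dump_cr3_target() is a hypothetical name and the
 * field type is assumed to match vmread()'s first argument.
 */
#if 0
static void
vmx_dump_cr3_target(int i)
{
	uint64_t r;

	/* Each CR3 target field is two encoding slots past the previous. */
	if (vmread(VMCS_CR3_TARGET_0 + (2 * i), &r))
		DPRINTF("CR3 Target %d: (error reading)\n", i);
	else
		DPRINTF("CR3 Target %d: 0x%016llx\n", i, r);
}
#endif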
7429
7430 /*
7431 * vmx_vcpu_dump_regs
7432 *
7433 * Debug function to print the registers of the supplied vcpu.
7434 * Note: the VMCS for 'vcpu' must be loaded on this pcpu.
7435 *
7436 * Parameters:
7437 * vcpu - vcpu whose registers should be dumped
7438 */
7439 void
7440 vmx_vcpu_dump_regs(struct vcpu *vcpu)
7441 {
7442 uint64_t r;
7443 int i;
7444 struct vmx_msr_store *msr_store;
7445
7446 /* XXX reformat this for 32 bit guest as needed */
7447 DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu));
7448 i = vmm_get_guest_cpu_cpl(vcpu);
7449 if (i == -1)
7450 DPRINTF(" CPL=unknown\n");
7451 else
7452 DPRINTF(" CPL=%d\n", i);
7453 DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n",
7454 vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx,
7455 vcpu->vc_gueststate.vg_rcx);
7456 DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n",
7457 vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp,
7458 vcpu->vc_gueststate.vg_rdi);
7459 DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n",
7460 vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8,
7461 vcpu->vc_gueststate.vg_r9);
7462 DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n",
7463 vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11,
7464 vcpu->vc_gueststate.vg_r12);
7465 DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n",
7466 vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14,
7467 vcpu->vc_gueststate.vg_r15);
7468
7469 DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip);
7470 if (vmread(VMCS_GUEST_IA32_RSP, &r))
7471 DPRINTF("(error reading)\n");
7472 else
7473 DPRINTF("0x%016llx\n", r);
7474
7475 DPRINTF(" rflags=");
7476 if (vmread(VMCS_GUEST_IA32_RFLAGS, &r))
7477 DPRINTF("(error reading)\n");
7478 else {
7479 DPRINTF("0x%016llx ", r);
7480 vmm_decode_rflags(r);
7481 }
7482
7483 DPRINTF(" cr0=");
7484 if (vmread(VMCS_GUEST_IA32_CR0, &r))
7485 DPRINTF("(error reading)\n");
7486 else {
7487 DPRINTF("0x%016llx ", r);
7488 vmm_decode_cr0(r);
7489 }
7490
7491 DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2);
7492
7493 DPRINTF(" cr3=");
7494 if (vmread(VMCS_GUEST_IA32_CR3, &r))
7495 DPRINTF("(error reading)\n");
7496 else {
7497 DPRINTF("0x%016llx ", r);
7498 vmm_decode_cr3(r);
7499 }
7500
7501 DPRINTF(" cr4=");
7502 if (vmread(VMCS_GUEST_IA32_CR4, &r))
7503 DPRINTF("(error reading)\n");
7504 else {
7505 DPRINTF("0x%016llx ", r);
7506 vmm_decode_cr4(r);
7507 }
7508
7509 DPRINTF(" --Guest Segment Info--\n");
7510
7511 DPRINTF(" cs=");
7512 if (vmread(VMCS_GUEST_IA32_CS_SEL, &r))
7513 DPRINTF("(error reading)");
7514 else
7515 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7516
7517 DPRINTF(" base=");
7518 if (vmread(VMCS_GUEST_IA32_CS_BASE, &r))
7519 DPRINTF("(error reading)");
7520 else
7521 DPRINTF("0x%016llx", r);
7522
7523 DPRINTF(" limit=");
7524 if (vmread(VMCS_GUEST_IA32_CS_LIMIT, &r))
7525 DPRINTF("(error reading)");
7526 else
7527 DPRINTF("0x%016llx", r);
7528
7529 DPRINTF(" a/r=");
7530 if (vmread(VMCS_GUEST_IA32_CS_AR, &r))
7531 DPRINTF("(error reading)\n");
7532 else {
7533 DPRINTF("0x%04llx\n ", r);
7534 vmm_segment_desc_decode(r);
7535 }
7536
7537 DPRINTF(" ds=");
7538 if (vmread(VMCS_GUEST_IA32_DS_SEL, &r))
7539 DPRINTF("(error reading)");
7540 else
7541 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7542
7543 DPRINTF(" base=");
7544 if (vmread(VMCS_GUEST_IA32_DS_BASE, &r))
7545 DPRINTF("(error reading)");
7546 else
7547 DPRINTF("0x%016llx", r);
7548
7549 DPRINTF(" limit=");
7550 if (vmread(VMCS_GUEST_IA32_DS_LIMIT, &r))
7551 DPRINTF("(error reading)");
7552 else
7553 DPRINTF("0x%016llx", r);
7554
7555 DPRINTF(" a/r=");
7556 if (vmread(VMCS_GUEST_IA32_DS_AR, &r))
7557 DPRINTF("(error reading)\n");
7558 else {
7559 DPRINTF("0x%04llx\n ", r);
7560 vmm_segment_desc_decode(r);
7561 }
7562
7563 DPRINTF(" es=");
7564 if (vmread(VMCS_GUEST_IA32_ES_SEL, &r))
7565 DPRINTF("(error reading)");
7566 else
7567 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7568
7569 DPRINTF(" base=");
7570 if (vmread(VMCS_GUEST_IA32_ES_BASE, &r))
7571 DPRINTF("(error reading)");
7572 else
7573 DPRINTF("0x%016llx", r);
7574
7575 DPRINTF(" limit=");
7576 if (vmread(VMCS_GUEST_IA32_ES_LIMIT, &r))
7577 DPRINTF("(error reading)");
7578 else
7579 DPRINTF("0x%016llx", r);
7580
7581 DPRINTF(" a/r=");
7582 if (vmread(VMCS_GUEST_IA32_ES_AR, &r))
7583 DPRINTF("(error reading)\n");
7584 else {
7585 DPRINTF("0x%04llx\n ", r);
7586 vmm_segment_desc_decode(r);
7587 }
7588
7589 DPRINTF(" fs=");
7590 if (vmread(VMCS_GUEST_IA32_FS_SEL, &r))
7591 DPRINTF("(error reading)");
7592 else
7593 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7594
7595 DPRINTF(" base=");
7596 if (vmread(VMCS_GUEST_IA32_FS_BASE, &r))
7597 DPRINTF("(error reading)");
7598 else
7599 DPRINTF("0x%016llx", r);
7600
7601 DPRINTF(" limit=");
7602 if (vmread(VMCS_GUEST_IA32_FS_LIMIT, &r))
7603 DPRINTF("(error reading)");
7604 else
7605 DPRINTF("0x%016llx", r);
7606
7607 DPRINTF(" a/r=");
7608 if (vmread(VMCS_GUEST_IA32_FS_AR, &r))
7609 DPRINTF("(error reading)\n");
7610 else {
7611 DPRINTF("0x%04llx\n ", r);
7612 vmm_segment_desc_decode(r);
7613 }
7614
7615 DPRINTF(" gs=");
7616 if (vmread(VMCS_GUEST_IA32_GS_SEL, &r))
7617 DPRINTF("(error reading)");
7618 else
7619 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7620
7621 DPRINTF(" base=");
7622 if (vmread(VMCS_GUEST_IA32_GS_BASE, &r))
7623 DPRINTF("(error reading)");
7624 else
7625 DPRINTF("0x%016llx", r);
7626
7627 DPRINTF(" limit=");
7628 if (vmread(VMCS_GUEST_IA32_GS_LIMIT, &r))
7629 DPRINTF("(error reading)");
7630 else
7631 DPRINTF("0x%016llx", r);
7632
7633 DPRINTF(" a/r=");
7634 if (vmread(VMCS_GUEST_IA32_GS_AR, &r))
7635 DPRINTF("(error reading)\n");
7636 else {
7637 DPRINTF("0x%04llx\n ", r);
7638 vmm_segment_desc_decode(r);
7639 }
7640
7641 DPRINTF(" ss=");
7642 if (vmread(VMCS_GUEST_IA32_SS_SEL, &r))
7643 DPRINTF("(error reading)");
7644 else
7645 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7646
7647 DPRINTF(" base=");
7648 if (vmread(VMCS_GUEST_IA32_SS_BASE, &r))
7649 DPRINTF("(error reading)");
7650 else
7651 DPRINTF("0x%016llx", r);
7652
7653 DPRINTF(" limit=");
7654 if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r))
7655 DPRINTF("(error reading)");
7656 else
7657 DPRINTF("0x%016llx", r);
7658
7659 DPRINTF(" a/r=");
7660 if (vmread(VMCS_GUEST_IA32_SS_AR, &r))
7661 DPRINTF("(error reading)\n");
7662 else {
7663 DPRINTF("0x%04llx\n ", r);
7664 vmm_segment_desc_decode(r);
7665 }
7666
7667 DPRINTF(" tr=");
7668 if (vmread(VMCS_GUEST_IA32_TR_SEL, &r))
7669 DPRINTF("(error reading)");
7670 else
7671 DPRINTF("0x%04llx", r);
7672
7673 DPRINTF(" base=");
7674 if (vmread(VMCS_GUEST_IA32_TR_BASE, &r))
7675 DPRINTF("(error reading)");
7676 else
7677 DPRINTF("0x%016llx", r);
7678
7679 DPRINTF(" limit=");
7680 if (vmread(VMCS_GUEST_IA32_TR_LIMIT, &r))
7681 DPRINTF("(error reading)");
7682 else
7683 DPRINTF("0x%016llx", r);
7684
7685 DPRINTF(" a/r=");
7686 if (vmread(VMCS_GUEST_IA32_TR_AR, &r))
7687 DPRINTF("(error reading)\n");
7688 else {
7689 DPRINTF("0x%04llx\n ", r);
7690 vmm_segment_desc_decode(r);
7691 }
7692
7693 DPRINTF(" gdtr base=");
7694 if (vmread(VMCS_GUEST_IA32_GDTR_BASE, &r))
7695 DPRINTF("(error reading) ");
7696 else
7697 DPRINTF("0x%016llx", r);
7698
7699 DPRINTF(" limit=");
7700 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &r))
7701 DPRINTF("(error reading)\n");
7702 else
7703 DPRINTF("0x%016llx\n", r);
7704
7705 DPRINTF(" idtr base=");
7706 if (vmread(VMCS_GUEST_IA32_IDTR_BASE, &r))
7707 DPRINTF("(error reading) ");
7708 else
7709 DPRINTF("0x%016llx", r);
7710
7711 DPRINTF(" limit=");
7712 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &r))
7713 DPRINTF("(error reading)\n");
7714 else
7715 DPRINTF("0x%016llx\n", r);
7716
7717 DPRINTF(" ldtr=");
7718 if (vmread(VMCS_GUEST_IA32_LDTR_SEL, &r))
7719 DPRINTF("(error reading)");
7720 else
7721 DPRINTF("0x%04llx", r);
7722
7723 DPRINTF(" base=");
7724 if (vmread(VMCS_GUEST_IA32_LDTR_BASE, &r))
7725 DPRINTF("(error reading)");
7726 else
7727 DPRINTF("0x%016llx", r);
7728
7729 DPRINTF(" limit=");
7730 if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT, &r))
7731 DPRINTF("(error reading)");
7732 else
7733 DPRINTF("0x%016llx", r);
7734
7735 DPRINTF(" a/r=");
7736 if (vmread(VMCS_GUEST_IA32_LDTR_AR, &r))
7737 DPRINTF("(error reading)\n");
7738 else {
7739 DPRINTF("0x%04llx\n ", r);
7740 vmm_segment_desc_decode(r);
7741 }
7742
7743 DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
7744 (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
7745 (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);
7746
7747 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
7748
7749 for (i = 0; i < VCPU_REGS_NMSRS; i++) {
7750 DPRINTF(" MSR %d @ %p : 0x%08llx (%s), "
7751 "value=0x%016llx ",
7752 i, &msr_store[i], msr_store[i].vms_index,
7753 msr_name_decode(msr_store[i].vms_index),
7754 msr_store[i].vms_data);
7755 vmm_decode_msr_value(msr_store[i].vms_index,
7756 msr_store[i].vms_data);
7757 }
7758 }
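
/*
 * Illustrative sketch (not part of the original file): the dump above
 * repeats the same "vmread, then print the value or an error" pattern
 * for every VMCS field.  A helper factoring that pattern out could look
 * like the following; print_vmcs_u64() is a hypothetical name, and the
 * field parameter type is assumed to match vmread()'s first argument.
 */
#if 0
static void
print_vmcs_u64(const char *label, uint64_t field)
{
	uint64_t r;

	DPRINTF(" %s=", label);
	if (vmread(field, &r))
		DPRINTF("(error reading)");
	else
		DPRINTF("0x%016llx", r);
}
#endif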
7759
7760 /*
7761 * msr_name_decode
7762 *
7763 * Returns a human-readable name for the MSR supplied in 'msr'.
7764 *
7765 * Parameters:
7766 * msr - The MSR to decode
7767 *
7768 * Return value:
7769 * NULL-terminated character string containing the name of the MSR requested
7770 */
7771 const char *
7772 msr_name_decode(uint32_t msr)
7773 {
7774 /*
7775 * Add as needed. Also consider adding a decode function when
7776 * adding to this table.
7777 */
7778
7779 switch (msr) {
7780 case MSR_TSC: return "TSC";
7781 case MSR_APICBASE: return "APIC base";
7782 case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control";
7783 case MSR_PERFCTR0: return "perf counter 0";
7784 case MSR_PERFCTR1: return "perf counter 1";
7785 case MSR_TEMPERATURE_TARGET: return "temperature target";
7786 case MSR_MTRRcap: return "MTRR cap";
7787 case MSR_PERF_STATUS: return "perf status";
7788 case MSR_PERF_CTL: return "perf control";
7789 case MSR_MTRRvarBase: return "MTRR variable base";
7790 case MSR_MTRRfix64K_00000: return "MTRR fixed 64K";
7791 case MSR_MTRRfix16K_80000: return "MTRR fixed 16K";
7792 case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K";
7793 case MSR_CR_PAT: return "PAT";
7794 case MSR_MTRRdefType: return "MTRR default type";
7795 case MSR_EFER: return "EFER";
7796 case MSR_STAR: return "STAR";
7797 case MSR_LSTAR: return "LSTAR";
7798 case MSR_CSTAR: return "CSTAR";
7799 case MSR_SFMASK: return "SFMASK";
7800 case MSR_FSBASE: return "FSBASE";
7801 case MSR_GSBASE: return "GSBASE";
7802 case MSR_KERNELGSBASE: return "KGSBASE";
7803 case MSR_MISC_ENABLE: return "Misc Enable";
7804 default: return "Unknown MSR";
7805 }
7806 }
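
/*
 * Extending the table above normally takes two edits: a new case here
 * returning a printable name and, if the register has bits worth
 * decoding, a matching case in vmm_decode_msr_value() below.  The
 * existing MSR_MISC_ENABLE entries show the pattern:
 *
 *	case MSR_MISC_ENABLE: return "Misc Enable";
 *
 * paired with
 *
 *	case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
 */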
7807
7808 /*
7809 * vmm_segment_desc_decode
7810 *
7811 * Debug function to print segment information for supplied descriptor
7812 *
7813 * Parameters:
7814 * val - The A/R bytes for the segment descriptor to decode
7815 */
7816 void
7817 vmm_segment_desc_decode(uint64_t val)
7818 {
7819 uint16_t ar;
7820 uint8_t g, type, s, dpl, p, dib, l;
7821 uint32_t unusable;
7822
7823 /* Exit early on unusable descriptors */
7824 unusable = val & 0x10000;
7825 if (unusable) {
7826 DPRINTF("(unusable)\n");
7827 return;
7828 }
7829
7830 ar = (uint16_t)val;
7831
7832 g = (ar & 0x8000) >> 15;
7833 dib = (ar & 0x4000) >> 14;
7834 l = (ar & 0x2000) >> 13;
7835 p = (ar & 0x80) >> 7;
7836 dpl = (ar & 0x60) >> 5;
7837 s = (ar & 0x10) >> 4;
7838 type = (ar & 0xf);
7839
7840 DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ",
7841 g, dib, l, p, s);
7842
7843 DPRINTF("type=");
7844 if (!s) {
7845 switch (type) {
7846 case SDT_SYSLDT: DPRINTF("ldt\n"); break;
7847 case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break;
7848 case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break;
7849 case SDT_SYS386CGT: DPRINTF("call gate\n"); break;
7850 case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break;
7851 case SDT_SYS386TGT: DPRINTF("trap gate\n"); break;
7852 /* XXX handle 32 bit segment types by inspecting mode */
7853 default: DPRINTF("unknown");
7854 }
7855 } else {
7856 switch (type + 16) {
7857 case SDT_MEMRO: DPRINTF("data, r/o\n"); break;
7858 case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break;
7859 case SDT_MEMRW: DPRINTF("data, r/w\n"); break;
7860 case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break;
7861 case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break;
7862 case SDT_MEMRODA: DPRINTF("data, r/o, expand down, "
7863 "accessed\n");
7864 break;
7865 case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break;
7866 case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, "
7867 "accessed\n");
7868 break;
7869 case SDT_MEME: DPRINTF("code, x only\n"); break;
7870 case SDT_MEMEA: DPRINTF("code, x only, accessed\n"); break;
7871 case SDT_MEMER: DPRINTF("code, r/x\n"); break;
7872 case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break;
7873 case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break;
7874 case SDT_MEMEAC: DPRINTF("code, x only, conforming, "
7875 "accessed\n");
7876 break;
7877 case SDT_MEMERC: DPRINTF("code, r/x, conforming\n"); break;
7878 case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n");
7879 break;
7880 }
7881 }
7882 }
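
/*
 * Worked example: a typical 64-bit code segment has an access-rights
 * value of 0xa09b.  The decode above reports granularity=1 dib=0
 * l(64 bit)=1 present=1 sys=1 and, since the S bit is set and the type
 * field is 0xb (SDT_MEMERA once biased by 16), prints
 * "code, r/x, accessed".
 */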
7883
7884 void
7885 vmm_decode_cr0(uint64_t cr0)
7886 {
7887 struct vmm_reg_debug_info cr0_info[11] = {
7888 { CR0_PG, "PG ", "pg " },
7889 { CR0_CD, "CD ", "cd " },
7890 { CR0_NW, "NW ", "nw " },
7891 { CR0_AM, "AM ", "am " },
7892 { CR0_WP, "WP ", "wp " },
7893 { CR0_NE, "NE ", "ne " },
7894 { CR0_ET, "ET ", "et " },
7895 { CR0_TS, "TS ", "ts " },
7896 { CR0_EM, "EM ", "em " },
7897 { CR0_MP, "MP ", "mp " },
7898 { CR0_PE, "PE", "pe" }
7899 };
7900
7901 uint8_t i;
7902
7903 DPRINTF("(");
7904 for (i = 0; i < nitems(cr0_info); i++)
7905 if (cr0 & cr0_info[i].vrdi_bit)
7906 DPRINTF("%s", cr0_info[i].vrdi_present);
7907 else
7908 DPRINTF("%s", cr0_info[i].vrdi_absent);
7909
7910 DPRINTF(")\n");
7911 }
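
/*
 * Worked example: a guest running with cr0=0x80010031 (PG, WP, NE, ET
 * and PE set) is printed by the loop above as
 * "(PG cd nw am WP NE ET ts em mp PE)": upper case for bits that are
 * set, lower case for bits that are clear.
 */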
7912
7913 void
7914 vmm_decode_cr3(uint64_t cr3)
7915 {
7916 struct vmm_reg_debug_info cr3_info[2] = {
7917 { CR3_PWT, "PWT ", "pwt "},
7918 { CR3_PCD, "PCD", "pcd"}
7919 };
7920
7921 uint64_t cr4;
7922 uint8_t i;
7923
7924 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
7925 DPRINTF("(error)\n");
7926 return;
7927 }
7928
7929 /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
7930 if ((cr4 & CR4_PCIDE) == 0) {
7931 DPRINTF("(");
7932 for (i = 0 ; i < nitems(cr3_info) ; i++)
7933 if (cr3 & cr3_info[i].vrdi_bit)
7934 DPRINTF("%s", cr3_info[i].vrdi_present);
7935 else
7936 DPRINTF("%s", cr3_info[i].vrdi_absent);
7937
7938 DPRINTF(")\n");
7939 } else {
7940 DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
7941 }
7942 }
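
/*
 * Worked example: with CR4.PCIDE clear, a cr3 value with PCD set and
 * PWT clear is printed as "(pwt PCD)"; with PCIDE set the low 12 bits
 * are the PCID instead, so e.g. cr3=0x1000003 is printed as
 * "(pcid=0x3)".
 */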
7943
7944 void
7945 vmm_decode_cr4(uint64_t cr4)
7946 {
7947 struct vmm_reg_debug_info cr4_info[19] = {
7948 { CR4_PKE, "PKE ", "pke "},
7949 { CR4_SMAP, "SMAP ", "smap "},
7950 { CR4_SMEP, "SMEP ", "smep "},
7951 { CR4_OSXSAVE, "OSXSAVE ", "osxsave "},
7952 { CR4_PCIDE, "PCIDE ", "pcide "},
7953 { CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "},
7954 { CR4_SMXE, "SMXE ", "smxe "},
7955 { CR4_VMXE, "VMXE ", "vmxe "},
7956 { CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "},
7957 { CR4_OSFXSR, "OSFXSR ", "osfxsr "},
7958 { CR4_PCE, "PCE ", "pce "},
7959 { CR4_PGE, "PGE ", "pge "},
7960 { CR4_MCE, "MCE ", "mce "},
7961 { CR4_PAE, "PAE ", "pae "},
7962 { CR4_PSE, "PSE ", "pse "},
7963 { CR4_DE, "DE ", "de "},
7964 { CR4_TSD, "TSD ", "tsd "},
7965 { CR4_PVI, "PVI ", "pvi "},
7966 { CR4_VME, "VME", "vme"}
7967 };
7968
7969 uint8_t i;
7970
7971 DPRINTF("(");
7972 for (i = 0; i < nitems(cr4_info); i++)
7973 if (cr4 & cr4_info[i].vrdi_bit)
7974 DPRINTF("%s", cr4_info[i].vrdi_present);
7975 else
7976 DPRINTF("%s", cr4_info[i].vrdi_absent);
7977
7978 DPRINTF(")\n");
7979 }
7980
7981 void
7982 vmm_decode_apicbase_msr_value(uint64_t apicbase)
7983 {
7984 struct vmm_reg_debug_info apicbase_info[3] = {
7985 { APICBASE_BSP, "BSP ", "bsp "},
7986 { APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "},
7987 { APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"}
7988 };
7989
7990 uint8_t i;
7991
7992 DPRINTF("(");
7993 for (i = 0; i < nitems(apicbase_info); i++)
7994 if (apicbase & apicbase_info[i].vrdi_bit)
7995 DPRINTF("%s", apicbase_info[i].vrdi_present);
7996 else
7997 DPRINTF("%s", apicbase_info[i].vrdi_absent);
7998
7999 DPRINTF(")\n");
8000 }
8001
8002 void
8003 vmm_decode_ia32_fc_value(uint64_t fcr)
8004 {
8005 struct vmm_reg_debug_info fcr_info[4] = {
8006 { IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "},
8007 { IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "},
8008 { IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "},
8009 { IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "}
8010 };
8011
8012 uint8_t i;
8013
8014 DPRINTF("(");
8015 for (i = 0; i < nitems(fcr_info); i++)
8016 if (fcr & fcr_info[i].vrdi_bit)
8017 DPRINTF("%s", fcr_info[i].vrdi_present);
8018 else
8019 DPRINTF("%s", fcr_info[i].vrdi_absent);
8020
8021 if (fcr & IA32_FEATURE_CONTROL_SENTER_EN)
8022 DPRINTF(" [SENTER param = 0x%llx]",
8023 (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);
8024
8025 DPRINTF(")\n");
8026 }
8027
8028 void
8029 vmm_decode_mtrrcap_value(uint64_t val)
8030 {
8031 struct vmm_reg_debug_info mtrrcap_info[3] = {
8032 { MTRRcap_FIXED, "FIXED ", "fixed "},
8033 { MTRRcap_WC, "WC ", "wc "},
8034 { MTRRcap_SMRR, "SMRR ", "smrr "}
8035 };
8036
8037 uint8_t i;
8038
8039 DPRINTF("(");
8040 for (i = 0; i < nitems(mtrrcap_info); i++)
8041 if (val & mtrrcap_info[i].vrdi_bit)
8042 DPRINTF("%s", mtrrcap_info[i].vrdi_present);
8043 else
8044 DPRINTF("%s", mtrrcap_info[i].vrdi_absent);
8045
8046 if (val & MTRRcap_FIXED)
8047 DPRINTF(" [nr fixed ranges = 0x%llx]",
8048 (val & 0xff));
8049
8050 DPRINTF(")\n");
8051 }
8052
8053 void
8054 vmm_decode_perf_status_value(uint64_t val)
8055 {
8056 DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
8057 }
8058
8059 void
8060 vmm_decode_perf_ctl_value(uint64_t val)
8061 {
8062 DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
8063 DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF));
8064 }
8065
8066 void
8067 vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
8068 {
8069 struct vmm_reg_debug_info mtrrdeftype_info[2] = {
8070 { MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "},
8071 { MTRRdefType_ENABLE, "ENABLED ", "enabled "},
8072 };
8073
8074 uint8_t i;
8075 int type;
8076
8077 DPRINTF("(");
8078 for (i = 0; i < nitems(mtrrdeftype_info); i++)
8079 if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
8080 DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
8081 else
8082 DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);
8083
8084 DPRINTF("type = ");
8085 type = mtrr2mrt(mtrrdeftype & 0xff);
8086 switch (type) {
8087 case MDF_UNCACHEABLE: DPRINTF("UC"); break;
8088 case MDF_WRITECOMBINE: DPRINTF("WC"); break;
8089 case MDF_WRITETHROUGH: DPRINTF("WT"); break;
8090 case MDF_WRITEPROTECT: DPRINTF("RO"); break;
8091 case MDF_WRITEBACK: DPRINTF("WB"); break;
8092 case MDF_UNKNOWN:
8093 default:
8094 DPRINTF("??");
8095 break;
8096 }
8097
8098 DPRINTF(")\n");
8099 }
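
/*
 * Worked example (assuming mtrr2mrt() maps MTRR type 6, write-back, to
 * MDF_WRITEBACK): an MTRRdefType value of 0xc06, i.e. fixed-range and
 * variable-range MTRRs enabled with a write-back default, is printed
 * by the code above as "(FIXED ENABLED type = WB)".
 */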
8100
8101 void
8102 vmm_decode_efer_value(uint64_t efer)
8103 {
8104 struct vmm_reg_debug_info efer_info[4] = {
8105 { EFER_SCE, "SCE ", "sce "},
8106 { EFER_LME, "LME ", "lme "},
8107 { EFER_LMA, "LMA ", "lma "},
8108 { EFER_NXE, "NXE", "nxe"},
8109 };
8110
8111 uint8_t i;
8112
8113 DPRINTF("(");
8114 for (i = 0; i < nitems(efer_info); i++)
8115 if (efer & efer_info[i].vrdi_bit)
8116 DPRINTF("%s", efer_info[i].vrdi_present);
8117 else
8118 DPRINTF("%s", efer_info[i].vrdi_absent);
8119
8120 DPRINTF(")\n");
8121 }
8122
8123 void
8124 vmm_decode_msr_value(uint64_t msr, uint64_t val)
8125 {
8126 switch (msr) {
8127 case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break;
8128 case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break;
8129 case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break;
8130 case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break;
8131 case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break;
8132 case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break;
8133 case MSR_EFER: vmm_decode_efer_value(val); break;
8134 case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
8135 default: DPRINTF("\n");
8136 }
8137 }
8138
8139 void
8140 vmm_decode_rflags(uint64_t rflags)
8141 {
8142 struct vmm_reg_debug_info rflags_info[16] = {
8143 { PSL_C, "CF ", "cf "},
8144 { PSL_PF, "PF ", "pf "},
8145 { PSL_AF, "AF ", "af "},
8146 { PSL_Z, "ZF ", "zf "},
8147 { PSL_N, "SF ", "sf "}, /* sign flag */
8148 { PSL_T, "TF ", "tf "},
8149 { PSL_I, "IF ", "if "},
8150 { PSL_D, "DF ", "df "},
8151 { PSL_V, "OF ", "of "}, /* overflow flag */
8152 { PSL_NT, "NT ", "nt "},
8153 { PSL_RF, "RF ", "rf "},
8154 { PSL_VM, "VM ", "vm "},
8155 { PSL_AC, "AC ", "ac "},
8156 { PSL_VIF, "VIF ", "vif "},
8157 { PSL_VIP, "VIP ", "vip "},
8158 { PSL_ID, "ID ", "id "},
8159 };
8160
8161 uint8_t i, iopl;
8162
8163 DPRINTF("(");
8164 for (i = 0; i < nitems(rflags_info); i++)
8165 if (rflags & rflags_info[i].vrdi_bit)
8166 DPRINTF("%s", rflags_info[i].vrdi_present);
8167 else
8168 DPRINTF("%s", rflags_info[i].vrdi_absent);
8169
8170 iopl = (rflags & PSL_IOPL) >> 12;
8171 DPRINTF("IOPL=%d", iopl);
8172
8173 DPRINTF(")\n");
8174 }
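
/*
 * Worked example: a guest running with rflags=0x202 (only IF set
 * besides the always-one reserved bit) is printed by the loop above as
 * "(cf pf af zf sf tf IF df of nt rf vm ac vif vip id IOPL=0)".
 */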
8175
8176 void
8177 vmm_decode_misc_enable_value(uint64_t misc)
8178 {
8179 struct vmm_reg_debug_info misc_info[10] = {
8180 { MISC_ENABLE_FAST_STRINGS, "FSE ", "fse "},
8181 { MISC_ENABLE_TCC, "TCC ", "tcc "},
8182 { MISC_ENABLE_PERF_MON_AVAILABLE, "PERF ", "perf "},
8183 { MISC_ENABLE_BTS_UNAVAILABLE, "BTSU ", "btsu "},
8184 { MISC_ENABLE_PEBS_UNAVAILABLE, "PEBSU ", "pebsu "},
8185 { MISC_ENABLE_EIST_ENABLED, "EIST ", "eist "},
8186 { MISC_ENABLE_ENABLE_MONITOR_FSM, "MFSM ", "mfsm "},
8187 { MISC_ENABLE_LIMIT_CPUID_MAXVAL, "CMAX ", "cmax "},
8188 { MISC_ENABLE_xTPR_MESSAGE_DISABLE, "xTPRD ", "xtprd "},
8189 { MISC_ENABLE_XD_BIT_DISABLE, "NXD", "nxd"},
8190 };
8191
8192 uint8_t i;
8193
8194 DPRINTF("(");
8195 for (i = 0; i < nitems(misc_info); i++)
8196 if (misc & misc_info[i].vrdi_bit)
8197 DPRINTF("%s", misc_info[i].vrdi_present);
8198 else
8199 DPRINTF("%s", misc_info[i].vrdi_absent);
8200
8201 DPRINTF(")\n");
8202 }
8203
8204 const char *
8205 vmm_decode_cpu_mode(struct vcpu *vcpu)
8206 {
8207 int mode = vmm_get_guest_cpu_mode(vcpu);
8208
8209 switch (mode) {
8210 case VMM_CPU_MODE_REAL: return "real";
8211 case VMM_CPU_MODE_PROT: return "16 bit protected";
8212 case VMM_CPU_MODE_PROT32: return "32 bit protected";
8213 case VMM_CPU_MODE_COMPAT: return "compatibility";
8214 case VMM_CPU_MODE_LONG: return "long";
8215 default: return "unknown";
8216 }
8217 }
8218 #endif /* VMM_DEBUG */
8219