xref: /freebsd/sys/amd64/vmm/intel/vmcs.c (revision 069ac184)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_bhyve_snapshot.h"
30 #include "opt_ddb.h"
31 
32 #include <sys/param.h>
33 #include <sys/sysctl.h>
34 #include <sys/systm.h>
35 #include <sys/pcpu.h>
36 
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <machine/segments.h>
41 #include <machine/vmm.h>
42 #include <machine/vmm_snapshot.h>
43 #include "vmm_host.h"
44 #include "vmx_cpufunc.h"
45 #include "vmcs.h"
46 #include "ept.h"
47 #include "vmx.h"
48 
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52 
53 SYSCTL_DECL(_hw_vmm_vmx);
54 
55 static int no_flush_rsb;
56 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
57     &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
58 
59 static uint64_t
60 vmcs_fix_regval(uint32_t encoding, uint64_t val)
61 {
62 
63 	switch (encoding) {
64 	case VMCS_GUEST_CR0:
65 		val = vmx_fix_cr0(val);
66 		break;
67 	case VMCS_GUEST_CR4:
68 		val = vmx_fix_cr4(val);
69 		break;
70 	default:
71 		break;
72 	}
73 	return (val);
74 }
75 
76 static uint32_t
77 vmcs_field_encoding(int ident)
78 {
79 	switch (ident) {
80 	case VM_REG_GUEST_CR0:
81 		return (VMCS_GUEST_CR0);
82 	case VM_REG_GUEST_CR3:
83 		return (VMCS_GUEST_CR3);
84 	case VM_REG_GUEST_CR4:
85 		return (VMCS_GUEST_CR4);
86 	case VM_REG_GUEST_DR7:
87 		return (VMCS_GUEST_DR7);
88 	case VM_REG_GUEST_RSP:
89 		return (VMCS_GUEST_RSP);
90 	case VM_REG_GUEST_RIP:
91 		return (VMCS_GUEST_RIP);
92 	case VM_REG_GUEST_RFLAGS:
93 		return (VMCS_GUEST_RFLAGS);
94 	case VM_REG_GUEST_ES:
95 		return (VMCS_GUEST_ES_SELECTOR);
96 	case VM_REG_GUEST_CS:
97 		return (VMCS_GUEST_CS_SELECTOR);
98 	case VM_REG_GUEST_SS:
99 		return (VMCS_GUEST_SS_SELECTOR);
100 	case VM_REG_GUEST_DS:
101 		return (VMCS_GUEST_DS_SELECTOR);
102 	case VM_REG_GUEST_FS:
103 		return (VMCS_GUEST_FS_SELECTOR);
104 	case VM_REG_GUEST_GS:
105 		return (VMCS_GUEST_GS_SELECTOR);
106 	case VM_REG_GUEST_TR:
107 		return (VMCS_GUEST_TR_SELECTOR);
108 	case VM_REG_GUEST_LDTR:
109 		return (VMCS_GUEST_LDTR_SELECTOR);
110 	case VM_REG_GUEST_EFER:
111 		return (VMCS_GUEST_IA32_EFER);
112 	case VM_REG_GUEST_PDPTE0:
113 		return (VMCS_GUEST_PDPTE0);
114 	case VM_REG_GUEST_PDPTE1:
115 		return (VMCS_GUEST_PDPTE1);
116 	case VM_REG_GUEST_PDPTE2:
117 		return (VMCS_GUEST_PDPTE2);
118 	case VM_REG_GUEST_PDPTE3:
119 		return (VMCS_GUEST_PDPTE3);
120 	case VM_REG_GUEST_ENTRY_INST_LENGTH:
121 		return (VMCS_ENTRY_INST_LENGTH);
122 	default:
123 		return (-1);
124 	}
125 
126 }
127 
128 static int
129 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
130 {
131 
132 	switch (seg) {
133 	case VM_REG_GUEST_ES:
134 		*base = VMCS_GUEST_ES_BASE;
135 		*lim = VMCS_GUEST_ES_LIMIT;
136 		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
137 		break;
138 	case VM_REG_GUEST_CS:
139 		*base = VMCS_GUEST_CS_BASE;
140 		*lim = VMCS_GUEST_CS_LIMIT;
141 		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
142 		break;
143 	case VM_REG_GUEST_SS:
144 		*base = VMCS_GUEST_SS_BASE;
145 		*lim = VMCS_GUEST_SS_LIMIT;
146 		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
147 		break;
148 	case VM_REG_GUEST_DS:
149 		*base = VMCS_GUEST_DS_BASE;
150 		*lim = VMCS_GUEST_DS_LIMIT;
151 		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
152 		break;
153 	case VM_REG_GUEST_FS:
154 		*base = VMCS_GUEST_FS_BASE;
155 		*lim = VMCS_GUEST_FS_LIMIT;
156 		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
157 		break;
158 	case VM_REG_GUEST_GS:
159 		*base = VMCS_GUEST_GS_BASE;
160 		*lim = VMCS_GUEST_GS_LIMIT;
161 		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
162 		break;
163 	case VM_REG_GUEST_TR:
164 		*base = VMCS_GUEST_TR_BASE;
165 		*lim = VMCS_GUEST_TR_LIMIT;
166 		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
167 		break;
168 	case VM_REG_GUEST_LDTR:
169 		*base = VMCS_GUEST_LDTR_BASE;
170 		*lim = VMCS_GUEST_LDTR_LIMIT;
171 		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
172 		break;
173 	case VM_REG_GUEST_IDTR:
174 		*base = VMCS_GUEST_IDTR_BASE;
175 		*lim = VMCS_GUEST_IDTR_LIMIT;
176 		*acc = VMCS_INVALID_ENCODING;
177 		break;
178 	case VM_REG_GUEST_GDTR:
179 		*base = VMCS_GUEST_GDTR_BASE;
180 		*lim = VMCS_GUEST_GDTR_LIMIT;
181 		*acc = VMCS_INVALID_ENCODING;
182 		break;
183 	default:
184 		return (EINVAL);
185 	}
186 
187 	return (0);
188 }
189 
/*
 * Read one guest register (or raw VMCS field) into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier that is mapped
 * to its VMCS encoding.  A negative 'ident' bypasses that mapping: the
 * sign bit is stripped and the remaining bits are used as the encoding
 * directly.  The original author notes that the upper 16 bits of VMCS
 * encodings are reserved as zero, which leaves the sign bit free for
 * this purpose.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the
 * read and released with VMCLEAR() afterwards; otherwise neither is
 * done.
 *
 * Returns EINVAL for an identifier with no VMCS field, otherwise the
 * result of vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t encoding;
	int error;

	encoding = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (encoding == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(encoding, retval));

	VMPTRLD(vmcs);
	error = vmread(encoding, retval);
	VMCLEAR(vmcs);

	return (error);
}
221 
/*
 * Write 'val' to one guest register (or raw VMCS field).
 *
 * 'ident' is interpreted exactly as in vmcs_getreg(): non-negative
 * values are VM_REG_GUEST_* identifiers, negative values carry a raw
 * VMCS encoding in their low 31 bits.  The value is first run through
 * vmcs_fix_regval() so that writes to the guest %cr0/%cr4 fields are
 * adjusted before being stored.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the
 * write and released with VMCLEAR() afterwards.
 *
 * Returns EINVAL for an identifier with no VMCS field, otherwise the
 * result of vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t encoding;
	int error;

	encoding = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (encoding == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(encoding, val);

	if (running)
		return (vmwrite(encoding, val));

	VMPTRLD(vmcs);
	error = vmwrite(encoding, val);
	VMCLEAR(vmcs);

	return (error);
}
248 
249 int
250 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
251 {
252 	int error;
253 	uint32_t base, limit, access;
254 
255 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
256 	if (error != 0)
257 		panic("vmcs_setdesc: invalid segment register %d", seg);
258 
259 	if (!running)
260 		VMPTRLD(vmcs);
261 	if ((error = vmwrite(base, desc->base)) != 0)
262 		goto done;
263 
264 	if ((error = vmwrite(limit, desc->limit)) != 0)
265 		goto done;
266 
267 	if (access != VMCS_INVALID_ENCODING) {
268 		if ((error = vmwrite(access, desc->access)) != 0)
269 			goto done;
270 	}
271 done:
272 	if (!running)
273 		VMCLEAR(vmcs);
274 	return (error);
275 }
276 
277 int
278 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
279 {
280 	int error;
281 	uint32_t base, limit, access;
282 	uint64_t u64;
283 
284 	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
285 	if (error != 0)
286 		panic("vmcs_getdesc: invalid segment register %d", seg);
287 
288 	if (!running)
289 		VMPTRLD(vmcs);
290 	if ((error = vmread(base, &u64)) != 0)
291 		goto done;
292 	desc->base = u64;
293 
294 	if ((error = vmread(limit, &u64)) != 0)
295 		goto done;
296 	desc->limit = u64;
297 
298 	if (access != VMCS_INVALID_ENCODING) {
299 		if ((error = vmread(access, &u64)) != 0)
300 			goto done;
301 		desc->access = u64;
302 	}
303 done:
304 	if (!running)
305 		VMCLEAR(vmcs);
306 	return (error);
307 }
308 
309 int
310 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
311 {
312 	int error;
313 
314 	VMPTRLD(vmcs);
315 
316 	/*
317 	 * Guest MSRs are saved in the VM-exit MSR-store area.
318 	 * Guest MSRs are loaded from the VM-entry MSR-load area.
319 	 * Both areas point to the same location in memory.
320 	 */
321 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
322 		goto done;
323 	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
324 		goto done;
325 
326 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
327 		goto done;
328 	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
329 		goto done;
330 
331 	error = 0;
332 done:
333 	VMCLEAR(vmcs);
334 	return (error);
335 }
336 
337 int
338 vmcs_init(struct vmcs *vmcs)
339 {
340 	int error, codesel, datasel, tsssel;
341 	u_long cr0, cr4, efer;
342 	uint64_t pat, fsbase, idtrbase;
343 
344 	codesel = vmm_get_host_codesel();
345 	datasel = vmm_get_host_datasel();
346 	tsssel = vmm_get_host_tsssel();
347 
348 	/*
349 	 * Make sure we have a "current" VMCS to work with.
350 	 */
351 	VMPTRLD(vmcs);
352 
353 	/* Host state */
354 
355 	/* Initialize host IA32_PAT MSR */
356 	pat = vmm_get_host_pat();
357 	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
358 		goto done;
359 
360 	/* Load the IA32_EFER MSR */
361 	efer = vmm_get_host_efer();
362 	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
363 		goto done;
364 
365 	/* Load the control registers */
366 
367 	cr0 = vmm_get_host_cr0();
368 	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
369 		goto done;
370 
371 	cr4 = vmm_get_host_cr4() | CR4_VMXE;
372 	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
373 		goto done;
374 
375 	/* Load the segment selectors */
376 	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
377 		goto done;
378 
379 	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
380 		goto done;
381 
382 	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
383 		goto done;
384 
385 	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
386 		goto done;
387 
388 	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
389 		goto done;
390 
391 	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
392 		goto done;
393 
394 	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
395 		goto done;
396 
397 	/*
398 	 * Load the Base-Address for %fs and idtr.
399 	 *
400 	 * Note that we exclude %gs, tss and gdtr here because their base
401 	 * address is pcpu specific.
402 	 */
403 	fsbase = vmm_get_host_fsbase();
404 	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
405 		goto done;
406 
407 	idtrbase = vmm_get_host_idtrbase();
408 	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
409 		goto done;
410 
411 	/* instruction pointer */
412 	if (no_flush_rsb) {
413 		if ((error = vmwrite(VMCS_HOST_RIP,
414 		    (u_long)vmx_exit_guest)) != 0)
415 			goto done;
416 	} else {
417 		if ((error = vmwrite(VMCS_HOST_RIP,
418 		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
419 			goto done;
420 	}
421 
422 	/* link pointer */
423 	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
424 		goto done;
425 done:
426 	VMCLEAR(vmcs);
427 	return (error);
428 }
429 
430 #ifdef BHYVE_SNAPSHOT
/*
 * Read an arbitrary VMCS field into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as the field
 * encoding without any translation or validation.  When 'running' is
 * zero the VMCS is loaded with VMPTRLD() before the read and released
 * with VMCLEAR() afterwards.
 *
 * Returns the result of vmread().
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int error;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	error = vmread(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
446 
/*
 * Write 'val' to an arbitrary VMCS field.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as the field
 * encoding without any translation, and the value is not adjusted.
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the
 * write and released with VMCLEAR() afterwards.
 *
 * Returns the result of vmwrite().
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int error;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	error = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
462 
463 int
464 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
465 		  struct vm_snapshot_meta *meta)
466 {
467 	int ret;
468 	uint64_t val;
469 
470 	if (meta->op == VM_SNAPSHOT_SAVE) {
471 		ret = vmcs_getreg(vmcs, running, ident, &val);
472 		if (ret != 0)
473 			goto done;
474 
475 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
476 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
477 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
478 
479 		ret = vmcs_setreg(vmcs, running, ident, val);
480 		if (ret != 0)
481 			goto done;
482 	} else {
483 		ret = EINVAL;
484 		goto done;
485 	}
486 
487 done:
488 	return (ret);
489 }
490 
491 int
492 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
493 		   struct vm_snapshot_meta *meta)
494 {
495 	int ret;
496 	struct seg_desc desc;
497 
498 	if (meta->op == VM_SNAPSHOT_SAVE) {
499 		ret = vmcs_getdesc(vmcs, running, seg, &desc);
500 		if (ret != 0)
501 			goto done;
502 
503 		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
504 		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
505 		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
506 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
507 		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
508 		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
509 		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
510 
511 		ret = vmcs_setdesc(vmcs, running, seg, &desc);
512 		if (ret != 0)
513 			goto done;
514 	} else {
515 		ret = EINVAL;
516 		goto done;
517 	}
518 
519 done:
520 	return (ret);
521 }
522 
523 int
524 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
525 		  struct vm_snapshot_meta *meta)
526 {
527 	int ret;
528 	uint64_t val;
529 
530 	if (meta->op == VM_SNAPSHOT_SAVE) {
531 		ret = vmcs_getany(vmcs, running, ident, &val);
532 		if (ret != 0)
533 			goto done;
534 
535 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
536 	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
537 		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
538 
539 		ret = vmcs_setany(vmcs, running, ident, val);
540 		if (ret != 0)
541 			goto done;
542 	} else {
543 		ret = EINVAL;
544 		goto done;
545 	}
546 
547 done:
548 	return (ret);
549 }
550 #endif
551 
552 #ifdef DDB
553 extern int vmxon_enabled[];
554 
555 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
556 {
557 	uint64_t cur_vmcs, val;
558 	uint32_t exit;
559 
560 	if (!vmxon_enabled[curcpu]) {
561 		db_printf("VMX not enabled\n");
562 		return;
563 	}
564 
565 	if (have_addr) {
566 		db_printf("Only current VMCS supported\n");
567 		return;
568 	}
569 
570 	vmptrst(&cur_vmcs);
571 	if (cur_vmcs == VMCS_INITIAL) {
572 		db_printf("No current VM context\n");
573 		return;
574 	}
575 	db_printf("VMCS: %jx\n", cur_vmcs);
576 	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
577 	db_printf("Activity: ");
578 	val = vmcs_read(VMCS_GUEST_ACTIVITY);
579 	switch (val) {
580 	case 0:
581 		db_printf("Active");
582 		break;
583 	case 1:
584 		db_printf("HLT");
585 		break;
586 	case 2:
587 		db_printf("Shutdown");
588 		break;
589 	case 3:
590 		db_printf("Wait for SIPI");
591 		break;
592 	default:
593 		db_printf("Unknown: %#lx", val);
594 	}
595 	db_printf("\n");
596 	exit = vmcs_read(VMCS_EXIT_REASON);
597 	if (exit & 0x80000000)
598 		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
599 	else
600 		db_printf("Exit Reason: %u\n", exit & 0xffff);
601 	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
602 	db_printf("Guest Linear Address: %#lx\n",
603 	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
604 	switch (exit & 0x8000ffff) {
605 	case EXIT_REASON_EXCEPTION:
606 	case EXIT_REASON_EXT_INTR:
607 		val = vmcs_read(VMCS_EXIT_INTR_INFO);
608 		db_printf("Interrupt Type: ");
609 		switch (val >> 8 & 0x7) {
610 		case 0:
611 			db_printf("external");
612 			break;
613 		case 2:
614 			db_printf("NMI");
615 			break;
616 		case 3:
617 			db_printf("HW exception");
618 			break;
619 		case 4:
620 			db_printf("SW exception");
621 			break;
622 		default:
623 			db_printf("?? %lu", val >> 8 & 0x7);
624 			break;
625 		}
626 		db_printf("  Vector: %lu", val & 0xff);
627 		if (val & 0x800)
628 			db_printf("  Error Code: %lx",
629 			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
630 		db_printf("\n");
631 		break;
632 	case EXIT_REASON_EPT_FAULT:
633 	case EXIT_REASON_EPT_MISCONFIG:
634 		db_printf("Guest Physical Address: %#lx\n",
635 		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
636 		break;
637 	}
638 	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
639 }
640 #endif
641