/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2022 Oxide Computer Company
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <strings.h>
#include <assert.h>
#include <errno.h>

#include <sys/types.h>
#include <sys/segments.h>
#include <sys/psw.h>
#include <sys/controlregs.h>
#include <sys/sysmacros.h>
#include <sys/varargs.h>
#include <sys/debug.h>
#include <sys/mman.h>

#include <sys/vmm.h>
#include <sys/vmm_dev.h>
#include <vmmapi.h>

#include "in_guest.h"


#define	PT_VALID	0x01
#define	PT_WRITABLE	0x02
#define	PT_WRITETHRU	0x08
#define	PT_NOCACHE	0x10
#define	PT_PAGESIZE	0x80

#define	SEG_ACCESS_TYPE_MASK	0x1f
#define	SEG_ACCESS_DPL_MASK	0x60
#define	SEG_ACCESS_P		(1 << 7)
#define	SEG_ACCESS_AVL		(1 << 12)
#define	SEG_ACCESS_L		(1 << 13)
#define	SEG_ACCESS_D		(1 << 14)
#define	SEG_ACCESS_G		(1 << 15)
#define	SEG_ACCESS_UNUSABLE	(1 << 16)


/*
 * Keep the test name and VM context around so that consumers are not
 * required to pass them back in for subsequent test-related operations
 * after initialization has been performed.
 *
 * The test code is not designed to be reentrant at this point.
 */
static struct vmctx *test_vmctx = NULL;
static const char *test_name = NULL;
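
/*
 * A typical consumer, sketched for illustration (MEM_LOC_STACK is assumed to
 * be defined alongside the other MEM_LOC_* constants in in_guest.h):
 *
 *	struct vmctx *ctx = test_initialize("example");
 *	int err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
 *	if (err != 0) {
 *		test_fail_errno(err, "Could not initialize vcpu0");
 *	}
 *
 * Entry/exit handling then proceeds through test_run_vcpu(); see the sketch
 * following its definition below.
 */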

static int
setup_rom(struct vmctx *ctx)
{
	const size_t seg_sz = 0x1000;
	const uintptr_t seg_addr = MEM_LOC_ROM;
	const int fd = vm_get_device_fd(ctx);
	int err;

	struct vm_memseg memseg = {
		.segid = VM_BOOTROM,
		.len = seg_sz,
	};
	(void) strlcpy(memseg.name, "testrom", sizeof (memseg.name));
	err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg);
	if (err != 0) {
		return (err);
	}
	err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz,
	    PROT_READ | PROT_EXEC);
	return (err);
}

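/*
 * Construct page tables which identity-map the low 4 GiB of guest-physical
 * address space with 2MiB pages:
 *
 *	%cr3 -> PML4 @ MEM_LOC_PAGE_TABLE_512G (1 entry used)
 *	     -> PDP  @ MEM_LOC_PAGE_TABLE_1G   (4 entries, 1 GiB apiece)
 *	     -> PDs  @ MEM_LOC_PAGE_TABLE_2M   (4 pages of 512 2MiB mappings)
 */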
static void
populate_identity_table(struct vmctx *ctx)
{
	uint64_t gpa, pte_loc;

	/* Set up 2MiB PTEs for everything up through 0xffffffff */
	for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M;
	    gpa < 0x100000000;
	    pte_loc += PAGE_SIZE) {
		uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);

		for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) {
			*ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
			/* Make traditional MMIO space uncacheable */
			if (gpa >= 0xc0000000) {
				*ptep |= PT_WRITETHRU | PT_NOCACHE;
			}
		}
	}
	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);

	uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE;
	pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[2] =
	    (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[3] =
	    (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;

	pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
}

static void
populate_desc_tables(struct vmctx *ctx)
{
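	/*
	 * Nothing to populate for now: the segment state needed by these
	 * tests is established directly via vm_set_desc() in
	 * test_setup_vcpu(), rather than through in-guest descriptor tables.
	 */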
}

static void
test_cleanup(bool is_failure)
{
	if (test_vmctx != NULL) {
		bool keep_on_fail = false;

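		/*
		 * Setting KEEP_ON_FAIL in the environment to a non-empty
		 * value other than "0" (e.g. KEEP_ON_FAIL=1) preserves the
		 * VM instance of a failed test for later inspection.
		 */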
		const char *keep_var;
		if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) {
			if (strlen(keep_var) != 0 &&
			    strcmp(keep_var, "0") != 0) {
				keep_on_fail = true;
			}
		}

		/*
		 * Destroy the instance unless the test failed and it was
		 * requested that we keep it around.
		 */
		if (!is_failure || !keep_on_fail) {
			vm_destroy(test_vmctx);
		}
		test_vmctx = NULL;
	}
}

static void
fail_finish(void)
{
	assert(test_name != NULL);
	(void) printf("FAIL %s\n", test_name);

	test_cleanup(true);
	exit(EXIT_FAILURE);
}

void
test_fail_errno(int err, const char *msg)
{
	const char *err_str = strerror(err);

	(void) fprintf(stderr, "%s: %s\n", msg, err_str);
	fail_finish();
}

void
test_fail_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);

	fail_finish();
}

void
test_fail_vmexit(const struct vm_exit *vexit)
{
	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";

	switch (vexit->exitcode) {
	case VM_EXITCODE_INOUT:
		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
		(void) fprintf(stderr,
		    "\teax: %08x\n"
		    "\tport: %04x\n"
		    "\tbytes: %u\n"
		    "\tflags: %x\n",
		    vexit->u.inout.eax,
		    vexit->u.inout.port,
		    vexit->u.inout.bytes,
		    vexit->u.inout.flags);
		break;
	case VM_EXITCODE_RDMSR:
		(void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip);
		(void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code);
		break;
	case VM_EXITCODE_WRMSR:
		(void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip);
		(void) fprintf(stderr,
		    "\tcode: %08x\n"
		    "\twval: %016lx\n",
		    vexit->u.msr.code, vexit->u.msr.wval);
		break;
	case VM_EXITCODE_MMIO:
		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
		(void) fprintf(stderr,
		    "\tbytes: %u\n"
		    "\ttype: %s\n"
		    "\tgpa: %lx\n"
		    "\tdata: %016lx\n",
		    vexit->u.mmio.bytes,
		    vexit->u.mmio.read == 0 ? "write" : "read",
		    vexit->u.mmio.gpa,
		    vexit->u.mmio.data);
		break;
	case VM_EXITCODE_VMX:
		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
		(void) fprintf(stderr,
		    "\tstatus: %x\n"
		    "\treason: %x\n"
		    "\tqualification: %lx\n"
		    "\tinst_type: %x\n"
		    "\tinst_error: %x\n",
		    vexit->u.vmx.status,
		    vexit->u.vmx.exit_reason,
		    vexit->u.vmx.exit_qualification,
		    vexit->u.vmx.inst_type,
		    vexit->u.vmx.inst_error);
		break;
	case VM_EXITCODE_SVM:
		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
		break;
	case VM_EXITCODE_INST_EMUL:
		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
		    vexit->rip);
		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
		    vexit->u.inst_emul.num_valid : 15;
		(void) fprintf(stderr, "\tinstruction bytes: [");
		for (uint_t i = 0; i < len; i++) {
			(void) fprintf(stderr, "%s%02x",
			    i == 0 ? "" : ", ",
			    vexit->u.inst_emul.inst[i]);
		}
		(void) fprintf(stderr, "]\n");
		break;
	case VM_EXITCODE_SUSPENDED:
		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
		switch (vexit->u.suspended.how) {
		case VM_SUSPEND_RESET:
			(void) fprintf(stderr, "\thow: reset\n");
			break;
		case VM_SUSPEND_POWEROFF:
			(void) fprintf(stderr, "\thow: poweroff\n");
			break;
		case VM_SUSPEND_HALT:
			(void) fprintf(stderr, "\thow: halt\n");
			break;
		case VM_SUSPEND_TRIPLEFAULT:
			(void) fprintf(stderr, "\thow: triple-fault\n");
			break;
		default:
			(void) fprintf(stderr, "\thow: unknown - %d\n",
			    vexit->u.suspended.how);
			break;
		}
		break;
	default:
		(void) fprintf(stderr, "Unexpected code %d exit:\n"
		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
		break;
	}
	fail_finish();
}

void
test_pass(void)
{
	assert(test_name != NULL);
	(void) printf("PASS %s\n", test_name);
	test_cleanup(false);
	exit(EXIT_SUCCESS);
}

static int
load_payload(struct vmctx *ctx)
{
	extern uint8_t payload_data;
	extern uint32_t payload_size;

	const uint32_t len = payload_size;
	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);

	if (len > cap) {
		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
	}

	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
	bcopy(&payload_data, outp, len);

	return (0);
}

struct vmctx *
test_initialize(const char *tname)
{
	char vm_name[VM_MAX_NAMELEN];
	int err;
	struct vmctx *ctx;

	assert(test_vmctx == NULL);
	assert(test_name == NULL);

	test_name = strdup(tname);
	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
	    test_name, getpid());

	err = vm_create(vm_name, 0);
	if (err != 0) {
		test_fail_errno(err, "Could not create VM");
	}

	ctx = vm_open(vm_name);
	if (ctx == NULL) {
		test_fail_errno(errno, "Could not open VM");
	}
	test_vmctx = ctx;

	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM memory");
	}

	err = setup_rom(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM ROM segment");
	}

	populate_identity_table(ctx);
	populate_desc_tables(ctx);

	err = load_payload(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not load payload");
	}

	return (ctx);
}

int
test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp)
{
	int err;

	err = vm_activate_cpu(ctx, vcpu);
	if (err != 0 && err != EBUSY) {
		return (err);
	}

	/*
	 * Granularity bit is important here for VMX validity:
	 * "If any bit in the limit field in the range 31:20 is 1, G must be 1"
	 */
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX,
	    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
	    SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	/*
	 * While SVM will happily run with an otherwise unusable TR, VMX
	 * includes it among its entry checks.
	 */
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
	    SDT_SYSTSSBSY | SEG_ACCESS_P);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0);
	if (err != 0) {
		return (err);
	}

	/* Mark unused segments as explicitly unusable (for VMX) */
	const int unusable_segs[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS,
		VM_REG_GUEST_LDTR,
	};
	for (uint_t i = 0; i < ARRAY_SIZE(unusable_segs); i++) {
		err = vm_set_desc(ctx, vcpu, unusable_segs[i], 0, 0,
		    SEG_ACCESS_UNUSABLE);
		if (err != 0) {
			return (err);
		}
	}

	/* Place CPU directly in long mode */
	const int regnums[] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER,
		VM_REG_GUEST_RFLAGS,
		VM_REG_GUEST_RIP,
		VM_REG_GUEST_RSP,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_TR,
	};
	uint64_t regvals[] = {
		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
		    CR0_MP | CR0_PE,
		MEM_LOC_PAGE_TABLE_512G,
		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
		/* start with interrupts disabled */
		PS_MB1,
		rip,
		rsp,
		(GDT_KCODE << 3),
		(GDT_KDATA << 3),
		(GDT_KDATA << 3),
		(GDT_KTSS << 3),
	};
	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));

	err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums,
	    regvals);
	if (err != 0) {
		return (err);
	}

	err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0);
	if (err != 0) {
		return (err);
	}

	return (0);
}

static enum vm_exit_kind
which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
{
	const struct vm_inout *inout = &vexit->u.inout;

	switch (vexit->exitcode) {
	case VM_EXITCODE_BOGUS:
	case VM_EXITCODE_REQIDLE:
		bzero(ventry, sizeof (*ventry));
		return (VEK_REENTR);
	case VM_EXITCODE_INOUT:
		if (inout->port == IOP_TEST_RESULT &&
		    (inout->flags & INOUT_IN) == 0) {
			if (inout->eax == TEST_RESULT_PASS) {
				return (VEK_TEST_PASS);
			} else {
				return (VEK_TEST_FAIL);
			}
		}
		break;
	default:
		break;
	}
	return (VEK_UNHANDLED);
}

enum vm_exit_kind
test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry,
    struct vm_exit *vexit)
{
	int err;

	err = vm_run(ctx, vcpu, ventry, vexit);
	if (err != 0) {
		test_fail_errno(err, "Failure during vcpu entry");
	}

	return (which_exit_kind(ventry, vexit));
}
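
/*
 * An illustrative entry/exit loop built atop test_run_vcpu(); a real test
 * would handle its own expected exits (see vexit_match_inout() and friends
 * below) before treating one as unexpected:
 *
 *	struct vm_entry ventry = { 0 };
 *	struct vm_exit vexit = { 0 };
 *	for (;;) {
 *		const enum vm_exit_kind kind =
 *		    test_run_vcpu(ctx, 0, &ventry, &vexit);
 *		switch (kind) {
 *		case VEK_REENTR:
 *			break;
 *		case VEK_TEST_PASS:
 *			test_pass();
 *			break;
 *		default:
 *			test_fail_vmexit(&vexit);
 *			break;
 *		}
 *	}
 */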

void
ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint32_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);

	ventry->cmd = VEC_FULFILL_INOUT;
	bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout));
	if ((ventry->u.inout.flags & INOUT_IN) != 0) {
		ventry->u.inout.eax = data;
	}
}

void
ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint64_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);

	ventry->cmd = VEC_FULFILL_MMIO;
	bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio));
	if (ventry->u.mmio.read != 0) {
		ventry->u.mmio.data = data;
	}
}

bool
vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
    uint_t len, uint32_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_INOUT) {
		return (false);
	}

	const uint_t flag = is_read ? INOUT_IN : 0;
	if (vexit->u.inout.port != port ||
	    vexit->u.inout.bytes != len ||
	    (vexit->u.inout.flags & INOUT_IN) != flag) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.inout.eax;
	}
	return (true);
}

bool
vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
    uint_t len, uint64_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_MMIO) {
		return (false);
	}

	if (vexit->u.mmio.gpa != addr ||
	    vexit->u.mmio.bytes != len ||
	    (vexit->u.mmio.read != 0) != is_read) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.mmio.data;
	}
	return (true);
}
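
/*
 * Example of pairing the match and fulfill helpers from within a test's exit
 * loop to emulate a simple device register on an in/out read; the port number
 * and returned value here are hypothetical:
 *
 *	if (vexit_match_inout(&vexit, true, 0x5555, 4, NULL)) {
 *		ventry_fulfill_inout(&vexit, &ventry, 0x12345678);
 *		continue;
 *	}
 */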