1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2022 Oxide Computer Company
14  */
15 
16 #include <stdio.h>
17 #include <unistd.h>
18 #include <stdlib.h>
19 #include <strings.h>
20 #include <assert.h>
21 #include <errno.h>
22 
23 #include <sys/types.h>
24 #include <sys/segments.h>
25 #include <sys/psw.h>
26 #include <sys/controlregs.h>
27 #include <sys/sysmacros.h>
28 #include <sys/varargs.h>
29 #include <sys/debug.h>
30 
31 #include <sys/vmm.h>
32 #include <sys/vmm_dev.h>
33 #include <vmmapi.h>
34 
35 #include "in_guest.h"
36 
37 
/*
 * Page table entry bits used when building the guest identity map.
 */
#define	PT_VALID	(1 << 0)
#define	PT_WRITABLE	(1 << 1)
#define	PT_WRITETHRU	(1 << 3)
#define	PT_NOCACHE	(1 << 4)
#define	PT_PAGESIZE	(1 << 7)

/*
 * Fields of the segment access value handed to vm_set_desc().  The two masks
 * cover multi-bit fields; the remainder are single-bit flags.
 */
#define	SEG_ACCESS_TYPE_MASK	0x1f
#define	SEG_ACCESS_DPL_MASK	0x60
#define	SEG_ACCESS_P		(1 << 7)
#define	SEG_ACCESS_AVL		(1 << 12)
#define	SEG_ACCESS_L		(1 << 13)
#define	SEG_ACCESS_D		(1 << 14)
#define	SEG_ACCESS_G		(1 << 15)
#define	SEG_ACCESS_UNUSABLE	(1 << 16)
52 
53 
/*
 * State shared by the test helpers in this file.  Both values are established
 * by test_initialize() so that the later pass/fail/cleanup helpers need no
 * arguments identifying the test or its VM.
 *
 * Because this is process-wide state, the test code is not reentrant.
 */

/* VM context of the running test; NULL while no instance is open. */
static struct vmctx *test_vmctx;
/* Name printed in the PASS/FAIL line; NULL until initialization. */
static const char *test_name;
63 
64 static void
65 populate_identity_table(struct vmctx *ctx)
66 {
67 	uint64_t gpa, pte_loc;
68 
69 	/* Set up 2MiB PTEs for everything up through 0xffffffff */
70 	for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M;
71 	    gpa < 0x100000000;
72 	    pte_loc += PAGE_SIZE) {
73 		uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);
74 
75 		for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) {
76 			*ptep =  gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
77 			/* Make traditional MMIO space uncachable */
78 			if (gpa >= 0xc0000000) {
79 				*ptep |= PT_WRITETHRU | PT_NOCACHE;
80 			}
81 		}
82 	}
83 	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);
84 
85 	uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
86 	pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE;
87 	pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE;
88 	pdep[2] =
89 	    (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;
90 	pdep[3] =
91 	    (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;
92 
93 	pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
94 	pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
95 }
96 
/*
 * Placeholder for populating in-memory descriptor tables.
 *
 * Currently does nothing: test_setup_vcpu() loads segment descriptor state
 * directly through vm_set_desc(), and nothing is written to guest memory here.
 */
static void
populate_desc_tables(struct vmctx *ctx)
{
}
102 
103 static void
104 test_cleanup(bool is_failure)
105 {
106 	if (test_vmctx != NULL) {
107 		bool keep_on_fail = false;
108 
109 		const char *keep_var;
110 		if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) {
111 			if (strlen(keep_var) != 0 &&
112 			    strcmp(keep_var, "0") != 0) {
113 				keep_on_fail = true;
114 			}
115 		}
116 
117 		/*
118 		 * Destroy the instance unless the test failed and it was
119 		 * requested that we keep it around.
120 		 */
121 		if (!is_failure || !keep_on_fail) {
122 			vm_destroy(test_vmctx);
123 		}
124 		test_vmctx = NULL;
125 	}
126 }
127 
128 static void fail_finish(void)
129 {
130 	assert(test_name != NULL);
131 	(void) printf("FAIL %s\n", test_name);
132 
133 	test_cleanup(true);
134 	exit(EXIT_FAILURE);
135 }
136 
/*
 * Fail the test, reporting "<msg>: <strerror(err)>" on stderr first.
 * Does not return.
 */
void
test_fail_errno(int err, const char *msg)
{
	(void) fprintf(stderr, "%s: %s\n", msg, strerror(err));
	fail_finish();
}
145 
/*
 * Fail the test after emitting a printf-style message on stderr.
 *
 * The message is printed exactly as formatted; callers supply their own
 * trailing newline.  Does not return.
 */
void
test_fail_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	/* Every va_start() must be paired with va_end() in the same function */
	va_end(ap);

	fail_finish();
}
156 
157 void
158 test_fail_vmexit(const struct vm_exit *vexit)
159 {
160 	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";
161 
162 	switch (vexit->exitcode) {
163 	case VM_EXITCODE_INOUT:
164 		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
165 		(void) fprintf(stderr,
166 		    "\teax: %08x\n"
167 		    "\tport: %04x\n"
168 		    "\tbytes: %u\n"
169 		    "\tflags: %x\n",
170 		    vexit->u.inout.eax,
171 		    vexit->u.inout.port,
172 		    vexit->u.inout.bytes,
173 		    vexit->u.inout.flags);
174 		break;
175 	case VM_EXITCODE_RDMSR:
176 		(void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip);
177 		(void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code);
178 		break;
179 	case VM_EXITCODE_WRMSR:
180 		(void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip);
181 		(void) fprintf(stderr,
182 		    "\tcode: %08x\n"
183 		    "\twval: %016lx\n",
184 		    vexit->u.msr.code, vexit->u.msr.wval);
185 		break;
186 	case VM_EXITCODE_MMIO:
187 		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
188 		(void) fprintf(stderr,
189 		    "\tbytes: %u\n"
190 		    "\ttype: %s\n"
191 		    "\tgpa: %x\n"
192 		    "\tdata: %016x\n",
193 		    vexit->u.mmio.bytes,
194 		    vexit->u.mmio.read == 0 ? "write" : "read",
195 		    vexit->u.mmio.gpa,
196 		    vexit->u.mmio.data);
197 		break;
198 	case VM_EXITCODE_VMX:
199 		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
200 		(void) fprintf(stderr,
201 		    "\tstatus: %x\n"
202 		    "\treason: %x\n"
203 		    "\tqualification: %lx\n"
204 		    "\tinst_type: %x\n"
205 		    "\tinst_error: %x\n",
206 		    vexit->u.vmx.status,
207 		    vexit->u.vmx.exit_reason,
208 		    vexit->u.vmx.exit_qualification,
209 		    vexit->u.vmx.inst_type,
210 		    vexit->u.vmx.inst_error);
211 		break;
212 	case VM_EXITCODE_SVM:
213 		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
214 		break;
215 	case VM_EXITCODE_INST_EMUL:
216 		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
217 		    vexit->rip);
218 		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
219 		    vexit->u.inst_emul.num_valid : 15;
220 		(void) fprintf(stderr, "\tinstruction bytes: [");
221 		for (uint_t i = 0; i < len; i++) {
222 			(void) fprintf(stderr, "%s%02x",
223 			    i == 0 ? "" : ", ",
224 			    vexit->u.inst_emul.inst[i]);
225 		}
226 		(void) fprintf(stderr, "]\n");
227 		break;
228 	case VM_EXITCODE_SUSPENDED:
229 		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
230 		switch (vexit->u.suspended.how) {
231 		case VM_SUSPEND_RESET:
232 			(void) fprintf(stderr, "\thow: reset");
233 			break;
234 		case VM_SUSPEND_POWEROFF:
235 			(void) fprintf(stderr, "\thow: poweroff");
236 			break;
237 		case VM_SUSPEND_HALT:
238 			(void) fprintf(stderr, "\thow: halt");
239 			break;
240 		case VM_SUSPEND_TRIPLEFAULT:
241 			(void) fprintf(stderr, "\thow: triple-fault");
242 			break;
243 		default:
244 			(void) fprintf(stderr, "\thow: unknown - %d",
245 			    vexit->u.suspended.how);
246 			break;
247 		}
248 		break;
249 	default:
250 		(void) fprintf(stderr, "Unexpected code %d exit:\n"
251 		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
252 		break;
253 	}
254 	fail_finish();
255 }
256 
257 void
258 test_pass(void)
259 {
260 	assert(test_name != NULL);
261 	(void) printf("PASS %s\n", test_name);
262 	test_cleanup(false);
263 	exit(EXIT_SUCCESS);
264 }
265 
266 static int
267 load_payload(struct vmctx *ctx)
268 {
269 	extern uint8_t payload_data;
270 	extern uint32_t payload_size;
271 
272 	const uint32_t len = payload_size;
273 	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);
274 
275 	if (len > cap) {
276 		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
277 	}
278 
279 	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
280 	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
281 	bcopy(&payload_data, outp, len);
282 
283 	return (0);
284 }
285 
286 struct vmctx *
287 test_initialize(const char *tname)
288 {
289 	char vm_name[VM_MAX_NAMELEN];
290 	int err;
291 	struct vmctx *ctx;
292 
293 	assert(test_vmctx == NULL);
294 	assert(test_name == NULL);
295 
296 	test_name = strdup(tname);
297 	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
298 	    test_name, getpid());
299 
300 	err = vm_create(vm_name, 0);
301 	if (err != 0) {
302 		test_fail_errno(err, "Could not create VM");
303 	}
304 
305 	ctx = vm_open(vm_name);
306 	if (ctx == NULL) {
307 		test_fail_errno(errno, "Could not open VM");
308 	}
309 	test_vmctx = ctx;
310 
311 	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
312 	if (err != 0) {
313 		test_fail_errno(err, "Could not set up VM memory");
314 	}
315 
316 	populate_identity_table(ctx);
317 	populate_desc_tables(ctx);
318 
319 	err = load_payload(ctx);
320 	if (err != 0) {
321 		test_fail_errno(err, "Could not load payload");
322 	}
323 
324 	return (ctx);
325 }
326 
327 int
328 test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp)
329 {
330 	int err;
331 
332 	err = vm_activate_cpu(ctx, vcpu);
333 	if (err != 0 && err != EBUSY) {
334 		return (err);
335 	}
336 
337 	/*
338 	 * Granularity bit important here for VMX validity:
339 	 * "If any bit in the limit field in the range 31:20 is 1, G must be 1"
340 	 */
341 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX,
342 	    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G);
343 	if (err != 0) {
344 		return (err);
345 	}
346 
347 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX,
348 	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
349 	    SEG_ACCESS_D | SEG_ACCESS_G);
350 	if (err != 0) {
351 		return (err);
352 	}
353 
354 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX,
355 	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G);
356 	if (err != 0) {
357 		return (err);
358 	}
359 
360 	/*
361 	 * While SVM will happilly run with an otherwise unusable TR, VMX
362 	 * includes it among its entry checks.
363 	 */
364 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
365 	    SDT_SYSTSSBSY | SEG_ACCESS_P);
366 	if (err != 0) {
367 		return (err);
368 	}
369 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0);
370 	if (err != 0) {
371 		return (err);
372 	}
373 	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0);
374 	if (err != 0) {
375 		return (err);
376 	}
377 
378 	/* Mark unused segments as explicitly unusable (for VMX) */
379 	const int unsable_segs[] = {
380 		VM_REG_GUEST_ES,
381 		VM_REG_GUEST_FS,
382 		VM_REG_GUEST_GS,
383 		VM_REG_GUEST_LDTR,
384 	};
385 	for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) {
386 		err = vm_set_desc(ctx, vcpu, unsable_segs[i], 0, 0,
387 		    SEG_ACCESS_UNUSABLE);
388 		if (err != 0) {
389 			return (err);
390 		}
391 	}
392 
393 	/* Place CPU directly in long mode */
394 	const int regnums[] = {
395 		VM_REG_GUEST_CR0,
396 		VM_REG_GUEST_CR3,
397 		VM_REG_GUEST_CR4,
398 		VM_REG_GUEST_EFER,
399 		VM_REG_GUEST_RFLAGS,
400 		VM_REG_GUEST_RIP,
401 		VM_REG_GUEST_RSP,
402 		VM_REG_GUEST_CS,
403 		VM_REG_GUEST_SS,
404 		VM_REG_GUEST_DS,
405 		VM_REG_GUEST_TR,
406 	};
407 	uint64_t regvals[] = {
408 		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
409 		    CR0_MP | CR0_PE,
410 		MEM_LOC_PAGE_TABLE_512G,
411 		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
412 		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
413 		/* start with interrupts disabled */
414 		PS_MB1,
415 		rip,
416 		rsp,
417 		(GDT_KCODE << 3),
418 		(GDT_KDATA << 3),
419 		(GDT_KDATA << 3),
420 		(GDT_KTSS << 3),
421 	};
422 	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));
423 
424 	err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums,
425 	    regvals);
426 	if (err != 0) {
427 		return (err);
428 	}
429 
430 	err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0);
431 	if (err != 0) {
432 		return (err);
433 	}
434 
435 	return (0);
436 }
437 
438 static enum vm_exit_kind
439 which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
440 {
441 	const struct vm_inout *inout = &vexit->u.inout;
442 
443 	switch (vexit->exitcode) {
444 	case VM_EXITCODE_BOGUS:
445 	case VM_EXITCODE_REQIDLE:
446 		bzero(ventry, sizeof (ventry));
447 		return (VEK_REENTR);
448 	case VM_EXITCODE_INOUT:
449 		if (inout->port == IOP_TEST_RESULT &&
450 		    (inout->flags & INOUT_IN) == 0) {
451 			if (inout->eax == TEST_RESULT_PASS) {
452 				return (VEK_TEST_PASS);
453 			} else {
454 				return (VEK_TEST_FAIL);
455 			}
456 		}
457 		break;
458 	default:
459 		break;
460 	}
461 	return (VEK_UNHANDLED);
462 }
463 
464 enum vm_exit_kind
465 test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry,
466     struct vm_exit *vexit)
467 {
468 	int err;
469 
470 	err = vm_run(ctx, vcpu, ventry, vexit);
471 	if (err != 0) {
472 		test_fail_errno(err, "Failure during vcpu entry");
473 	}
474 
475 	return (which_exit_kind(ventry, vexit));
476 }
477 
478 void
479 ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
480     uint32_t data)
481 {
482 	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);
483 
484 	ventry->cmd = VEC_FULFILL_INOUT;
485 	bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout));
486 	if ((ventry->u.inout.flags & INOUT_IN) != 0) {
487 		ventry->u.inout.eax = data;
488 	}
489 }
490 
491 void
492 ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
493     uint64_t data)
494 {
495 	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);
496 
497 	ventry->cmd = VEC_FULFILL_MMIO;
498 	bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio));
499 	if (ventry->u.mmio.read != 0) {
500 		ventry->u.mmio.data = data;
501 	}
502 }
503 
504 bool
505 vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
506     uint_t len, uint32_t *valp)
507 {
508 	if (vexit->exitcode != VM_EXITCODE_INOUT) {
509 		return (false);
510 	}
511 
512 	const uint_t flag = is_read ? INOUT_IN : 0;
513 	if (vexit->u.inout.port != port ||
514 	    vexit->u.inout.bytes != len ||
515 	    (vexit->u.inout.flags & INOUT_IN) != flag) {
516 		return (false);
517 	}
518 
519 	if (!is_read && valp != NULL) {
520 		*valp = vexit->u.inout.eax;
521 	}
522 	return (true);
523 }
524 
525 bool
526 vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
527     uint_t len, uint64_t *valp)
528 {
529 	if (vexit->exitcode != VM_EXITCODE_MMIO) {
530 		return (false);
531 	}
532 
533 	if (vexit->u.mmio.gpa != addr ||
534 	    vexit->u.mmio.bytes != len ||
535 	    (vexit->u.mmio.read != 0) != is_read) {
536 		return (false);
537 	}
538 
539 	if (!is_read && valp != NULL) {
540 		*valp = vexit->u.mmio.data;
541 	}
542 	return (true);
543 }
544