/*	$OpenBSD: x86_vm.c,v 1.2 2024/07/12 13:51:12 dv Exp $	*/
/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/stat.h>
#include <sys/types.h>

#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>

#include <machine/psl.h>
#include <machine/pte.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <zlib.h>

#include "atomicio.h"
#include "fw_cfg.h"
#include "i8253.h"
#include "i8259.h"
#include "loadfile.h"
#include "mc146818.h"
#include "ns8250.h"
#include "pci.h"
#include "virtio.h"

typedef uint8_t (*io_fn_t)(struct vm_run_params *);

#define MAX_PORTS 65536

io_fn_t	ioports_map[MAX_PORTS];
extern char *__progname;

void	create_memory_map(struct vm_create_params *);
int	translate_gva(struct vm_exit*, uint64_t, uint64_t *, int);

static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
    size_t);
static int	loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
static int	vcpu_exit_eptviolation(struct vm_run_params *);
static void	vcpu_exit_inout(struct vm_run_params *);

extern struct vmd_vm	*current_vm;
extern int		 con_fd;

/*
 * Represents a standard register set for an OS to be booted
 * as a flat 64 bit address space.
 *
 * NOT set here are:
 *  RIP
 *  RSP
 *  GDTR BASE
 *
 * Specific bootloaders should clone this structure and override
 * those fields as needed.
 *
 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
 * features of the CPU in use.
 */
static const struct vcpu_reg_state vcpu_init_flat64 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE | CR0_PG,
	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
	.vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
	.vrs_crs[VCPU_REGS_PDPTE0] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_msrs[VCPU_REGS_MISC_ENABLE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};

/*
 * Represents a standard register set for a BIOS to be booted
 * as a flat 16 bit address space.
 */
static const struct vcpu_reg_state vcpu_init_flat16 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
	.vrs_crs[VCPU_REGS_CR3] = 0,
	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};
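
/*
 * Illustrative sketch (not used by the build): a loader wanting the flat
 * 64 bit template above would clone it and patch the fields called out in
 * the comment (RIP, RSP, GDTR base), roughly:
 *
 *	struct vcpu_reg_state vrs;
 *
 *	memcpy(&vrs, &vcpu_init_flat64, sizeof(vrs));
 *	vrs.vrs_gprs[VCPU_REGS_RIP] = entry_gpa;
 *	vrs.vrs_gprs[VCPU_REGS_RSP] = stack_gpa;
 *	vrs.vrs_gdtr.vsi_base = gdt_gpa;
 *
 * The entry_gpa/stack_gpa/gdt_gpa names are placeholders and the vsi_base
 * member is assumed from struct vcpu_segment_info in vmmvar.h.
 */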

/*
 * create_memory_map
 *
 * Sets up the guest physical memory ranges that the VM can access.
 *
 * Parameters:
 *  vcp: VM create parameters describing the VM whose memory map
 *       is being created
 *
 * Return values:
 *  nothing
 */
void
create_memory_map(struct vm_create_params *vcp)
{
	size_t len, mem_bytes;
	size_t above_1m = 0, above_4g = 0;

	mem_bytes = vcp->vcp_memranges[0].vmr_size;
	vcp->vcp_nmemranges = 0;
	if (mem_bytes == 0 || mem_bytes > VMM_MAX_VM_MEM_SIZE)
		return;

	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
	len = LOWMEM_KB * 1024;
	vcp->vcp_memranges[0].vmr_gpa = 0x0;
	vcp->vcp_memranges[0].vmr_size = len;
	vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
	mem_bytes -= len;

	/*
	 * Second memory region: LOWMEM_KB - 1MB.
	 *
	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
	 * We have to add this region, because some systems
	 * unconditionally write to 0xb8000 (VGA RAM), and
	 * we need to make sure that vmm(4) permits accesses
	 * to it. So allocate guest memory for it.
	 */
	len = MB(1) - (LOWMEM_KB * 1024);
	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_size = len;
	vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
	mem_bytes -= len;

	/* If we have less than 2MB remaining, still create a 2nd BIOS area. */
	if (mem_bytes <= MB(2)) {
		vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
		vcp->vcp_memranges[2].vmr_size = MB(2);
		vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
		vcp->vcp_nmemranges = 3;
		return;
	}

	/*
	 * Calculate how to split any remaining memory across the 4GB
	 * boundary while making sure we do not place physical memory into
	 * MMIO ranges.
	 */
	if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - MB(1)) {
		above_1m = VMM_PCI_MMIO_BAR_BASE - MB(1);
		above_4g = mem_bytes - above_1m;
	} else {
		above_1m = mem_bytes;
		above_4g = 0;
	}

	/* Third memory region: area above 1MB to MMIO region */
	vcp->vcp_memranges[2].vmr_gpa = MB(1);
	vcp->vcp_memranges[2].vmr_size = above_1m;
	vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;

	/* Fourth region: PCI MMIO range */
	vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE;
	vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END -
	    VMM_PCI_MMIO_BAR_BASE + 1;
	vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO;

	/* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
	vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
	vcp->vcp_memranges[4].vmr_size = MB(2);
	vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;

	/* Sixth region: any remainder above 4GB */
	if (above_4g > 0) {
		vcp->vcp_memranges[5].vmr_gpa = GB(4);
		vcp->vcp_memranges[5].vmr_size = above_4g;
		vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
		vcp->vcp_nmemranges = 6;
	} else
		vcp->vcp_nmemranges = 5;
}
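
/*
 * Illustrative layout (sketch): for a guest large enough to spill past the
 * PCI MMIO hole, create_memory_map() above produces six ranges, roughly:
 *
 *	[0] 0x0                    .. LOWMEM_KB*1024          RAM (DOS low mem)
 *	[1] LOWMEM_KB*1024         .. 1MB                     reserved (VGA/ROM)
 *	[2] 1MB                    .. VMM_PCI_MMIO_BAR_BASE   RAM
 *	[3] VMM_PCI_MMIO_BAR_BASE  .. VMM_PCI_MMIO_BAR_END    MMIO (PCI)
 *	[4] VMM_PCI_MMIO_BAR_END+1 .. 4GB                     reserved (BIOS copy)
 *	[5] 4GB                    .. 4GB + remainder         RAM
 *
 * Guests with no memory left above 4GB omit range [5] (vcp_nmemranges = 5).
 */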

int
load_firmware(struct vmd_vm *vm, struct vcpu_reg_state *vrs)
{
	int ret;
	gzFile fp;
	struct stat sb;

	/*
	 * Set up default "flat 64 bit" register state - RIP, RSP, and
	 * GDT info will be set in bootloader
	 */
	memcpy(vrs, &vcpu_init_flat64, sizeof(*vrs));

	/* Find and open kernel image */
	if ((fp = gzdopen(vm->vm_kernel, "r")) == NULL)
		fatalx("failed to open kernel - exiting");

	/* Load kernel image */
	ret = loadfile_elf(fp, vm, vrs, vm->vm_params.vmc_bootdevice);

	/*
	 * Try BIOS as a fallback (only if it was provided as an image
	 * with vm->vm_kernel and the file is not compressed)
	 */
	if (ret && errno == ENOEXEC && vm->vm_kernel != -1 &&
	    gzdirect(fp) && (ret = fstat(vm->vm_kernel, &sb)) == 0)
		ret = loadfile_bios(fp, sb.st_size, vrs);

	gzclose(fp);

	return (ret);
}


/*
 * loadfile_bios
 *
 * As an alternative to loadfile_elf, this function loads a non-ELF BIOS image
 * directly into memory.
 *
 * Parameters:
 *  fp: file handle of the BIOS image to load
 *  size: uncompressed size of the image
 *  (out) vrs: register state to set on init for this kernel
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from read(2) or loadelf functions
 */
int
loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_state *vrs)
{
	off_t off;

	/* Set up a "flat 16 bit" register state for BIOS */
	memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));

	/* Seek to the beginning of the BIOS image */
	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* The BIOS image must end at 1MB */
	if ((off = MB(1) - size) < 0)
		return (-1);

	/* Read BIOS image into memory */
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* Read a second BIOS copy into memory ending at 4GB */
	off = GB(4) - size;
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	log_debug("%s: loaded BIOS image", __func__);

	return (0);
}
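
/*
 * Worked example (sketch): for a hypothetical 256KB BIOS image,
 * loadfile_bios() above places the two copies so that each ends exactly at
 * the 1MB and 4GB boundaries:
 *
 *	off = MB(1) - size;	-> 0x100000    - 0x40000 = 0xC0000
 *	off = GB(4) - size;	-> 0x100000000 - 0x40000 = 0xFFFC0000
 *
 * The 256KB figure is only an example; any image up to 1MB in size works,
 * since MB(1) - size must not go negative.
 */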

/*
 * init_emulated_hw
 *
 * Initializes the userspace hardware emulation
 */
void
init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	size_t i;
	uint64_t memlo, memhi;

	/* Calculate memory size for NVRAM registers */
	memlo = memhi = 0;
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		if (vcp->vcp_memranges[i].vmr_gpa == MB(1) &&
		    vcp->vcp_memranges[i].vmr_size > (15 * MB(1)))
			memlo = vcp->vcp_memranges[i].vmr_size - (15 * MB(1));
		else if (vcp->vcp_memranges[i].vmr_gpa == GB(4))
			memhi = vcp->vcp_memranges[i].vmr_size;
	}

	/* Reset the IO port map */
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_init(vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
	ioports_map[PCKBC_AUX] = vcpu_exit_i8253_misc;

	/* Init mc146818 RTC */
	mc146818_init(vcp->vcp_id, memlo, memhi);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init master and slave PICs */
	i8259_init();
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
	ioports_map[ELCR0] = vcpu_exit_elcr;
	ioports_map[ELCR1] = vcpu_exit_elcr;

	/* Init ns8250 UART */
	ns8250_init(con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_init();

	/* Initialize virtio devices */
	virtio_init(current_vm, child_cdrom, child_disks, child_taps);

	/*
	 * Init QEMU fw_cfg interface. Must be done last for pci hardware
	 * detection.
	 */
	fw_cfg_init(vmc);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;
}
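
/*
 * Worked example (sketch): the memlo/memhi values handed to mc146818_init()
 * above come straight from the memory map built by create_memory_map().
 * For a guest that fills the space below the PCI MMIO hole, the range
 * starting at 1MB has size VMM_PCI_MMIO_BAR_BASE - MB(1), so
 *
 *	memlo = (VMM_PCI_MMIO_BAR_BASE - MB(1)) - 15 * MB(1)
 *
 * i.e. the bytes above 16MB and below the hole, while memhi is the size of
 * the range starting at 4GB (0 if nothing lives there).  These feed the
 * NVRAM memory-size registers the guest firmware reads back.
 */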

/*
 * restore_emulated_hw
 *
 * Restores the userspace hardware emulation from fd
 */
void
restore_emulated_hw(struct vm_create_params *vcp, int fd,
    int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK], int child_cdrom)
{
	/* struct vm_create_params *vcp = &vmc->vmc_params; */
	int i;
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_restore(fd, vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;

	/* Init master and slave PICs */
	i8259_restore(fd);
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;

	/* Init ns8250 UART */
	ns8250_restore(fd, con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Init mc146818 RTC */
	mc146818_restore(fd, vcp->vcp_id);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init QEMU fw_cfg interface */
	fw_cfg_restore(fd);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_restore(fd);
	virtio_restore(fd, current_vm, child_cdrom, child_disks, child_taps);
}

void
pause_vm_md(struct vmd_vm *vm)
{
	i8253_stop();
	mc146818_stop();
	ns8250_stop();
	virtio_stop(vm);
}

void
unpause_vm_md(struct vmd_vm *vm)
{
	i8253_start();
	mc146818_start();
	ns8250_start();
	virtio_start(vm);
}

int
dump_devs(int fd)
{
	int ret = 0;

	if ((ret = i8253_dump(fd)))
		return ret;
	if ((ret = i8259_dump(fd)))
		return ret;
	if ((ret = ns8250_dump(fd)))
		return ret;
	if ((ret = mc146818_dump(fd)))
		return ret;
	ret = fw_cfg_dump(fd);

	return ret;
}

int
dump_send_header(int fd) {
	struct vm_dump_header vmh;
	int i;

	memcpy(&vmh.vmh_signature, VM_DUMP_SIGNATURE,
	    sizeof(vmh.vmh_signature));

	vmh.vmh_cpuids[0].code = 0x00;
	vmh.vmh_cpuids[0].leaf = 0x00;

	vmh.vmh_cpuids[1].code = 0x01;
	vmh.vmh_cpuids[1].leaf = 0x00;

	vmh.vmh_cpuids[2].code = 0x07;
	vmh.vmh_cpuids[2].leaf = 0x00;

	vmh.vmh_cpuids[3].code = 0x0d;
	vmh.vmh_cpuids[3].leaf = 0x00;

	vmh.vmh_cpuids[4].code = 0x80000001;
	vmh.vmh_cpuids[4].leaf = 0x00;

	vmh.vmh_version = VM_DUMP_VERSION;

	for (i=0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		CPUID_LEAF(vmh.vmh_cpuids[i].code,
		    vmh.vmh_cpuids[i].leaf,
		    vmh.vmh_cpuids[i].a,
		    vmh.vmh_cpuids[i].b,
		    vmh.vmh_cpuids[i].c,
		    vmh.vmh_cpuids[i].d);
	}

	if (atomicio(vwrite, fd, &vmh, sizeof(vmh)) != sizeof(vmh))
		return (-1);

	return (0);
}


/*
 * vcpu_exit_inout
 *
 * Handle all I/O exits that need to be emulated in vmd. This includes the
 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr = 0xFF;

	if (vei->vei.vei_rep || vei->vei.vei_string) {
#ifdef MMIO_DEBUG
		log_info("%s: %s%s%s %d-byte, enc=%d, data=0x%08x, port=0x%04x",
		    __func__,
		    vei->vei.vei_rep == 0 ? "" : "REP ",
		    vei->vei.vei_dir == VEI_DIR_IN ? "IN" : "OUT",
		    vei->vei.vei_string == 0 ? "" : "S",
		    vei->vei.vei_size, vei->vei.vei_encoding,
		    vei->vei.vei_data, vei->vei.vei_port);
		log_info("%s: ECX = 0x%llx, RDX = 0x%llx, RSI = 0x%llx",
		    __func__,
		    vei->vrs.vrs_gprs[VCPU_REGS_RCX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RDX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RSI]);
#endif /* MMIO_DEBUG */
		fatalx("%s: can't emulate REP prefixed IN(S)/OUT(S)",
		    __func__);
	}

	if (ioports_map[vei->vei.vei_port] != NULL)
		intr = ioports_map[vei->vei.vei_port](vrp);
	else if (vei->vei.vei_dir == VEI_DIR_IN)
		set_return_data(vei, 0xFFFFFFFF);

	vei->vrs.vrs_gprs[VCPU_REGS_RIP] += vei->vei.vei_insn_len;

	if (intr != 0xFF)
		vcpu_assert_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}
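
/*
 * Handler contract sketch (illustration only, not a real device): an
 * io_fn_t registered in ioports_map receives the run parameters of the
 * exiting vcpu and returns the IRQ to assert, or 0xFF for none:
 *
 *	uint8_t
 *	vcpu_exit_mydev(struct vm_run_params *vrp)
 *	{
 *		struct vm_exit *vei = vrp->vrp_exit;
 *		uint32_t data = 0;
 *
 *		if (vei->vei.vei_dir == VEI_DIR_OUT)
 *			get_input_data(vei, &data);	// guest OUT: consume
 *		else
 *			set_return_data(vei, 0);	// guest IN: supply
 *		return (0xFF);				// no interrupt
 *	}
 *
 * vcpu_exit_mydev is a made-up name; the real handlers live in the device
 * emulation files (i8253.c, ns8250.c, mc146818.c, ...).
 */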

/*
 * vcpu_exit
 *
 * Handle a vcpu exit. This function is called when it is determined that
 * vmm(4) requires the assistance of vmd to support a particular guest
 * exit type (eg, accessing an I/O port or device). Guest state is contained
 * in 'vrp', and will be resent to vmm(4) on exit completion.
 *
 * Upon conclusion of handling the exit, the function determines if any
 * interrupts should be injected into the guest, and asserts the proper
 * IRQ line whose interrupt should be vectored.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: the exit was handled successfully
 *  1: an error occurred (eg, unknown exit reason passed in 'vrp')
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
	int ret;

	switch (vrp->vrp_exit_reason) {
	case VMX_EXIT_INT_WINDOW:
	case SVM_VMEXIT_VINTR:
	case VMX_EXIT_CPUID:
	case VMX_EXIT_EXTINT:
	case SVM_VMEXIT_INTR:
	case SVM_VMEXIT_MSR:
	case SVM_VMEXIT_CPUID:
		/*
		 * We may be exiting to vmd to handle a pending interrupt but
		 * at the same time the last exit type may have been one of
		 * these. In this case, there's nothing extra to be done
		 * here (and falling through to the default case below results
		 * in more vmd log spam).
		 */
		break;
	case SVM_VMEXIT_NPF:
	case VMX_EXIT_EPT_VIOLATION:
		ret = vcpu_exit_eptviolation(vrp);
		if (ret)
			return (ret);
		break;
	case VMX_EXIT_IO:
	case SVM_VMEXIT_IOIO:
		vcpu_exit_inout(vrp);
		break;
	case VMX_EXIT_HLT:
	case SVM_VMEXIT_HLT:
		vcpu_halt(vrp->vrp_vcpu_id);
		break;
	case VMX_EXIT_TRIPLE_FAULT:
	case SVM_VMEXIT_SHUTDOWN:
		/* reset VM */
		return (EAGAIN);
	default:
		log_debug("%s: unknown exit reason 0x%x",
		    __progname, vrp->vrp_exit_reason);
	}

	return (0);
}

/*
 * vcpu_exit_eptviolation
 *
 * handle an EPT Violation
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: no action required
 *  EFAULT: a protection fault occurred, kill the vm.
 */
static int
vcpu_exit_eptviolation(struct vm_run_params *vrp)
{
	struct vm_exit *ve = vrp->vrp_exit;
	int ret = 0;
#if MMIO_NOTYET
	struct x86_insn insn;
	uint64_t va, pa;
	size_t len = 15;	/* Max instruction length in x86. */
#endif /* MMIO_NOTYET */
	switch (ve->vee.vee_fault_type) {
	case VEE_FAULT_HANDLED:
		break;

#if MMIO_NOTYET
	case VEE_FAULT_MMIO_ASSIST:
		/* Intel VMX might give us the length of the instruction. */
		if (ve->vee.vee_insn_info & VEE_LEN_VALID)
			len = ve->vee.vee_insn_len;

		if (len > 15)
			fatalx("%s: invalid instruction length %lu", __func__,
			    len);

		/* If we weren't given instruction bytes, we need to fetch. */
		if (!(ve->vee.vee_insn_info & VEE_BYTES_VALID)) {
			memset(ve->vee.vee_insn_bytes, 0,
			    sizeof(ve->vee.vee_insn_bytes));
			va = ve->vrs.vrs_gprs[VCPU_REGS_RIP];

			/* XXX Only support instructions that fit on 1 page. */
			if ((va & PAGE_MASK) + len > PAGE_SIZE) {
				log_warnx("%s: instruction might cross page "
				    "boundary", __func__);
				ret = EINVAL;
				break;
			}

			ret = translate_gva(ve, va, &pa, PROT_EXEC);
			if (ret != 0) {
				log_warnx("%s: failed gva translation",
				    __func__);
				break;
			}

			ret = read_mem(pa, ve->vee.vee_insn_bytes, len);
			if (ret != 0) {
				log_warnx("%s: failed to fetch instruction "
				    "bytes from 0x%llx", __func__, pa);
				break;
			}
		}

		ret = insn_decode(ve, &insn);
		if (ret == 0)
			ret = insn_emulate(ve, &insn);
		break;
#endif /* MMIO_NOTYET */

	case VEE_FAULT_PROTECT:
		log_debug("%s: EPT Violation: rip=0x%llx", __progname,
		    ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
		ret = EFAULT;
		break;

	default:
		fatalx("%s: invalid fault_type %d", __progname,
		    ve->vee.vee_fault_type);
		/* UNREACHED */
	}

	return (ret);
}

/*
 * vcpu_exit_pci
 *
 * Handle all I/O to the emulated PCI subsystem.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return value:
 *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
 *      be injected.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr;

	intr = 0xFF;

	switch (vei->vei.vei_port) {
	case PCI_MODE1_ADDRESS_REG:
		pci_handle_address_reg(vrp);
		break;
	case PCI_MODE1_DATA_REG:
	case PCI_MODE1_DATA_REG + 1:
	case PCI_MODE1_DATA_REG + 2:
	case PCI_MODE1_DATA_REG + 3:
		pci_handle_data_reg(vrp);
		break;
	case VM_PCI_IO_BAR_BASE ... VM_PCI_IO_BAR_END:
		intr = pci_handle_io(vrp);
		break;
	default:
		log_warnx("%s: unknown PCI register 0x%llx",
		    __progname, (uint64_t)vei->vei.vei_port);
		break;
	}

	return (intr);
}

/*
 * find_gpa_range
 *
 * Search for a contiguous guest physical mem range.
 *
 * Parameters:
 *  vcp: VM create parameters that contain the memory map to search in
 *  gpa: the starting guest physical address
 *  len: the length of the memory range
 *
 * Return values:
 *  NULL: on failure if there is no memory range as described by the parameters
 *  Pointer to vm_mem_range that contains the start of the range otherwise.
 */
static struct vm_mem_range *
find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
{
	size_t i, n;
	struct vm_mem_range *vmr;

	/* Find the first vm_mem_range that contains gpa */
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa < vmr->vmr_gpa + vmr->vmr_size)
			break;
	}

	/* No range found. */
	if (i == vcp->vcp_nmemranges)
		return (NULL);

	/*
	 * vmr may cover the range [gpa, gpa + len) only partly. Make
	 * sure that the following vm_mem_ranges are contiguous and
	 * cover the rest.
	 */
	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
	if (len < n)
		len = 0;
	else
		len -= n;
	gpa = vmr->vmr_gpa + vmr->vmr_size;
	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa != vmr->vmr_gpa)
			return (NULL);
		if (len <= vmr->vmr_size)
			len = 0;
		else
			len -= vmr->vmr_size;

		gpa = vmr->vmr_gpa + vmr->vmr_size;
	}

	if (len != 0)
		return (NULL);

	return (vmr);
}
/*
 * write_mem
 *
 * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
 *
 * Parameters:
 *  dst: the destination paddr_t in the guest VM
 *  buf: data to copy (or NULL to zero the data)
 *  len: number of bytes to copy
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [dst, dst + len) does not
 *      exist in the guest.
 */
int
write_mem(paddr_t dst, const void *buf, size_t len)
{
	const char *from = buf;
	char *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		if (buf == NULL)
			memset(to, 0, n);
		else {
			memcpy(to, from, n);
			from += n;
		}
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}
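
/*
 * Usage sketch (illustration only): because find_gpa_range() checks for
 * contiguity, a single write_mem() call may span adjacent vm_mem_ranges,
 * and a NULL buffer zero-fills rather than copies:
 *
 *	write_mem(gpa, NULL, PAGE_SIZE);	// clear one guest page
 *	write_mem(gpa, &hdr, sizeof(hdr));	// copy a structure in
 *
 * 'gpa' and 'hdr' are placeholders for whatever the caller is loading.
 */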

/*
 * read_mem
 *
 * Reads memory at guest paddr 'src' into 'buf'.
 *
 * Parameters:
 *  src: the source paddr_t in the guest VM to read from.
 *  buf: destination (local) buffer
 *  len: number of bytes to read
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [src, src + len) does not
 *      exist in the guest.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		to += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * hvaddr_mem
 *
 * Translate a guest physical address to a host virtual address, checking the
 * provided memory range length to confirm it's contiguous within the same
 * guest memory range (vm_mem_range).
 *
 * Parameters:
 *  gpa: guest physical address to translate
 *  len: number of bytes in the intended range
 *
 * Return values:
 *  void* to host virtual memory on success
 *  NULL on error, setting errno to:
 *   EFAULT: gpa falls outside guest memory ranges
 *   EINVAL: requested len extends beyond memory range
 */
void *
hvaddr_mem(paddr_t gpa, size_t len)
{
	struct vm_mem_range *vmr;
	size_t off;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, gpa, len);
	if (vmr == NULL) {
		log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa);
		errno = EFAULT;
		return (NULL);
	}

	off = gpa - vmr->vmr_gpa;
	if (len > (vmr->vmr_size - off)) {
		log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
		    "len=%zu", __func__, gpa, len);
		errno = EINVAL;
		return (NULL);
	}

	return ((char *)vmr->vmr_va + off);
}
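
/*
 * Usage sketch (illustration only): hvaddr_mem() is the zero-copy
 * alternative to read_mem()/write_mem() when a caller wants to touch guest
 * memory in place and the region must not straddle two vm_mem_ranges:
 *
 *	char *p = hvaddr_mem(gpa, len);
 *
 *	if (p == NULL)
 *		return (errno);		// EFAULT or EINVAL, as above
 *	memset(p, 0, len);
 *
 * 'gpa' and 'len' are placeholders supplied by the caller.
 */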

/*
 * vcpu_assert_irq
 *
 * Injects the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to inject to
 *  vcpu_id: VCPU ID to inject to
 *  irq: IRQ to inject
 */
void
vcpu_assert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_assert_irq(irq);

	if (i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 1))
			fatalx("%s: can't assert INTR", __func__);

		vcpu_unhalt(vcpu_id);
		vcpu_signal_run(vcpu_id);
	}
}

/*
 * vcpu_deassert_irq
 *
 * Clears the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to clear in
 *  vcpu_id: VCPU ID to clear in
 *  irq: IRQ to clear
 */
void
vcpu_deassert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_deassert_irq(irq);

	if (!i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 0))
			fatalx("%s: can't deassert INTR for vm_id %d, "
			    "vcpu_id %d", __func__, vm_id, vcpu_id);
	}
}
/*
 * set_return_data
 *
 * Utility function for manipulating register data in vm exit info structs. This
 * function ensures that the data is copied to the vei->vei.vei_data field with
 * the proper size for the operation being performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: return data
 */
void
set_return_data(struct vm_exit *vei, uint32_t data)
{
	switch (vei->vei.vei_size) {
	case 1:
		vei->vei.vei_data &= ~0xFF;
		vei->vei.vei_data |= (uint8_t)data;
		break;
	case 2:
		vei->vei.vei_data &= ~0xFFFF;
		vei->vei.vei_data |= (uint16_t)data;
		break;
	case 4:
		vei->vei.vei_data = data;
		break;
	}
}

/*
 * get_input_data
 *
 * Utility function for manipulating register data in vm exit info
 * structs. This function ensures that the data is copied from the
 * vei->vei.vei_data field with the proper size for the operation being
 * performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: location to store the result
 */
void
get_input_data(struct vm_exit *vei, uint32_t *data)
{
	switch (vei->vei.vei_size) {
	case 1:
		*data &= 0xFFFFFF00;
		*data |= (uint8_t)vei->vei.vei_data;
		break;
	case 2:
		*data &= 0xFFFF0000;
		*data |= (uint16_t)vei->vei.vei_data;
		break;
	case 4:
		*data = vei->vei.vei_data;
		break;
	default:
		log_warnx("%s: invalid i/o size %d", __func__,
		    vei->vei.vei_size);
	}

}
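
/*
 * Worked example (sketch): the masking in set_return_data() and
 * get_input_data() preserves the bytes the guest access did not touch.
 * With vei_size == 1 and vei_data == 0x11223344,
 *
 *	set_return_data(vei, 0xFF);	// vei_data becomes 0x112233FF
 *
 * and with vei_size == 2 and the guest's vei_data == 0xAABBCCDD,
 *
 *	get_input_data(vei, &data);	// only the low 16 bits (0xCCDD)
 *					// replace the low half of 'data'
 */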

/*
 * translate_gva
 *
 * Translates a guest virtual address to a guest physical address by walking
 * the currently active page table (if needed).
 *
 * XXX ensure translate_gva updates the A bit in the PTE
 * XXX ensure translate_gva respects segment base and limits in i386 mode
 * XXX ensure translate_gva respects segment wraparound in i8086 mode
 * XXX ensure translate_gva updates the A bit in the segment selector
 * XXX ensure translate_gva respects CR4.LMSLE if available
 *
 * Parameters:
 *  exit: The VCPU this translation should be performed for (guest MMU settings
 *   are gathered from this VCPU)
 *  va: virtual address to translate
 *  pa: pointer to paddr_t variable that will receive the translated physical
 *   address. 'pa' is unchanged on error.
 *  mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
 *   the address should be translated
 *
 * Return values:
 *  0: the address was successfully translated - 'pa' contains the physical
 *     address currently mapped by 'va'.
 *  EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
 *     and %cr2 set in the vcpu structure.
 *  EINVAL: an error occurred reading paging table structures
 */
int
translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
{
	int level, shift, pdidx;
	uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
	uint64_t shift_width, pte_size;
	struct vcpu_reg_state *vrs;

	vrs = &exit->vrs;

	if (!pa)
		return (EINVAL);

	if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
		log_debug("%s: unpaged, va=pa=0x%llx", __func__, va);
		*pa = va;
		return (0);
	}

	pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3];

	log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__,
	    vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]);

	if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
		if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
			pte_size = sizeof(uint64_t);
			shift_width = 9;

			if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
				/* 4 level paging */
				level = 4;
				mask = L4_MASK;
				shift = L4_SHIFT;
			} else {
				/* 32 bit with PAE paging */
				level = 3;
				mask = L3_MASK;
				shift = L3_SHIFT;
			}
		} else {
			/* 32 bit paging */
			level = 2;
			shift_width = 10;
			mask = 0xFFC00000;
			shift = 22;
			pte_size = sizeof(uint32_t);
		}
	} else
		return (EINVAL);

	/* XXX: Check for R bit in segment selector and set A bit */

	for (;level > 0; level--) {
		pdidx = (va & mask) >> shift;
		pte_paddr = (pt_paddr) + (pdidx * pte_size);

		log_debug("%s: read pte level %d @ GPA 0x%llx", __func__,
		    level, pte_paddr);
		if (read_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to read pte", __func__);
			return (EFAULT);
		}

		log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr,
		    pte);

		/* XXX: Set CR2 */
		if (!(pte & PG_V))
			return (EFAULT);

		/* XXX: Check for SMAP */
		if ((mode == PROT_WRITE) && !(pte & PG_RW))
			return (EPERM);

		if ((exit->cpl > 0) && !(pte & PG_u))
			return (EPERM);

		pte = pte | PG_U;
		if (mode == PROT_WRITE)
			pte = pte | PG_M;
		if (write_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to write back flags to pte",
			    __func__);
			return (EIO);
		}

		/* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
		if (pte & PG_PS)
			break;

		if (level > 1) {
			pt_paddr = pte & PG_FRAME;
			shift -= shift_width;
			mask = mask >> shift_width;
		}
	}

	low_mask = (1 << shift) - 1;
	high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
	*pa = (pte & high_mask) | (va & low_mask);

	log_debug("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, va, *pa);

	return (0);
}
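
/*
 * Worked example (sketch): with 4-level paging (level = 4, shift = L4_SHIFT,
 * shift_width = 9), the loop above consumes one 9-bit index per level and
 * the final masks splice the page frame with the low 12 bits of the
 * address:
 *
 *	pdidx    = (va & L4_MASK) >> L4_SHIFT	// then 9 bits less each level
 *	low_mask = (1 << 12) - 1 = 0xfff	// after three shift reductions
 *	*pa      = (pte & high_mask) | (va & 0xfff)
 *
 * A PG_PS entry ends the walk early, so the low mask then covers a 2MB or
 * 1GB page instead of 4KB.
 */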

int
intr_pending(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_is_pending();
}

int
intr_ack(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_ack();
}

void
intr_toggle_el(struct vmd_vm *vm, int irq, int val)
{
	/* XXX select active interrupt controller */
	pic_set_elcr(irq, val);
}

int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE, strlen(VM_DUMP_SIGNATURE)) != 0) {
		log_warnx("%s: incompatible dump signature", __func__);
		return (-1);
	}

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: b", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

		case 0x80000001:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}