/*	$OpenBSD: vmm.c,v 1.41 2016/09/01 17:09:33 mlarkin Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* nitems */
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/mman.h>

#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>
#include <dev/pci/pcireg.h>

#include <machine/param.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <imsg.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>

#include "vmd.h"
#include "vmm.h"
#include "loadfile.h"
#include "pci.h"
#include "virtio.h"
#include "proc.h"
#include "i8253.h"
#include "i8259.h"
#include "ns8250.h"
#include "mc146818.h"

io_fn_t ioports_map[MAX_PORTS];

void vmm_sighdlr(int, short, void *);
int start_client_vmd(void);
int opentap(void);
int start_vm(struct imsg *, uint32_t *);
int terminate_vm(struct vm_terminate_params *);
int get_info_vm(struct privsep *, struct imsg *, int);
int run_vm(int *, int *, struct vm_create_params *, struct vcpu_reg_state *);
void *vcpu_run_loop(void *);
int vcpu_exit(struct vm_run_params *);
int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
void create_memory_map(struct vm_create_params *);
int alloc_guest_mem(struct vm_create_params *);
int vmm_create_vm(struct vm_create_params *);
void init_emulated_hw(struct vm_create_params *, int *, int *);
void vcpu_exit_inout(struct vm_run_params *);
uint8_t vcpu_exit_pci(struct vm_run_params *);
int vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
void vmm_run(struct privsep *, struct privsep_proc *, void *);
int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);

static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
    size_t);

int con_fd;
struct vmd_vm *current_vm;

extern struct vmd *env;

extern char *__progname;

pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM];
pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM];
uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM];

static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	vmm_dispatch_parent },
};

/*
 * Represents a standard register set for an OS to be booted
 * as a flat 32 bit address space, before paging is enabled.
 *
 * NOT set here are:
 * RIP
 * RSP
 * GDTR BASE
 *
 * Specific bootloaders should clone this structure and override
 * those fields as needed.
 *
 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
 * features of the CPU in use.
 */
static const struct vcpu_reg_state vcpu_init_flat32 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = CR0_CD | CR0_NW | CR0_ET | CR0_PE | CR0_PG,
	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
};
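
/*
 * Illustrative sketch only, not code vmd uses: a bootloader that enters
 * a kernel in flat 32 bit mode could clone the template above and fill
 * in the fields it leaves unset. All addresses below are hypothetical
 * values chosen for the example, and vsi_base is assumed to be the base
 * field of struct vcpu_segment_info.
 */
#if 0
static struct vcpu_reg_state
example_flat32_entry(void)
{
	struct vcpu_reg_state vrs = vcpu_init_flat32;

	vrs.vrs_gprs[VCPU_REGS_RIP] = 0x100000; /* hypothetical entry point */
	vrs.vrs_gprs[VCPU_REGS_RSP] = 0x78000;	/* hypothetical stack top */
	vrs.vrs_gdtr.vsi_base = 0x74000;	/* hypothetical GDT base */

	return (vrs);
}
#endif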

pid_t
vmm(struct privsep *ps, struct privsep_proc *p)
{
	return (proc_run(ps, p, procs, nitems(procs), vmm_run, NULL));
}

void
vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	if (config_init(ps->ps_env) == -1)
		fatal("failed to initialize configuration");

	signal_del(&ps->ps_evsigchld);
	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
	signal_add(&ps->ps_evsigchld, NULL);

#if 0
	/*
	 * pledge in the vmm process:
	 * stdio - for malloc and basic I/O including events.
	 * vmm - for the vmm ioctls and operations.
	 * proc - for forking and maintaining vms.
	 * recvfd - for disks, interfaces and other fds.
	 */
	/* XXX'ed pledge to hide it from grep as long as it's disabled */
	if (XXX("stdio vmm recvfd proc", NULL) == -1)
		fatal("pledge");
#endif

	/* Get and terminate all running VMs */
	get_info_vm(ps, NULL, 1);
}

int
vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep *ps = p->p_ps;
	int res = 0, cmd = 0;
	struct vm_create_params vcp;
	struct vm_terminate_params vtp;
	struct vmop_result vmr;
	uint32_t id = 0;
	struct vmd_vm *vm;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vcp);
		memcpy(&vcp, imsg->data, sizeof(vcp));
		res = config_getvm(ps, &vcp, imsg->fd, imsg->hdr.peerid);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_DISK:
		res = config_getdisk(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_IF:
		res = config_getif(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_END:
		res = start_vm(imsg, &id);
		cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vtp);
		memcpy(&vtp, imsg->data, sizeof(vtp));
		id = vtp.vtp_vm_id;
		res = terminate_vm(&vtp);
		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
		if (res == 0) {
			/* Remove local reference */
			vm = vm_getbyid(id);
			vm_remove(vm);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		res = get_info_vm(ps, imsg, 0);
		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
		break;
	case IMSG_CTL_RESET:
		config_getreset(env, imsg);
		break;
	default:
		return (-1);
	}

	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		if (res != 0) {
			vm = vm_getbyvmid(imsg->hdr.peerid);
			vm_remove(vm);
		}
		/* FALLTHROUGH */
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}
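
/*
 * For orientation: a sketch of the sending side of the start-VM exchange
 * handled above, reconstructed from the handlers rather than quoted from
 * the parent process (message order and arguments are inferred):
 *
 *	proc_compose_imsg(ps, PROC_VMM, -1, IMSG_VMDOP_START_VM_REQUEST,
 *	    vmid, kernel_fd, &vcp, sizeof(vcp));
 *	... one IMSG_VMDOP_START_VM_DISK per disk fd ...
 *	... one IMSG_VMDOP_START_VM_IF per tap fd ...
 *	proc_compose_imsg(ps, PROC_VMM, -1, IMSG_VMDOP_START_VM_END,
 *	    vmid, tty_fd, NULL, 0);
 *
 * The fd passed with the final message becomes the VM's tty (see
 * start_vm below); the reply is an IMSG_VMDOP_START_VM_RESPONSE carrying
 * a struct vmop_result.
 */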

void
vmm_sighdlr(int sig, short event, void *arg)
{
	struct privsep *ps = arg;
	int status;
	uint32_t vmid;
	pid_t pid;
	struct vmop_result vmr;
	struct vmd_vm *vm;
	struct vm_terminate_params vtp;

	switch (sig) {
	case SIGCHLD:
		do {
			pid = waitpid(-1, &status, WNOHANG);
			if (pid <= 0)
				continue;

			if (WIFEXITED(status) || WIFSIGNALED(status)) {
				vm = vm_getbypid(pid);
				if (vm == NULL) {
					/*
					 * If the VM is gone already, it
					 * got terminated via an
					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
					 */
					continue;
				}

				vmid = vm->vm_params.vcp_id;
				vtp.vtp_vm_id = vmid;
				if (terminate_vm(&vtp) == 0) {
					memset(&vmr, 0, sizeof(vmr));
					vmr.vmr_result = 0;
					vmr.vmr_id = vmid;
					vm_remove(vm);
					if (proc_compose_imsg(ps, PROC_PARENT,
					    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
					    0, -1, &vmr, sizeof(vmr)) == -1)
						log_warnx("could not signal "
						    "termination of VM %u to "
						    "parent", vmid);
				} else
					log_warnx("could not terminate VM %u",
					    vmid);
			} else
				fatalx("unexpected cause of SIGCHLD");
		} while (pid > 0 || (pid == -1 && errno == EINTR));
		break;
	default:
		fatalx("unexpected signal");
	}
}

/*
 * vcpu_reset
 *
 * Requests vmm(4) to reset a VCPU in the indicated VM to
 * the register state provided
 *
 * Parameters:
 *  vmid: VM ID to reset
 *  vcpu_id: VCPU ID to reset
 *  vrs: the register state to initialize
 *
 * Return values:
 *  0: success
 *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
 *      valid)
 */
int
vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
{
	struct vm_resetcpu_params vrp;

	memset(&vrp, 0, sizeof(vrp));
	vrp.vrp_vm_id = vmid;
	vrp.vrp_vcpu_id = vcpu_id;
	memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));

	log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);

	if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) < 0)
		return (errno);

	return (0);
}

/*
 * terminate_vm
 *
 * Requests vmm(4) to terminate the VM whose ID is provided in the
 * supplied vm_terminate_params structure (vtp->vtp_vm_id)
 *
 * Parameters:
 *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
 *
 * Return values:
 *  0: success
 *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not
 *      valid)
 */
int
terminate_vm(struct vm_terminate_params *vtp)
{
	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) < 0)
		return (errno);

	return (0);
}

/*
 * opentap
 *
 * Opens the next available tap device, up to MAX_TAP.
 *
 * Returns a file descriptor to the tap node opened, or -1 if no tap
 * devices were available.
 */
int
opentap(void)
{
	int i, fd;
	char path[PATH_MAX];

	for (i = 0; i < MAX_TAP; i++) {
		snprintf(path, PATH_MAX, "/dev/tap%d", i);
		fd = open(path, O_RDWR | O_NONBLOCK);
		if (fd != -1)
			return (fd);
	}

	return (-1);
}

/*
 * start_vm
 *
 * Starts a new VM with the creation parameters supplied (in the incoming
 * imsg->data field). This function performs a basic sanity check on the
 * incoming parameters and then performs the following steps to complete
 * the creation of the VM:
 *
 * 1. opens the VM disk image files specified in the VM creation parameters
 * 2. opens the specified VM kernel
 * 3. creates a VM console tty pair using openpty
 * 4. forks, passing the file descriptors opened in steps 1-3 to the child
 *    vmd responsible for running the VM's VCPU loops.
 *
 * Parameters:
 *  imsg: The incoming imsg body whose 'data' field is a vm_create_params
 *      struct containing the VM creation parameters.
 *  id: Returns the VM id as reported by the kernel.
 *
 * Return values:
 *  0: success
 *  !0: failure - typically an errno indicating the source of the failure
 */
int
start_vm(struct imsg *imsg, uint32_t *id)
{
	struct vm_create_params *vcp;
	struct vmd_vm *vm;
	size_t i;
	int ret = EINVAL;
	int fds[2];
	struct vcpu_reg_state vrs;

	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
		log_warnx("%s: can't find vm", __func__);
		ret = ENOENT;
		goto err;
	}
	vcp = &vm->vm_params;

	if ((vm->vm_tty = imsg->fd) == -1) {
		log_warnx("%s: can't get tty", __func__);
		goto err;
	}

	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
		fatal("socketpair");

	/* Start child vmd for this VM (fork) */
	ret = start_client_vmd();

	/* Start child failed? - cleanup and leave */
	if (ret == -1) {
		log_warnx("%s: start child failed", __func__);
		ret = EIO;
		goto err;
	}

	if (ret > 0) {
		/* Parent */
		vm->vm_pid = ret;

		for (i = 0; i < vcp->vcp_ndisks; i++) {
			close(vm->vm_disks[i]);
			vm->vm_disks[i] = -1;
		}

		for (i = 0; i < vcp->vcp_nnics; i++) {
			close(vm->vm_ifs[i]);
			vm->vm_ifs[i] = -1;
		}

		close(vm->vm_kernel);
		vm->vm_kernel = -1;

		close(vm->vm_tty);
		vm->vm_tty = -1;

		/* read back the kernel-generated vm id from the child */
		close(fds[1]);
		if (read(fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
		    sizeof(vcp->vcp_id))
			fatal("read vcp id");
		close(fds[0]);

		if (vcp->vcp_id == 0)
			goto err;

		*id = vcp->vcp_id;

		return (0);
	} else {
		/* Child */
		setproctitle("%s", vcp->vcp_name);
		log_procinit(vcp->vcp_name);

		create_memory_map(vcp);
		ret = alloc_guest_mem(vcp);
		if (ret) {
			errno = ret;
			fatal("could not allocate guest memory - exiting");
		}

		ret = vmm_create_vm(vcp);
		current_vm = vm;

		/* send back the kernel-generated vm id (0 on error) */
		close(fds[0]);
		if (write(fds[1], &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
		    sizeof(vcp->vcp_id))
			fatal("write vcp id");
		close(fds[1]);

		if (ret) {
			errno = ret;
			fatal("create vmm ioctl failed - exiting");
		}

#if 0
		/*
		 * pledge in the vm processes:
		 * stdio - for malloc and basic I/O including events.
		 * vmm - for the vmm ioctls and operations.
		 */
		if (XXX("stdio vmm", NULL) == -1)
			fatal("pledge");
#endif

		/*
		 * Set up default "flat 32 bit" register state - RIP,
		 * RSP, and GDT info will be set in bootloader
		 */
		memcpy(&vrs, &vcpu_init_flat32, sizeof(struct vcpu_reg_state));

		/* Load kernel image */
		ret = loadelf_main(vm->vm_kernel, vcp, &vrs);
		if (ret) {
			errno = ret;
			fatal("failed to load kernel - exiting");
		}

		close(vm->vm_kernel);

		con_fd = vm->vm_tty;
		if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1)
			fatal("failed to set nonblocking mode on console");

		/* Execute the vcpu run loop(s) for this VM */
		ret = run_vm(vm->vm_disks, vm->vm_ifs, vcp, &vrs);

		_exit(ret != 0);
	}

	return (0);

err:
	vm_remove(vm);

	return (ret);
}

/*
 * get_info_vm
 *
 * Returns a list of VMs known to vmm(4).
 *
 * Parameters:
 *  ps: the privsep context.
 *  imsg: the received imsg including the peer id.
 *  terminate: terminate the listed vm.
 *
 * Return values:
 *  0: success
 *  !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
 */
int
get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
{
	int ret;
	size_t ct, i;
	struct vm_info_params vip;
	struct vm_info_result *info;
	struct vm_terminate_params vtp;
	struct vmop_info_result vir;

	/*
	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
	 * buffer size of 0, which results in vmm(4) returning the
	 * number of bytes required back to us in vip.vip_size,
	 * and then we call it again after malloc'ing the required
	 * number of bytes.
	 *
	 * It is possible that we could fail a second time (eg, if
	 * another VM was created in the instant between the two
	 * ioctls), but in that case the caller can just try again,
	 * as vmm(4) will return a zero-sized list.
	 */
	vip.vip_size = 0;
	info = NULL;
	ret = 0;
	memset(&vir, 0, sizeof(vir));

	/* First ioctl to see how many bytes needed (vip.vip_size) */
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) < 0)
		return (errno);

	if (vip.vip_info_ct != 0)
		return (EIO);

	info = malloc(vip.vip_size);
	if (info == NULL)
		return (ENOMEM);

	/* Second ioctl to get the actual list */
	vip.vip_info = info;
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) < 0) {
		ret = errno;
		free(info);
		return (ret);
	}

	/* Return info */
	ct = vip.vip_size / sizeof(struct vm_info_result);
	for (i = 0; i < ct; i++) {
		if (terminate) {
			vtp.vtp_vm_id = info[i].vir_id;
			if ((ret = terminate_vm(&vtp)) != 0) {
				free(info);
				return (ret);
			}
			log_debug("%s: terminated VM %s (id %d)", __func__,
			    info[i].vir_name, info[i].vir_id);
			continue;
		}
		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
		if (proc_compose_imsg(ps, PROC_PARENT, -1,
		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
		    &vir, sizeof(vir)) == -1) {
			free(info);
			return (EIO);
		}
	}
	free(info);
	return (0);
}

/*
 * start_client_vmd
 *
 * forks a copy of the vmm process to run a single VM. No chroot or
 * privilege dropping is done here: the vmm process the child is forked
 * from is already running without privileges, so the child inherits
 * that state (see the comment in the child branch below).
 *
 * Return values (returns to both child and parent on success):
 *  -1: failure
 *  0: return to child vmd returns 0
 *  !0: return to parent vmd returns the child's pid
 */
int
start_client_vmd(void)
{
	int child_pid;

	child_pid = fork();
	if (child_pid < 0)
		return (-1);

	if (!child_pid) {
		/* child, already running without privileges */
		return (0);
	}

	/* Parent */
	return (child_pid);
}

/*
 * create_memory_map
 *
 * Sets up the guest physical memory ranges that the VM can access.
 *
 * Parameters:
 *  vcp: VM create parameters; on entry the first memory range holds the
 *      requested memory size in MB.
 *
 * Return values:
 *  nothing
 */
void
create_memory_map(struct vm_create_params *vcp)
{
	size_t len, mem_bytes, mem_mb;

	mem_mb = vcp->vcp_memranges[0].vmr_size;
	vcp->vcp_nmemranges = 0;
	if (mem_mb < 1 || mem_mb > VMM_MAX_VM_MEM_SIZE)
		return;

	mem_bytes = mem_mb * 1024 * 1024;

	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
	len = LOWMEM_KB * 1024;
	vcp->vcp_memranges[0].vmr_gpa = 0x0;
	vcp->vcp_memranges[0].vmr_size = len;
	mem_bytes -= len;

	/*
	 * Second memory region: LOWMEM_KB - 1MB.
	 *
	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
	 * We have to add this region, because some systems
	 * unconditionally write to 0xb8000 (VGA RAM), and
	 * we need to make sure that vmm(4) permits accesses
	 * to it. So allocate guest memory for it.
	 */
	len = 0x100000 - LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_size = len;
	mem_bytes -= len;

	/* Make sure that we do not place physical memory into MMIO ranges. */
	if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - 0x100000)
		len = VMM_PCI_MMIO_BAR_BASE - 0x100000;
	else
		len = mem_bytes;

	/* Third memory region: 1MB - (1MB + len) */
	vcp->vcp_memranges[2].vmr_gpa = 0x100000;
	vcp->vcp_memranges[2].vmr_size = len;
	mem_bytes -= len;

	if (mem_bytes > 0) {
		/* Fourth memory region for the remaining memory (if any) */
		vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
		vcp->vcp_memranges[3].vmr_size = mem_bytes;
		vcp->vcp_nmemranges = 4;
	} else
		vcp->vcp_nmemranges = 3;
}
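
/*
 * Summary of the guest physical layout produced by create_memory_map
 * above (the boundaries come from LOWMEM_KB and the VMM_PCI_MMIO_BAR_*
 * constants):
 *
 *	range 0: [0, LOWMEM_KB * 1024)		DOS low memory
 *	range 1: [LOWMEM_KB * 1024, 1MB)	ROM/VGA hole, backed anyway
 *	range 2: [1MB, at most the MMIO base)	bulk of guest memory
 *	range 3: [MMIO end + 1, ...)		remainder, if any
 */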

/*
 * alloc_guest_mem
 *
 * Allocates memory for the guest.
 * Instead of doing a single allocation with one mmap(), we allocate memory
 * separately for every range for the following reasons:
 * - ASLR for the individual ranges
 * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
 *   map the single mmap'd userspace memory to the individual guest physical
 *   memory ranges, the underlying amap of the single mmap'd range would have
 *   to allocate per-page reference counters. The reason is that the
 *   individual guest physical ranges would reference the single mmap'd region
 *   only partially. However, if every guest physical range has its own
 *   corresponding mmap'd userspace allocation, there are no partial
 *   references: every guest physical range fully references an mmap'd
 *   range => no per-page reference counters have to be allocated.
 *
 * Return values:
 *  0: success
 *  !0: failure - errno indicating the source of the failure
 */
int
alloc_guest_mem(struct vm_create_params *vcp)
{
	void *p;
	int ret;
	size_t i, j;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON, -1, 0);
		if (p == MAP_FAILED) {
			ret = errno;
			for (j = 0; j < i; j++) {
				vmr = &vcp->vcp_memranges[j];
				munmap((void *)vmr->vmr_va, vmr->vmr_size);
			}

			return (ret);
		}

		vmr->vmr_va = (vaddr_t)p;
	}

	return (0);
}

/*
 * vmm_create_vm
 *
 * Requests vmm(4) to create a new VM using the supplied creation
 * parameters. This operation results in the creation of the in-kernel
 * structures for the VM, but does not start the VM's vcpu(s).
 *
 * Parameters:
 *  vcp: vm_create_params struct containing the VM's desired creation
 *      configuration
 *
 * Return values:
 *  0: success
 *  !0: ioctl to vmm(4) failed
 */
int
vmm_create_vm(struct vm_create_params *vcp)
{
	/* Sanity check arguments */
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
		return (EINVAL);

	if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) < 0)
		return (errno);

	return (0);
}

/*
 * init_emulated_hw
 *
 * Initializes the userspace hardware emulation
 */
void
init_emulated_hw(struct vm_create_params *vcp, int *child_disks,
    int *child_taps)
{
	int i;

	/* Reset the IO port map */
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_init(vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;

	/* Init mc146818 RTC */
	mc146818_init(vcp->vcp_id);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init master and slave PICs */
	i8259_init();
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;

	/* Init ns8250 UART */
	ns8250_init(con_fd);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Initialize PCI */
	for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	pci_init();

	/* Initialize virtio devices */
	virtio_init(vcp, child_disks, child_taps);
}
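
/*
 * Illustrative sketch only, not code vmd uses: the shape of an io_fn_t
 * handler as dispatched from vcpu_exit_inout() below. The "scratch port"
 * device is hypothetical; the real handlers (vcpu_exit_i8253,
 * vcpu_exit_com, ...) live with their devices. Returning 0xFF means no
 * interrupt should be injected for this exit.
 */
#if 0
static uint8_t example_scratch;

static uint8_t
vcpu_exit_scratch(struct vm_run_params *vrp)
{
	union vm_exit *vei = vrp->vrp_exit;

	if (vei->vei.vei_dir == VEI_DIR_OUT)	/* guest 'out' instruction */
		example_scratch = vei->vei.vei_data;
	else					/* guest 'in' instruction */
		vei->vei.vei_data = example_scratch;

	return (0xFF);
}
#endif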

/*
 * run_vm
 *
 * Runs the VM whose creation parameters are specified in vcp
 *
 * Parameters:
 *  child_disks: previously-opened child VM disk image file descriptors
 *  child_taps: previously-opened child tap file descriptors
 *  vcp: vm_create_params struct containing the VM's desired creation
 *      configuration
 *  vrs: VCPU register state to initialize
 *
 * Return values:
 *  0: the VM exited normally
 *  !0: the VM exited abnormally or failed to start
 */
int
run_vm(int *child_disks, int *child_taps, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs)
{
	size_t i;
	int ret;
	pthread_t *tid;
	struct vm_run_params **vrp;
#if 0
	void *exit_status;
#endif

	if (vcp == NULL)
		return (EINVAL);

	if (child_disks == NULL && vcp->vcp_ndisks != 0)
		return (EINVAL);

	if (child_taps == NULL && vcp->vcp_nnics != 0)
		return (EINVAL);

	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	ret = 0;

	event_init();

	tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
	vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
	if (tid == NULL || vrp == NULL) {
		log_warn("%s: memory allocation error - exiting.",
		    __progname);
		return (ENOMEM);
	}

	log_debug("%s: initializing hardware for vm %s", __func__,
	    vcp->vcp_name);

	init_emulated_hw(vcp, child_disks, child_taps);

	log_debug("%s: starting vcpu threads for vm %s", __func__,
	    vcp->vcp_name);

	/*
	 * Create and launch one thread for each VCPU. These threads may
	 * migrate between PCPUs over time; the need to reload CPU state
	 * in such situations is detected and performed by vmm(4) in the
	 * kernel.
	 */
	for (i = 0; i < vcp->vcp_ncpus; i++) {
		vrp[i] = malloc(sizeof(struct vm_run_params));
		if (vrp[i] == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip free'ing */
			return (ENOMEM);
		}
		vrp[i]->vrp_exit = malloc(sizeof(union vm_exit));
		if (vrp[i]->vrp_exit == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip free'ing */
			return (ENOMEM);
		}
		vrp[i]->vrp_vm_id = vcp->vcp_id;
		vrp[i]->vrp_vcpu_id = i;

		if (vcpu_reset(vcp->vcp_id, i, vrs)) {
			log_warnx("%s: cannot reset VCPU %zu - exiting.",
			    __progname, i);
			return (EIO);
		}

		ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize cond var (%d)",
			    __progname, ret);
			return (ret);
		}

		ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize mtx (%d)",
			    __progname, ret);
			return (ret);
		}

		vcpu_hlt[i] = 0;

		/* Start each VCPU run thread at vcpu_run_loop */
		ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]);
		if (ret) {
			/* caller will _exit after this return */
			return (ret);
		}
	}

	log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
	ret = event_dispatch();

#if 0
	/* XXX need to handle clean exits now */

	/* Wait for all the threads to exit */
	for (i = 0; i < vcp->vcp_ncpus; i++) {
		if (pthread_join(tid[i], &exit_status)) {
			log_warnx("%s: failed to join thread %zd - "
			    "exiting", __progname, i);
			return (EIO);
		}

		if (exit_status != NULL) {
			log_warnx("%s: vm %d vcpu run thread %zd exited "
			    "abnormally", __progname, vcp->vcp_id, i);
			ret = EIO;
		}
	}
#endif

	return (ret);
}

/*
 * vcpu_run_loop
 *
 * Runs a single VCPU until vmm(4) requires help handling an exit,
 * or the VM terminates.
 *
 * Parameters:
 *  arg: vm_run_params for the VCPU being run by this thread
 *
 * Return values:
 *  NULL: the VCPU shutdown properly
 *  !NULL: error processing VCPU run, or the VCPU shutdown abnormally
 */
void *
vcpu_run_loop(void *arg)
{
	struct vm_run_params *vrp = (struct vm_run_params *)arg;
	intptr_t ret;
	int irq;
	uint32_t n;

	vrp->vrp_continue = 0;
	n = vrp->vrp_vcpu_id;

	for (;;) {
		ret = pthread_mutex_lock(&vcpu_run_mtx[n]);

		if (ret) {
			log_warnx("%s: can't lock vcpu run mtx (%d)",
			    __func__, (int)ret);
			return ((void *)ret);
		}

		/* If we are halted, wait */
		if (vcpu_hlt[n]) {
			ret = pthread_cond_wait(&vcpu_run_cond[n],
			    &vcpu_run_mtx[n]);

			if (ret) {
				log_warnx("%s: can't wait on cond (%d)",
				    __func__, (int)ret);
				(void)pthread_mutex_unlock(&vcpu_run_mtx[n]);
				return ((void *)ret);
			}
		}

		ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);
		if (ret) {
			log_warnx("%s: can't unlock mutex on cond (%d)",
			    __func__, (int)ret);
			return ((void *)ret);
		}

		if (vrp->vrp_irqready && i8259_is_pending()) {
			irq = i8259_ack();
			vrp->vrp_irq = irq;
		} else
			vrp->vrp_irq = 0xFFFF;

		/* Still more pending? */
		if (i8259_is_pending()) {
			/*
			 * XXX can probably avoid ioctls here by providing
			 * intr in vrp
			 */
			if (vcpu_pic_intr(vrp->vrp_vm_id,
			    vrp->vrp_vcpu_id, 1)) {
				fatal("can't set INTR");
			}
		} else {
			if (vcpu_pic_intr(vrp->vrp_vm_id,
			    vrp->vrp_vcpu_id, 0)) {
				fatal("can't clear INTR");
			}
		}

		if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) < 0) {
			/* If run ioctl failed, exit */
			ret = errno;
			log_warn("%s: vm %d / vcpu %d run ioctl failed",
			    __func__, vrp->vrp_vm_id, n);
			return ((void *)ret);
		}

		/* If the VM is terminating, exit normally */
		if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED)
			return (NULL);

		if (vrp->vrp_exit_reason != VM_EXIT_NONE) {
			/*
			 * vmm(4) needs help handling an exit, handle in
			 * vcpu_exit.
			 */
			if (vcpu_exit(vrp))
				return ((void *)EIO);
		}
	}

	return (NULL);
}

/*
 * vcpu_pic_intr
 *
 * Raises or lowers the emulated PIC's INTR line on the supplied vm/vcpu
 * via the VMM_IOC_INTR ioctl.
 *
 * Parameters:
 *  vm_id: VM ID
 *  vcpu_id: VCPU ID
 *  intr: 1 to assert the INTR line, 0 to deassert it
 *
 * Return values:
 *  0: success
 *  !0: ioctl to vmm(4) failed
 */
int
vcpu_pic_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
{
	struct vm_intr_params vip;

	memset(&vip, 0, sizeof(vip));

	vip.vip_vm_id = vm_id;
	vip.vip_vcpu_id = vcpu_id; /* XXX always 0? */
	vip.vip_intr = intr;

	if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) < 0)
		return (errno);

	return (0);
}

/*
 * vcpu_exit_pci
 *
 * Handle all I/O to the emulated PCI subsystem.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return value:
 *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
 *  be injected.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
	union vm_exit *vei = vrp->vrp_exit;
	uint8_t intr;

	intr = 0xFF;

	switch (vei->vei.vei_port) {
	case PCI_MODE1_ADDRESS_REG:
		pci_handle_address_reg(vrp);
		break;
	case PCI_MODE1_DATA_REG:
		pci_handle_data_reg(vrp);
		break;
	case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
		intr = pci_handle_io(vrp);
		break;
	default:
		log_warnx("%s: unknown PCI register 0x%llx",
		    __progname, (uint64_t)vei->vei.vei_port);
		break;
	}

	return (intr);
}

/*
 * vcpu_exit_inout
 *
 * Handle all I/O exits that need to be emulated in vmd. This includes the
 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
	union vm_exit *vei = vrp->vrp_exit;
	uint8_t intr = 0xFF;

	if (ioports_map[vei->vei.vei_port] != NULL)
		intr = ioports_map[vei->vei.vei_port](vrp);
	else if (vei->vei.vei_dir == VEI_DIR_IN)
		vei->vei.vei_data = 0xFFFFFFFF;

	if (intr != 0xFF)
		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}

/*
 * vcpu_exit
 *
 * Handle a vcpu exit. This function is called when it is determined that
 * vmm(4) requires the assistance of vmd to support a particular guest
 * exit type (eg, accessing an I/O port or device). Guest state is contained
 * in 'vrp', and will be resent to vmm(4) on exit completion.
 *
 * Upon conclusion of handling the exit, the function determines if any
 * interrupts should be injected into the guest, and asserts the proper
 * IRQ line whose interrupt should be vectored.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: the exit was handled successfully
 *  1: an error occurred (eg, unknown exit reason passed in 'vrp')
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
	int ret;

	switch (vrp->vrp_exit_reason) {
	case VMX_EXIT_IO:
		vcpu_exit_inout(vrp);
		break;
	case VMX_EXIT_HLT:
		ret = pthread_mutex_lock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
		if (ret) {
			log_warnx("%s: can't lock vcpu mutex (%d)",
			    __func__, ret);
			return (1);
		}
		vcpu_hlt[vrp->vrp_vcpu_id] = 1;
		ret = pthread_mutex_unlock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
		if (ret) {
			log_warnx("%s: can't unlock vcpu mutex (%d)",
			    __func__, ret);
			return (1);
		}
		break;
	case VMX_EXIT_INT_WINDOW:
		break;
	default:
		log_warnx("%s: unknown exit reason %d",
		    __progname, vrp->vrp_exit_reason);
		return (1);
	}

	/* XXX this may not be irq 9 all the time */
	/* XXX change this to poll on the tap interface */
	if (vionet_process_rx())
		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, 9);

	/* XXX temporary until this is polled */
	if (vcpu_com1_needs_intr())
		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, 4);

	vrp->vrp_continue = 1;

	return (0);
}

/*
 * find_gpa_range
 *
 * Search for a contiguous guest physical mem range.
 *
 * Parameters:
 *  vcp: VM create parameters that contain the memory map to search in
 *  gpa: the starting guest physical address
 *  len: the length of the memory range
 *
 * Return values:
 *  NULL: on failure, if there is no memory range as described by the
 *      parameters
 *  Pointer to the vm_mem_range that contains the start of the range
 *      otherwise
 */
static struct vm_mem_range *
find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
{
	size_t i, n;
	struct vm_mem_range *vmr;

	/* Find the first vm_mem_range that contains gpa */
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa < vmr->vmr_gpa + vmr->vmr_size)
			break;
	}

	/* No range found. */
	if (i == vcp->vcp_nmemranges)
		return (NULL);

	/*
	 * vmr may cover the range [gpa, gpa + len) only partly. Make
	 * sure that the following vm_mem_ranges are contiguous and
	 * cover the rest.
	 */
	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
	if (len < n)
		len = 0;
	else
		len -= n;
	gpa = vmr->vmr_gpa + vmr->vmr_size;
	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa != vmr->vmr_gpa)
			return (NULL);
		if (len <= vmr->vmr_size)
			len = 0;
		else
			len -= vmr->vmr_size;

		gpa = vmr->vmr_gpa + vmr->vmr_size;
	}

	if (len != 0)
		return (NULL);

	return (vmr);
}
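
/*
 * Worked example for find_gpa_range, using hypothetical numbers in the
 * shape of the map built by create_memory_map: with ranges [0, 640KB)
 * and [640KB, 1MB), a call with gpa = 636KB and len = 8KB matches the
 * first range, leaves 4KB of the request uncovered, and then checks that
 * the next range starts exactly at 640KB before declaring the whole span
 * valid.
 */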

/*
 * write_mem
 *
 * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
 *
 * Parameters:
 *  dst: the destination paddr_t in the guest VM
 *  buf: data to copy
 *  len: number of bytes to copy
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [dst, dst + len) does not
 *      exist in the guest.
 */
int
write_mem(paddr_t dst, void *buf, size_t len)
{
	char *from = buf, *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		from += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * read_mem
 *
 * Reads memory at guest paddr 'src' into 'buf'.
 *
 * Parameters:
 *  src: the source paddr_t in the guest VM to read from.
 *  buf: destination (local) buffer
 *  len: number of bytes to read
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [src, src + len) does not
 *      exist in the guest.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		to += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * vcpu_assert_pic_irq
 *
 * Injects the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to inject to
 *  vcpu_id: VCPU ID to inject to
 *  irq: IRQ to inject
 */
void
vcpu_assert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	int ret;

	i8259_assert_irq(irq);

	if (i8259_is_pending()) {
		if (vcpu_pic_intr(vm_id, vcpu_id, 1))
			fatalx("%s: can't assert INTR", __func__);

		ret = pthread_mutex_lock(&vcpu_run_mtx[vcpu_id]);
		if (ret)
			fatalx("%s: can't lock vcpu mtx (%d)", __func__, ret);

		vcpu_hlt[vcpu_id] = 0;
		ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
		if (ret)
			fatalx("%s: can't signal (%d)", __func__, ret);
		ret = pthread_mutex_unlock(&vcpu_run_mtx[vcpu_id]);
		if (ret)
			fatalx("%s: can't unlock vcpu mtx (%d)", __func__, ret);
	}
}

/*
 * fd_hasdata
 *
 * Determines if data can be read from a file descriptor.
 *
 * Parameters:
 *  fd: the fd to check
 *
 * Return values:
 *  1 if data can be read from an fd, or 0 otherwise.
 */
int
fd_hasdata(int fd)
{
	struct pollfd pfd[1];
	int nready, hasdata = 0;

	pfd[0].fd = fd;
	pfd[0].events = POLLIN;
	nready = poll(pfd, 1, 0);
	if (nready == -1)
		log_warn("checking file descriptor for data failed");
	else if (nready == 1 && pfd[0].revents & POLLIN)
		hasdata = 1;
	return (hasdata);
}
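
/*
 * Illustrative sketch only, not code vmd uses: a round trip through the
 * guest memory helpers above. The guest physical address is a
 * hypothetical scratch location.
 */
#if 0
static int
example_guest_mem_roundtrip(void)
{
	char out[16] = "hello, guest";
	char in[16];

	if (write_mem(0x7000, out, sizeof(out)))	/* hypothetical gpa */
		return (-1);
	if (read_mem(0x7000, in, sizeof(in)))
		return (-1);

	return (memcmp(in, out, sizeof(in)) != 0);
}
#endif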