1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

#ifdef COMPAT_FREEBSD13
/*
 * Legacy (FreeBSD 13) layouts of several ioctl argument structures, kept
 * so that old userspace binaries keep working against the current kernel.
 * Do not change these layouts: they define the compat ioctl ABI.
 */
struct vm_stats_old {
	int		cpuid;				/* in */
	int		num_entries;			/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

#define	VM_STATS_OLD \
	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old)

struct vm_snapshot_meta_old {
	void *ctx;			/* unused */
	void *dev_data;
	const char *dev_name;		/* identify userspace devices */
	enum snapshot_req dev_req;	/* identify kernel structs */

	struct vm_snapshot_buffer buffer;

	enum vm_snapshot_op op;
};

#define	VM_SNAPSHOT_REQ_OLD \
	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old)

struct vm_exit_ipi_13 {
	uint32_t	mode;
	uint8_t		vector;
	__BITSET_DEFINE(, 256) dmask;
};

struct vm_exit_13 {
	uint32_t	exitcode;
	int32_t		inst_length;
	uint64_t	rip;
	uint64_t	u[120 / sizeof(uint64_t)];
};

struct vm_run_13 {
	int		cpuid;
	struct vm_exit_13 vm_exit;
};

#define	VM_RUN_13 \
	_IOWR('v', IOCNUM_RUN, struct vm_run_13)

#endif /*
COMPAT_FREEBSD13 */

/* Per-segment state for a device memory segment exposed via its own cdev. */
struct devmem_softc {
	int		segid;
	char		*name;
	struct cdev	*cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

/* Per-VM device state hung off the VM's cdev (si_drv1). */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED	0x01	/* softc is on the global 'head' list */

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

/*
 * Deny access to jailed processes unless the jail was granted the vmm
 * permission (tracked by pr_allow_flag).
 */
static int
vmm_priv_check(struct ucred *ucred)
{

	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

/* Freeze a single vcpu; fails if it cannot transition to VCPU_FROZEN. */
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

/*
 * Thaw a vcpu previously frozen by vcpu_lock_one().  Panics if the vcpu
 * is not in the VCPU_FROZEN state, since that indicates a locking bug.
 */
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
		    vcpuid, state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

/*
 * Freeze every existing vcpu of the VM.  On failure, any vcpus frozen so
 * far are thawed and the vcpu list lock is dropped; on success the caller
 * must eventually call vcpu_unlock_all().
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		/* Roll back: thaw only the vcpus frozen before the failure. */
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(sc, j, vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

/* Thaw all vcpus frozen by vcpu_lock_all() and drop the vcpu list lock. */
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(sc, i, vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

/*
 * Look up a VM softc by name.  Returns NULL if no VM of that name exists
 * or if the caller's credentials may not see the VM owner (cr_cansee).
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

/* Retrieve the softc stashed in the cdev's private pointer. */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

/*
 * read(2)/write(2) handler: copies guest physical memory to/from the
 * caller.  The file offset is interpreted as a guest physical address.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		/* Transfer at most one page per iteration. */
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map.
If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			/* Reads from a hole below maxaddr return zeroes. */
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

/*
 * Copy a memory segment's attributes out to 'mseg'.  For a devmem segment
 * the cdev name suffix is copied into mseg->name; for system memory the
 * name is zeroed.  'len' is the size of mseg->name in the caller's layout.
 */
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

/*
 * Allocate a memory segment.  A named segment is device memory and gets a
 * backing cdev created for it; an unnamed segment is system memory.  The
 * name buffer ownership passes to the devmem cdev on success.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

/* Read 'count' registers into 'regval'; stops at the first failure. */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

/* Write 'count' registers from 'regval'; stops at the first failure. */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * ioctl(2) handler for the VM cdev.  The first switch establishes the
 * required locking (single vcpu, all vcpus, and/or the memseg lock) for
 * the command; the second switch dispatches it.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpuid, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_register_set *vmregset;
	struct vm_run *vmrun;
#ifdef COMPAT_FREEBSD13
	struct vm_run_13 *vmrun_13;
#endif
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix
*pptmsix; 432 #ifdef COMPAT_FREEBSD13 433 struct vm_stats_old *vmstats_old; 434 #endif 435 struct vm_stats *vmstats; 436 struct vm_stat_desc *statdesc; 437 struct vm_x2apic *x2apic; 438 struct vm_gpa_pte *gpapte; 439 struct vm_suspend *vmsuspend; 440 struct vm_gla2gpa *gg; 441 struct vm_cpuset *vm_cpuset; 442 struct vm_intinfo *vmii; 443 struct vm_rtc_time *rtctime; 444 struct vm_rtc_data *rtcdata; 445 struct vm_memmap *mm; 446 struct vm_munmap *mu; 447 struct vm_cpu_topology *topology; 448 struct vm_readwrite_kernemu_device *kernemu; 449 uint64_t *regvals; 450 int *regnums; 451 enum { NONE, SINGLE, ALL } vcpus_locked; 452 bool memsegs_locked; 453 #ifdef BHYVE_SNAPSHOT 454 struct vm_snapshot_meta *snapshot_meta; 455 #ifdef COMPAT_FREEBSD13 456 struct vm_snapshot_meta_old *snapshot_old; 457 #endif 458 #endif 459 460 error = vmm_priv_check(curthread->td_ucred); 461 if (error) 462 return (error); 463 464 sc = vmmdev_lookup2(cdev); 465 if (sc == NULL) 466 return (ENXIO); 467 468 vcpuid = -1; 469 vcpu = NULL; 470 vcpus_locked = NONE; 471 memsegs_locked = false; 472 473 /* 474 * For VMM ioctls that operate on a single vCPU, lookup the 475 * vcpu. For VMM ioctls which require one or more vCPUs to 476 * not be running, lock necessary vCPUs. 477 * 478 * XXX fragile, handle with care 479 * Most of these assume that the first field of the ioctl data 480 * is the vcpuid. 
481 */ 482 switch (cmd) { 483 case VM_RUN: 484 #ifdef COMPAT_FREEBSD13 485 case VM_RUN_13: 486 #endif 487 case VM_GET_REGISTER: 488 case VM_SET_REGISTER: 489 case VM_GET_SEGMENT_DESCRIPTOR: 490 case VM_SET_SEGMENT_DESCRIPTOR: 491 case VM_GET_REGISTER_SET: 492 case VM_SET_REGISTER_SET: 493 case VM_INJECT_EXCEPTION: 494 case VM_GET_CAPABILITY: 495 case VM_SET_CAPABILITY: 496 case VM_SET_X2APIC_STATE: 497 case VM_GLA2GPA: 498 case VM_GLA2GPA_NOFAULT: 499 case VM_ACTIVATE_CPU: 500 case VM_SET_INTINFO: 501 case VM_GET_INTINFO: 502 case VM_RESTART_INSTRUCTION: 503 case VM_GET_KERNEMU_DEV: 504 case VM_SET_KERNEMU_DEV: 505 /* 506 * ioctls that can operate only on vcpus that are not running. 507 */ 508 vcpuid = *(int *)data; 509 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 510 if (vcpu == NULL) { 511 error = EINVAL; 512 goto done; 513 } 514 error = vcpu_lock_one(vcpu); 515 if (error) 516 goto done; 517 vcpus_locked = SINGLE; 518 break; 519 520 #ifdef COMPAT_FREEBSD12 521 case VM_ALLOC_MEMSEG_FBSD12: 522 #endif 523 case VM_ALLOC_MEMSEG: 524 case VM_BIND_PPTDEV: 525 case VM_UNBIND_PPTDEV: 526 case VM_MMAP_MEMSEG: 527 case VM_MUNMAP_MEMSEG: 528 case VM_REINIT: 529 /* 530 * ioctls that modify the memory map must lock memory 531 * segments exclusively. 532 */ 533 vm_xlock_memsegs(sc->vm); 534 memsegs_locked = true; 535 /* FALLTHROUGH */ 536 case VM_MAP_PPTDEV_MMIO: 537 case VM_UNMAP_PPTDEV_MMIO: 538 #ifdef BHYVE_SNAPSHOT 539 case VM_SNAPSHOT_REQ: 540 #ifdef COMPAT_FREEBSD13 541 case VM_SNAPSHOT_REQ_OLD: 542 #endif 543 case VM_RESTORE_TIME: 544 #endif 545 /* 546 * ioctls that operate on the entire virtual machine must 547 * prevent all vcpus from running. 548 */ 549 error = vcpu_lock_all(sc); 550 if (error) 551 goto done; 552 vcpus_locked = ALL; 553 break; 554 555 #ifdef COMPAT_FREEBSD12 556 case VM_GET_MEMSEG_FBSD12: 557 #endif 558 case VM_GET_MEMSEG: 559 case VM_MMAP_GETNEXT: 560 /* 561 * Lock the memory map while it is being inspected. 
562 */ 563 vm_slock_memsegs(sc->vm); 564 memsegs_locked = true; 565 break; 566 567 #ifdef COMPAT_FREEBSD13 568 case VM_STATS_OLD: 569 #endif 570 case VM_STATS: 571 case VM_INJECT_NMI: 572 case VM_LAPIC_IRQ: 573 case VM_GET_X2APIC_STATE: 574 /* 575 * These do not need the vCPU locked but do operate on 576 * a specific vCPU. 577 */ 578 vcpuid = *(int *)data; 579 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 580 if (vcpu == NULL) { 581 error = EINVAL; 582 goto done; 583 } 584 break; 585 586 case VM_LAPIC_LOCAL_IRQ: 587 case VM_SUSPEND_CPU: 588 case VM_RESUME_CPU: 589 /* 590 * These can either operate on all CPUs via a vcpuid of 591 * -1 or on a specific vCPU. 592 */ 593 vcpuid = *(int *)data; 594 if (vcpuid == -1) 595 break; 596 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 597 if (vcpu == NULL) { 598 error = EINVAL; 599 goto done; 600 } 601 break; 602 603 default: 604 break; 605 } 606 607 switch (cmd) { 608 case VM_RUN: { 609 struct vm_exit *vme; 610 611 vmrun = (struct vm_run *)data; 612 vme = vm_exitinfo(vcpu); 613 614 error = vm_run(vcpu); 615 if (error != 0) 616 break; 617 618 error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); 619 if (error != 0) 620 break; 621 if (vme->exitcode == VM_EXITCODE_IPI) { 622 error = copyout(vm_exitinfo_cpuset(vcpu), 623 vmrun->cpuset, 624 min(vmrun->cpusetsize, sizeof(cpuset_t))); 625 if (error != 0) 626 break; 627 if (sizeof(cpuset_t) < vmrun->cpusetsize) { 628 uint8_t *p; 629 630 p = (uint8_t *)vmrun->cpuset + 631 sizeof(cpuset_t); 632 while (p < (uint8_t *)vmrun->cpuset + 633 vmrun->cpusetsize) { 634 if (subyte(p++, 0) != 0) { 635 error = EFAULT; 636 break; 637 } 638 } 639 } 640 } 641 break; 642 } 643 #ifdef COMPAT_FREEBSD13 644 case VM_RUN_13: { 645 struct vm_exit *vme; 646 struct vm_exit_13 *vme_13; 647 648 vmrun_13 = (struct vm_run_13 *)data; 649 vme_13 = &vmrun_13->vm_exit; 650 vme = vm_exitinfo(vcpu); 651 652 error = vm_run(vcpu); 653 if (error == 0) { 654 vme_13->exitcode = vme->exitcode; 655 vme_13->inst_length = vme->inst_length; 656 
vme_13->rip = vme->rip; 657 memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); 658 if (vme->exitcode == VM_EXITCODE_IPI) { 659 struct vm_exit_ipi_13 *ipi; 660 cpuset_t *dmask; 661 int cpu; 662 663 dmask = vm_exitinfo_cpuset(vcpu); 664 ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; 665 BIT_ZERO(256, &ipi->dmask); 666 CPU_FOREACH_ISSET(cpu, dmask) { 667 if (cpu >= 256) 668 break; 669 BIT_SET(256, cpu, &ipi->dmask); 670 } 671 } 672 } 673 break; 674 } 675 #endif 676 case VM_SUSPEND: 677 vmsuspend = (struct vm_suspend *)data; 678 error = vm_suspend(sc->vm, vmsuspend->how); 679 break; 680 case VM_REINIT: 681 error = vm_reinit(sc->vm); 682 break; 683 case VM_STAT_DESC: { 684 statdesc = (struct vm_stat_desc *)data; 685 error = vmm_stat_desc_copy(statdesc->index, 686 statdesc->desc, sizeof(statdesc->desc)); 687 break; 688 } 689 #ifdef COMPAT_FREEBSD13 690 case VM_STATS_OLD: 691 vmstats_old = (struct vm_stats_old *)data; 692 getmicrotime(&vmstats_old->tv); 693 error = vmm_stat_copy(vcpu, 0, 694 nitems(vmstats_old->statbuf), 695 &vmstats_old->num_entries, 696 vmstats_old->statbuf); 697 break; 698 #endif 699 case VM_STATS: { 700 vmstats = (struct vm_stats *)data; 701 getmicrotime(&vmstats->tv); 702 error = vmm_stat_copy(vcpu, vmstats->index, 703 nitems(vmstats->statbuf), 704 &vmstats->num_entries, vmstats->statbuf); 705 break; 706 } 707 case VM_PPTDEV_MSI: 708 pptmsi = (struct vm_pptdev_msi *)data; 709 error = ppt_setup_msi(sc->vm, 710 pptmsi->bus, pptmsi->slot, pptmsi->func, 711 pptmsi->addr, pptmsi->msg, 712 pptmsi->numvec); 713 break; 714 case VM_PPTDEV_MSIX: 715 pptmsix = (struct vm_pptdev_msix *)data; 716 error = ppt_setup_msix(sc->vm, 717 pptmsix->bus, pptmsix->slot, 718 pptmsix->func, pptmsix->idx, 719 pptmsix->addr, pptmsix->msg, 720 pptmsix->vector_control); 721 break; 722 case VM_PPTDEV_DISABLE_MSIX: 723 pptdev = (struct vm_pptdev *)data; 724 error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot, 725 pptdev->func); 726 break; 727 case VM_MAP_PPTDEV_MMIO: 728 pptmmio 
= (struct vm_pptdev_mmio *)data; 729 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 730 pptmmio->func, pptmmio->gpa, pptmmio->len, 731 pptmmio->hpa); 732 break; 733 case VM_UNMAP_PPTDEV_MMIO: 734 pptmmio = (struct vm_pptdev_mmio *)data; 735 error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 736 pptmmio->func, pptmmio->gpa, pptmmio->len); 737 break; 738 case VM_BIND_PPTDEV: 739 pptdev = (struct vm_pptdev *)data; 740 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 741 pptdev->func); 742 break; 743 case VM_UNBIND_PPTDEV: 744 pptdev = (struct vm_pptdev *)data; 745 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 746 pptdev->func); 747 break; 748 case VM_INJECT_EXCEPTION: 749 vmexc = (struct vm_exception *)data; 750 error = vm_inject_exception(vcpu, 751 vmexc->vector, vmexc->error_code_valid, vmexc->error_code, 752 vmexc->restart_instruction); 753 break; 754 case VM_INJECT_NMI: 755 error = vm_inject_nmi(vcpu); 756 break; 757 case VM_LAPIC_IRQ: 758 vmirq = (struct vm_lapic_irq *)data; 759 error = lapic_intr_edge(vcpu, vmirq->vector); 760 break; 761 case VM_LAPIC_LOCAL_IRQ: 762 vmirq = (struct vm_lapic_irq *)data; 763 error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector); 764 break; 765 case VM_LAPIC_MSI: 766 vmmsi = (struct vm_lapic_msi *)data; 767 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); 768 break; 769 case VM_IOAPIC_ASSERT_IRQ: 770 ioapic_irq = (struct vm_ioapic_irq *)data; 771 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); 772 break; 773 case VM_IOAPIC_DEASSERT_IRQ: 774 ioapic_irq = (struct vm_ioapic_irq *)data; 775 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); 776 break; 777 case VM_IOAPIC_PULSE_IRQ: 778 ioapic_irq = (struct vm_ioapic_irq *)data; 779 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); 780 break; 781 case VM_IOAPIC_PINCOUNT: 782 *(int *)data = vioapic_pincount(sc->vm); 783 break; 784 case VM_SET_KERNEMU_DEV: 785 case VM_GET_KERNEMU_DEV: { 786 mem_region_write_t mwrite; 787 
mem_region_read_t mread; 788 bool arg; 789 790 kernemu = (void *)data; 791 792 if (kernemu->access_width > 0) 793 size = (1u << kernemu->access_width); 794 else 795 size = 1; 796 797 if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 798 mread = lapic_mmio_read; 799 mwrite = lapic_mmio_write; 800 } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 801 mread = vioapic_mmio_read; 802 mwrite = vioapic_mmio_write; 803 } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) { 804 mread = vhpet_mmio_read; 805 mwrite = vhpet_mmio_write; 806 } else { 807 error = EINVAL; 808 break; 809 } 810 811 if (cmd == VM_SET_KERNEMU_DEV) 812 error = mwrite(vcpu, kernemu->gpa, 813 kernemu->value, size, &arg); 814 else 815 error = mread(vcpu, kernemu->gpa, 816 &kernemu->value, size, &arg); 817 break; 818 } 819 case VM_ISA_ASSERT_IRQ: 820 isa_irq = (struct vm_isa_irq *)data; 821 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); 822 if (error == 0 && isa_irq->ioapic_irq != -1) 823 error = vioapic_assert_irq(sc->vm, 824 isa_irq->ioapic_irq); 825 break; 826 case VM_ISA_DEASSERT_IRQ: 827 isa_irq = (struct vm_isa_irq *)data; 828 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); 829 if (error == 0 && isa_irq->ioapic_irq != -1) 830 error = vioapic_deassert_irq(sc->vm, 831 isa_irq->ioapic_irq); 832 break; 833 case VM_ISA_PULSE_IRQ: 834 isa_irq = (struct vm_isa_irq *)data; 835 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); 836 if (error == 0 && isa_irq->ioapic_irq != -1) 837 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); 838 break; 839 case VM_ISA_SET_IRQ_TRIGGER: 840 isa_irq_trigger = (struct vm_isa_irq_trigger *)data; 841 error = vatpic_set_irq_trigger(sc->vm, 842 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); 843 break; 844 case VM_MMAP_GETNEXT: 845 mm = (struct vm_memmap *)data; 846 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 847 &mm->segoff, 
&mm->len, &mm->prot, &mm->flags); 848 break; 849 case VM_MMAP_MEMSEG: 850 mm = (struct vm_memmap *)data; 851 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 852 mm->len, mm->prot, mm->flags); 853 break; 854 case VM_MUNMAP_MEMSEG: 855 mu = (struct vm_munmap *)data; 856 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 857 break; 858 #ifdef COMPAT_FREEBSD12 859 case VM_ALLOC_MEMSEG_FBSD12: 860 error = alloc_memseg(sc, (struct vm_memseg *)data, 861 sizeof(((struct vm_memseg_fbsd12 *)0)->name)); 862 break; 863 #endif 864 case VM_ALLOC_MEMSEG: 865 error = alloc_memseg(sc, (struct vm_memseg *)data, 866 sizeof(((struct vm_memseg *)0)->name)); 867 break; 868 #ifdef COMPAT_FREEBSD12 869 case VM_GET_MEMSEG_FBSD12: 870 error = get_memseg(sc, (struct vm_memseg *)data, 871 sizeof(((struct vm_memseg_fbsd12 *)0)->name)); 872 break; 873 #endif 874 case VM_GET_MEMSEG: 875 error = get_memseg(sc, (struct vm_memseg *)data, 876 sizeof(((struct vm_memseg *)0)->name)); 877 break; 878 case VM_GET_REGISTER: 879 vmreg = (struct vm_register *)data; 880 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 881 break; 882 case VM_SET_REGISTER: 883 vmreg = (struct vm_register *)data; 884 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 885 break; 886 case VM_SET_SEGMENT_DESCRIPTOR: 887 vmsegdesc = (struct vm_seg_desc *)data; 888 error = vm_set_seg_desc(vcpu, 889 vmsegdesc->regnum, 890 &vmsegdesc->desc); 891 break; 892 case VM_GET_SEGMENT_DESCRIPTOR: 893 vmsegdesc = (struct vm_seg_desc *)data; 894 error = vm_get_seg_desc(vcpu, 895 vmsegdesc->regnum, 896 &vmsegdesc->desc); 897 break; 898 case VM_GET_REGISTER_SET: 899 vmregset = (struct vm_register_set *)data; 900 if (vmregset->count > VM_REG_LAST) { 901 error = EINVAL; 902 break; 903 } 904 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 905 M_WAITOK); 906 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 907 M_WAITOK); 908 error = copyin(vmregset->regnums, regnums, 
sizeof(regnums[0]) * 909 vmregset->count); 910 if (error == 0) 911 error = vm_get_register_set(vcpu, 912 vmregset->count, regnums, regvals); 913 if (error == 0) 914 error = copyout(regvals, vmregset->regvals, 915 sizeof(regvals[0]) * vmregset->count); 916 free(regvals, M_VMMDEV); 917 free(regnums, M_VMMDEV); 918 break; 919 case VM_SET_REGISTER_SET: 920 vmregset = (struct vm_register_set *)data; 921 if (vmregset->count > VM_REG_LAST) { 922 error = EINVAL; 923 break; 924 } 925 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 926 M_WAITOK); 927 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 928 M_WAITOK); 929 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 930 vmregset->count); 931 if (error == 0) 932 error = copyin(vmregset->regvals, regvals, 933 sizeof(regvals[0]) * vmregset->count); 934 if (error == 0) 935 error = vm_set_register_set(vcpu, 936 vmregset->count, regnums, regvals); 937 free(regvals, M_VMMDEV); 938 free(regnums, M_VMMDEV); 939 break; 940 case VM_GET_CAPABILITY: 941 vmcap = (struct vm_capability *)data; 942 error = vm_get_capability(vcpu, 943 vmcap->captype, 944 &vmcap->capval); 945 break; 946 case VM_SET_CAPABILITY: 947 vmcap = (struct vm_capability *)data; 948 error = vm_set_capability(vcpu, 949 vmcap->captype, 950 vmcap->capval); 951 break; 952 case VM_SET_X2APIC_STATE: 953 x2apic = (struct vm_x2apic *)data; 954 error = vm_set_x2apic_state(vcpu, x2apic->state); 955 break; 956 case VM_GET_X2APIC_STATE: 957 x2apic = (struct vm_x2apic *)data; 958 error = vm_get_x2apic_state(vcpu, &x2apic->state); 959 break; 960 case VM_GET_GPA_PMAP: 961 gpapte = (struct vm_gpa_pte *)data; 962 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), 963 gpapte->gpa, gpapte->pte, &gpapte->ptenum); 964 error = 0; 965 break; 966 case VM_GET_HPET_CAPABILITIES: 967 error = vhpet_getcap((struct vm_hpet_cap *)data); 968 break; 969 case VM_GLA2GPA: { 970 CTASSERT(PROT_READ == VM_PROT_READ); 971 CTASSERT(PROT_WRITE == 
VM_PROT_WRITE); 972 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); 973 gg = (struct vm_gla2gpa *)data; 974 error = vm_gla2gpa(vcpu, &gg->paging, gg->gla, 975 gg->prot, &gg->gpa, &gg->fault); 976 KASSERT(error == 0 || error == EFAULT, 977 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 978 break; 979 } 980 case VM_GLA2GPA_NOFAULT: 981 gg = (struct vm_gla2gpa *)data; 982 error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, 983 gg->prot, &gg->gpa, &gg->fault); 984 KASSERT(error == 0 || error == EFAULT, 985 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 986 break; 987 case VM_ACTIVATE_CPU: 988 error = vm_activate_cpu(vcpu); 989 break; 990 case VM_GET_CPUS: 991 error = 0; 992 vm_cpuset = (struct vm_cpuset *)data; 993 size = vm_cpuset->cpusetsize; 994 if (size < 1 || size > CPU_MAXSIZE / NBBY) { 995 error = ERANGE; 996 break; 997 } 998 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP, 999 M_WAITOK | M_ZERO); 1000 if (vm_cpuset->which == VM_ACTIVE_CPUS) 1001 *cpuset = vm_active_cpus(sc->vm); 1002 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 1003 *cpuset = vm_suspended_cpus(sc->vm); 1004 else if (vm_cpuset->which == VM_DEBUG_CPUS) 1005 *cpuset = vm_debug_cpus(sc->vm); 1006 else 1007 error = EINVAL; 1008 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY)) 1009 error = ERANGE; 1010 if (error == 0) 1011 error = copyout(cpuset, vm_cpuset->cpus, size); 1012 free(cpuset, M_TEMP); 1013 break; 1014 case VM_SUSPEND_CPU: 1015 error = vm_suspend_cpu(sc->vm, vcpu); 1016 break; 1017 case VM_RESUME_CPU: 1018 error = vm_resume_cpu(sc->vm, vcpu); 1019 break; 1020 case VM_SET_INTINFO: 1021 vmii = (struct vm_intinfo *)data; 1022 error = vm_exit_intinfo(vcpu, vmii->info1); 1023 break; 1024 case VM_GET_INTINFO: 1025 vmii = (struct vm_intinfo *)data; 1026 error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); 1027 break; 1028 case VM_RTC_WRITE: 1029 rtcdata = (struct vm_rtc_data *)data; 1030 error = vrtc_nvram_write(sc->vm, rtcdata->offset, 1031 rtcdata->value); 1032 
break; 1033 case VM_RTC_READ: 1034 rtcdata = (struct vm_rtc_data *)data; 1035 error = vrtc_nvram_read(sc->vm, rtcdata->offset, 1036 &rtcdata->value); 1037 break; 1038 case VM_RTC_SETTIME: 1039 rtctime = (struct vm_rtc_time *)data; 1040 error = vrtc_set_time(sc->vm, rtctime->secs); 1041 break; 1042 case VM_RTC_GETTIME: 1043 error = 0; 1044 rtctime = (struct vm_rtc_time *)data; 1045 rtctime->secs = vrtc_get_time(sc->vm); 1046 break; 1047 case VM_RESTART_INSTRUCTION: 1048 error = vm_restart_instruction(vcpu); 1049 break; 1050 case VM_SET_TOPOLOGY: 1051 topology = (struct vm_cpu_topology *)data; 1052 error = vm_set_topology(sc->vm, topology->sockets, 1053 topology->cores, topology->threads, topology->maxcpus); 1054 break; 1055 case VM_GET_TOPOLOGY: 1056 topology = (struct vm_cpu_topology *)data; 1057 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 1058 &topology->threads, &topology->maxcpus); 1059 error = 0; 1060 break; 1061 #ifdef BHYVE_SNAPSHOT 1062 case VM_SNAPSHOT_REQ: 1063 snapshot_meta = (struct vm_snapshot_meta *)data; 1064 error = vm_snapshot_req(sc->vm, snapshot_meta); 1065 break; 1066 #ifdef COMPAT_FREEBSD13 1067 case VM_SNAPSHOT_REQ_OLD: 1068 /* 1069 * The old structure just has an additional pointer at 1070 * the start that is ignored. 1071 */ 1072 snapshot_old = (struct vm_snapshot_meta_old *)data; 1073 snapshot_meta = 1074 (struct vm_snapshot_meta *)&snapshot_old->dev_data; 1075 error = vm_snapshot_req(sc->vm, snapshot_meta); 1076 break; 1077 #endif 1078 case VM_RESTORE_TIME: 1079 error = vm_restore_time(sc->vm); 1080 break; 1081 #endif 1082 default: 1083 error = ENOTTY; 1084 break; 1085 } 1086 1087 done: 1088 if (vcpus_locked == SINGLE) 1089 vcpu_unlock_one(sc, vcpuid, vcpu); 1090 else if (vcpus_locked == ALL) 1091 vcpu_unlock_all(sc); 1092 if (memsegs_locked) 1093 vm_unlock_memsegs(sc->vm); 1094 1095 /* 1096 * Make sure that no handler returns a kernel-internal 1097 * error value to userspace. 
 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

/*
 * mmap(2) handler: translate an offset into the guest physical address
 * space to the VM object backing the containing system memory segment.
 * Device memory segments are refused here (EINVAL); they are mapped via
 * their own devmem cdevs instead.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	/* Executable mappings of guest memory are never allowed. */
	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	/* Walk the memory map until a segment contains [first, last). */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			/* Caller owns the object reference taken here. */
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

/*
 * Final teardown of a VM's device state: freeze vcpu creation and all
 * vcpus, free the devmem list, then destroy the cdev, the VM itself, the
 * held credentials, and unlink the softc from the global list.
 */
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 1178 SLIST_REMOVE_HEAD(&sc->devmem, link); 1179 free(dsc->name, M_VMMDEV); 1180 free(dsc, M_VMMDEV); 1181 } 1182 1183 if (sc->cdev != NULL) 1184 destroy_dev(sc->cdev); 1185 1186 if (sc->vm != NULL) 1187 vm_destroy(sc->vm); 1188 1189 if (sc->ucred != NULL) 1190 crfree(sc->ucred); 1191 1192 if ((sc->flags & VSC_LINKED) != 0) { 1193 mtx_lock(&vmmdev_mtx); 1194 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 1195 mtx_unlock(&vmmdev_mtx); 1196 } 1197 1198 free(sc, M_VMMDEV); 1199 } 1200 1201 static int 1202 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 1203 { 1204 struct devmem_softc *dsc; 1205 struct vmmdev_softc *sc; 1206 struct cdev *cdev; 1207 char *buf; 1208 int error, buflen; 1209 1210 error = vmm_priv_check(req->td->td_ucred); 1211 if (error) 1212 return (error); 1213 1214 buflen = VM_MAX_NAMELEN + 1; 1215 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 1216 strlcpy(buf, "beavis", buflen); 1217 error = sysctl_handle_string(oidp, buf, buflen, req); 1218 if (error != 0 || req->newptr == NULL) 1219 goto out; 1220 1221 mtx_lock(&vmmdev_mtx); 1222 sc = vmmdev_lookup(buf); 1223 if (sc == NULL || sc->cdev == NULL) { 1224 mtx_unlock(&vmmdev_mtx); 1225 error = EINVAL; 1226 goto out; 1227 } 1228 1229 /* 1230 * Setting 'sc->cdev' to NULL is used to indicate that the VM 1231 * is scheduled for destruction. 1232 */ 1233 cdev = sc->cdev; 1234 sc->cdev = NULL; 1235 mtx_unlock(&vmmdev_mtx); 1236 1237 /* 1238 * Destroy all cdevs: 1239 * 1240 * - any new operations on the 'cdev' will return an error (ENXIO). 
1241 * 1242 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 1243 */ 1244 SLIST_FOREACH(dsc, &sc->devmem, link) { 1245 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 1246 destroy_dev(dsc->cdev); 1247 devmem_destroy(dsc); 1248 } 1249 destroy_dev(cdev); 1250 vmmdev_destroy(sc); 1251 error = 0; 1252 1253 out: 1254 free(buf, M_VMMDEV); 1255 return (error); 1256 } 1257 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 1258 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 1259 NULL, 0, sysctl_vmm_destroy, "A", 1260 NULL); 1261 1262 static struct cdevsw vmmdevsw = { 1263 .d_name = "vmmdev", 1264 .d_version = D_VERSION, 1265 .d_ioctl = vmmdev_ioctl, 1266 .d_mmap_single = vmmdev_mmap_single, 1267 .d_read = vmmdev_rw, 1268 .d_write = vmmdev_rw, 1269 }; 1270 1271 static int 1272 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 1273 { 1274 struct vm *vm; 1275 struct cdev *cdev; 1276 struct vmmdev_softc *sc, *sc2; 1277 char *buf; 1278 int error, buflen; 1279 1280 error = vmm_priv_check(req->td->td_ucred); 1281 if (error) 1282 return (error); 1283 1284 buflen = VM_MAX_NAMELEN + 1; 1285 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 1286 strlcpy(buf, "beavis", buflen); 1287 error = sysctl_handle_string(oidp, buf, buflen, req); 1288 if (error != 0 || req->newptr == NULL) 1289 goto out; 1290 1291 mtx_lock(&vmmdev_mtx); 1292 sc = vmmdev_lookup(buf); 1293 mtx_unlock(&vmmdev_mtx); 1294 if (sc != NULL) { 1295 error = EEXIST; 1296 goto out; 1297 } 1298 1299 error = vm_create(buf, &vm); 1300 if (error != 0) 1301 goto out; 1302 1303 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1304 sc->ucred = crhold(curthread->td_ucred); 1305 sc->vm = vm; 1306 SLIST_INIT(&sc->devmem); 1307 1308 /* 1309 * Lookup the name again just in case somebody sneaked in when we 1310 * dropped the lock. 
1311 */ 1312 mtx_lock(&vmmdev_mtx); 1313 sc2 = vmmdev_lookup(buf); 1314 if (sc2 == NULL) { 1315 SLIST_INSERT_HEAD(&head, sc, link); 1316 sc->flags |= VSC_LINKED; 1317 } 1318 mtx_unlock(&vmmdev_mtx); 1319 1320 if (sc2 != NULL) { 1321 vmmdev_destroy(sc); 1322 error = EEXIST; 1323 goto out; 1324 } 1325 1326 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, 1327 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 1328 if (error != 0) { 1329 vmmdev_destroy(sc); 1330 goto out; 1331 } 1332 1333 mtx_lock(&vmmdev_mtx); 1334 sc->cdev = cdev; 1335 sc->cdev->si_drv1 = sc; 1336 mtx_unlock(&vmmdev_mtx); 1337 1338 out: 1339 free(buf, M_VMMDEV); 1340 return (error); 1341 } 1342 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 1343 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 1344 NULL, 0, sysctl_vmm_create, "A", 1345 NULL); 1346 1347 void 1348 vmmdev_init(void) 1349 { 1350 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 1351 "Allow use of vmm in a jail."); 1352 } 1353 1354 int 1355 vmmdev_cleanup(void) 1356 { 1357 int error; 1358 1359 if (SLIST_EMPTY(&head)) 1360 error = 0; 1361 else 1362 error = EBUSY; 1363 1364 return (error); 1365 } 1366 1367 static int 1368 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1369 struct vm_object **objp, int nprot) 1370 { 1371 struct devmem_softc *dsc; 1372 vm_ooffset_t first, last; 1373 size_t seglen; 1374 int error; 1375 bool sysmem; 1376 1377 dsc = cdev->si_drv1; 1378 if (dsc == NULL) { 1379 /* 'cdev' has been created but is not ready for use */ 1380 return (ENXIO); 1381 } 1382 1383 first = *offset; 1384 last = *offset + len; 1385 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1386 return (EINVAL); 1387 1388 vm_slock_memsegs(dsc->sc->vm); 1389 1390 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1391 KASSERT(error == 0 && !sysmem && *objp != NULL, 1392 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1393 1394 if (seglen >= last) 1395 
vm_object_reference(*objp); 1396 else 1397 error = EINVAL; 1398 1399 vm_unlock_memsegs(dsc->sc->vm); 1400 return (error); 1401 } 1402 1403 static struct cdevsw devmemsw = { 1404 .d_name = "devmem", 1405 .d_version = D_VERSION, 1406 .d_mmap_single = devmem_mmap_single, 1407 }; 1408 1409 static int 1410 devmem_create_cdev(const char *vmname, int segid, char *devname) 1411 { 1412 struct devmem_softc *dsc; 1413 struct vmmdev_softc *sc; 1414 struct cdev *cdev; 1415 int error; 1416 1417 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1418 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1419 if (error) 1420 return (error); 1421 1422 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1423 1424 mtx_lock(&vmmdev_mtx); 1425 sc = vmmdev_lookup(vmname); 1426 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1427 if (sc->cdev == NULL) { 1428 /* virtual machine is being created or destroyed */ 1429 mtx_unlock(&vmmdev_mtx); 1430 free(dsc, M_VMMDEV); 1431 destroy_dev_sched_cb(cdev, NULL, 0); 1432 return (ENODEV); 1433 } 1434 1435 dsc->segid = segid; 1436 dsc->name = devname; 1437 dsc->cdev = cdev; 1438 dsc->sc = sc; 1439 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1440 mtx_unlock(&vmmdev_mtx); 1441 1442 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1443 cdev->si_drv1 = dsc; 1444 return (0); 1445 } 1446 1447 static void 1448 devmem_destroy(void *arg) 1449 { 1450 struct devmem_softc *dsc = arg; 1451 1452 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1453 dsc->cdev = NULL; 1454 dsc->sc = NULL; 1455 } 1456