1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

#ifdef COMPAT_FREEBSD13
/*
 * ioctl argument layouts as they existed on FreeBSD 13, preserved so
 * that binaries built against the old ABI keep working.
 */
struct vm_stats_old {
	int		cpuid;				/* in */
	int		num_entries;			/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

#define	VM_STATS_OLD \
	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old)

struct vm_snapshot_meta_old {
	void *ctx;			/* unused */
	void *dev_data;
	const char *dev_name;		/* identify userspace devices */
	enum snapshot_req dev_req;	/* identify kernel structs */

	struct vm_snapshot_buffer buffer;

	enum vm_snapshot_op op;
};

#define	VM_SNAPSHOT_REQ_OLD \
	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old)

struct vm_exit_ipi_13 {
	uint32_t	mode;
	uint8_t		vector;
	/* Destination set was a fixed 256-bit mask in the old ABI. */
	__BITSET_DEFINE(, 256) dmask;
};

struct vm_exit_13 {
	uint32_t	exitcode;
	int32_t		inst_length;
	uint64_t	rip;
	/* Old exit-info payload was a fixed 120-byte union. */
	uint64_t	u[120 / sizeof(uint64_t)];
};

struct vm_run_13 {
	int		cpuid;
	struct vm_exit_13	vm_exit;
};

#define	VM_RUN_13 \
	_IOWR('v', IOCNUM_RUN, struct vm_run_13)

#endif /*
COMPAT_FREEBSD13 */ 115 116 struct devmem_softc { 117 int segid; 118 char *name; 119 struct cdev *cdev; 120 struct vmmdev_softc *sc; 121 SLIST_ENTRY(devmem_softc) link; 122 }; 123 124 struct vmmdev_softc { 125 struct vm *vm; /* vm instance cookie */ 126 struct cdev *cdev; 127 struct ucred *ucred; 128 SLIST_ENTRY(vmmdev_softc) link; 129 SLIST_HEAD(, devmem_softc) devmem; 130 int flags; 131 }; 132 #define VSC_LINKED 0x01 133 134 static SLIST_HEAD(, vmmdev_softc) head; 135 136 static unsigned pr_allow_flag; 137 static struct mtx vmmdev_mtx; 138 MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); 139 140 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 141 142 SYSCTL_DECL(_hw_vmm); 143 144 static int vmm_priv_check(struct ucred *ucred); 145 static int devmem_create_cdev(const char *vmname, int id, char *devmem); 146 static void devmem_destroy(void *arg); 147 148 static int 149 vmm_priv_check(struct ucred *ucred) 150 { 151 152 if (jailed(ucred) && 153 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 154 return (EPERM); 155 156 return (0); 157 } 158 159 static int 160 vcpu_lock_one(struct vcpu *vcpu) 161 { 162 return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); 163 } 164 165 static void 166 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu) 167 { 168 enum vcpu_state state; 169 170 state = vcpu_get_state(vcpu, NULL); 171 if (state != VCPU_FROZEN) { 172 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), 173 vcpuid, state); 174 } 175 176 vcpu_set_state(vcpu, VCPU_IDLE, false); 177 } 178 179 static int 180 vcpu_lock_all(struct vmmdev_softc *sc) 181 { 182 struct vcpu *vcpu; 183 int error; 184 uint16_t i, j, maxcpus; 185 186 error = 0; 187 vm_slock_vcpus(sc->vm); 188 maxcpus = vm_get_maxcpus(sc->vm); 189 for (i = 0; i < maxcpus; i++) { 190 vcpu = vm_vcpu(sc->vm, i); 191 if (vcpu == NULL) 192 continue; 193 error = vcpu_lock_one(vcpu); 194 if (error) 195 break; 196 } 197 198 if (error) { 199 for (j = 0; j < i; j++) { 200 vcpu = 
vm_vcpu(sc->vm, j); 201 if (vcpu == NULL) 202 continue; 203 vcpu_unlock_one(sc, j, vcpu); 204 } 205 vm_unlock_vcpus(sc->vm); 206 } 207 208 return (error); 209 } 210 211 static void 212 vcpu_unlock_all(struct vmmdev_softc *sc) 213 { 214 struct vcpu *vcpu; 215 uint16_t i, maxcpus; 216 217 maxcpus = vm_get_maxcpus(sc->vm); 218 for (i = 0; i < maxcpus; i++) { 219 vcpu = vm_vcpu(sc->vm, i); 220 if (vcpu == NULL) 221 continue; 222 vcpu_unlock_one(sc, i, vcpu); 223 } 224 vm_unlock_vcpus(sc->vm); 225 } 226 227 static struct vmmdev_softc * 228 vmmdev_lookup(const char *name) 229 { 230 struct vmmdev_softc *sc; 231 232 #ifdef notyet /* XXX kernel is not compiled with invariants */ 233 mtx_assert(&vmmdev_mtx, MA_OWNED); 234 #endif 235 236 SLIST_FOREACH(sc, &head, link) { 237 if (strcmp(name, vm_name(sc->vm)) == 0) 238 break; 239 } 240 241 if (sc == NULL) 242 return (NULL); 243 244 if (cr_cansee(curthread->td_ucred, sc->ucred)) 245 return (NULL); 246 247 return (sc); 248 } 249 250 static struct vmmdev_softc * 251 vmmdev_lookup2(struct cdev *cdev) 252 { 253 254 return (cdev->si_drv1); 255 } 256 257 static int 258 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 259 { 260 int error, off, c, prot; 261 vm_paddr_t gpa, maxaddr; 262 void *hpa, *cookie; 263 struct vmmdev_softc *sc; 264 265 error = vmm_priv_check(curthread->td_ucred); 266 if (error) 267 return (error); 268 269 sc = vmmdev_lookup2(cdev); 270 if (sc == NULL) 271 return (ENXIO); 272 273 /* 274 * Get a read lock on the guest memory map. 275 */ 276 vm_slock_memsegs(sc->vm); 277 278 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 279 maxaddr = vmm_sysmem_maxaddr(sc->vm); 280 while (uio->uio_resid > 0 && error == 0) { 281 gpa = uio->uio_offset; 282 off = gpa & PAGE_MASK; 283 c = min(uio->uio_resid, PAGE_SIZE - off); 284 285 /* 286 * The VM has a hole in its physical memory map. 
		 * If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			/* Reads inside the hole return zeroes. */
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

/*
 * Copy the length and, for devmem segments, the cdev name of memory
 * segment 'mseg->segid' out to the caller-supplied structure.
 */
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

/*
 * Allocate a memory segment for the VM.  A named segment is device
 * memory and gets a devmem cdev created for it; the name buffer is
 * handed over to that cdev on success.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

/* Read 'count' registers into 'regval', stopping at the first failure. */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

/* Write 'count' registers from 'regval', stopping at the first failure. */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * Main ioctl dispatcher for the VM cdev: acquires the locks each command
 * needs (single vcpu, all vcpus, and/or the memory segment lock), then
 * dispatches on 'cmd'.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpuid, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_register_set *vmregset;
	struct vm_run *vmrun;
#ifdef COMPAT_FREEBSD13
	struct vm_run_13 *vmrun_13;
#endif
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix
	    *pptmsix;
#ifdef COMPAT_FREEBSD13
	struct vm_stats_old *vmstats_old;
#endif
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;
	struct vm_munmap *mu;
	struct vm_cpu_topology *topology;
	struct vm_readwrite_kernemu_device *kernemu;
	uint64_t *regvals;
	int *regnums;
	/* Which locks are held, so the 'done' path can undo them. */
	enum { NONE, SINGLE, ALL } vcpus_locked;
	bool memsegs_locked;
#ifdef BHYVE_SNAPSHOT
	struct vm_snapshot_meta *snapshot_meta;
#ifdef COMPAT_FREEBSD13
	struct vm_snapshot_meta_old *snapshot_old;
#endif
#endif

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	vcpuid = -1;
	vcpu = NULL;
	vcpus_locked = NONE;
	memsegs_locked = false;

	/*
	 * For VMM ioctls that operate on a single vCPU, lookup the
	 * vcpu.  For VMM ioctls which require one or more vCPUs to
	 * not be running, lock necessary vCPUs.
	 *
	 * XXX fragile, handle with care
	 * Most of these assume that the first field of the ioctl data
	 * is the vcpuid.
	 */
	switch (cmd) {
	case VM_RUN:
#ifdef COMPAT_FREEBSD13
	case VM_RUN_13:
#endif
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_GET_REGISTER_SET:
	case VM_SET_REGISTER_SET:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_GLA2GPA_NOFAULT:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
	case VM_GET_KERNEMU_DEV:
	case VM_SET_KERNEMU_DEV:
		/*
		 * ioctls that can operate only on vcpus that are not running.
		 */
		vcpuid = *(int *)data;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		error = vcpu_lock_one(vcpu);
		if (error)
			goto done;
		vcpus_locked = SINGLE;
		break;

#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_FBSD12:
#endif
	case VM_ALLOC_MEMSEG:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_MMAP_MEMSEG:
	case VM_MUNMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that modify the memory map must lock memory
		 * segments exclusively.
		 */
		vm_xlock_memsegs(sc->vm);
		memsegs_locked = true;
		/* FALLTHROUGH */
	case VM_MAP_PPTDEV_MMIO:
	case VM_UNMAP_PPTDEV_MMIO:
#ifdef BHYVE_SNAPSHOT
	case VM_SNAPSHOT_REQ:
#ifdef COMPAT_FREEBSD13
	case VM_SNAPSHOT_REQ_OLD:
#endif
	case VM_RESTORE_TIME:
#endif
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		vcpus_locked = ALL;
		break;

#ifdef COMPAT_FREEBSD12
	case VM_GET_MEMSEG_FBSD12:
#endif
	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock the memory map while it is being inspected.
		 */
		vm_slock_memsegs(sc->vm);
		memsegs_locked = true;
		break;

#ifdef COMPAT_FREEBSD13
	case VM_STATS_OLD:
#endif
	case VM_STATS:
	case VM_INJECT_NMI:
	case VM_LAPIC_IRQ:
	case VM_GET_X2APIC_STATE:
		/*
		 * These do not need the vCPU locked but do operate on
		 * a specific vCPU.
		 */
		vcpuid = *(int *)data;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		break;

	case VM_LAPIC_LOCAL_IRQ:
	case VM_SUSPEND_CPU:
	case VM_RESUME_CPU:
		/*
		 * These can either operate on all CPUs via a vcpuid of
		 * -1 or on a specific vCPU.
		 */
		vcpuid = *(int *)data;
		if (vcpuid == -1)
			break;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		break;

	default:
		break;
	}

	switch (cmd) {
	case VM_RUN: {
		struct vm_exit *vme;

		vmrun = (struct vm_run *)data;
		vme = vm_exitinfo(vcpu);

		error = vm_run(vcpu);
		if (error != 0)
			break;

		error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
		if (error != 0)
			break;
		if (vme->exitcode == VM_EXITCODE_IPI) {
			error = copyout(vm_exitinfo_cpuset(vcpu),
			    vmrun->cpuset,
			    min(vmrun->cpusetsize, sizeof(cpuset_t)));
			if (error != 0)
				break;
			if (sizeof(cpuset_t) < vmrun->cpusetsize) {
				uint8_t *p;

				/*
				 * Zero out the tail of a user cpuset that is
				 * larger than the kernel's cpuset_t.
				 */
				p = (uint8_t *)vmrun->cpuset +
				    sizeof(cpuset_t);
				while (error == 0 &&
				    p < (uint8_t *)vmrun->cpuset +
				    vmrun->cpusetsize) {
					error = subyte(p++, 0);
				}
			}
		}
		break;
	}
#ifdef COMPAT_FREEBSD13
	case VM_RUN_13: {
		struct vm_exit *vme;
		struct vm_exit_13 *vme_13;

		vmrun_13 = (struct vm_run_13 *)data;
		vme_13 = &vmrun_13->vm_exit;
		vme = vm_exitinfo(vcpu);

		error = vm_run(vcpu);
		if (error == 0) {
			/* Translate the exit info into the FreeBSD 13 ABI. */
			vme_13->exitcode = vme->exitcode;
			vme_13->inst_length = vme->inst_length;
			vme_13->rip = vme->rip;
			memcpy(vme_13->u, &vme->u, sizeof(vme_13->u));
			if (vme->exitcode == VM_EXITCODE_IPI) {
				struct vm_exit_ipi_13 *ipi;
				cpuset_t *dmask;
				int cpu;

				dmask = vm_exitinfo_cpuset(vcpu);
				ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0];
				BIT_ZERO(256, &ipi->dmask);
				/* The old dmask held at most 256 cpus. */
				CPU_FOREACH_ISSET(cpu, dmask) {
					if (cpu >= 256)
						break;
					BIT_SET(256, cpu, &ipi->dmask);
				}
			}
		}
		break;
	}
#endif
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
		    statdesc->desc, sizeof(statdesc->desc));
		break;
	}
#ifdef COMPAT_FREEBSD13
	case VM_STATS_OLD:
		vmstats_old = (struct vm_stats_old *)data;
		getmicrotime(&vmstats_old->tv);
		error = vmm_stat_copy(vcpu, 0,
		    nitems(vmstats_old->statbuf),
		    &vmstats_old->num_entries,
		    vmstats_old->statbuf);
		break;
#endif
	case VM_STATS: {
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf),
		    &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm,
		    pptmsi->bus, pptmsi->slot, pptmsi->func,
		    pptmsi->addr, pptmsi->msg,
		    pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm,
		    pptmsix->bus, pptmsix->slot,
		    pptmsix->func, pptmsix->idx,
		    pptmsix->addr, pptmsix->msg,
		    pptmsix->vector_control);
		break;
	case VM_PPTDEV_DISABLE_MSIX:
		pptdev = (struct vm_pptdev *)data;
		error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio
		    *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len,
		    pptmmio->hpa);
		break;
	case VM_UNMAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(vcpu,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		error = vm_inject_nmi(vcpu);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(vcpu, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_SET_KERNEMU_DEV:
	case VM_GET_KERNEMU_DEV: {
		mem_region_write_t mwrite;
		mem_region_read_t mread;
		bool arg;

		kernemu = (void *)data;

		if (kernemu->access_width > 0)
			size = (1u << kernemu->access_width);
		else
			size = 1;

		/* Route the access to the emulated device backing the GPA. */
		if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
			mread = lapic_mmio_read;
			mwrite = lapic_mmio_write;
		} else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
			mread = vioapic_mmio_read;
			mwrite = vioapic_mmio_write;
		} else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) {
			mread = vhpet_mmio_read;
			mwrite = vhpet_mmio_write;
		} else {
			error = EINVAL;
			break;
		}

		if (cmd == VM_SET_KERNEMU_DEV)
			error = mwrite(vcpu, kernemu->gpa,
			    kernemu->value, size, &arg);
		else
			error = mread(vcpu, kernemu->gpa,
			    &kernemu->value, size, &arg);
		break;
	}
	case VM_ISA_ASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot,
		    &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_MUNMAP_MEMSEG:
		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
#ifdef COMPAT_FREEBSD12
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(vcpu,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(vcpu,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		/* Bound the user-supplied count before sizing allocations. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	case VM_SET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		/* Bound the user-supplied count before sizing allocations. */
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu,
		    vmcap->captype,
		    &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu,
		    vmcap->captype,
		    vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(vcpu, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(vcpu, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
		    gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		/* The mmap prot bits must match the vm_prot encoding. */
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC
		    == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(vcpu, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_GLA2GPA_NOFAULT:
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SUSPEND_CPU:
		/* vcpu may be NULL here, meaning "all vcpus". */
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		/* vcpu may be NULL here, meaning "all vcpus". */
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(vcpu, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(vcpu);
		break;
	case VM_SET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	case VM_GET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
#ifdef BHYVE_SNAPSHOT
	case VM_SNAPSHOT_REQ:
		snapshot_meta = (struct vm_snapshot_meta *)data;
		error = vm_snapshot_req(sc->vm, snapshot_meta);
		break;
#ifdef COMPAT_FREEBSD13
	case VM_SNAPSHOT_REQ_OLD:
		/*
		 * The old structure just has an additional pointer at
		 * the start that is ignored.
		 */
		snapshot_old = (struct vm_snapshot_meta_old *)data;
		snapshot_meta =
		    (struct vm_snapshot_meta *)&snapshot_old->dev_data;
		error = vm_snapshot_req(sc->vm, snapshot_meta);
		break;
#endif
	case VM_RESTORE_TIME:
		error = vm_restore_time(sc->vm);
		break;
#endif
	default:
		error = ENOTTY;
		break;
	}

done:
	/* Release whatever locks the first switch acquired. */
	if (vcpus_locked == SINGLE)
		vcpu_unlock_one(sc, vcpuid, vcpu);
	else if (vcpus_locked == ALL)
		vcpu_unlock_all(sc);
	if (memsegs_locked)
		vm_unlock_memsegs(sc->vm);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

/*
 * mmap_single handler: translate a device offset (guest physical
 * address) into the VM object backing the containing system-memory
 * segment.  Only non-executable mappings of sysmem segments succeed.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	/* Walk the memory map until a mapping containing [first, last). */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

/*
 * Final teardown of a VM device: freeze all vcpus, free the devmem
 * softcs, destroy the cdev and the VM itself, drop the credential
 * reference and unlink the softc from the global list.
 */
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 1173 SLIST_REMOVE_HEAD(&sc->devmem, link); 1174 free(dsc->name, M_VMMDEV); 1175 free(dsc, M_VMMDEV); 1176 } 1177 1178 if (sc->cdev != NULL) 1179 destroy_dev(sc->cdev); 1180 1181 if (sc->vm != NULL) 1182 vm_destroy(sc->vm); 1183 1184 if (sc->ucred != NULL) 1185 crfree(sc->ucred); 1186 1187 if ((sc->flags & VSC_LINKED) != 0) { 1188 mtx_lock(&vmmdev_mtx); 1189 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 1190 mtx_unlock(&vmmdev_mtx); 1191 } 1192 1193 free(sc, M_VMMDEV); 1194 } 1195 1196 static int 1197 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 1198 { 1199 struct devmem_softc *dsc; 1200 struct vmmdev_softc *sc; 1201 struct cdev *cdev; 1202 char *buf; 1203 int error, buflen; 1204 1205 error = vmm_priv_check(req->td->td_ucred); 1206 if (error) 1207 return (error); 1208 1209 buflen = VM_MAX_NAMELEN + 1; 1210 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 1211 strlcpy(buf, "beavis", buflen); 1212 error = sysctl_handle_string(oidp, buf, buflen, req); 1213 if (error != 0 || req->newptr == NULL) 1214 goto out; 1215 1216 mtx_lock(&vmmdev_mtx); 1217 sc = vmmdev_lookup(buf); 1218 if (sc == NULL || sc->cdev == NULL) { 1219 mtx_unlock(&vmmdev_mtx); 1220 error = EINVAL; 1221 goto out; 1222 } 1223 1224 /* 1225 * Setting 'sc->cdev' to NULL is used to indicate that the VM 1226 * is scheduled for destruction. 1227 */ 1228 cdev = sc->cdev; 1229 sc->cdev = NULL; 1230 mtx_unlock(&vmmdev_mtx); 1231 1232 /* 1233 * Destroy all cdevs: 1234 * 1235 * - any new operations on the 'cdev' will return an error (ENXIO). 
1236 * 1237 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 1238 */ 1239 SLIST_FOREACH(dsc, &sc->devmem, link) { 1240 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 1241 destroy_dev(dsc->cdev); 1242 devmem_destroy(dsc); 1243 } 1244 destroy_dev(cdev); 1245 vmmdev_destroy(sc); 1246 error = 0; 1247 1248 out: 1249 free(buf, M_VMMDEV); 1250 return (error); 1251 } 1252 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 1253 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 1254 NULL, 0, sysctl_vmm_destroy, "A", 1255 NULL); 1256 1257 static struct cdevsw vmmdevsw = { 1258 .d_name = "vmmdev", 1259 .d_version = D_VERSION, 1260 .d_ioctl = vmmdev_ioctl, 1261 .d_mmap_single = vmmdev_mmap_single, 1262 .d_read = vmmdev_rw, 1263 .d_write = vmmdev_rw, 1264 }; 1265 1266 static int 1267 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 1268 { 1269 struct vm *vm; 1270 struct cdev *cdev; 1271 struct vmmdev_softc *sc, *sc2; 1272 char *buf; 1273 int error, buflen; 1274 1275 error = vmm_priv_check(req->td->td_ucred); 1276 if (error) 1277 return (error); 1278 1279 buflen = VM_MAX_NAMELEN + 1; 1280 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 1281 strlcpy(buf, "beavis", buflen); 1282 error = sysctl_handle_string(oidp, buf, buflen, req); 1283 if (error != 0 || req->newptr == NULL) 1284 goto out; 1285 1286 mtx_lock(&vmmdev_mtx); 1287 sc = vmmdev_lookup(buf); 1288 mtx_unlock(&vmmdev_mtx); 1289 if (sc != NULL) { 1290 error = EEXIST; 1291 goto out; 1292 } 1293 1294 error = vm_create(buf, &vm); 1295 if (error != 0) 1296 goto out; 1297 1298 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1299 sc->ucred = crhold(curthread->td_ucred); 1300 sc->vm = vm; 1301 SLIST_INIT(&sc->devmem); 1302 1303 /* 1304 * Lookup the name again just in case somebody sneaked in when we 1305 * dropped the lock. 
1306 */ 1307 mtx_lock(&vmmdev_mtx); 1308 sc2 = vmmdev_lookup(buf); 1309 if (sc2 == NULL) { 1310 SLIST_INSERT_HEAD(&head, sc, link); 1311 sc->flags |= VSC_LINKED; 1312 } 1313 mtx_unlock(&vmmdev_mtx); 1314 1315 if (sc2 != NULL) { 1316 vmmdev_destroy(sc); 1317 error = EEXIST; 1318 goto out; 1319 } 1320 1321 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, 1322 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 1323 if (error != 0) { 1324 vmmdev_destroy(sc); 1325 goto out; 1326 } 1327 1328 mtx_lock(&vmmdev_mtx); 1329 sc->cdev = cdev; 1330 sc->cdev->si_drv1 = sc; 1331 mtx_unlock(&vmmdev_mtx); 1332 1333 out: 1334 free(buf, M_VMMDEV); 1335 return (error); 1336 } 1337 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 1338 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 1339 NULL, 0, sysctl_vmm_create, "A", 1340 NULL); 1341 1342 void 1343 vmmdev_init(void) 1344 { 1345 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 1346 "Allow use of vmm in a jail."); 1347 } 1348 1349 int 1350 vmmdev_cleanup(void) 1351 { 1352 int error; 1353 1354 if (SLIST_EMPTY(&head)) 1355 error = 0; 1356 else 1357 error = EBUSY; 1358 1359 return (error); 1360 } 1361 1362 static int 1363 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1364 struct vm_object **objp, int nprot) 1365 { 1366 struct devmem_softc *dsc; 1367 vm_ooffset_t first, last; 1368 size_t seglen; 1369 int error; 1370 bool sysmem; 1371 1372 dsc = cdev->si_drv1; 1373 if (dsc == NULL) { 1374 /* 'cdev' has been created but is not ready for use */ 1375 return (ENXIO); 1376 } 1377 1378 first = *offset; 1379 last = *offset + len; 1380 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1381 return (EINVAL); 1382 1383 vm_slock_memsegs(dsc->sc->vm); 1384 1385 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1386 KASSERT(error == 0 && !sysmem && *objp != NULL, 1387 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1388 1389 if (seglen >= last) 1390 
vm_object_reference(*objp); 1391 else 1392 error = EINVAL; 1393 1394 vm_unlock_memsegs(dsc->sc->vm); 1395 return (error); 1396 } 1397 1398 static struct cdevsw devmemsw = { 1399 .d_name = "devmem", 1400 .d_version = D_VERSION, 1401 .d_mmap_single = devmem_mmap_single, 1402 }; 1403 1404 static int 1405 devmem_create_cdev(const char *vmname, int segid, char *devname) 1406 { 1407 struct devmem_softc *dsc; 1408 struct vmmdev_softc *sc; 1409 struct cdev *cdev; 1410 int error; 1411 1412 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1413 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1414 if (error) 1415 return (error); 1416 1417 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1418 1419 mtx_lock(&vmmdev_mtx); 1420 sc = vmmdev_lookup(vmname); 1421 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1422 if (sc->cdev == NULL) { 1423 /* virtual machine is being created or destroyed */ 1424 mtx_unlock(&vmmdev_mtx); 1425 free(dsc, M_VMMDEV); 1426 destroy_dev_sched_cb(cdev, NULL, 0); 1427 return (ENODEV); 1428 } 1429 1430 dsc->segid = segid; 1431 dsc->name = devname; 1432 dsc->cdev = cdev; 1433 dsc->sc = sc; 1434 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1435 mtx_unlock(&vmmdev_mtx); 1436 1437 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1438 cdev->si_drv1 = dsc; 1439 return (0); 1440 } 1441 1442 static void 1443 devmem_destroy(void *arg) 1444 { 1445 struct devmem_softc *dsc = arg; 1446 1447 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1448 dsc->cdev = NULL; 1449 dsc->sc = NULL; 1450 } 1451