1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 */ 28 29 #include <sys/cdefs.h> 30 #include "opt_bhyve_snapshot.h" 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/jail.h> 35 #include <sys/queue.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/malloc.h> 39 #include <sys/conf.h> 40 #include <sys/sysctl.h> 41 #include <sys/libkern.h> 42 #include <sys/ioccom.h> 43 #include <sys/mman.h> 44 #include <sys/uio.h> 45 #include <sys/proc.h> 46 47 #include <vm/vm.h> 48 #include <vm/pmap.h> 49 #include <vm/vm_map.h> 50 #include <vm/vm_object.h> 51 52 #include <machine/vmparam.h> 53 #include <machine/vmm.h> 54 #include <machine/vmm_dev.h> 55 #include <machine/vmm_instruction_emul.h> 56 #include <machine/vmm_snapshot.h> 57 #include <x86/apicreg.h> 58 59 #include "vmm_lapic.h" 60 #include "vmm_stat.h" 61 #include "vmm_mem.h" 62 #include "io/ppt.h" 63 #include "io/vatpic.h" 64 #include "io/vioapic.h" 65 #include "io/vhpet.h" 66 #include "io/vrtc.h" 67 68 #ifdef COMPAT_FREEBSD13 69 struct vm_stats_old { 70 int cpuid; /* in */ 71 int num_entries; /* out */ 72 struct timeval tv; 73 uint64_t statbuf[MAX_VM_STATS]; 74 }; 75 76 #define VM_STATS_OLD \ 77 _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old) 78 79 struct vm_snapshot_meta_old { 80 void *ctx; /* unused */ 81 void *dev_data; 82 const char *dev_name; /* identify userspace devices */ 83 enum snapshot_req dev_req; /* identify kernel structs */ 84 85 struct vm_snapshot_buffer buffer; 86 87 enum vm_snapshot_op op; 88 }; 89 90 #define VM_SNAPSHOT_REQ_OLD \ 91 _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old) 92 93 struct vm_exit_ipi_13 { 94 uint32_t mode; 95 uint8_t vector; 96 __BITSET_DEFINE(, 256) dmask; 97 }; 98 99 struct vm_exit_13 { 100 uint32_t exitcode; 101 int32_t inst_length; 102 uint64_t rip; 103 uint64_t u[120 / sizeof(uint64_t)]; 104 }; 105 106 struct vm_run_13 { 107 int cpuid; 108 struct vm_exit_13 vm_exit; 109 }; 110 111 #define VM_RUN_13 \ 112 _IOWR('v', IOCNUM_RUN, struct vm_run_13) 113 114 #endif /* 
COMPAT_FREEBSD13 */

/*
 * Per-segment state for a device-memory ("devmem") segment that is exposed
 * to userspace through its own cdev.  Linked off the owning vmmdev_softc.
 */
struct devmem_softc {
	int segid;			/* memory segment id in the VM */
	char *name;			/* segment name; freed with this struct */
	struct cdev *cdev;		/* /dev node for mmap'ing this segment */
	struct vmmdev_softc *sc;	/* back-pointer to owning VM device */
	SLIST_ENTRY(devmem_softc) link;
};

/*
 * Per-VM device state: ties a 'struct vm' instance to its /dev/vmm cdev,
 * the creating credential, and the list of devmem cdevs.
 */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;		/* credential of the creator */
	SLIST_ENTRY(vmmdev_softc) link;	/* entry on global 'head' list */
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
/* flags: set once the softc has been inserted on the global list */
#define	VSC_LINKED		0x01

/* Global list of all VM device softcs, protected by vmmdev_mtx. */
static SLIST_HEAD(, vmmdev_softc) head;

/* Jail permission bit that allows vmm access from within a jail. */
static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

/*
 * Deny access to jailed processes unless the jail has been granted the
 * vmm allow-flag; unjailed callers always pass.
 */
static int
vmm_priv_check(struct ucred *ucred)
{

	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

/*
 * Freeze a single vcpu so that its state can be inspected/modified;
 * 'from_idle' is true so the transition waits for the vcpu to go idle.
 */
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

/*
 * Thaw a vcpu previously frozen by vcpu_lock_one().  Panics if the vcpu
 * is not actually frozen since that indicates a lock-pairing bug.
 */
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
		    vcpuid, state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

/*
 * Freeze every existing vcpu of the VM (slots that have not been created
 * yet are NULL and skipped).  On failure the vcpus locked so far are
 * unwound and the vcpu list lock is dropped; on success the caller must
 * later call vcpu_unlock_all().
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		/* Roll back: unfreeze only the vcpus frozen above ([0, i)). */
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(sc, j, vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

/*
 * Undo vcpu_lock_all(): thaw every vcpu and drop the vcpu list lock.
 */
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(sc, i, vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

/*
 * Find the softc for a VM by name.  Returns NULL if no such VM exists or
 * if the caller's credential is not allowed to see the VM's creator
 * (cr_cansee), which hides VMs across jail/user boundaries.
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

/* Softc is stashed in the cdev's driver-private pointer at creation. */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

/*
 * read(2)/write(2) handler for the VM cdev: copies guest physical memory
 * to/from userspace one page-bounded chunk at a time, using uio_offset as
 * the guest physical address.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map.
		 * If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			/* Reads of the hole below sysmem max return zeros. */
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

/*
 * Fill in a vm_memseg for the VM_GET_MEMSEG family of ioctls: length
 * comes from vm_get_memseg(); for devmem segments the registered devmem
 * name is copied out, for sysmem segments the name field is zeroed.
 * 'len' is the size of the caller's mseg->name buffer.
 */
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		/* Every non-sysmem segment must have a devmem entry. */
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

/*
 * Handle the VM_ALLOC_MEMSEG family of ioctls: allocate a memory segment
 * in the VM.  A non-empty name marks the segment as devmem and a cdev of
 * that name is created for it; 'len' is the size of mseg->name.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

/*
 * Read 'count' registers into 'regval', stopping at the first failure.
 * Returns the error of the failing vm_get_register() or 0.
 */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * Write 'count' registers from 'regval', stopping at the first failure.
 * Earlier registers remain written if a later one fails.
 */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * ioctl handler for the VM cdev.  The first switch classifies each command
 * by its locking requirements (single vcpu frozen, all vcpus frozen,
 * memseg lock shared/exclusive, or none); the second switch dispatches
 * the actual operation.  Locks taken here are dropped at 'done:'.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpuid, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_register_set *vmregset;
	struct vm_run *vmrun;
#ifdef COMPAT_FREEBSD13
	struct vm_run_13 *vmrun_13;
#endif
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
#ifdef COMPAT_FREEBSD13
	struct vm_stats_old *vmstats_old;
#endif
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;
	struct vm_munmap *mu;
	struct vm_cpu_topology *topology;
	struct vm_readwrite_kernemu_device *kernemu;
	uint64_t *regvals;
	int *regnums;
	enum { NONE, SINGLE, ALL } vcpus_locked;
	bool memsegs_locked;
#ifdef BHYVE_SNAPSHOT
	struct vm_snapshot_meta *snapshot_meta;
#ifdef COMPAT_FREEBSD13
	struct vm_snapshot_meta_old *snapshot_old;
#endif
#endif

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	vcpuid = -1;
	vcpu = NULL;
	vcpus_locked = NONE;
	memsegs_locked = false;

	/*
	 * For VMM ioctls that operate on a single vCPU, lookup the
	 * vcpu.  For VMM ioctls which require one or more vCPUs to
	 * not be running, lock necessary vCPUs.
	 *
	 * XXX fragile, handle with care
	 * Most of these assume that the first field of the ioctl data
	 * is the vcpuid.
481 */ 482 switch (cmd) { 483 case VM_RUN: 484 #ifdef COMPAT_FREEBSD13 485 case VM_RUN_13: 486 #endif 487 case VM_GET_REGISTER: 488 case VM_SET_REGISTER: 489 case VM_GET_SEGMENT_DESCRIPTOR: 490 case VM_SET_SEGMENT_DESCRIPTOR: 491 case VM_GET_REGISTER_SET: 492 case VM_SET_REGISTER_SET: 493 case VM_INJECT_EXCEPTION: 494 case VM_GET_CAPABILITY: 495 case VM_SET_CAPABILITY: 496 case VM_SET_X2APIC_STATE: 497 case VM_GLA2GPA: 498 case VM_GLA2GPA_NOFAULT: 499 case VM_ACTIVATE_CPU: 500 case VM_SET_INTINFO: 501 case VM_GET_INTINFO: 502 case VM_RESTART_INSTRUCTION: 503 case VM_GET_KERNEMU_DEV: 504 case VM_SET_KERNEMU_DEV: 505 /* 506 * ioctls that can operate only on vcpus that are not running. 507 */ 508 vcpuid = *(int *)data; 509 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 510 if (vcpu == NULL) { 511 error = EINVAL; 512 goto done; 513 } 514 error = vcpu_lock_one(vcpu); 515 if (error) 516 goto done; 517 vcpus_locked = SINGLE; 518 break; 519 520 #ifdef COMPAT_FREEBSD12 521 case VM_ALLOC_MEMSEG_FBSD12: 522 #endif 523 case VM_ALLOC_MEMSEG: 524 case VM_BIND_PPTDEV: 525 case VM_UNBIND_PPTDEV: 526 case VM_MMAP_MEMSEG: 527 case VM_MUNMAP_MEMSEG: 528 case VM_REINIT: 529 /* 530 * ioctls that modify the memory map must lock memory 531 * segments exclusively. 532 */ 533 vm_xlock_memsegs(sc->vm); 534 memsegs_locked = true; 535 /* FALLTHROUGH */ 536 case VM_MAP_PPTDEV_MMIO: 537 case VM_UNMAP_PPTDEV_MMIO: 538 #ifdef BHYVE_SNAPSHOT 539 case VM_SNAPSHOT_REQ: 540 #ifdef COMPAT_FREEBSD13 541 case VM_SNAPSHOT_REQ_OLD: 542 #endif 543 case VM_RESTORE_TIME: 544 #endif 545 /* 546 * ioctls that operate on the entire virtual machine must 547 * prevent all vcpus from running. 548 */ 549 error = vcpu_lock_all(sc); 550 if (error) 551 goto done; 552 vcpus_locked = ALL; 553 break; 554 555 #ifdef COMPAT_FREEBSD12 556 case VM_GET_MEMSEG_FBSD12: 557 #endif 558 case VM_GET_MEMSEG: 559 case VM_MMAP_GETNEXT: 560 /* 561 * Lock the memory map while it is being inspected. 
562 */ 563 vm_slock_memsegs(sc->vm); 564 memsegs_locked = true; 565 break; 566 567 #ifdef COMPAT_FREEBSD13 568 case VM_STATS_OLD: 569 #endif 570 case VM_STATS: 571 case VM_INJECT_NMI: 572 case VM_LAPIC_IRQ: 573 case VM_GET_X2APIC_STATE: 574 /* 575 * These do not need the vCPU locked but do operate on 576 * a specific vCPU. 577 */ 578 vcpuid = *(int *)data; 579 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 580 if (vcpu == NULL) { 581 error = EINVAL; 582 goto done; 583 } 584 break; 585 586 case VM_LAPIC_LOCAL_IRQ: 587 case VM_SUSPEND_CPU: 588 case VM_RESUME_CPU: 589 /* 590 * These can either operate on all CPUs via a vcpuid of 591 * -1 or on a specific vCPU. 592 */ 593 vcpuid = *(int *)data; 594 if (vcpuid == -1) 595 break; 596 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 597 if (vcpu == NULL) { 598 error = EINVAL; 599 goto done; 600 } 601 break; 602 603 default: 604 break; 605 } 606 607 switch (cmd) { 608 case VM_RUN: { 609 struct vm_exit *vme; 610 611 vmrun = (struct vm_run *)data; 612 vme = vm_exitinfo(vcpu); 613 614 error = vm_run(vcpu); 615 if (error != 0) 616 break; 617 618 error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); 619 if (error != 0) 620 break; 621 if (vme->exitcode == VM_EXITCODE_IPI) { 622 error = copyout(vm_exitinfo_cpuset(vcpu), 623 vmrun->cpuset, 624 min(vmrun->cpusetsize, sizeof(cpuset_t))); 625 if (error != 0) 626 break; 627 if (sizeof(cpuset_t) < vmrun->cpusetsize) { 628 uint8_t *p; 629 630 p = (uint8_t *)vmrun->cpuset + 631 sizeof(cpuset_t); 632 while (error == 0 && 633 p < (uint8_t *)vmrun->cpuset + 634 vmrun->cpusetsize) { 635 error = subyte(p++, 0); 636 } 637 } 638 } 639 break; 640 } 641 #ifdef COMPAT_FREEBSD13 642 case VM_RUN_13: { 643 struct vm_exit *vme; 644 struct vm_exit_13 *vme_13; 645 646 vmrun_13 = (struct vm_run_13 *)data; 647 vme_13 = &vmrun_13->vm_exit; 648 vme = vm_exitinfo(vcpu); 649 650 error = vm_run(vcpu); 651 if (error == 0) { 652 vme_13->exitcode = vme->exitcode; 653 vme_13->inst_length = vme->inst_length; 654 vme_13->rip = vme->rip; 
655 memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); 656 if (vme->exitcode == VM_EXITCODE_IPI) { 657 struct vm_exit_ipi_13 *ipi; 658 cpuset_t *dmask; 659 int cpu; 660 661 dmask = vm_exitinfo_cpuset(vcpu); 662 ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; 663 BIT_ZERO(256, &ipi->dmask); 664 CPU_FOREACH_ISSET(cpu, dmask) { 665 if (cpu >= 256) 666 break; 667 BIT_SET(256, cpu, &ipi->dmask); 668 } 669 } 670 } 671 break; 672 } 673 #endif 674 case VM_SUSPEND: 675 vmsuspend = (struct vm_suspend *)data; 676 error = vm_suspend(sc->vm, vmsuspend->how); 677 break; 678 case VM_REINIT: 679 error = vm_reinit(sc->vm); 680 break; 681 case VM_STAT_DESC: { 682 statdesc = (struct vm_stat_desc *)data; 683 error = vmm_stat_desc_copy(statdesc->index, 684 statdesc->desc, sizeof(statdesc->desc)); 685 break; 686 } 687 #ifdef COMPAT_FREEBSD13 688 case VM_STATS_OLD: 689 vmstats_old = (struct vm_stats_old *)data; 690 getmicrotime(&vmstats_old->tv); 691 error = vmm_stat_copy(vcpu, 0, 692 nitems(vmstats_old->statbuf), 693 &vmstats_old->num_entries, 694 vmstats_old->statbuf); 695 break; 696 #endif 697 case VM_STATS: { 698 vmstats = (struct vm_stats *)data; 699 getmicrotime(&vmstats->tv); 700 error = vmm_stat_copy(vcpu, vmstats->index, 701 nitems(vmstats->statbuf), 702 &vmstats->num_entries, vmstats->statbuf); 703 break; 704 } 705 case VM_PPTDEV_MSI: 706 pptmsi = (struct vm_pptdev_msi *)data; 707 error = ppt_setup_msi(sc->vm, 708 pptmsi->bus, pptmsi->slot, pptmsi->func, 709 pptmsi->addr, pptmsi->msg, 710 pptmsi->numvec); 711 break; 712 case VM_PPTDEV_MSIX: 713 pptmsix = (struct vm_pptdev_msix *)data; 714 error = ppt_setup_msix(sc->vm, 715 pptmsix->bus, pptmsix->slot, 716 pptmsix->func, pptmsix->idx, 717 pptmsix->addr, pptmsix->msg, 718 pptmsix->vector_control); 719 break; 720 case VM_PPTDEV_DISABLE_MSIX: 721 pptdev = (struct vm_pptdev *)data; 722 error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot, 723 pptdev->func); 724 break; 725 case VM_MAP_PPTDEV_MMIO: 726 pptmmio = (struct vm_pptdev_mmio 
*)data; 727 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 728 pptmmio->func, pptmmio->gpa, pptmmio->len, 729 pptmmio->hpa); 730 break; 731 case VM_UNMAP_PPTDEV_MMIO: 732 pptmmio = (struct vm_pptdev_mmio *)data; 733 error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 734 pptmmio->func, pptmmio->gpa, pptmmio->len); 735 break; 736 case VM_BIND_PPTDEV: 737 pptdev = (struct vm_pptdev *)data; 738 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 739 pptdev->func); 740 break; 741 case VM_UNBIND_PPTDEV: 742 pptdev = (struct vm_pptdev *)data; 743 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 744 pptdev->func); 745 break; 746 case VM_INJECT_EXCEPTION: 747 vmexc = (struct vm_exception *)data; 748 error = vm_inject_exception(vcpu, 749 vmexc->vector, vmexc->error_code_valid, vmexc->error_code, 750 vmexc->restart_instruction); 751 break; 752 case VM_INJECT_NMI: 753 error = vm_inject_nmi(vcpu); 754 break; 755 case VM_LAPIC_IRQ: 756 vmirq = (struct vm_lapic_irq *)data; 757 error = lapic_intr_edge(vcpu, vmirq->vector); 758 break; 759 case VM_LAPIC_LOCAL_IRQ: 760 vmirq = (struct vm_lapic_irq *)data; 761 error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector); 762 break; 763 case VM_LAPIC_MSI: 764 vmmsi = (struct vm_lapic_msi *)data; 765 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); 766 break; 767 case VM_IOAPIC_ASSERT_IRQ: 768 ioapic_irq = (struct vm_ioapic_irq *)data; 769 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); 770 break; 771 case VM_IOAPIC_DEASSERT_IRQ: 772 ioapic_irq = (struct vm_ioapic_irq *)data; 773 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); 774 break; 775 case VM_IOAPIC_PULSE_IRQ: 776 ioapic_irq = (struct vm_ioapic_irq *)data; 777 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); 778 break; 779 case VM_IOAPIC_PINCOUNT: 780 *(int *)data = vioapic_pincount(sc->vm); 781 break; 782 case VM_SET_KERNEMU_DEV: 783 case VM_GET_KERNEMU_DEV: { 784 mem_region_write_t mwrite; 785 mem_region_read_t mread; 
786 bool arg; 787 788 kernemu = (void *)data; 789 790 if (kernemu->access_width > 0) 791 size = (1u << kernemu->access_width); 792 else 793 size = 1; 794 795 if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 796 mread = lapic_mmio_read; 797 mwrite = lapic_mmio_write; 798 } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 799 mread = vioapic_mmio_read; 800 mwrite = vioapic_mmio_write; 801 } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) { 802 mread = vhpet_mmio_read; 803 mwrite = vhpet_mmio_write; 804 } else { 805 error = EINVAL; 806 break; 807 } 808 809 if (cmd == VM_SET_KERNEMU_DEV) 810 error = mwrite(vcpu, kernemu->gpa, 811 kernemu->value, size, &arg); 812 else 813 error = mread(vcpu, kernemu->gpa, 814 &kernemu->value, size, &arg); 815 break; 816 } 817 case VM_ISA_ASSERT_IRQ: 818 isa_irq = (struct vm_isa_irq *)data; 819 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); 820 if (error == 0 && isa_irq->ioapic_irq != -1) 821 error = vioapic_assert_irq(sc->vm, 822 isa_irq->ioapic_irq); 823 break; 824 case VM_ISA_DEASSERT_IRQ: 825 isa_irq = (struct vm_isa_irq *)data; 826 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); 827 if (error == 0 && isa_irq->ioapic_irq != -1) 828 error = vioapic_deassert_irq(sc->vm, 829 isa_irq->ioapic_irq); 830 break; 831 case VM_ISA_PULSE_IRQ: 832 isa_irq = (struct vm_isa_irq *)data; 833 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); 834 if (error == 0 && isa_irq->ioapic_irq != -1) 835 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); 836 break; 837 case VM_ISA_SET_IRQ_TRIGGER: 838 isa_irq_trigger = (struct vm_isa_irq_trigger *)data; 839 error = vatpic_set_irq_trigger(sc->vm, 840 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); 841 break; 842 case VM_MMAP_GETNEXT: 843 mm = (struct vm_memmap *)data; 844 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 845 &mm->segoff, &mm->len, &mm->prot, 
&mm->flags); 846 break; 847 case VM_MMAP_MEMSEG: 848 mm = (struct vm_memmap *)data; 849 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 850 mm->len, mm->prot, mm->flags); 851 break; 852 case VM_MUNMAP_MEMSEG: 853 mu = (struct vm_munmap *)data; 854 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 855 break; 856 #ifdef COMPAT_FREEBSD12 857 case VM_ALLOC_MEMSEG_FBSD12: 858 error = alloc_memseg(sc, (struct vm_memseg *)data, 859 sizeof(((struct vm_memseg_fbsd12 *)0)->name)); 860 break; 861 #endif 862 case VM_ALLOC_MEMSEG: 863 error = alloc_memseg(sc, (struct vm_memseg *)data, 864 sizeof(((struct vm_memseg *)0)->name)); 865 break; 866 #ifdef COMPAT_FREEBSD12 867 case VM_GET_MEMSEG_FBSD12: 868 error = get_memseg(sc, (struct vm_memseg *)data, 869 sizeof(((struct vm_memseg_fbsd12 *)0)->name)); 870 break; 871 #endif 872 case VM_GET_MEMSEG: 873 error = get_memseg(sc, (struct vm_memseg *)data, 874 sizeof(((struct vm_memseg *)0)->name)); 875 break; 876 case VM_GET_REGISTER: 877 vmreg = (struct vm_register *)data; 878 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 879 break; 880 case VM_SET_REGISTER: 881 vmreg = (struct vm_register *)data; 882 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 883 break; 884 case VM_SET_SEGMENT_DESCRIPTOR: 885 vmsegdesc = (struct vm_seg_desc *)data; 886 error = vm_set_seg_desc(vcpu, 887 vmsegdesc->regnum, 888 &vmsegdesc->desc); 889 break; 890 case VM_GET_SEGMENT_DESCRIPTOR: 891 vmsegdesc = (struct vm_seg_desc *)data; 892 error = vm_get_seg_desc(vcpu, 893 vmsegdesc->regnum, 894 &vmsegdesc->desc); 895 break; 896 case VM_GET_REGISTER_SET: 897 vmregset = (struct vm_register_set *)data; 898 if (vmregset->count > VM_REG_LAST) { 899 error = EINVAL; 900 break; 901 } 902 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 903 M_WAITOK); 904 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 905 M_WAITOK); 906 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 907 
vmregset->count); 908 if (error == 0) 909 error = vm_get_register_set(vcpu, 910 vmregset->count, regnums, regvals); 911 if (error == 0) 912 error = copyout(regvals, vmregset->regvals, 913 sizeof(regvals[0]) * vmregset->count); 914 free(regvals, M_VMMDEV); 915 free(regnums, M_VMMDEV); 916 break; 917 case VM_SET_REGISTER_SET: 918 vmregset = (struct vm_register_set *)data; 919 if (vmregset->count > VM_REG_LAST) { 920 error = EINVAL; 921 break; 922 } 923 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 924 M_WAITOK); 925 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 926 M_WAITOK); 927 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 928 vmregset->count); 929 if (error == 0) 930 error = copyin(vmregset->regvals, regvals, 931 sizeof(regvals[0]) * vmregset->count); 932 if (error == 0) 933 error = vm_set_register_set(vcpu, 934 vmregset->count, regnums, regvals); 935 free(regvals, M_VMMDEV); 936 free(regnums, M_VMMDEV); 937 break; 938 case VM_GET_CAPABILITY: 939 vmcap = (struct vm_capability *)data; 940 error = vm_get_capability(vcpu, 941 vmcap->captype, 942 &vmcap->capval); 943 break; 944 case VM_SET_CAPABILITY: 945 vmcap = (struct vm_capability *)data; 946 error = vm_set_capability(vcpu, 947 vmcap->captype, 948 vmcap->capval); 949 break; 950 case VM_SET_X2APIC_STATE: 951 x2apic = (struct vm_x2apic *)data; 952 error = vm_set_x2apic_state(vcpu, x2apic->state); 953 break; 954 case VM_GET_X2APIC_STATE: 955 x2apic = (struct vm_x2apic *)data; 956 error = vm_get_x2apic_state(vcpu, &x2apic->state); 957 break; 958 case VM_GET_GPA_PMAP: 959 gpapte = (struct vm_gpa_pte *)data; 960 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), 961 gpapte->gpa, gpapte->pte, &gpapte->ptenum); 962 error = 0; 963 break; 964 case VM_GET_HPET_CAPABILITIES: 965 error = vhpet_getcap((struct vm_hpet_cap *)data); 966 break; 967 case VM_GLA2GPA: { 968 CTASSERT(PROT_READ == VM_PROT_READ); 969 CTASSERT(PROT_WRITE == VM_PROT_WRITE); 970 CTASSERT(PROT_EXEC 
== VM_PROT_EXECUTE); 971 gg = (struct vm_gla2gpa *)data; 972 error = vm_gla2gpa(vcpu, &gg->paging, gg->gla, 973 gg->prot, &gg->gpa, &gg->fault); 974 KASSERT(error == 0 || error == EFAULT, 975 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 976 break; 977 } 978 case VM_GLA2GPA_NOFAULT: 979 gg = (struct vm_gla2gpa *)data; 980 error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, 981 gg->prot, &gg->gpa, &gg->fault); 982 KASSERT(error == 0 || error == EFAULT, 983 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 984 break; 985 case VM_ACTIVATE_CPU: 986 error = vm_activate_cpu(vcpu); 987 break; 988 case VM_GET_CPUS: 989 error = 0; 990 vm_cpuset = (struct vm_cpuset *)data; 991 size = vm_cpuset->cpusetsize; 992 if (size < 1 || size > CPU_MAXSIZE / NBBY) { 993 error = ERANGE; 994 break; 995 } 996 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP, 997 M_WAITOK | M_ZERO); 998 if (vm_cpuset->which == VM_ACTIVE_CPUS) 999 *cpuset = vm_active_cpus(sc->vm); 1000 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 1001 *cpuset = vm_suspended_cpus(sc->vm); 1002 else if (vm_cpuset->which == VM_DEBUG_CPUS) 1003 *cpuset = vm_debug_cpus(sc->vm); 1004 else 1005 error = EINVAL; 1006 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY)) 1007 error = ERANGE; 1008 if (error == 0) 1009 error = copyout(cpuset, vm_cpuset->cpus, size); 1010 free(cpuset, M_TEMP); 1011 break; 1012 case VM_SUSPEND_CPU: 1013 error = vm_suspend_cpu(sc->vm, vcpu); 1014 break; 1015 case VM_RESUME_CPU: 1016 error = vm_resume_cpu(sc->vm, vcpu); 1017 break; 1018 case VM_SET_INTINFO: 1019 vmii = (struct vm_intinfo *)data; 1020 error = vm_exit_intinfo(vcpu, vmii->info1); 1021 break; 1022 case VM_GET_INTINFO: 1023 vmii = (struct vm_intinfo *)data; 1024 error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); 1025 break; 1026 case VM_RTC_WRITE: 1027 rtcdata = (struct vm_rtc_data *)data; 1028 error = vrtc_nvram_write(sc->vm, rtcdata->offset, 1029 rtcdata->value); 1030 break; 1031 case VM_RTC_READ: 1032 rtcdata 
= (struct vm_rtc_data *)data; 1033 error = vrtc_nvram_read(sc->vm, rtcdata->offset, 1034 &rtcdata->value); 1035 break; 1036 case VM_RTC_SETTIME: 1037 rtctime = (struct vm_rtc_time *)data; 1038 error = vrtc_set_time(sc->vm, rtctime->secs); 1039 break; 1040 case VM_RTC_GETTIME: 1041 error = 0; 1042 rtctime = (struct vm_rtc_time *)data; 1043 rtctime->secs = vrtc_get_time(sc->vm); 1044 break; 1045 case VM_RESTART_INSTRUCTION: 1046 error = vm_restart_instruction(vcpu); 1047 break; 1048 case VM_SET_TOPOLOGY: 1049 topology = (struct vm_cpu_topology *)data; 1050 error = vm_set_topology(sc->vm, topology->sockets, 1051 topology->cores, topology->threads, topology->maxcpus); 1052 break; 1053 case VM_GET_TOPOLOGY: 1054 topology = (struct vm_cpu_topology *)data; 1055 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 1056 &topology->threads, &topology->maxcpus); 1057 error = 0; 1058 break; 1059 #ifdef BHYVE_SNAPSHOT 1060 case VM_SNAPSHOT_REQ: 1061 snapshot_meta = (struct vm_snapshot_meta *)data; 1062 error = vm_snapshot_req(sc->vm, snapshot_meta); 1063 break; 1064 #ifdef COMPAT_FREEBSD13 1065 case VM_SNAPSHOT_REQ_OLD: 1066 /* 1067 * The old structure just has an additional pointer at 1068 * the start that is ignored. 1069 */ 1070 snapshot_old = (struct vm_snapshot_meta_old *)data; 1071 snapshot_meta = 1072 (struct vm_snapshot_meta *)&snapshot_old->dev_data; 1073 error = vm_snapshot_req(sc->vm, snapshot_meta); 1074 break; 1075 #endif 1076 case VM_RESTORE_TIME: 1077 error = vm_restore_time(sc->vm); 1078 break; 1079 #endif 1080 default: 1081 error = ENOTTY; 1082 break; 1083 } 1084 1085 done: 1086 if (vcpus_locked == SINGLE) 1087 vcpu_unlock_one(sc, vcpuid, vcpu); 1088 else if (vcpus_locked == ALL) 1089 vcpu_unlock_all(sc); 1090 if (memsegs_locked) 1091 vm_unlock_memsegs(sc->vm); 1092 1093 /* 1094 * Make sure that no handler returns a kernel-internal 1095 * error value to userspace. 
	 */
	/* No handler may leak a kernel-internal (negative) errno to users. */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

/*
 * mmap(2) (single-object) handler for the VM cdev: translate the mapping
 * request at guest physical offset *offset into the backing VM object of
 * the memory segment that fully contains [first, last).  Only sysmem
 * segments may be mapped through this cdev (devmem has its own cdevs),
 * and PROT_EXEC mappings are refused.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	/* Walk the memory map until a segment spanning the request is found. */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			/* Caller owns a new reference on the object. */
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

/*
 * Final teardown of a VM device: freeze all vcpus (creation of new ones
 * is disabled first), release devmem bookkeeping, destroy the cdev,
 * destroy the vm instance, drop the creator credential, unlink from the
 * global list, and free the softc.
 */
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	/* Unlink from the global list if this softc was ever inserted. */
	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

/*
 * sysctl hw.vmm.destroy: destroy the named virtual machine.  The VM name
 * is read via sysctl_handle_string(); on a read-only access the placeholder
 * "beavis" is returned and nothing is destroyed.
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		/* VM does not exist or is already being destroyed. */
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

/* Character-device switch for /dev/vmm/<name>. */
static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

/*
 * sysctl hw.vmm.create: create a virtual machine with the given name and
 * its /dev/vmm/<name> cdev.  Reads return the placeholder "beavis".
 */
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	/* vm_create() can sleep, so it is called without the mutex held. */
	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->ucred = crhold(curthread->td_ucred);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
*/
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		/* Lost the race: another VM with this name now exists. */
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	/* Publish the cdev; setting si_drv1 makes it usable by lookups. */
	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

/*
 * Module initialization: register the "allow.vmm" jail permission flag.
 */
void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

/*
 * Module teardown check: refuse to unload (EBUSY) while any VM softc is
 * still on the global list.
 */
int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

/*
 * Handle mmap(2) on a devmem cdev (/dev/vmm.io/<vm>.<seg>): return the VM
 * object backing this device-memory segment.  The requested range must lie
 * entirely within the segment and must not be executable.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	/* Caller consumes the object reference taken on success. */
	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

/* Character-device switch for devmem segment cdevs. */
static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

/*
 * Create the cdev for a devmem segment and link its softc into the owning
 * VM's devmem list.  Takes ownership of 'devname' (freed by
 * vmmdev_destroy()).  Returns ENODEV if the VM is concurrently being
 * created or destroyed.
 */
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		/*
		 * Cannot destroy_dev() with vmmdev_mtx logic in play here;
		 * schedule the destruction instead.
		 */
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

/*
 * Mark a devmem softc as detached from its cdev and VM.  The softc itself
 * is freed later by vmmdev_destroy(); the caller destroys the cdev.
 */
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}