1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/kernel.h> 32 #include <sys/jail.h> 33 #include <sys/queue.h> 34 #include <sys/lock.h> 35 #include <sys/mutex.h> 36 #include <sys/malloc.h> 37 #include <sys/conf.h> 38 #include <sys/sysctl.h> 39 #include <sys/libkern.h> 40 #include <sys/ioccom.h> 41 #include <sys/mman.h> 42 #include <sys/uio.h> 43 #include <sys/proc.h> 44 45 #include <vm/vm.h> 46 #include <vm/pmap.h> 47 #include <vm/vm_map.h> 48 #include <vm/vm_object.h> 49 50 #include <machine/machdep.h> 51 #include <machine/vmparam.h> 52 #include <machine/vmm.h> 53 #include <machine/vmm_dev.h> 54 55 #include "vmm_stat.h" 56 57 #include "io/vgic.h" 58 59 struct devmem_softc { 60 int segid; 61 char *name; 62 struct cdev *cdev; 63 struct vmmdev_softc *sc; 64 SLIST_ENTRY(devmem_softc) link; 65 }; 66 67 struct vmmdev_softc { 68 struct vm *vm; /* vm instance cookie */ 69 struct cdev *cdev; 70 struct ucred *ucred; 71 SLIST_ENTRY(vmmdev_softc) link; 72 SLIST_HEAD(, devmem_softc) devmem; 73 int flags; 74 }; 75 #define VSC_LINKED 0x01 76 77 static SLIST_HEAD(, vmmdev_softc) head; 78 79 static unsigned pr_allow_flag; 80 static struct mtx vmmdev_mtx; 81 MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); 82 83 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 84 85 SYSCTL_DECL(_hw_vmm); 86 87 static int vmm_priv_check(struct ucred *ucred); 88 static int devmem_create_cdev(const char *vmname, int id, char *devmem); 89 static void devmem_destroy(void *arg); 90 91 static int 92 vmm_priv_check(struct ucred *ucred) 93 { 94 95 if (jailed(ucred) && 96 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 97 return (EPERM); 98 99 return (0); 100 } 101 102 static int 103 vcpu_lock_one(struct vcpu *vcpu) 104 { 105 int error; 106 107 error = vcpu_set_state(vcpu, VCPU_FROZEN, true); 108 return (error); 109 } 110 111 static void 112 vcpu_unlock_one(struct vcpu *vcpu) 113 { 114 enum vcpu_state state; 115 116 state = vcpu_get_state(vcpu, NULL); 117 if (state != VCPU_FROZEN) { 118 panic("vcpu %s(%d) has invalid state %d", 119 vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); 120 } 121 122 vcpu_set_state(vcpu, VCPU_IDLE, false); 123 } 124 125 static int 126 vcpu_lock_all(struct vmmdev_softc *sc) 127 { 128 struct vcpu *vcpu; 129 int error; 130 uint16_t i, j, maxcpus; 131 132 error = 0; 133 vm_slock_vcpus(sc->vm); 134 maxcpus = vm_get_maxcpus(sc->vm); 135 for (i = 0; i < maxcpus; i++) { 136 vcpu = vm_vcpu(sc->vm, i); 137 if (vcpu == NULL) 138 continue; 139 error = vcpu_lock_one(vcpu); 140 if (error) 141 break; 142 } 143 144 if (error) { 145 for (j = 0; j < i; j++) { 146 vcpu = vm_vcpu(sc->vm, j); 147 if (vcpu == NULL) 148 continue; 149 vcpu_unlock_one(vcpu); 150 } 151 vm_unlock_vcpus(sc->vm); 152 } 153 154 return (error); 155 } 156 157 static void 158 vcpu_unlock_all(struct vmmdev_softc *sc) 159 { 160 struct vcpu *vcpu; 161 uint16_t i, maxcpus; 162 163 maxcpus = vm_get_maxcpus(sc->vm); 164 for (i = 0; i < maxcpus; i++) { 165 vcpu = vm_vcpu(sc->vm, i); 166 if (vcpu == NULL) 167 continue; 168 vcpu_unlock_one(vcpu); 169 } 170 vm_unlock_vcpus(sc->vm); 171 } 172 173 static struct vmmdev_softc * 174 vmmdev_lookup(const char *name) 175 { 176 struct vmmdev_softc *sc; 177 178 #ifdef notyet /* XXX kernel is not compiled with invariants */ 179 mtx_assert(&vmmdev_mtx, MA_OWNED); 180 #endif 181 182 SLIST_FOREACH(sc, &head, link) { 183 if (strcmp(name, vm_name(sc->vm)) == 0) 184 break; 185 } 186 187 if (sc == NULL) 188 return (NULL); 189 190 if (cr_cansee(curthread->td_ucred, sc->ucred)) 191 return (NULL); 192 193 return (sc); 194 } 195 196 static struct vmmdev_softc * 197 vmmdev_lookup2(struct cdev *cdev) 198 { 199 200 return (cdev->si_drv1); 201 } 202 203 static int 204 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 205 { 206 int error, off, c, prot; 207 vm_paddr_t gpa, maxaddr; 208 void *hpa, *cookie; 209 struct vmmdev_softc *sc; 210 211 error = vmm_priv_check(curthread->td_ucred); 212 if (error) 213 return (error); 214 215 sc = vmmdev_lookup2(cdev); 216 if (sc == NULL) 217 return (ENXIO); 218 219 /* 220 * Get a read lock on the guest memory map. 221 */ 222 vm_slock_memsegs(sc->vm); 223 224 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 225 maxaddr = vmm_sysmem_maxaddr(sc->vm); 226 while (uio->uio_resid > 0 && error == 0) { 227 gpa = uio->uio_offset; 228 off = gpa & PAGE_MASK; 229 c = min(uio->uio_resid, PAGE_SIZE - off); 230 231 /* 232 * The VM has a hole in its physical memory map. If we want to 233 * use 'dd' to inspect memory beyond the hole we need to 234 * provide bogus data for memory that lies in the hole. 235 * 236 * Since this device does not support lseek(2), dd(1) will 237 * read(2) blocks of data to simulate the lseek(2). 238 */ 239 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); 240 if (hpa == NULL) { 241 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 242 error = uiomove(__DECONST(void *, zero_region), 243 c, uio); 244 else 245 error = EFAULT; 246 } else { 247 error = uiomove(hpa, c, uio); 248 vm_gpa_release(cookie); 249 } 250 } 251 vm_unlock_memsegs(sc->vm); 252 return (error); 253 } 254 255 static int 256 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 257 { 258 struct devmem_softc *dsc; 259 int error; 260 bool sysmem; 261 262 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 263 if (error || mseg->len == 0) 264 return (error); 265 266 if (!sysmem) { 267 SLIST_FOREACH(dsc, &sc->devmem, link) { 268 if (dsc->segid == mseg->segid) 269 break; 270 } 271 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 272 __func__, mseg->segid)); 273 error = copystr(dsc->name, mseg->name, sizeof(mseg->name), 274 NULL); 275 } else { 276 bzero(mseg->name, sizeof(mseg->name)); 277 } 278 279 return (error); 280 } 281 282 static int 283 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 284 { 285 char *name; 286 int error; 287 bool sysmem; 288 289 error = 0; 290 name = NULL; 291 sysmem = true; 292 293 /* 294 * The allocation is lengthened by 1 to hold a terminating NUL. It'll 295 * by stripped off when devfs processes the full string. 296 */ 297 if (VM_MEMSEG_NAME(mseg)) { 298 sysmem = false; 299 name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK); 300 error = copystr(mseg->name, name, sizeof(mseg->name), NULL); 301 if (error) 302 goto done; 303 } 304 305 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 306 if (error) 307 goto done; 308 309 if (VM_MEMSEG_NAME(mseg)) { 310 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); 311 if (error) 312 vm_free_memseg(sc->vm, mseg->segid); 313 else 314 name = NULL; /* freed when 'cdev' is destroyed */ 315 } 316 done: 317 free(name, M_VMMDEV); 318 return (error); 319 } 320 321 static int 322 vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 323 uint64_t *regval) 324 { 325 int error, i; 326 327 error = 0; 328 for (i = 0; i < count; i++) { 329 error = vm_get_register(vcpu, regnum[i], ®val[i]); 330 if (error) 331 break; 332 } 333 return (error); 334 } 335 336 static int 337 vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 338 uint64_t *regval) 339 { 340 int error, i; 341 342 error = 0; 343 for (i = 0; i < count; i++) { 344 error = vm_set_register(vcpu, regnum[i], regval[i]); 345 if (error) 346 break; 347 } 348 return (error); 349 } 350 351 static int 352 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 353 struct thread *td) 354 { 355 int error, vcpuid, size; 356 cpuset_t *cpuset; 357 struct vmmdev_softc *sc; 358 struct vcpu *vcpu; 359 struct vm_register *vmreg; 360 struct vm_register_set *vmregset; 361 struct vm_run *vmrun; 362 struct vm_vgic_version *vgv; 363 struct vm_vgic_descr *vgic; 364 struct vm_cpuset *vm_cpuset; 365 struct vm_irq *vi; 366 struct vm_capability *vmcap; 367 struct vm_stats *vmstats; 368 struct vm_stat_desc *statdesc; 369 struct vm_suspend *vmsuspend; 370 struct vm_exception *vmexc; 371 struct vm_gla2gpa *gg; 372 struct vm_memmap *mm; 373 struct vm_munmap *mu; 374 struct vm_msi *vmsi; 375 struct vm_cpu_topology *topology; 376 uint64_t *regvals; 377 int *regnums; 378 enum { NONE, SINGLE, ALL } vcpus_locked; 379 bool memsegs_locked; 380 381 error = vmm_priv_check(curthread->td_ucred); 382 if (error) 383 return (error); 384 385 sc = vmmdev_lookup2(cdev); 386 if (sc == NULL) 387 return (ENXIO); 388 389 error = 0; 390 vcpuid = -1; 391 vcpu = NULL; 392 vcpus_locked = NONE; 393 memsegs_locked = false; 394 395 /* 396 * Some VMM ioctls can operate only on vcpus that are not running. 397 */ 398 switch (cmd) { 399 case VM_RUN: 400 case VM_GET_REGISTER: 401 case VM_SET_REGISTER: 402 case VM_GET_REGISTER_SET: 403 case VM_SET_REGISTER_SET: 404 case VM_INJECT_EXCEPTION: 405 case VM_GET_CAPABILITY: 406 case VM_SET_CAPABILITY: 407 case VM_GLA2GPA_NOFAULT: 408 case VM_ACTIVATE_CPU: 409 /* 410 * ioctls that can operate only on vcpus that are not running. 411 */ 412 vcpuid = *(int *)data; 413 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 414 if (vcpu == NULL) { 415 error = EINVAL; 416 goto done; 417 } 418 error = vcpu_lock_one(vcpu); 419 if (error) 420 goto done; 421 vcpus_locked = SINGLE; 422 break; 423 424 case VM_ALLOC_MEMSEG: 425 case VM_MMAP_MEMSEG: 426 case VM_MUNMAP_MEMSEG: 427 case VM_REINIT: 428 case VM_ATTACH_VGIC: 429 /* 430 * ioctls that modify the memory map must lock memory 431 * segments exclusively. 432 */ 433 vm_xlock_memsegs(sc->vm); 434 memsegs_locked = true; 435 436 /* 437 * ioctls that operate on the entire virtual machine must 438 * prevent all vcpus from running. 439 */ 440 error = vcpu_lock_all(sc); 441 if (error) 442 goto done; 443 vcpus_locked = ALL; 444 break; 445 case VM_GET_MEMSEG: 446 case VM_MMAP_GETNEXT: 447 /* 448 * Lock the memory map while it is being inspected. 449 */ 450 vm_slock_memsegs(sc->vm); 451 memsegs_locked = true; 452 break; 453 454 case VM_STATS: 455 /* 456 * These do not need the vCPU locked but do operate on 457 * a specific vCPU. 458 */ 459 vcpuid = *(int *)data; 460 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 461 if (vcpu == NULL) { 462 error = EINVAL; 463 goto done; 464 } 465 break; 466 467 case VM_SUSPEND_CPU: 468 case VM_RESUME_CPU: 469 /* 470 * These can either operate on all CPUs via a vcpuid of 471 * -1 or on a specific vCPU. 472 */ 473 vcpuid = *(int *)data; 474 if (vcpuid == -1) 475 break; 476 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 477 if (vcpu == NULL) { 478 error = EINVAL; 479 goto done; 480 } 481 break; 482 483 case VM_ASSERT_IRQ: 484 vi = (struct vm_irq *)data; 485 error = vm_assert_irq(sc->vm, vi->irq); 486 break; 487 case VM_DEASSERT_IRQ: 488 vi = (struct vm_irq *)data; 489 error = vm_deassert_irq(sc->vm, vi->irq); 490 break; 491 default: 492 break; 493 } 494 495 switch (cmd) { 496 case VM_RUN: { 497 struct vm_exit *vme; 498 499 vmrun = (struct vm_run *)data; 500 vme = vm_exitinfo(vcpu); 501 502 error = vm_run(vcpu); 503 if (error != 0) 504 break; 505 506 error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); 507 if (error != 0) 508 break; 509 break; 510 } 511 case VM_SUSPEND: 512 vmsuspend = (struct vm_suspend *)data; 513 error = vm_suspend(sc->vm, vmsuspend->how); 514 break; 515 case VM_REINIT: 516 error = vm_reinit(sc->vm); 517 break; 518 case VM_STAT_DESC: { 519 statdesc = (struct vm_stat_desc *)data; 520 error = vmm_stat_desc_copy(statdesc->index, 521 statdesc->desc, sizeof(statdesc->desc)); 522 break; 523 } 524 case VM_STATS: { 525 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); 526 vmstats = (struct vm_stats *)data; 527 getmicrotime(&vmstats->tv); 528 error = vmm_stat_copy(vcpu, vmstats->index, 529 nitems(vmstats->statbuf), 530 &vmstats->num_entries, vmstats->statbuf); 531 break; 532 } 533 case VM_MMAP_GETNEXT: 534 mm = (struct vm_memmap *)data; 535 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 536 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 537 break; 538 case VM_MMAP_MEMSEG: 539 mm = (struct vm_memmap *)data; 540 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 541 mm->len, mm->prot, mm->flags); 542 break; 543 case VM_MUNMAP_MEMSEG: 544 mu = (struct vm_munmap *)data; 545 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 546 break; 547 case VM_ALLOC_MEMSEG: 548 error = alloc_memseg(sc, (struct vm_memseg *)data); 549 break; 550 case VM_GET_MEMSEG: 551 error = get_memseg(sc, (struct vm_memseg *)data); 552 break; 553 case VM_GET_REGISTER: 554 vmreg = (struct vm_register *)data; 555 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 556 break; 557 case VM_SET_REGISTER: 558 vmreg = (struct vm_register *)data; 559 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 560 break; 561 case VM_GET_REGISTER_SET: 562 vmregset = (struct vm_register_set *)data; 563 if (vmregset->count > VM_REG_LAST) { 564 error = EINVAL; 565 break; 566 } 567 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 568 M_WAITOK); 569 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 570 M_WAITOK); 571 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 572 vmregset->count); 573 if (error == 0) 574 error = vm_get_register_set(vcpu, vmregset->count, 575 regnums, regvals); 576 if (error == 0) 577 error = copyout(regvals, vmregset->regvals, 578 sizeof(regvals[0]) * vmregset->count); 579 free(regvals, M_VMMDEV); 580 free(regnums, M_VMMDEV); 581 break; 582 case VM_SET_REGISTER_SET: 583 vmregset = (struct vm_register_set *)data; 584 if (vmregset->count > VM_REG_LAST) { 585 error = EINVAL; 586 break; 587 } 588 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 589 M_WAITOK); 590 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 591 M_WAITOK); 592 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 593 vmregset->count); 594 if (error == 0) 595 error = copyin(vmregset->regvals, regvals, 596 sizeof(regvals[0]) * vmregset->count); 597 if (error == 0) 598 error = vm_set_register_set(vcpu, vmregset->count, 599 regnums, regvals); 600 free(regvals, M_VMMDEV); 601 free(regnums, M_VMMDEV); 602 break; 603 case VM_GET_CAPABILITY: 604 vmcap = (struct vm_capability *)data; 605 error = vm_get_capability(vcpu, 606 vmcap->captype, 607 &vmcap->capval); 608 break; 609 case VM_SET_CAPABILITY: 610 vmcap = (struct vm_capability *)data; 611 error = vm_set_capability(vcpu, 612 vmcap->captype, 613 vmcap->capval); 614 break; 615 case VM_INJECT_EXCEPTION: 616 vmexc = (struct vm_exception *)data; 617 error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); 618 break; 619 case VM_GLA2GPA_NOFAULT: 620 gg = (struct vm_gla2gpa *)data; 621 error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, 622 gg->prot, &gg->gpa, &gg->fault); 623 KASSERT(error == 0 || error == EFAULT, 624 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 625 break; 626 case VM_ACTIVATE_CPU: 627 error = vm_activate_cpu(vcpu); 628 break; 629 case VM_GET_CPUS: 630 error = 0; 631 vm_cpuset = (struct vm_cpuset *)data; 632 size = vm_cpuset->cpusetsize; 633 if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { 634 error = ERANGE; 635 break; 636 } 637 cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 638 if (vm_cpuset->which == VM_ACTIVE_CPUS) 639 *cpuset = vm_active_cpus(sc->vm); 640 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 641 *cpuset = vm_suspended_cpus(sc->vm); 642 else if (vm_cpuset->which == VM_DEBUG_CPUS) 643 *cpuset = vm_debug_cpus(sc->vm); 644 else 645 error = EINVAL; 646 if (error == 0) 647 error = copyout(cpuset, vm_cpuset->cpus, size); 648 free(cpuset, M_TEMP); 649 break; 650 case VM_SUSPEND_CPU: 651 error = vm_suspend_cpu(sc->vm, vcpu); 652 break; 653 case VM_RESUME_CPU: 654 error = vm_resume_cpu(sc->vm, vcpu); 655 break; 656 case VM_GET_VGIC_VERSION: 657 vgv = (struct vm_vgic_version *)data; 658 /* TODO: Query the vgic driver for this */ 659 vgv->version = 3; 660 vgv->flags = 0; 661 error = 0; 662 break; 663 case VM_ATTACH_VGIC: 664 vgic = (struct vm_vgic_descr *)data; 665 error = vm_attach_vgic(sc->vm, vgic); 666 break; 667 case VM_RAISE_MSI: 668 vmsi = (struct vm_msi *)data; 669 error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus, 670 vmsi->slot, vmsi->func); 671 break; 672 case VM_SET_TOPOLOGY: 673 topology = (struct vm_cpu_topology *)data; 674 error = vm_set_topology(sc->vm, topology->sockets, 675 topology->cores, topology->threads, topology->maxcpus); 676 break; 677 case VM_GET_TOPOLOGY: 678 topology = (struct vm_cpu_topology *)data; 679 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 680 &topology->threads, &topology->maxcpus); 681 error = 0; 682 break; 683 default: 684 error = ENOTTY; 685 break; 686 } 687 688 done: 689 if (vcpus_locked == SINGLE) 690 vcpu_unlock_one(vcpu); 691 else if (vcpus_locked == ALL) 692 vcpu_unlock_all(sc); 693 if (memsegs_locked) 694 vm_unlock_memsegs(sc->vm); 695 696 /* 697 * Make sure that no handler returns a kernel-internal 698 * error value to userspace. 699 */ 700 KASSERT(error == ERESTART || error >= 0, 701 ("vmmdev_ioctl: invalid error return %d", error)); 702 return (error); 703 } 704 705 static int 706 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 707 struct vm_object **objp, int nprot) 708 { 709 struct vmmdev_softc *sc; 710 vm_paddr_t gpa; 711 size_t len; 712 vm_ooffset_t segoff, first, last; 713 int error, found, segid; 714 bool sysmem; 715 716 error = vmm_priv_check(curthread->td_ucred); 717 if (error) 718 return (error); 719 720 first = *offset; 721 last = first + mapsize; 722 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 723 return (EINVAL); 724 725 sc = vmmdev_lookup2(cdev); 726 if (sc == NULL) { 727 /* virtual machine is in the process of being created */ 728 return (EINVAL); 729 } 730 731 /* 732 * Get a read lock on the guest memory map. 733 */ 734 vm_slock_memsegs(sc->vm); 735 736 gpa = 0; 737 found = 0; 738 while (!found) { 739 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 740 NULL, NULL); 741 if (error) 742 break; 743 744 if (first >= gpa && last <= gpa + len) 745 found = 1; 746 else 747 gpa += len; 748 } 749 750 if (found) { 751 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 752 KASSERT(error == 0 && *objp != NULL, 753 ("%s: invalid memory segment %d", __func__, segid)); 754 if (sysmem) { 755 vm_object_reference(*objp); 756 *offset = segoff + (first - gpa); 757 } else { 758 error = EINVAL; 759 } 760 } 761 vm_unlock_memsegs(sc->vm); 762 return (error); 763 } 764 765 static void 766 vmmdev_destroy(void *arg) 767 { 768 struct vmmdev_softc *sc = arg; 769 struct devmem_softc *dsc; 770 int error __diagused; 771 772 error = vcpu_lock_all(sc); 773 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 774 vm_unlock_vcpus(sc->vm); 775 776 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 777 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 778 SLIST_REMOVE_HEAD(&sc->devmem, link); 779 free(dsc->name, M_VMMDEV); 780 free(dsc, M_VMMDEV); 781 } 782 783 if (sc->cdev != NULL) 784 destroy_dev(sc->cdev); 785 786 if (sc->vm != NULL) 787 vm_destroy(sc->vm); 788 789 if (sc->ucred != NULL) 790 crfree(sc->ucred); 791 792 if ((sc->flags & VSC_LINKED) != 0) { 793 mtx_lock(&vmmdev_mtx); 794 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 795 mtx_unlock(&vmmdev_mtx); 796 } 797 798 free(sc, M_VMMDEV); 799 } 800 801 static int 802 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 803 { 804 struct devmem_softc *dsc; 805 struct vmmdev_softc *sc; 806 struct cdev *cdev; 807 char *buf; 808 int error, buflen; 809 810 error = vmm_priv_check(req->td->td_ucred); 811 if (error) 812 return (error); 813 814 buflen = VM_MAX_NAMELEN + 1; 815 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 816 strlcpy(buf, "beavis", buflen); 817 error = sysctl_handle_string(oidp, buf, buflen, req); 818 if (error != 0 || req->newptr == NULL) 819 goto out; 820 821 mtx_lock(&vmmdev_mtx); 822 sc = vmmdev_lookup(buf); 823 if (sc == NULL || sc->cdev == NULL) { 824 mtx_unlock(&vmmdev_mtx); 825 error = EINVAL; 826 goto out; 827 } 828 829 /* 830 * Setting 'sc->cdev' to NULL is used to indicate that the VM 831 * is scheduled for destruction. 832 */ 833 cdev = sc->cdev; 834 sc->cdev = NULL; 835 mtx_unlock(&vmmdev_mtx); 836 837 /* 838 * Destroy all cdevs: 839 * 840 * - any new operations on the 'cdev' will return an error (ENXIO). 841 * 842 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 843 */ 844 SLIST_FOREACH(dsc, &sc->devmem, link) { 845 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 846 destroy_dev(dsc->cdev); 847 devmem_destroy(dsc); 848 } 849 destroy_dev(cdev); 850 vmmdev_destroy(sc); 851 error = 0; 852 853 out: 854 free(buf, M_VMMDEV); 855 return (error); 856 } 857 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 858 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 859 NULL, 0, sysctl_vmm_destroy, "A", 860 NULL); 861 862 static struct cdevsw vmmdevsw = { 863 .d_name = "vmmdev", 864 .d_version = D_VERSION, 865 .d_ioctl = vmmdev_ioctl, 866 .d_mmap_single = vmmdev_mmap_single, 867 .d_read = vmmdev_rw, 868 .d_write = vmmdev_rw, 869 }; 870 871 static int 872 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 873 { 874 struct vm *vm; 875 struct cdev *cdev; 876 struct vmmdev_softc *sc, *sc2; 877 char *buf; 878 int error, buflen; 879 880 error = vmm_priv_check(req->td->td_ucred); 881 if (error) 882 return (error); 883 884 buflen = VM_MAX_NAMELEN + 1; 885 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 886 strlcpy(buf, "beavis", buflen); 887 error = sysctl_handle_string(oidp, buf, buflen, req); 888 if (error != 0 || req->newptr == NULL) 889 goto out; 890 891 mtx_lock(&vmmdev_mtx); 892 sc = vmmdev_lookup(buf); 893 mtx_unlock(&vmmdev_mtx); 894 if (sc != NULL) { 895 error = EEXIST; 896 goto out; 897 } 898 899 error = vm_create(buf, &vm); 900 if (error != 0) 901 goto out; 902 903 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 904 sc->ucred = crhold(curthread->td_ucred); 905 sc->vm = vm; 906 SLIST_INIT(&sc->devmem); 907 908 /* 909 * Lookup the name again just in case somebody sneaked in when we 910 * dropped the lock. 911 */ 912 mtx_lock(&vmmdev_mtx); 913 sc2 = vmmdev_lookup(buf); 914 if (sc2 == NULL) { 915 SLIST_INSERT_HEAD(&head, sc, link); 916 sc->flags |= VSC_LINKED; 917 } 918 mtx_unlock(&vmmdev_mtx); 919 920 if (sc2 != NULL) { 921 vmmdev_destroy(sc); 922 error = EEXIST; 923 goto out; 924 } 925 926 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, 927 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 928 if (error != 0) { 929 vmmdev_destroy(sc); 930 goto out; 931 } 932 933 mtx_lock(&vmmdev_mtx); 934 sc->cdev = cdev; 935 sc->cdev->si_drv1 = sc; 936 mtx_unlock(&vmmdev_mtx); 937 938 out: 939 free(buf, M_VMMDEV); 940 return (error); 941 } 942 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 943 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 944 NULL, 0, sysctl_vmm_create, "A", 945 NULL); 946 947 void 948 vmmdev_init(void) 949 { 950 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 951 "Allow use of vmm in a jail."); 952 } 953 954 int 955 vmmdev_cleanup(void) 956 { 957 int error; 958 959 if (SLIST_EMPTY(&head)) 960 error = 0; 961 else 962 error = EBUSY; 963 964 return (error); 965 } 966 967 static int 968 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 969 struct vm_object **objp, int nprot) 970 { 971 struct devmem_softc *dsc; 972 vm_ooffset_t first, last; 973 size_t seglen; 974 int error; 975 bool sysmem; 976 977 dsc = cdev->si_drv1; 978 if (dsc == NULL) { 979 /* 'cdev' has been created but is not ready for use */ 980 return (ENXIO); 981 } 982 983 first = *offset; 984 last = *offset + len; 985 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 986 return (EINVAL); 987 988 vm_slock_memsegs(dsc->sc->vm); 989 990 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 991 KASSERT(error == 0 && !sysmem && *objp != NULL, 992 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 993 994 if (seglen >= last) 995 vm_object_reference(*objp); 996 else 997 error = 0; 998 vm_unlock_memsegs(dsc->sc->vm); 999 return (error); 1000 } 1001 1002 static struct cdevsw devmemsw = { 1003 .d_name = "devmem", 1004 .d_version = D_VERSION, 1005 .d_mmap_single = devmem_mmap_single, 1006 }; 1007 1008 static int 1009 devmem_create_cdev(const char *vmname, int segid, char *devname) 1010 { 1011 struct devmem_softc *dsc; 1012 struct vmmdev_softc *sc; 1013 struct cdev *cdev; 1014 int error; 1015 1016 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1017 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1018 if (error) 1019 return (error); 1020 1021 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1022 1023 mtx_lock(&vmmdev_mtx); 1024 sc = vmmdev_lookup(vmname); 1025 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1026 if (sc->cdev == NULL) { 1027 /* virtual machine is being created or destroyed */ 1028 mtx_unlock(&vmmdev_mtx); 1029 free(dsc, M_VMMDEV); 1030 destroy_dev_sched_cb(cdev, NULL, 0); 1031 return (ENODEV); 1032 } 1033 1034 dsc->segid = segid; 1035 dsc->name = devname; 1036 dsc->cdev = cdev; 1037 dsc->sc = sc; 1038 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1039 mtx_unlock(&vmmdev_mtx); 1040 1041 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1042 cdev->si_drv1 = dsc; 1043 return (0); 1044 } 1045 1046 static void 1047 devmem_destroy(void *arg) 1048 { 1049 struct devmem_softc *dsc = arg; 1050 1051 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1052 dsc->cdev = NULL; 1053 dsc->sc = NULL; 1054 } 1055