/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

static int
vmm_priv_check(struct ucred *ucred)
{

	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
	int error;

	if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
		return (EINVAL);

	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
	return (error);
}
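/*
 * Undo the effect of a successful vcpu_lock_one(); the vcpu must still be
 * in the VCPU_FROZEN state set by that call or this routine will panic.
 */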
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(sc->vm, vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
		    vcpu, state);
	}

	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error, vcpu;
	uint16_t maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (vcpu = 0; vcpu < maxcpus; vcpu++) {
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			break;
	}

	if (error) {
		while (--vcpu >= 0)
			vcpu_unlock_one(sc, vcpu);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	int vcpu;
	uint16_t maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (vcpu = 0; vcpu < maxcpus; vcpu++)
		vcpu_unlock_one(sc, vcpu);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}
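/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  The uio offset is
 * interpreted as a guest physical address; the transfer is done one page
 * at a time through mappings obtained with vm_gpa_hold().
 */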
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;
	uint16_t lastcpu;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	lastcpu = vm_get_maxcpus(sc->vm) - 1;
	error = vcpu_lock_one(sc, lastcpu);
	if (error)
		return (error);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c,
		    prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vcpu_unlock_one(sc, lastcpu);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
		    NULL);
	} else {
		bzero(mseg->name, sizeof(mseg->name));
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vm, vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vm, vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}
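/*
 * ioctl(2) handler for /dev/vmm/<name>; this is the control-plane entry
 * point used by userland (e.g. bhyve(8) via libvmmapi) to operate the VM.
 */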
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpu, state_changed, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_register_set *vmregset;
	struct vm_run *vmrun;
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_activate_cpu *vac;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;
	struct vm_cpu_topology *topology;
	uint64_t *regvals;
	int *regnums;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	vcpu = -1;
	state_changed = 0;

	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_GET_REGISTER_SET:
	case VM_SET_REGISTER_SET:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
	case VM_PPTDEV_MSIX:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_GLA2GPA_NOFAULT:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	case VM_MAP_PPTDEV_MMIO:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_ALLOC_MEMSEG:
	case VM_MMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		state_changed = 2;
		break;

	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock a vcpu to make sure that the memory map cannot be
		 * modified while it is being inspected.
		 */
		vcpu = vm_get_maxcpus(sc->vm) - 1;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	default:
		break;
	}
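	/*
	 * Any vcpu locking required by the command has been done above;
	 * now dispatch on the command itself.
	 */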
	switch (cmd) {
	case VM_RUN:
		vmrun = (struct vm_run *)data;
		error = vm_run(sc->vm, vmrun);
		break;
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
		    statdesc->desc, sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
		    &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
		    pptmsi->bus, pptmsi->slot, pptmsi->func,
		    pptmsi->addr, pptmsi->msg,
		    pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
		    pptmsix->bus, pptmsix->slot,
		    pptmsix->func, pptmsix->idx,
		    pptmsix->addr, pptmsix->msg,
		    pptmsix->vector_control);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len,
		    pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(sc->vm, vmexc->cpuid,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
		    vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_ISA_ASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    &vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
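	/*
	 * The register-set ioctls carry userland pointers to the register
	 * number and value arrays, so bounce them through kernel buffers
	 * with copyin()/copyout().
	 */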
	case VM_GET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(sc->vm, vmregset->cpuid,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	case VM_SET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(sc->vm, vmregset->cpuid,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
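	/*
	 * Get or set an optional per-vcpu capability (one of the VM_CAP_*
	 * values defined in <machine/vmm.h>).
	 */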
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(sc->vm,
		    x2apic->cpuid, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(sc->vm,
		    x2apic->cpuid, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
		    gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_GLA2GPA_NOFAULT:
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging,
		    gg->gla, gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	case VM_ACTIVATE_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_activate_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SUSPEND_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_suspend_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_RESUME_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_resume_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
		    &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(sc->vm, vcpu);
		break;
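	/*
	 * Record or report the socket/core/thread topology advertised to
	 * the guest.
	 */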
	case VM_SET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	case VM_GET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}

	if (state_changed == 1)
		vcpu_unlock_one(sc, vcpu);
	else if (state_changed == 2)
		vcpu_unlock_all(sc);

done:
	/* Make sure that no handler returns a bogus value like ERESTART */
	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	uint16_t lastcpu;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	lastcpu = vm_get_maxcpus(sc->vm) - 1;
	error = vcpu_lock_one(sc, lastcpu);
	if (error)
		return (error);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vcpu_unlock_one(sc, lastcpu);
	return (error);
}

static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error;

	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}
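/*
 * Handler for the 'hw.vmm.destroy' sysctl, e.g. "sysctl hw.vmm.destroy=myvm".
 * The named VM's cdevs are scheduled for destruction; vmmdev_destroy() does
 * the actual teardown once all references are dropped.
 */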
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
	 * goes down to 0 so we should not do it again in the callback.
	 *
	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Schedule all cdevs to be destroyed:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
	 *   be destroyed and the callback will be invoked in a taskqueue
	 *   context.
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
	}
	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON,
    NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};
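/*
 * Handler for the 'hw.vmm.create' sysctl, e.g. "sysctl hw.vmm.create=myvm".
 * Creates the VM instance and its /dev/vmm/<name> device node.
 */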
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON,
    NULL, 0, sysctl_vmm_create, "A", NULL);

void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	uint16_t lastcpu;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
	error = vcpu_lock_one(dsc->sc, lastcpu);
	if (error)
		return (error);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	vcpu_unlock_one(dsc->sc, lastcpu);

	if (seglen >= last) {
		vm_object_reference(*objp);
		return (0);
	} else {
		return (EINVAL);
	}
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}
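/*
 * Callback invoked from the destroy_dev_sched_cb() taskqueue after the last
 * reference to the devmem cdev is dropped; the devmem_softc itself is freed
 * later by vmmdev_destroy().
 */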
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}