1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/jail.h> 37 #include <sys/queue.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/malloc.h> 41 #include <sys/conf.h> 42 #include <sys/sysctl.h> 43 #include <sys/libkern.h> 44 #include <sys/ioccom.h> 45 #include <sys/mman.h> 46 #include <sys/uio.h> 47 #include <sys/proc.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 #include <vm/vm_map.h> 52 #include <vm/vm_object.h> 53 54 #include <machine/vmparam.h> 55 #include <machine/vmm.h> 56 #include <machine/vmm_instruction_emul.h> 57 #include <machine/vmm_dev.h> 58 59 #include "vmm_lapic.h" 60 #include "vmm_stat.h" 61 #include "vmm_mem.h" 62 #include "io/ppt.h" 63 #include "io/vatpic.h" 64 #include "io/vioapic.h" 65 #include "io/vhpet.h" 66 #include "io/vrtc.h" 67 68 struct devmem_softc { 69 int segid; 70 char *name; 71 struct cdev *cdev; 72 struct vmmdev_softc *sc; 73 SLIST_ENTRY(devmem_softc) link; 74 }; 75 76 struct vmmdev_softc { 77 struct vm *vm; /* vm instance cookie */ 78 struct cdev *cdev; 79 SLIST_ENTRY(vmmdev_softc) link; 80 SLIST_HEAD(, devmem_softc) devmem; 81 int flags; 82 }; 83 #define VSC_LINKED 0x01 84 85 static SLIST_HEAD(, vmmdev_softc) head; 86 87 static unsigned pr_allow_flag; 88 static struct mtx vmmdev_mtx; 89 90 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 91 92 SYSCTL_DECL(_hw_vmm); 93 94 static int vmm_priv_check(struct ucred *ucred); 95 static int devmem_create_cdev(const char *vmname, int id, char *devmem); 96 static void devmem_destroy(void *arg); 97 98 static int 99 vmm_priv_check(struct ucred *ucred) 100 { 101 102 if (jailed(ucred) && 103 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 104 return (EPERM); 105 106 return (0); 107 } 108 109 static int 110 vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) 111 { 112 int error; 113 114 if (vcpu < 0 || vcpu >= VM_MAXCPU) 115 return (EINVAL); 116 117 error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); 118 return (error); 119 } 120 121 static void 122 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) 123 { 124 enum vcpu_state state; 125 126 state = vcpu_get_state(sc->vm, vcpu, NULL); 127 if (state != VCPU_FROZEN) { 128 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), 129 vcpu, state); 130 } 131 132 vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); 133 } 134 135 static int 136 vcpu_lock_all(struct vmmdev_softc *sc) 137 { 138 int error, vcpu; 139 140 for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { 141 error = vcpu_lock_one(sc, vcpu); 142 if (error) 143 break; 144 } 145 146 if (error) { 147 while (--vcpu >= 0) 148 vcpu_unlock_one(sc, vcpu); 149 } 150 151 return (error); 152 } 153 154 static void 155 vcpu_unlock_all(struct vmmdev_softc *sc) 156 { 157 int vcpu; 158 159 for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) 160 vcpu_unlock_one(sc, vcpu); 161 } 162 163 static struct vmmdev_softc * 164 vmmdev_lookup(const char *name) 165 { 166 struct vmmdev_softc *sc; 167 168 #ifdef notyet /* XXX kernel is not compiled with invariants */ 169 mtx_assert(&vmmdev_mtx, MA_OWNED); 170 #endif 171 172 SLIST_FOREACH(sc, &head, link) { 173 if (strcmp(name, vm_name(sc->vm)) == 0) 174 break; 175 } 176 177 return (sc); 178 } 179 180 static struct vmmdev_softc * 181 vmmdev_lookup2(struct cdev *cdev) 182 { 183 184 return (cdev->si_drv1); 185 } 186 187 static int 188 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 189 { 190 int error, off, c, prot; 191 vm_paddr_t gpa, maxaddr; 192 void *hpa, *cookie; 193 struct vmmdev_softc *sc; 194 195 error = vmm_priv_check(curthread->td_ucred); 196 if (error) 197 return (error); 198 199 sc = vmmdev_lookup2(cdev); 200 if (sc == NULL) 201 return (ENXIO); 202 203 /* 204 * Get a read lock on the guest memory map by freezing any vcpu. 205 */ 206 error = vcpu_lock_one(sc, VM_MAXCPU - 1); 207 if (error) 208 return (error); 209 210 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 211 maxaddr = vmm_sysmem_maxaddr(sc->vm); 212 while (uio->uio_resid > 0 && error == 0) { 213 gpa = uio->uio_offset; 214 off = gpa & PAGE_MASK; 215 c = min(uio->uio_resid, PAGE_SIZE - off); 216 217 /* 218 * The VM has a hole in its physical memory map. If we want to 219 * use 'dd' to inspect memory beyond the hole we need to 220 * provide bogus data for memory that lies in the hole. 221 * 222 * Since this device does not support lseek(2), dd(1) will 223 * read(2) blocks of data to simulate the lseek(2). 224 */ 225 hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie); 226 if (hpa == NULL) { 227 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 228 error = uiomove(__DECONST(void *, zero_region), 229 c, uio); 230 else 231 error = EFAULT; 232 } else { 233 error = uiomove(hpa, c, uio); 234 vm_gpa_release(cookie); 235 } 236 } 237 vcpu_unlock_one(sc, VM_MAXCPU - 1); 238 return (error); 239 } 240 241 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1); 242 243 static int 244 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 245 { 246 struct devmem_softc *dsc; 247 int error; 248 bool sysmem; 249 250 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 251 if (error || mseg->len == 0) 252 return (error); 253 254 if (!sysmem) { 255 SLIST_FOREACH(dsc, &sc->devmem, link) { 256 if (dsc->segid == mseg->segid) 257 break; 258 } 259 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 260 __func__, mseg->segid)); 261 error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL); 262 } else { 263 bzero(mseg->name, sizeof(mseg->name)); 264 } 265 266 return (error); 267 } 268 269 static int 270 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 271 { 272 char *name; 273 int error; 274 bool sysmem; 275 276 error = 0; 277 name = NULL; 278 sysmem = true; 279 280 if (VM_MEMSEG_NAME(mseg)) { 281 sysmem = false; 282 name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK); 283 error = copystr(mseg->name, name, SPECNAMELEN + 1, 0); 284 if (error) 285 goto done; 286 } 287 288 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 289 if (error) 290 goto done; 291 292 if (VM_MEMSEG_NAME(mseg)) { 293 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); 294 if (error) 295 vm_free_memseg(sc->vm, mseg->segid); 296 else 297 name = NULL; /* freed when 'cdev' is destroyed */ 298 } 299 done: 300 free(name, M_VMMDEV); 301 return (error); 302 } 303 304 static int 305 vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, 306 uint64_t *regval) 307 { 308 int error, i; 309 310 error = 0; 311 for (i = 0; i < count; i++) { 312 error = vm_get_register(vm, vcpu, regnum[i], ®val[i]); 313 if (error) 314 break; 315 } 316 return (error); 317 } 318 319 static int 320 vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, 321 uint64_t *regval) 322 { 323 int error, i; 324 325 error = 0; 326 for (i = 0; i < count; i++) { 327 error = vm_set_register(vm, vcpu, regnum[i], regval[i]); 328 if (error) 329 break; 330 } 331 return (error); 332 } 333 334 static int 335 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 336 struct thread *td) 337 { 338 int error, vcpu, state_changed, size; 339 cpuset_t *cpuset; 340 struct vmmdev_softc *sc; 341 struct vm_register *vmreg; 342 struct vm_seg_desc *vmsegdesc; 343 struct vm_register_set *vmregset; 344 struct vm_run *vmrun; 345 struct vm_exception *vmexc; 346 struct vm_lapic_irq *vmirq; 347 struct vm_lapic_msi *vmmsi; 348 struct vm_ioapic_irq *ioapic_irq; 349 struct vm_isa_irq *isa_irq; 350 struct vm_isa_irq_trigger *isa_irq_trigger; 351 struct vm_capability *vmcap; 352 struct vm_pptdev *pptdev; 353 struct vm_pptdev_mmio *pptmmio; 354 struct vm_pptdev_msi *pptmsi; 355 struct vm_pptdev_msix *pptmsix; 356 struct vm_nmi *vmnmi; 357 struct vm_stats *vmstats; 358 struct vm_stat_desc *statdesc; 359 struct vm_x2apic *x2apic; 360 struct vm_gpa_pte *gpapte; 361 struct vm_suspend *vmsuspend; 362 struct vm_gla2gpa *gg; 363 struct vm_activate_cpu *vac; 364 struct vm_cpuset *vm_cpuset; 365 struct vm_intinfo *vmii; 366 struct vm_rtc_time *rtctime; 367 struct vm_rtc_data *rtcdata; 368 struct vm_memmap *mm; 369 struct vm_cpu_topology *topology; 370 uint64_t *regvals; 371 int *regnums; 372 373 error = vmm_priv_check(curthread->td_ucred); 374 if (error) 375 return (error); 376 377 sc = vmmdev_lookup2(cdev); 378 if (sc == NULL) 379 return (ENXIO); 380 381 vcpu = -1; 382 state_changed = 0; 383 384 /* 385 * Some VMM ioctls can operate only on vcpus that are not running. 386 */ 387 switch (cmd) { 388 case VM_RUN: 389 case VM_GET_REGISTER: 390 case VM_SET_REGISTER: 391 case VM_GET_SEGMENT_DESCRIPTOR: 392 case VM_SET_SEGMENT_DESCRIPTOR: 393 case VM_GET_REGISTER_SET: 394 case VM_SET_REGISTER_SET: 395 case VM_INJECT_EXCEPTION: 396 case VM_GET_CAPABILITY: 397 case VM_SET_CAPABILITY: 398 case VM_PPTDEV_MSI: 399 case VM_PPTDEV_MSIX: 400 case VM_SET_X2APIC_STATE: 401 case VM_GLA2GPA: 402 case VM_GLA2GPA_NOFAULT: 403 case VM_ACTIVATE_CPU: 404 case VM_SET_INTINFO: 405 case VM_GET_INTINFO: 406 case VM_RESTART_INSTRUCTION: 407 /* 408 * XXX fragile, handle with care 409 * Assumes that the first field of the ioctl data is the vcpu. 410 */ 411 vcpu = *(int *)data; 412 error = vcpu_lock_one(sc, vcpu); 413 if (error) 414 goto done; 415 state_changed = 1; 416 break; 417 418 case VM_MAP_PPTDEV_MMIO: 419 case VM_BIND_PPTDEV: 420 case VM_UNBIND_PPTDEV: 421 case VM_ALLOC_MEMSEG: 422 case VM_MMAP_MEMSEG: 423 case VM_REINIT: 424 /* 425 * ioctls that operate on the entire virtual machine must 426 * prevent all vcpus from running. 427 */ 428 error = vcpu_lock_all(sc); 429 if (error) 430 goto done; 431 state_changed = 2; 432 break; 433 434 case VM_GET_MEMSEG: 435 case VM_MMAP_GETNEXT: 436 /* 437 * Lock a vcpu to make sure that the memory map cannot be 438 * modified while it is being inspected. 439 */ 440 vcpu = VM_MAXCPU - 1; 441 error = vcpu_lock_one(sc, vcpu); 442 if (error) 443 goto done; 444 state_changed = 1; 445 break; 446 447 default: 448 break; 449 } 450 451 switch(cmd) { 452 case VM_RUN: 453 vmrun = (struct vm_run *)data; 454 error = vm_run(sc->vm, vmrun); 455 break; 456 case VM_SUSPEND: 457 vmsuspend = (struct vm_suspend *)data; 458 error = vm_suspend(sc->vm, vmsuspend->how); 459 break; 460 case VM_REINIT: 461 error = vm_reinit(sc->vm); 462 break; 463 case VM_STAT_DESC: { 464 statdesc = (struct vm_stat_desc *)data; 465 error = vmm_stat_desc_copy(statdesc->index, 466 statdesc->desc, sizeof(statdesc->desc)); 467 break; 468 } 469 case VM_STATS: { 470 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); 471 vmstats = (struct vm_stats *)data; 472 getmicrotime(&vmstats->tv); 473 error = vmm_stat_copy(sc->vm, vmstats->cpuid, 474 &vmstats->num_entries, vmstats->statbuf); 475 break; 476 } 477 case VM_PPTDEV_MSI: 478 pptmsi = (struct vm_pptdev_msi *)data; 479 error = ppt_setup_msi(sc->vm, pptmsi->vcpu, 480 pptmsi->bus, pptmsi->slot, pptmsi->func, 481 pptmsi->addr, pptmsi->msg, 482 pptmsi->numvec); 483 break; 484 case VM_PPTDEV_MSIX: 485 pptmsix = (struct vm_pptdev_msix *)data; 486 error = ppt_setup_msix(sc->vm, pptmsix->vcpu, 487 pptmsix->bus, pptmsix->slot, 488 pptmsix->func, pptmsix->idx, 489 pptmsix->addr, pptmsix->msg, 490 pptmsix->vector_control); 491 break; 492 case VM_MAP_PPTDEV_MMIO: 493 pptmmio = (struct vm_pptdev_mmio *)data; 494 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 495 pptmmio->func, pptmmio->gpa, pptmmio->len, 496 pptmmio->hpa); 497 break; 498 case VM_BIND_PPTDEV: 499 pptdev = (struct vm_pptdev *)data; 500 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 501 pptdev->func); 502 break; 503 case VM_UNBIND_PPTDEV: 504 pptdev = (struct vm_pptdev *)data; 505 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 506 pptdev->func); 507 break; 508 case VM_INJECT_EXCEPTION: 509 vmexc = (struct vm_exception *)data; 510 error = vm_inject_exception(sc->vm, vmexc->cpuid, 511 vmexc->vector, vmexc->error_code_valid, vmexc->error_code, 512 vmexc->restart_instruction); 513 break; 514 case VM_INJECT_NMI: 515 vmnmi = (struct vm_nmi *)data; 516 error = vm_inject_nmi(sc->vm, vmnmi->cpuid); 517 break; 518 case VM_LAPIC_IRQ: 519 vmirq = (struct vm_lapic_irq *)data; 520 error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); 521 break; 522 case VM_LAPIC_LOCAL_IRQ: 523 vmirq = (struct vm_lapic_irq *)data; 524 error = lapic_set_local_intr(sc->vm, vmirq->cpuid, 525 vmirq->vector); 526 break; 527 case VM_LAPIC_MSI: 528 vmmsi = (struct vm_lapic_msi *)data; 529 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); 530 break; 531 case VM_IOAPIC_ASSERT_IRQ: 532 ioapic_irq = (struct vm_ioapic_irq *)data; 533 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); 534 break; 535 case VM_IOAPIC_DEASSERT_IRQ: 536 ioapic_irq = (struct vm_ioapic_irq *)data; 537 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); 538 break; 539 case VM_IOAPIC_PULSE_IRQ: 540 ioapic_irq = (struct vm_ioapic_irq *)data; 541 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); 542 break; 543 case VM_IOAPIC_PINCOUNT: 544 *(int *)data = vioapic_pincount(sc->vm); 545 break; 546 case VM_ISA_ASSERT_IRQ: 547 isa_irq = (struct vm_isa_irq *)data; 548 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); 549 if (error == 0 && isa_irq->ioapic_irq != -1) 550 error = vioapic_assert_irq(sc->vm, 551 isa_irq->ioapic_irq); 552 break; 553 case VM_ISA_DEASSERT_IRQ: 554 isa_irq = (struct vm_isa_irq *)data; 555 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); 556 if (error == 0 && isa_irq->ioapic_irq != -1) 557 error = vioapic_deassert_irq(sc->vm, 558 isa_irq->ioapic_irq); 559 break; 560 case VM_ISA_PULSE_IRQ: 561 isa_irq = (struct vm_isa_irq *)data; 562 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); 563 if (error == 0 && isa_irq->ioapic_irq != -1) 564 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); 565 break; 566 case VM_ISA_SET_IRQ_TRIGGER: 567 isa_irq_trigger = (struct vm_isa_irq_trigger *)data; 568 error = vatpic_set_irq_trigger(sc->vm, 569 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); 570 break; 571 case VM_MMAP_GETNEXT: 572 mm = (struct vm_memmap *)data; 573 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 574 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 575 break; 576 case VM_MMAP_MEMSEG: 577 mm = (struct vm_memmap *)data; 578 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 579 mm->len, mm->prot, mm->flags); 580 break; 581 case VM_ALLOC_MEMSEG: 582 error = alloc_memseg(sc, (struct vm_memseg *)data); 583 break; 584 case VM_GET_MEMSEG: 585 error = get_memseg(sc, (struct vm_memseg *)data); 586 break; 587 case VM_GET_REGISTER: 588 vmreg = (struct vm_register *)data; 589 error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, 590 &vmreg->regval); 591 break; 592 case VM_SET_REGISTER: 593 vmreg = (struct vm_register *)data; 594 error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, 595 vmreg->regval); 596 break; 597 case VM_SET_SEGMENT_DESCRIPTOR: 598 vmsegdesc = (struct vm_seg_desc *)data; 599 error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, 600 vmsegdesc->regnum, 601 &vmsegdesc->desc); 602 break; 603 case VM_GET_SEGMENT_DESCRIPTOR: 604 vmsegdesc = (struct vm_seg_desc *)data; 605 error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, 606 vmsegdesc->regnum, 607 &vmsegdesc->desc); 608 break; 609 case VM_GET_REGISTER_SET: 610 vmregset = (struct vm_register_set *)data; 611 if (vmregset->count > VM_REG_LAST) { 612 error = EINVAL; 613 break; 614 } 615 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 616 M_WAITOK); 617 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 618 M_WAITOK); 619 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 620 vmregset->count); 621 if (error == 0) 622 error = vm_get_register_set(sc->vm, vmregset->cpuid, 623 vmregset->count, regnums, regvals); 624 if (error == 0) 625 error = copyout(regvals, vmregset->regvals, 626 sizeof(regvals[0]) * vmregset->count); 627 free(regvals, M_VMMDEV); 628 free(regnums, M_VMMDEV); 629 break; 630 case VM_SET_REGISTER_SET: 631 vmregset = (struct vm_register_set *)data; 632 if (vmregset->count > VM_REG_LAST) { 633 error = EINVAL; 634 break; 635 } 636 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 637 M_WAITOK); 638 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 639 M_WAITOK); 640 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 641 vmregset->count); 642 if (error == 0) 643 error = copyin(vmregset->regvals, regvals, 644 sizeof(regvals[0]) * vmregset->count); 645 if (error == 0) 646 error = vm_set_register_set(sc->vm, vmregset->cpuid, 647 vmregset->count, regnums, regvals); 648 free(regvals, M_VMMDEV); 649 free(regnums, M_VMMDEV); 650 break; 651 case VM_GET_CAPABILITY: 652 vmcap = (struct vm_capability *)data; 653 error = vm_get_capability(sc->vm, vmcap->cpuid, 654 vmcap->captype, 655 &vmcap->capval); 656 break; 657 case VM_SET_CAPABILITY: 658 vmcap = (struct vm_capability *)data; 659 error = vm_set_capability(sc->vm, vmcap->cpuid, 660 vmcap->captype, 661 vmcap->capval); 662 break; 663 case VM_SET_X2APIC_STATE: 664 x2apic = (struct vm_x2apic *)data; 665 error = vm_set_x2apic_state(sc->vm, 666 x2apic->cpuid, x2apic->state); 667 break; 668 case VM_GET_X2APIC_STATE: 669 x2apic = (struct vm_x2apic *)data; 670 error = vm_get_x2apic_state(sc->vm, 671 x2apic->cpuid, &x2apic->state); 672 break; 673 case VM_GET_GPA_PMAP: 674 gpapte = (struct vm_gpa_pte *)data; 675 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), 676 gpapte->gpa, gpapte->pte, &gpapte->ptenum); 677 error = 0; 678 break; 679 case VM_GET_HPET_CAPABILITIES: 680 error = vhpet_getcap((struct vm_hpet_cap *)data); 681 break; 682 case VM_GLA2GPA: { 683 CTASSERT(PROT_READ == VM_PROT_READ); 684 CTASSERT(PROT_WRITE == VM_PROT_WRITE); 685 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); 686 gg = (struct vm_gla2gpa *)data; 687 error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, 688 gg->prot, &gg->gpa, &gg->fault); 689 KASSERT(error == 0 || error == EFAULT, 690 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 691 break; 692 } 693 case VM_GLA2GPA_NOFAULT: 694 gg = (struct vm_gla2gpa *)data; 695 error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging, 696 gg->gla, gg->prot, &gg->gpa, &gg->fault); 697 KASSERT(error == 0 || error == EFAULT, 698 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 699 break; 700 case VM_ACTIVATE_CPU: 701 vac = (struct vm_activate_cpu *)data; 702 error = vm_activate_cpu(sc->vm, vac->vcpuid); 703 break; 704 case VM_GET_CPUS: 705 error = 0; 706 vm_cpuset = (struct vm_cpuset *)data; 707 size = vm_cpuset->cpusetsize; 708 if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { 709 error = ERANGE; 710 break; 711 } 712 cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 713 if (vm_cpuset->which == VM_ACTIVE_CPUS) 714 *cpuset = vm_active_cpus(sc->vm); 715 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 716 *cpuset = vm_suspended_cpus(sc->vm); 717 else if (vm_cpuset->which == VM_DEBUG_CPUS) 718 *cpuset = vm_debug_cpus(sc->vm); 719 else 720 error = EINVAL; 721 if (error == 0) 722 error = copyout(cpuset, vm_cpuset->cpus, size); 723 free(cpuset, M_TEMP); 724 break; 725 case VM_SUSPEND_CPU: 726 vac = (struct vm_activate_cpu *)data; 727 error = vm_suspend_cpu(sc->vm, vac->vcpuid); 728 break; 729 case VM_RESUME_CPU: 730 vac = (struct vm_activate_cpu *)data; 731 error = vm_resume_cpu(sc->vm, vac->vcpuid); 732 break; 733 case VM_SET_INTINFO: 734 vmii = (struct vm_intinfo *)data; 735 error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); 736 break; 737 case VM_GET_INTINFO: 738 vmii = (struct vm_intinfo *)data; 739 error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, 740 &vmii->info2); 741 break; 742 case VM_RTC_WRITE: 743 rtcdata = (struct vm_rtc_data *)data; 744 error = vrtc_nvram_write(sc->vm, rtcdata->offset, 745 rtcdata->value); 746 break; 747 case VM_RTC_READ: 748 rtcdata = (struct vm_rtc_data *)data; 749 error = vrtc_nvram_read(sc->vm, rtcdata->offset, 750 &rtcdata->value); 751 break; 752 case VM_RTC_SETTIME: 753 rtctime = (struct vm_rtc_time *)data; 754 error = vrtc_set_time(sc->vm, rtctime->secs); 755 break; 756 case VM_RTC_GETTIME: 757 error = 0; 758 rtctime = (struct vm_rtc_time *)data; 759 rtctime->secs = vrtc_get_time(sc->vm); 760 break; 761 case VM_RESTART_INSTRUCTION: 762 error = vm_restart_instruction(sc->vm, vcpu); 763 break; 764 case VM_SET_TOPOLOGY: 765 topology = (struct vm_cpu_topology *)data; 766 error = vm_set_topology(sc->vm, topology->sockets, 767 topology->cores, topology->threads, topology->maxcpus); 768 break; 769 case VM_GET_TOPOLOGY: 770 topology = (struct vm_cpu_topology *)data; 771 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 772 &topology->threads, &topology->maxcpus); 773 error = 0; 774 break; 775 default: 776 error = ENOTTY; 777 break; 778 } 779 780 if (state_changed == 1) 781 vcpu_unlock_one(sc, vcpu); 782 else if (state_changed == 2) 783 vcpu_unlock_all(sc); 784 785 done: 786 /* Make sure that no handler returns a bogus value like ERESTART */ 787 KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); 788 return (error); 789 } 790 791 static int 792 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 793 struct vm_object **objp, int nprot) 794 { 795 struct vmmdev_softc *sc; 796 vm_paddr_t gpa; 797 size_t len; 798 vm_ooffset_t segoff, first, last; 799 int error, found, segid; 800 bool sysmem; 801 802 error = vmm_priv_check(curthread->td_ucred); 803 if (error) 804 return (error); 805 806 first = *offset; 807 last = first + mapsize; 808 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 809 return (EINVAL); 810 811 sc = vmmdev_lookup2(cdev); 812 if (sc == NULL) { 813 /* virtual machine is in the process of being created */ 814 return (EINVAL); 815 } 816 817 /* 818 * Get a read lock on the guest memory map by freezing any vcpu. 819 */ 820 error = vcpu_lock_one(sc, VM_MAXCPU - 1); 821 if (error) 822 return (error); 823 824 gpa = 0; 825 found = 0; 826 while (!found) { 827 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 828 NULL, NULL); 829 if (error) 830 break; 831 832 if (first >= gpa && last <= gpa + len) 833 found = 1; 834 else 835 gpa += len; 836 } 837 838 if (found) { 839 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 840 KASSERT(error == 0 && *objp != NULL, 841 ("%s: invalid memory segment %d", __func__, segid)); 842 if (sysmem) { 843 vm_object_reference(*objp); 844 *offset = segoff + (first - gpa); 845 } else { 846 error = EINVAL; 847 } 848 } 849 vcpu_unlock_one(sc, VM_MAXCPU - 1); 850 return (error); 851 } 852 853 static void 854 vmmdev_destroy(void *arg) 855 { 856 struct vmmdev_softc *sc = arg; 857 struct devmem_softc *dsc; 858 int error; 859 860 error = vcpu_lock_all(sc); 861 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 862 863 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 864 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 865 SLIST_REMOVE_HEAD(&sc->devmem, link); 866 free(dsc->name, M_VMMDEV); 867 free(dsc, M_VMMDEV); 868 } 869 870 if (sc->cdev != NULL) 871 destroy_dev(sc->cdev); 872 873 if (sc->vm != NULL) 874 vm_destroy(sc->vm); 875 876 if ((sc->flags & VSC_LINKED) != 0) { 877 mtx_lock(&vmmdev_mtx); 878 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 879 mtx_unlock(&vmmdev_mtx); 880 } 881 882 free(sc, M_VMMDEV); 883 } 884 885 static int 886 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 887 { 888 int error; 889 char buf[VM_MAX_NAMELEN]; 890 struct devmem_softc *dsc; 891 struct vmmdev_softc *sc; 892 struct cdev *cdev; 893 894 error = vmm_priv_check(req->td->td_ucred); 895 if (error) 896 return (error); 897 898 strlcpy(buf, "beavis", sizeof(buf)); 899 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 900 if (error != 0 || req->newptr == NULL) 901 return (error); 902 903 mtx_lock(&vmmdev_mtx); 904 sc = vmmdev_lookup(buf); 905 if (sc == NULL || sc->cdev == NULL) { 906 mtx_unlock(&vmmdev_mtx); 907 return (EINVAL); 908 } 909 910 /* 911 * The 'cdev' will be destroyed asynchronously when 'si_threadcount' 912 * goes down to 0 so we should not do it again in the callback. 913 * 914 * Setting 'sc->cdev' to NULL is also used to indicate that the VM 915 * is scheduled for destruction. 916 */ 917 cdev = sc->cdev; 918 sc->cdev = NULL; 919 mtx_unlock(&vmmdev_mtx); 920 921 /* 922 * Schedule all cdevs to be destroyed: 923 * 924 * - any new operations on the 'cdev' will return an error (ENXIO). 925 * 926 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will 927 * be destroyed and the callback will be invoked in a taskqueue 928 * context. 929 * 930 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 931 */ 932 SLIST_FOREACH(dsc, &sc->devmem, link) { 933 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 934 destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc); 935 } 936 destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); 937 return (0); 938 } 939 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 940 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 941 NULL, 0, sysctl_vmm_destroy, "A", NULL); 942 943 static struct cdevsw vmmdevsw = { 944 .d_name = "vmmdev", 945 .d_version = D_VERSION, 946 .d_ioctl = vmmdev_ioctl, 947 .d_mmap_single = vmmdev_mmap_single, 948 .d_read = vmmdev_rw, 949 .d_write = vmmdev_rw, 950 }; 951 952 static int 953 sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 954 { 955 int error; 956 struct vm *vm; 957 struct cdev *cdev; 958 struct vmmdev_softc *sc, *sc2; 959 char buf[VM_MAX_NAMELEN]; 960 961 error = vmm_priv_check(req->td->td_ucred); 962 if (error) 963 return (error); 964 965 strlcpy(buf, "beavis", sizeof(buf)); 966 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 967 if (error != 0 || req->newptr == NULL) 968 return (error); 969 970 mtx_lock(&vmmdev_mtx); 971 sc = vmmdev_lookup(buf); 972 mtx_unlock(&vmmdev_mtx); 973 if (sc != NULL) 974 return (EEXIST); 975 976 error = vm_create(buf, &vm); 977 if (error != 0) 978 return (error); 979 980 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 981 sc->vm = vm; 982 SLIST_INIT(&sc->devmem); 983 984 /* 985 * Lookup the name again just in case somebody sneaked in when we 986 * dropped the lock. 987 */ 988 mtx_lock(&vmmdev_mtx); 989 sc2 = vmmdev_lookup(buf); 990 if (sc2 == NULL) { 991 SLIST_INSERT_HEAD(&head, sc, link); 992 sc->flags |= VSC_LINKED; 993 } 994 mtx_unlock(&vmmdev_mtx); 995 996 if (sc2 != NULL) { 997 vmmdev_destroy(sc); 998 return (EEXIST); 999 } 1000 1001 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, 1002 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 1003 if (error != 0) { 1004 vmmdev_destroy(sc); 1005 return (error); 1006 } 1007 1008 mtx_lock(&vmmdev_mtx); 1009 sc->cdev = cdev; 1010 sc->cdev->si_drv1 = sc; 1011 mtx_unlock(&vmmdev_mtx); 1012 1013 return (0); 1014 } 1015 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 1016 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 1017 NULL, 0, sysctl_vmm_create, "A", NULL); 1018 1019 void 1020 vmmdev_init(void) 1021 { 1022 mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); 1023 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 1024 "Allow use of vmm in a jail."); 1025 } 1026 1027 int 1028 vmmdev_cleanup(void) 1029 { 1030 int error; 1031 1032 if (SLIST_EMPTY(&head)) 1033 error = 0; 1034 else 1035 error = EBUSY; 1036 1037 return (error); 1038 } 1039 1040 static int 1041 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1042 struct vm_object **objp, int nprot) 1043 { 1044 struct devmem_softc *dsc; 1045 vm_ooffset_t first, last; 1046 size_t seglen; 1047 int error; 1048 bool sysmem; 1049 1050 dsc = cdev->si_drv1; 1051 if (dsc == NULL) { 1052 /* 'cdev' has been created but is not ready for use */ 1053 return (ENXIO); 1054 } 1055 1056 first = *offset; 1057 last = *offset + len; 1058 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1059 return (EINVAL); 1060 1061 error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1); 1062 if (error) 1063 return (error); 1064 1065 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1066 KASSERT(error == 0 && !sysmem && *objp != NULL, 1067 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1068 1069 vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1); 1070 1071 if (seglen >= last) { 1072 vm_object_reference(*objp); 1073 return (0); 1074 } else { 1075 return (EINVAL); 1076 } 1077 } 1078 1079 static struct cdevsw devmemsw = { 1080 .d_name = "devmem", 1081 .d_version = D_VERSION, 1082 .d_mmap_single = devmem_mmap_single, 1083 }; 1084 1085 static int 1086 devmem_create_cdev(const char *vmname, int segid, char *devname) 1087 { 1088 struct devmem_softc *dsc; 1089 struct vmmdev_softc *sc; 1090 struct cdev *cdev; 1091 int error; 1092 1093 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1094 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1095 if (error) 1096 return (error); 1097 1098 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1099 1100 mtx_lock(&vmmdev_mtx); 1101 sc = vmmdev_lookup(vmname); 1102 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1103 if (sc->cdev == NULL) { 1104 /* virtual machine is being created or destroyed */ 1105 mtx_unlock(&vmmdev_mtx); 1106 free(dsc, M_VMMDEV); 1107 destroy_dev_sched_cb(cdev, NULL, 0); 1108 return (ENODEV); 1109 } 1110 1111 dsc->segid = segid; 1112 dsc->name = devname; 1113 dsc->cdev = cdev; 1114 dsc->sc = sc; 1115 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1116 mtx_unlock(&vmmdev_mtx); 1117 1118 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1119 cdev->si_drv1 = dsc; 1120 return (0); 1121 } 1122 1123 static void 1124 devmem_destroy(void *arg) 1125 { 1126 struct devmem_softc *dsc = arg; 1127 1128 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1129 dsc->cdev = NULL; 1130 dsc->sc = NULL; 1131 } 1132