1 /* 2 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net 3 * All rights reserved. 4 * 5 * This code is part of the NVMM hypervisor. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/kernel.h>
#include <sys/mman.h>

#include "nvmm.h"
#include "nvmm_internal.h"
#include "nvmm_ioctl.h"

/*
 * The machine slots. A machid is an index into this array. Each slot is
 * protected by its own rwlock; 'nmachines' counts the slots currently
 * present.
 */
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
volatile unsigned int nmachines __cacheline_aligned;

static const struct nvmm_impl *nvmm_impl_list[] = {
#if defined(__x86_64__)
	&nvmm_x86_svm,	/* x86 AMD SVM */
	&nvmm_x86_vmx	/* x86 Intel VMX */
#endif
};

/* The backend in use, selected once by nvmm_ident() during nvmm_init(). */
const struct nvmm_impl *nvmm_impl __read_mostly = NULL;

/*
 * Special owner that passes the ownership check in nvmm_machine_get(),
 * and can therefore access every machine.
 */
struct nvmm_owner nvmm_root_owner;

/* -------------------------------------------------------------------------- */

/*
 * Grab the first free machine slot. On success the slot is marked present,
 * 'nmachines' is bumped, and the machine is returned with its lock
 * write-held; the caller releases the lock with nvmm_machine_put().
 *
 * Returns ENOBUFS when all NVMM_MAX_MACHINES slots are in use.
 */
static int
nvmm_machine_alloc(struct nvmm_machine **ret)
{
	struct nvmm_machine *mach;
	size_t i;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		os_rwl_wlock(&mach->lock);
		if (mach->present) {
			os_rwl_unlock(&mach->lock);
			continue;
		}

		mach->present = true;
		mach->time = time_second;
		*ret = mach;
		os_atomic_inc_uint(&nmachines);
		return 0;
	}

	return ENOBUFS;
}

/*
 * Release a machine slot. The caller must hold the machine lock as writer,
 * and remains responsible for dropping it afterwards.
 */
static void
nvmm_machine_free(struct nvmm_machine *mach)
{
	OS_ASSERT(os_rwl_wheld(&mach->lock));
	OS_ASSERT(mach->present);
	mach->present = false;
	os_atomic_dec_uint(&nmachines);
}

/*
 * Look up machine 'machid' and return it locked (write-locked if 'writer',
 * read-locked otherwise). Fails with EINVAL on an out-of-range machid,
 * ENOENT if the slot is not present, and EPERM if 'owner' neither owns the
 * machine nor is nvmm_root_owner.
 */
static int
nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
    struct nvmm_machine **ret, bool writer)
{
	struct nvmm_machine *mach;

	if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
		return EINVAL;
	}
	mach = &machines[machid];

	if (__predict_false(writer)) {
		os_rwl_wlock(&mach->lock);
	} else {
		os_rwl_rlock(&mach->lock);
	}
	if (__predict_false(!mach->present)) {
		os_rwl_unlock(&mach->lock);
		return ENOENT;
	}
	if (__predict_false(mach->owner != owner &&
	    owner != &nvmm_root_owner)) {
		os_rwl_unlock(&mach->lock);
		return EPERM;
	}
	*ret = mach;

	return 0;
}

/* Drop the machine lock taken by nvmm_machine_get()/nvmm_machine_alloc(). */
static void
nvmm_machine_put(struct nvmm_machine *mach)
{
	os_rwl_unlock(&mach->lock);
}

/* -------------------------------------------------------------------------- */

/*
 * Grab the VCPU slot 'cpuid' of the given machine. On success the VCPU is
 * marked present and returned with its mutex held; the caller releases it
 * with nvmm_vcpu_put(). Fails with EINVAL on an out-of-range cpuid, and
 * EBUSY if the slot is already in use.
 */
static int
nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (cpuid >= NVMM_MAX_VCPUS) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	os_mtx_lock(&vcpu->lock);
	if (vcpu->present) {
		os_mtx_unlock(&vcpu->lock);
		return EBUSY;
	}

	vcpu->present = true;
	vcpu->comm = NULL;
	vcpu->hcpu_last = -1;
	*ret = vcpu;
	return 0;
}

/*
 * Release a VCPU slot, and tear down the kernel-side mapping of its comm
 * page if one was established. The caller must hold the VCPU mutex, and
 * remains responsible for dropping it afterwards.
 */
static void
nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	OS_ASSERT(os_mtx_owned(&vcpu->lock));
	vcpu->present = false;
	if (vcpu->comm != NULL) {
		os_vmobj_unmap(os_kernel_map, (vaddr_t)vcpu->comm,
		    (vaddr_t)vcpu->comm + NVMM_COMM_PAGE_SIZE, true);
		/*
		 * Require userland to unmap the comm page from its address
		 * space, because os_curproc_map at this point (fd close)
		 * is not guaranteed to be the correct address space.
		 */
	}
}

/*
 * Look up VCPU 'cpuid' of the given machine and return it with its mutex
 * held. Fails with EINVAL on an out-of-range cpuid, and ENOENT if the
 * slot is not present.
 */
static int
nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_cpu **ret)
{
	struct nvmm_cpu *vcpu;

	if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
		return EINVAL;
	}
	vcpu = &mach->cpus[cpuid];

	os_mtx_lock(&vcpu->lock);
	if (__predict_false(!vcpu->present)) {
		os_mtx_unlock(&vcpu->lock);
		return ENOENT;
	}
	*ret = vcpu;

	return 0;
}

/* Drop the VCPU mutex taken by nvmm_vcpu_get()/nvmm_vcpu_alloc(). */
static void
nvmm_vcpu_put(struct nvmm_cpu *vcpu)
{
	os_mtx_unlock(&vcpu->lock);
}

/* -------------------------------------------------------------------------- */

/*
 * Destroy every machine belonging to 'owner': tear down its VCPUs, the
 * backend state, the guest vmspace and the host-mapping vmobj references,
 * then release the machine slots. Used when an owner goes away.
 */
void
nvmm_kill_machines(struct nvmm_owner *owner)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	size_t i, j;
	int error;

	for (i = 0; i < NVMM_MAX_MACHINES; i++) {
		mach = &machines[i];

		os_rwl_wlock(&mach->lock);
		if (!mach->present || mach->owner != owner) {
			os_rwl_unlock(&mach->lock);
			continue;
		}

		/* Kill it. */
		for (j = 0; j < NVMM_MAX_VCPUS; j++) {
			error = nvmm_vcpu_get(mach, j, &vcpu);
			if (error)
				continue;
			(*nvmm_impl->vcpu_destroy)(mach, vcpu);
			nvmm_vcpu_free(mach, vcpu);
			nvmm_vcpu_put(vcpu);
			os_atomic_dec_uint(&mach->ncpus);
		}
		(*nvmm_impl->machine_destroy)(mach);
		os_vmspace_destroy(mach->vm);

		/* Drop the kernel vmobj refs. */
		for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
			if (!mach->hmap[j].present)
				continue;
			os_vmobj_rel(mach->hmap[j].vmobj);
		}

		nvmm_machine_free(mach);

		os_rwl_unlock(&mach->lock);
	}
}

/* -------------------------------------------------------------------------- */

/*
 * NVMM_IOC_CAPABILITY: fill out the generic capabilities, then let the
 * backend add its machine-dependent ones.
 */
static int
nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
{
	args->cap.version = NVMM_KERN_VERSION;
	args->cap.state_size = nvmm_impl->state_size;
	args->cap.comm_size = NVMM_COMM_PAGE_SIZE;
	args->cap.max_machines = NVMM_MAX_MACHINES;
	args->cap.max_vcpus = NVMM_MAX_VCPUS;
	args->cap.max_ram = NVMM_MAX_RAM;

	(*nvmm_impl->capability)(&args->cap);

	return 0;
}

/*
 * NVMM_IOC_MACHINE_CREATE: allocate a machine slot for 'owner', create the
 * guest vmspace covering [0, NVMM_MAX_RAM) and the comm-page vmobj (one
 * page per possible VCPU), and hand the new machid back to userland.
 */
static int
nvmm_machine_create(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_create *args)
{
	struct nvmm_machine *mach;
	int error;

	error = nvmm_machine_alloc(&mach);
	if (error)
		return error;

	/* Curproc owns the machine. */
	mach->owner = owner;

	/* Zero out the host mappings. */
	memset(&mach->hmap, 0, sizeof(mach->hmap));

	/* Create the machine vmspace. */
	mach->gpa_begin = 0;
	mach->gpa_end = NVMM_MAX_RAM;
	mach->vm = os_vmspace_create(mach->gpa_begin, mach->gpa_end);

	/* Create the comm vmobj. */
	mach->commvmobj = os_vmobj_create(
	    NVMM_MAX_VCPUS * NVMM_COMM_PAGE_SIZE);

	(*nvmm_impl->machine_create)(mach);

	args->machid = mach->machid;
	nvmm_machine_put(mach);

	return 0;
}

/*
 * NVMM_IOC_MACHINE_DESTROY: tear down a machine: its VCPUs, the backend
 * state, the guest vmspace and the host-mapping vmobj refs, then release
 * the slot.
 */
static int
nvmm_machine_destroy(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_destroy *args)
{
	struct nvmm_machine *mach;
	struct nvmm_cpu *vcpu;
	int error;
	size_t i;

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error)
		return error;

	for (i = 0; i < NVMM_MAX_VCPUS; i++) {
		error = nvmm_vcpu_get(mach, i, &vcpu);
		if (error)
			continue;

		(*nvmm_impl->vcpu_destroy)(mach, vcpu);
		nvmm_vcpu_free(mach, vcpu);
		nvmm_vcpu_put(vcpu);
		os_atomic_dec_uint(&mach->ncpus);
	}

	(*nvmm_impl->machine_destroy)(mach);

	/* Free the machine vmspace. */
	os_vmspace_destroy(mach->vm);

	/* Drop the kernel vmobj refs. */
	for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
		if (!mach->hmap[i].present)
			continue;
		os_vmobj_rel(mach->hmap[i].vmobj);
	}

	nvmm_machine_free(mach);
	nvmm_machine_put(mach);

	return 0;
}

/*
 * NVMM_IOC_MACHINE_CONFIGURE: copy in the op-specific configuration data
 * and pass it to the backend. The data size is dictated by the backend's
 * mach_conf_sizes[] table, indexed by the machine-dependent op number.
 */
static int
nvmm_machine_configure(struct nvmm_owner *owner,
    struct nvmm_ioc_machine_configure *args)
{
	struct nvmm_machine *mach;
	size_t allocsz;
	uint64_t op;
	void *data;
	int error;

	op = NVMM_MACH_CONF_MD(args->op);
	if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
		return EINVAL;
	}

	allocsz = nvmm_impl->mach_conf_sizes[op];
	data = os_mem_alloc(allocsz);

	error = nvmm_machine_get(owner, args->machid, &mach, true);
	if (error) {
		os_mem_free(data, allocsz);
		return error;
	}

	error = copyin(args->conf, data, allocsz);
	if (error) {
		goto out;
	}

	error = (*nvmm_impl->machine_configure)(mach, op, data);

out:
	nvmm_machine_put(mach);
	os_mem_free(data, allocsz);
	return error;
}
368 369 static int 370 nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args) 371 { 372 struct nvmm_machine *mach; 373 struct nvmm_cpu *vcpu; 374 int error; 375 376 error = nvmm_machine_get(owner, args->machid, &mach, false); 377 if (error) 378 return error; 379 380 error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu); 381 if (error) 382 goto out; 383 384 /* Map the comm page on the kernel side, as wired. */ 385 error = os_vmobj_map(os_kernel_map, (vaddr_t *)&vcpu->comm, 386 NVMM_COMM_PAGE_SIZE, mach->commvmobj, 387 args->cpuid * NVMM_COMM_PAGE_SIZE, true /* wired */, 388 false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE, 389 PROT_READ | PROT_WRITE); 390 if (error) { 391 nvmm_vcpu_free(mach, vcpu); 392 nvmm_vcpu_put(vcpu); 393 goto out; 394 } 395 396 memset(vcpu->comm, 0, NVMM_COMM_PAGE_SIZE); 397 398 /* Map the comm page on the user side, as pageable. */ 399 error = os_vmobj_map(os_curproc_map, (vaddr_t *)&args->comm, 400 NVMM_COMM_PAGE_SIZE, mach->commvmobj, 401 args->cpuid * NVMM_COMM_PAGE_SIZE, false /* !wired */, 402 false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE, 403 PROT_READ | PROT_WRITE); 404 if (error) { 405 nvmm_vcpu_free(mach, vcpu); 406 nvmm_vcpu_put(vcpu); 407 goto out; 408 } 409 410 error = (*nvmm_impl->vcpu_create)(mach, vcpu); 411 if (error) { 412 nvmm_vcpu_free(mach, vcpu); 413 nvmm_vcpu_put(vcpu); 414 goto out; 415 } 416 417 nvmm_vcpu_put(vcpu); 418 os_atomic_inc_uint(&mach->ncpus); 419 420 out: 421 nvmm_machine_put(mach); 422 return error; 423 } 424 425 static int 426 nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args) 427 { 428 struct nvmm_machine *mach; 429 struct nvmm_cpu *vcpu; 430 int error; 431 432 error = nvmm_machine_get(owner, args->machid, &mach, false); 433 if (error) 434 return error; 435 436 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 437 if (error) 438 goto out; 439 440 (*nvmm_impl->vcpu_destroy)(mach, vcpu); 441 nvmm_vcpu_free(mach, vcpu); 442 
nvmm_vcpu_put(vcpu); 443 os_atomic_dec_uint(&mach->ncpus); 444 445 out: 446 nvmm_machine_put(mach); 447 return error; 448 } 449 450 static int 451 nvmm_vcpu_configure(struct nvmm_owner *owner, 452 struct nvmm_ioc_vcpu_configure *args) 453 { 454 struct nvmm_machine *mach; 455 struct nvmm_cpu *vcpu; 456 size_t allocsz; 457 uint64_t op; 458 void *data; 459 int error; 460 461 op = NVMM_VCPU_CONF_MD(args->op); 462 if (__predict_false(op >= nvmm_impl->vcpu_conf_max)) 463 return EINVAL; 464 465 allocsz = nvmm_impl->vcpu_conf_sizes[op]; 466 data = os_mem_alloc(allocsz); 467 468 error = nvmm_machine_get(owner, args->machid, &mach, false); 469 if (error) { 470 os_mem_free(data, allocsz); 471 return error; 472 } 473 474 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 475 if (error) { 476 nvmm_machine_put(mach); 477 os_mem_free(data, allocsz); 478 return error; 479 } 480 481 error = copyin(args->conf, data, allocsz); 482 if (error) { 483 goto out; 484 } 485 486 error = (*nvmm_impl->vcpu_configure)(vcpu, op, data); 487 488 out: 489 nvmm_vcpu_put(vcpu); 490 nvmm_machine_put(mach); 491 os_mem_free(data, allocsz); 492 return error; 493 } 494 495 static int 496 nvmm_vcpu_setstate(struct nvmm_owner *owner, 497 struct nvmm_ioc_vcpu_setstate *args) 498 { 499 struct nvmm_machine *mach; 500 struct nvmm_cpu *vcpu; 501 int error; 502 503 error = nvmm_machine_get(owner, args->machid, &mach, false); 504 if (error) 505 return error; 506 507 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 508 if (error) 509 goto out; 510 511 (*nvmm_impl->vcpu_setstate)(vcpu); 512 nvmm_vcpu_put(vcpu); 513 514 out: 515 nvmm_machine_put(mach); 516 return error; 517 } 518 519 static int 520 nvmm_vcpu_getstate(struct nvmm_owner *owner, 521 struct nvmm_ioc_vcpu_getstate *args) 522 { 523 struct nvmm_machine *mach; 524 struct nvmm_cpu *vcpu; 525 int error; 526 527 error = nvmm_machine_get(owner, args->machid, &mach, false); 528 if (error) 529 return error; 530 531 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 
532 if (error) 533 goto out; 534 535 (*nvmm_impl->vcpu_getstate)(vcpu); 536 nvmm_vcpu_put(vcpu); 537 538 out: 539 nvmm_machine_put(mach); 540 return error; 541 } 542 543 static int 544 nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args) 545 { 546 struct nvmm_machine *mach; 547 struct nvmm_cpu *vcpu; 548 int error; 549 550 error = nvmm_machine_get(owner, args->machid, &mach, false); 551 if (error) 552 return error; 553 554 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 555 if (error) 556 goto out; 557 558 error = (*nvmm_impl->vcpu_inject)(vcpu); 559 nvmm_vcpu_put(vcpu); 560 561 out: 562 nvmm_machine_put(mach); 563 return error; 564 } 565 566 static int 567 nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 568 struct nvmm_vcpu_exit *exit) 569 { 570 struct vmspace *vm = mach->vm; 571 int ret; 572 573 while (1) { 574 /* Got a signal? Or pending resched? Leave. */ 575 if (__predict_false(os_return_needed())) { 576 exit->reason = NVMM_VCPU_EXIT_NONE; 577 return 0; 578 } 579 580 /* Run the VCPU. */ 581 ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit); 582 if (__predict_false(ret != 0)) { 583 return ret; 584 } 585 586 /* Process nested page faults. 
*/ 587 if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) { 588 break; 589 } 590 if (exit->u.mem.gpa >= mach->gpa_end) { 591 break; 592 } 593 if (os_vmspace_fault(vm, exit->u.mem.gpa, exit->u.mem.prot)) { 594 break; 595 } 596 } 597 598 return 0; 599 } 600 601 static int 602 nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args) 603 { 604 struct nvmm_machine *mach; 605 struct nvmm_cpu *vcpu; 606 int error; 607 608 error = nvmm_machine_get(owner, args->machid, &mach, false); 609 if (error) 610 return error; 611 612 error = nvmm_vcpu_get(mach, args->cpuid, &vcpu); 613 if (error) 614 goto out; 615 616 error = nvmm_do_vcpu_run(mach, vcpu, &args->exit); 617 nvmm_vcpu_put(vcpu); 618 619 out: 620 nvmm_machine_put(mach); 621 return error; 622 } 623 624 /* -------------------------------------------------------------------------- */ 625 626 static os_vmobj_t * 627 nvmm_hmapping_getvmobj(struct nvmm_machine *mach, uintptr_t hva, size_t size, 628 size_t *off) 629 { 630 struct nvmm_hmapping *hmapping; 631 size_t i; 632 633 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) { 634 hmapping = &mach->hmap[i]; 635 if (!hmapping->present) { 636 continue; 637 } 638 if (hva >= hmapping->hva && 639 hva + size <= hmapping->hva + hmapping->size) { 640 *off = hva - hmapping->hva; 641 return hmapping->vmobj; 642 } 643 } 644 645 return NULL; 646 } 647 648 static int 649 nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size) 650 { 651 struct nvmm_hmapping *hmapping; 652 size_t i; 653 uintptr_t hva_end; 654 uintptr_t hmap_end; 655 656 if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) { 657 return EINVAL; 658 } 659 if (hva == 0) { 660 return EINVAL; 661 } 662 663 /* 664 * Overflow tests MUST be done very carefully to avoid compiler 665 * optimizations from effectively deleting the test. 
666 */ 667 hva_end = hva + size; 668 if (hva_end <= hva) 669 return EINVAL; 670 671 /* 672 * Overlap tests 673 */ 674 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) { 675 hmapping = &mach->hmap[i]; 676 677 if (!hmapping->present) { 678 continue; 679 } 680 hmap_end = hmapping->hva + hmapping->size; 681 682 if (hva >= hmapping->hva && hva_end <= hmap_end) 683 break; 684 if (hva >= hmapping->hva && hva < hmap_end) 685 return EEXIST; 686 if (hva_end > hmapping->hva && hva_end <= hmap_end) 687 return EEXIST; 688 if (hva <= hmapping->hva && hva_end >= hmap_end) 689 return EEXIST; 690 } 691 692 return 0; 693 } 694 695 static struct nvmm_hmapping * 696 nvmm_hmapping_alloc(struct nvmm_machine *mach) 697 { 698 struct nvmm_hmapping *hmapping; 699 size_t i; 700 701 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) { 702 hmapping = &mach->hmap[i]; 703 if (!hmapping->present) { 704 hmapping->present = true; 705 return hmapping; 706 } 707 } 708 709 return NULL; 710 } 711 712 static int 713 nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size) 714 { 715 struct nvmm_hmapping *hmapping; 716 size_t i; 717 718 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) { 719 hmapping = &mach->hmap[i]; 720 if (!hmapping->present || hmapping->hva != hva || 721 hmapping->size != size) { 722 continue; 723 } 724 725 os_vmobj_unmap(os_curproc_map, hmapping->hva, 726 hmapping->hva + hmapping->size, false); 727 os_vmobj_rel(hmapping->vmobj); 728 729 hmapping->vmobj = NULL; 730 hmapping->present = false; 731 732 return 0; 733 } 734 735 return ENOENT; 736 } 737 738 static int 739 nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args) 740 { 741 struct nvmm_machine *mach; 742 struct nvmm_hmapping *hmapping; 743 vaddr_t uva; 744 int error; 745 746 error = nvmm_machine_get(owner, args->machid, &mach, true); 747 if (error) 748 return error; 749 750 error = nvmm_hmapping_validate(mach, args->hva, args->size); 751 if (error) 752 goto out; 753 754 hmapping = nvmm_hmapping_alloc(mach); 755 if (hmapping 
== NULL) { 756 error = ENOBUFS; 757 goto out; 758 } 759 760 hmapping->hva = args->hva; 761 hmapping->size = args->size; 762 hmapping->vmobj = os_vmobj_create(hmapping->size); 763 uva = hmapping->hva; 764 765 /* Map the vmobj into the user address space, as pageable. */ 766 error = os_vmobj_map(os_curproc_map, &uva, hmapping->size, 767 hmapping->vmobj, 0, false /* !wired */, true /* fixed */, 768 true /* shared */, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE); 769 770 out: 771 nvmm_machine_put(mach); 772 return error; 773 } 774 775 static int 776 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args) 777 { 778 struct nvmm_machine *mach; 779 int error; 780 781 error = nvmm_machine_get(owner, args->machid, &mach, true); 782 if (error) 783 return error; 784 785 error = nvmm_hmapping_free(mach, args->hva, args->size); 786 787 nvmm_machine_put(mach); 788 return error; 789 } 790 791 /* -------------------------------------------------------------------------- */ 792 793 static int 794 nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args) 795 { 796 struct nvmm_machine *mach; 797 os_vmobj_t *vmobj; 798 gpaddr_t gpa; 799 gpaddr_t gpa_end; 800 size_t off; 801 int error; 802 803 error = nvmm_machine_get(owner, args->machid, &mach, false); 804 if (error) 805 return error; 806 807 if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) { 808 error = EINVAL; 809 goto out; 810 } 811 812 /* 813 * Overflow tests MUST be done very carefully to avoid compiler 814 * optimizations from effectively deleting the test. 
815 */ 816 gpa = args->gpa; 817 gpa_end = gpa + args->size; 818 if (gpa_end <= gpa) { 819 error = EINVAL; 820 goto out; 821 } 822 823 if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 || 824 (args->hva % PAGE_SIZE) != 0) { 825 error = EINVAL; 826 goto out; 827 } 828 if (args->hva == 0) { 829 error = EINVAL; 830 goto out; 831 } 832 833 if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) { 834 error = EINVAL; 835 goto out; 836 } 837 if (gpa_end > mach->gpa_end) { 838 error = EINVAL; 839 goto out; 840 } 841 842 vmobj = nvmm_hmapping_getvmobj(mach, args->hva, args->size, &off); 843 if (vmobj == NULL) { 844 error = EINVAL; 845 goto out; 846 } 847 848 /* Map the vmobj into the machine address space, as pageable. */ 849 error = os_vmobj_map(&mach->vm->vm_map, &gpa, args->size, vmobj, off, 850 false /* !wired */, true /* fixed */, false /* !shared */, 851 args->prot, PROT_READ | PROT_WRITE | PROT_EXEC); 852 853 out: 854 nvmm_machine_put(mach); 855 return error; 856 } 857 858 static int 859 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args) 860 { 861 struct nvmm_machine *mach; 862 gpaddr_t gpa; 863 gpaddr_t gpa_end; 864 int error; 865 866 error = nvmm_machine_get(owner, args->machid, &mach, false); 867 if (error) 868 return error; 869 870 /* 871 * Overflow tests MUST be done very carefully to avoid compiler 872 * optimizations from effectively deleting the test. 873 */ 874 gpa = args->gpa; 875 gpa_end = gpa + args->size; 876 if (gpa_end <= gpa) { 877 error = EINVAL; 878 goto out; 879 } 880 881 if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) { 882 error = EINVAL; 883 goto out; 884 } 885 if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) { 886 error = EINVAL; 887 goto out; 888 } 889 if (gpa_end >= mach->gpa_end) { 890 error = EINVAL; 891 goto out; 892 } 893 894 /* Unmap the memory from the machine. 
*/ 895 os_vmobj_unmap(&mach->vm->vm_map, gpa, gpa + args->size, false); 896 897 out: 898 nvmm_machine_put(mach); 899 return error; 900 } 901 902 /* -------------------------------------------------------------------------- */ 903 904 static int 905 nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args) 906 { 907 struct nvmm_ctl_mach_info ctl; 908 struct nvmm_machine *mach; 909 int error; 910 size_t i; 911 912 if (args->size != sizeof(ctl)) 913 return EINVAL; 914 error = copyin(args->data, &ctl, sizeof(ctl)); 915 if (error) 916 return error; 917 918 error = nvmm_machine_get(owner, ctl.machid, &mach, true); 919 if (error) 920 return error; 921 922 ctl.nvcpus = mach->ncpus; 923 924 ctl.nram = 0; 925 for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) { 926 if (!mach->hmap[i].present) 927 continue; 928 ctl.nram += mach->hmap[i].size; 929 } 930 931 ctl.pid = mach->owner->pid; 932 ctl.time = mach->time; 933 934 nvmm_machine_put(mach); 935 936 error = copyout(&ctl, args->data, sizeof(ctl)); 937 if (error) 938 return error; 939 940 return 0; 941 } 942 943 static int 944 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args) 945 { 946 switch (args->op) { 947 case NVMM_CTL_MACH_INFO: 948 return nvmm_ctl_mach_info(owner, args); 949 default: 950 return EINVAL; 951 } 952 } 953 954 /* -------------------------------------------------------------------------- */ 955 956 const struct nvmm_impl * 957 nvmm_ident(void) 958 { 959 size_t i; 960 961 for (i = 0; i < __arraycount(nvmm_impl_list); i++) { 962 if ((*nvmm_impl_list[i]->ident)()) 963 return nvmm_impl_list[i]; 964 } 965 966 return NULL; 967 } 968 969 int 970 nvmm_init(void) 971 { 972 size_t i, n; 973 974 nvmm_impl = nvmm_ident(); 975 if (nvmm_impl == NULL) 976 return ENOTSUP; 977 978 for (i = 0; i < NVMM_MAX_MACHINES; i++) { 979 machines[i].machid = i; 980 os_rwl_init(&machines[i].lock); 981 for (n = 0; n < NVMM_MAX_VCPUS; n++) { 982 machines[i].cpus[n].present = false; 983 machines[i].cpus[n].cpuid = n; 984 
os_mtx_init(&machines[i].cpus[n].lock); 985 } 986 } 987 988 (*nvmm_impl->init)(); 989 990 return 0; 991 } 992 993 void 994 nvmm_fini(void) 995 { 996 size_t i, n; 997 998 for (i = 0; i < NVMM_MAX_MACHINES; i++) { 999 os_rwl_destroy(&machines[i].lock); 1000 for (n = 0; n < NVMM_MAX_VCPUS; n++) { 1001 os_mtx_destroy(&machines[i].cpus[n].lock); 1002 } 1003 } 1004 1005 (*nvmm_impl->fini)(); 1006 nvmm_impl = NULL; 1007 } 1008 1009 /* -------------------------------------------------------------------------- */ 1010 1011 int 1012 nvmm_ioctl(struct nvmm_owner *owner, unsigned long cmd, void *data) 1013 { 1014 switch (cmd) { 1015 case NVMM_IOC_CAPABILITY: 1016 return nvmm_capability(owner, data); 1017 case NVMM_IOC_MACHINE_CREATE: 1018 return nvmm_machine_create(owner, data); 1019 case NVMM_IOC_MACHINE_DESTROY: 1020 return nvmm_machine_destroy(owner, data); 1021 case NVMM_IOC_MACHINE_CONFIGURE: 1022 return nvmm_machine_configure(owner, data); 1023 case NVMM_IOC_VCPU_CREATE: 1024 return nvmm_vcpu_create(owner, data); 1025 case NVMM_IOC_VCPU_DESTROY: 1026 return nvmm_vcpu_destroy(owner, data); 1027 case NVMM_IOC_VCPU_CONFIGURE: 1028 return nvmm_vcpu_configure(owner, data); 1029 case NVMM_IOC_VCPU_SETSTATE: 1030 return nvmm_vcpu_setstate(owner, data); 1031 case NVMM_IOC_VCPU_GETSTATE: 1032 return nvmm_vcpu_getstate(owner, data); 1033 case NVMM_IOC_VCPU_INJECT: 1034 return nvmm_vcpu_inject(owner, data); 1035 case NVMM_IOC_VCPU_RUN: 1036 return nvmm_vcpu_run(owner, data); 1037 case NVMM_IOC_GPA_MAP: 1038 return nvmm_gpa_map(owner, data); 1039 case NVMM_IOC_GPA_UNMAP: 1040 return nvmm_gpa_unmap(owner, data); 1041 case NVMM_IOC_HVA_MAP: 1042 return nvmm_hva_map(owner, data); 1043 case NVMM_IOC_HVA_UNMAP: 1044 return nvmm_hva_unmap(owner, data); 1045 case NVMM_IOC_CTL: 1046 return nvmm_ctl(owner, data); 1047 default: 1048 return EINVAL; 1049 } 1050 } 1051