1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/systm.h> 37 #include <sys/malloc.h> 38 39 #include <dev/pci/pcireg.h> 40 41 #include <machine/vmparam.h> 42 #include <sys/vmm_vm.h> 43 44 #include <contrib/dev/acpica/include/acpi.h> 45 46 #include <sys/sunndi.h> 47 48 #include "io/iommu.h" 49 50 /* 51 * Documented in the "Intel Virtualization Technology for Directed I/O", 52 * Architecture Spec, September 2008. 53 */ 54 55 #define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1) 56 57 /* Section 10.4 "Register Descriptions" */ 58 struct vtdmap { 59 volatile uint32_t version; 60 volatile uint32_t res0; 61 volatile uint64_t cap; 62 volatile uint64_t ext_cap; 63 volatile uint32_t gcr; 64 volatile uint32_t gsr; 65 volatile uint64_t rta; 66 volatile uint64_t ccr; 67 }; 68 69 #define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F) 70 #define VTD_CAP_ND(cap) ((cap) & 0x7) 71 #define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1) 72 #define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF) 73 #define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1) 74 75 #define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1) 76 #define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1) 77 #define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF) 78 79 #define VTD_GCR_WBF (1 << 27) 80 #define VTD_GCR_SRTP (1 << 30) 81 #define VTD_GCR_TE (1U << 31) 82 83 #define VTD_GSR_WBFS (1 << 27) 84 #define VTD_GSR_RTPS (1 << 30) 85 #define VTD_GSR_TES (1U << 31) 86 87 #define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */ 88 #define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */ 89 90 #define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */ 91 #define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */ 92 #define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */ 93 #define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */ 94 #define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */ 95 #define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */ 96 #define VTD_IIR_DOMAIN_P 32 97 98 #define VTD_ROOT_PRESENT 0x1 99 #define VTD_CTX_PRESENT 0x1 100 #define VTD_CTX_TT_ALL (1UL << 2) 101 102 #define VTD_PTE_RD (1UL << 0) 103 #define VTD_PTE_WR (1UL << 1) 104 #define VTD_PTE_SUPERPAGE (1UL << 7) 105 #define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL) 106 107 #define VTD_RID2IDX(rid) (((rid) & 0xff) * 2) 108 109 struct domain { 110 uint64_t *ptp; /* first level page table page */ 111 int pt_levels; /* number of page table levels */ 112 int addrwidth; /* 'AW' field in context entry */ 113 int spsmask; /* supported super page sizes */ 114 uint_t id; /* domain id */ 115 vm_paddr_t maxaddr; /* highest address to be mapped */ 116 SLIST_ENTRY(domain) next; 117 }; 118 119 static SLIST_HEAD(, domain) domhead; 120 121 #define DRHD_MAX_UNITS 8 122 static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS]; 123 static int drhd_num; 124 static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; 125 static int max_domains; 126 typedef int (*drhd_ident_func_t)(void); 127 #ifndef __FreeBSD__ 128 static dev_info_t *vtddips[DRHD_MAX_UNITS]; 129 #endif 130 131 static uint64_t root_table[PAGE_SIZE / sizeof (uint64_t)] __aligned(4096); 132 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof (uint64_t)] __aligned(4096); 133 134 static MALLOC_DEFINE(M_VTD, "vtd", "vtd"); 135 136 static int 137 vtd_max_domains(struct vtdmap *vtdmap) 138 { 139 int nd; 140 141 nd = VTD_CAP_ND(vtdmap->cap); 142 143 switch (nd) { 144 case 0: 145 return (16); 146 case 1: 147 return (64); 148 case 2: 149 return (256); 150 case 3: 151 return (1024); 152 case 4: 153 return (4 * 1024); 154 case 5: 155 return (16 * 1024); 156 case 6: 157 return (64 * 1024); 158 default: 159 panic("vtd_max_domains: invalid value of nd (0x%0x)", nd); 160 } 161 } 162 163 static uint_t 164 domain_id(void) 165 { 166 uint_t id; 167 struct domain *dom; 168 169 /* Skip domain id 0 - it is reserved when Caching Mode field is set */ 170 for (id = 1; id < max_domains; id++) { 171 SLIST_FOREACH(dom, &domhead, next) { 172 if (dom->id == id) 173 break; 174 } 175 if (dom == NULL) 176 break; /* found it */ 177 } 178 179 if (id >= max_domains) 180 panic("domain ids exhausted"); 181 182 return (id); 183 } 184 185 static struct vtdmap * 186 vtd_device_scope(uint16_t rid) 187 { 188 int i, remaining, pathrem; 189 char *end, *pathend; 190 struct vtdmap *vtdmap; 191 ACPI_DMAR_HARDWARE_UNIT *drhd; 192 ACPI_DMAR_DEVICE_SCOPE *device_scope; 193 ACPI_DMAR_PCI_PATH *path; 194 195 for (i = 0; i < drhd_num; i++) { 196 drhd = drhds[i]; 197 198 if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) { 199 /* 200 * From Intel VT-d arch spec, version 3.0: 201 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is 202 * reported for a Segment, it must be enumerated by BIOS 203 * after all other DRHD structures for the same Segment. 204 */ 205 vtdmap = vtdmaps[i]; 206 return (vtdmap); 207 } 208 209 end = (char *)drhd + drhd->Header.Length; 210 remaining = drhd->Header.Length - 211 sizeof (ACPI_DMAR_HARDWARE_UNIT); 212 while (remaining > sizeof (ACPI_DMAR_DEVICE_SCOPE)) { 213 device_scope = 214 (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining); 215 remaining -= device_scope->Length; 216 217 switch (device_scope->EntryType) { 218 /* 0x01 and 0x02 are PCI device entries */ 219 case 0x01: 220 case 0x02: 221 break; 222 default: 223 continue; 224 } 225 226 if (PCI_RID2BUS(rid) != device_scope->Bus) 227 continue; 228 229 pathend = (char *)device_scope + device_scope->Length; 230 pathrem = device_scope->Length - 231 sizeof (ACPI_DMAR_DEVICE_SCOPE); 232 while (pathrem >= sizeof (ACPI_DMAR_PCI_PATH)) { 233 path = (ACPI_DMAR_PCI_PATH *) 234 (pathend - pathrem); 235 pathrem -= sizeof (ACPI_DMAR_PCI_PATH); 236 237 if (PCI_RID2SLOT(rid) != path->Device) 238 continue; 239 if (PCI_RID2FUNC(rid) != path->Function) 240 continue; 241 242 vtdmap = vtdmaps[i]; 243 return (vtdmap); 244 } 245 } 246 } 247 248 /* No matching scope */ 249 return (NULL); 250 } 251 252 static void 253 vtd_wbflush(struct vtdmap *vtdmap) 254 { 255 256 if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0) 257 invalidate_cache_all(); 258 259 if (VTD_CAP_RWBF(vtdmap->cap)) { 260 vtdmap->gcr = VTD_GCR_WBF; 261 while ((vtdmap->gsr & VTD_GSR_WBFS) != 0) 262 ; 263 } 264 } 265 266 static void 267 vtd_ctx_global_invalidate(struct vtdmap *vtdmap) 268 { 269 270 vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL; 271 while ((vtdmap->ccr & VTD_CCR_ICC) != 0) 272 ; 273 } 274 275 static void 276 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap) 277 { 278 int offset; 279 volatile uint64_t *iotlb_reg, val; 280 281 vtd_wbflush(vtdmap); 282 283 offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16; 284 iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8); 285 286 *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL | 287 VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES; 288 289 while (1) { 290 val = *iotlb_reg; 291 if ((val & VTD_IIR_IVT) == 0) 292 break; 293 } 294 } 295 296 static void 297 vtd_translation_enable(struct vtdmap *vtdmap) 298 { 299 300 vtdmap->gcr = VTD_GCR_TE; 301 while ((vtdmap->gsr & VTD_GSR_TES) == 0) 302 ; 303 } 304 305 static void 306 vtd_translation_disable(struct vtdmap *vtdmap) 307 { 308 309 vtdmap->gcr = 0; 310 while ((vtdmap->gsr & VTD_GSR_TES) != 0) 311 ; 312 } 313 314 static void * 315 vtd_map(dev_info_t *dip) 316 { 317 caddr_t regs; 318 ddi_acc_handle_t hdl; 319 int error; 320 321 static ddi_device_acc_attr_t regs_attr = { 322 DDI_DEVICE_ATTR_V0, 323 DDI_NEVERSWAP_ACC, 324 DDI_STRICTORDER_ACC, 325 }; 326 327 error = ddi_regs_map_setup(dip, 0, ®s, 0, PAGE_SIZE, ®s_attr, 328 &hdl); 329 330 if (error != DDI_SUCCESS) 331 return (NULL); 332 333 ddi_set_driver_private(dip, hdl); 334 335 return (regs); 336 } 337 338 static void 339 vtd_unmap(dev_info_t *dip) 340 { 341 ddi_acc_handle_t hdl = ddi_get_driver_private(dip); 342 343 if (hdl != NULL) 344 ddi_regs_map_free(&hdl); 345 } 346 347 #ifndef __FreeBSD__ 348 /* 349 * This lives in vtd_sol.c for license reasons. 350 */ 351 extern dev_info_t *vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *, int); 352 #endif 353 354 static int 355 vtd_init(void) 356 { 357 int i, units, remaining, tmp; 358 struct vtdmap *vtdmap; 359 vm_paddr_t ctx_paddr; 360 char *end; 361 #ifdef __FreeBSD__ 362 char envname[32]; 363 unsigned long mapaddr; 364 #endif 365 ACPI_STATUS status; 366 ACPI_TABLE_DMAR *dmar; 367 ACPI_DMAR_HEADER *hdr; 368 ACPI_DMAR_HARDWARE_UNIT *drhd; 369 370 #ifdef __FreeBSD__ 371 /* 372 * Allow the user to override the ACPI DMAR table by specifying the 373 * physical address of each remapping unit. 374 * 375 * The following example specifies two remapping units at 376 * physical addresses 0xfed90000 and 0xfeda0000 respectively. 377 * set vtd.regmap.0.addr=0xfed90000 378 * set vtd.regmap.1.addr=0xfeda0000 379 */ 380 for (units = 0; units < DRHD_MAX_UNITS; units++) { 381 snprintf(envname, sizeof (envname), "vtd.regmap.%d.addr", 382 units); 383 if (getenv_ulong(envname, &mapaddr) == 0) 384 break; 385 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr); 386 } 387 388 if (units > 0) 389 goto skip_dmar; 390 #else 391 units = 0; 392 #endif 393 /* Search for DMAR table. */ 394 status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar); 395 if (ACPI_FAILURE(status)) 396 return (ENXIO); 397 398 end = (char *)dmar + dmar->Header.Length; 399 remaining = dmar->Header.Length - sizeof (ACPI_TABLE_DMAR); 400 while (remaining > sizeof (ACPI_DMAR_HEADER)) { 401 hdr = (ACPI_DMAR_HEADER *)(end - remaining); 402 if (hdr->Length > remaining) 403 break; 404 /* 405 * From Intel VT-d arch spec, version 1.3: 406 * BIOS implementations must report mapping structures 407 * in numerical order, i.e. All remapping structures of 408 * type 0 (DRHD) enumerated before remapping structures of 409 * type 1 (RMRR) and so forth. 410 */ 411 if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) 412 break; 413 414 drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; 415 drhds[units] = drhd; 416 #ifdef __FreeBSD__ 417 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); 418 #else 419 vtddips[units] = vtd_get_dip(drhd, units); 420 vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]); 421 if (vtdmaps[units] == NULL) 422 goto fail; 423 #endif 424 if (++units >= DRHD_MAX_UNITS) 425 break; 426 remaining -= hdr->Length; 427 } 428 429 if (units <= 0) 430 return (ENXIO); 431 432 #ifdef __FreeBSD__ 433 skip_dmar: 434 #endif 435 drhd_num = units; 436 437 max_domains = 64 * 1024; /* maximum valid value */ 438 for (i = 0; i < drhd_num; i++) { 439 vtdmap = vtdmaps[i]; 440 441 if (VTD_CAP_CM(vtdmap->cap) != 0) 442 panic("vtd_init: invalid caching mode"); 443 444 /* take most compatible (minimum) value */ 445 if ((tmp = vtd_max_domains(vtdmap)) < max_domains) 446 max_domains = tmp; 447 } 448 449 /* 450 * Set up the root-table to point to the context-entry tables 451 */ 452 for (i = 0; i < 256; i++) { 453 ctx_paddr = vtophys(ctx_tables[i]); 454 if (ctx_paddr & PAGE_MASK) 455 panic("ctx table (0x%0lx) not page aligned", ctx_paddr); 456 457 root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT; 458 } 459 460 return (0); 461 462 #ifndef __FreeBSD__ 463 fail: 464 for (i = 0; i <= units; i++) 465 vtd_unmap(vtddips[i]); 466 return (ENXIO); 467 #endif 468 } 469 470 static void 471 vtd_cleanup(void) 472 { 473 #ifndef __FreeBSD__ 474 int i; 475 476 KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty")); 477 478 bzero(root_table, sizeof (root_table)); 479 480 for (i = 0; i <= drhd_num; i++) { 481 vtdmaps[i] = NULL; 482 /* 483 * Unmap the vtd registers. Note that the devinfo nodes 484 * themselves aren't removed, they are considered system state 485 * and can be reused when the module is reloaded. 486 */ 487 if (vtddips[i] != NULL) 488 vtd_unmap(vtddips[i]); 489 } 490 #endif 491 } 492 493 static void 494 vtd_enable(void) 495 { 496 int i; 497 struct vtdmap *vtdmap; 498 499 for (i = 0; i < drhd_num; i++) { 500 vtdmap = vtdmaps[i]; 501 vtd_wbflush(vtdmap); 502 503 /* Update the root table address */ 504 vtdmap->rta = vtophys(root_table); 505 vtdmap->gcr = VTD_GCR_SRTP; 506 while ((vtdmap->gsr & VTD_GSR_RTPS) == 0) 507 ; 508 509 vtd_ctx_global_invalidate(vtdmap); 510 vtd_iotlb_global_invalidate(vtdmap); 511 512 vtd_translation_enable(vtdmap); 513 } 514 } 515 516 static void 517 vtd_disable(void) 518 { 519 int i; 520 struct vtdmap *vtdmap; 521 522 for (i = 0; i < drhd_num; i++) { 523 vtdmap = vtdmaps[i]; 524 vtd_translation_disable(vtdmap); 525 } 526 } 527 528 static void 529 vtd_add_device(void *arg, uint16_t rid) 530 { 531 int idx; 532 uint64_t *ctxp; 533 struct domain *dom = arg; 534 vm_paddr_t pt_paddr; 535 struct vtdmap *vtdmap; 536 uint8_t bus; 537 538 bus = PCI_RID2BUS(rid); 539 ctxp = ctx_tables[bus]; 540 pt_paddr = vtophys(dom->ptp); 541 idx = VTD_RID2IDX(rid); 542 543 if (ctxp[idx] & VTD_CTX_PRESENT) { 544 panic("vtd_add_device: device %x is already owned by " 545 "domain %d", rid, (uint16_t)(ctxp[idx + 1] >> 8)); 546 } 547 548 if ((vtdmap = vtd_device_scope(rid)) == NULL) 549 panic("vtd_add_device: device %x is not in scope for " 550 "any DMA remapping unit", rid); 551 552 /* 553 * Order is important. The 'present' bit is set only after all fields 554 * of the context pointer are initialized. 555 */ 556 ctxp[idx + 1] = dom->addrwidth | (dom->id << 8); 557 558 if (VTD_ECAP_DI(vtdmap->ext_cap)) 559 ctxp[idx] = VTD_CTX_TT_ALL; 560 else 561 ctxp[idx] = 0; 562 563 ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT; 564 565 /* 566 * 'Not Present' entries are not cached in either the Context Cache 567 * or in the IOTLB, so there is no need to invalidate either of them. 568 */ 569 } 570 571 static void 572 vtd_remove_device(void *arg, uint16_t rid) 573 { 574 int i, idx; 575 uint64_t *ctxp; 576 struct vtdmap *vtdmap; 577 uint8_t bus; 578 579 bus = PCI_RID2BUS(rid); 580 ctxp = ctx_tables[bus]; 581 idx = VTD_RID2IDX(rid); 582 583 /* 584 * Order is important. The 'present' bit is must be cleared first. 585 */ 586 ctxp[idx] = 0; 587 ctxp[idx + 1] = 0; 588 589 /* 590 * Invalidate the Context Cache and the IOTLB. 591 * 592 * XXX use device-selective invalidation for Context Cache 593 * XXX use domain-selective invalidation for IOTLB 594 */ 595 for (i = 0; i < drhd_num; i++) { 596 vtdmap = vtdmaps[i]; 597 vtd_ctx_global_invalidate(vtdmap); 598 vtd_iotlb_global_invalidate(vtdmap); 599 } 600 } 601 602 #define CREATE_MAPPING 0 603 #define REMOVE_MAPPING 1 604 605 static uint64_t 606 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, 607 int remove) 608 { 609 struct domain *dom; 610 int i, spshift, ptpshift, ptpindex, nlevels; 611 uint64_t spsize, *ptp; 612 613 dom = arg; 614 ptpindex = 0; 615 ptpshift = 0; 616 617 KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__, 618 gpa, len)); 619 KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond " 620 "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr)); 621 622 if (gpa & PAGE_MASK) 623 panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); 624 625 if (hpa & PAGE_MASK) 626 panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa); 627 628 if (len & PAGE_MASK) 629 panic("vtd_create_mapping: unaligned len 0x%0lx", len); 630 631 /* 632 * Compute the size of the mapping that we can accommodate. 633 * 634 * This is based on three factors: 635 * - supported super page size 636 * - alignment of the region starting at 'gpa' and 'hpa' 637 * - length of the region 'len' 638 */ 639 spshift = 48; 640 for (i = 3; i >= 0; i--) { 641 spsize = 1UL << spshift; 642 if ((dom->spsmask & (1 << i)) != 0 && 643 (gpa & (spsize - 1)) == 0 && 644 (hpa & (spsize - 1)) == 0 && 645 (len >= spsize)) { 646 break; 647 } 648 spshift -= 9; 649 } 650 651 ptp = dom->ptp; 652 nlevels = dom->pt_levels; 653 while (--nlevels >= 0) { 654 ptpshift = 12 + nlevels * 9; 655 ptpindex = (gpa >> ptpshift) & 0x1FF; 656 657 /* We have reached the leaf mapping */ 658 if (spshift >= ptpshift) { 659 break; 660 } 661 662 /* 663 * We are working on a non-leaf page table page. 664 * 665 * Create a downstream page table page if necessary and point 666 * to it from the current page table. 667 */ 668 if (ptp[ptpindex] == 0) { 669 void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO); 670 ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR; 671 } 672 673 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M); 674 } 675 676 if ((gpa & ((1UL << ptpshift) - 1)) != 0) 677 panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift); 678 679 /* 680 * Update the 'gpa' -> 'hpa' mapping 681 */ 682 if (remove) { 683 ptp[ptpindex] = 0; 684 } else { 685 ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR; 686 687 if (nlevels > 0) 688 ptp[ptpindex] |= VTD_PTE_SUPERPAGE; 689 } 690 691 return (1UL << ptpshift); 692 } 693 694 static uint64_t 695 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) 696 { 697 698 return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); 699 } 700 701 static uint64_t 702 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) 703 { 704 705 return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); 706 } 707 708 static void 709 vtd_invalidate_tlb(void *dom) 710 { 711 int i; 712 struct vtdmap *vtdmap; 713 714 /* 715 * Invalidate the IOTLB. 716 * XXX use domain-selective invalidation for IOTLB 717 */ 718 for (i = 0; i < drhd_num; i++) { 719 vtdmap = vtdmaps[i]; 720 vtd_iotlb_global_invalidate(vtdmap); 721 } 722 } 723 724 static void * 725 vtd_create_domain(vm_paddr_t maxaddr) 726 { 727 struct domain *dom; 728 vm_paddr_t addr; 729 int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth; 730 struct vtdmap *vtdmap; 731 732 if (drhd_num <= 0) 733 panic("vtd_create_domain: no dma remapping hardware available"); 734 735 /* 736 * Calculate AGAW. 737 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec. 738 */ 739 addr = 0; 740 for (gaw = 0; addr < maxaddr; gaw++) 741 addr = 1ULL << gaw; 742 743 res = (gaw - 12) % 9; 744 if (res == 0) 745 agaw = gaw; 746 else 747 agaw = gaw + 9 - res; 748 749 if (agaw > 64) 750 agaw = 64; 751 752 /* 753 * Select the smallest Supported AGAW and the corresponding number 754 * of page table levels. 755 */ 756 pt_levels = 2; 757 sagaw = 30; 758 addrwidth = 0; 759 760 tmp = ~0; 761 for (i = 0; i < drhd_num; i++) { 762 vtdmap = vtdmaps[i]; 763 /* take most compatible value */ 764 tmp &= VTD_CAP_SAGAW(vtdmap->cap); 765 } 766 767 for (i = 0; i < 5; i++) { 768 if ((tmp & (1 << i)) != 0 && sagaw >= agaw) 769 break; 770 pt_levels++; 771 addrwidth++; 772 sagaw += 9; 773 if (sagaw > 64) 774 sagaw = 64; 775 } 776 777 if (i >= 5) { 778 panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d", 779 tmp, agaw); 780 } 781 782 dom = malloc(sizeof (struct domain), M_VTD, M_ZERO | M_WAITOK); 783 dom->pt_levels = pt_levels; 784 dom->addrwidth = addrwidth; 785 dom->id = domain_id(); 786 dom->maxaddr = maxaddr; 787 dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK); 788 if ((uintptr_t)dom->ptp & PAGE_MASK) 789 panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp); 790 791 #ifdef __FreeBSD__ 792 #ifdef notyet 793 /* 794 * XXX superpage mappings for the iommu do not work correctly. 795 * 796 * By default all physical memory is mapped into the host_domain. 797 * When a VM is allocated wired memory the pages belonging to it 798 * are removed from the host_domain and added to the vm's domain. 799 * 800 * If the page being removed was mapped using a superpage mapping 801 * in the host_domain then we need to demote the mapping before 802 * removing the page. 803 * 804 * There is not any code to deal with the demotion at the moment 805 * so we disable superpage mappings altogether. 806 */ 807 dom->spsmask = ~0; 808 for (i = 0; i < drhd_num; i++) { 809 vtdmap = vtdmaps[i]; 810 /* take most compatible value */ 811 dom->spsmask &= VTD_CAP_SPS(vtdmap->cap); 812 } 813 #endif 814 #else 815 /* 816 * On illumos we decidedly do not remove memory mapped to a VM's domain 817 * from the host_domain, so we don't have to deal with page demotion and 818 * can just use large pages. 819 * 820 * Since VM memory is currently allocated as 4k pages and mapped into 821 * the VM domain page by page, the use of large pages is essentially 822 * limited to the host_domain. 823 */ 824 dom->spsmask = VTD_CAP_SPS(vtdmap->cap); 825 #endif 826 827 SLIST_INSERT_HEAD(&domhead, dom, next); 828 829 return (dom); 830 } 831 832 static void 833 vtd_free_ptp(uint64_t *ptp, int level) 834 { 835 int i; 836 uint64_t *nlp; 837 838 if (level > 1) { 839 for (i = 0; i < 512; i++) { 840 if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0) 841 continue; 842 if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0) 843 continue; 844 nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M); 845 vtd_free_ptp(nlp, level - 1); 846 } 847 } 848 849 bzero(ptp, PAGE_SIZE); 850 free(ptp, M_VTD); 851 } 852 853 static void 854 vtd_destroy_domain(void *arg) 855 { 856 struct domain *dom; 857 858 dom = arg; 859 860 SLIST_REMOVE(&domhead, dom, domain, next); 861 vtd_free_ptp(dom->ptp, dom->pt_levels); 862 free(dom, M_VTD); 863 } 864 865 const struct iommu_ops iommu_ops_intel = { 866 .init = vtd_init, 867 .cleanup = vtd_cleanup, 868 .enable = vtd_enable, 869 .disable = vtd_disable, 870 .create_domain = vtd_create_domain, 871 .destroy_domain = vtd_destroy_domain, 872 .create_mapping = vtd_create_mapping, 873 .remove_mapping = vtd_remove_mapping, 874 .add_device = vtd_add_device, 875 .remove_device = vtd_remove_device, 876 .invalidate_tlb = vtd_invalidate_tlb, 877 }; 878