1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker_set.h> 36 37 #include <ctype.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <pthread.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <strings.h> 45 #include <assert.h> 46 #include <stdbool.h> 47 #include <sysexits.h> 48 49 #include <machine/vmm.h> 50 #include <machine/vmm_snapshot.h> 51 #include <vmmapi.h> 52 53 #include "acpi.h" 54 #include "bhyverun.h" 55 #include "config.h" 56 #include "debug.h" 57 #include "inout.h" 58 #include "ioapic.h" 59 #include "mem.h" 60 #include "pci_emul.h" 61 #include "pci_irq.h" 62 #include "pci_lpc.h" 63 64 #define CONF1_ADDR_PORT 0x0cf8 65 #define CONF1_DATA_PORT 0x0cfc 66 67 #define CONF1_ENABLE 0x80000000ul 68 69 #define MAXBUSES (PCI_BUSMAX + 1) 70 #define MAXSLOTS (PCI_SLOTMAX + 1) 71 #define MAXFUNCS (PCI_FUNCMAX + 1) 72 73 #define GB (1024 * 1024 * 1024UL) 74 75 struct funcinfo { 76 nvlist_t *fi_config; 77 struct pci_devemu *fi_pde; 78 struct pci_devinst *fi_devi; 79 }; 80 81 struct intxinfo { 82 int ii_count; 83 int ii_pirq_pin; 84 int ii_ioapic_irq; 85 }; 86 87 struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90 }; 91 92 struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97 }; 98 99 static struct businfo *pci_businfo[MAXBUSES]; 100 101 SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103 static uint64_t pci_emul_iobase; 104 static uint64_t pci_emul_membase32; 105 static uint64_t pci_emul_membase64; 106 static uint64_t pci_emul_memlim64; 107 108 struct pci_bar_allocation { 109 TAILQ_ENTRY(pci_bar_allocation) chain; 110 struct pci_devinst *pdi; 111 int idx; 112 enum pcibar_type type; 113 uint64_t size; 114 }; 115 
/* Pending BAR allocations, kept sorted by descending size (see pci_emul_alloc_bar). */
TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
    pci_bars);

#define PCI_EMUL_IOBASE		0x2000
#define PCI_EMUL_IOLIMIT	0x10000

#define PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);

/*
 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't
 * change this address without changing it in OVMF.
 */
#define PCI_EMUL_MEMBASE32	0xC0000000
#define PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE
#define PCI_EMUL_MEMSIZE64	(32*GB)

static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
    int func, int coff, int bytes, uint32_t *val);

/*
 * Write 'val' into the emulated config space of 'pi' at offset 'coff',
 * using the accessor that matches the access width (1, 2 or 4 bytes).
 * Any width other than 1 or 2 is treated as a 4-byte access.
 */
static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}

/*
 * Read from the emulated config space of 'pi' at offset 'coff' with the
 * accessor matching the access width; widths other than 1 or 2 read 4 bytes.
 */
static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 * <bus>:<slot>:<func>,<emul>[,<config>]
 * <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g. virtio-net
 *  config is an optional string, depending on the device, that can be
 *  used for configuration.
178 * Examples are: 179 * 1,virtio-net,tap0 180 * 3:0,dummy 181 */ 182 static void 183 pci_parse_slot_usage(char *aopt) 184 { 185 186 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 187 } 188 189 /* 190 * Helper function to parse a list of comma-separated options where 191 * each option is formatted as "name[=value]". If no value is 192 * provided, the option is treated as a boolean and is given a value 193 * of true. 194 */ 195 int 196 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 197 { 198 char *config, *name, *tofree, *value; 199 200 if (opt == NULL) 201 return (0); 202 203 config = tofree = strdup(opt); 204 while ((name = strsep(&config, ",")) != NULL) { 205 value = strchr(name, '='); 206 if (value != NULL) { 207 *value = '\0'; 208 value++; 209 set_config_value_node(nvl, name, value); 210 } else 211 set_config_bool_node(nvl, name, true); 212 } 213 free(tofree); 214 return (0); 215 } 216 217 /* 218 * PCI device configuration is stored in MIBs that encode the device's 219 * location: 220 * 221 * pci.<bus>.<slot>.<func> 222 * 223 * Where "bus", "slot", and "func" are all decimal values without 224 * leading zeroes. Each valid device must have a "device" node which 225 * identifies the driver model of the device. 226 * 227 * Device backends can provide a parser for the "config" string. If 228 * a custom parser is not provided, pci_parse_legacy_config() is used 229 * to parse the string. 
 */
int
pci_parse_slot(char *opt)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct pci_devemu *pde;
	char *emul, *config, *str, *cp;
	int error, bnum, snum, fnum;
	nvlist_t *nvl;

	error = -1;
	str = strdup(opt);

	/* Split "<location>,<emul>[,<config>]" at the first two commas. */
	emul = config = NULL;
	if ((cp = strchr(str, ',')) != NULL) {
		*cp = '\0';
		emul = cp + 1;
		if ((cp = strchr(emul, ',')) != NULL) {
			*cp = '\0';
			config = cp + 1;
		}
	} else {
		pci_parse_slot_usage(opt);
		goto done;
	}

	/* <bus>:<slot>:<func> */
	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
		bnum = 0;
		/* <slot>:<func> */
		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
			fnum = 0;
			/* <slot> */
			if (sscanf(str, "%d", &snum) != 1) {
				snum = -1;
			}
		}
	}

	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
	    fnum < 0 || fnum >= MAXFUNCS) {
		pci_parse_slot_usage(opt);
		goto done;
	}

	pde = pci_emul_finddev(emul);
	if (pde == NULL) {
		EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
		    fnum, emul);
		goto done;
	}

	/* Refuse to configure the same B:S:F twice. */
	snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
	    fnum);
	nvl = find_config_node(node_name);
	if (nvl != NULL) {
		EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
		    fnum);
		goto done;
	}
	nvl = create_config_node(node_name);
	if (pde->pe_alias != NULL)
		set_config_value_node(nvl, "device", pde->pe_alias);
	else
		set_config_value_node(nvl, "device", pde->pe_emu);

	/* Let the device model parse its own config string if it can. */
	if (pde->pe_legacy_config != NULL)
		error = pde->pe_legacy_config(nvl, config);
	else
		error = pci_parse_legacy_config(nvl, config);
done:
	free(str);
	return (error);
}

/* List the names of all linked-in PCI device emulations on stdout. */
void
pci_print_supported_devices()
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		printf("%s\n", pdp->pe_emu);
	}
}

/*
 * Return 1 if 'offset' lies within the device's MSI-X Pending Bit Array
 * region, 0 otherwise.
 */
static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{

	if (offset < pi->pi_msix.pba_offset)
		return (0);

	if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		return (0);
	}

	return (1);
}

/*
 * Guest write to the MSI-X table.  'offset' is relative to the start of
 * the table.  Returns 0 on success, -1 for unsupported sizes, unaligned
 * accesses, or an out-of-range table index.
 */
int
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
    uint64_t value)
{
	int msix_entry_offset;
	int tab_index;
	char *dest;

	/* support only 4 or 8 byte writes */
	if (size != 4 && size != 8)
		return (-1);

	/*
	 * Return if table index is beyond what device supports
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (tab_index >= pi->pi_msix.table_count)
		return (-1);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned writes */
	if ((msix_entry_offset % size) != 0)
		return (-1);

	dest = (char *)(pi->pi_msix.table + tab_index);
	dest += msix_entry_offset;

	if (size == 4)
		*((uint32_t *)dest) = value;
	else
		*((uint64_t *)dest) = value;

	return (0);
}

/*
 * Guest read from the MSI-X table or PBA.  Returns all-ones for
 * unsupported sizes, unaligned accesses, or offsets outside both regions.
 */
uint64_t
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
{
	char *dest;
	int msix_entry_offset;
	int tab_index;
	uint64_t retval = ~0;

	/*
	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
	 * table but we also allow 1 byte access to accommodate reads from
	 * ddb.
	 */
	if (size != 1 && size != 4 && size != 8)
		return (retval);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned reads */
	if ((msix_entry_offset % size) != 0) {
		return (retval);
	}

	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	if (tab_index < pi->pi_msix.table_count) {
		/* valid MSI-X Table access */
		dest = (char *)(pi->pi_msix.table + tab_index);
		dest += msix_entry_offset;

		if (size == 1)
			retval = *((uint8_t *)dest);
		else if (size == 4)
			retval = *((uint32_t *)dest);
		else
			retval = *((uint64_t *)dest);
	} else if (pci_valid_pba_offset(pi, offset)) {
		/* return 0 for PBA access */
		retval = 0;
	}

	return (retval);
}

/* Return the BAR index holding the MSI-X table, or -1 if MSI-X is absent. */
int
pci_msix_table_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.table_bar);
	else
		return (-1);
}

/* Return the BAR index holding the MSI-X PBA, or -1 if MSI-X is absent. */
int
pci_msix_pba_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.pba_bar);
	else
		return (-1);
}

/*
 * inout handler for a device's I/O-space BARs: find the BAR containing
 * 'port' and forward the access to the device model's barread/barwrite.
 * Returns -1 if the access does not fall entirely within any I/O BAR.
 */
static int
pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	struct pci_devinst *pdi = arg;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int i;

	for (i = 0; i <= PCI_BARMAX; i++) {
		if (pdi->pi_bar[i].type == PCIBAR_IO &&
		    port >= pdi->pi_bar[i].addr &&
		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
			offset = port - pdi->pi_bar[i].addr;
			if (in)
				*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
				    offset, bytes);
			else
				(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
				    bytes, *eax);
			return (0);
		}
	}
	return (-1);
}

/*
 * Memory handler for a device's MMIO BARs.  'arg2' carries the BAR index.
 * 8-byte accesses are split into two 4-byte accesses before being handed
 * to the device model.
 */
static int
pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int) arg2;

	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
	    pdi->pi_bar[bidx].type == PCIBAR_MEM64);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	    addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

	offset = addr - pdi->pi_bar[bidx].addr;

	if (dir == MEM_F_WRITE) {
		if (size == 8) {
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    4, *val & 0xffffffff);
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
			    4, *val >> 32);
		} else {
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    size, *val);
		}
	} else {
		if (size == 8) {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, 4);
			*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset + 4, 4) << 32;
		} else {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, size);
		}
	}

	return (0);
}


/*
 * Carve 'size' bytes out of the resource window starting at '*baseptr',
 * naturally aligned.  On success the allocated address is stored in
 * '*addr', '*baseptr' is advanced past it and 0 is returned; -1 is
 * returned if the allocation would exceed 'limit'.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	if (base + size <= limit) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	} else
		return (-1);
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	struct pci_devemu *pe;
	int error;
	struct inout_port iop;
	struct mem_range mr;

	pe = pi->pi_d;
	switch (pi->pi_bar[idx].type) {
	case PCIBAR_IO:
		/* I/O BAR: hook/unhook the port range in the inout tables. */
		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
		/* Notify the device model of the (de)registered address. */
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
					  pi->pi_bar[idx].addr);
		break;
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
		/* MMIO BAR: hook/unhook the guest-physical memory range. */
		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
					  pi->pi_bar[idx].addr);
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);
}

/* Stop decoding the address range of BAR 'idx'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}

/* Start decoding the address range of BAR 'idx'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device?
 */
static int
memen(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_MEMEN);
}

/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled the address space decoding then intercept
 * the address range decoded by the BAR register.
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
	int decode;

	if (pi->pi_bar[idx].type == PCIBAR_IO)
		decode = porten(pi);
	else
		decode = memen(pi);

	if (decode)
		unregister_bar(pi, idx);

	switch (type) {
	case PCIBAR_IO:
	case PCIBAR_MEM32:
		pi->pi_bar[idx].addr = addr;
		break;
	case PCIBAR_MEM64:
		/* 'addr' carries the low 32 bits of a 64-bit BAR. */
		pi->pi_bar[idx].addr &= ~0xffffffffUL;
		pi->pi_bar[idx].addr |= addr;
		break;
	case PCIBAR_MEMHI64:
		/* 'addr' carries the high 32 bits of a 64-bit BAR. */
		pi->pi_bar[idx].addr &= 0xffffffff;
		pi->pi_bar[idx].addr |= addr;
		break;
	default:
		assert(0);
	}

	if (decode)
		register_bar(pi, idx);
}

/*
 * Record a request for BAR 'idx' of 'pdi' of the given type and size.
 * The actual address assignment is deferred (see the comment below); the
 * command register decode bit for the BAR's address space is set now.
 */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{
	assert(idx >= 0 && idx <= PCI_BARMAX);

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else {
		if (size < 16)
			size = 16;
	}

	/*
	 * To reduce fragmentation of the MMIO space, we allocate the BARs by
	 * size. Therefore, don't allocate the BAR yet. We create a list of all
	 * BAR allocation which is sorted by BAR size. When all PCI devices are
	 * initialized, we will assign an address to the BARs.
	 */

	/* create a new list entry */
	/* NOTE(review): malloc() result is used unchecked — TODO confirm
	 * whether abort-on-OOM is acceptable here or a check is wanted. */
	struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
	memset(new_bar, 0, sizeof(*new_bar));
	new_bar->pdi = pdi;
	new_bar->idx = idx;
	new_bar->type = type;
	new_bar->size = size;

	/*
	 * Search for a BAR which size is lower than the size of our newly
	 * allocated BAR.
	 */
	struct pci_bar_allocation *bar = NULL;
	TAILQ_FOREACH(bar, &pci_bars, chain) {
		if (bar->size < size) {
			break;
		}
	}

	if (bar == NULL) {
		/*
		 * Either the list is empty or new BAR is the smallest BAR of
		 * the list. Append it to the end of our list.
		 */
		TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
	} else {
		/*
		 * The found BAR is smaller than our new BAR. For that reason,
		 * insert our new BAR before the found BAR.
		 */
		TAILQ_INSERT_BEFORE(bar, new_bar, chain);
	}

	/*
	 * pci_passthru devices synchronize their physical and virtual command
	 * register on init. For that reason, the virtual cmd reg should be
	 * updated as early as possible.
	 */
	uint16_t enbit = 0;
	switch (type) {
	case PCIBAR_IO:
		enbit = PCIM_CMD_PORTEN;
		break;
	case PCIBAR_MEM64:
	case PCIBAR_MEM32:
		enbit = PCIM_CMD_MEMEN;
		break;
	default:
		enbit = 0;
		break;
	}

	const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
	pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);

	return (0);
}

/*
 * Assign an address to a previously recorded BAR request, program the
 * BAR register(s) in config space and register the decoded range.
 * Returns 0 on success or the error from pci_emul_alloc_resource().
 */
static int
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
    const enum pcibar_type type, const uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;

	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless then allocation size is larger than some arbitrary
		 * number (128MB currently).
		 */
		if (size > 128 * 1024 * 1024) {
			baseptr = &pci_emul_membase64;
			limit = pci_emul_memlim64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
			    PCIM_BAR_MEM_PREFETCH;
		} else {
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	default:
		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;
	/*
	 * passthru devices are using same lobits as physical device they set
	 * this property
	 */
	if (pdi->pi_bar[idx].lobits != 0) {
		lobits = pdi->pi_bar[idx].lobits;
	} else {
		pdi->pi_bar[idx].lobits = lobits;
	}

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	if (type == PCIBAR_MEM64) {
		/* A 64-bit BAR consumes the next BAR slot for its high half. */
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	register_bar(pdi, idx);

	return (0);
}

#define	CAP_START_OFFSET	0x40
/*
 * Append a capability of 'caplen' bytes to the device's capability list,
 * linking it after the previously added capability (or starting the list
 * at CAP_START_OFFSET).  Returns -1 if it does not fit in config space.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, reallen;
	uint16_t sts;

	assert(caplen > 0);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
		capoff = CAP_START_OFFSET;
	else
		capoff = pi->pi_capend + 1;

	/* Check if we have enough space */
	if (capoff + reallen > PCI_REGMAX + 1)
		return (-1);

	/* Set the previous capability pointer */
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else
		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, 0);

	pi->pi_prevcap = capoff;
	pi->pi_capend = capoff + reallen - 1;
	return (0);
}

/* Look up a device emulation by its registered name; NULL if not found. */
static struct pci_devemu *
pci_emul_finddev(const char *name)
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		if (!strcmp(pdp->pe_emu, name)) {
			return (pdp);
		}
	}

	return (NULL);
}

/*
 * Instantiate one function at bus/slot/func using the device model 'pde'
 * and invoke its init routine.  On success the instance is stored in
 * 'fi'; on failure it is freed and the init error is returned.
 */
static int
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
    int func, struct funcinfo *fi)
{
	struct pci_devinst *pdi;
	int err;

	/* NOTE(review): calloc() result is used unchecked — TODO confirm. */
	pdi = calloc(1, sizeof(struct pci_devinst));

	pdi->pi_vmctx = ctx;
	pdi->pi_bus = bus;
	pdi->pi_slot = slot;
	pdi->pi_func = func;
	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
	pdi->pi_lintr.pin = 0;
	pdi->pi_lintr.state = IDLE;
	pdi->pi_lintr.pirq_pin = 0;
	pdi->pi_lintr.ioapic_irq = 0;
	pdi->pi_d = pde;
	snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);

	/* Disable legacy interrupts */
	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);

	pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);

	err = (*pde->pe_init)(ctx, pdi, fi->fi_config);
	if (err == 0)
		fi->fi_devi = pdi;
	else
		free(pdi);

	return (err);
}

/* Fill in an MSI capability structure for 'msgnum' messages. */
void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
	int mmc;

	/* Number of msi messages must be a power of 2 between 1 and 32 */
	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
	mmc = ffs(msgnum) - 1;

	bzero(msicap, sizeof(struct msicap));
	msicap->capid = PCIY_MSI;
	msicap->nextptr = nextptr;
	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

/* Add an MSI capability supporting 'msgnum' messages to the device. */
int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

/* Fill in an MSI-X capability structure; the PBA follows the table. */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

/* Allocate the in-memory MSI-X table and mask every vector. */
static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
	int i, table_size;

	assert(table_entries > 0);
	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
	/* NOTE(review): calloc() result is used unchecked — TODO confirm. */
	pi->pi_msix.table = calloc(1, table_size);

	/* set mask bit of vector control register */
	for (i = 0; i < table_entries; i++)
		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

/*
 * Add an MSI-X capability with 'msgnum' vectors; the table and PBA share
 * BAR 'barnum', which is allocated here.
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
	    tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
	    sizeof(msixcap)));
}

/*
 * Handle a guest write to the MSI-X capability: only the MSI-X Enable
 * and Function Mask bits of Message Control are writable.
 */
static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		 int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}

/*
 * Handle a guest write to the MSI capability and refresh the cached MSI
 * state (enable, address, data, message count) from config space.
 */
static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	/* Message data location depends on whether the cap is 64-bit. */
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	pci_lintr_update(pi);
}

/* Guest write to the PCI Express capability (currently unfiltered). */
void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		 int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}

#define	PCIECAP_VERSION	0x2
/* Add a PCI Express capability of the given device/port 'type'. */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;	/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space. A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	/* Dispatch to the capability-specific write handler. */
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}

/* Return 1 if 'offset' falls within the device's capability region. */
static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
	uint16_t sts;

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
			return (1);
	}
	return (0);
}

static int
pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	/*
	 * Ignore writes; return 0xff's for reads. The mem read code
	 * will take care of truncating to the correct size.
1188 */ 1189 if (dir == MEM_F_READ) { 1190 *val = 0xffffffffffffffff; 1191 } 1192 1193 return (0); 1194 } 1195 1196 static int 1197 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1198 int bytes, uint64_t *val, void *arg1, long arg2) 1199 { 1200 int bus, slot, func, coff, in; 1201 1202 coff = addr & 0xfff; 1203 func = (addr >> 12) & 0x7; 1204 slot = (addr >> 15) & 0x1f; 1205 bus = (addr >> 20) & 0xff; 1206 in = (dir == MEM_F_READ); 1207 if (in) 1208 *val = ~0UL; 1209 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1210 return (0); 1211 } 1212 1213 uint64_t 1214 pci_ecfg_base(void) 1215 { 1216 1217 return (PCI_EMUL_ECFG_BASE); 1218 } 1219 1220 #define BUSIO_ROUNDUP 32 1221 #define BUSMEM32_ROUNDUP (1024 * 1024) 1222 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1223 1224 int 1225 init_pci(struct vmctx *ctx) 1226 { 1227 char node_name[sizeof("pci.XXX.XX.X")]; 1228 struct mem_range mr; 1229 struct pci_devemu *pde; 1230 struct businfo *bi; 1231 struct slotinfo *si; 1232 struct funcinfo *fi; 1233 nvlist_t *nvl; 1234 const char *emul; 1235 size_t lowmem; 1236 int bus, slot, func; 1237 int error; 1238 1239 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1240 errx(EX_OSERR, "Invalid lowmem limit"); 1241 1242 pci_emul_iobase = PCI_EMUL_IOBASE; 1243 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1244 1245 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1246 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1247 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1248 1249 for (bus = 0; bus < MAXBUSES; bus++) { 1250 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1251 nvl = find_config_node(node_name); 1252 if (nvl == NULL) 1253 continue; 1254 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1255 bi = pci_businfo[bus]; 1256 1257 /* 1258 * Keep track of the i/o and memory resources allocated to 1259 * this bus. 
1260 */ 1261 bi->iobase = pci_emul_iobase; 1262 bi->membase32 = pci_emul_membase32; 1263 bi->membase64 = pci_emul_membase64; 1264 1265 /* first run: init devices */ 1266 for (slot = 0; slot < MAXSLOTS; slot++) { 1267 si = &bi->slotinfo[slot]; 1268 for (func = 0; func < MAXFUNCS; func++) { 1269 fi = &si->si_funcs[func]; 1270 snprintf(node_name, sizeof(node_name), 1271 "pci.%d.%d.%d", bus, slot, func); 1272 nvl = find_config_node(node_name); 1273 if (nvl == NULL) 1274 continue; 1275 1276 fi->fi_config = nvl; 1277 emul = get_config_value_node(nvl, "device"); 1278 if (emul == NULL) { 1279 EPRINTLN("pci slot %d:%d:%d: missing " 1280 "\"device\" value", bus, slot, func); 1281 return (EINVAL); 1282 } 1283 pde = pci_emul_finddev(emul); 1284 if (pde == NULL) { 1285 EPRINTLN("pci slot %d:%d:%d: unknown " 1286 "device \"%s\"", bus, slot, func, 1287 emul); 1288 return (EINVAL); 1289 } 1290 if (pde->pe_alias != NULL) { 1291 EPRINTLN("pci slot %d:%d:%d: legacy " 1292 "device \"%s\", use \"%s\" instead", 1293 bus, slot, func, emul, 1294 pde->pe_alias); 1295 return (EINVAL); 1296 } 1297 fi->fi_pde = pde; 1298 error = pci_emul_init(ctx, pde, bus, slot, 1299 func, fi); 1300 if (error) 1301 return (error); 1302 } 1303 } 1304 1305 /* second run: assign BARs and free list */ 1306 struct pci_bar_allocation *bar; 1307 struct pci_bar_allocation *bar_tmp; 1308 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1309 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1310 bar->size); 1311 free(bar); 1312 } 1313 TAILQ_INIT(&pci_bars); 1314 1315 /* 1316 * Add some slop to the I/O and memory resources decoded by 1317 * this bus to give a guest some flexibility if it wants to 1318 * reprogram the BARs. 
1319 */ 1320 pci_emul_iobase += BUSIO_ROUNDUP; 1321 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1322 bi->iolimit = pci_emul_iobase; 1323 1324 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1325 pci_emul_membase32 = roundup2(pci_emul_membase32, 1326 BUSMEM32_ROUNDUP); 1327 bi->memlimit32 = pci_emul_membase32; 1328 1329 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1330 pci_emul_membase64 = roundup2(pci_emul_membase64, 1331 BUSMEM64_ROUNDUP); 1332 bi->memlimit64 = pci_emul_membase64; 1333 } 1334 1335 /* 1336 * PCI backends are initialized before routing INTx interrupts 1337 * so that LPC devices are able to reserve ISA IRQs before 1338 * routing PIRQ pins. 1339 */ 1340 for (bus = 0; bus < MAXBUSES; bus++) { 1341 if ((bi = pci_businfo[bus]) == NULL) 1342 continue; 1343 1344 for (slot = 0; slot < MAXSLOTS; slot++) { 1345 si = &bi->slotinfo[slot]; 1346 for (func = 0; func < MAXFUNCS; func++) { 1347 fi = &si->si_funcs[func]; 1348 if (fi->fi_devi == NULL) 1349 continue; 1350 pci_lintr_route(fi->fi_devi); 1351 } 1352 } 1353 } 1354 lpc_pirq_routed(); 1355 1356 /* 1357 * The guest physical memory map looks like the following: 1358 * [0, lowmem) guest system memory 1359 * [lowmem, 0xC0000000) memory hole (may be absent) 1360 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1361 * [0xE0000000, 0xF0000000) PCI extended config window 1362 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1363 * [4GB, 4GB + highmem) 1364 */ 1365 1366 /* 1367 * Accesses to memory addresses that are not allocated to system 1368 * memory or PCI devices return 0xff's. 
1369 */ 1370 lowmem = vm_get_lowmem_size(ctx); 1371 bzero(&mr, sizeof(struct mem_range)); 1372 mr.name = "PCI hole"; 1373 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1374 mr.base = lowmem; 1375 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1376 mr.handler = pci_emul_fallback_handler; 1377 error = register_mem_fallback(&mr); 1378 assert(error == 0); 1379 1380 /* PCI extended config space */ 1381 bzero(&mr, sizeof(struct mem_range)); 1382 mr.name = "PCI ECFG"; 1383 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1384 mr.base = PCI_EMUL_ECFG_BASE; 1385 mr.size = PCI_EMUL_ECFG_SIZE; 1386 mr.handler = pci_emul_ecfg_handler; 1387 error = register_mem(&mr); 1388 assert(error == 0); 1389 1390 return (0); 1391 } 1392 1393 static void 1394 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1395 void *arg) 1396 { 1397 1398 dsdt_line(" Package ()"); 1399 dsdt_line(" {"); 1400 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1401 dsdt_line(" 0x%02X,", pin - 1); 1402 dsdt_line(" Zero,"); 1403 dsdt_line(" 0x%X", ioapic_irq); 1404 dsdt_line(" },"); 1405 } 1406 1407 static void 1408 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1409 void *arg) 1410 { 1411 char *name; 1412 1413 name = lpc_pirq_name(pirq_pin); 1414 if (name == NULL) 1415 return; 1416 dsdt_line(" Package ()"); 1417 dsdt_line(" {"); 1418 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1419 dsdt_line(" 0x%02X,", pin - 1); 1420 dsdt_line(" %s,", name); 1421 dsdt_line(" 0x00"); 1422 dsdt_line(" },"); 1423 free(name); 1424 } 1425 1426 /* 1427 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1428 * corresponding to each PCI bus. 1429 */ 1430 static void 1431 pci_bus_write_dsdt(int bus) 1432 { 1433 struct businfo *bi; 1434 struct slotinfo *si; 1435 struct pci_devinst *pi; 1436 int count, func, slot; 1437 1438 /* 1439 * If there are no devices on this 'bus' then just return. 
1440 */ 1441 if ((bi = pci_businfo[bus]) == NULL) { 1442 /* 1443 * Bus 0 is special because it decodes the I/O ports used 1444 * for PCI config space access even if there are no devices 1445 * on it. 1446 */ 1447 if (bus != 0) 1448 return; 1449 } 1450 1451 dsdt_line(" Device (PC%02X)", bus); 1452 dsdt_line(" {"); 1453 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1454 1455 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1456 dsdt_line(" {"); 1457 dsdt_line(" Return (0x%08X)", bus); 1458 dsdt_line(" }"); 1459 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1460 dsdt_line(" {"); 1461 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1462 "MaxFixed, PosDecode,"); 1463 dsdt_line(" 0x0000, // Granularity"); 1464 dsdt_line(" 0x%04X, // Range Minimum", bus); 1465 dsdt_line(" 0x%04X, // Range Maximum", bus); 1466 dsdt_line(" 0x0000, // Translation Offset"); 1467 dsdt_line(" 0x0001, // Length"); 1468 dsdt_line(" ,, )"); 1469 1470 if (bus == 0) { 1471 dsdt_indent(3); 1472 dsdt_fixed_ioport(0xCF8, 8); 1473 dsdt_unindent(3); 1474 1475 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1476 "PosDecode, EntireRange,"); 1477 dsdt_line(" 0x0000, // Granularity"); 1478 dsdt_line(" 0x0000, // Range Minimum"); 1479 dsdt_line(" 0x0CF7, // Range Maximum"); 1480 dsdt_line(" 0x0000, // Translation Offset"); 1481 dsdt_line(" 0x0CF8, // Length"); 1482 dsdt_line(" ,, , TypeStatic)"); 1483 1484 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1485 "PosDecode, EntireRange,"); 1486 dsdt_line(" 0x0000, // Granularity"); 1487 dsdt_line(" 0x0D00, // Range Minimum"); 1488 dsdt_line(" 0x%04X, // Range Maximum", 1489 PCI_EMUL_IOBASE - 1); 1490 dsdt_line(" 0x0000, // Translation Offset"); 1491 dsdt_line(" 0x%04X, // Length", 1492 PCI_EMUL_IOBASE - 0x0D00); 1493 dsdt_line(" ,, , TypeStatic)"); 1494 1495 if (bi == NULL) { 1496 dsdt_line(" })"); 1497 goto done; 1498 } 1499 } 1500 assert(bi != NULL); 1501 1502 /* i/o window */ 1503 dsdt_line(" WordIO (ResourceProducer, 
MinFixed, MaxFixed, " 1504 "PosDecode, EntireRange,"); 1505 dsdt_line(" 0x0000, // Granularity"); 1506 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1507 dsdt_line(" 0x%04X, // Range Maximum", 1508 bi->iolimit - 1); 1509 dsdt_line(" 0x0000, // Translation Offset"); 1510 dsdt_line(" 0x%04X, // Length", 1511 bi->iolimit - bi->iobase); 1512 dsdt_line(" ,, , TypeStatic)"); 1513 1514 /* mmio window (32-bit) */ 1515 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1516 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1517 dsdt_line(" 0x00000000, // Granularity"); 1518 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1519 dsdt_line(" 0x%08X, // Range Maximum\n", 1520 bi->memlimit32 - 1); 1521 dsdt_line(" 0x00000000, // Translation Offset"); 1522 dsdt_line(" 0x%08X, // Length\n", 1523 bi->memlimit32 - bi->membase32); 1524 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1525 1526 /* mmio window (64-bit) */ 1527 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1528 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1529 dsdt_line(" 0x0000000000000000, // Granularity"); 1530 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1531 dsdt_line(" 0x%016lX, // Range Maximum\n", 1532 bi->memlimit64 - 1); 1533 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1534 dsdt_line(" 0x%016lX, // Length\n", 1535 bi->memlimit64 - bi->membase64); 1536 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1537 dsdt_line(" })"); 1538 1539 count = pci_count_lintr(bus); 1540 if (count != 0) { 1541 dsdt_indent(2); 1542 dsdt_line("Name (PPRT, Package ()"); 1543 dsdt_line("{"); 1544 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1545 dsdt_line("})"); 1546 dsdt_line("Name (APRT, Package ()"); 1547 dsdt_line("{"); 1548 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1549 dsdt_line("})"); 1550 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1551 dsdt_line("{"); 1552 dsdt_line(" If (PICM)"); 1553 dsdt_line(" {"); 1554 dsdt_line(" Return (APRT)"); 1555 dsdt_line(" 
}"); 1556 dsdt_line(" Else"); 1557 dsdt_line(" {"); 1558 dsdt_line(" Return (PPRT)"); 1559 dsdt_line(" }"); 1560 dsdt_line("}"); 1561 dsdt_unindent(2); 1562 } 1563 1564 dsdt_indent(2); 1565 for (slot = 0; slot < MAXSLOTS; slot++) { 1566 si = &bi->slotinfo[slot]; 1567 for (func = 0; func < MAXFUNCS; func++) { 1568 pi = si->si_funcs[func].fi_devi; 1569 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1570 pi->pi_d->pe_write_dsdt(pi); 1571 } 1572 } 1573 dsdt_unindent(2); 1574 done: 1575 dsdt_line(" }"); 1576 } 1577 1578 void 1579 pci_write_dsdt(void) 1580 { 1581 int bus; 1582 1583 dsdt_indent(1); 1584 dsdt_line("Name (PICM, 0x00)"); 1585 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1586 dsdt_line("{"); 1587 dsdt_line(" Store (Arg0, PICM)"); 1588 dsdt_line("}"); 1589 dsdt_line(""); 1590 dsdt_line("Scope (_SB)"); 1591 dsdt_line("{"); 1592 for (bus = 0; bus < MAXBUSES; bus++) 1593 pci_bus_write_dsdt(bus); 1594 dsdt_line("}"); 1595 dsdt_unindent(1); 1596 } 1597 1598 int 1599 pci_bus_configured(int bus) 1600 { 1601 assert(bus >= 0 && bus < MAXBUSES); 1602 return (pci_businfo[bus] != NULL); 1603 } 1604 1605 int 1606 pci_msi_enabled(struct pci_devinst *pi) 1607 { 1608 return (pi->pi_msi.enabled); 1609 } 1610 1611 int 1612 pci_msi_maxmsgnum(struct pci_devinst *pi) 1613 { 1614 if (pi->pi_msi.enabled) 1615 return (pi->pi_msi.maxmsgnum); 1616 else 1617 return (0); 1618 } 1619 1620 int 1621 pci_msix_enabled(struct pci_devinst *pi) 1622 { 1623 1624 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1625 } 1626 1627 void 1628 pci_generate_msix(struct pci_devinst *pi, int index) 1629 { 1630 struct msix_table_entry *mte; 1631 1632 if (!pci_msix_enabled(pi)) 1633 return; 1634 1635 if (pi->pi_msix.function_mask) 1636 return; 1637 1638 if (index >= pi->pi_msix.table_count) 1639 return; 1640 1641 mte = &pi->pi_msix.table[index]; 1642 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1643 /* XXX Set PBA bit if interrupt is disabled */ 1644 vm_lapic_msi(pi->pi_vmctx, mte->addr, 
mte->msg_data); 1645 } 1646 } 1647 1648 void 1649 pci_generate_msi(struct pci_devinst *pi, int index) 1650 { 1651 1652 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1653 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1654 pi->pi_msi.msg_data + index); 1655 } 1656 } 1657 1658 static bool 1659 pci_lintr_permitted(struct pci_devinst *pi) 1660 { 1661 uint16_t cmd; 1662 1663 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1664 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1665 (cmd & PCIM_CMD_INTxDIS))); 1666 } 1667 1668 void 1669 pci_lintr_request(struct pci_devinst *pi) 1670 { 1671 struct businfo *bi; 1672 struct slotinfo *si; 1673 int bestpin, bestcount, pin; 1674 1675 bi = pci_businfo[pi->pi_bus]; 1676 assert(bi != NULL); 1677 1678 /* 1679 * Just allocate a pin from our slot. The pin will be 1680 * assigned IRQs later when interrupts are routed. 1681 */ 1682 si = &bi->slotinfo[pi->pi_slot]; 1683 bestpin = 0; 1684 bestcount = si->si_intpins[0].ii_count; 1685 for (pin = 1; pin < 4; pin++) { 1686 if (si->si_intpins[pin].ii_count < bestcount) { 1687 bestpin = pin; 1688 bestcount = si->si_intpins[pin].ii_count; 1689 } 1690 } 1691 1692 si->si_intpins[bestpin].ii_count++; 1693 pi->pi_lintr.pin = bestpin + 1; 1694 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1695 } 1696 1697 static void 1698 pci_lintr_route(struct pci_devinst *pi) 1699 { 1700 struct businfo *bi; 1701 struct intxinfo *ii; 1702 1703 if (pi->pi_lintr.pin == 0) 1704 return; 1705 1706 bi = pci_businfo[pi->pi_bus]; 1707 assert(bi != NULL); 1708 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1709 1710 /* 1711 * Attempt to allocate an I/O APIC pin for this intpin if one 1712 * is not yet assigned. 1713 */ 1714 if (ii->ii_ioapic_irq == 0) 1715 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1716 assert(ii->ii_ioapic_irq > 0); 1717 1718 /* 1719 * Attempt to allocate a PIRQ pin for this intpin if one is 1720 * not yet assigned. 
1721 */ 1722 if (ii->ii_pirq_pin == 0) 1723 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1724 assert(ii->ii_pirq_pin > 0); 1725 1726 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1727 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1728 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1729 } 1730 1731 void 1732 pci_lintr_assert(struct pci_devinst *pi) 1733 { 1734 1735 assert(pi->pi_lintr.pin > 0); 1736 1737 pthread_mutex_lock(&pi->pi_lintr.lock); 1738 if (pi->pi_lintr.state == IDLE) { 1739 if (pci_lintr_permitted(pi)) { 1740 pi->pi_lintr.state = ASSERTED; 1741 pci_irq_assert(pi); 1742 } else 1743 pi->pi_lintr.state = PENDING; 1744 } 1745 pthread_mutex_unlock(&pi->pi_lintr.lock); 1746 } 1747 1748 void 1749 pci_lintr_deassert(struct pci_devinst *pi) 1750 { 1751 1752 assert(pi->pi_lintr.pin > 0); 1753 1754 pthread_mutex_lock(&pi->pi_lintr.lock); 1755 if (pi->pi_lintr.state == ASSERTED) { 1756 pi->pi_lintr.state = IDLE; 1757 pci_irq_deassert(pi); 1758 } else if (pi->pi_lintr.state == PENDING) 1759 pi->pi_lintr.state = IDLE; 1760 pthread_mutex_unlock(&pi->pi_lintr.lock); 1761 } 1762 1763 static void 1764 pci_lintr_update(struct pci_devinst *pi) 1765 { 1766 1767 pthread_mutex_lock(&pi->pi_lintr.lock); 1768 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1769 pci_irq_deassert(pi); 1770 pi->pi_lintr.state = PENDING; 1771 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1772 pi->pi_lintr.state = ASSERTED; 1773 pci_irq_assert(pi); 1774 } 1775 pthread_mutex_unlock(&pi->pi_lintr.lock); 1776 } 1777 1778 int 1779 pci_count_lintr(int bus) 1780 { 1781 int count, slot, pin; 1782 struct slotinfo *slotinfo; 1783 1784 count = 0; 1785 if (pci_businfo[bus] != NULL) { 1786 for (slot = 0; slot < MAXSLOTS; slot++) { 1787 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1788 for (pin = 0; pin < 4; pin++) { 1789 if (slotinfo->si_intpins[pin].ii_count != 0) 1790 count++; 1791 } 1792 } 1793 } 1794 return (count); 1795 } 1796 1797 void 1798 pci_walk_lintr(int 
bus, pci_lintr_cb cb, void *arg) 1799 { 1800 struct businfo *bi; 1801 struct slotinfo *si; 1802 struct intxinfo *ii; 1803 int slot, pin; 1804 1805 if ((bi = pci_businfo[bus]) == NULL) 1806 return; 1807 1808 for (slot = 0; slot < MAXSLOTS; slot++) { 1809 si = &bi->slotinfo[slot]; 1810 for (pin = 0; pin < 4; pin++) { 1811 ii = &si->si_intpins[pin]; 1812 if (ii->ii_count != 0) 1813 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1814 ii->ii_ioapic_irq, arg); 1815 } 1816 } 1817 } 1818 1819 /* 1820 * Return 1 if the emulated device in 'slot' is a multi-function device. 1821 * Return 0 otherwise. 1822 */ 1823 static int 1824 pci_emul_is_mfdev(int bus, int slot) 1825 { 1826 struct businfo *bi; 1827 struct slotinfo *si; 1828 int f, numfuncs; 1829 1830 numfuncs = 0; 1831 if ((bi = pci_businfo[bus]) != NULL) { 1832 si = &bi->slotinfo[slot]; 1833 for (f = 0; f < MAXFUNCS; f++) { 1834 if (si->si_funcs[f].fi_devi != NULL) { 1835 numfuncs++; 1836 } 1837 } 1838 } 1839 return (numfuncs > 1); 1840 } 1841 1842 /* 1843 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1844 * whether or not is a multi-function being emulated in the pci 'slot'. 1845 */ 1846 static void 1847 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1848 { 1849 int mfdev; 1850 1851 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1852 mfdev = pci_emul_is_mfdev(bus, slot); 1853 switch (bytes) { 1854 case 1: 1855 case 2: 1856 *rv &= ~PCIM_MFDEV; 1857 if (mfdev) { 1858 *rv |= PCIM_MFDEV; 1859 } 1860 break; 1861 case 4: 1862 *rv &= ~(PCIM_MFDEV << 16); 1863 if (mfdev) { 1864 *rv |= (PCIM_MFDEV << 16); 1865 } 1866 break; 1867 } 1868 } 1869 } 1870 1871 /* 1872 * Update device state in response to changes to the PCI command 1873 * register. 
1874 */ 1875 void 1876 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1877 { 1878 int i; 1879 uint16_t changed, new; 1880 1881 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1882 changed = old ^ new; 1883 1884 /* 1885 * If the MMIO or I/O address space decoding has changed then 1886 * register/unregister all BARs that decode that address space. 1887 */ 1888 for (i = 0; i <= PCI_BARMAX; i++) { 1889 switch (pi->pi_bar[i].type) { 1890 case PCIBAR_NONE: 1891 case PCIBAR_MEMHI64: 1892 break; 1893 case PCIBAR_IO: 1894 /* I/O address space decoding changed? */ 1895 if (changed & PCIM_CMD_PORTEN) { 1896 if (new & PCIM_CMD_PORTEN) 1897 register_bar(pi, i); 1898 else 1899 unregister_bar(pi, i); 1900 } 1901 break; 1902 case PCIBAR_MEM32: 1903 case PCIBAR_MEM64: 1904 /* MMIO address space decoding changed? */ 1905 if (changed & PCIM_CMD_MEMEN) { 1906 if (new & PCIM_CMD_MEMEN) 1907 register_bar(pi, i); 1908 else 1909 unregister_bar(pi, i); 1910 } 1911 break; 1912 default: 1913 assert(0); 1914 } 1915 } 1916 1917 /* 1918 * If INTx has been unmasked and is pending, assert the 1919 * interrupt. 1920 */ 1921 pci_lintr_update(pi); 1922 } 1923 1924 static void 1925 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1926 { 1927 int rshift; 1928 uint32_t cmd, old, readonly; 1929 1930 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1931 1932 /* 1933 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1934 * 1935 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1936 * 'write 1 to clear'. However these bits are not set to '1' by 1937 * any device emulation so it is simpler to treat them as readonly. 
1938 */ 1939 rshift = (coff & 0x3) * 8; 1940 readonly = 0xFFFFF880 >> rshift; 1941 1942 old = CFGREAD(pi, coff, bytes); 1943 new &= ~readonly; 1944 new |= (old & readonly); 1945 CFGWRITE(pi, coff, new, bytes); /* update config */ 1946 1947 pci_emul_cmd_changed(pi, cmd); 1948 } 1949 1950 static void 1951 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1952 int coff, int bytes, uint32_t *eax) 1953 { 1954 struct businfo *bi; 1955 struct slotinfo *si; 1956 struct pci_devinst *pi; 1957 struct pci_devemu *pe; 1958 int idx, needcfg; 1959 uint64_t addr, bar, mask; 1960 1961 if ((bi = pci_businfo[bus]) != NULL) { 1962 si = &bi->slotinfo[slot]; 1963 pi = si->si_funcs[func].fi_devi; 1964 } else 1965 pi = NULL; 1966 1967 /* 1968 * Just return if there is no device at this slot:func or if the 1969 * the guest is doing an un-aligned access. 1970 */ 1971 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1972 (coff & (bytes - 1)) != 0) { 1973 if (in) 1974 *eax = 0xffffffff; 1975 return; 1976 } 1977 1978 /* 1979 * Ignore all writes beyond the standard config space and return all 1980 * ones on reads. 1981 */ 1982 if (coff >= PCI_REGMAX + 1) { 1983 if (in) { 1984 *eax = 0xffffffff; 1985 /* 1986 * Extended capabilities begin at offset 256 in config 1987 * space. Absence of extended capabilities is signaled 1988 * with all 0s in the extended capability header at 1989 * offset 256. 
1990 */ 1991 if (coff <= PCI_REGMAX + 4) 1992 *eax = 0x00000000; 1993 } 1994 return; 1995 } 1996 1997 pe = pi->pi_d; 1998 1999 /* 2000 * Config read 2001 */ 2002 if (in) { 2003 /* Let the device emulation override the default handler */ 2004 if (pe->pe_cfgread != NULL) { 2005 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 2006 eax); 2007 } else { 2008 needcfg = 1; 2009 } 2010 2011 if (needcfg) 2012 *eax = CFGREAD(pi, coff, bytes); 2013 2014 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 2015 } else { 2016 /* Let the device emulation override the default handler */ 2017 if (pe->pe_cfgwrite != NULL && 2018 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 2019 return; 2020 2021 /* 2022 * Special handling for write to BAR registers 2023 */ 2024 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 2025 /* 2026 * Ignore writes to BAR registers that are not 2027 * 4-byte aligned. 2028 */ 2029 if (bytes != 4 || (coff & 0x3) != 0) 2030 return; 2031 idx = (coff - PCIR_BAR(0)) / 4; 2032 mask = ~(pi->pi_bar[idx].size - 1); 2033 switch (pi->pi_bar[idx].type) { 2034 case PCIBAR_NONE: 2035 pi->pi_bar[idx].addr = bar = 0; 2036 break; 2037 case PCIBAR_IO: 2038 addr = *eax & mask; 2039 addr &= 0xffff; 2040 bar = addr | pi->pi_bar[idx].lobits; 2041 /* 2042 * Register the new BAR value for interception 2043 */ 2044 if (addr != pi->pi_bar[idx].addr) { 2045 update_bar_address(pi, addr, idx, 2046 PCIBAR_IO); 2047 } 2048 break; 2049 case PCIBAR_MEM32: 2050 addr = bar = *eax & mask; 2051 bar |= pi->pi_bar[idx].lobits; 2052 if (addr != pi->pi_bar[idx].addr) { 2053 update_bar_address(pi, addr, idx, 2054 PCIBAR_MEM32); 2055 } 2056 break; 2057 case PCIBAR_MEM64: 2058 addr = bar = *eax & mask; 2059 bar |= pi->pi_bar[idx].lobits; 2060 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2061 update_bar_address(pi, addr, idx, 2062 PCIBAR_MEM64); 2063 } 2064 break; 2065 case PCIBAR_MEMHI64: 2066 mask = ~(pi->pi_bar[idx - 1].size - 1); 2067 addr = ((uint64_t)*eax << 32) & 
mask; 2068 bar = addr >> 32; 2069 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2070 update_bar_address(pi, addr, idx - 1, 2071 PCIBAR_MEMHI64); 2072 } 2073 break; 2074 default: 2075 assert(0); 2076 } 2077 pci_set_cfgdata32(pi, coff, bar); 2078 2079 } else if (pci_emul_iscap(pi, coff)) { 2080 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 2081 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2082 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2083 } else { 2084 CFGWRITE(pi, coff, *eax, bytes); 2085 } 2086 } 2087 } 2088 2089 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2090 2091 static int 2092 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2093 uint32_t *eax, void *arg) 2094 { 2095 uint32_t x; 2096 2097 if (bytes != 4) { 2098 if (in) 2099 *eax = (bytes == 2) ? 0xffff : 0xff; 2100 return (0); 2101 } 2102 2103 if (in) { 2104 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2105 if (cfgenable) 2106 x |= CONF1_ENABLE; 2107 *eax = x; 2108 } else { 2109 x = *eax; 2110 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2111 cfgoff = (x & PCI_REGMAX) & ~0x03; 2112 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2113 cfgslot = (x >> 11) & PCI_SLOTMAX; 2114 cfgbus = (x >> 16) & PCI_BUSMAX; 2115 } 2116 2117 return (0); 2118 } 2119 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2120 2121 static int 2122 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2123 uint32_t *eax, void *arg) 2124 { 2125 int coff; 2126 2127 assert(bytes == 1 || bytes == 2 || bytes == 4); 2128 2129 coff = cfgoff + (port - CONF1_DATA_PORT); 2130 if (cfgenable) { 2131 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2132 eax); 2133 } else { 2134 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2135 if (in) 2136 *eax = 0xffffffff; 2137 } 2138 return (0); 2139 } 2140 2141 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2142 INOUT_PORT(pci_cfgdata, 
CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2143 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2144 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2145 2146 #ifdef BHYVE_SNAPSHOT 2147 /* 2148 * Saves/restores PCI device emulated state. Returns 0 on success. 2149 */ 2150 static int 2151 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 2152 { 2153 struct pci_devinst *pi; 2154 int i; 2155 int ret; 2156 2157 pi = meta->dev_data; 2158 2159 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 2160 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 2161 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 2162 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 2163 2164 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 2165 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 2166 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 2167 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 2168 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 2169 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 2170 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 2171 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 2172 2173 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 2174 meta, ret, done); 2175 2176 for (i = 0; i < nitems(pi->pi_bar); i++) { 2177 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2178 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2179 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2180 } 2181 2182 /* Restore MSI-X table. 
*/ 2183 for (i = 0; i < pi->pi_msix.table_count; i++) { 2184 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2185 meta, ret, done); 2186 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2187 meta, ret, done); 2188 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2189 meta, ret, done); 2190 } 2191 2192 done: 2193 return (ret); 2194 } 2195 2196 static int 2197 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2198 struct pci_devinst **pdi) 2199 { 2200 struct businfo *bi; 2201 struct slotinfo *si; 2202 struct funcinfo *fi; 2203 int bus, slot, func; 2204 2205 assert(dev_name != NULL); 2206 assert(pde != NULL); 2207 assert(pdi != NULL); 2208 2209 for (bus = 0; bus < MAXBUSES; bus++) { 2210 if ((bi = pci_businfo[bus]) == NULL) 2211 continue; 2212 2213 for (slot = 0; slot < MAXSLOTS; slot++) { 2214 si = &bi->slotinfo[slot]; 2215 for (func = 0; func < MAXFUNCS; func++) { 2216 fi = &si->si_funcs[func]; 2217 if (fi->fi_pde == NULL) 2218 continue; 2219 if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) 2220 continue; 2221 2222 *pde = fi->fi_pde; 2223 *pdi = fi->fi_devi; 2224 return (0); 2225 } 2226 } 2227 } 2228 2229 return (EINVAL); 2230 } 2231 2232 int 2233 pci_snapshot(struct vm_snapshot_meta *meta) 2234 { 2235 struct pci_devemu *pde; 2236 struct pci_devinst *pdi; 2237 int ret; 2238 2239 assert(meta->dev_name != NULL); 2240 2241 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2242 if (ret != 0) { 2243 fprintf(stderr, "%s: no such name: %s\r\n", 2244 __func__, meta->dev_name); 2245 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2246 return (0); 2247 } 2248 2249 meta->dev_data = pdi; 2250 2251 if (pde->pe_snapshot == NULL) { 2252 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2253 __func__, meta->dev_name); 2254 return (-1); 2255 } 2256 2257 ret = pci_snapshot_pci_dev(meta); 2258 if (ret != 0) { 2259 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2260 __func__); 2261 return (-1); 2262 } 2263 2264 ret = 
(*pde->pe_snapshot)(meta); 2265 2266 return (ret); 2267 } 2268 2269 int 2270 pci_pause(struct vmctx *ctx, const char *dev_name) 2271 { 2272 struct pci_devemu *pde; 2273 struct pci_devinst *pdi; 2274 int ret; 2275 2276 assert(dev_name != NULL); 2277 2278 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2279 if (ret != 0) { 2280 /* 2281 * It is possible to call this function without 2282 * checking that the device is inserted first. 2283 */ 2284 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2285 return (0); 2286 } 2287 2288 if (pde->pe_pause == NULL) { 2289 /* The pause/resume functionality is optional. */ 2290 fprintf(stderr, "%s: not implemented for: %s\n", 2291 __func__, dev_name); 2292 return (0); 2293 } 2294 2295 return (*pde->pe_pause)(ctx, pdi); 2296 } 2297 2298 int 2299 pci_resume(struct vmctx *ctx, const char *dev_name) 2300 { 2301 struct pci_devemu *pde; 2302 struct pci_devinst *pdi; 2303 int ret; 2304 2305 assert(dev_name != NULL); 2306 2307 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2308 if (ret != 0) { 2309 /* 2310 * It is possible to call this function without 2311 * checking that the device is inserted first. 2312 */ 2313 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2314 return (0); 2315 } 2316 2317 if (pde->pe_resume == NULL) { 2318 /* The pause/resume functionality is optional. 
*/ 2319 fprintf(stderr, "%s: not implemented for: %s\n", 2320 __func__, dev_name); 2321 return (0); 2322 } 2323 2324 return (*pde->pe_resume)(ctx, pdi); 2325 } 2326 #endif 2327 2328 #define PCI_EMUL_TEST 2329 #ifdef PCI_EMUL_TEST 2330 /* 2331 * Define a dummy test device 2332 */ 2333 #define DIOSZ 8 2334 #define DMEMSZ 4096 2335 struct pci_emul_dsoftc { 2336 uint8_t ioregs[DIOSZ]; 2337 uint8_t memregs[2][DMEMSZ]; 2338 }; 2339 2340 #define PCI_EMUL_MSI_MSGS 4 2341 #define PCI_EMUL_MSIX_MSGS 16 2342 2343 static int 2344 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2345 { 2346 int error; 2347 struct pci_emul_dsoftc *sc; 2348 2349 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2350 2351 pi->pi_arg = sc; 2352 2353 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2354 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2355 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2356 2357 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2358 assert(error == 0); 2359 2360 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2361 assert(error == 0); 2362 2363 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2364 assert(error == 0); 2365 2366 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2367 assert(error == 0); 2368 2369 return (0); 2370 } 2371 2372 static void 2373 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2374 uint64_t offset, int size, uint64_t value) 2375 { 2376 int i; 2377 struct pci_emul_dsoftc *sc = pi->pi_arg; 2378 2379 if (baridx == 0) { 2380 if (offset + size > DIOSZ) { 2381 printf("diow: iow too large, offset %ld size %d\n", 2382 offset, size); 2383 return; 2384 } 2385 2386 if (size == 1) { 2387 sc->ioregs[offset] = value & 0xff; 2388 } else if (size == 2) { 2389 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2390 } else if (size == 4) { 2391 *(uint32_t *)&sc->ioregs[offset] = value; 2392 } else { 2393 printf("diow: iow unknown size %d\n", size); 2394 } 2395 2396 /* 2397 * Special magic value to 
generate an interrupt 2398 */ 2399 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2400 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2401 2402 if (value == 0xabcdef) { 2403 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2404 pci_generate_msi(pi, i); 2405 } 2406 } 2407 2408 if (baridx == 1 || baridx == 2) { 2409 if (offset + size > DMEMSZ) { 2410 printf("diow: memw too large, offset %ld size %d\n", 2411 offset, size); 2412 return; 2413 } 2414 2415 i = baridx - 1; /* 'memregs' index */ 2416 2417 if (size == 1) { 2418 sc->memregs[i][offset] = value; 2419 } else if (size == 2) { 2420 *(uint16_t *)&sc->memregs[i][offset] = value; 2421 } else if (size == 4) { 2422 *(uint32_t *)&sc->memregs[i][offset] = value; 2423 } else if (size == 8) { 2424 *(uint64_t *)&sc->memregs[i][offset] = value; 2425 } else { 2426 printf("diow: memw unknown size %d\n", size); 2427 } 2428 2429 /* 2430 * magic interrupt ?? 2431 */ 2432 } 2433 2434 if (baridx > 2 || baridx < 0) { 2435 printf("diow: unknown bar idx %d\n", baridx); 2436 } 2437 } 2438 2439 static uint64_t 2440 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2441 uint64_t offset, int size) 2442 { 2443 struct pci_emul_dsoftc *sc = pi->pi_arg; 2444 uint32_t value; 2445 int i; 2446 2447 if (baridx == 0) { 2448 if (offset + size > DIOSZ) { 2449 printf("dior: ior too large, offset %ld size %d\n", 2450 offset, size); 2451 return (0); 2452 } 2453 2454 value = 0; 2455 if (size == 1) { 2456 value = sc->ioregs[offset]; 2457 } else if (size == 2) { 2458 value = *(uint16_t *) &sc->ioregs[offset]; 2459 } else if (size == 4) { 2460 value = *(uint32_t *) &sc->ioregs[offset]; 2461 } else { 2462 printf("dior: ior unknown size %d\n", size); 2463 } 2464 } 2465 2466 if (baridx == 1 || baridx == 2) { 2467 if (offset + size > DMEMSZ) { 2468 printf("dior: memr too large, offset %ld size %d\n", 2469 offset, size); 2470 return (0); 2471 } 2472 2473 i = baridx - 1; /* 'memregs' index */ 2474 2475 if (size == 1) { 2476 
value = sc->memregs[i][offset]; 2477 } else if (size == 2) { 2478 value = *(uint16_t *) &sc->memregs[i][offset]; 2479 } else if (size == 4) { 2480 value = *(uint32_t *) &sc->memregs[i][offset]; 2481 } else if (size == 8) { 2482 value = *(uint64_t *) &sc->memregs[i][offset]; 2483 } else { 2484 printf("dior: ior unknown size %d\n", size); 2485 } 2486 } 2487 2488 2489 if (baridx > 2 || baridx < 0) { 2490 printf("dior: unknown bar idx %d\n", baridx); 2491 return (0); 2492 } 2493 2494 return (value); 2495 } 2496 2497 #ifdef BHYVE_SNAPSHOT 2498 int 2499 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2500 { 2501 2502 return (0); 2503 } 2504 #endif 2505 2506 struct pci_devemu pci_dummy = { 2507 .pe_emu = "dummy", 2508 .pe_init = pci_emul_dinit, 2509 .pe_barwrite = pci_emul_diow, 2510 .pe_barread = pci_emul_dior, 2511 #ifdef BHYVE_SNAPSHOT 2512 .pe_snapshot = pci_emul_snapshot, 2513 #endif 2514 }; 2515 PCI_EMUL_SET(pci_dummy); 2516 2517 #endif /* PCI_EMUL_TEST */ 2518