/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/linker_set.h> 49 #include <sys/mman.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <errno.h> 54 #include <pthread.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <strings.h> 59 #include <assert.h> 60 #include <stdbool.h> 61 #include <sysexits.h> 62 63 #include <machine/vmm.h> 64 #include <vmmapi.h> 65 66 #include "acpi.h" 67 #include "bhyverun.h" 68 #include "config.h" 69 #include "debug.h" 70 #include "inout.h" 71 #include "ioapic.h" 72 #include "mem.h" 73 #include "pci_emul.h" 74 #include "pci_irq.h" 75 #include "pci_lpc.h" 76 77 #define CONF1_ADDR_PORT 0x0cf8 78 #define CONF1_DATA_PORT 0x0cfc 79 80 #define CONF1_ENABLE 0x80000000ul 81 82 #define MAXBUSES (PCI_BUSMAX + 1) 83 #define MAXSLOTS (PCI_SLOTMAX + 1) 84 #define MAXFUNCS (PCI_FUNCMAX + 1) 85 86 #define GB (1024 * 1024 * 1024UL) 87 88 struct funcinfo { 89 nvlist_t *fi_config; 90 struct pci_devemu *fi_pde; 91 struct pci_devinst *fi_devi; 92 }; 93 94 struct intxinfo { 95 int ii_count; 96 int ii_pirq_pin; 97 int ii_ioapic_irq; 98 }; 99 100 struct slotinfo { 101 struct intxinfo si_intpins[4]; 102 struct funcinfo si_funcs[MAXFUNCS]; 103 }; 104 105 struct businfo { 106 uint16_t iobase, iolimit; /* I/O window */ 107 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 108 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 109 struct slotinfo slotinfo[MAXSLOTS]; 110 }; 111 112 static struct businfo *pci_businfo[MAXBUSES]; 113 114 SET_DECLARE(pci_devemu_set, struct pci_devemu); 115 116 static uint64_t pci_emul_iobase; 117 static uint8_t *pci_emul_rombase; 118 static uint64_t pci_emul_romoffset; 119 static uint8_t *pci_emul_romlim; 120 static uint64_t pci_emul_membase32; 121 static uint64_t pci_emul_membase64; 122 static uint64_t pci_emul_memlim64; 123 124 struct pci_bar_allocation { 125 TAILQ_ENTRY(pci_bar_allocation) chain; 126 struct pci_devinst 
*pdi; 127 int idx; 128 enum pcibar_type type; 129 uint64_t size; 130 }; 131 TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER( 132 pci_bars); 133 134 #define PCI_EMUL_IOBASE 0x2000 135 #define PCI_EMUL_IOLIMIT 0x10000 136 137 #define PCI_EMUL_ROMSIZE 0x10000000 138 139 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 140 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 141 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 142 143 /* 144 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 145 * change this address without changing it in OVMF. 146 */ 147 #define PCI_EMUL_MEMBASE32 0xC0000000 148 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 149 #define PCI_EMUL_MEMSIZE64 (32*GB) 150 151 static struct pci_devemu *pci_emul_finddev(const char *name); 152 static void pci_lintr_route(struct pci_devinst *pi); 153 static void pci_lintr_update(struct pci_devinst *pi); 154 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 155 int func, int coff, int bytes, uint32_t *val); 156 157 static __inline void 158 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 159 { 160 161 if (bytes == 1) 162 pci_set_cfgdata8(pi, coff, val); 163 else if (bytes == 2) 164 pci_set_cfgdata16(pi, coff, val); 165 else 166 pci_set_cfgdata32(pi, coff, val); 167 } 168 169 static __inline uint32_t 170 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 171 { 172 173 if (bytes == 1) 174 return (pci_get_cfgdata8(pi, coff)); 175 else if (bytes == 2) 176 return (pci_get_cfgdata16(pi, coff)); 177 else 178 return (pci_get_cfgdata32(pi, coff)); 179 } 180 181 static int 182 is_pcir_bar(int coff) 183 { 184 return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)); 185 } 186 187 static int 188 is_pcir_bios(int coff) 189 { 190 return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4); 191 } 192 193 /* 194 * I/O access 195 */ 196 197 /* 198 * Slot options are in the form: 199 * 200 * 
<bus>:<slot>:<func>,<emul>[,<config>] 201 * <slot>[:<func>],<emul>[,<config>] 202 * 203 * slot is 0..31 204 * func is 0..7 205 * emul is a string describing the type of PCI device e.g. virtio-net 206 * config is an optional string, depending on the device, that can be 207 * used for configuration. 208 * Examples are: 209 * 1,virtio-net,tap0 210 * 3:0,dummy 211 */ 212 static void 213 pci_parse_slot_usage(char *aopt) 214 { 215 216 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 217 } 218 219 /* 220 * Helper function to parse a list of comma-separated options where 221 * each option is formatted as "name[=value]". If no value is 222 * provided, the option is treated as a boolean and is given a value 223 * of true. 224 */ 225 int 226 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 227 { 228 char *config, *name, *tofree, *value; 229 230 if (opt == NULL) 231 return (0); 232 233 config = tofree = strdup(opt); 234 while ((name = strsep(&config, ",")) != NULL) { 235 value = strchr(name, '='); 236 if (value != NULL) { 237 *value = '\0'; 238 value++; 239 set_config_value_node(nvl, name, value); 240 } else 241 set_config_bool_node(nvl, name, true); 242 } 243 free(tofree); 244 return (0); 245 } 246 247 /* 248 * PCI device configuration is stored in MIBs that encode the device's 249 * location: 250 * 251 * pci.<bus>.<slot>.<func> 252 * 253 * Where "bus", "slot", and "func" are all decimal values without 254 * leading zeroes. Each valid device must have a "device" node which 255 * identifies the driver model of the device. 256 * 257 * Device backends can provide a parser for the "config" string. If 258 * a custom parser is not provided, pci_parse_legacy_config() is used 259 * to parse the string. 
260 */ 261 int 262 pci_parse_slot(char *opt) 263 { 264 char node_name[sizeof("pci.XXX.XX.X")]; 265 struct pci_devemu *pde; 266 char *emul, *config, *str, *cp; 267 int error, bnum, snum, fnum; 268 nvlist_t *nvl; 269 270 error = -1; 271 str = strdup(opt); 272 273 emul = config = NULL; 274 if ((cp = strchr(str, ',')) != NULL) { 275 *cp = '\0'; 276 emul = cp + 1; 277 if ((cp = strchr(emul, ',')) != NULL) { 278 *cp = '\0'; 279 config = cp + 1; 280 } 281 } else { 282 pci_parse_slot_usage(opt); 283 goto done; 284 } 285 286 /* <bus>:<slot>:<func> */ 287 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 288 bnum = 0; 289 /* <slot>:<func> */ 290 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 291 fnum = 0; 292 /* <slot> */ 293 if (sscanf(str, "%d", &snum) != 1) { 294 snum = -1; 295 } 296 } 297 } 298 299 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 300 fnum < 0 || fnum >= MAXFUNCS) { 301 pci_parse_slot_usage(opt); 302 goto done; 303 } 304 305 pde = pci_emul_finddev(emul); 306 if (pde == NULL) { 307 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 308 fnum, emul); 309 goto done; 310 } 311 312 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 313 fnum); 314 nvl = find_config_node(node_name); 315 if (nvl != NULL) { 316 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 317 fnum); 318 goto done; 319 } 320 nvl = create_config_node(node_name); 321 if (pde->pe_alias != NULL) 322 set_config_value_node(nvl, "device", pde->pe_alias); 323 else 324 set_config_value_node(nvl, "device", pde->pe_emu); 325 326 if (pde->pe_legacy_config != NULL) 327 error = pde->pe_legacy_config(nvl, config); 328 else 329 error = pci_parse_legacy_config(nvl, config); 330 done: 331 free(str); 332 return (error); 333 } 334 335 void 336 pci_print_supported_devices(void) 337 { 338 struct pci_devemu **pdpp, *pdp; 339 340 SET_FOREACH(pdpp, pci_devemu_set) { 341 pdp = *pdpp; 342 printf("%s\n", pdp->pe_emu); 343 } 344 } 345 346 static int 347 
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 348 { 349 350 if (offset < pi->pi_msix.pba_offset) 351 return (0); 352 353 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 354 return (0); 355 } 356 357 return (1); 358 } 359 360 int 361 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 362 uint64_t value) 363 { 364 int msix_entry_offset; 365 int tab_index; 366 char *dest; 367 368 /* support only 4 or 8 byte writes */ 369 if (size != 4 && size != 8) 370 return (-1); 371 372 /* 373 * Return if table index is beyond what device supports 374 */ 375 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 376 if (tab_index >= pi->pi_msix.table_count) 377 return (-1); 378 379 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 380 381 /* support only aligned writes */ 382 if ((msix_entry_offset % size) != 0) 383 return (-1); 384 385 dest = (char *)(pi->pi_msix.table + tab_index); 386 dest += msix_entry_offset; 387 388 if (size == 4) 389 *((uint32_t *)dest) = value; 390 else 391 *((uint64_t *)dest) = value; 392 393 return (0); 394 } 395 396 uint64_t 397 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 398 { 399 char *dest; 400 int msix_entry_offset; 401 int tab_index; 402 uint64_t retval = ~0; 403 404 /* 405 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 406 * table but we also allow 1 byte access to accommodate reads from 407 * ddb. 
408 */ 409 if (size != 1 && size != 4 && size != 8) 410 return (retval); 411 412 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 413 414 /* support only aligned reads */ 415 if ((msix_entry_offset % size) != 0) { 416 return (retval); 417 } 418 419 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 420 421 if (tab_index < pi->pi_msix.table_count) { 422 /* valid MSI-X Table access */ 423 dest = (char *)(pi->pi_msix.table + tab_index); 424 dest += msix_entry_offset; 425 426 if (size == 1) 427 retval = *((uint8_t *)dest); 428 else if (size == 4) 429 retval = *((uint32_t *)dest); 430 else 431 retval = *((uint64_t *)dest); 432 } else if (pci_valid_pba_offset(pi, offset)) { 433 /* return 0 for PBA access */ 434 retval = 0; 435 } 436 437 return (retval); 438 } 439 440 int 441 pci_msix_table_bar(struct pci_devinst *pi) 442 { 443 444 if (pi->pi_msix.table != NULL) 445 return (pi->pi_msix.table_bar); 446 else 447 return (-1); 448 } 449 450 int 451 pci_msix_pba_bar(struct pci_devinst *pi) 452 { 453 454 if (pi->pi_msix.table != NULL) 455 return (pi->pi_msix.pba_bar); 456 else 457 return (-1); 458 } 459 460 static int 461 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 462 uint32_t *eax, void *arg) 463 { 464 struct pci_devinst *pdi = arg; 465 struct pci_devemu *pe = pdi->pi_d; 466 uint64_t offset; 467 int i; 468 469 for (i = 0; i <= PCI_BARMAX; i++) { 470 if (pdi->pi_bar[i].type == PCIBAR_IO && 471 port >= pdi->pi_bar[i].addr && 472 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 473 offset = port - pdi->pi_bar[i].addr; 474 if (in) 475 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 476 offset, bytes); 477 else 478 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 479 bytes, *eax); 480 return (0); 481 } 482 } 483 return (-1); 484 } 485 486 static int 487 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 488 int size, uint64_t *val, void *arg1, long arg2) 489 { 490 struct pci_devinst *pdi = arg1; 491 struct pci_devemu *pe = 
pdi->pi_d; 492 uint64_t offset; 493 int bidx = (int) arg2; 494 495 assert(bidx <= PCI_BARMAX); 496 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 497 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 498 assert(addr >= pdi->pi_bar[bidx].addr && 499 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 500 501 offset = addr - pdi->pi_bar[bidx].addr; 502 503 if (dir == MEM_F_WRITE) { 504 if (size == 8) { 505 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 506 4, *val & 0xffffffff); 507 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 508 4, *val >> 32); 509 } else { 510 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 511 size, *val); 512 } 513 } else { 514 if (size == 8) { 515 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 516 offset, 4); 517 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 518 offset + 4, 4) << 32; 519 } else { 520 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 521 offset, size); 522 } 523 } 524 525 return (0); 526 } 527 528 529 static int 530 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 531 uint64_t *addr) 532 { 533 uint64_t base; 534 535 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 536 537 base = roundup2(*baseptr, size); 538 539 if (base + size <= limit) { 540 *addr = base; 541 *baseptr = base + size; 542 return (0); 543 } else 544 return (-1); 545 } 546 547 /* 548 * Register (or unregister) the MMIO or I/O region associated with the BAR 549 * register 'idx' of an emulated pci device. 
550 */ 551 static void 552 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 553 { 554 struct pci_devemu *pe; 555 int error; 556 struct inout_port iop; 557 struct mem_range mr; 558 559 pe = pi->pi_d; 560 switch (pi->pi_bar[idx].type) { 561 case PCIBAR_IO: 562 bzero(&iop, sizeof(struct inout_port)); 563 iop.name = pi->pi_name; 564 iop.port = pi->pi_bar[idx].addr; 565 iop.size = pi->pi_bar[idx].size; 566 if (registration) { 567 iop.flags = IOPORT_F_INOUT; 568 iop.handler = pci_emul_io_handler; 569 iop.arg = pi; 570 error = register_inout(&iop); 571 } else 572 error = unregister_inout(&iop); 573 if (pe->pe_baraddr != NULL) 574 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 575 pi->pi_bar[idx].addr); 576 break; 577 case PCIBAR_MEM32: 578 case PCIBAR_MEM64: 579 bzero(&mr, sizeof(struct mem_range)); 580 mr.name = pi->pi_name; 581 mr.base = pi->pi_bar[idx].addr; 582 mr.size = pi->pi_bar[idx].size; 583 if (registration) { 584 mr.flags = MEM_F_RW; 585 mr.handler = pci_emul_mem_handler; 586 mr.arg1 = pi; 587 mr.arg2 = idx; 588 error = register_mem(&mr); 589 } else 590 error = unregister_mem(&mr); 591 if (pe->pe_baraddr != NULL) 592 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 593 pi->pi_bar[idx].addr); 594 break; 595 case PCIBAR_ROM: 596 error = 0; 597 if (pe->pe_baraddr != NULL) 598 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 599 pi->pi_bar[idx].addr); 600 break; 601 default: 602 error = EINVAL; 603 break; 604 } 605 assert(error == 0); 606 } 607 608 static void 609 unregister_bar(struct pci_devinst *pi, int idx) 610 { 611 612 modify_bar_registration(pi, idx, 0); 613 } 614 615 static void 616 register_bar(struct pci_devinst *pi, int idx) 617 { 618 619 modify_bar_registration(pi, idx, 1); 620 } 621 622 /* Is the ROM enabled for the emulated pci device? 
*/ 623 static int 624 romen(struct pci_devinst *pi) 625 { 626 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == 627 PCIM_BIOS_ENABLE; 628 } 629 630 /* Are we decoding i/o port accesses for the emulated pci device? */ 631 static int 632 porten(struct pci_devinst *pi) 633 { 634 uint16_t cmd; 635 636 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 637 638 return (cmd & PCIM_CMD_PORTEN); 639 } 640 641 /* Are we decoding memory accesses for the emulated pci device? */ 642 static int 643 memen(struct pci_devinst *pi) 644 { 645 uint16_t cmd; 646 647 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 648 649 return (cmd & PCIM_CMD_MEMEN); 650 } 651 652 /* 653 * Update the MMIO or I/O address that is decoded by the BAR register. 654 * 655 * If the pci device has enabled the address space decoding then intercept 656 * the address range decoded by the BAR register. 657 */ 658 static void 659 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 660 { 661 int decode; 662 663 if (pi->pi_bar[idx].type == PCIBAR_IO) 664 decode = porten(pi); 665 else 666 decode = memen(pi); 667 668 if (decode) 669 unregister_bar(pi, idx); 670 671 switch (type) { 672 case PCIBAR_IO: 673 case PCIBAR_MEM32: 674 pi->pi_bar[idx].addr = addr; 675 break; 676 case PCIBAR_MEM64: 677 pi->pi_bar[idx].addr &= ~0xffffffffUL; 678 pi->pi_bar[idx].addr |= addr; 679 break; 680 case PCIBAR_MEMHI64: 681 pi->pi_bar[idx].addr &= 0xffffffff; 682 pi->pi_bar[idx].addr |= addr; 683 break; 684 default: 685 assert(0); 686 } 687 688 if (decode) 689 register_bar(pi, idx); 690 } 691 692 int 693 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 694 uint64_t size) 695 { 696 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX)); 697 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX)); 698 699 if ((size & (size - 1)) != 0) 700 size = 1UL << flsl(size); /* round up to a power of 2 */ 701 702 /* Enforce minimum BAR sizes required by the PCI standard */ 703 if (type == 
PCIBAR_IO) { 704 if (size < 4) 705 size = 4; 706 } else if (type == PCIBAR_ROM) { 707 if (size < ~PCIM_BIOS_ADDR_MASK + 1) 708 size = ~PCIM_BIOS_ADDR_MASK + 1; 709 } else { 710 if (size < 16) 711 size = 16; 712 } 713 714 /* 715 * To reduce fragmentation of the MMIO space, we allocate the BARs by 716 * size. Therefore, don't allocate the BAR yet. We create a list of all 717 * BAR allocation which is sorted by BAR size. When all PCI devices are 718 * initialized, we will assign an address to the BARs. 719 */ 720 721 /* create a new list entry */ 722 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 723 memset(new_bar, 0, sizeof(*new_bar)); 724 new_bar->pdi = pdi; 725 new_bar->idx = idx; 726 new_bar->type = type; 727 new_bar->size = size; 728 729 /* 730 * Search for a BAR which size is lower than the size of our newly 731 * allocated BAR. 732 */ 733 struct pci_bar_allocation *bar = NULL; 734 TAILQ_FOREACH(bar, &pci_bars, chain) { 735 if (bar->size < size) { 736 break; 737 } 738 } 739 740 if (bar == NULL) { 741 /* 742 * Either the list is empty or new BAR is the smallest BAR of 743 * the list. Append it to the end of our list. 744 */ 745 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 746 } else { 747 /* 748 * The found BAR is smaller than our new BAR. For that reason, 749 * insert our new BAR before the found BAR. 750 */ 751 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 752 } 753 754 /* 755 * pci_passthru devices synchronize their physical and virtual command 756 * register on init. For that reason, the virtual cmd reg should be 757 * updated as early as possible. 
758 */ 759 uint16_t enbit = 0; 760 switch (type) { 761 case PCIBAR_IO: 762 enbit = PCIM_CMD_PORTEN; 763 break; 764 case PCIBAR_MEM64: 765 case PCIBAR_MEM32: 766 enbit = PCIM_CMD_MEMEN; 767 break; 768 default: 769 enbit = 0; 770 break; 771 } 772 773 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 774 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 775 776 return (0); 777 } 778 779 static int 780 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 781 const enum pcibar_type type, const uint64_t size) 782 { 783 int error; 784 uint64_t *baseptr, limit, addr, mask, lobits, bar; 785 786 switch (type) { 787 case PCIBAR_NONE: 788 baseptr = NULL; 789 addr = mask = lobits = 0; 790 break; 791 case PCIBAR_IO: 792 baseptr = &pci_emul_iobase; 793 limit = PCI_EMUL_IOLIMIT; 794 mask = PCIM_BAR_IO_BASE; 795 lobits = PCIM_BAR_IO_SPACE; 796 break; 797 case PCIBAR_MEM64: 798 /* 799 * XXX 800 * Some drivers do not work well if the 64-bit BAR is allocated 801 * above 4GB. Allow for this by allocating small requests under 802 * 4GB unless then allocation size is larger than some arbitrary 803 * number (128MB currently). 804 */ 805 if (size > 128 * 1024 * 1024) { 806 baseptr = &pci_emul_membase64; 807 limit = pci_emul_memlim64; 808 mask = PCIM_BAR_MEM_BASE; 809 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 810 PCIM_BAR_MEM_PREFETCH; 811 } else { 812 baseptr = &pci_emul_membase32; 813 limit = PCI_EMUL_MEMLIMIT32; 814 mask = PCIM_BAR_MEM_BASE; 815 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 816 } 817 break; 818 case PCIBAR_MEM32: 819 baseptr = &pci_emul_membase32; 820 limit = PCI_EMUL_MEMLIMIT32; 821 mask = PCIM_BAR_MEM_BASE; 822 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 823 break; 824 case PCIBAR_ROM: 825 /* do not claim memory for ROM. OVMF will do it for us. 
*/ 826 baseptr = NULL; 827 limit = 0; 828 mask = PCIM_BIOS_ADDR_MASK; 829 lobits = 0; 830 break; 831 default: 832 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 833 #ifdef FreeBSD 834 assert(0); 835 #else 836 abort(); 837 #endif 838 } 839 840 if (baseptr != NULL) { 841 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 842 if (error != 0) 843 return (error); 844 } else { 845 addr = 0; 846 } 847 848 pdi->pi_bar[idx].type = type; 849 pdi->pi_bar[idx].addr = addr; 850 pdi->pi_bar[idx].size = size; 851 /* 852 * passthru devices are using same lobits as physical device they set 853 * this property 854 */ 855 if (pdi->pi_bar[idx].lobits != 0) { 856 lobits = pdi->pi_bar[idx].lobits; 857 } else { 858 pdi->pi_bar[idx].lobits = lobits; 859 } 860 861 /* Initialize the BAR register in config space */ 862 bar = (addr & mask) | lobits; 863 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 864 865 if (type == PCIBAR_MEM64) { 866 assert(idx + 1 <= PCI_BARMAX); 867 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 868 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 869 } 870 871 if (type != PCIBAR_ROM) { 872 register_bar(pdi, idx); 873 } 874 875 return (0); 876 } 877 878 int 879 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, 880 void **const addr) 881 { 882 /* allocate ROM space once on first call */ 883 if (pci_emul_rombase == 0) { 884 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM, 885 "pcirom", PCI_EMUL_ROMSIZE); 886 if (pci_emul_rombase == MAP_FAILED) { 887 warnx("%s: failed to create rom segment", __func__); 888 return (-1); 889 } 890 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE; 891 pci_emul_romoffset = 0; 892 } 893 894 /* ROM size should be a power of 2 and greater than 2 KB */ 895 const uint64_t rom_size = MAX(1UL << flsl(size), 896 ~PCIM_BIOS_ADDR_MASK + 1); 897 898 /* check if ROM fits into ROM space */ 899 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) { 900 warnx("%s: no space left in rom segment:", 
__func__); 901 warnx("%16lu bytes left", 902 PCI_EMUL_ROMSIZE - pci_emul_romoffset); 903 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus, 904 pdi->pi_slot, pdi->pi_func); 905 return (-1); 906 } 907 908 /* allocate ROM BAR */ 909 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, 910 rom_size); 911 if (error) 912 return error; 913 914 /* return address */ 915 *addr = pci_emul_rombase + pci_emul_romoffset; 916 917 /* save offset into ROM Space */ 918 pdi->pi_romoffset = pci_emul_romoffset; 919 920 /* increase offset for next ROM */ 921 pci_emul_romoffset += rom_size; 922 923 return (0); 924 } 925 926 #define CAP_START_OFFSET 0x40 927 static int 928 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 929 { 930 int i, capoff, reallen; 931 uint16_t sts; 932 933 assert(caplen > 0); 934 935 reallen = roundup2(caplen, 4); /* dword aligned */ 936 937 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 938 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 939 capoff = CAP_START_OFFSET; 940 else 941 capoff = pi->pi_capend + 1; 942 943 /* Check if we have enough space */ 944 if (capoff + reallen > PCI_REGMAX + 1) 945 return (-1); 946 947 /* Set the previous capability pointer */ 948 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 949 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 950 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 951 } else 952 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 953 954 /* Copy the capability */ 955 for (i = 0; i < caplen; i++) 956 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 957 958 /* Set the next capability pointer */ 959 pci_set_cfgdata8(pi, capoff + 1, 0); 960 961 pi->pi_prevcap = capoff; 962 pi->pi_capend = capoff + reallen - 1; 963 return (0); 964 } 965 966 static struct pci_devemu * 967 pci_emul_finddev(const char *name) 968 { 969 struct pci_devemu **pdpp, *pdp; 970 971 SET_FOREACH(pdpp, pci_devemu_set) { 972 pdp = *pdpp; 973 if (!strcmp(pdp->pe_emu, name)) { 974 return (pdp); 975 } 976 } 
977 978 return (NULL); 979 } 980 981 static int 982 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 983 int func, struct funcinfo *fi) 984 { 985 struct pci_devinst *pdi; 986 int err; 987 988 pdi = calloc(1, sizeof(struct pci_devinst)); 989 990 pdi->pi_vmctx = ctx; 991 pdi->pi_bus = bus; 992 pdi->pi_slot = slot; 993 pdi->pi_func = func; 994 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 995 pdi->pi_lintr.pin = 0; 996 pdi->pi_lintr.state = IDLE; 997 pdi->pi_lintr.pirq_pin = 0; 998 pdi->pi_lintr.ioapic_irq = 0; 999 pdi->pi_d = pde; 1000 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 1001 1002 /* Disable legacy interrupts */ 1003 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 1004 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 1005 1006 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 1007 1008 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 1009 if (err == 0) 1010 fi->fi_devi = pdi; 1011 else 1012 free(pdi); 1013 1014 return (err); 1015 } 1016 1017 void 1018 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 1019 { 1020 int mmc; 1021 1022 /* Number of msi messages must be a power of 2 between 1 and 32 */ 1023 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 1024 mmc = ffs(msgnum) - 1; 1025 1026 bzero(msicap, sizeof(struct msicap)); 1027 msicap->capid = PCIY_MSI; 1028 msicap->nextptr = nextptr; 1029 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 1030 } 1031 1032 int 1033 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 1034 { 1035 struct msicap msicap; 1036 1037 pci_populate_msicap(&msicap, msgnum, 0); 1038 1039 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 1040 } 1041 1042 static void 1043 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 1044 uint32_t msix_tab_size) 1045 { 1046 1047 assert(msix_tab_size % 4096 == 0); 1048 1049 bzero(msixcap, sizeof(struct msixcap)); 1050 msixcap->capid = PCIY_MSIX; 1051 1052 /* 1053 * Message 
Control Register, all fields set to 1054 * zero except for the Table Size. 1055 * Note: Table size N is encoded as N-1 1056 */ 1057 msixcap->msgctrl = msgnum - 1; 1058 1059 /* 1060 * MSI-X BAR setup: 1061 * - MSI-X table start at offset 0 1062 * - PBA table starts at a 4K aligned offset after the MSI-X table 1063 */ 1064 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 1065 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 1066 } 1067 1068 static void 1069 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 1070 { 1071 int i, table_size; 1072 1073 assert(table_entries > 0); 1074 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 1075 1076 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 1077 pi->pi_msix.table = calloc(1, table_size); 1078 1079 /* set mask bit of vector control register */ 1080 for (i = 0; i < table_entries; i++) 1081 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 1082 } 1083 1084 int 1085 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 1086 { 1087 uint32_t tab_size; 1088 struct msixcap msixcap; 1089 1090 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 1091 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 1092 1093 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 1094 1095 /* Align table size to nearest 4K */ 1096 tab_size = roundup2(tab_size, 4096); 1097 1098 pi->pi_msix.table_bar = barnum; 1099 pi->pi_msix.pba_bar = barnum; 1100 pi->pi_msix.table_offset = 0; 1101 pi->pi_msix.table_count = msgnum; 1102 pi->pi_msix.pba_offset = tab_size; 1103 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1104 1105 pci_msix_table_init(pi, msgnum); 1106 1107 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1108 1109 /* allocate memory for MSI-X Table and PBA */ 1110 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1111 tab_size + pi->pi_msix.pba_size); 1112 1113 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1114 sizeof(msixcap))); 1115 } 1116 1117 static void 1118 msixcap_cfgwrite(struct 
pci_devinst *pi, int capoff, int offset, 1119 int bytes, uint32_t val) 1120 { 1121 uint16_t msgctrl, rwmask; 1122 int off; 1123 1124 off = offset - capoff; 1125 /* Message Control Register */ 1126 if (off == 2 && bytes == 2) { 1127 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1128 msgctrl = pci_get_cfgdata16(pi, offset); 1129 msgctrl &= ~rwmask; 1130 msgctrl |= val & rwmask; 1131 val = msgctrl; 1132 1133 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1134 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1135 pci_lintr_update(pi); 1136 } 1137 1138 CFGWRITE(pi, offset, val, bytes); 1139 } 1140 1141 static void 1142 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1143 int bytes, uint32_t val) 1144 { 1145 uint16_t msgctrl, rwmask, msgdata, mme; 1146 uint32_t addrlo; 1147 1148 /* 1149 * If guest is writing to the message control register make sure 1150 * we do not overwrite read-only fields. 1151 */ 1152 if ((offset - capoff) == 2 && bytes == 2) { 1153 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1154 msgctrl = pci_get_cfgdata16(pi, offset); 1155 msgctrl &= ~rwmask; 1156 msgctrl |= val & rwmask; 1157 val = msgctrl; 1158 } 1159 CFGWRITE(pi, offset, val, bytes); 1160 1161 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1162 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1163 if (msgctrl & PCIM_MSICTRL_64BIT) 1164 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1165 else 1166 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1167 1168 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1169 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
	    1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		/* Multiple Message Enable encodes a power of two. */
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	/* MSI enable/disable changes INTx permittivity. */
	pci_lintr_update(pi);
}

void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}

#define	PCIECAP_VERSION	0x2

/*
 * Append a PCI Express capability of the given device/port 'type' to
 * this device's capability list.  Returns 0 on success.
 */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		/* Root integrated endpoints do not report link registers. */
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;		/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.  A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	/* Dispatch to the per-capability emulation. */
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}

/* Return 1 if 'offset' falls within this device's capability region. */
static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
	uint16_t sts;

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
			return (1);
	}
	return (0);
}

/* Fallback MMIO handler for guest accesses outside RAM and device BARs. */
static int
pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	/*
	 * Ignore writes; return 0xff's for reads. The mem read code
	 * will take care of truncating to the correct size.
	 */
	if (dir == MEM_F_READ) {
		*val = 0xffffffffffffffff;
	}

	return (0);
}

/* Decode an ECAM (memory-mapped config) access and forward to pci_cfgrw(). */
static int
pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1, long arg2)
{
	int bus, slot, func, coff, in;

	/* ECAM layout: bus[27:20], slot[19:15], func[14:12], offset[11:0]. */
	coff = addr & 0xfff;
	func = (addr >> 12) & 0x7;
	slot = (addr >> 15) & 0x1f;
	bus = (addr >> 20) & 0xff;
	in = (dir == MEM_F_READ);
	if (in)
		*val = ~0UL;
	pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val);
	return (0);
}

/* Guest-physical base of the PCI extended config window. */
uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}

#define	BUSIO_ROUNDUP		32
#define	BUSMEM32_ROUNDUP	(1024 * 1024)
#define	BUSMEM64_ROUNDUP	(512 * 1024 * 1024)

/*
 * Instantiate all configured PCI devices, assign BARs, route INTx
 * interrupts and register the PCI-related guest memory ranges.
 * Returns 0 on success or an errno value on configuration error.
 */
int
init_pci(struct vmctx *ctx)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct mem_range mr;
	struct pci_devemu *pde;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	nvlist_t *nvl;
	const char *emul;
	size_t lowmem;
	int bus, slot, func;
	int error;

	if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
		errx(EX_OSERR, "Invalid lowmem limit");

	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = PCI_EMUL_MEMBASE32;

	/* The 64-bit window starts above the guest's high memory. */
	pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx);
	pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
	pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;

	for (bus = 0; bus < MAXBUSES; bus++) {
		snprintf(node_name, sizeof(node_name), "pci.%d", bus);
		nvl = find_config_node(node_name);
		if (nvl == NULL)
			continue;
		pci_businfo[bus] = calloc(1, sizeof(struct businfo));
		bi = pci_businfo[bus];

		/*
		 * Keep track of the i/o and memory resources allocated to
		 * this
bus.
		 */
		bi->iobase = pci_emul_iobase;
		bi->membase32 = pci_emul_membase32;
		bi->membase64 = pci_emul_membase64;

		/* first run: init devices */
		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				snprintf(node_name, sizeof(node_name),
				    "pci.%d.%d.%d", bus, slot, func);
				nvl = find_config_node(node_name);
				if (nvl == NULL)
					continue;

				fi->fi_config = nvl;
				emul = get_config_value_node(nvl, "device");
				if (emul == NULL) {
					EPRINTLN("pci slot %d:%d:%d: missing "
					    "\"device\" value", bus, slot, func);
					return (EINVAL);
				}
				pde = pci_emul_finddev(emul);
				if (pde == NULL) {
					EPRINTLN("pci slot %d:%d:%d: unknown "
					    "device \"%s\"", bus, slot, func,
					    emul);
					return (EINVAL);
				}
				if (pde->pe_alias != NULL) {
					EPRINTLN("pci slot %d:%d:%d: legacy "
					    "device \"%s\", use \"%s\" instead",
					    bus, slot, func, emul,
					    pde->pe_alias);
					return (EINVAL);
				}
				fi->fi_pde = pde;
				error = pci_emul_init(ctx, pde, bus, slot,
				    func, fi);
				if (error)
					return (error);
			}
		}

		/* second run: assign BARs and free list */
		struct pci_bar_allocation *bar;
		struct pci_bar_allocation *bar_tmp;
		TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
			pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
			    bar->size);
			free(bar);
		}
		TAILQ_INIT(&pci_bars);

		/*
		 * Add some slop to the I/O and memory resources decoded by
		 * this bus to give a guest some flexibility if it wants to
		 * reprogram the BARs.
		 */
		pci_emul_iobase += BUSIO_ROUNDUP;
		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
		bi->iolimit = pci_emul_iobase;

		pci_emul_membase32 += BUSMEM32_ROUNDUP;
		pci_emul_membase32 = roundup2(pci_emul_membase32,
		    BUSMEM32_ROUNDUP);
		bi->memlimit32 = pci_emul_membase32;

		pci_emul_membase64 += BUSMEM64_ROUNDUP;
		pci_emul_membase64 = roundup2(pci_emul_membase64,
		    BUSMEM64_ROUNDUP);
		bi->memlimit64 = pci_emul_membase64;
	}

	/*
	 * PCI backends are initialized before routing INTx interrupts
	 * so that LPC devices are able to reserve ISA IRQs before
	 * routing PIRQ pins.
	 */
	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;
				pci_lintr_route(fi->fi_devi);
			}
		}
	}
	lpc_pirq_routed();

	/*
	 * The guest physical memory map looks like the following:
	 * [0,          lowmem)            guest system memory
	 * [lowmem,     0xC0000000)        memory hole (may be absent)
	 * [0xC0000000, 0xE0000000)        PCI hole (32-bit BAR allocation)
	 * [0xE0000000, 0xF0000000)        PCI extended config window
	 * [0xF0000000, 4GB)               LAPIC, IOAPIC, HPET, firmware
	 * [4GB,        4GB + highmem)
	 */

	/*
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	lowmem = vm_get_lowmem_size(ctx);
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI hole";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = lowmem;
	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	mr.handler = pci_emul_fallback_handler;
	error = register_mem_fallback(&mr);
	assert(error == 0);

	/* PCI extended config space */
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI ECFG";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = PCI_EMUL_ECFG_BASE;
	mr.size = PCI_EMUL_ECFG_SIZE;
	mr.handler = pci_emul_ecfg_handler;
	error = register_mem(&mr);
	assert(error == 0);

	return (0);
}

/* Emit one ACPI _PRT entry routed via the I/O APIC (APIC mode). */
static void
pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{

	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" Zero,");
	dsdt_line(" 0x%X", ioapic_irq);
	dsdt_line(" },");
}

/* Emit one ACPI _PRT entry routed via a PIRQ link device (PIC mode). */
static void
pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{
	char *name;

	name = lpc_pirq_name(pirq_pin);
	if (name == NULL)
		return;
	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" %s,", name);
	dsdt_line(" 0x00");
	dsdt_line(" },");
	free(name);
}

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line(" Device (PC%02X)", bus);
	dsdt_line(" {");
	dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");

	dsdt_line(" Method (_BBN, 0, NotSerialized)");
	dsdt_line(" {");
	dsdt_line(" Return (0x%08X)", bus);
	dsdt_line(" }");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bus);
	dsdt_line(" 0x%04X, // Range Maximum", bus);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x0001, // Length");
	dsdt_line(" ,, )");

	if (bus == 0) {
		/* Bus 0 always decodes the legacy config ports at 0xcf8. */
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0000, // Range Minimum");
		dsdt_line(" 0x0CF7, // Range Maximum");
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x0CF8, // Length");
		dsdt_line(" ,, , TypeStatic)");

		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0D00, // Range Minimum");
		dsdt_line(" 0x%04X, // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x%04X, // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line(" ,, , TypeStatic)");

		if (bi == NULL) {
			/* No devices: close _CRS and skip per-bus windows. */
			dsdt_line(" })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bi->iobase);
	dsdt_line(" 0x%04X, // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x%04X, // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line(" ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x00000000, // Granularity");
	dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32);
	dsdt_line(" 0x%08X, // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line(" 0x00000000, // Translation Offset");
	dsdt_line(" 0x%08X, // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x0000000000000000, // Granularity");
	dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line(" 0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line(" 0x0000000000000000, // Translation Offset");
	dsdt_line(" 0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line(" })");

	count = pci_count_lintr(bus);
	if (count != 0) {
		/* Emit both PIC-mode (PPRT) and APIC-mode (APRT) tables. */
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line(" If (PICM)");
		dsdt_line(" {");
		dsdt_line(" Return (APRT)");
		dsdt_line(" }");
		dsdt_line(" Else");
		dsdt_line(" {");
		dsdt_line(" Return (PPRT)");
		dsdt_line(" }");
		dsdt_line("}");
		dsdt_unindent(2);
	}

	/* Let each device emulation contribute its own DSDT content. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line(" }");
}

/* Emit the top-level ACPI DSDT content for all PCI buses. */
void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line(" Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}

/* Return non-zero if 'bus' has any configured devices. */
int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

/* Number of MSI messages available, or 0 if MSI is disabled. */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

int
pci_msix_enabled(struct pci_devinst *pi)
{

	/* MSI takes precedence if a guest enables both. */
	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

/* Deliver the MSI-X message at table 'index' unless masked/out of range. */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_lapic_msi(pi->pi_vmctx, mte->addr,
		    mte->msg_data);
	}
}

/* Deliver MSI message 'index' if MSI is enabled and the index is in range. */
void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
		    pi->pi_msi.msg_data + index);
	}
}

/* INTx may assert only when MSI/MSI-X are off and INTxDIS is clear. */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
	    (cmd & PCIM_CMD_INTxDIS)));
}

/* Reserve the least-used INTx pin in this device's slot. */
void
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * Just allocate a pin from our slot. The pin will be
	 * assigned IRQs later when interrupts are routed.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	si->si_intpins[bestpin].ii_count++;
	/* Config-space INTPIN is 1-based (1 = INTA#). */
	pi->pi_lintr.pin = bestpin + 1;
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
}

/* Assign I/O APIC and PIRQ routing for a previously requested INTx pin. */
static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin if one
	 * is not yet assigned.
	 */
	if (ii->ii_ioapic_irq == 0)
		ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi);
	assert(ii->ii_ioapic_irq > 0);

	/*
	 * Attempt to allocate a PIRQ pin for this intpin if one is
	 * not yet assigned.
	 */
	if (ii->ii_pirq_pin == 0)
		ii->ii_pirq_pin = pirq_alloc_pin(pi);
	assert(ii->ii_pirq_pin > 0);

	pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
	pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
}

/* Assert this device's INTx line, or latch it PENDING if not permitted. */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/* Deassert this device's INTx line and clear any pending state. */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		pci_irq_deassert(pi);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/* Reconcile INTx state after a change that affects whether it may assert. */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
#ifndef __FreeBSD__
	/* illumos-only: give the device emulation a chance to react. */
	if (pi->pi_d->pe_lintrupdate != NULL) {
		pi->pi_d->pe_lintrupdate(pi);
	}
#endif /* __FreeBSD__ */
}

/* Count the number of in-use INTx pins on 'bus'. */
int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

/* Invoke 'cb' for every in-use INTx pin on 'bus'. */
void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, ii->ii_pirq_pin,
				    ii->ii_ioapic_irq, arg);
		}
	}
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not is a multi-function being emulated in the pci 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	/* Only fix up reads that cover the header-type byte. */
	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			/* HDRTYPE is byte 2 of the 4-byte read. */
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}

/*
 * Update device state in response to changes to the PCI command
 * register.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_ROM:
			/* skip (un-)register of ROM if it disabled */
			if (!romen(pi))
				break;
			/* fallthrough */
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

/* Emulate a write to the command/status dword, honoring read-only bits. */
static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);		/* update config */

	pci_emul_cmd_changed(pi, cmd);
}

/*
 * Central dispatcher for guest PCI config space accesses (both the legacy
 * 0xcf8/0xcfc mechanism and ECAM funnel through here).  'in' selects
 * read (non-zero) vs. write; 'eax' carries the data in both directions.
 */
static void
pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
    int coff, int bytes, uint32_t *eax)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, mask;
	uint64_t bar = 0;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * the guest is doing an un-aligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*eax = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*eax = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes,
			    eax);
		} else {
			needcfg = 1;
		}

		if (needcfg)
			*eax = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return;

		/*
		 * Special handling for write to BAR and ROM registers
		 */
		if (is_pcir_bar(coff) || is_pcir_bios(coff)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;

			if (is_pcir_bar(coff)) {
				idx = (coff - PCIR_BAR(0)) / 4;
			} else if (is_pcir_bios(coff)) {
				idx = PCI_ROM_IDX;
			} else {
				errx(4, "%s: invalid BAR offset %d", __func__,
				    coff);
			}

			/* Size-align the written address (BAR sizing probe). */
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *eax & mask;
				/* I/O BARs decode only 16 bits of address. */
				addr &= 0xffff;
				bar = addr | pi->pi_bar[idx].lobits;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* Low half of a 64-bit BAR. */
				addr = bar = *eax & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/* High half; size lives in the previous BAR. */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			case PCIBAR_ROM:
				addr = bar = *eax & mask;
				if (memen(pi) && romen(pi)) {
					unregister_bar(pi, idx);
				}
				pi->pi_bar[idx].addr = addr;
				pi->pi_bar[idx].lobits = *eax &
				    PCIM_BIOS_ENABLE;
				/* romen could have changed it value */
				if (memen(pi) && romen(pi)) {
					register_bar(pi, idx);
				}
				bar |= pi->pi_bar[idx].lobits;
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}
}

/* State latched by the most recent write to the 0xcf8 config-address port. */
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

/* Handler for the legacy 0xcf8 config-address I/O port. */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	uint32_t x;

	/* Only full 4-byte accesses program the address latch. */
	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ?
			    0xffff : 0xff;
		return (0);
	}

	if (in) {
		/* Reconstruct the address word from the latched fields. */
		x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = (x & PCI_REGMAX) & ~0x03;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

/* Handler for the legacy 0xcfc-0xcff config-data I/O ports. */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	int coff;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	/* Sub-dword accesses are offset within the latched register. */
	coff = cfgoff + (port - CONF1_DATA_PORT);
	if (cfgenable) {
		pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes,
		    eax);
	} else {
		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
		if (in)
			*eax = 0xffffffff;
	}
	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8
#define DMEMSZ	4096
struct pci_emul_dsoftc {
	uint8_t	ioregs[DIOSZ];		/* backing store for the I/O BAR */
	uint8_t	memregs[2][DMEMSZ];	/* backing store for the two MMIO BARs */
};

#define	PCI_EMUL_MSI_MSGS	 4
#define	PCI_EMUL_MSIX_MSGS	16

/* Instantiate the dummy device: MSI capability, one I/O and two MMIO BARs. */
static int
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
	int error;
	struct pci_emul_dsoftc *sc;

	/* NOTE(review): calloc() result is not checked before use here. */
	sc = calloc(1, sizeof(struct pci_emul_dsoftc));

	pi->pi_arg = sc;

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	return (0);
}

/* BAR write handler for the dummy device. */
static void
pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size, uint64_t value)
{
	int i;
	struct pci_emul_dsoftc *sc = pi->pi_arg;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("diow: iow too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		if (size == 1) {
			sc->ioregs[offset] = value & 0xff;
		} else if (size == 2) {
			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
		} else if (size == 4) {
			*(uint32_t *)&sc->ioregs[offset] = value;
		} else {
			printf("diow: iow unknown size %d\n", size);
		}

		/*
		 * Special magic value to generate an interrupt
		 */
		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));

		if (value == 0xabcdef) {
			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
				pci_generate_msi(pi, i);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("diow: memw too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			sc->memregs[i][offset] = value;
		} else if (size == 2) {
			*(uint16_t *)&sc->memregs[i][offset] = value;
		} else if (size == 4) {
			*(uint32_t *)&sc->memregs[i][offset] = value;
		} else if (size == 8) {
			*(uint64_t *)&sc->memregs[i][offset] = value;
		} else {
			printf("diow: memw unknown size %d\n", size);
		}

		/*
		 * magic interrupt ??
		 */
	}

	if (baridx > 2 || baridx < 0) {
		printf("diow: unknown bar idx %d\n", baridx);
	}
}

/* BAR read handler for the dummy device. */
static uint64_t
pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct pci_emul_dsoftc *sc = pi->pi_arg;
	uint32_t value;
	int i;

	value = 0;
	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("dior: ior too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		value = 0;
		if (size == 1) {
			value = sc->ioregs[offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->ioregs[offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->ioregs[offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("dior: memr too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			value = sc->memregs[i][offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->memregs[i][offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->memregs[i][offset];
		} else if (size == 8) {
			value = *(uint64_t *) &sc->memregs[i][offset];
		} else {
			/*
			 * NOTE(review): message says "ior" but this is the
			 * memory-BAR read path ("memr") - likely a copy/paste
			 * slip in the diagnostic text.
			 */
			printf("dior: ior unknown size %d\n", size);
		}
	}


	if (baridx > 2 || baridx < 0) {
		printf("dior: unknown bar idx %d\n", baridx);
		return (0);
	}

	return (value);
}

static const struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */