1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2014 Pluribus Networks Inc. 41 * Copyright 2018 Joyent, Inc. 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/linker_set.h> 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <errno.h> 53 #include <pthread.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <strings.h> 58 #include <assert.h> 59 #include <stdbool.h> 60 #include <sysexits.h> 61 62 #include <machine/vmm.h> 63 #include <vmmapi.h> 64 65 #include "acpi.h" 66 #include "bhyverun.h" 67 #include "config.h" 68 #include "debug.h" 69 #include "inout.h" 70 #include "ioapic.h" 71 #include "mem.h" 72 #include "pci_emul.h" 73 #include "pci_irq.h" 74 #include "pci_lpc.h" 75 76 #define CONF1_ADDR_PORT 0x0cf8 77 #define CONF1_DATA_PORT 0x0cfc 78 79 #define CONF1_ENABLE 0x80000000ul 80 81 #define MAXBUSES (PCI_BUSMAX + 1) 82 #define MAXSLOTS (PCI_SLOTMAX + 1) 83 #define MAXFUNCS (PCI_FUNCMAX + 1) 84 85 #define GB (1024 * 1024 * 1024UL) 86 87 struct funcinfo { 88 nvlist_t *fi_config; 89 struct pci_devemu *fi_pde; 90 struct pci_devinst *fi_devi; 91 }; 92 93 struct intxinfo { 94 int ii_count; 95 int ii_pirq_pin; 96 int ii_ioapic_irq; 97 }; 98 99 struct slotinfo { 100 struct intxinfo si_intpins[4]; 101 struct funcinfo si_funcs[MAXFUNCS]; 102 }; 103 104 struct businfo { 105 uint16_t iobase, iolimit; /* I/O window */ 106 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 107 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 108 struct slotinfo slotinfo[MAXSLOTS]; 109 }; 110 111 static struct businfo *pci_businfo[MAXBUSES]; 112 113 SET_DECLARE(pci_devemu_set, struct pci_devemu); 114 115 static uint64_t pci_emul_iobase; 116 static uint64_t pci_emul_membase32; 117 static uint64_t pci_emul_membase64; 118 static uint64_t pci_emul_memlim64; 119 120 struct pci_bar_allocation { 121 TAILQ_ENTRY(pci_bar_allocation) chain; 122 struct pci_devinst *pdi; 123 int idx; 124 enum pcibar_type type; 125 uint64_t size; 126 }; 127 TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER( 128 pci_bars); 129 130 #define PCI_EMUL_IOBASE 0x2000 131 #define PCI_EMUL_IOLIMIT 0x10000 132 133 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 134 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 135 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 136 137 /* 138 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 139 * change this address without changing it in OVMF. 140 */ 141 #define PCI_EMUL_MEMBASE32 0xC0000000 142 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 143 #define PCI_EMUL_MEMSIZE64 (32*GB) 144 145 static struct pci_devemu *pci_emul_finddev(const char *name); 146 static void pci_lintr_route(struct pci_devinst *pi); 147 static void pci_lintr_update(struct pci_devinst *pi); 148 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 149 int func, int coff, int bytes, uint32_t *val); 150 151 static __inline void 152 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 153 { 154 155 if (bytes == 1) 156 pci_set_cfgdata8(pi, coff, val); 157 else if (bytes == 2) 158 pci_set_cfgdata16(pi, coff, val); 159 else 160 pci_set_cfgdata32(pi, coff, val); 161 } 162 163 static __inline uint32_t 164 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 165 { 166 167 if (bytes == 1) 168 return (pci_get_cfgdata8(pi, coff)); 169 else if (bytes == 2) 170 return (pci_get_cfgdata16(pi, coff)); 171 else 172 return (pci_get_cfgdata32(pi, coff)); 173 } 174 175 /* 176 * I/O access 177 */ 178 179 /* 180 * Slot options are in the form: 181 * 182 * <bus>:<slot>:<func>,<emul>[,<config>] 183 * <slot>[:<func>],<emul>[,<config>] 184 * 185 * slot is 0..31 186 * func is 0..7 187 * emul is a string describing the type of PCI device e.g. virtio-net 188 * config is an optional string, depending on the device, that can be 189 * used for configuration. 190 * Examples are: 191 * 1,virtio-net,tap0 192 * 3:0,dummy 193 */ 194 static void 195 pci_parse_slot_usage(char *aopt) 196 { 197 198 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 199 } 200 201 /* 202 * Helper function to parse a list of comma-separated options where 203 * each option is formatted as "name[=value]". If no value is 204 * provided, the option is treated as a boolean and is given a value 205 * of true. 206 */ 207 int 208 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 209 { 210 char *config, *name, *tofree, *value; 211 212 if (opt == NULL) 213 return (0); 214 215 config = tofree = strdup(opt); 216 while ((name = strsep(&config, ",")) != NULL) { 217 value = strchr(name, '='); 218 if (value != NULL) { 219 *value = '\0'; 220 value++; 221 set_config_value_node(nvl, name, value); 222 } else 223 set_config_bool_node(nvl, name, true); 224 } 225 free(tofree); 226 return (0); 227 } 228 229 /* 230 * PCI device configuration is stored in MIBs that encode the device's 231 * location: 232 * 233 * pci.<bus>.<slot>.<func> 234 * 235 * Where "bus", "slot", and "func" are all decimal values without 236 * leading zeroes. Each valid device must have a "device" node which 237 * identifies the driver model of the device. 238 * 239 * Device backends can provide a parser for the "config" string. If 240 * a custom parser is not provided, pci_parse_legacy_config() is used 241 * to parse the string. 242 */ 243 int 244 pci_parse_slot(char *opt) 245 { 246 char node_name[sizeof("pci.XXX.XX.X")]; 247 struct pci_devemu *pde; 248 char *emul, *config, *str, *cp; 249 int error, bnum, snum, fnum; 250 nvlist_t *nvl; 251 252 error = -1; 253 str = strdup(opt); 254 255 emul = config = NULL; 256 if ((cp = strchr(str, ',')) != NULL) { 257 *cp = '\0'; 258 emul = cp + 1; 259 if ((cp = strchr(emul, ',')) != NULL) { 260 *cp = '\0'; 261 config = cp + 1; 262 } 263 } else { 264 pci_parse_slot_usage(opt); 265 goto done; 266 } 267 268 /* <bus>:<slot>:<func> */ 269 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 270 bnum = 0; 271 /* <slot>:<func> */ 272 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 273 fnum = 0; 274 /* <slot> */ 275 if (sscanf(str, "%d", &snum) != 1) { 276 snum = -1; 277 } 278 } 279 } 280 281 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 282 fnum < 0 || fnum >= MAXFUNCS) { 283 pci_parse_slot_usage(opt); 284 goto done; 285 } 286 287 pde = pci_emul_finddev(emul); 288 if (pde == NULL) { 289 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 290 fnum, emul); 291 goto done; 292 } 293 294 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 295 fnum); 296 nvl = find_config_node(node_name); 297 if (nvl != NULL) { 298 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 299 fnum); 300 goto done; 301 } 302 nvl = create_config_node(node_name); 303 if (pde->pe_alias != NULL) 304 set_config_value_node(nvl, "device", pde->pe_alias); 305 else 306 set_config_value_node(nvl, "device", pde->pe_emu); 307 308 if (pde->pe_legacy_config != NULL) 309 error = pde->pe_legacy_config(nvl, config); 310 else 311 error = pci_parse_legacy_config(nvl, config); 312 done: 313 free(str); 314 return (error); 315 } 316 317 void 318 pci_print_supported_devices() 319 { 320 struct pci_devemu **pdpp, *pdp; 321 322 SET_FOREACH(pdpp, pci_devemu_set) { 323 pdp = *pdpp; 324 printf("%s\n", pdp->pe_emu); 325 } 326 } 327 328 static int 329 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 330 { 331 332 if (offset < pi->pi_msix.pba_offset) 333 return (0); 334 335 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 336 return (0); 337 } 338 339 return (1); 340 } 341 342 int 343 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 344 uint64_t value) 345 { 346 int msix_entry_offset; 347 int tab_index; 348 char *dest; 349 350 /* support only 4 or 8 byte writes */ 351 if (size != 4 && size != 8) 352 return (-1); 353 354 /* 355 * Return if table index is beyond what device supports 356 */ 357 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 358 if (tab_index >= pi->pi_msix.table_count) 359 return (-1); 360 361 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 362 363 /* support only aligned writes */ 364 if ((msix_entry_offset % size) != 0) 365 return (-1); 366 367 dest = (char *)(pi->pi_msix.table + tab_index); 368 dest += msix_entry_offset; 369 370 if (size == 4) 371 *((uint32_t *)dest) = value; 372 else 373 *((uint64_t *)dest) = value; 374 375 return (0); 376 } 377 378 uint64_t 379 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 380 { 381 char *dest; 382 int msix_entry_offset; 383 int tab_index; 384 uint64_t retval = ~0; 385 386 /* 387 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 388 * table but we also allow 1 byte access to accommodate reads from 389 * ddb. 390 */ 391 if (size != 1 && size != 4 && size != 8) 392 return (retval); 393 394 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 395 396 /* support only aligned reads */ 397 if ((msix_entry_offset % size) != 0) { 398 return (retval); 399 } 400 401 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 402 403 if (tab_index < pi->pi_msix.table_count) { 404 /* valid MSI-X Table access */ 405 dest = (char *)(pi->pi_msix.table + tab_index); 406 dest += msix_entry_offset; 407 408 if (size == 1) 409 retval = *((uint8_t *)dest); 410 else if (size == 4) 411 retval = *((uint32_t *)dest); 412 else 413 retval = *((uint64_t *)dest); 414 } else if (pci_valid_pba_offset(pi, offset)) { 415 /* return 0 for PBA access */ 416 retval = 0; 417 } 418 419 return (retval); 420 } 421 422 int 423 pci_msix_table_bar(struct pci_devinst *pi) 424 { 425 426 if (pi->pi_msix.table != NULL) 427 return (pi->pi_msix.table_bar); 428 else 429 return (-1); 430 } 431 432 int 433 pci_msix_pba_bar(struct pci_devinst *pi) 434 { 435 436 if (pi->pi_msix.table != NULL) 437 return (pi->pi_msix.pba_bar); 438 else 439 return (-1); 440 } 441 442 static int 443 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 444 uint32_t *eax, void *arg) 445 { 446 struct pci_devinst *pdi = arg; 447 struct pci_devemu *pe = pdi->pi_d; 448 uint64_t offset; 449 int i; 450 451 for (i = 0; i <= PCI_BARMAX; i++) { 452 if (pdi->pi_bar[i].type == PCIBAR_IO && 453 port >= pdi->pi_bar[i].addr && 454 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 455 offset = port - pdi->pi_bar[i].addr; 456 if (in) 457 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 458 offset, bytes); 459 else 460 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 461 bytes, *eax); 462 return (0); 463 } 464 } 465 return (-1); 466 } 467 468 static int 469 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 470 int size, uint64_t *val, void *arg1, long arg2) 471 { 472 struct pci_devinst *pdi = arg1; 473 struct pci_devemu *pe = pdi->pi_d; 474 uint64_t offset; 475 int bidx = (int) arg2; 476 477 assert(bidx <= PCI_BARMAX); 478 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 479 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 480 assert(addr >= pdi->pi_bar[bidx].addr && 481 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 482 483 offset = addr - pdi->pi_bar[bidx].addr; 484 485 if (dir == MEM_F_WRITE) { 486 if (size == 8) { 487 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 488 4, *val & 0xffffffff); 489 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 490 4, *val >> 32); 491 } else { 492 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 493 size, *val); 494 } 495 } else { 496 if (size == 8) { 497 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 498 offset, 4); 499 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 500 offset + 4, 4) << 32; 501 } else { 502 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 503 offset, size); 504 } 505 } 506 507 return (0); 508 } 509 510 511 static int 512 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 513 uint64_t *addr) 514 { 515 uint64_t base; 516 517 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 518 519 base = roundup2(*baseptr, size); 520 521 if (base + size <= limit) { 522 *addr = base; 523 *baseptr = base + size; 524 return (0); 525 } else 526 return (-1); 527 } 528 529 /* 530 * Register (or unregister) the MMIO or I/O region associated with the BAR 531 * register 'idx' of an emulated pci device. 532 */ 533 static void 534 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 535 { 536 struct pci_devemu *pe; 537 int error; 538 struct inout_port iop; 539 struct mem_range mr; 540 541 pe = pi->pi_d; 542 switch (pi->pi_bar[idx].type) { 543 case PCIBAR_IO: 544 bzero(&iop, sizeof(struct inout_port)); 545 iop.name = pi->pi_name; 546 iop.port = pi->pi_bar[idx].addr; 547 iop.size = pi->pi_bar[idx].size; 548 if (registration) { 549 iop.flags = IOPORT_F_INOUT; 550 iop.handler = pci_emul_io_handler; 551 iop.arg = pi; 552 error = register_inout(&iop); 553 } else 554 error = unregister_inout(&iop); 555 if (pe->pe_baraddr != NULL) 556 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 557 pi->pi_bar[idx].addr); 558 break; 559 case PCIBAR_MEM32: 560 case PCIBAR_MEM64: 561 bzero(&mr, sizeof(struct mem_range)); 562 mr.name = pi->pi_name; 563 mr.base = pi->pi_bar[idx].addr; 564 mr.size = pi->pi_bar[idx].size; 565 if (registration) { 566 mr.flags = MEM_F_RW; 567 mr.handler = pci_emul_mem_handler; 568 mr.arg1 = pi; 569 mr.arg2 = idx; 570 error = register_mem(&mr); 571 } else 572 error = unregister_mem(&mr); 573 if (pe->pe_baraddr != NULL) 574 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 575 pi->pi_bar[idx].addr); 576 break; 577 default: 578 error = EINVAL; 579 break; 580 } 581 assert(error == 0); 582 } 583 584 static void 585 unregister_bar(struct pci_devinst *pi, int idx) 586 { 587 588 modify_bar_registration(pi, idx, 0); 589 } 590 591 static void 592 register_bar(struct pci_devinst *pi, int idx) 593 { 594 595 modify_bar_registration(pi, idx, 1); 596 } 597 598 /* Are we decoding i/o port accesses for the emulated pci device? */ 599 static int 600 porten(struct pci_devinst *pi) 601 { 602 uint16_t cmd; 603 604 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 605 606 return (cmd & PCIM_CMD_PORTEN); 607 } 608 609 /* Are we decoding memory accesses for the emulated pci device? */ 610 static int 611 memen(struct pci_devinst *pi) 612 { 613 uint16_t cmd; 614 615 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 616 617 return (cmd & PCIM_CMD_MEMEN); 618 } 619 620 /* 621 * Update the MMIO or I/O address that is decoded by the BAR register. 622 * 623 * If the pci device has enabled the address space decoding then intercept 624 * the address range decoded by the BAR register. 625 */ 626 static void 627 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 628 { 629 int decode; 630 631 if (pi->pi_bar[idx].type == PCIBAR_IO) 632 decode = porten(pi); 633 else 634 decode = memen(pi); 635 636 if (decode) 637 unregister_bar(pi, idx); 638 639 switch (type) { 640 case PCIBAR_IO: 641 case PCIBAR_MEM32: 642 pi->pi_bar[idx].addr = addr; 643 break; 644 case PCIBAR_MEM64: 645 pi->pi_bar[idx].addr &= ~0xffffffffUL; 646 pi->pi_bar[idx].addr |= addr; 647 break; 648 case PCIBAR_MEMHI64: 649 pi->pi_bar[idx].addr &= 0xffffffff; 650 pi->pi_bar[idx].addr |= addr; 651 break; 652 default: 653 assert(0); 654 } 655 656 if (decode) 657 register_bar(pi, idx); 658 } 659 660 int 661 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 662 uint64_t size) 663 { 664 assert(idx >= 0 && idx <= PCI_BARMAX); 665 666 if ((size & (size - 1)) != 0) 667 size = 1UL << flsl(size); /* round up to a power of 2 */ 668 669 /* Enforce minimum BAR sizes required by the PCI standard */ 670 if (type == PCIBAR_IO) { 671 if (size < 4) 672 size = 4; 673 } else { 674 if (size < 16) 675 size = 16; 676 } 677 678 /* 679 * To reduce fragmentation of the MMIO space, we allocate the BARs by 680 * size. Therefore, don't allocate the BAR yet. We create a list of all 681 * BAR allocation which is sorted by BAR size. When all PCI devices are 682 * initialized, we will assign an address to the BARs. 683 */ 684 685 /* create a new list entry */ 686 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 687 memset(new_bar, 0, sizeof(*new_bar)); 688 new_bar->pdi = pdi; 689 new_bar->idx = idx; 690 new_bar->type = type; 691 new_bar->size = size; 692 693 /* 694 * Search for a BAR which size is lower than the size of our newly 695 * allocated BAR. 696 */ 697 struct pci_bar_allocation *bar = NULL; 698 TAILQ_FOREACH(bar, &pci_bars, chain) { 699 if (bar->size < size) { 700 break; 701 } 702 } 703 704 if (bar == NULL) { 705 /* 706 * Either the list is empty or new BAR is the smallest BAR of 707 * the list. Append it to the end of our list. 708 */ 709 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 710 } else { 711 /* 712 * The found BAR is smaller than our new BAR. For that reason, 713 * insert our new BAR before the found BAR. 714 */ 715 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 716 } 717 718 /* 719 * pci_passthru devices synchronize their physical and virtual command 720 * register on init. For that reason, the virtual cmd reg should be 721 * updated as early as possible. 722 */ 723 uint16_t enbit = 0; 724 switch (type) { 725 case PCIBAR_IO: 726 enbit = PCIM_CMD_PORTEN; 727 break; 728 case PCIBAR_MEM64: 729 case PCIBAR_MEM32: 730 enbit = PCIM_CMD_MEMEN; 731 break; 732 default: 733 enbit = 0; 734 break; 735 } 736 737 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 738 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 739 740 return (0); 741 } 742 743 static int 744 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 745 const enum pcibar_type type, const uint64_t size) 746 { 747 int error; 748 uint64_t *baseptr, limit, addr, mask, lobits, bar; 749 750 switch (type) { 751 case PCIBAR_NONE: 752 baseptr = NULL; 753 addr = mask = lobits = 0; 754 break; 755 case PCIBAR_IO: 756 baseptr = &pci_emul_iobase; 757 limit = PCI_EMUL_IOLIMIT; 758 mask = PCIM_BAR_IO_BASE; 759 lobits = PCIM_BAR_IO_SPACE; 760 break; 761 case PCIBAR_MEM64: 762 /* 763 * XXX 764 * Some drivers do not work well if the 64-bit BAR is allocated 765 * above 4GB. Allow for this by allocating small requests under 766 * 4GB unless then allocation size is larger than some arbitrary 767 * number (128MB currently). 768 */ 769 if (size > 128 * 1024 * 1024) { 770 baseptr = &pci_emul_membase64; 771 limit = pci_emul_memlim64; 772 mask = PCIM_BAR_MEM_BASE; 773 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 774 PCIM_BAR_MEM_PREFETCH; 775 } else { 776 baseptr = &pci_emul_membase32; 777 limit = PCI_EMUL_MEMLIMIT32; 778 mask = PCIM_BAR_MEM_BASE; 779 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 780 } 781 break; 782 case PCIBAR_MEM32: 783 baseptr = &pci_emul_membase32; 784 limit = PCI_EMUL_MEMLIMIT32; 785 mask = PCIM_BAR_MEM_BASE; 786 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 787 break; 788 default: 789 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 790 #ifdef FreeBSD 791 assert(0); 792 #else 793 abort(); 794 #endif 795 } 796 797 if (baseptr != NULL) { 798 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 799 if (error != 0) 800 return (error); 801 } 802 803 pdi->pi_bar[idx].type = type; 804 pdi->pi_bar[idx].addr = addr; 805 pdi->pi_bar[idx].size = size; 806 /* 807 * passthru devices are using same lobits as physical device they set 808 * this property 809 */ 810 if (pdi->pi_bar[idx].lobits != 0) { 811 lobits = pdi->pi_bar[idx].lobits; 812 } else { 813 pdi->pi_bar[idx].lobits = lobits; 814 } 815 816 /* Initialize the BAR register in config space */ 817 bar = (addr & mask) | lobits; 818 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 819 820 if (type == PCIBAR_MEM64) { 821 assert(idx + 1 <= PCI_BARMAX); 822 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 823 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 824 } 825 826 register_bar(pdi, idx); 827 828 return (0); 829 } 830 831 #define CAP_START_OFFSET 0x40 832 static int 833 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 834 { 835 int i, capoff, reallen; 836 uint16_t sts; 837 838 assert(caplen > 0); 839 840 reallen = roundup2(caplen, 4); /* dword aligned */ 841 842 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 843 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 844 capoff = CAP_START_OFFSET; 845 else 846 capoff = pi->pi_capend + 1; 847 848 /* Check if we have enough space */ 849 if (capoff + reallen > PCI_REGMAX + 1) 850 return (-1); 851 852 /* Set the previous capability pointer */ 853 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 854 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 855 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 856 } else 857 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 858 859 /* Copy the capability */ 860 for (i = 0; i < caplen; i++) 861 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 862 863 /* Set the next capability pointer */ 864 pci_set_cfgdata8(pi, capoff + 1, 0); 865 866 pi->pi_prevcap = capoff; 867 pi->pi_capend = capoff + reallen - 1; 868 return (0); 869 } 870 871 static struct pci_devemu * 872 pci_emul_finddev(const char *name) 873 { 874 struct pci_devemu **pdpp, *pdp; 875 876 SET_FOREACH(pdpp, pci_devemu_set) { 877 pdp = *pdpp; 878 if (!strcmp(pdp->pe_emu, name)) { 879 return (pdp); 880 } 881 } 882 883 return (NULL); 884 } 885 886 static int 887 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 888 int func, struct funcinfo *fi) 889 { 890 struct pci_devinst *pdi; 891 int err; 892 893 pdi = calloc(1, sizeof(struct pci_devinst)); 894 895 pdi->pi_vmctx = ctx; 896 pdi->pi_bus = bus; 897 pdi->pi_slot = slot; 898 pdi->pi_func = func; 899 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 900 pdi->pi_lintr.pin = 0; 901 pdi->pi_lintr.state = IDLE; 902 pdi->pi_lintr.pirq_pin = 0; 903 pdi->pi_lintr.ioapic_irq = 0; 904 pdi->pi_d = pde; 905 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 906 907 /* Disable legacy interrupts */ 908 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 909 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 910 911 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 912 913 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 914 if (err == 0) 915 fi->fi_devi = pdi; 916 else 917 free(pdi); 918 919 return (err); 920 } 921 922 void 923 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 924 { 925 int mmc; 926 927 /* Number of msi messages must be a power of 2 between 1 and 32 */ 928 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 929 mmc = ffs(msgnum) - 1; 930 931 bzero(msicap, sizeof(struct msicap)); 932 msicap->capid = PCIY_MSI; 933 msicap->nextptr = nextptr; 934 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 935 } 936 937 int 938 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 939 { 940 struct msicap msicap; 941 942 pci_populate_msicap(&msicap, msgnum, 0); 943 944 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 945 } 946 947 static void 948 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 949 uint32_t msix_tab_size) 950 { 951 952 assert(msix_tab_size % 4096 == 0); 953 954 bzero(msixcap, sizeof(struct msixcap)); 955 msixcap->capid = PCIY_MSIX; 956 957 /* 958 * Message Control Register, all fields set to 959 * zero except for the Table Size. 960 * Note: Table size N is encoded as N-1 961 */ 962 msixcap->msgctrl = msgnum - 1; 963 964 /* 965 * MSI-X BAR setup: 966 * - MSI-X table start at offset 0 967 * - PBA table starts at a 4K aligned offset after the MSI-X table 968 */ 969 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 970 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 971 } 972 973 static void 974 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 975 { 976 int i, table_size; 977 978 assert(table_entries > 0); 979 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 980 981 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 982 pi->pi_msix.table = calloc(1, table_size); 983 984 /* set mask bit of vector control register */ 985 for (i = 0; i < table_entries; i++) 986 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 987 } 988 989 int 990 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 991 { 992 uint32_t tab_size; 993 struct msixcap msixcap; 994 995 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 996 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 997 998 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 999 1000 /* Align table size to nearest 4K */ 1001 tab_size = roundup2(tab_size, 4096); 1002 1003 pi->pi_msix.table_bar = barnum; 1004 pi->pi_msix.pba_bar = barnum; 1005 pi->pi_msix.table_offset = 0; 1006 pi->pi_msix.table_count = msgnum; 1007 pi->pi_msix.pba_offset = tab_size; 1008 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1009 1010 pci_msix_table_init(pi, msgnum); 1011 1012 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1013 1014 /* allocate memory for MSI-X Table and PBA */ 1015 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1016 tab_size + pi->pi_msix.pba_size); 1017 1018 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1019 sizeof(msixcap))); 1020 } 1021 1022 static void 1023 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1024 int bytes, uint32_t val) 1025 { 1026 uint16_t msgctrl, rwmask; 1027 int off; 1028 1029 off = offset - capoff; 1030 /* Message Control Register */ 1031 if (off == 2 && bytes == 2) { 1032 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1033 msgctrl = pci_get_cfgdata16(pi, offset); 1034 msgctrl &= ~rwmask; 1035 msgctrl |= val & rwmask; 1036 val = msgctrl; 1037 1038 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1039 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1040 pci_lintr_update(pi); 1041 } 1042 1043 CFGWRITE(pi, offset, val, bytes); 1044 } 1045 1046 static void 1047 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1048 int bytes, uint32_t val) 1049 { 1050 uint16_t msgctrl, rwmask, msgdata, mme; 1051 uint32_t addrlo; 1052 1053 /* 1054 * If guest is writing to the message control register make sure 1055 * we do not overwrite read-only fields. 1056 */ 1057 if ((offset - capoff) == 2 && bytes == 2) { 1058 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1059 msgctrl = pci_get_cfgdata16(pi, offset); 1060 msgctrl &= ~rwmask; 1061 msgctrl |= val & rwmask; 1062 val = msgctrl; 1063 } 1064 CFGWRITE(pi, offset, val, bytes); 1065 1066 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1067 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1068 if (msgctrl & PCIM_MSICTRL_64BIT) 1069 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1070 else 1071 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1072 1073 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1074 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 1075 if (pi->pi_msi.enabled) { 1076 pi->pi_msi.addr = addrlo; 1077 pi->pi_msi.msg_data = msgdata; 1078 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 1079 } else { 1080 pi->pi_msi.maxmsgnum = 0; 1081 } 1082 pci_lintr_update(pi); 1083 } 1084 1085 void 1086 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1087 int bytes, uint32_t val) 1088 { 1089 1090 /* XXX don't write to the readonly parts */ 1091 CFGWRITE(pi, offset, val, bytes); 1092 } 1093 1094 #define PCIECAP_VERSION 0x2 1095 int 1096 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1097 { 1098 int err; 1099 struct pciecap pciecap; 1100 1101 bzero(&pciecap, sizeof(pciecap)); 1102 1103 /* 1104 * Use the integrated endpoint type for endpoints on a root complex bus. 1105 * 1106 * NB: bhyve currently only supports a single PCI bus that is the root 1107 * complex bus, so all endpoints are integrated. 1108 */ 1109 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1110 type = PCIEM_TYPE_ROOT_INT_EP; 1111 1112 pciecap.capid = PCIY_EXPRESS; 1113 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1114 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1115 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1116 pciecap.link_status = 0x11; /* gen1, x1 */ 1117 } 1118 1119 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1120 return (err); 1121 } 1122 1123 /* 1124 * This function assumes that 'coff' is in the capabilities region of the 1125 * config space. A capoff parameter of zero will force a search for the 1126 * offset and type. 1127 */ 1128 void 1129 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1130 uint8_t capoff, int capid) 1131 { 1132 uint8_t nextoff; 1133 1134 /* Do not allow un-aligned writes */ 1135 if ((offset & (bytes - 1)) != 0) 1136 return; 1137 1138 if (capoff == 0) { 1139 /* Find the capability that we want to update */ 1140 capoff = CAP_START_OFFSET; 1141 while (1) { 1142 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1143 if (nextoff == 0) 1144 break; 1145 if (offset >= capoff && offset < nextoff) 1146 break; 1147 1148 capoff = nextoff; 1149 } 1150 assert(offset >= capoff); 1151 capid = pci_get_cfgdata8(pi, capoff); 1152 } 1153 1154 /* 1155 * Capability ID and Next Capability Pointer are readonly. 1156 * However, some o/s's do 4-byte writes that include these. 1157 * For this case, trim the write back to 2 bytes and adjust 1158 * the data. 1159 */ 1160 if (offset == capoff || offset == capoff + 1) { 1161 if (offset == capoff && bytes == 4) { 1162 bytes = 2; 1163 offset += 2; 1164 val >>= 16; 1165 } else 1166 return; 1167 } 1168 1169 switch (capid) { 1170 case PCIY_MSI: 1171 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1172 break; 1173 case PCIY_MSIX: 1174 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1175 break; 1176 case PCIY_EXPRESS: 1177 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1178 break; 1179 default: 1180 break; 1181 } 1182 } 1183 1184 static int 1185 pci_emul_iscap(struct pci_devinst *pi, int offset) 1186 { 1187 uint16_t sts; 1188 1189 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1190 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1191 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1192 return (1); 1193 } 1194 return (0); 1195 } 1196 1197 static int 1198 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1199 int size, uint64_t *val, void *arg1, long arg2) 1200 { 1201 /* 1202 * Ignore writes; return 0xff's for reads. The mem read code 1203 * will take care of truncating to the correct size. 1204 */ 1205 if (dir == MEM_F_READ) { 1206 *val = 0xffffffffffffffff; 1207 } 1208 1209 return (0); 1210 } 1211 1212 static int 1213 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1214 int bytes, uint64_t *val, void *arg1, long arg2) 1215 { 1216 int bus, slot, func, coff, in; 1217 1218 coff = addr & 0xfff; 1219 func = (addr >> 12) & 0x7; 1220 slot = (addr >> 15) & 0x1f; 1221 bus = (addr >> 20) & 0xff; 1222 in = (dir == MEM_F_READ); 1223 if (in) 1224 *val = ~0UL; 1225 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1226 return (0); 1227 } 1228 1229 uint64_t 1230 pci_ecfg_base(void) 1231 { 1232 1233 return (PCI_EMUL_ECFG_BASE); 1234 } 1235 1236 #define BUSIO_ROUNDUP 32 1237 #define BUSMEM32_ROUNDUP (1024 * 1024) 1238 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1239 1240 int 1241 init_pci(struct vmctx *ctx) 1242 { 1243 char node_name[sizeof("pci.XXX.XX.X")]; 1244 struct mem_range mr; 1245 struct pci_devemu *pde; 1246 struct businfo *bi; 1247 struct slotinfo *si; 1248 struct funcinfo *fi; 1249 nvlist_t *nvl; 1250 const char *emul; 1251 size_t lowmem; 1252 int bus, slot, func; 1253 int error; 1254 1255 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1256 errx(EX_OSERR, "Invalid lowmem limit"); 1257 1258 pci_emul_iobase = PCI_EMUL_IOBASE; 1259 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1260 1261 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1262 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1263 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1264 1265 for (bus = 0; bus < MAXBUSES; bus++) { 1266 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1267 nvl = find_config_node(node_name); 1268 if (nvl == NULL) 1269 continue; 1270 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1271 bi = pci_businfo[bus]; 1272 1273 /* 1274 * Keep track of the i/o and memory resources allocated to 1275 * this bus. 1276 */ 1277 bi->iobase = pci_emul_iobase; 1278 bi->membase32 = pci_emul_membase32; 1279 bi->membase64 = pci_emul_membase64; 1280 1281 /* first run: init devices */ 1282 for (slot = 0; slot < MAXSLOTS; slot++) { 1283 si = &bi->slotinfo[slot]; 1284 for (func = 0; func < MAXFUNCS; func++) { 1285 fi = &si->si_funcs[func]; 1286 snprintf(node_name, sizeof(node_name), 1287 "pci.%d.%d.%d", bus, slot, func); 1288 nvl = find_config_node(node_name); 1289 if (nvl == NULL) 1290 continue; 1291 1292 fi->fi_config = nvl; 1293 emul = get_config_value_node(nvl, "device"); 1294 if (emul == NULL) { 1295 EPRINTLN("pci slot %d:%d:%d: missing " 1296 "\"device\" value", bus, slot, func); 1297 return (EINVAL); 1298 } 1299 pde = pci_emul_finddev(emul); 1300 if (pde == NULL) { 1301 EPRINTLN("pci slot %d:%d:%d: unknown " 1302 "device \"%s\"", bus, slot, func, 1303 emul); 1304 return (EINVAL); 1305 } 1306 if (pde->pe_alias != NULL) { 1307 EPRINTLN("pci slot %d:%d:%d: legacy " 1308 "device \"%s\", use \"%s\" instead", 1309 bus, slot, func, emul, 1310 pde->pe_alias); 1311 return (EINVAL); 1312 } 1313 fi->fi_pde = pde; 1314 error = pci_emul_init(ctx, pde, bus, slot, 1315 func, fi); 1316 if (error) 1317 return (error); 1318 } 1319 } 1320 1321 /* second run: assign BARs and free list */ 1322 struct pci_bar_allocation *bar; 1323 struct pci_bar_allocation *bar_tmp; 1324 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1325 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1326 bar->size); 1327 free(bar); 1328 } 1329 TAILQ_INIT(&pci_bars); 1330 1331 /* 1332 * Add some slop to the I/O and memory resources decoded by 1333 * this bus to give a guest some flexibility if it wants to 1334 * reprogram the BARs. 1335 */ 1336 pci_emul_iobase += BUSIO_ROUNDUP; 1337 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1338 bi->iolimit = pci_emul_iobase; 1339 1340 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1341 pci_emul_membase32 = roundup2(pci_emul_membase32, 1342 BUSMEM32_ROUNDUP); 1343 bi->memlimit32 = pci_emul_membase32; 1344 1345 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1346 pci_emul_membase64 = roundup2(pci_emul_membase64, 1347 BUSMEM64_ROUNDUP); 1348 bi->memlimit64 = pci_emul_membase64; 1349 } 1350 1351 /* 1352 * PCI backends are initialized before routing INTx interrupts 1353 * so that LPC devices are able to reserve ISA IRQs before 1354 * routing PIRQ pins. 1355 */ 1356 for (bus = 0; bus < MAXBUSES; bus++) { 1357 if ((bi = pci_businfo[bus]) == NULL) 1358 continue; 1359 1360 for (slot = 0; slot < MAXSLOTS; slot++) { 1361 si = &bi->slotinfo[slot]; 1362 for (func = 0; func < MAXFUNCS; func++) { 1363 fi = &si->si_funcs[func]; 1364 if (fi->fi_devi == NULL) 1365 continue; 1366 pci_lintr_route(fi->fi_devi); 1367 } 1368 } 1369 } 1370 lpc_pirq_routed(); 1371 1372 /* 1373 * The guest physical memory map looks like the following: 1374 * [0, lowmem) guest system memory 1375 * [lowmem, 0xC0000000) memory hole (may be absent) 1376 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1377 * [0xE0000000, 0xF0000000) PCI extended config window 1378 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1379 * [4GB, 4GB + highmem) 1380 */ 1381 1382 /* 1383 * Accesses to memory addresses that are not allocated to system 1384 * memory or PCI devices return 0xff's. 1385 */ 1386 lowmem = vm_get_lowmem_size(ctx); 1387 bzero(&mr, sizeof(struct mem_range)); 1388 mr.name = "PCI hole"; 1389 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1390 mr.base = lowmem; 1391 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1392 mr.handler = pci_emul_fallback_handler; 1393 error = register_mem_fallback(&mr); 1394 assert(error == 0); 1395 1396 /* PCI extended config space */ 1397 bzero(&mr, sizeof(struct mem_range)); 1398 mr.name = "PCI ECFG"; 1399 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1400 mr.base = PCI_EMUL_ECFG_BASE; 1401 mr.size = PCI_EMUL_ECFG_SIZE; 1402 mr.handler = pci_emul_ecfg_handler; 1403 error = register_mem(&mr); 1404 assert(error == 0); 1405 1406 return (0); 1407 } 1408 1409 static void 1410 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1411 void *arg) 1412 { 1413 1414 dsdt_line(" Package ()"); 1415 dsdt_line(" {"); 1416 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1417 dsdt_line(" 0x%02X,", pin - 1); 1418 dsdt_line(" Zero,"); 1419 dsdt_line(" 0x%X", ioapic_irq); 1420 dsdt_line(" },"); 1421 } 1422 1423 static void 1424 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1425 void *arg) 1426 { 1427 char *name; 1428 1429 name = lpc_pirq_name(pirq_pin); 1430 if (name == NULL) 1431 return; 1432 dsdt_line(" Package ()"); 1433 dsdt_line(" {"); 1434 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1435 dsdt_line(" 0x%02X,", pin - 1); 1436 dsdt_line(" %s,", name); 1437 dsdt_line(" 0x00"); 1438 dsdt_line(" },"); 1439 free(name); 1440 } 1441 1442 /* 1443 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1444 * corresponding to each PCI bus. 1445 */ 1446 static void 1447 pci_bus_write_dsdt(int bus) 1448 { 1449 struct businfo *bi; 1450 struct slotinfo *si; 1451 struct pci_devinst *pi; 1452 int count, func, slot; 1453 1454 /* 1455 * If there are no devices on this 'bus' then just return. 1456 */ 1457 if ((bi = pci_businfo[bus]) == NULL) { 1458 /* 1459 * Bus 0 is special because it decodes the I/O ports used 1460 * for PCI config space access even if there are no devices 1461 * on it. 1462 */ 1463 if (bus != 0) 1464 return; 1465 } 1466 1467 dsdt_line(" Device (PC%02X)", bus); 1468 dsdt_line(" {"); 1469 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1470 1471 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1472 dsdt_line(" {"); 1473 dsdt_line(" Return (0x%08X)", bus); 1474 dsdt_line(" }"); 1475 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1476 dsdt_line(" {"); 1477 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1478 "MaxFixed, PosDecode,"); 1479 dsdt_line(" 0x0000, // Granularity"); 1480 dsdt_line(" 0x%04X, // Range Minimum", bus); 1481 dsdt_line(" 0x%04X, // Range Maximum", bus); 1482 dsdt_line(" 0x0000, // Translation Offset"); 1483 dsdt_line(" 0x0001, // Length"); 1484 dsdt_line(" ,, )"); 1485 1486 if (bus == 0) { 1487 dsdt_indent(3); 1488 dsdt_fixed_ioport(0xCF8, 8); 1489 dsdt_unindent(3); 1490 1491 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1492 "PosDecode, EntireRange,"); 1493 dsdt_line(" 0x0000, // Granularity"); 1494 dsdt_line(" 0x0000, // Range Minimum"); 1495 dsdt_line(" 0x0CF7, // Range Maximum"); 1496 dsdt_line(" 0x0000, // Translation Offset"); 1497 dsdt_line(" 0x0CF8, // Length"); 1498 dsdt_line(" ,, , TypeStatic)"); 1499 1500 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1501 "PosDecode, EntireRange,"); 1502 dsdt_line(" 0x0000, // Granularity"); 1503 dsdt_line(" 0x0D00, // Range Minimum"); 1504 dsdt_line(" 0x%04X, // Range Maximum", 1505 PCI_EMUL_IOBASE - 1); 1506 dsdt_line(" 0x0000, // Translation Offset"); 1507 dsdt_line(" 0x%04X, // Length", 1508 PCI_EMUL_IOBASE - 0x0D00); 1509 dsdt_line(" ,, , TypeStatic)"); 1510 1511 if (bi == NULL) { 1512 dsdt_line(" })"); 1513 goto done; 1514 } 1515 } 1516 assert(bi != NULL); 1517 1518 /* i/o window */ 1519 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1520 "PosDecode, EntireRange,"); 1521 dsdt_line(" 0x0000, // Granularity"); 1522 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1523 dsdt_line(" 0x%04X, // Range Maximum", 1524 bi->iolimit - 1); 1525 dsdt_line(" 0x0000, // Translation Offset"); 1526 dsdt_line(" 0x%04X, // Length", 1527 bi->iolimit - bi->iobase); 1528 dsdt_line(" ,, , TypeStatic)"); 1529 1530 /* mmio window (32-bit) */ 1531 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1532 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1533 dsdt_line(" 0x00000000, // Granularity"); 1534 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1535 dsdt_line(" 0x%08X, // Range Maximum\n", 1536 bi->memlimit32 - 1); 1537 dsdt_line(" 0x00000000, // Translation Offset"); 1538 dsdt_line(" 0x%08X, // Length\n", 1539 bi->memlimit32 - bi->membase32); 1540 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1541 1542 /* mmio window (64-bit) */ 1543 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1544 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1545 dsdt_line(" 0x0000000000000000, // Granularity"); 1546 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1547 dsdt_line(" 0x%016lX, // Range Maximum\n", 1548 bi->memlimit64 - 1); 1549 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1550 dsdt_line(" 0x%016lX, // Length\n", 1551 bi->memlimit64 - bi->membase64); 1552 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1553 dsdt_line(" })"); 1554 1555 count = pci_count_lintr(bus); 1556 if (count != 0) { 1557 dsdt_indent(2); 1558 dsdt_line("Name (PPRT, Package ()"); 1559 dsdt_line("{"); 1560 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1561 dsdt_line("})"); 1562 dsdt_line("Name (APRT, Package ()"); 1563 dsdt_line("{"); 1564 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1565 dsdt_line("})"); 1566 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1567 dsdt_line("{"); 1568 dsdt_line(" If (PICM)"); 1569 dsdt_line(" {"); 1570 dsdt_line(" Return (APRT)"); 1571 dsdt_line(" }"); 1572 dsdt_line(" Else"); 1573 dsdt_line(" {"); 1574 dsdt_line(" Return (PPRT)"); 1575 dsdt_line(" }"); 1576 dsdt_line("}"); 1577 dsdt_unindent(2); 1578 } 1579 1580 dsdt_indent(2); 1581 for (slot = 0; slot < MAXSLOTS; slot++) { 1582 si = &bi->slotinfo[slot]; 1583 for (func = 0; func < MAXFUNCS; func++) { 1584 pi = si->si_funcs[func].fi_devi; 1585 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1586 pi->pi_d->pe_write_dsdt(pi); 1587 } 1588 } 1589 dsdt_unindent(2); 1590 done: 1591 dsdt_line(" }"); 1592 } 1593 1594 void 1595 pci_write_dsdt(void) 1596 { 1597 int bus; 1598 1599 dsdt_indent(1); 1600 dsdt_line("Name (PICM, 0x00)"); 1601 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1602 dsdt_line("{"); 1603 dsdt_line(" Store (Arg0, PICM)"); 1604 dsdt_line("}"); 1605 dsdt_line(""); 1606 dsdt_line("Scope (_SB)"); 1607 dsdt_line("{"); 1608 for (bus = 0; bus < MAXBUSES; bus++) 1609 pci_bus_write_dsdt(bus); 1610 dsdt_line("}"); 1611 dsdt_unindent(1); 1612 } 1613 1614 int 1615 pci_bus_configured(int bus) 1616 { 1617 assert(bus >= 0 && bus < MAXBUSES); 1618 return (pci_businfo[bus] != NULL); 1619 } 1620 1621 int 1622 pci_msi_enabled(struct pci_devinst *pi) 1623 { 1624 return (pi->pi_msi.enabled); 1625 } 1626 1627 int 1628 pci_msi_maxmsgnum(struct pci_devinst *pi) 1629 { 1630 if (pi->pi_msi.enabled) 1631 return (pi->pi_msi.maxmsgnum); 1632 else 1633 return (0); 1634 } 1635 1636 int 1637 pci_msix_enabled(struct pci_devinst *pi) 1638 { 1639 1640 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1641 } 1642 1643 void 1644 pci_generate_msix(struct pci_devinst *pi, int index) 1645 { 1646 struct msix_table_entry *mte; 1647 1648 if (!pci_msix_enabled(pi)) 1649 return; 1650 1651 if (pi->pi_msix.function_mask) 1652 return; 1653 1654 if (index >= pi->pi_msix.table_count) 1655 return; 1656 1657 mte = &pi->pi_msix.table[index]; 1658 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1659 /* XXX Set PBA bit if interrupt is disabled */ 1660 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1661 } 1662 } 1663 1664 void 1665 pci_generate_msi(struct pci_devinst *pi, int index) 1666 { 1667 1668 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1669 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1670 pi->pi_msi.msg_data + index); 1671 } 1672 } 1673 1674 static bool 1675 pci_lintr_permitted(struct pci_devinst *pi) 1676 { 1677 uint16_t cmd; 1678 1679 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1680 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1681 (cmd & PCIM_CMD_INTxDIS))); 1682 } 1683 1684 void 1685 pci_lintr_request(struct pci_devinst *pi) 1686 { 1687 struct businfo *bi; 1688 struct slotinfo *si; 1689 int bestpin, bestcount, pin; 1690 1691 bi = pci_businfo[pi->pi_bus]; 1692 assert(bi != NULL); 1693 1694 /* 1695 * Just allocate a pin from our slot. The pin will be 1696 * assigned IRQs later when interrupts are routed. 1697 */ 1698 si = &bi->slotinfo[pi->pi_slot]; 1699 bestpin = 0; 1700 bestcount = si->si_intpins[0].ii_count; 1701 for (pin = 1; pin < 4; pin++) { 1702 if (si->si_intpins[pin].ii_count < bestcount) { 1703 bestpin = pin; 1704 bestcount = si->si_intpins[pin].ii_count; 1705 } 1706 } 1707 1708 si->si_intpins[bestpin].ii_count++; 1709 pi->pi_lintr.pin = bestpin + 1; 1710 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1711 } 1712 1713 static void 1714 pci_lintr_route(struct pci_devinst *pi) 1715 { 1716 struct businfo *bi; 1717 struct intxinfo *ii; 1718 1719 if (pi->pi_lintr.pin == 0) 1720 return; 1721 1722 bi = pci_businfo[pi->pi_bus]; 1723 assert(bi != NULL); 1724 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1725 1726 /* 1727 * Attempt to allocate an I/O APIC pin for this intpin if one 1728 * is not yet assigned. 1729 */ 1730 if (ii->ii_ioapic_irq == 0) 1731 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1732 assert(ii->ii_ioapic_irq > 0); 1733 1734 /* 1735 * Attempt to allocate a PIRQ pin for this intpin if one is 1736 * not yet assigned. 1737 */ 1738 if (ii->ii_pirq_pin == 0) 1739 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1740 assert(ii->ii_pirq_pin > 0); 1741 1742 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1743 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1744 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1745 } 1746 1747 void 1748 pci_lintr_assert(struct pci_devinst *pi) 1749 { 1750 1751 assert(pi->pi_lintr.pin > 0); 1752 1753 pthread_mutex_lock(&pi->pi_lintr.lock); 1754 if (pi->pi_lintr.state == IDLE) { 1755 if (pci_lintr_permitted(pi)) { 1756 pi->pi_lintr.state = ASSERTED; 1757 pci_irq_assert(pi); 1758 } else 1759 pi->pi_lintr.state = PENDING; 1760 } 1761 pthread_mutex_unlock(&pi->pi_lintr.lock); 1762 } 1763 1764 void 1765 pci_lintr_deassert(struct pci_devinst *pi) 1766 { 1767 1768 assert(pi->pi_lintr.pin > 0); 1769 1770 pthread_mutex_lock(&pi->pi_lintr.lock); 1771 if (pi->pi_lintr.state == ASSERTED) { 1772 pi->pi_lintr.state = IDLE; 1773 pci_irq_deassert(pi); 1774 } else if (pi->pi_lintr.state == PENDING) 1775 pi->pi_lintr.state = IDLE; 1776 pthread_mutex_unlock(&pi->pi_lintr.lock); 1777 } 1778 1779 static void 1780 pci_lintr_update(struct pci_devinst *pi) 1781 { 1782 1783 pthread_mutex_lock(&pi->pi_lintr.lock); 1784 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1785 pci_irq_deassert(pi); 1786 pi->pi_lintr.state = PENDING; 1787 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1788 pi->pi_lintr.state = ASSERTED; 1789 pci_irq_assert(pi); 1790 } 1791 pthread_mutex_unlock(&pi->pi_lintr.lock); 1792 #ifndef __FreeBSD__ 1793 if (pi->pi_d->pe_lintrupdate != NULL) { 1794 pi->pi_d->pe_lintrupdate(pi); 1795 } 1796 #endif /* __FreeBSD__ */ 1797 } 1798 1799 int 1800 pci_count_lintr(int bus) 1801 { 1802 int count, slot, pin; 1803 struct slotinfo *slotinfo; 1804 1805 count = 0; 1806 if (pci_businfo[bus] != NULL) { 1807 for (slot = 0; slot < MAXSLOTS; slot++) { 1808 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1809 for (pin = 0; pin < 4; pin++) { 1810 if (slotinfo->si_intpins[pin].ii_count != 0) 1811 count++; 1812 } 1813 } 1814 } 1815 return (count); 1816 } 1817 1818 void 1819 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1820 { 1821 struct businfo *bi; 1822 struct slotinfo *si; 1823 struct intxinfo *ii; 1824 int slot, pin; 1825 1826 if ((bi = pci_businfo[bus]) == NULL) 1827 return; 1828 1829 for (slot = 0; slot < MAXSLOTS; slot++) { 1830 si = &bi->slotinfo[slot]; 1831 for (pin = 0; pin < 4; pin++) { 1832 ii = &si->si_intpins[pin]; 1833 if (ii->ii_count != 0) 1834 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1835 ii->ii_ioapic_irq, arg); 1836 } 1837 } 1838 } 1839 1840 /* 1841 * Return 1 if the emulated device in 'slot' is a multi-function device. 1842 * Return 0 otherwise. 1843 */ 1844 static int 1845 pci_emul_is_mfdev(int bus, int slot) 1846 { 1847 struct businfo *bi; 1848 struct slotinfo *si; 1849 int f, numfuncs; 1850 1851 numfuncs = 0; 1852 if ((bi = pci_businfo[bus]) != NULL) { 1853 si = &bi->slotinfo[slot]; 1854 for (f = 0; f < MAXFUNCS; f++) { 1855 if (si->si_funcs[f].fi_devi != NULL) { 1856 numfuncs++; 1857 } 1858 } 1859 } 1860 return (numfuncs > 1); 1861 } 1862 1863 /* 1864 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1865 * whether or not is a multi-function being emulated in the pci 'slot'. 1866 */ 1867 static void 1868 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1869 { 1870 int mfdev; 1871 1872 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1873 mfdev = pci_emul_is_mfdev(bus, slot); 1874 switch (bytes) { 1875 case 1: 1876 case 2: 1877 *rv &= ~PCIM_MFDEV; 1878 if (mfdev) { 1879 *rv |= PCIM_MFDEV; 1880 } 1881 break; 1882 case 4: 1883 *rv &= ~(PCIM_MFDEV << 16); 1884 if (mfdev) { 1885 *rv |= (PCIM_MFDEV << 16); 1886 } 1887 break; 1888 } 1889 } 1890 } 1891 1892 /* 1893 * Update device state in response to changes to the PCI command 1894 * register. 1895 */ 1896 void 1897 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1898 { 1899 int i; 1900 uint16_t changed, new; 1901 1902 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1903 changed = old ^ new; 1904 1905 /* 1906 * If the MMIO or I/O address space decoding has changed then 1907 * register/unregister all BARs that decode that address space. 1908 */ 1909 for (i = 0; i <= PCI_BARMAX; i++) { 1910 switch (pi->pi_bar[i].type) { 1911 case PCIBAR_NONE: 1912 case PCIBAR_MEMHI64: 1913 break; 1914 case PCIBAR_IO: 1915 /* I/O address space decoding changed? */ 1916 if (changed & PCIM_CMD_PORTEN) { 1917 if (new & PCIM_CMD_PORTEN) 1918 register_bar(pi, i); 1919 else 1920 unregister_bar(pi, i); 1921 } 1922 break; 1923 case PCIBAR_MEM32: 1924 case PCIBAR_MEM64: 1925 /* MMIO address space decoding changed? */ 1926 if (changed & PCIM_CMD_MEMEN) { 1927 if (new & PCIM_CMD_MEMEN) 1928 register_bar(pi, i); 1929 else 1930 unregister_bar(pi, i); 1931 } 1932 break; 1933 default: 1934 assert(0); 1935 } 1936 } 1937 1938 /* 1939 * If INTx has been unmasked and is pending, assert the 1940 * interrupt. 1941 */ 1942 pci_lintr_update(pi); 1943 } 1944 1945 static void 1946 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1947 { 1948 int rshift; 1949 uint32_t cmd, old, readonly; 1950 1951 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1952 1953 /* 1954 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1955 * 1956 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1957 * 'write 1 to clear'. However these bits are not set to '1' by 1958 * any device emulation so it is simpler to treat them as readonly. 1959 */ 1960 rshift = (coff & 0x3) * 8; 1961 readonly = 0xFFFFF880 >> rshift; 1962 1963 old = CFGREAD(pi, coff, bytes); 1964 new &= ~readonly; 1965 new |= (old & readonly); 1966 CFGWRITE(pi, coff, new, bytes); /* update config */ 1967 1968 pci_emul_cmd_changed(pi, cmd); 1969 } 1970 1971 static void 1972 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1973 int coff, int bytes, uint32_t *eax) 1974 { 1975 struct businfo *bi; 1976 struct slotinfo *si; 1977 struct pci_devinst *pi; 1978 struct pci_devemu *pe; 1979 int idx, needcfg; 1980 uint64_t addr, mask; 1981 uint64_t bar = 0; 1982 1983 if ((bi = pci_businfo[bus]) != NULL) { 1984 si = &bi->slotinfo[slot]; 1985 pi = si->si_funcs[func].fi_devi; 1986 } else 1987 pi = NULL; 1988 1989 /* 1990 * Just return if there is no device at this slot:func or if the 1991 * the guest is doing an un-aligned access. 1992 */ 1993 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1994 (coff & (bytes - 1)) != 0) { 1995 if (in) 1996 *eax = 0xffffffff; 1997 return; 1998 } 1999 2000 /* 2001 * Ignore all writes beyond the standard config space and return all 2002 * ones on reads. 2003 */ 2004 if (coff >= PCI_REGMAX + 1) { 2005 if (in) { 2006 *eax = 0xffffffff; 2007 /* 2008 * Extended capabilities begin at offset 256 in config 2009 * space. Absence of extended capabilities is signaled 2010 * with all 0s in the extended capability header at 2011 * offset 256. 2012 */ 2013 if (coff <= PCI_REGMAX + 4) 2014 *eax = 0x00000000; 2015 } 2016 return; 2017 } 2018 2019 pe = pi->pi_d; 2020 2021 /* 2022 * Config read 2023 */ 2024 if (in) { 2025 /* Let the device emulation override the default handler */ 2026 if (pe->pe_cfgread != NULL) { 2027 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 2028 eax); 2029 } else { 2030 needcfg = 1; 2031 } 2032 2033 if (needcfg) 2034 *eax = CFGREAD(pi, coff, bytes); 2035 2036 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 2037 } else { 2038 /* Let the device emulation override the default handler */ 2039 if (pe->pe_cfgwrite != NULL && 2040 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 2041 return; 2042 2043 /* 2044 * Special handling for write to BAR registers 2045 */ 2046 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 2047 /* 2048 * Ignore writes to BAR registers that are not 2049 * 4-byte aligned. 2050 */ 2051 if (bytes != 4 || (coff & 0x3) != 0) 2052 return; 2053 idx = (coff - PCIR_BAR(0)) / 4; 2054 mask = ~(pi->pi_bar[idx].size - 1); 2055 switch (pi->pi_bar[idx].type) { 2056 case PCIBAR_NONE: 2057 pi->pi_bar[idx].addr = bar = 0; 2058 break; 2059 case PCIBAR_IO: 2060 addr = *eax & mask; 2061 addr &= 0xffff; 2062 bar = addr | pi->pi_bar[idx].lobits; 2063 /* 2064 * Register the new BAR value for interception 2065 */ 2066 if (addr != pi->pi_bar[idx].addr) { 2067 update_bar_address(pi, addr, idx, 2068 PCIBAR_IO); 2069 } 2070 break; 2071 case PCIBAR_MEM32: 2072 addr = bar = *eax & mask; 2073 bar |= pi->pi_bar[idx].lobits; 2074 if (addr != pi->pi_bar[idx].addr) { 2075 update_bar_address(pi, addr, idx, 2076 PCIBAR_MEM32); 2077 } 2078 break; 2079 case PCIBAR_MEM64: 2080 addr = bar = *eax & mask; 2081 bar |= pi->pi_bar[idx].lobits; 2082 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2083 update_bar_address(pi, addr, idx, 2084 PCIBAR_MEM64); 2085 } 2086 break; 2087 case PCIBAR_MEMHI64: 2088 mask = ~(pi->pi_bar[idx - 1].size - 1); 2089 addr = ((uint64_t)*eax << 32) & mask; 2090 bar = addr >> 32; 2091 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2092 update_bar_address(pi, addr, idx - 1, 2093 PCIBAR_MEMHI64); 2094 } 2095 break; 2096 default: 2097 assert(0); 2098 } 2099 pci_set_cfgdata32(pi, coff, bar); 2100 2101 } else if (pci_emul_iscap(pi, coff)) { 2102 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 2103 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2104 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2105 } else { 2106 CFGWRITE(pi, coff, *eax, bytes); 2107 } 2108 } 2109 } 2110 2111 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2112 2113 static int 2114 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2115 uint32_t *eax, void *arg) 2116 { 2117 uint32_t x; 2118 2119 if (bytes != 4) { 2120 if (in) 2121 *eax = (bytes == 2) ? 0xffff : 0xff; 2122 return (0); 2123 } 2124 2125 if (in) { 2126 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2127 if (cfgenable) 2128 x |= CONF1_ENABLE; 2129 *eax = x; 2130 } else { 2131 x = *eax; 2132 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2133 cfgoff = (x & PCI_REGMAX) & ~0x03; 2134 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2135 cfgslot = (x >> 11) & PCI_SLOTMAX; 2136 cfgbus = (x >> 16) & PCI_BUSMAX; 2137 } 2138 2139 return (0); 2140 } 2141 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2142 2143 static int 2144 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2145 uint32_t *eax, void *arg) 2146 { 2147 int coff; 2148 2149 assert(bytes == 1 || bytes == 2 || bytes == 4); 2150 2151 coff = cfgoff + (port - CONF1_DATA_PORT); 2152 if (cfgenable) { 2153 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2154 eax); 2155 } else { 2156 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2157 if (in) 2158 *eax = 0xffffffff; 2159 } 2160 return (0); 2161 } 2162 2163 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2164 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2165 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2166 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2167 2168 #define PCI_EMUL_TEST 2169 #ifdef PCI_EMUL_TEST 2170 /* 2171 * Define a dummy test device 2172 */ 2173 #define DIOSZ 8 2174 #define DMEMSZ 4096 2175 struct pci_emul_dsoftc { 2176 uint8_t ioregs[DIOSZ]; 2177 uint8_t memregs[2][DMEMSZ]; 2178 }; 2179 2180 #define PCI_EMUL_MSI_MSGS 4 2181 #define PCI_EMUL_MSIX_MSGS 16 2182 2183 static int 2184 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2185 { 2186 int error; 2187 struct pci_emul_dsoftc *sc; 2188 2189 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2190 2191 pi->pi_arg = sc; 2192 2193 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2194 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2195 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2196 2197 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2198 assert(error == 0); 2199 2200 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2201 assert(error == 0); 2202 2203 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2204 assert(error == 0); 2205 2206 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2207 assert(error == 0); 2208 2209 return (0); 2210 } 2211 2212 static void 2213 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2214 uint64_t offset, int size, uint64_t value) 2215 { 2216 int i; 2217 struct pci_emul_dsoftc *sc = pi->pi_arg; 2218 2219 if (baridx == 0) { 2220 if (offset + size > DIOSZ) { 2221 printf("diow: iow too large, offset %ld size %d\n", 2222 offset, size); 2223 return; 2224 } 2225 2226 if (size == 1) { 2227 sc->ioregs[offset] = value & 0xff; 2228 } else if (size == 2) { 2229 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2230 } else if (size == 4) { 2231 *(uint32_t *)&sc->ioregs[offset] = value; 2232 } else { 2233 printf("diow: iow unknown size %d\n", size); 2234 } 2235 2236 /* 2237 * Special magic value to generate an interrupt 2238 */ 2239 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2240 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2241 2242 if (value == 0xabcdef) { 2243 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2244 pci_generate_msi(pi, i); 2245 } 2246 } 2247 2248 if (baridx == 1 || baridx == 2) { 2249 if (offset + size > DMEMSZ) { 2250 printf("diow: memw too large, offset %ld size %d\n", 2251 offset, size); 2252 return; 2253 } 2254 2255 i = baridx - 1; /* 'memregs' index */ 2256 2257 if (size == 1) { 2258 sc->memregs[i][offset] = value; 2259 } else if (size == 2) { 2260 *(uint16_t *)&sc->memregs[i][offset] = value; 2261 } else if (size == 4) { 2262 *(uint32_t *)&sc->memregs[i][offset] = value; 2263 } else if (size == 8) { 2264 *(uint64_t *)&sc->memregs[i][offset] = value; 2265 } else { 2266 printf("diow: memw unknown size %d\n", size); 2267 } 2268 2269 /* 2270 * magic interrupt ?? 2271 */ 2272 } 2273 2274 if (baridx > 2 || baridx < 0) { 2275 printf("diow: unknown bar idx %d\n", baridx); 2276 } 2277 } 2278 2279 static uint64_t 2280 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2281 uint64_t offset, int size) 2282 { 2283 struct pci_emul_dsoftc *sc = pi->pi_arg; 2284 uint32_t value; 2285 int i; 2286 2287 value = 0; 2288 if (baridx == 0) { 2289 if (offset + size > DIOSZ) { 2290 printf("dior: ior too large, offset %ld size %d\n", 2291 offset, size); 2292 return (0); 2293 } 2294 2295 value = 0; 2296 if (size == 1) { 2297 value = sc->ioregs[offset]; 2298 } else if (size == 2) { 2299 value = *(uint16_t *) &sc->ioregs[offset]; 2300 } else if (size == 4) { 2301 value = *(uint32_t *) &sc->ioregs[offset]; 2302 } else { 2303 printf("dior: ior unknown size %d\n", size); 2304 } 2305 } 2306 2307 if (baridx == 1 || baridx == 2) { 2308 if (offset + size > DMEMSZ) { 2309 printf("dior: memr too large, offset %ld size %d\n", 2310 offset, size); 2311 return (0); 2312 } 2313 2314 i = baridx - 1; /* 'memregs' index */ 2315 2316 if (size == 1) { 2317 value = sc->memregs[i][offset]; 2318 } else if (size == 2) { 2319 value = *(uint16_t *) &sc->memregs[i][offset]; 2320 } else if (size == 4) { 2321 value = *(uint32_t *) &sc->memregs[i][offset]; 2322 } else if (size == 8) { 2323 value = *(uint64_t *) &sc->memregs[i][offset]; 2324 } else { 2325 printf("dior: ior unknown size %d\n", size); 2326 } 2327 } 2328 2329 2330 if (baridx > 2 || baridx < 0) { 2331 printf("dior: unknown bar idx %d\n", baridx); 2332 return (0); 2333 } 2334 2335 return (value); 2336 } 2337 2338 struct pci_devemu pci_dummy = { 2339 .pe_emu = "dummy", 2340 .pe_init = pci_emul_dinit, 2341 .pe_barwrite = pci_emul_diow, 2342 .pe_barread = pci_emul_dior, 2343 }; 2344 PCI_EMUL_SET(pci_dummy); 2345 2346 #endif /* PCI_EMUL_TEST */ 2347