/*
 * SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nv-pci-table.h"
#include "nv-pci-types.h"
#include "nv-pci.h"
#include "nv-ibmnpu.h"
#include "nv-frontend.h"
#include "nv-msi.h"
#include "nv-hypervisor.h"

#if defined(NV_VGPU_KVM_BUILD)
#include "nv-vgpu-vfio-interface.h"
#endif

#if defined(NV_SEQ_READ_ITER_PRESENT)
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#endif

/*
 * Read the GPU's UUID from RM and, if it appears on the driver's GPU
 * exclusion list, ask RM to exclude the adapter and set NV_FLAG_EXCLUDE on
 * the device state.  Failures are logged but otherwise non-fatal: probe
 * proceeds either way.  The UUID string returned by rm_get_gpu_uuid() is
 * owned by this function and freed before returning.
 */
static void
nv_check_and_exclude_gpu(
    nvidia_stack_t *sp,
    nv_state_t *nv
)
{
    char *uuid_str;

    uuid_str = rm_get_gpu_uuid(sp, nv);
    if (uuid_str == NULL)
    {
        /* Without a UUID we cannot match against the exclusion list. */
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Unable to read UUID");
        return;
    }

    if (nv_is_uuid_in_gpu_exclusion_list(uuid_str))
    {
        NV_STATUS rm_status = rm_exclude_adapter(sp, nv);
        if (rm_status != NV_OK)
        {
            NV_DEV_PRINTF_STATUS(NV_DBG_ERRORS, nv, rm_status,
                          "Failed to exclude GPU %s", uuid_str);
            goto done;
        }
        nv->flags |= NV_FLAG_EXCLUDE;
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "Excluded GPU %s successfully\n",
                      uuid_str);
    }

done:
    os_free_mem(uuid_str);
}

/*
 * Decide whether a device with no usable IRQ should abort the probe.
 * On Hyper-V guests (when MSI support is compiled in) a missing legacy IRQ
 * is tolerated; everywhere else it is treated as a fatal configuration
 * error.
 */
static NvBool nv_treat_missing_irq_as_error(void)
{
#if defined(NV_LINUX_PCIE_MSI_SUPPORTED)
    return (nv_get_hypervisor_type() != OS_HYPERVISOR_HYPERV);
#else
    return NV_TRUE;
#endif
}

/*
 * Initialize dynamic power management (runtime PM / RTD3) state for the
 * device:
 *
 *  1. Try to pin open the GPU's sysfs config-space file so it stays
 *     accessible across power transitions.  On kernels where sysfs reads go
 *     through seq_file (NV_SEQ_READ_ITER_PRESENT), walk
 *     file -> seq_file -> kernfs_open_file and cross-check the back
 *     pointers to confirm the handle really is a sysfs/kernfs file before
 *     caching it; otherwise close it again.
 *  2. Determine whether the ACPI _PR3 power-resource method is present --
 *     checked on the GPU itself when running under a hypervisor, else on
 *     the upstream bridge (the usual location on bare metal).
 *  3. Hand both results to RM via rm_init_dynamic_power_management().
 */
static void nv_init_dynamic_power_management
(
    nvidia_stack_t *sp,
    struct pci_dev *pci_dev
)
{
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    char filename[50];
    int ret;
    NvBool pr3_acpi_method_present = NV_FALSE;

    nvl->sysfs_config_file = NULL;

    ret = snprintf(filename, sizeof(filename),
                   "/sys/bus/pci/devices/%04x:%02x:%02x.0/config",
                   NV_PCI_DOMAIN_NUMBER(pci_dev),
                   NV_PCI_BUS_NUMBER(pci_dev),
                   NV_PCI_SLOT_NUMBER(pci_dev));
    if (ret > 0 && ret < sizeof(filename))
    {
        struct file *file = filp_open(filename, O_RDONLY, 0);
        if (!IS_ERR(file))
        {
#if defined(NV_SEQ_READ_ITER_PRESENT)
            /*
             * Sanity check for confirming if file path is mounted over
             * sysfs file system.
             */
            if ((file->f_inode != NULL) && (file->f_inode->i_sb != NULL) &&
                (strcmp(file->f_inode->i_sb->s_id, "sysfs") == 0))
            {
                struct seq_file *sf = file->private_data;

                /*
                 * Sanity check for confirming if 'file->private_data'
                 * actually points to 'struct seq_file'.
                 */
                if ((sf != NULL) && (sf->file == file) && (sf->op == NULL))
                {
                    struct kernfs_open_file *of = sf->private;

                    /*
                     * Sanity check for confirming if 'sf->private'
                     * actually points to 'struct kernfs_open_file'.
                     */
                    if ((of != NULL) && (of->file == file) &&
                        (of->seq_file == sf))
                    {
                        nvl->sysfs_config_file = file;
                    }
                }
            }

            /* Validation failed: do not keep a handle we cannot trust. */
            if (nvl->sysfs_config_file == NULL)
            {
                filp_close(file, NULL);
            }
#else
            nvl->sysfs_config_file = file;
#endif
        }
    }

    if (nv_get_hypervisor_type() != OS_HYPERVISOR_UNKNOWN)
    {
        /* Virtualized: the _PR3 method, if any, is on the GPU itself. */
        pr3_acpi_method_present = nv_acpi_power_resource_method_present(pci_dev);
    }
    else if (pci_dev->bus && pci_dev->bus->self)
    {
        /* Bare metal: look at the upstream bridge instead. */
        pr3_acpi_method_present = nv_acpi_power_resource_method_present(pci_dev->bus->self);
    }

    rm_init_dynamic_power_management(sp, nv, pr3_acpi_method_present);
}

/*
 * Attempt to resize BAR1 (the framebuffer BAR) to the largest size the
 * device advertises via PCIe Resizable BAR capability.
 *
 * Strategy: disable memory decoding, release BAR1 and BAR3 (BAR3 shares the
 * bridge window, so it must be movable), then try sizes from largest down --
 * on -ENOSPC the next smaller advertised size is tried until the original
 * size is reached.  After reassignment, if either BAR failed to be placed,
 * one more attempt is made at the original size before giving up.
 *
 * Returns 0 on success or when ReBAR is unavailable/disabled; -ENODEV only
 * if the kernel could not re-allocate BAR1 at all (fatal for this device).
 */
static int nv_resize_pcie_bars(struct pci_dev *pci_dev) {
#if defined(NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT)
    u16 cmd;
    int r, old_size, requested_size;
    unsigned long sizes;
    int ret = 0;
#if NV_IS_EXPORT_SYMBOL_PRESENT_pci_find_host_bridge
    struct pci_host_bridge *host;
#endif

    if (NVreg_EnableResizableBar == 0)
    {
        nv_printf(NV_DBG_INFO, "NVRM: resizable BAR disabled by regkey, skipping\n");
        return 0;
    }

    // Check if BAR1 has PCIe rebar capabilities
    sizes = pci_rebar_get_possible_sizes(pci_dev, NV_GPU_BAR1);
    if (sizes == 0) {
        /* ReBAR not available. Nothing to do. */
        return 0;
    }

    /* Try to resize the BAR to the largest supported size */
    requested_size = fls(sizes) - 1;

    /* Save the current size, just in case things go wrong */
    old_size = pci_rebar_bytes_to_size(pci_resource_len(pci_dev, NV_GPU_BAR1));

    if (old_size == requested_size) {
        nv_printf(NV_DBG_INFO, "NVRM: %04x:%02x:%02x.%x: BAR1 already at requested size.\n",
            NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
            NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
        return 0;
    }
#if NV_IS_EXPORT_SYMBOL_PRESENT_pci_find_host_bridge
    /* If the kernel will refuse us, don't even try to resize,
       but give an informative error */
    host = pci_find_host_bridge(pci_dev->bus);
    if (host->preserve_config) {
        nv_printf(NV_DBG_INFO, "NVRM: Not resizing BAR because the firmware forbids moving windows.\n");
        return 0;
    }
#endif
    nv_printf(NV_DBG_INFO, "NVRM: %04x:%02x:%02x.%x: Attempting to resize BAR1.\n",
        NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
        NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

    /* Disable memory decoding - required by the kernel APIs */
    pci_read_config_word(pci_dev, PCI_COMMAND, &cmd);
    pci_write_config_word(pci_dev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);

    /* Release BAR1 */
    pci_release_resource(pci_dev, NV_GPU_BAR1);

    /* Release BAR3 - we don't want to resize it, it's in the same bridge, so we'll want to move it */
    pci_release_resource(pci_dev, NV_GPU_BAR3);

resize:
    /* Attempt to resize BAR1 to the largest supported size */
    r = pci_resize_resource(pci_dev, NV_GPU_BAR1, requested_size);

    if (r) {
        if (r == -ENOSPC)
        {
            /* step through smaller sizes down to original size */
            if (requested_size > old_size)
            {
                /* Drop the size we just tried and retry with the next one. */
                clear_bit(fls(sizes) - 1, &sizes);
                requested_size = fls(sizes) - 1;
                goto resize;
            }
            else
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: No address space to allocate resized BAR1.\n");
            }
        }
        else if (r == -EOPNOTSUPP)
        {
            nv_printf(NV_DBG_WARNINGS, "NVRM: BAR resize resource not supported.\n");
        }
        else
        {
            nv_printf(NV_DBG_WARNINGS, "NVRM: BAR resizing failed with error `%d`.\n", r);
        }
    }

    /* Re-attempt assignment of PCIe resources */
    pci_assign_unassigned_bus_resources(pci_dev->bus);

    if ((pci_resource_flags(pci_dev, NV_GPU_BAR1) & IORESOURCE_UNSET) ||
        (pci_resource_flags(pci_dev, NV_GPU_BAR3) & IORESOURCE_UNSET)) {
        if (requested_size != old_size) {
            /* Try to get the BAR back with the original size */
            requested_size = old_size;
            goto resize;
        }
        /* Something went horribly wrong and the kernel didn't manage to re-allocate BAR1.
           This is unlikely (because we had space before), but can happen. */
        nv_printf(NV_DBG_ERRORS, "NVRM: FATAL: Failed to re-allocate BAR1.\n");
        ret = -ENODEV;
    }

    /* Re-enable memory decoding */
    pci_write_config_word(pci_dev, PCI_COMMAND, cmd);

    return ret;
#else
    nv_printf(NV_DBG_INFO, "NVRM: Resizable BAR is not supported on this kernel version.\n");
    return 0;
#endif /* NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT */
}

/* find nvidia devices and set initial state */
/*
 * PCI probe callback.  High-level sequence:
 *
 *   - SR-IOV virtual functions are either handed to the vGPU VFIO module
 *     (NV_VGPU_KVM_BUILD) or rejected.
 *   - Legacy/unsupported devices are rejected via rm_is_supported_pci_device.
 *   - BARs are validated (64-bit placement, bridge prefetch windows), the
 *     register BAR's memory region is claimed, and BAR1 is optionally
 *     resized (ReBAR).
 *   - nv_linux_state_t is allocated, locks and per-device state initialized,
 *     and the device is registered with procfs, the frontend, and (if built)
 *     the vGPU VFIO module.
 *   - Dynamic power management is enabled as the very last step, since the
 *     kernel may immediately runtime-suspend the device afterwards.
 *
 * On any failure, the goto ladder at the bottom unwinds exactly the steps
 * that completed.  Returns 0 on success, -1 on failure.
 */
static int
nv_pci_probe
(
    struct pci_dev *pci_dev,
    const struct pci_device_id *id_table
)
{
    nv_state_t *nv = NULL;
    nv_linux_state_t *nvl = NULL;
    unsigned int i, j;
    int flags = 0;
    nvidia_stack_t *sp = NULL;
    NvBool prev_nv_ats_supported = nv_ats_supported;
    NV_STATUS status;
    NvBool last_bar_64bit = NV_FALSE;
    NvU8 regs_bar_index = nv_bar_index_to_os_bar_index(pci_dev,
                                                       NV_GPU_BAR_INDEX_REGS);

    nv_printf(NV_DBG_SETUP, "NVRM: probing 0x%x 0x%x, class 0x%x\n",
              pci_dev->vendor, pci_dev->device, pci_dev->class);

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return -1;
    }

#ifdef NV_PCI_SRIOV_SUPPORT
    if (pci_dev->is_virtfn)
    {
#if defined(NV_VGPU_KVM_BUILD)
        /* A VF is only usable if its PF is already bound to this driver. */
        nvl = pci_get_drvdata(pci_dev->physfn);
        if (!nvl)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
                      "since PF is not bound to nvidia driver.\n",
                      NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                      NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
            goto failed;
        }

        if (pci_dev->dev.bus->iommu_ops == NULL)
        {
            nv = NV_STATE_PTR(nvl);
            if (rm_is_iommu_needed_for_sriov(sp, nv))
            {
                nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
                          "since IOMMU is not present on the system.\n",
                          NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                          NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
                goto failed;
            }
        }

        if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
            goto failed;
        }

        /* VF probing ends here; the PF owns all further device state. */
        nv_kmem_cache_free_stack(sp);
        return 0;
#else
        nv_printf(NV_DBG_ERRORS, "NVRM: Ignoring probe for VF %04x:%02x:%02x.%x ",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        goto failed;
#endif /* NV_VGPU_KVM_BUILD */
    }
#endif /* NV_PCI_SRIOV_SUPPORT */

    if (!rm_is_supported_pci_device(
                (pci_dev->class >> 16) & 0xFF,
                (pci_dev->class >> 8) & 0xFF,
                pci_dev->vendor,
                pci_dev->device,
                pci_dev->subsystem_vendor,
                pci_dev->subsystem_device,
                NV_FALSE /* print_legacy_warning */))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: ignoring the legacy GPU %04x:%02x:%02x.%x\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
        goto failed;
    }

    num_probed_nv_devices++;

    if (pci_enable_device(pci_dev) != 0)
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: pci_enable_device failed, aborting\n");
        goto failed;
    }

    if ((pci_dev->irq == 0 && !pci_find_capability(pci_dev, PCI_CAP_ID_MSIX))
        && nv_treat_missing_irq_as_error())
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: Can't find an IRQ for your NVIDIA card!\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: Please check your BIOS settings.\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: [Plug & Play OS] should be set to NO\n");
        nv_printf(NV_DBG_ERRORS, "NVRM: [Assign IRQ to VGA] should be set to YES \n");
        goto failed;
    }

    /*
     * Validate every config-space BAR.  'i' walks the raw PCI BAR slots,
     * 'j' counts accepted GPU BARs (a 64-bit BAR occupies two slots but
     * counts once).
     */
    for (i = 0, j = 0; i < NVRM_PCICFG_NUM_BARS && j < NV_GPU_NUM_BARS; i++)
    {
        if (NV_PCI_RESOURCE_VALID(pci_dev, i))
        {
#if defined(NV_PCI_MAX_MMIO_BITS_SUPPORTED)
            /* Reject BARs placed above what the platform can address. */
            if ((NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
                ((NV_PCI_RESOURCE_START(pci_dev, i) >> NV_PCI_MAX_MMIO_BITS_SUPPORTED)))
            {
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This is a 64-bit BAR mapped above %dGB by the system\n"
                    "NVRM: BIOS or the %s kernel. This PCI I/O region assigned\n"
                    "NVRM: to your NVIDIA device is not supported by the kernel.\n"
                    "NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n",
                    (1 << (NV_PCI_MAX_MMIO_BITS_SUPPORTED - 30)),
                    NV_KERNEL_NAME, i,
                    (NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
                    (NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
                    NV_PCI_DOMAIN_NUMBER(pci_dev),
                    NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev),
                    PCI_FUNC(pci_dev->devfn));
                goto failed;
            }
#endif
            if ((NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
                (NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_MEM_PREFETCH))
            {
                struct pci_dev *bridge = pci_dev->bus->self;
                NvU32 base_upper, limit_upper;

                last_bar_64bit = NV_TRUE;

                /* Root-bus device: no upstream bridge window to verify. */
                if (bridge == NULL)
                    goto next_bar;

                /* BAR below 4GB: the bridge window check does not apply. */
                pci_read_config_dword(pci_dev, NVRM_PCICFG_BAR_OFFSET(i) + 4,
                        &base_upper);
                if (base_upper == 0)
                    goto next_bar;

                pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32,
                        &base_upper);
                pci_read_config_dword(bridge, PCI_PREF_LIMIT_UPPER32,
                        &limit_upper);

                if ((base_upper != 0) && (limit_upper != 0))
                    goto next_bar;

                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This is a 64-bit BAR mapped above 4GB by the system\n"
                    "NVRM: BIOS or the %s kernel, but the PCI bridge\n"
                    "NVRM: immediately upstream of this GPU does not define\n"
                    "NVRM: a matching prefetchable memory window.\n",
                    NV_KERNEL_NAME);
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: This may be due to a known Linux kernel bug. Please\n"
                    "NVRM: see the README section on 64-bit BARs for additional\n"
                    "NVRM: information.\n");
                goto failed;
            }

next_bar:
            //
            // If we are here, then we have found a valid BAR -- 32 or 64-bit.
            //
            j++;
            continue;
        }

        //
        // If last_bar_64bit is "true" then, we are looking at the 2nd (upper)
        // half of the 64-bit BAR. This is typically all 0s which looks invalid
        // but it's normal and not a problem and we can ignore it and continue.
        //
        if (last_bar_64bit)
        {
            last_bar_64bit = NV_FALSE;
            continue;
        }

        // Invalid 32 or 64-bit BAR.
        nv_printf(NV_DBG_ERRORS,
            "NVRM: This PCI I/O region assigned to your NVIDIA device is invalid:\n"
            "NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n", i,
            (NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
            (NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
            NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
            NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        goto failed;
    }

    /* Claim the register BAR so no other driver can map it. */
    if (!request_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                            NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index),
                            nv_device_name))
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: request_mem_region failed for %dM @ 0x%llx. This can\n"
            "NVRM: occur when a driver such as rivatv is loaded and claims\n"
            "NVRM: ownership of the device's registers.\n",
            (NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
            (NvU64)NV_PCI_RESOURCE_START(pci_dev, regs_bar_index));
        goto failed;
    }

    if (nv_resize_pcie_bars(pci_dev)) {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: Fatal Error while attempting to resize PCIe BARs.\n");
        goto failed;
    }

    NV_KZALLOC(nvl, sizeof(nv_linux_state_t));
    if (nvl == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate memory\n");
        goto err_not_supported;
    }

    nv = NV_STATE_PTR(nvl);

    pci_set_drvdata(pci_dev, (void *)nvl);

    /* default to 32-bit PCI bus address space */
    pci_dev->dma_mask = 0xffffffffULL;

    nvl->dev          = &pci_dev->dev;
    nvl->pci_dev      = pci_dev;
    nvl->dma_dev.dev  = nvl->dev;

    nv->pci_info.vendor_id  = pci_dev->vendor;
    nv->pci_info.device_id  = pci_dev->device;
    nv->subsystem_id        = pci_dev->subsystem_device;
    nv->subsystem_vendor    = pci_dev->subsystem_vendor;
    nv->os_state            = (void *) nvl;
    nv->dma_dev             = &nvl->dma_dev;
    nv->pci_info.domain     = NV_PCI_DOMAIN_NUMBER(pci_dev);
    nv->pci_info.bus        = NV_PCI_BUS_NUMBER(pci_dev);
    nv->pci_info.slot       = NV_PCI_SLOT_NUMBER(pci_dev);
    nv->handle              = pci_dev;
    nv->flags              |= flags;

    if (!nv_lock_init_locks(sp, nv))
    {
        goto err_not_supported;
    }

    nvl->all_mappings_revoked = NV_TRUE;
    nvl->safe_to_mmap = NV_TRUE;
    nvl->gpu_wakeup_callback_needed = NV_TRUE;
    INIT_LIST_HEAD(&nvl->open_files);

    /* Record the memory BARs validated above into the nv_state BAR table. */
    for (i = 0, j = 0; i < NVRM_PCICFG_NUM_BARS && j < NV_GPU_NUM_BARS; i++)
    {
        if ((NV_PCI_RESOURCE_VALID(pci_dev, i)) &&
            (NV_PCI_RESOURCE_FLAGS(pci_dev, i) & PCI_BASE_ADDRESS_SPACE)
                == PCI_BASE_ADDRESS_SPACE_MEMORY)
        {
            nv->bars[j].offset = NVRM_PCICFG_BAR_OFFSET(i);
            nv->bars[j].cpu_address = NV_PCI_RESOURCE_START(pci_dev, i);
            nv->bars[j].size = NV_PCI_RESOURCE_SIZE(pci_dev, i);
            j++;
        }
    }
    nv->regs = &nv->bars[NV_GPU_BAR_INDEX_REGS];
    nv->fb   = &nv->bars[NV_GPU_BAR_INDEX_FB];

    nv->interrupt_line = pci_dev->irq;

    NV_ATOMIC_SET(nvl->numa_info.status, NV_IOCTL_NUMA_STATUS_DISABLED);
    nvl->numa_info.node_id = NUMA_NO_NODE;

    nv_init_ibmnpu_info(nv);

#if defined(NVCPU_PPC64LE)
    // Use HW NUMA support as a proxy for ATS support. This is true in the only
    // PPC64LE platform where ATS is currently supported (IBM P9).
    nv_ats_supported &= nv_platform_supports_numa(nvl);
#else
#endif
    if (nv_ats_supported)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "ATS supported by this GPU!\n");
    }
    else
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "ATS not supported by this GPU. "
                      "Disabling ATS support for all the GPUs in the system!\n");
    }

    pci_set_master(pci_dev);

#if defined(CONFIG_VGA_ARB) && !defined(NVCPU_PPC64LE)
#if defined(VGA_DEFAULT_DEVICE)
#if defined(NV_VGA_TRYGET_PRESENT)
    vga_tryget(VGA_DEFAULT_DEVICE, VGA_RSRC_LEGACY_MASK);
#endif
#endif
    vga_set_legacy_decoding(pci_dev, VGA_RSRC_NONE);
#endif

    status = nv_check_gpu_state(nv);
    if (status == NV_ERR_GPU_IS_LOST)
    {
        NV_DEV_PRINTF(NV_DBG_INFO, nv, "GPU is lost, skipping nv_pci_probe\n");
        goto err_not_supported;
    }

    if ((rm_is_supported_device(sp, nv)) != NV_OK)
        goto err_not_supported;

    if (!rm_init_private_state(sp, nv))
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "rm_init_private_state() failed!\n");
        goto err_zero_dev;
    }

    nv_printf(NV_DBG_INFO,
              "NVRM: PCI:%04x:%02x:%02x.%x (%04x:%04x): BAR0 @ 0x%llx (%lluMB)\n",
              nv->pci_info.domain, nv->pci_info.bus, nv->pci_info.slot,
              PCI_FUNC(pci_dev->devfn), nv->pci_info.vendor_id, nv->pci_info.device_id,
              nv->regs->cpu_address, (nv->regs->size >> 20));
    nv_printf(NV_DBG_INFO,
              "NVRM: PCI:%04x:%02x:%02x.%x (%04x:%04x): BAR1 @ 0x%llx (%lluMB)\n",
              nv->pci_info.domain, nv->pci_info.bus, nv->pci_info.slot,
              PCI_FUNC(pci_dev->devfn), nv->pci_info.vendor_id, nv->pci_info.device_id,
              nv->fb->cpu_address, (nv->fb->size >> 20));

    num_nv_devices++;

    /*
     * The newly created nvl object is added to the nv_linux_devices global list
     * only after all the initialization operations for that nvl object are
     * completed, so as to protect against simultaneous lookup operations which
     * may discover a partially initialized nvl object in the list
     */
    LOCK_NV_LINUX_DEVICES();

    nv_linux_add_device_locked(nvl);

    UNLOCK_NV_LINUX_DEVICES();

    if (nvidia_frontend_add_device((void *)&nv_fops, nvl) != 0)
        goto err_remove_device;

    pm_vt_switch_required(nvl->dev, NV_TRUE);

    nv_init_dynamic_power_management(sp, pci_dev);

    nv_procfs_add_gpu(nvl);

    /* Parse and set any per-GPU registry keys specified. */
    nv_parse_per_device_option_string(sp);

    rm_set_rm_firmware_requested(sp, nv);

#if defined(NV_VGPU_KVM_BUILD)
    if (nvidia_vgpu_vfio_probe(nvl->pci_dev) != NV_OK)
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Failed to register device to vGPU VFIO module");
        nvidia_frontend_remove_device((void *)&nv_fops, nvl);
        goto err_vgpu_kvm;
    }
#endif

    nv_check_and_exclude_gpu(sp, nv);

#if defined(DPM_FLAG_NO_DIRECT_COMPLETE)
    dev_pm_set_driver_flags(nvl->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
#elif defined(DPM_FLAG_NEVER_SKIP)
    dev_pm_set_driver_flags(nvl->dev, DPM_FLAG_NEVER_SKIP);
#endif

    /*
     * Dynamic power management should be enabled as the last step.
     * Kernel runtime power management framework can put the device
     * into the suspended state. Hardware register access should not be done
     * after enabling dynamic power management.
     */
    rm_enable_dynamic_power_management(sp, nv);
    nv_kmem_cache_free_stack(sp);

    return 0;

#if defined(NV_VGPU_KVM_BUILD)
err_vgpu_kvm:
#endif
    nv_procfs_remove_gpu(nvl);
    rm_cleanup_dynamic_power_management(sp, nv);
    pm_vt_switch_unregister(nvl->dev);
err_remove_device:
    LOCK_NV_LINUX_DEVICES();
    nv_linux_remove_device_locked(nvl);
    UNLOCK_NV_LINUX_DEVICES();
err_zero_dev:
    rm_free_private_state(sp, nv);
err_not_supported:
    nv_ats_supported = prev_nv_ats_supported;
    nv_destroy_ibmnpu_info(nv);
    nv_lock_destroy_locks(sp, nv);
    if (nvl != NULL)
    {
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    release_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                       NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index));
    NV_PCI_DISABLE_DEVICE(pci_dev);
    pci_set_drvdata(pci_dev, NULL);
failed:
    nv_kmem_cache_free_stack(sp);
    return -1;
}

/*
 * PCI remove callback.  Unwinds everything nv_pci_probe() set up.  If the
 * device is still in use (non-zero usage_count) and is not an external GPU,
 * this function blocks -- repeatedly dropping and re-taking the locks --
 * until all clients have closed, because returning early would corrupt
 * driver state.  Several teardown steps are gated on usage_count == 0 so
 * that an eGPU surprise-removal with active clients defers the final
 * frees/disable to the last close.
 */
static void
nv_pci_remove(struct pci_dev *pci_dev)
{
    nv_linux_state_t *nvl = NULL;
    nv_state_t *nv;
    nvidia_stack_t *sp = NULL;
    NvU8 regs_bar_index = nv_bar_index_to_os_bar_index(pci_dev,
                                                       NV_GPU_BAR_INDEX_REGS);

    nv_printf(NV_DBG_SETUP, "NVRM: removing GPU %04x:%02x:%02x.%x\n",
              NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
              NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

#ifdef NV_PCI_SRIOV_SUPPORT
    if (pci_dev->is_virtfn)
    {
#if defined(NV_VGPU_KVM_BUILD)
        /* Arg 2 == NV_TRUE means that the PCI device should be removed */
        nvidia_vgpu_vfio_remove(pci_dev, NV_TRUE);
#endif /* NV_VGPU_KVM_BUILD */
        return;
    }
#endif /* NV_PCI_SRIOV_SUPPORT */

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        return;
    }

    LOCK_NV_LINUX_DEVICES();
    nvl = pci_get_drvdata(pci_dev);
    if (!nvl || (nvl->pci_dev != pci_dev))
    {
        goto done;
    }

    nv = NV_STATE_PTR(nvl);
    down(&nvl->ldata_lock);

    /*
     * Sanity check: A removed device shouldn't have a non-zero usage_count.
     * For eGPU, fall off the bus along with clients active is a valid scenario.
     * Hence skipping the sanity check for eGPU.
     */
    if ((NV_ATOMIC_READ(nvl->usage_count) != 0) && !(nv->is_external_gpu))
    {
        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count!\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));

        /*
         * We can't return from this function without corrupting state, so we wait for
         * the usage count to go to zero.
         */
        while (NV_ATOMIC_READ(nvl->usage_count) != 0)
        {

            /*
             * While waiting, release the locks so that other threads can make
             * forward progress.
             */
            up(&nvl->ldata_lock);
            UNLOCK_NV_LINUX_DEVICES();

            os_delay(500);

            /* Re-acquire the locks before checking again */
            LOCK_NV_LINUX_DEVICES();
            nvl = pci_get_drvdata(pci_dev);
            if (!nvl)
            {
                /* The device was not found, which should not happen */
                nv_printf(NV_DBG_ERRORS,
                          "NVRM: Failed removal of device %04x:%02x:%02x.%x!\n",
                          NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                          NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
                WARN_ON(1);
                goto done;
            }
            nv = NV_STATE_PTR(nvl);
            down(&nvl->ldata_lock);
        }

        nv_printf(NV_DBG_ERRORS,
                  "NVRM: Continuing with GPU removal for device %04x:%02x:%02x.%x\n",
                  NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
                  NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
    }

    rm_check_for_gpu_surprise_removal(sp, nv);

    nv_linux_remove_device_locked(nvl);

    /* Remove proc entry for this GPU */
    nv_procfs_remove_gpu(nvl);

    rm_cleanup_dynamic_power_management(sp, nv);

    nv->removed = NV_TRUE;

    UNLOCK_NV_LINUX_DEVICES();

    pm_vt_switch_unregister(&pci_dev->dev);

#if defined(NV_VGPU_KVM_BUILD)
    /* Arg 2 == NV_TRUE means that the PCI device should be removed */
    nvidia_vgpu_vfio_remove(pci_dev, NV_TRUE);
#endif

    /* Update the frontend data structures */
    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nvidia_frontend_remove_device((void *)&nv_fops, nvl);
    }

    if ((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) || (nv->flags & NV_FLAG_OPEN))
    {
        nv_acpi_unregister_notifier(nvl);
        if (nv->flags & NV_FLAG_PERSISTENT_SW_STATE)
        {
            rm_disable_gpu_state_persistence(sp, nv);
        }
        nv_shutdown_adapter(sp, nv, nvl);
        nv_dev_free_stacks(nvl);
    }

    /* Drop the sysfs config handle pinned by nv_init_dynamic_power_management. */
    if (nvl->sysfs_config_file != NULL)
    {
        filp_close(nvl->sysfs_config_file, NULL);
        nvl->sysfs_config_file = NULL;
    }

    nv_unregister_ibmnpu_devices(nv);
    nv_destroy_ibmnpu_info(nv);

    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        nv_lock_destroy_locks(sp, nv);
    }

    num_probed_nv_devices--;

    pci_set_drvdata(pci_dev, NULL);

    rm_i2c_remove_adapters(sp, nv);
    rm_free_private_state(sp, nv);
    release_mem_region(NV_PCI_RESOURCE_START(pci_dev, regs_bar_index),
                       NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index));

    num_nv_devices--;

    if (NV_ATOMIC_READ(nvl->usage_count) == 0)
    {
        NV_PCI_DISABLE_DEVICE(pci_dev);
        NV_KFREE(nvl, sizeof(nv_linux_state_t));
    }
    else
    {
        /* Clients still active (eGPU case): the last close finishes teardown. */
        up(&nvl->ldata_lock);
    }

    nv_kmem_cache_free_stack(sp);
    return;

done:
    UNLOCK_NV_LINUX_DEVICES();
    nv_kmem_cache_free_stack(sp);
}

/*
 * PCI shutdown callback.  Marks the device as shutting down and disables
 * bus mastering; skipped entirely when a forced shutdown was already
 * initiated elsewhere (is_forced_shutdown).
 */
static void
nv_pci_shutdown(struct pci_dev *pci_dev)
{
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);

    if ((nvl != NULL) && nvl->is_forced_shutdown)
    {
        nvl->is_forced_shutdown = NV_FALSE;
        return;
    }

    if (nvl != NULL)
    {
        nvl->nv_state.is_shutdown = NV_TRUE;
    }

    /* pci_clear_master is not defined for !CONFIG_PCI */
#ifdef CONFIG_PCI
    pci_clear_master(pci_dev);
#endif

    /* SHH HW mandates 1us delay to realise the effects of
     * Bus Mater Enable(BME) disable. Adding 1us delay for
     * all the chips as the delay is not in the data path
     * and not big. Creating HAL for this would be a overkill.
     */
    udelay(1);
}

/*!
 * @brief This function accepts pci information corresponding to a GPU
 * and returns a reference to the nv_linux_state_t corresponding to that GPU.
 *
 * @param[in] domain            Pci domain number for the GPU to be found.
 * @param[in] bus               Pci bus number for the GPU to be found.
 * @param[in] slot              Pci slot number for the GPU to be found.
 * @param[in] function          Pci function number for the GPU to be found.
 *
 * @return Pointer to nv_linux_state_t for the GPU if it is found, or NULL otherwise.
 */
nv_linux_state_t * find_pci(NvU32 domain, NvU8 bus, NvU8 slot, NvU8 function)
{
    nv_linux_state_t *nvl = NULL;

    LOCK_NV_LINUX_DEVICES();

    /* Linear scan of the global device list under the devices lock. */
    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);

        if (nv->pci_info.domain == domain &&
            nv->pci_info.bus == bus &&
            nv->pci_info.slot == slot &&
            nv->pci_info.function == function)
        {
            break;
        }
    }

    UNLOCK_NV_LINUX_DEVICES();
    return nvl;
}

/*
 * Look up a GPU by UUID and return its pci_dev plus the DMA-addressable
 * range.  find_uuid() returns with nvl->ldata_lock held; it is released
 * here before returning.  Returns 0 on success, -ENODEV if no GPU with the
 * given UUID is registered.
 */
int nvidia_dev_get_pci_info(const NvU8 *uuid, struct pci_dev **pci_dev_out,
    NvU64 *dma_start, NvU64 *dma_limit)
{
    nv_linux_state_t *nvl;

    /* Takes nvl->ldata_lock */
    nvl = find_uuid(uuid);
    if (!nvl)
        return -ENODEV;

    *pci_dev_out = nvl->pci_dev;
    *dma_start = nvl->dma_dev.addressable_range.start;
    *dma_limit = nvl->dma_dev.addressable_range.limit;

    up(&nvl->ldata_lock);

    return 0;
}

/*
 * Walk the device's PCI capability list in config space and return the
 * offset of the requested capability, or 0 if it is absent (or the header
 * type has no capability list).  Equivalent in spirit to the kernel's
 * pci_find_capability(), implemented via raw config reads.
 */
NvU8 nv_find_pci_capability(struct pci_dev *pci_dev, NvU8 capability)
{
    u16 status = 0;
    u8  cap_ptr = 0, cap_id = 0xff;

    pci_read_config_word(pci_dev, PCI_STATUS, &status);
    status &= PCI_STATUS_CAP_LIST;
    if (!status)
        return 0;

    switch (pci_dev->hdr_type) {
        case PCI_HEADER_TYPE_NORMAL:
        case PCI_HEADER_TYPE_BRIDGE:
            pci_read_config_byte(pci_dev, PCI_CAPABILITY_LIST, &cap_ptr);
            break;
        default:
            return 0;
    }

    do {
        /* Capability pointers are dword-aligned; mask the low bits. */
        cap_ptr &= 0xfc;
        pci_read_config_byte(pci_dev, cap_ptr + PCI_CAP_LIST_ID, &cap_id);
        if (cap_id == capability)
            return cap_ptr;
        pci_read_config_byte(pci_dev, cap_ptr + PCI_CAP_LIST_NEXT, &cap_ptr);
    } while (cap_ptr && cap_id != 0xff);

    return 0;
}

/* make sure the pci_driver called probe for all of our devices.
 * we've seen cases where rivafb claims the device first and our driver
 * doesn't get called.
 */
int
nv_pci_count_devices(void)
{
    struct pci_dev *pci_dev;
    int count = 0;

    if (NVreg_RegisterPCIDriver == 0)
    {
        return 0;
    }

    /* Count supported VGA-class devices... */
    pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, NULL);
    while (pci_dev)
    {
        if (rm_is_supported_pci_device(
                PCI_BASE_CLASS_DISPLAY,
                PCI_CLASS_DISPLAY_VGA & 0xFF,
                pci_dev->vendor,
                pci_dev->device,
                pci_dev->subsystem_vendor,
                pci_dev->subsystem_device,
                NV_TRUE /* print_legacy_warning */))
        {
            count++;
        }
        pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pci_dev);
    }

    /* ...and supported 3D-controller-class devices. */
    pci_dev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, NULL);
    while (pci_dev)
    {
        if (rm_is_supported_pci_device(
                (pci_dev->class >> 16) & 0xFF,
                (pci_dev->class >> 8) & 0xFF,
                pci_dev->vendor,
                pci_dev->device,
                pci_dev->subsystem_vendor,
                pci_dev->subsystem_device,
                NV_TRUE /* print_legacy_warning */))
        {
            count++;
        }
        pci_dev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pci_dev);
    }

    return count;
}

#if defined(NV_PCI_ERROR_RECOVERY)
/*
 * AER error_detected callback: validate the device, then ask the kernel to
 * continue recovery so that the mmio_enabled callback below gets invoked
 * with MMIO access restored.
 */
static pci_ers_result_t
nv_pci_error_detected(
    struct pci_dev *pci_dev,
    nv_pci_channel_state_t error
)
{
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);

    if ((nvl == NULL) || (nvl->pci_dev != pci_dev))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid device!\n", __FUNCTION__);
        return PCI_ERS_RESULT_NONE;
    }

    /*
     * Tell Linux to continue recovery of the device. The kernel will enable
     * MMIO for the GPU and call the mmio_enabled callback.
     */
    return PCI_ERS_RESULT_CAN_RECOVER;
}

/*
 * AER mmio_enabled callback: with MMIO restored, probe whether the GPU is
 * still reachable and log crash data.  Always returns
 * PCI_ERS_RESULT_DISCONNECT because RM and its clients do not support
 * continuing after a fatal PCI error.
 */
static pci_ers_result_t
nv_pci_mmio_enabled(
    struct pci_dev *pci_dev
)
{
    NV_STATUS         status = NV_OK;
    nv_stack_t       *sp = NULL;
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
    nv_state_t       *nv = NULL;

    if ((nvl == NULL) || (nvl->pci_dev != pci_dev))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid device!\n", __FUNCTION__);
        goto done;
    }

    nv = NV_STATE_PTR(nvl);

    if (nv_kmem_cache_alloc_stack(&sp) != 0)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: %s: failed to allocate stack!\n",
                  __FUNCTION__);
        goto done;
    }

    NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "A fatal error was detected.\n");

    /*
     * MMIO should be re-enabled now. If we still get bad reads, there's
     * likely something wrong with the adapter itself that will require a
     * reset. This should let us know whether the GPU has completely fallen
     * off the bus or just did something the host didn't like.
     */
    status = rm_is_supported_device(sp, nv);
    if (status != NV_OK)
    {
        NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
            "The kernel has enabled MMIO for the device,\n"
            "NVRM: but it still appears unreachable. The device\n"
            "NVRM: will not function properly until it is reset.\n");
    }

    status = rm_log_gpu_crash(sp, nv);
    if (status != NV_OK)
    {
        NV_DEV_PRINTF_STATUS(NV_DBG_ERRORS, nv, status,
                      "Failed to log crash data\n");
        goto done;
    }

done:
    if (sp != NULL)
    {
        nv_kmem_cache_free_stack(sp);
    }

    /*
     * Tell Linux to abandon recovery of the device. The kernel might be able
     * to recover the device, but RM and clients don't yet support that.
     */
    return PCI_ERS_RESULT_DISCONNECT;
}

struct pci_error_handlers nv_pci_error_handlers = {
    .error_detected = nv_pci_error_detected,
    .mmio_enabled   = nv_pci_mmio_enabled,
};
#endif

#if defined(CONFIG_PM)
extern struct dev_pm_ops nv_pm_ops;
#endif

struct pci_driver nv_pci_driver = {
    .name      = MODULE_NAME,
    .id_table  = nv_pci_table,
    .probe     = nv_pci_probe,
    .remove    = nv_pci_remove,
    .shutdown  = nv_pci_shutdown,
#if defined(NV_USE_VFIO_PCI_CORE) && \
  defined(NV_PCI_DRIVER_HAS_DRIVER_MANAGED_DMA)
    .driver_managed_dma = NV_TRUE,
#endif
#if defined(CONFIG_PM)
    .driver.pm = &nv_pm_ops,
#endif
#if defined(NV_PCI_ERROR_RECOVERY)
    .err_handler = &nv_pci_error_handlers,
#endif
};

/* Unregister the PCI driver, unless registration was disabled by regkey. */
void nv_pci_unregister_driver(void)
{
    if (NVreg_RegisterPCIDriver == 0)
    {
        return;
    }
    return pci_unregister_driver(&nv_pci_driver);
}

/* Register the PCI driver; a no-op (returning 0) when disabled by regkey. */
int nv_pci_register_driver(void)
{
    if (NVreg_RegisterPCIDriver == 0)
    {
        return 0;
    }
    return pci_register_driver(&nv_pci_driver);
}