1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 
 */

#include "core/core.h"
#include "gpu/gpu.h"

#include <class/cl00fc.h>      // FABRIC_VASPACE_A
#include "gpu/bus/kern_bus.h"
#include "gpu/bus/p2p_api.h"
#include "gpu/bif/kernel_bif.h"
#include "gpu/mmu/kern_gmmu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "kernel/gpu/nvlink/kernel_nvlink.h"
#include "kernel/gpu/mem_mgr/virt_mem_allocator_common.h"
#include "mem_mgr/fabric_vaspace.h"
#include "mem_mgr/virt_mem_mgr.h"
#include "vgpu/rpc.h"
#include "virtualization/hypervisor/hypervisor.h"
#include "os/os.h"

#include "mem_mgr/mem_multicast_fabric.h"

#include "gpu/gpu_fabric_probe.h"
#include "published/hopper/gh100/dev_ram.h"
#include "published/hopper/gh100/pri_nv_xal_ep.h"
#include "published/hopper/gh100/pri_nv_xal_ep_p2p.h"
#include "published/hopper/gh100/dev_vm.h"
#include "published/hopper/gh100/dev_mmu.h"
#include "ctrl/ctrl2080/ctrl2080fla.h" // NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK

#include "nvRmReg.h"

// Defines for P2P
// Size of one P2P write mailbox window: 64KB.
#define HOPPER_WRITE_MAILBOX_SIZE           ((NvU64)64 * 1024)
// Highest mailbox address expressible in the WMBOX address field:
// (mailbox size << address-field width) minus one mailbox.
#define HOPPER_MAX_WRITE_MAILBOX_ADDR(pGpu)                                         \
    ((HOPPER_WRITE_MAILBOX_SIZE << kbusGetP2PWriteMailboxAddressSize_HAL(pGpu)) -   \
     HOPPER_WRITE_MAILBOX_SIZE)

// RM reserved memory region is mapped separately as it is not added to the kernel
#define COHERENT_CPU_MAPPING_RM_RESV_REGION COHERENT_CPU_MAPPING_REGION_1

/*!
 * @brief Gets the P2P write mailbox address size (NV_XAL_EP_P2P_WMBOX_ADDR_ADDR)
 *
 * @returns P2P write mailbox address size, i.e. the bit-width of the
 *          NV_XAL_EP_P2P_WMBOX_ADDR_ADDR register field.
 */
NvU32
kbusGetP2PWriteMailboxAddressSize_GH100(OBJGPU *pGpu)
{
    return DRF_SIZE(NV_XAL_EP_P2P_WMBOX_ADDR_ADDR);
}

/*!
 * @brief Writes NV_XAL_EP_BAR0_WINDOW_BASE
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] base       base address to write (64KB-granular window base)
 *
 * @returns NV_OK
 */
NV_STATUS
kbusWriteBAR0WindowBase_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU32      base
)
{
    GPU_FLD_WR_DRF_NUM(pGpu, _XAL_EP, _BAR0_WINDOW, _BASE, base);
    return NV_OK;
}

/*!
 * @brief Reads NV_XAL_EP_BAR0_WINDOW_BASE
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 *
 * @returns Contents of NV_XAL_EP_BAR0_WINDOW_BASE
 */
NvU32
kbusReadBAR0WindowBase_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus
)
{
    return GPU_REG_RD_DRF(pGpu, _XAL_EP, _BAR0_WINDOW, _BASE);
}

/*!
 * @brief Validates that the given base fits within the width of the window base
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] base       base offset to validate
 *
 * @returns Whether given base fits within the width of the window base.
 */
NvBool
kbusValidateBAR0WindowBase_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU32      base
)
{
    // The base must not exceed the register field's mask.
    return base <= DRF_MASK(NV_XAL_EP_BAR0_WINDOW_BASE);
}

/*!
 * @brief Points the BAR0 window at the given vidmem offset.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] vidOffset  64KB-aligned vidmem offset to map through the window
 *
 * @returns NV_OK on success, NV_ERR_INVALID_STATE if PRAMIN is disabled.
 */
NV_STATUS
kbusSetBAR0WindowVidOffset_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU64      vidOffset
)
{
    if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
    {
        NV_ASSERT_FAILED("kbusSetBAR0WindowVidOffset_HAL call in coherent path\n");
        return NV_ERR_INVALID_STATE;
    }

    // Window placement is 64KB-granular and must fit in the BASE field.
    NV_ASSERT((vidOffset & 0xffff)==0);
    NV_ASSERT(kbusValidateBAR0WindowBase_HAL(pGpu, pKernelBus, vidOffset >> NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT));

    //
    // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
    // current NV_XAL_EP_BAR0_WINDOW_BASE.
    //
    if (pKernelBus->cachedBar0WindowVidOffset == 0)
    {
        pKernelBus->cachedBar0WindowVidOffset = ((NvU64) kbusReadBAR0WindowBase_HAL(pGpu, pKernelBus))
            << NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT;
    }

    // Update only if the new offset is different from the cached value
    if (pKernelBus->cachedBar0WindowVidOffset != vidOffset)
    {
        NV_PRINTF(LEVEL_INFO,
                  "mapping BAR0_WINDOW to VID:%x'%08x\n",
                  NvU64_HI32(vidOffset), NvU64_LO32(vidOffset));

        // _BAR0_WINDOW_TARGET field is removed. It's always VIDMEM
        kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(vidOffset >> 16));

        pKernelBus->cachedBar0WindowVidOffset = vidOffset;
    }

    return (NV_OK);
}

/*!
 * @brief Returns the vidmem offset the BAR0 window currently points at.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 *
 * @returns Cached (lazily refreshed from hardware) BAR0 window vidmem offset.
 */
NvU64
kbusGetBAR0WindowVidOffset_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus
)
{
    NvU64 vidOffset;

    //
    // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
    // current NV_XAL_EP_BAR0_WINDOW_BASE.
    //
    if (pKernelBus->cachedBar0WindowVidOffset == 0)
    {
        pKernelBus->cachedBar0WindowVidOffset = ((NvU64) kbusReadBAR0WindowBase_HAL(pGpu, pKernelBus))
            << NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT;
    }

    vidOffset = pKernelBus->cachedBar0WindowVidOffset;

    return (vidOffset);
}

/*!
 * @brief Tests BAR2 against BAR0.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDescIn If memDescIn is NULL, a test mem desc is created and map/unmapped.
 *                       If memDescIn is not NULL and provided, this method assumes that it has
 *                       already been alloc'ed and mapping/unmapping is handled outside
 *                       this method.
 * @param[in] pCpuPtrIn  CPU mapping of pMemDescIn (must be non-NULL when pMemDescIn is)
 * @param[in] offset     offset of the test memory
 * @param[in] size       size of the test memory
 *
 * @returns NV_OK on success.
 */
NV_STATUS
kbusVerifyBar2_GH100
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    PMEMORY_DESCRIPTOR pMemDescIn,
    NvU8              *pCpuPtrIn,
    NvU64              offset,
    NvU64              size
)
{
    MEMORY_DESCRIPTOR memDesc, *pMemDesc = NULL;
    NvU8             *pOffset          = NULL;
    NvU32             index            = 0;
    NvU64             bar0Window       = 0;
    NvU64             testMemoryOffset = 0;
    NvU32             testMemorySize   = 0;
    NV_STATUS         status           = NV_OK;
    NvU32             testData         = 0;
    NvU32             temp             = 0;
    NV_ADDRESS_SPACE  testAddrSpace    = ADDR_FBMEM;
    NvBool            bIsStandaloneTest;
    const NvU32       SAMPLEDATA       = 0xabcdabcd;
    const NvU32       FBSIZETESTED     = 0x10;
    NvU64             bar0TestAddr     = 0;
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NvU32             flagsClean       = 0;
    NvU64             bar2VirtualAddr  = 0;

    // This PRAMIN-based test is incompatible with the coherent-mapping path.
    NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) == NV_FALSE, NV_ERR_INVALID_STATE);

    //
    // kbusVerifyBar2 will test BAR0 against sysmem on Tegra; otherwise skip
    // the test if inst_in_sys is used
    //
    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) &&
        !IsTEGRA(pGpu))
    {
        return NV_OK;
    }

    // In L2 Cache only mode or FB broken, don't verify Bar2
    if (gpuIsCacheOnlyModeEnabled(pGpu) ||
        pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
        kbusIsBar2TestSkipped(pKernelBus))
    {
        return NV_OK;
    }

    NV_PRINTF(LEVEL_INFO, "\n");

    // L2 evict flags used between each test phase so reads hit FB, not cache.
    flagsClean = NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_ALL |
                 NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_CLEAN;
    if (kmemsysIsL2CleanFbPull(pKernelMemorySystem))
    {
        flagsClean |= NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_WAIT_FB_PULL;
    }

    if (pMemDescIn && pCpuPtrIn)
    {
        // Caller-provided surface: validate the requested window fits in it.
        if ((size + offset) > pMemDescIn->Size)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "input offset 0x%llx size 0x%llx exceeds surface size 0x%llx\n",
                      offset, size, pMemDescIn->Size);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
        }
        bIsStandaloneTest = NV_FALSE;
        pOffset = pCpuPtrIn;
        pMemDesc = pMemDescIn;
    }
    else
    {
        offset = 0;
        size = FBSIZETESTED;
        // Allocate some memory to test virtual BAR2 with
        if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
        {
            memdescCreateExisting(&memDesc, pGpu, size, ADDR_SYSMEM, pGpu->instCacheOverride, MEMDESC_FLAGS_NONE);
        }
        else
        {
            memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
        }
        status = memdescAlloc(&memDesc);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Could not allocate vidmem to test bar2 with\n");
            DBG_BREAKPOINT();
            return NV_ERR_GENERIC;
        }

        bIsStandaloneTest = NV_TRUE;
        pOffset = kbusMapRmAperture_HAL(pGpu, &memDesc);
        if (pOffset == NULL)
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
            goto kbusVerifyBar2_failed;
        }
        pMemDesc = &memDesc;
    }
    testMemoryOffset = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) + offset;
    testMemorySize   = NvU64_LO32(size);
    testAddrSpace    = kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), pMemDesc);

    if (testAddrSpace != NV_MMU_PTE_APERTURE_VIDEO_MEMORY)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Test is not supported. NV_XAL_EP_BAR0_WINDOW only supports vidmem\n");
        DBG_BREAKPOINT();
        status = NV_ERR_NOT_SUPPORTED;
        goto kbusVerifyBar2_failed;
    }

    // ==========================================================
    // Does the BAR0 window work?

    NV_PRINTF((IS_EMULATION(pGpu)) ? LEVEL_ERROR : LEVEL_INFO,
              "Testing BAR0 window...\n");

    // Save the current window so it can be restored after the test.
    bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
    bar0TestAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);

    kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0TestAddr >> 16));

    // Save the original word so it can be restored once the test passes.
    testData = GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff));

    GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), SAMPLEDATA);

    if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
    {
        //
        // Ideally, this should hit the L2 cache and even if memory is bad,
        // unless something in the path up to L2 is messed up, we should not
        // get here.
        //
        NV_PRINTF(LEVEL_ERROR,
                  "Pre-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
                  "window with value 0x%x did not read back the last write.\n",
                  bar0TestAddr, SAMPLEDATA);
        DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
        status = NV_ERR_MEMORY_ERROR;
        goto kbusVerifyBar2_failed;
    }

    //
    // Evict L2 to ensure that the next read doesn't hit L2 and mistakenly
    // assume that the BAR0 window to vidmem works
    //
    status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
    if (NV_OK != status)
    {
        NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
        goto kbusVerifyBar2_failed;
    }

    if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Post-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
                  "window with value 0x%x did not read back the last write\n",
                  bar0TestAddr, SAMPLEDATA);
        if (IS_EMULATION(pGpu))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Setup a trigger on write<Bar0+0x1700, 0x40> with a 3 quarters post "
                      "trigger capture\n");
            NV_PRINTF(LEVEL_ERROR,
                      "and search for the last bar0 window write not returning the same value"
                      " in a subsequent read\n");
        }
        DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
        status = NV_ERR_MEMORY_ERROR;
        goto kbusVerifyBar2_failed;
    }

    NV_PRINTF((IS_EMULATION(pGpu)) ? LEVEL_ERROR : LEVEL_INFO,
              "Bar0 window tests successfully\n");
    // Restore the word clobbered by the test, then the original window base.
    GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), testData);

    kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0Window >> 16));

    // ==========================================================
    // Does MMU's translation logic work?

    bar2VirtualAddr = (NvU64)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping);
    NV_PRINTF(LEVEL_INFO,
              "MMUTest Writing test data through virtual BAR2 starting at bar2 offset"
              " (%p - %p) = %p and of size 0x%x\n", (NvU8 *)pOffset,
              (NvU8 *)pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping,
              (NvU8 *)bar2VirtualAddr, testMemorySize);

    NV_PRINTF((IS_EMULATION(pGpu) == NV_TRUE) ? LEVEL_ERROR : LEVEL_INFO,
              "MMUTest The physical address being targetted is 0x%llx\n",
              testMemoryOffset);

    for(index = 0; index < testMemorySize; index += 4)
    {
        MEM_WR32(pOffset + index, SAMPLEDATA);
    }

    // Flush the bar2 writes
    // A uflush should not be required since a bar0 window read follows after this
    osFlushCpuWriteCombineBuffer();

    status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
    if (NV_OK != status)
    {
        NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
        goto kbusVerifyBar2_failed;
    }

    //
    // Read back using the same BAR2 VA. This can make sure the writes have
    // gotten to memory after MMU translation.
    //
    // What it will catch:
    // - FB dead (also caught by BAR0 test above)
    // - MMU translation fails on BAR2
    // - MMU translation works but other parts of memsys having issues
    //
    // It will not verify whether the mapping points to the right physical
    // memory. The BAR0 readback test will do that next.
    //
    for(index = 0; index < testMemorySize; index += 4)
    {
        NvU32 bar2ReadbackData = 0;
        bar2ReadbackData = MEM_RD32(pOffset + index);

        if (bar2ReadbackData != SAMPLEDATA)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "MMUTest BAR2 readback VA = 0x%llx returned garbage 0x%x\n",
                      (bar2VirtualAddr + index), bar2ReadbackData);

            DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
            status = NV_ERR_MEMORY_ERROR;
            goto kbusVerifyBar2_failed;
        }
    }

    // Readback through the bar0 window
    bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);

    kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(testMemoryOffset >> 16));

    NV_PRINTF(LEVEL_INFO,
              "bar0Window = 0x%llx, testMemoryOffset = 0x%llx, testAddrSpace = %d, "
              "_XAL_EP_BAR0_WINDOW = 0x%08x\n", bar0Window, testMemoryOffset,
              testAddrSpace, GPU_REG_RD32(pGpu, NV_XAL_EP_BAR0_WINDOW));

    temp = (DRF_BASE(NV_PRAMIN) + (NvU32)(testMemoryOffset & 0xffff));
    for(index = 0; index < testMemorySize; index += 4)
    {
        NvU32 bar0WindowData = GPU_REG_RD32(pGpu, temp + index);
        if (bar0WindowData != SAMPLEDATA)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "MMUTest BAR0 window offset 0x%x returned garbage 0x%x\n",
                      temp + index, bar0WindowData);
            if (IS_EMULATION(pGpu) == NV_TRUE)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Setup a trigger for write<bar0 + 0x1700, 0x40> and in the waves search"
                          " the last few bar2 virtual writes mixed with bar0 window reads\n");
            }
            DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
            status = NV_ERR_MEMORY_ERROR;
            goto kbusVerifyBar2_failed;
        }
        // Write through the BAR0 window to be readback through BAR2 later
        GPU_REG_WR32(pGpu, temp + index, SAMPLEDATA + 0x10);
    }

    // Restore the window saved just before the readback phase.
    kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0Window >> 16));

    status = kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);

    // Bail now if we have encountered any error
    if (status != NV_OK)
    {
        goto kbusVerifyBar2_failed;
    }

    status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
    if (NV_OK != status)
    {
        goto kbusVerifyBar2_failed;
    }

    // Verify BAR2 virtual reads
    for(index = 0; index < testMemorySize; index +=4)
    {
        temp = MEM_RD32(pOffset + index);
        if (temp != (SAMPLEDATA + 0x10))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "MMUTest BAR2 Read of virtual addr 0x%x returned garbage 0x%x\n",
                      (NvU32)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping) + index,
                      temp);
            DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
            status = NV_ERR_MEMORY_ERROR;
            goto kbusVerifyBar2_failed;
        }
    }

kbusVerifyBar2_failed:
    // Only tear down resources this function allocated itself;
    // caller-provided surfaces are cleaned up by the caller.
    if (bIsStandaloneTest)
    {
        if (pOffset != NULL)
        {
            kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pOffset, NV_TRUE);
        }
        memdescFree(pMemDesc);
        memdescDestroy(pMemDesc);
    }

    if (status == NV_OK)
    {
        NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO,
                  "BAR2 virtual test passes\n");
    }

    return status;
}

/*!
 * @brief Tear down BAR2 CPU aperture
 *
 * 1. Release BAR2 GPU vaspace mappings.
 * 2. Release BAR2 CPU mapping.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] gfid
 *
 * @returns NV_OK on success.
 */
NV_STATUS
kbusTeardownBar2CpuAperture_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU32      gfid
)
{
    // Nothing to be done in guest in the paravirtualization case.
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || IS_GFID_VF(gfid))
    {
        return NV_OK;
    }

    if (KBUS_BAR2_TUNNELLED(pKernelBus))
    {
        // KBUS-TODO -- dead code path?
        // KBUS_BAR2_TUNNELLED is never true on HOPPER+

        // Unmap bar2 space
        if (pKernelBus->virtualBar2[gfid].pCpuMapping)
        {
            // Remove the memory access filter
            osMemRemoveFilter((NvU64)((NvUPtr)(pKernelBus->virtualBar2[gfid].pCpuMapping)));
            portMemFree(pKernelBus->virtualBar2[gfid].pCpuMapping);
            pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
        }
    }
    else
    {
        // End any outstanding CPU access to the BAR2 page-level structures.
        if (pKernelBus->virtualBar2[gfid].pPageLevels != NULL &&
            pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc != NULL)
        {
            memmgrMemDescEndTransfer(GPU_GET_MEMORY_MANAGER(pGpu),
                                     pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
                                     TRANSFER_FLAGS_NONE);
            pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
        }

        kbusDestroyCpuPointerForBusFlush_HAL(pGpu, pKernelBus);

        kbusFlushVirtualBar2_HAL(pGpu, pKernelBus, NV_FALSE, gfid);

        if (pKernelBus->virtualBar2[gfid].pCpuMapping)
        {
            osUnmapPciMemoryKernelOld(pGpu, (void*)pKernelBus->virtualBar2[gfid].pCpuMapping);
            // Mark the BAR as un-initialized so that a later call
            // to initbar2 can succeed.
            pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
        }

        //
        // make sure that the bar2 mode is physical so that the vesa extended
        // linear framebuffer works after driver unload. Clear other bits to force
        // vid.
        //
        // if BROKEN_FB, merely rewriting this to 0 (as it already was) causes
        // FBACKTIMEOUT -- don't do it (Bug 594539)
        //
        if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
        {
            GPU_VREG_FLD_WR_DRF_DEF(pGpu, _VIRTUAL_FUNCTION_PRIV_FUNC, _BAR2_BLOCK_LOW_ADDR, _MODE, _PHYSICAL);
            // bug 1738008: temporary fix to unblock -inst_in_sys argument
            // we tried to correct bar2 unbind sequence but didn't fix the real issue
            // will fix this soon 4/8/16
            GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK_LOW_ADDR);
        }
    }

    return NV_OK;
}

//
// Returns the P2P mailbox attributes such as:
// - pMailboxAreaSize: total size
// - pMailboxAlignmentBits: alignment in number of bits
// - pMailboxMaxOffset: max supported offset
//
// All out-parameters are optional (may be NULL); they are zeroed when P2P
// reads and writes are both disabled.
//
void
kbusGetP2PMailboxAttributes_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU32*     pMailboxAreaSize,
    NvU32*     pMailboxAlignmentSize,
    NvU32*     pMailboxBar1MaxOffset64KB
)
{
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    // Initialize null values by default
    if (pMailboxAreaSize != NULL)
    {
        *pMailboxAreaSize = 0;
    }
    if (pMailboxAlignmentSize != NULL)
    {
        *pMailboxAlignmentSize = 0;
    }
    if (pMailboxBar1MaxOffset64KB != NULL)
    {
        *pMailboxBar1MaxOffset64KB = 0;
    }

    if (pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) &&
        pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED))
    {
        // Return null values
        return;
    }

    // Retrieve attributes
    if (pMailboxAreaSize != NULL)
    {
        // One mailbox window per possible peer.
        *pMailboxAreaSize = HOPPER_WRITE_MAILBOX_SIZE * P2P_MAX_NUM_PEERS;
    }

    if (pMailboxAlignmentSize != NULL)
    {
        // Write mailbox data window needs to be 64KB aligned.
        *pMailboxAlignmentSize = 0x10000;
    }

    if (pMailboxBar1MaxOffset64KB != NULL)
    {
        // Max BAR1 offset for mailboxes, expressed in 64KB units.
        *pMailboxBar1MaxOffset64KB =
            NvU64_LO32(
                (HOPPER_MAX_WRITE_MAILBOX_ADDR(pGpu) + HOPPER_WRITE_MAILBOX_SIZE) >> 16
            );
    }

    return;
}

/*!
 * @brief Sets up access to the local GPU's XAL P2P register domain for a peer.
 *
 * @param[in]  pGpu0            (local GPU)
 * @param[in]  pKernelBus0      (local GPU)
 * @param[in]  pGpu1            (remote GPU)
 * @param[out] ppP2PDomMemDesc  memdesc describing the P2P domain aperture
 */
RmPhysAddr
kbusSetupP2PDomainAccess_GH100
(
    OBJGPU             *pGpu0,
    KernelBus          *pKernelBus0,
    OBJGPU             *pGpu1,
    PMEMORY_DESCRIPTOR *ppP2PDomMemDesc
)
{
    return kbusSetupPeerBarAccess(pGpu0, pGpu1,
                pGpu0->busInfo.gpuPhysAddr + DRF_BASE(NV_XAL_EP_P2P),
                DRF_SIZE(NV_XAL_EP_P2P), ppP2PDomMemDesc);
}

/*!
 * @brief Issues a PCIe read-flush so prior vidmem writes land before a
 *        BAR0 doorbell write.
 */
NV_STATUS
kbusFlushPcieForBar0Doorbell_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus
)
{
    return kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
}

/*!
 * @brief Create a P2P mapping to a given peer GPU
 *
 * Dispatches to the C2C, NVLink, BAR1 or mailbox implementation based on the
 * connection type encoded in attributes.
 *
 * @param[in]   pGpu0          (local GPU)
 * @param[in]   pKernelBus0    (local GPU)
 * @param[in]   pGpu1          (remote GPU)
 * @param[in]   pKernelBus1    (remote GPU)
 * @param[out]  peer0          Peer ID (local to remote)
 * @param[out]  peer1          Peer ID (remote to local)
 * @param[in]   attributes     Special attributes for the mapping
 *
 * return NV_OK on success
 */
NV_STATUS
kbusCreateP2PMapping_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32     *peer0,
    NvU32     *peer1,
    NvU32      attributes
)
{
    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _C2C, attributes))
    {
        return kbusCreateP2PMappingForC2C_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK, attributes) ||
        FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK_INDIRECT, attributes))
    {
        return kbusCreateP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI,
                     _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
    {
        return kbusCreateP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
    {
        return kbusCreateP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    NV_PRINTF(LEVEL_ERROR, "P2P type %d is not supported\n", DRF_VAL(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, attributes));

    return NV_ERR_NOT_SUPPORTED;
}

/*!
 * @brief Remove the P2P mapping to a given peer GPU
 *
 * Dispatches to the C2C, NVLink, BAR1 or mailbox implementation based on the
 * connection type encoded in attributes; mirrors kbusCreateP2PMapping_GH100.
 *
 * @param[in]   pGpu0          (local GPU)
 * @param[in]   pKernelBus0    (local GPU)
 * @param[in]   pGpu1          (remote GPU)
 * @param[in]   pKernelBus1    (remote GPU)
 * @param[out]  peer0          Peer ID (local to remote)
 * @param[out]  peer1          Peer ID (remote to local)
 * @param[in]   attributes     Special attributes for the mapping
 *
 * return NV_OK on success
 */
NV_STATUS
kbusRemoveP2PMapping_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32      peer0,
    NvU32      peer1,
    NvU32      attributes
)
{
    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _C2C, attributes))
    {
        return kbusRemoveP2PMappingForC2C_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK, attributes) ||
        FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK_INDIRECT, attributes))
    {
        return kbusRemoveP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
    {
        return kbusRemoveP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
    }

    if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
    {
        return
            kbusRemoveP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
    }

    NV_PRINTF(LEVEL_ERROR, "P2P type %d is not supported\n", DRF_VAL(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, attributes));

    return NV_ERR_NOT_SUPPORTED;
}

/*!
 * @brief Returns the peer number from pGpu (Local) to pGpuPeer
 *
 * Checks the C2C peer-number mask first and falls back to the NVLink (GP100)
 * implementation when no C2C peer ID is set for the remote GPU.
 *
 * @param[in] pGpu          Local
 * @param[in] pKernelBus    Local
 * @param[in] pGpuPeer      Remote
 *
 * @returns NvU32 bus peer number
 */
NvU32
kbusGetPeerId_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    OBJGPU    *pGpuPeer
)
{
    NvU32 gpuPeerInst = gpuGetInstance(pGpuPeer);
    NvU32 peerId = pKernelBus->c2cPeerInfo.busC2CPeerNumberMask[gpuPeerInst];

    // Fall back to Nvlink
    if (peerId == 0)
    {
        NV_PRINTF(LEVEL_INFO,
                  "C2C P2P not set up between GPU%u and GPU%u, checking for Nvlink...\n",
                  gpuGetInstance(pGpu), gpuPeerInst);
        return kbusGetPeerId_GP100(pGpu, pKernelBus, pGpuPeer);
    }

    // Reduce the mask to the index of its lowest set bit.
    LOWESTBITIDX_32(peerId);
    return peerId;
}

/**
 * @brief Returns whether the given peerId is valid for a given GPU
 *
 * Accepts the peer ID if it is set in the C2C peer-number mask, otherwise
 * defers to the NVLink (GP100) validity check.
 *
 * @param[in]  pGpu
 * @param[in]  pKernelBus
 * @param[in]  peerId       The peer identifier
 *
 * @return NV_OK if valid
 */
NV_STATUS
kbusIsPeerIdValid_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU32      peerId
)
{
    NV_ASSERT_OR_RETURN(peerId < P2P_MAX_NUM_PEERS, NV_ERR_INVALID_INDEX);
    if (pKernelBus->c2cPeerInfo.busC2CPeerNumberMask[gpuGetInstance(pGpu)] & NVBIT(peerId))
        return NV_OK;
    return kbusIsPeerIdValid_GP100(pGpu, pKernelBus, peerId);
}

/*!
 * @brief Create C2C mappings for FB memory
 *        When this is called, we should not have any BAR1/BAR2 mappings
 *
 * @param[in]   pGpu                 OBJGPU pointer
 * @param[in]   pKernelBus           Kernel bus pointer
 * @param[in]   numaOnlineMemorySize Size of FB memory to online in
 *                                   kernel as a NUMA node
 * @param[in]   bFlush               Flush CPU cache or not (must be NV_FALSE;
 *                                   flushing is not supported here)
 *
 * @return 'NV_OK' if successful, an RM error code otherwise.
 */
NV_STATUS
kbusCreateCoherentCpuMapping_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU64      numaOnlineMemorySize,
    NvBool     bFlush
)
{
    MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NV_STATUS  status = NV_OK;
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
    NvP64      pCpuMapping = NvP64_NULL;
    NvU64      fbSize;
    NvU64      busAddrStart;
    NvU64      busAddrSize;
    NvU32      i;
    NvU64      memblockSize;
    NvU32      cachingMode[COHERENT_CPU_MAPPING_TOTAL_REGIONS];

    // Only valid on self-hosted parts with the C2C link up.
    NV_ASSERT_OR_RETURN(gpuIsSelfHosted(pGpu) && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP), NV_ERR_INVALID_STATE);

    // Assert no BAR1/BAR2 mappings
    NV_ASSERT_OR_RETURN(kbusGetBar1VASpace_HAL(pGpu, pKernelBus) == NULL,
                        NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList) == 0,
                        NV_ERR_INVALID_STATE);

    fbSize = (pMemoryManager->Ram.fbTotalMemSizeMb << 20);

    NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

    // Two regions: NUMA-onlined FB, then the RM reserved remainder.
    pKernelBus->coherentCpuMapping.nrMapping = 2;

    pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_REGION_0] = pMemoryManager->Ram.fbRegion[0].base;
    pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0] = numaOnlineMemorySize;
    cachingMode[COHERENT_CPU_MAPPING_REGION_0] = NV_MEMORY_CACHED;

    // RM reserved region: everything after the onlined region, up to fbSize.
    pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_RM_RESV_REGION] =
        pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_REGION_0] +
        pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0];
    pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_RM_RESV_REGION] =
        fbSize - pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0];

    if (pKernelMemorySystem->bBug3656943WAR)
    {
        //
        // RM reserved region should be mapped as Normal Non-cacheable as a SW WAR
        // for the bug 3656943. NV_MEMORY_WRITECOMBINED translates to linux
        // kernel ioremap_wc which actually uses the normal non-cacheable type
        // PROT_NORMAL_NC
        //
        cachingMode[COHERENT_CPU_MAPPING_RM_RESV_REGION] = NV_MEMORY_WRITECOMBINED;
    }
    else
    {
        cachingMode[COHERENT_CPU_MAPPING_RM_RESV_REGION] = NV_MEMORY_CACHED;
    }

    for (i = COHERENT_CPU_MAPPING_REGION_0; i < pKernelBus->coherentCpuMapping.nrMapping; ++i)
    {
        busAddrStart = pKernelMemorySystem->coherentCpuFbBase + pKernelBus->coherentCpuMapping.physAddr[i];
        busAddrSize  = pKernelBus->coherentCpuMapping.size[i];

        // In SHH, CPU uses coherent C2C link to access GPU memory and hence it can be accessed cached.
        status = osMapPciMemoryKernel64(pGpu,
                                        (NvUPtr)busAddrStart,
                                        (NvU64)busAddrSize,
                                        NV_PROTECT_READ_WRITE,
                                        &(pCpuMapping),
                                        cachingMode[i]);

        NV_ASSERT_OR_RETURN(status == NV_OK, NV_ERR_GENERIC);

        pKernelBus->coherentCpuMapping.pCpuMapping[i] = (NvP64)pCpuMapping;
        pKernelBus->coherentCpuMapping.size[i] = busAddrSize;

        NV_ASSERT_OR_RETURN(bFlush == NV_FALSE, NV_ERR_NOT_SUPPORTED);

        // Counts the number of outstanding mappings in FB.
        pKernelBus->coherentCpuMapping.refcnt[i] = 0;
    }

    pKernelBus->coherentCpuMapping.bCoherentCpuMapping = NV_TRUE;

    NV_PRINTF(LEVEL_INFO, "Enabling CPU->C2C->FBMEM path\n");

    return status;
}

/*!
 * @brief Sanity test coherent link between CPU and GPU.
975 * 976 * @param[in] pGpu OBJGPU pointer 977 * @param[in] pKernelBus Kernel bus pointer 978 * 979 * @returns NV_OK on success. 980 */ 981 NV_STATUS 982 kbusVerifyCoherentLink_GH100 983 ( 984 OBJGPU *pGpu, 985 KernelBus *pKernelBus 986 ) 987 { 988 NvU64 size = BUS_COHERENT_LINK_TEST_BUFFER_SIZE; 989 MEMORY_DESCRIPTOR *pMemDesc = NULL; 990 NvU8 *pOffset = NULL; 991 const NvU32 sampleData = 0x12345678; 992 NV_STATUS status = NV_OK; 993 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 994 NvU32 index = 0; 995 NvU32 flagsClean = 0; 996 MEMORY_DESCRIPTOR memDesc; 997 998 // Skip the test if 0FB configuration is used. 999 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM)) 1000 { 1001 NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO, 1002 "Skipping Coherent link test\n"); 1003 return NV_OK; 1004 } 1005 1006 NV_ASSERT_OR_RETURN(pKernelBus->coherentLinkTestBufferBase != 0, NV_ERR_INVALID_STATE); 1007 memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_CACHED, MEMDESC_FLAGS_NONE); 1008 memdescDescribe(&memDesc, ADDR_FBMEM, pKernelBus->coherentLinkTestBufferBase, size); 1009 1010 pOffset = kbusMapRmAperture_HAL(pGpu, &memDesc); 1011 if (pOffset == NULL) 1012 { 1013 status = NV_ERR_INSUFFICIENT_RESOURCES; 1014 goto busVerifyCoherentLink_failed; 1015 } 1016 pMemDesc = &memDesc; 1017 1018 for(index = 0; index < size; index += 4) 1019 { 1020 MEM_WR32(pOffset + index, sampleData); 1021 } 1022 1023 // Ensure the writes are flushed out of the CPU caches. 
1024 osFlushGpuCoherentCpuCacheRange(pGpu->pOsGpuInfo, (NvUPtr)pOffset, size); 1025 1026 flagsClean = NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_ALL | 1027 NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_CLEAN; 1028 if (kmemsysIsL2CleanFbPull(pKernelMemorySystem)) 1029 { 1030 flagsClean |= NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_WAIT_FB_PULL; 1031 } 1032 status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean); 1033 if (NV_OK != status) 1034 { 1035 NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n"); 1036 goto busVerifyCoherentLink_failed; 1037 } 1038 1039 for(index = 0; index < size; index += 4) 1040 { 1041 NvU32 readbackData = MEM_RD32(pOffset + index); 1042 1043 if (readbackData != sampleData) 1044 { 1045 NV_PRINTF(LEVEL_ERROR, 1046 "Coherent Link test readback VA = 0x%llx returned garbage 0x%x\n", 1047 (NvUPtr)(pOffset + index), readbackData); 1048 1049 DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR); 1050 status = NV_ERR_GENERIC; 1051 } 1052 } 1053 1054 busVerifyCoherentLink_failed: 1055 if (pOffset != NULL) 1056 { 1057 kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pOffset, NV_TRUE); 1058 } 1059 memdescDestroy(pMemDesc); 1060 1061 if (status == NV_OK) 1062 { 1063 NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO, 1064 "Coherent link test passes\n"); 1065 } 1066 1067 return status; 1068 1069 } 1070 1071 /** 1072 * @brief Setup BAR1 P2P capability property. 1073 * All Hopper+ are BAR1 P2P capable. 
1074 * 1075 * @param pGpu 1076 * @param pBus 1077 * 1078 * @return void 1079 */ 1080 void kbusSetupBar1P2PCapability_GH100 1081 ( 1082 OBJGPU *pGpu, 1083 KernelBus *pKernelBus 1084 ) 1085 { 1086 NvU64 bar1Size = kbusGetPciBarSize(pKernelBus, 1); 1087 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 1088 NvU64 fbSize = pMemoryManager->Ram.fbUsableMemSize; 1089 1090 // Make sure the BAR1 size is big enough to cover all FB 1091 if((bar1Size >= fbSize) && (fbSize != 0)) 1092 { 1093 NV_PRINTF(LEVEL_INFO, "The gpu %d is BAR1 P2P capable.\n", pGpu->gpuInstance); 1094 kbusSetBar1P2pCapable(pKernelBus, NV_TRUE); 1095 } 1096 else 1097 { 1098 NV_PRINTF(LEVEL_INFO, "The gpu %d is not BAR1 P2P capable.\n", pGpu->gpuInstance); 1099 kbusSetBar1P2pCapable(pKernelBus, NV_FALSE); 1100 } 1101 } 1102 1103 /*! 1104 * @brief check if it can support BAR1 P2P between the GPUs 1105 * At the point this function is called, the system do not support C2C and 1106 * NVLINK P2P and the BAR1 P2P is the preferred option. 
 *
 * @param[in]  pGpu0         (local GPU)
 * @param[in]  pKernelBus0   (local GPU)
 * @param[in]  pGpu1         (remote GPU)
 * @param[in]  pKernelBus1   (remote GPU)
 *
 * return NV_TRUE if the GPU support BAR1 P2P
 */
NvBool
kbusIsPcieBar1P2PMappingSupported_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1
)
{
    NvU32 gpuInst0 = gpuGetInstance(pGpu0);
    NvU32 gpuInst1 = gpuGetInstance(pGpu1);
    KernelBif *pKernelBif0 = GPU_GET_KERNEL_BIF(pGpu0);
    NvU32 gpu0Gfid;
    NvU32 gpu1Gfid;
    NV_STATUS status = NV_OK;

    // Check if BAR1 P2P is disabled by a regkey
    if ((pKernelBif0->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_DEFAULT) &&
        (pKernelBif0->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P))
    {
        return NV_FALSE;
    }

    // Loopback (same GPU on both ends) is not supported
    if (pGpu0 == pGpu1)
    {
        return NV_FALSE;
    }

    // Both of GPUs need to support BAR1P2P
    if (!kbusIsBar1P2PCapable(pKernelBus0) ||
        !kbusIsBar1P2PCapable(pKernelBus1))
    {
        return NV_FALSE;
    }

    //
    // TODO: To move this check to kbusSetupBar1P2PCapability. It should check bStaticBar1Enabled
    // to determine if the GPU is Bar1P2P Capable.
    //
    NV_ASSERT_OK_OR_ELSE(status, vgpuGetCallingContextGfid(pGpu0, &gpu0Gfid), return NV_FALSE);
    NV_ASSERT_OK_OR_ELSE(status, vgpuGetCallingContextGfid(pGpu1, &gpu1Gfid), return NV_FALSE);
    if (!pKernelBus0->bar1[gpu0Gfid].bStaticBar1Enabled ||
        !pKernelBus1->bar1[gpu1Gfid].bStaticBar1Enabled)
    {
        return NV_FALSE;
    }

    //
    // RM only supports one type of PCIE P2P protocol, either BAR1 P2P or mailbox P2P, between
    // two GPUs at a time. For more info on this topic, please check bug 3274549 comment 10
    //
    // Check if there is p2p mailbox connection between the GPUs.
    //
    if ((pKernelBus0->p2pPcie.peerNumberMask[gpuInst1] != 0) ||
        (pKernelBus1->p2pPcie.peerNumberMask[gpuInst0] != 0))
    {
        return NV_FALSE;
    }

    return NV_TRUE;
}

/*!
 * @brief Remove source GPU IOMMU mapping for the peer GPU
 *
 * @param[in] pSrcGpu         The source GPU
 * @param[in] pSrcKernelBus   The source Kernel Bus
 * @param[in] pPeerGpu        The peer GPU
 * @param[in] pPeerKernelBus  The peer Kernel Bus
 *
 * @returns void
 */
static void
_kbusRemoveStaticBar1IOMMUMapping
(
    OBJGPU    *pSrcGpu,
    KernelBus *pSrcKernelBus,
    OBJGPU    *pPeerGpu,
    KernelBus *pPeerKernelBus
)
{
    NvU32 peerGfid;

    NV_CHECK_OR_RETURN_VOID(LEVEL_ERROR,
                            vgpuGetCallingContextGfid(pPeerGpu, &peerGfid) == NV_OK);

    NV_ASSERT_OR_RETURN_VOID(pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc != NULL);

    // Drop the source GPU's IOMMU mapping of the peer GPU's static BAR1 region.
    memdescUnmapIommu(pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc,
                      pSrcGpu->busInfo.iovaspaceId);
}

/*!
 * @brief Remove GPU IOMMU mapping between the pair of GPUs
 *
 * @param[in] pGpu0
 * @param[in] pKernelBus0
 * @param[in] pGpu1
 * @param[in] pKernelBus1
 *
 * @returns void
 */
static void
_kbusRemoveStaticBar1IOMMUMappingForGpuPair
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1
)
{
    // Remove the mappings in both directions.
    _kbusRemoveStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
    _kbusRemoveStaticBar1IOMMUMapping(pGpu1, pKernelBus1, pGpu0, pKernelBus0);
}

/*!
 * @brief Create source GPU IOMMU mapping for the peer GPU
 *
 * @param[in] pSrcGpu         The source GPU
 * @param[in] pSrcKernelBus   The source Kernel Bus
 * @param[in] pPeerGpu        The peer GPU
 * @param[in] pPeerKernelBus  The peer Kernel Bus
 *
 * @returns NV_OK on success
 */
static NV_STATUS
_kbusCreateStaticBar1IOMMUMapping
(
    OBJGPU    *pSrcGpu,
    KernelBus *pSrcKernelBus,
    OBJGPU    *pPeerGpu,
    KernelBus *pPeerKernelBus
)
{
    NvU32 peerGpuGfid;
    MEMORY_DESCRIPTOR *pPeerDmaMemDesc = NULL;
    RmPhysAddr peerDmaAddr;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pPeerGpu, &peerGpuGfid));

    pPeerDmaMemDesc = pPeerKernelBus->bar1[peerGpuGfid].staticBar1.pDmaMemDesc;

    NV_ASSERT_OR_RETURN(pPeerDmaMemDesc != NULL, NV_ERR_INVALID_STATE);

    // Create the source GPU IOMMU mapping on the peer static bar1
    NV_ASSERT_OK_OR_RETURN(memdescMapIommu(pPeerDmaMemDesc,
                                           pSrcGpu->busInfo.iovaspaceId));

    // Get the peer DMA address the memory was mapped to for the source GPU
    memdescGetPhysAddrsForGpu(pPeerDmaMemDesc, pSrcGpu,
                              AT_GPU, 0, 0, 1, &peerDmaAddr);

    // Check that the peer DMA address is aligned to RM_PAGE_SIZE_512M (512MB)
    if (!NV_IS_ALIGNED64(peerDmaAddr, RM_PAGE_SIZE_512M))
    {
        NV_PRINTF(LEVEL_ERROR, "The peer DMA address 0x%llx is not aligned at 0x%llx\n",
                  peerDmaAddr, RM_PAGE_SIZE_512M);

        // Unwind the IOMMU mapping created above before failing.
        memdescUnmapIommu(pPeerDmaMemDesc, pSrcGpu->busInfo.iovaspaceId);

        return NV_ERR_INVALID_ADDRESS;
    }

    return NV_OK;
}

/*!
 * @brief To create IOMMU mapping between the pair of GPUs
 *
 * @param[in] pGpu0
 * @param[in] pKernelBus0
 * @param[in] pGpu1
 * @param[in] pKernelBus1
 *
 * @returns NV_OK on success
 */
static NV_STATUS
_kbusCreateStaticBar1IOMMUMappingForGpuPair
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1
)
{
    NvU32 gpuInst0 = gpuGetInstance(pGpu0);
    NvU32 gpuInst1 = gpuGetInstance(pGpu1);
    NV_STATUS status;

    // Create GPU0 IOMMU mapping to GPU1 BAR1
    status = _kbusCreateStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "IOMMU mapping failed from GPU%u to GPU%u\n",
                  gpuInst0, gpuInst1);
        return status;
    }

    // Create GPU1 IOMMU mapping to GPU0 BAR1
    status = _kbusCreateStaticBar1IOMMUMapping(pGpu1, pKernelBus1, pGpu0, pKernelBus0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "IOMMU mapping failed from GPU%u to GPU%u\n",
                  gpuInst1, gpuInst0);

        // Remove the previously created (GPU0 -> GPU1) IOMMU mapping
        _kbusRemoveStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
    }

    return status;
}

/*!
1328 * @brief To get the DMA information from the source GPU to the peer GPU 1329 * 1330 * @param[in] pSrcGpu The source GPU 1331 * @param[in] pPeerGpu The peer GPU 1332 * @param[in] pPeerKernelBus The peer Kernel Bus 1333 * @param[out] pDmaAddress The start DMA address for the source GPU 1334 * to access the peer GPU 1335 * @param[out] pDmaSize The size of the DMA transfer range 1336 * 1337 * @returns NV_OK on success 1338 */ 1339 NV_STATUS kbusGetBar1P2PDmaInfo_GH100 1340 ( 1341 OBJGPU *pSrcGpu, 1342 OBJGPU *pPeerGpu, 1343 KernelBus *pPeerKernelBus, 1344 NvU64 *pDmaAddress, 1345 NvU64 *pDmaSize 1346 ) 1347 { 1348 NvU32 peerGfid; 1349 MEMORY_DESCRIPTOR *pPeerDmaMemDesc; 1350 1351 NV_ASSERT_OR_RETURN((pDmaAddress != NULL) && (pDmaSize != NULL), 1352 NV_ERR_INVALID_ARGUMENT); 1353 1354 // Set the default value 1355 *pDmaAddress = NV_U64_MAX; 1356 *pDmaSize = 0; 1357 1358 NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pPeerGpu, &peerGfid)); 1359 1360 pPeerDmaMemDesc = pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc; 1361 NV_ASSERT_OR_RETURN(pPeerDmaMemDesc != NULL, NV_ERR_NOT_SUPPORTED); 1362 1363 // Get the peer GPU DMA address for the source GPU 1364 memdescGetPhysAddrsForGpu(pPeerDmaMemDesc, pSrcGpu, 1365 AT_GPU, 0, 0, 1, pDmaAddress); 1366 1367 *pDmaSize = memdescGetSize(pPeerDmaMemDesc); 1368 1369 return NV_OK; 1370 } 1371 1372 /*! 
 * @brief check if there is BAR1 P2P mapping between given GPUs
 *
 * @param[in]  pGpu0         (local GPU)
 * @param[in]  pKernelBus0   (local GPU)
 * @param[in]  pGpu1         (remote GPU)
 * @param[in]  pKernelBus1   (remote GPU)
 *
 * return NV_TRUE if the P2P is using BAR1
 */
NvBool
kbusHasPcieBar1P2PMapping_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1
)
{
    // A BAR1 P2P mapping exists only when both directions are refcounted.
    return ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuGetInstance(pGpu1)] != 0) &&
            (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuGetInstance(pGpu0)] != 0));
}

/*!
 * @brief Create a Bar1 P2P mapping to given GPUs
 *
 * @param[in] pGpu0         (local GPU)
 * @param[in] pKernelBus0   (local GPU)
 * @param[in] pGpu1         (remote GPU)
 * @param[in] pKernelBus1   (remote GPU)
 * @param[in] attributes    attributes to control the mapping
 *
 * return NV_OK on success
 *        NV_ERR_NOT_SUPPORTED if it fails
 */
NV_STATUS
kbusCreateP2PMappingForBar1P2P_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32      attributes
)
{
    NvU32 gpuInst0 = gpuGetInstance(pGpu0);
    NvU32 gpuInst1 = gpuGetInstance(pGpu1);
    NV_STATUS status = NV_OK;

    if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    if (!kbusIsPcieBar1P2PMappingSupported_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // Only create IOMMU mapping between the pair of GPUs at the first time.
    if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) &&
        (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
    {
        NV_ASSERT_OK_OR_RETURN(_kbusCreateStaticBar1IOMMUMappingForGpuPair(pGpu0, pKernelBus0,
                                                                           pGpu1, pKernelBus1));
    }

    // Track the mapping in both directions.
    pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1]++;
    pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0]++;

    NV_PRINTF(LEVEL_INFO, "added PCIe BAR1 P2P mapping between GPU%u and GPU%u\n",
              gpuInst0, gpuInst1);

    return status;
}

/*!
 * @brief remove a Bar1 P2P mapping to given GPUs
 *
 * @param[in] pGpu0         (local GPU)
 * @param[in] pKernelBus0   (local GPU)
 * @param[in] pGpu1         (remote GPU)
 * @param[in] pKernelBus1   (remote GPU)
 * @param[in] attributes    attributes of the P2P
 *
 * return NV_OK on success
 */
NV_STATUS
kbusRemoveP2PMappingForBar1P2P_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32      attributes
)
{
    NvU32 gpuInst0, gpuInst1;

    if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    gpuInst0 = gpuGetInstance(pGpu0);
    gpuInst1 = gpuGetInstance(pGpu1);

    // Refuse to remove a mapping that was never (fully) created.
    if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) ||
        (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
    {
        return NV_ERR_INVALID_STATE;
    }

    pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1]--;
    pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0]--;

    // Only remove the IOMMU mapping between the pair of GPUs when it is the last mapping.
    if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) &&
        (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
    {
        _kbusRemoveStaticBar1IOMMUMappingForGpuPair(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
    }

    NV_PRINTF(LEVEL_INFO, "removed PCIe BAR1 P2P mapping between GPU%u and GPU%u\n",
              gpuInst0, gpuInst1);

    return NV_OK;
}

/*!
 * @brief Returns the C2C peer ID from pGpu0 to pGpu1 after
 *        reserving it if peer mapping doesn't exist already
 *        for the GPU pair
 *
 * @param[in]   pGpu0         (local GPU)
 * @param[in]   pKernelBus0   (local GPU)
 * @param[in]   pGpu1         (remote GPU)
 * @param[in]   pKernelBus1   (remote GPU)
 * @param[out]  c2cPeer       NvU32 pointer contains the peer ID to use
 *                            for local GPU to remote GPU when return value
 *                            is NV_OK
 *
 * return NV_OK on success
 */
static NV_STATUS
_kbusGetC2CP2PPeerId
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32     *c2cPeer
)
{
    // NOTE(review): stub - returns NV_OK without modifying *c2cPeer, so the
    // caller ends up using whatever value *c2cPeer already held. Confirm this
    // is intentional.
    NV_STATUS status = NV_OK;
    return status;
}

/*!
 * @brief Create C2C mapping to a given peer GPU
 *
 *
 * @param[in] pGpu0         (Local)
 * @param[in] pKernelBus0   (Local)
 * @param[in] pGpu1         (Remote)
 * @param[in] peerId        Peer ID to use for local GPU to
 *                          remote GPU mapping.
 *
 * return NV_OK on success
 */
static NV_STATUS
_kbusCreateC2CPeerMapping
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    NvU32      peerId
)
{
    NvU32 gpuInstance = gpuGetInstance(pGpu1);
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu0);
    NV2080_CTRL_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING_PARAMS params = {0};
    NV_STATUS status = NV_OK;

    //
    // Increment the mapping refcount per peerID - since there is a new mapping that
    // will use this peerID
    //
    pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[peerId]++;

    // Set the peer IDs in the corresponding peer number masks
    pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[gpuInstance] |= NVBIT(peerId);

    // Ask the physical RM to program the C2C peer mapping.
    params.peerId = peerId;
    status = pRmApi->Control(pRmApi,
                             pGpu0->hInternalClient,
                             pGpu0->hInternalSubdevice,
                             NV2080_CTRL_CMD_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING,
                             &params,
                             sizeof(NV2080_CTRL_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING_PARAMS));
    NV_ASSERT(status == NV_OK);

    // NOTE(review): the refcount/mask updates above are not rolled back when
    // the control call fails - confirm callers treat a failure here as fatal.
    return status;
}

/*!
 * @brief Create a C2C P2P mapping to a given peer GPU
 *
 * @param[in]   pGpu0        (local GPU)
 * @param[in]   pKernelBus0  (local GPU)
 * @param[in]   pGpu1        (remote GPU)
 * @param[in]   pKernelBus1  (remote GPU)
 * @param[out]  peer0        Peer ID (local to remote)
 * @param[out]  peer1        Peer ID (remote to local)
 * @param[in]   attributes   P2P attributes
 *
 * return NV_OK on success
 */
NV_STATUS
kbusCreateP2PMappingForC2C_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32     *peer0,
    NvU32     *peer1,
    NvU32      attributes
)
{
    NvU32 gpu0Instance = gpuGetInstance(pGpu0);
    NvU32 gpu1Instance = gpuGetInstance(pGpu1);
    NvU32 c2cPeer0;
    NvU32 c2cPeer1;
    NV_STATUS status;

    if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    if (peer0 == NULL || peer1 == NULL)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // A caller-supplied peer ID must be either BUS_INVALID_PEER or in range.
    if ((*peer0 != BUS_INVALID_PEER && *peer0 >= P2P_MAX_NUM_PEERS) ||
        (*peer1 != BUS_INVALID_PEER && *peer1 >= P2P_MAX_NUM_PEERS))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    c2cPeer0 = *peer0;
    c2cPeer1 = *peer1;

    // Get the peer ID pGpu0 should use for P2P over C2C to pGpu1
    if ((status = _kbusGetC2CP2PPeerId(pGpu0, pKernelBus0,
                                       pGpu1, pKernelBus1,
                                       &c2cPeer0)) != NV_OK)
    {
        return status;
    }

    // Get the peer ID pGpu1 should use for P2P over C2C to pGpu0
    if ((status = _kbusGetC2CP2PPeerId(pGpu1, pKernelBus1,
                                       pGpu0, pKernelBus0,
                                       &c2cPeer1)) != NV_OK)
    {
        return status;
    }

    if ((c2cPeer0 == BUS_INVALID_PEER) || (c2cPeer1 == BUS_INVALID_PEER))
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to create C2C P2P mapping between GPU%u and GPU%u\n",
                  gpu0Instance, gpu1Instance);

        return NV_ERR_INVALID_REQUEST;
    }

    *peer0 = c2cPeer0;
    *peer1 = c2cPeer1;

    //
    // Does the mapping already exist between the given pair of GPUs using the peerIDs
    // *peer0 and *peer1 respectively ?
    //
    if ((pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[gpu1Instance] & NVBIT(*peer0)) &&
        (pKernelBus1->c2cPeerInfo.busC2CPeerNumberMask[gpu0Instance] & NVBIT(*peer1)))
    {
        //
        // Increment the mapping refcount per peerID - since there is another usage
        // of a mapping that is using this peerID
        //
        pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer0]++;
        pKernelBus1->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer1]++;

        NV_PRINTF(LEVEL_INFO,
                  "- P2P: Peer mapping is already in use for gpu instances %x and %x "
                  "with peer id's %d and %d. Increasing the mapping refcounts for the"
                  " peer IDs to %d and %d respectively.\n",
                  gpu0Instance, gpu1Instance, *peer0, *peer1,
                  pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer0],
                  pKernelBus1->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer1]);

        return NV_OK;
    }

    //
    // Reached here implies the mapping between the given pair of GPUs using the peerIDs
    // *peer0 and *peer1 does not exist. Create the mapping
    //

    NV_ASSERT_OK_OR_RETURN(_kbusCreateC2CPeerMapping(pGpu0, pKernelBus0, pGpu1, *peer0));
    NV_ASSERT_OK_OR_RETURN(_kbusCreateC2CPeerMapping(pGpu1, pKernelBus1, pGpu0, *peer1));

    NV_PRINTF(LEVEL_INFO,
              "added C2C P2P mapping between GPU%u (peer %u) and GPU%u (peer %u)\n",
              gpu0Instance, *peer0, gpu1Instance, *peer1);

    return NV_OK;
}

/*!
 * @brief Remove C2C mapping to a given peer GPU
 *
 * @param[in] pGpu0         (local GPU)
 * @param[in] pKernelBus0   (local GPU)
 * @param[in] pGpu1         (remote GPU)
 * @param[in] peerId        Peer ID for local to remote GPU
 *
 * return NV_OK on success
 */
static NV_STATUS
_kbusRemoveC2CPeerMapping
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    NvU32      peerId
)
{
    NV_STATUS status = NV_OK;

    if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // NOTE(review): no teardown beyond the virtualization check is performed
    // here - the refcount/mask bookkeeping done at creation time is left
    // untouched. Confirm this is intentional.
    return status;
}

/*!
 * @brief Remove the C2C P2P mapping to a pair of GPU
 *
 * @param[in] pGpu0         (local GPU)
 * @param[in] pKernelBus0   (local GPU)
 * @param[in] pGpu1         (remote GPU)
 * @param[in] pKernelBus1   (remote GPU)
 * @param[in] peer0         Peer ID (local to remote)
 * @param[in] peer1         Peer ID (remote to local)
 * @param[in] attributes    P2P attributes
 *
 * return NV_OK on success
 */
NV_STATUS
kbusRemoveP2PMappingForC2C_GH100
(
    OBJGPU    *pGpu0,
    KernelBus *pKernelBus0,
    OBJGPU    *pGpu1,
    KernelBus *pKernelBus1,
    NvU32      peer0,
    NvU32      peer1,
    NvU32      attributes
)
{
    NV_STATUS status = NV_OK;

    // Check if there's C2C mapping
    if (((pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[pGpu1->gpuInstance] & NVBIT(peer0)) == 0) ||
        ((pKernelBus1->c2cPeerInfo.busC2CPeerNumberMask[pGpu0->gpuInstance] & NVBIT(peer1)) == 0))
    {
        return NV_ERR_INVALID_STATE;
    }

    // C2C mapping exists, remove the C2C mapping
    NV_ASSERT_OK_OR_RETURN(_kbusRemoveC2CPeerMapping(pGpu0, pKernelBus0, pGpu1, peer0));
    NV_ASSERT_OK_OR_RETURN(_kbusRemoveC2CPeerMapping(pGpu1, pKernelBus1, pGpu0, peer1));

    return status;
}

/*!
 * @brief Check whether this GPU needs the static BAR1 mapping.
 *
 * @param[in] pGpu        GPU pointer
 * @param[in] pKernelBus  Kernel bus pointer
 *
 * return NV_TRUE when BAR1 P2P is forced by regkey and the GPU is capable
 */
NvBool
kbusNeedStaticBar1Mapping_GH100(OBJGPU *pGpu, KernelBus *pKernelBus)
{
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    // Check if BAR1 P2P is enabled by a regkey
    if (pKernelBif->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P)
    {
        return NV_FALSE;
    }

    // We need static Bar1 only when the GPU is BAR1 P2P capable.
    return kbusIsBar1P2PCapable(pKernelBus);
}

/*!
 * @brief Setup static Bar1 mapping.
 *
 * @param[in]   pGpu            GPU pointer
 * @param[in]   pKernelBus      Kernel bus pointer
 * @param[in]   reservedFbSize  The size to reserve in FB from the address 0
 * @param[in]   gfid            The GFID
 *
 * @returns NV_OK on success, or rm_status from called functions on failure.
 */
NV_STATUS
kbusEnableStaticBar1Mapping_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU64      reservedFbSize,
    NvU32      gfid
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    MEMORY_DESCRIPTOR *pDmaMemDesc = NULL;
    NvU64 bar1Size = kbusGetPciBarSize(pKernelBus, 1);
    NV_STATUS status = NV_OK;
    OBJEHEAP *pVASpaceHeap = vaspaceGetHeap(pKernelBus->bar1[gfid].pVAS);
    NvU64 bar1Offset = RM_ALIGN_UP(reservedFbSize, RM_PAGE_SIZE_2M);
    NvU64 bar1MapSize;
    NvU64 staticBar1PhyAddr;

    if (!kbusNeedStaticBar1Mapping_HAL(pGpu, pKernelBus))
    {
        return NV_ERR_INVALID_STATE;
    }

    NV_PRINTF(LEVEL_INFO, "Static bar1 size 0x%llx fb size 0x%llx\n",
              bar1Size, pMemoryManager->Ram.fbUsableMemSize);

    // BAR1 VA size can be smaller than BAR1 size
    bar1MapSize = NV_MIN(bar1Size, pVASpaceHeap->rangeHi);
    bar1MapSize = NV_MIN(bar1MapSize, pMemoryManager->Ram.fbUsableMemSize);

    NV_ASSERT_OR_RETURN(bar1MapSize > bar1Offset, NV_ERR_INVALID_STATE);

    // Adjust the offset
    bar1MapSize -= bar1Offset;

    //
    // GPU BAR1 VA also supports the SYSMEM mapping, we need to reserve some
    // spaces for such cases, like doorbell mapping which is not backed by
    // FBMEM.
    //
    if ((bar1Size - (bar1MapSize + bar1Offset)) < (4 * RM_PAGE_SIZE_2M))
    {
        //
        // When BAR1 size much bigger than FB, then there are plenty of
        // VA space left for other type of mapping.
        // When BAR1 size is slightly bigger or equal FB, the available
        // BAR1 VA is very limited.
        // Here reserves 4 * 2MB blocks.
        // !!! NOTE: Not sure how big Rm need to reserve
        // TODO: Need to find a better solution, bug 3869651
        //
        bar1MapSize -= 4 * RM_PAGE_SIZE_2M;

        NV_PRINTF(LEVEL_INFO, "Static bar1 reserved 8 MB from the top of FB\n");
    }

    // align to 2MB page size
    bar1MapSize = RM_ALIGN_UP(bar1MapSize, RM_PAGE_SIZE_2M);

    //
    // The static mapping is not backed by an allocated physical FB.
    // Here RM describes the memory for the static mapping.
    //
    NV_ASSERT_OK_OR_RETURN(memdescCreate(&pMemDesc, pGpu, bar1MapSize, 0,
                                         NV_MEMORY_CONTIGUOUS, ADDR_FBMEM,
                                         NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE));

    memdescDescribe(pMemDesc, ADDR_FBMEM, bar1Offset, bar1MapSize);

    // Set to use RM_PAGE_SIZE_HUGE, 2MB
    memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE_HUGE);

    // Setup GMK PTE type for this memory
    memdescSetPteKind(pMemDesc, NV_MMU_PTE_KIND_GENERIC_MEMORY);

    // Deploy the static mapping at a fixed BAR1 offset.
    NV_ASSERT_OK_OR_GOTO(status,
        kbusMapFbAperture_HAL(pGpu, pKernelBus, pMemDesc, 0,
                              &bar1Offset, &bar1MapSize,
                              BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED,
                              NV01_NULL_OBJECT),
        cleanup_mem);

    // Get the system physical address of the base address of staticBar1
    staticBar1PhyAddr = gpumgrGetGpuPhysFbAddr(pGpu) + bar1Offset;

    //
    // Create a memory descriptor to describe a SYSMEM target of the GPU
    // BAR1 region. This memDesc will be used for P2P DMA related mapping.
    //
    NV_ASSERT_OK_OR_GOTO(status,
        memdescCreate(&pDmaMemDesc,
                      pGpu,
                      bar1MapSize,
                      0,
                      NV_MEMORY_CONTIGUOUS,
                      ADDR_SYSMEM,
                      NV_MEMORY_UNCACHED,
                      MEMDESC_FLAGS_NONE),
        cleanup_bus_map);

    memdescDescribe(pDmaMemDesc, ADDR_SYSMEM, staticBar1PhyAddr, bar1MapSize);

    // Publish the static BAR1 state only after everything succeeded.
    pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_TRUE;
    pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = pMemDesc;
    pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = pDmaMemDesc;
    pKernelBus->bar1[gfid].staticBar1.base = bar1Offset;
    pKernelBus->bar1[gfid].staticBar1.size = bar1MapSize;

    NV_PRINTF(LEVEL_INFO, "Static bar1 mapped offset 0x%llx size 0x%llx\n",
              bar1Offset, bar1MapSize);

    return NV_OK;

cleanup_bus_map:
    NV_ASSERT_OK(kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
                                         pMemDesc, bar1Offset, bar1MapSize,
                                         BUS_MAP_FB_FLAGS_MAP_UNICAST |
                                         BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED));

cleanup_mem:
    NV_PRINTF(LEVEL_ERROR, "Failed to create the static bar1 mapping offset"
              "0x%llx size 0x%llx\n", bar1Offset, bar1MapSize);

    pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_FALSE;
    pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = NULL;
    pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = NULL;

    // NOTE(review): pDmaMemDesc may still be NULL on the cleanup_mem path -
    // relies on memdescDestroy tolerating NULL; confirm.
    memdescDestroy(pDmaMemDesc);
    memdescDestroy(pMemDesc);

    return status;
}

/*!
 * @brief tear down static Bar1 mapping.
 *
 * @param[in]   pGpu            GPU pointer
 * @param[in]   pKernelBus      Kernel bus pointer
 * @param[in]   gfid            The GFID
 *
 * @returns NV_OK on success, or rm_status from called functions on failure.
 */
NV_STATUS
kbusDisableStaticBar1Mapping_GH100(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
{
    if (pKernelBus->bar1[gfid].bStaticBar1Enabled)
    {
        if (pKernelBus->bar1[gfid].staticBar1.pVidMemDesc != NULL)
        {
            // Undo the fixed-offset BAR1 mapping created at enable time.
            NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
                kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
                    pKernelBus->bar1[gfid].staticBar1.pVidMemDesc,
                    pKernelBus->bar1[gfid].staticBar1.base,
                    pKernelBus->bar1[gfid].staticBar1.size,
                    BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED));

            memdescDestroy(pKernelBus->bar1[gfid].staticBar1.pVidMemDesc);

            pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = NULL;
        }

        // Release the SYSMEM-target descriptor used for P2P DMA.
        memdescDestroy(pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc);
        pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = NULL;

        pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_FALSE;
    }

    return NV_OK;
}

/*!
 * @brief To update the StaticBar1 PTE kind for the specified memory.
 *
 * The staticbar1 only supports GMK (generic memory kind) and other compressed kinds.
 * By default, bar1 is statically mapped with GMK at boot when static bar1 is enabled.
 *
 * When mapping an uncompressed kind memory, RM just returns the static bar1 address
 * which is mapped to the specified memory.
 *
 * When mapping a compressed kind memory, RM must call this function to change the
 * statically mapped bar1 range for the specified memory from GMK to the compressed
 * kind. And RM needs to call this function to change it back to GMK from the
 * compressed kind after this mapping is released.
 *
 * @param[in]   pGpu        GPU pointer
 * @param[in]   pKernelBus  Kernel bus pointer
 * @param[in]   pMemDesc    The memory to update
 * @param[in]   offset      The offset of the memory to update
 * @param[in]   length      The length of the memory to update
 * @param[in]   bRelease    Call to release the mapping (restore uncompressed kind)
 * @param[in]   gfid        The GFID
 *
 * return NV_OK on success
 */
NV_STATUS
_kbusUpdateStaticBAR1VAMapping_GH100
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64              offset,
    NvU64              length,
    NvBool             bRelease,
    NvU32              gfid
)
{
    NV_STATUS status = NV_OK;
    VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    OBJVASPACE *pVAS = pKernelBus->bar1[gfid].pVAS;
    NvU32 kind;
    MEMORY_DESCRIPTOR *pTempMemDesc;
    NvU64 vAddr;
    NvU64 vaLo;
    NvU64 vaHi;
    NvU64 physAddr;
    NvU64 pageOffset;
    NvU64 mapLength;
    NvU64 pageSize;
    DMA_PAGE_ARRAY pageArray = {0};
    COMPR_INFO comprInfo;
    NvBool bCompressed;

    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);

    NV_ASSERT_OR_RETURN(memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM, NV_ERR_INVALID_ARGUMENT);

    // Only contiguous memory is supported (single PTE array entry)
    NV_ASSERT_OR_RETURN(memdescGetPteArraySize(pMemDesc, AT_GPU) == 1, NV_ERR_INVALID_ARGUMENT);

    pTempMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu);

    pageSize = memdescGetPageSize(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS));

    NV_ASSERT_OK_OR_RETURN(memmgrGetKindComprFromMemDesc(pMemoryManager, pTempMemDesc, 0, &kind, &comprInfo));
    bCompressed = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind);

    // Static BAR1 mapping only supports >=2MB page size for compressed memory
    NV_CHECK_OR_RETURN(LEVEL_WARNING, bCompressed && (pageSize >= RM_PAGE_SIZE_HUGE), NV_ERR_INVALID_STATE);

    if (bRelease)
    {
        // update the PTE kind to be the uncompressed kind
        comprInfo.kind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, NV_FALSE);
    }

    // Under static BAR1 mapping, BAR1 VA equals the FB physical address
    physAddr = memdescGetPhysAddr(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS), offset);
    vAddr = RM_ALIGN_DOWN(physAddr, pageSize);

    // Expand [offset, offset + length) to whole mapping pages.
    pageOffset = physAddr & (pageSize - 1);
    mapLength = RM_ALIGN_UP(pageOffset + length, pageSize);

    vaLo = vAddr;
    vaHi = vaLo + mapLength - 1;

    pageArray.count = 1;
    pageArray.pData = &physAddr;

    // Rewrite only the KIND field of the covered PTEs, then invalidate the TLB.
    status = dmaUpdateVASpace_HAL(pGpu, pDma, pVAS,
                                  pTempMemDesc, NULL,
                                  vaLo, vaHi,
                                  DMA_UPDATE_VASPACE_FLAGS_UPDATE_KIND, // only change KIND
                                  &pageArray, 0,
                                  &comprInfo, 0,
                                  NV_MMU_VER3_PTE_VALID_TRUE,
                                  NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY,
                                  BUS_INVALID_PEER,
                                  NVLINK_INVALID_FABRIC_ADDR,
                                  DMA_TLB_INVALIDATE,
                                  NV_FALSE,
                                  pageSize);

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "error updating static bar1 VA space.\n");
    }

    return status;
}

/*!
 * @brief To unmap FB aperture for the specified memory under the static mapping.
2071 * 2072 * @param[in] pGpu GPU pointer 2073 * @param[in] pKernelBus Kernel bus pointer 2074 * @param[in] pMemDesc The memory to update 2075 * @param[in] gfid The GFID 2076 * 2077 * return NV_OK on success 2078 */ 2079 NV_STATUS 2080 kbusStaticUnmapFbAperture_GH100 2081 ( 2082 OBJGPU *pGpu, 2083 KernelBus *pKernelBus, 2084 MEMORY_DESCRIPTOR *pMemDesc, 2085 NvU32 gfid 2086 ) 2087 { 2088 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 2089 NvBool bCompressedkind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, 2090 memdescGetPteKind(pMemDesc)); 2091 2092 // 2093 // For uncompressed type, nothing to do 2094 // For compressed type, restore PTE kind to GMK 2095 // 2096 if (bCompressedkind) 2097 { 2098 NV_ASSERT_OK_OR_RETURN(_kbusUpdateStaticBAR1VAMapping_GH100(pGpu, pKernelBus, 2099 pMemDesc, 0, memdescGetSize(pMemDesc), NV_TRUE, gfid)); 2100 } 2101 2102 // Nothing else to do on static mapping mode 2103 NV_PRINTF(LEVEL_INFO, 2104 "StaticBar1 unmapped at 0x%llx size 0x%llx%s\n", 2105 memdescGetPhysAddr(pMemDesc, AT_GPU, 0), 2106 memdescGetSize(pMemDesc), 2107 bCompressedkind ? " [compressed]" : ""); 2108 2109 return NV_OK; 2110 } 2111 2112 /*! 2113 * @brief To map FB aperture for the specified memory under the static mapping. 
2114 * 2115 * @param[in] pGpu GPU pointer 2116 * @param[in] pKernelBus Kernel bus pointer 2117 * @param[in] pMemDesc The memory to update 2118 * @param[in] offset The offset of the memory to map 2119 * @param[out] pAperOffset The Fb Aperture(BAR1) offset of the mapped vidmem 2120 * @param[in] pLength The size of vidmem to map 2121 * @param[in] gfid The GFID 2122 * 2123 * return NV_OK on success 2124 */ 2125 NV_STATUS 2126 kbusStaticMapFbAperture_GH100 2127 ( 2128 OBJGPU *pGpu, 2129 KernelBus *pKernelBus, 2130 MEMORY_DESCRIPTOR *pMemDesc, 2131 NvU64 offset, 2132 NvU64 *pAperOffset, 2133 NvU64 *pLength, 2134 NvU32 gfid 2135 ) 2136 { 2137 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 2138 NvU64 physAddr; 2139 NvU32 status = NV_OK; 2140 NvBool bCompressedkind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, memdescGetPteKind(pMemDesc)); 2141 2142 // It only support contiguous memory 2143 NV_ASSERT_OR_RETURN(memdescGetPteArraySize(pMemDesc, AT_GPU) == 1, NV_ERR_INVALID_ARGUMENT); 2144 2145 physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset); 2146 2147 if (physAddr < pKernelBus->bar1[gfid].staticBar1.base || 2148 physAddr + *pLength >= pKernelBus->bar1[gfid].staticBar1.size) 2149 { 2150 NV_PRINTF(LEVEL_ERROR, 2151 "0x%llx + 0x%llx is out of the range of the StaticBar1 map [0x%llx, 0x%llx]\n", 2152 physAddr, *pLength, pKernelBus->bar1[gfid].staticBar1.base, 2153 pKernelBus->bar1[gfid].staticBar1.base + pKernelBus->bar1[gfid].staticBar1.size); 2154 2155 return NV_ERR_INVALID_ARGUMENT; 2156 } 2157 2158 if (bCompressedkind) 2159 { 2160 // Update PTE to be the compressed kind 2161 NV_ASSERT_OK_OR_RETURN(_kbusUpdateStaticBAR1VAMapping_GH100(pGpu, pKernelBus, pMemDesc, 2162 offset, *pLength, NV_FALSE, gfid)); 2163 } 2164 2165 // When the static bar1 enabled, the Fb aperture offset is the physical address. 
2166 *pAperOffset = physAddr; 2167 2168 NV_PRINTF(LEVEL_INFO, "StaticBar1 mapped at 0x%llx size 0x%llx%s\n", 2169 physAddr, *pLength, 2170 bCompressedkind ? " [compressed]" : ""); 2171 2172 return status; 2173 } 2174 2175 void 2176 kbusWriteP2PWmbTag_GH100 2177 ( 2178 OBJGPU *pGpu, 2179 KernelBus *pKernelBus, 2180 NvU32 remote2Local, 2181 NvU64 p2pWmbTag 2182 ) 2183 { 2184 // See bug 3558208 comment 34 and 50 2185 GPU_REG_RD32(pGpu, NV_XAL_EP_P2P_WREQMB_L(remote2Local)); 2186 GPU_REG_WR32(pGpu, NV_XAL_EP_P2P_WREQMB_L(remote2Local), NvU64_LO32(p2pWmbTag)); 2187 GPU_REG_WR32(pGpu, NV_XAL_EP_P2P_WREQMB_H(remote2Local), NvU64_HI32(p2pWmbTag)); 2188 } 2189 2190 /*! 2191 * @brief Determine FLA Base and Size for direct-connected and NvSwitch systems. 2192 * 2193 * @param[in] base VASpace base 2194 * @param[in] size VASpace size 2195 * 2196 * @return NV_OK if successful 2197 */ 2198 NV_STATUS 2199 kbusDetermineFlaRangeAndAllocate_GH100 2200 ( 2201 OBJGPU *pGpu, 2202 KernelBus *pKernelBus, 2203 NvU64 base, 2204 NvU64 size 2205 ) 2206 { 2207 NV_STATUS status = NV_OK; 2208 2209 OBJSYS *pSys = SYS_GET_INSTANCE(); 2210 2211 if ((pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) || 2212 GPU_IS_NVSWITCH_DETECTED(pGpu)) && !gpuFabricProbeIsSupported(pGpu)) 2213 { 2214 return kbusDetermineFlaRangeAndAllocate_GA100(pGpu, pKernelBus, base, size); 2215 } 2216 2217 NV_ASSERT_OK_OR_RETURN(kbusAllocateFlaVaspace_HAL(pGpu, pKernelBus, 0x0, NVBIT64(52))); 2218 2219 return status; 2220 } 2221 2222 /*! 2223 * @brief Sets up the Fabric FLA state for the GPU. This function will allocate fabric VASpace, 2224 * allocates PDB for fabric VAS, allocates instance block and initialize with 2225 * fabric VAS and binds the instance block to HW. 
 *
 * On failure every partially-created resource is unwound via the goto chain
 * (free_instblk -> unpin_rootpagedir -> cleanup) in reverse creation order.
 *
 * @param[in] base  VASpace base
 * @param[in] size  VASpace size
 *
 * @return NV_OK if successful
 */
NV_STATUS
kbusAllocateFlaVaspace_GH100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU64      base,
    NvU64      size
)
{
    NV_STATUS             status         = NV_OK;
    OBJVMM               *pVmm           = SYS_GET_VMM(SYS_GET_INSTANCE());
    KernelGmmu           *pKernelGmmu    = GPU_GET_KERNEL_GMMU(pGpu);
    INST_BLK_INIT_PARAMS  pInstblkParams = {0};
    FABRIC_VASPACE       *pFabricVAS;
    RM_API               *pRmApi         = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
    // Double allocation is a caller error
    NV_ASSERT_OR_RETURN(!pKernelBus->flaInfo.bFlaAllocated, NV_ERR_INVALID_ARGUMENT);

    pKernelBus->flaInfo.base = base;
    pKernelBus->flaInfo.size = size;

    OBJSYS *pSys = SYS_GET_INSTANCE();

    // NvSwitch systems without fabric-probe support take the GA100 (legacy) path
    if ((pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
         GPU_IS_NVSWITCH_DETECTED(pGpu)) && !gpuFabricProbeIsSupported(pGpu))
    {
        return kbusAllocateFlaVaspace_GA100(pGpu, pKernelBus, base, size);
    }

    // TODO: Remove allocating legacy FLA Vaspace once CUDA removes the dependency
    NV_ASSERT_OK_OR_RETURN(kbusAllocateLegacyFlaVaspace_HAL(pGpu, pKernelBus, base, size));

    // Allocate a FABRIC_VASPACE_A object
    status = vmmCreateVaspace(pVmm, FABRIC_VASPACE_A, pGpu->gpuId, gpumgrGetGpuMask(pGpu),
                              base, base + size - 1, 0, 0, NULL, 0,
                              &pGpu->pFabricVAS);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed allocating fabric vaspace, status=0x%x\n",
                  status);
        goto cleanup;
    }

    // Pin the VASPACE page directory for pFabricVAS before writing the instance block
    status = vaspacePinRootPageDir(pGpu->pFabricVAS, pGpu);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed pinning down fabric vaspace, status=0x%x\n",
                  status);
        goto cleanup;
    }

    // Construct instance block
    status = kbusConstructFlaInstBlk_HAL(pGpu, pKernelBus, GPU_GFID_PF);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "failed constructing instblk for FLA, status=0x%x\n",
                  status);
        goto unpin_rootpagedir;
    }

    pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);

    // Instantiate Inst Blk for pFlaVAS
    status = kgmmuInstBlkInit(pKernelGmmu,
                              pKernelBus->flaInfo.pInstblkMemDesc,
                              pFabricVAS->pGVAS, FIFO_PDB_IDX_BASE,
                              &pInstblkParams);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "failed instantiating instblk for FLA, status=0x%x\n",
                  status);
        goto free_instblk;
    }

    //
    // For SRIOV PF/VF system, always check for P2P allocation to determine whether
    // this function is allowed to bind FLA
    //
    if (gpuIsSriovEnabled(pGpu) || IS_VIRTUAL(pGpu))
    {
        if (gpuCheckIsP2PAllocated_HAL(pGpu))
        {
            status = kbusSetupBindFla(pGpu, pKernelBus, pGpu->sriovState.pP2PInfo->gfid);
        }
        else
        {
            NV_PRINTF(LEVEL_INFO, "Skipping binding FLA, because no P2P GFID is"
                      " validated yet\n");
        }
    }
    else
    {
        status = kbusSetupBindFla(pGpu, pKernelBus, GPU_GFID_PF);
    }

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "failed binding instblk for FLA, status=0x%x\n", status);
        goto free_instblk;
    }

    // Register for multicast team-setup responses arriving over NVLink inband
    if (GPU_GET_KERNEL_NVLINK(pGpu) != NULL)
    {
        NVLINK_INBAND_MSG_CALLBACK inbandMsgCbParams;

        inbandMsgCbParams.messageType = NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP;
        inbandMsgCbParams.pCallback = &memorymulticastfabricTeamSetupResponseCallback;
        inbandMsgCbParams.wqItemFlags = OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA |
                                        OS_QUEUE_WORKITEM_FLAGS_LOCK_GPUS_RW;

        status = knvlinkRegisterInbandCallback(pGpu,
                                               GPU_GET_KERNEL_NVLINK(pGpu),
                                               &inbandMsgCbParams);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "GPU (ID: %d) Registering Inband Cb failed\n",
                      gpuGetInstance(pGpu));
            goto free_instblk;
        }

    }

    // setup Unicast FLA range in Fabric VAS object
    if (!GPU_IS_NVSWITCH_DETECTED(pGpu))
    {
        // Direct-connect: each GPU owns a fixed-size slice indexed by gpuInstance
        size = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE);
        base = pGpu->gpuInstance * size;

        NV_ASSERT_OK_OR_GOTO(status, fabricvaspaceInitUCRange(
                             dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE), pGpu,
                             base, size), free_instblk);
    }

    pKernelBus->flaInfo.bFlaAllocated = NV_TRUE;

    return NV_OK;

// Error unwind: reverse order of the setup steps above
free_instblk:
    kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);

unpin_rootpagedir:
    if (pGpu->pFabricVAS != NULL)
    {
        vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
    }

cleanup:
    if (pGpu->pFabricVAS != NULL)
    {
        vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
        pGpu->pFabricVAS = NULL;
    }

    // TODO: remove this once legacy FLA VAS support is removed.
2392 pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient); 2393 2394 pKernelBus->flaInfo.bFlaAllocated = NV_FALSE; 2395 2396 NV_PRINTF(LEVEL_ERROR, "failed allocating FLA VASpace status=0x%x\n", 2397 status); 2398 2399 return status; 2400 } 2401 2402 void 2403 kbusDestroyFla_GH100 2404 ( 2405 OBJGPU *pGpu, 2406 KernelBus *pKernelBus 2407 ) 2408 { 2409 OBJSYS *pSys = SYS_GET_INSTANCE(); 2410 OBJVMM *pVmm = SYS_GET_VMM(pSys); 2411 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 2412 2413 if (pGpu->pFabricVAS != NULL) 2414 { 2415 if (pKernelBus->flaInfo.bFlaBind) 2416 { 2417 if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) 2418 { 2419 kbusSetupUnbindFla_HAL(pGpu, pKernelBus); 2420 } 2421 } 2422 2423 if (pKernelBus->flaInfo.bFlaAllocated) 2424 { 2425 vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);\ 2426 kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus); 2427 vmmDestroyVaspace(pVmm, pGpu->pFabricVAS); 2428 2429 pGpu->pFabricVAS = NULL; 2430 // TODO: Remove this once legacy FLA VAS support is deprecated 2431 pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient); 2432 portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo)); 2433 if (GPU_GET_KERNEL_NVLINK(pGpu) != NULL) 2434 { 2435 // Unregister the receive callback 2436 NV_ASSERT_OK(knvlinkUnregisterInbandCallback(pGpu, GPU_GET_KERNEL_NVLINK(pGpu), 2437 NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP)); 2438 } 2439 } 2440 } 2441 } 2442 2443 /*! 2444 * @brief Helper function to extract information from FLA data structure and 2445 * to trigger RPC to Physical RM to BIND FLA VASpace 2446 * 2447 * @param[in] gfid GFID 2448 * 2449 * @return NV_OK if successful 2450 */ 2451 NV_STATUS 2452 kbusSetupBindFla_GH100 2453 ( 2454 OBJGPU *pGpu, 2455 KernelBus *pKernelBus, 2456 NvU32 gfid 2457 ) 2458 { 2459 NV_STATUS status = NV_OK; 2460 NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = {0}; 2461 MEMORY_DESCRIPTOR *pMemDesc; 2462 RM_API *pRmApi = IS_GSP_CLIENT(pGpu) ? 
GPU_GET_PHYSICAL_RMAPI(pGpu) 2463 : rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 2464 2465 pMemDesc = pKernelBus->flaInfo.pInstblkMemDesc; 2466 2467 switch( memdescGetAddressSpace(pMemDesc)) 2468 { 2469 case ADDR_FBMEM: 2470 params.addrSpace = NV2080_CTRL_FLA_ADDRSPACE_FBMEM; 2471 break; 2472 case ADDR_SYSMEM: 2473 params.addrSpace = NV2080_CTRL_FLA_ADDRSPACE_SYSMEM; 2474 break; 2475 } 2476 params.imbPhysAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0); 2477 params.flaAction = NV2080_CTRL_FLA_ACTION_BIND; 2478 2479 status = pRmApi->Control(pRmApi, 2480 pGpu->hInternalClient, 2481 pGpu->hInternalSubdevice, 2482 NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK, 2483 ¶ms, 2484 sizeof(params)); 2485 2486 if (status != NV_OK) 2487 { 2488 NV_PRINTF(LEVEL_ERROR, "FLA bind failed, status: %x \n", status); 2489 return status; 2490 } 2491 2492 // Since FLA state is tracked in the Guest, Guest RM needs to set it here 2493 pKernelBus->flaInfo.bFlaBind = NV_TRUE; 2494 pKernelBus->bFlaEnabled = NV_TRUE; 2495 2496 return status; 2497 } 2498 2499 /*! 2500 * @brief Helper function to trigger RPC to Physical RM to unbind FLA VASpace 2501 * 2502 * @return NV_OK if successful 2503 */ 2504 NV_STATUS 2505 kbusSetupUnbindFla_GH100 2506 ( 2507 OBJGPU *pGpu, 2508 KernelBus *pKernelBus 2509 ) 2510 { 2511 NV_STATUS status = NV_OK; 2512 NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = { 0 }; 2513 RM_API *pRmApi = IS_GSP_CLIENT(pGpu) ? 
GPU_GET_PHYSICAL_RMAPI(pGpu) 2514 : rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 2515 2516 if (!pKernelBus->flaInfo.bFlaBind) 2517 return NV_OK; 2518 2519 params.flaAction = NV2080_CTRL_FLA_ACTION_UNBIND; 2520 2521 status = pRmApi->Control(pRmApi, 2522 pGpu->hInternalClient, 2523 pGpu->hInternalSubdevice, 2524 NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK, 2525 ¶ms, 2526 sizeof(params)); 2527 2528 pKernelBus->flaInfo.bFlaBind = NV_FALSE; 2529 pKernelBus->bFlaEnabled = NV_FALSE; 2530 2531 return status; 2532 } 2533 2534 NV_STATUS 2535 kbusGetFlaRange_GH100 2536 ( 2537 OBJGPU *pGpu, 2538 KernelBus *pKernelBus, 2539 NvU64 *ucFlaBase, 2540 NvU64 *ucFlaSize, 2541 NvBool bIsConntectedToNvswitch 2542 ) 2543 { 2544 if (!GPU_IS_NVSWITCH_DETECTED(pGpu)) 2545 { 2546 *ucFlaSize = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE); 2547 *ucFlaBase = pGpu->gpuInstance * (*ucFlaSize); 2548 } 2549 else 2550 { 2551 FABRIC_VASPACE *pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE); 2552 NvU64 ucFlaLimit; 2553 2554 if (pFabricVAS == NULL) 2555 return NV_ERR_INVALID_STATE; 2556 2557 ucFlaLimit = fabricvaspaceGetUCFlaLimit(pFabricVAS); 2558 if (ucFlaLimit == 0) 2559 return NV_ERR_INVALID_STATE; 2560 2561 *ucFlaBase = fabricvaspaceGetUCFlaStart(pFabricVAS); 2562 *ucFlaSize = ucFlaLimit - *ucFlaBase + 1; 2563 } 2564 2565 return NV_OK; 2566 } 2567 2568 /*! 2569 * @brief Returns the EGM peer ID of pRemoteGpu if it was 2570 * reserved already. 
2571 * 2572 * @param[in] pLocalGpu local OBJGPU pointer 2573 * @param[in] pLocalBus local OBJBUS pointer 2574 * @param[in] pRemoteGpu remote OBJGPU pointer 2575 * 2576 * return NV_OK on success 2577 * BUS_INVALID_PEER otherwise 2578 * 2579 */ 2580 NvU32 2581 kbusGetEgmPeerId_GH100 2582 ( 2583 OBJGPU *pLocalGpu, 2584 KernelBus *pLocalKernelBus, 2585 OBJGPU *pRemoteGpu 2586 ) 2587 { 2588 NvU32 gpuPeerInst = gpuGetInstance(pRemoteGpu); 2589 NvU32 peerMask = pLocalKernelBus->p2p.busNvlinkPeerNumberMask[gpuPeerInst]; 2590 NvU32 peerId; 2591 2592 if (peerMask == 0) 2593 { 2594 NV_PRINTF(LEVEL_INFO, 2595 "NVLINK P2P not set up between GPU%u and GPU%u\n", 2596 gpuGetInstance(pLocalGpu), gpuPeerInst); 2597 return BUS_INVALID_PEER; 2598 } 2599 2600 FOR_EACH_INDEX_IN_MASK(32, peerId, peerMask) 2601 { 2602 if (pLocalKernelBus->p2p.bEgmPeer[peerId]) 2603 { 2604 return peerId; 2605 } 2606 } 2607 FOR_EACH_INDEX_IN_MASK_END; 2608 2609 return BUS_INVALID_PEER; 2610 } 2611