1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 25 /***************************** HW State Routines ***************************\ 26 * * 27 * Fabric Virtual Address Space Function Definitions. 
* 28 * * 29 \***************************************************************************/ 30 31 #include "gpu/mmu/kern_gmmu.h" 32 #include "mem_mgr/vaspace.h" 33 #include "mem_mgr/fabric_vaspace.h" 34 #include "gpu/mem_mgr/mem_mgr.h" 35 #include "mem_mgr/gpu_vaspace.h" 36 #include "gpu/mem_mgr/virt_mem_allocator_common.h" 37 #include "os/os.h" 38 #include "gpu/bus/kern_bus.h" 39 #include "kernel/gpu/fifo/kernel_fifo.h" 40 #include "kernel/gpu/nvlink/kernel_nvlink.h" 41 #include "mmu/mmu_walk.h" 42 #include "lib/base_utils.h" 43 #include "class/cl90f1.h" // FERMI_VASPACE_A 44 #include "class/cl00fc.h" // FABRIC_VASPACE_A 45 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER 46 #include "class/cl0080.h" // NV01_DEVICE_0 47 #include "gpu/device/device.h" 48 #include "gpu/subdevice/subdevice.h" 49 #include "deprecated/rmapi_deprecated.h" 50 #include "rmapi/rs_utils.h" 51 #include "vgpu/vgpu_events.h" 52 #include "mem_mgr/virt_mem_mgr.h" 53 54 #include "published/ampere/ga100/dev_mmu.h" 55 #include "vgpu/rpc.h" 56 #include "virtualization/hypervisor/hypervisor.h" 57 58 59 60 // 61 // TODO: To be removed when legacy FLA VAS (pKernelBus->flaInfo.pFlaVAS) is removed" 62 // The instance block is setup during kbusAllocateFlaVaspace_HAL(). However, we 63 // lazily bind it to the new fabric VAS when the very first NV_FABRIC_MEMORY 64 // allocations happens. 
//

//
// Lazily switch the HW instance block from the legacy FLA VAS to the fabric
// VAS. No-op unless the bind point is toggleable. Fails if the legacy FLA VAS
// still has live allocations (the two cannot be bound at once). Only the very
// first fabric allocation (fabric VAS not yet in use) performs the
// unbind/re-init/bind sequence; on failure the legacy FLA VAS instance block
// is restored before returning the error.
//
static NV_STATUS
_fabricvaspaceBindInstBlk
(
    FABRIC_VASPACE *pFabricVAS
)
{
    OBJVASPACE *pVAS        = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU     *pGpu        = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus  *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_STATUS   status      = NV_OK;

    INST_BLK_INIT_PARAMS instblkParams;

    // Nothing to do when the bind point cannot be toggled.
    if (!pKernelBus->flaInfo.bToggleBindPoint)
    {
        return NV_OK;
    }

    if (gvaspaceIsInUse(dynamicCast(pKernelBus->flaInfo.pFlaVAS, OBJGVASPACE)))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "FabricVAS and FlaVAS cannot be used simultaneously! "
                  "Instance block setup for fabricVAS failed\n");
        return NV_ERR_INVALID_OPERATION;
    }

    //
    // Check if this is the first fabric vaspace allocation. If this is not the
    // first allocation, instance block is already setup. Return NV_OK.
    //
    if (gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)))
    {
        return NV_OK;
    }

    // Unbind the instance block for FLA vaspace.
    status = kbusSetupUnbindFla_HAL(pGpu, pKernelBus);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to unbind instance block for FlaVAS, status=0x%x\n",
                  status);
        return status;
    }

    // Instantiate the instance block for fabric vaspace.
    portMemSet(&instblkParams, 0, sizeof(instblkParams));
    status = kgmmuInstBlkInit(pKernelGmmu, pKernelBus->flaInfo.pInstblkMemDesc,
                              pFabricVAS->pGVAS, FIFO_PDB_IDX_BASE,
                              &instblkParams);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to setup instance block for fabricVAS, status=0x%x\n",
                  status);
        goto failed;
    }

    // Bind the instance block for fabric vaspace.
    status = kbusSetupBindFla_HAL(pGpu, pKernelBus, pFabricVAS->gfid);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to bind instance block for fabricVAS, status=0x%x\n",
                  status);
        goto failed;
    }

    return NV_OK;

failed:
    // Roll back: re-instantiate the instance block for the legacy FLA vaspace.
    portMemSet(&instblkParams, 0, sizeof(instblkParams));
    NV_ASSERT(kgmmuInstBlkInit(pKernelGmmu, pKernelBus->flaInfo.pInstblkMemDesc,
                               pKernelBus->flaInfo.pFlaVAS, FIFO_PDB_IDX_BASE,
                               &instblkParams) == NV_OK);

    // Bind the instance block for FLA vaspace.
    NV_ASSERT(kbusSetupBindFla_HAL(pGpu, pKernelBus, pFabricVAS->gfid) == NV_OK);

    return status;
}

//
// TODO: To be removed when legacy FLA VAS (pKernelBus->flaInfo.pFlaVAS) is removed"
// The instance block is unbind during kbusDestroyFla_HAL(). However, we unbind
// it here and bind back the instance block for the legacy FLA VAS after the
// last NV_FABRIC_MEMORY allocation is freed.
//
static void
_fabricvaspaceUnbindInstBlk
(
    FABRIC_VASPACE *pFabricVAS
)
{
    OBJVASPACE *pVAS        = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU     *pGpu        = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus  *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    INST_BLK_INIT_PARAMS instblkParams = {0};

    // Nothing to do when the bind point cannot be toggled.
    if (!pKernelBus->flaInfo.bToggleBindPoint)
    {
        return;
    }

    //
    // Check if there are any pending allocations for the fabric vaspace.
    // If there are pending allocations, skip restore and return NV_OK.
    //
    if (gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)))
    {
        return;
    }

    // Unbind the instance block for fabric vaspace.
    NV_ASSERT(kbusSetupUnbindFla_HAL(pGpu, pKernelBus) == NV_OK);

    if (pKernelBus->flaInfo.pFlaVAS != NULL)
    {
        // Instantiate the instance block for FLA vaspace.
        NV_ASSERT(kgmmuInstBlkInit(pKernelGmmu, pKernelBus->flaInfo.pInstblkMemDesc,
                                   pKernelBus->flaInfo.pFlaVAS, FIFO_PDB_IDX_BASE,
                                   &instblkParams) == NV_OK);

        // Bind the instance block for FLA vaspace.
        NV_ASSERT(kbusSetupBindFla_HAL(pGpu, pKernelBus, pFabricVAS->gfid) == NV_OK);
    }
}

//
// Constructor for the fabric VAS. Allocates an internal RM client/device,
// records the VAS flags, and creates the backing FERMI_VASPACE_A (GVAS)
// object spanning [vaStart, vaLimit].
//
// NOTE(review): vaStartInternal/vaLimitInternal are accepted but not used in
// the visible construction path — confirm they are intentionally ignored.
//
NV_STATUS
fabricvaspaceConstruct__IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU32           classId,
    NvU32           vaspaceId,
    NvU64           vaStart,
    NvU64           vaLimit,
    NvU64           vaStartInternal,
    NvU64           vaLimitInternal,
    NvU32           flags
)
{
    RM_API     *pRmApi  = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    OBJSYS     *pSys    = SYS_GET_INSTANCE();
    OBJVMM     *pVmm    = SYS_GET_VMM(pSys);
    OBJVASPACE *pVAS    = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU     *pGpu    = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    NV_STATUS   status  = NV_OK;
    NvHandle    hClient = 0;
    NvHandle    hDevice = 0;
    NV0080_ALLOC_PARAMETERS devAllocParams = { 0 };
    NvU32       gfid    = 0;

    // Sanity check input parameters.
    NV_ASSERT_OR_RETURN(FABRIC_VASPACE_A == classId, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(vaStart <= vaLimit, NV_ERR_INVALID_ARGUMENT);
    // A fabric VAS is tied to exactly one GPU.
    NV_ASSERT_OR_RETURN(ONEBITSET(pVAS->gpuMask), NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(vaspaceId == pGpu->gpuId, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    // Internal RM client owning the device object below.
    status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT,
                                     NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                     NV01_ROOT, &hClient);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed creating client, status=0x%x\n", status);
        return status;
    }

    status = serverutilGenResourceHandle(hClient, &hDevice);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed creating device handle, status=0x%x\n", status);
        goto cleanup;
    }

    // Allocate a device handle
    devAllocParams.deviceId = gpuGetDeviceInstance(pGpu);
    status = pRmApi->AllocWithHandle(pRmApi, hClient, hClient, hDevice,
                                     NV01_DEVICE_0, &devAllocParams);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed creating device, status=0x%x\n", status);
        goto cleanup;
    }

    // Save off flags.
    pFabricVAS->flags = (flags |
                         VASPACE_FLAGS_ALLOW_ZERO_ADDRESS |
                         VASPACE_FLAGS_INVALIDATE_SCOPE_NVLINK_TLB |
                         VASPACE_FLAGS_DISABLE_SPLIT_VAS);

    // On a VF, remember the GFID and allow suballocated physical pages.
    if (IS_GFID_VF(gfid))
    {
        pFabricVAS->gfid = gfid;
        pFabricVAS->flags |= VASPACE_FLAGS_ALLOW_PAGES_IN_PHYS_MEM_SUBALLOCATOR;
    }

    // SR-IOV heavy WAR: allocations are RPC'ed to the host instead.
    pFabricVAS->bRpcAlloc = IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu);

    // Create the GVASPACE object associated with this fabric vaspace.
    status = vmmCreateVaspace(pVmm, FERMI_VASPACE_A, 0, pVAS->gpuMask,
                              vaStart, vaLimit, 0, 0, NULL, pFabricVAS->flags,
                              &pFabricVAS->pGVAS);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed allocating gvaspace associated with the fabric vaspace, "
                  "status=0x%x\n", status);
        goto cleanup;
    }

    pFabricVAS->hClient = hClient;
    pFabricVAS->hDevice = hDevice;

    // Capture the vasStart and vasLimit for the fabric vaspace.
    pVAS->vasStart = pFabricVAS->pGVAS->vasStart;
    pVAS->vasLimit = pFabricVAS->pGVAS->vasLimit;

    return NV_OK;

cleanup:
    // Freeing the client also frees the device object it owns.
    NV_ASSERT(pRmApi->Free(pRmApi, hClient, hClient) == NV_OK);

    return status;
}

//
// Destructor for the fabric VAS: frees the internal RM client (and the
// device it owns) and destroys the backing GVAS. Safe to call when the
// GVAS was never created.
//
void
fabricvaspaceDestruct_IMPL
(
    FABRIC_VASPACE *pFabricVAS
)
{
    RM_API     *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    OBJSYS     *pSys   = SYS_GET_INSTANCE();
    OBJVMM     *pVmm   = SYS_GET_VMM(pSys);
    OBJVASPACE *pVAS   = staticCast(pFabricVAS, OBJVASPACE);

    if (pFabricVAS->pGVAS == NULL)
        return;

    NV_ASSERT(pRmApi->Free(pRmApi, pFabricVAS->hClient,
                           pFabricVAS->hClient) == NV_OK);

    // There should be no vaspace allocations pending at this point.
    NV_ASSERT(!gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)));

    // Destroy the GVASPACE object associated with this fabric vaspace.
    vmmDestroyVaspace(pVmm, pFabricVAS->pGVAS);

    pFabricVAS->pGVAS = NULL;
    pVAS->vasStart = 0;
    pVAS->vasLimit = 0;
}

//
// Allocate a contiguous region of fabric VA of the given size/alignment
// within [rangeLo, rangeHi]. pageSize must be at least 2MB (huge page);
// size and align must be pageSize-aligned. On success *pAddr receives the
// base VA and the UC fabric accounting counters are updated.
//
NV_STATUS
fabricvaspaceAlloc_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64           size,
    NvU64           align,
    NvU64           rangeLo,
    NvU64           rangeHi,
    NvU64           pageSize,
    VAS_ALLOC_FLAGS flags,
    NvU64          *pAddr
)
{
    NV_STATUS status = NV_OK;

    // Sanity check the input parameters.
339 NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND); 340 NV_ASSERT_OR_RETURN(pAddr != NULL, NV_ERR_INVALID_ARGUMENT); 341 NV_ASSERT_OR_RETURN(pageSize >= RM_PAGE_SIZE_HUGE, NV_ERR_INVALID_ARGUMENT); 342 NV_ASSERT_OR_RETURN(align != 0, NV_ERR_INVALID_ARGUMENT); 343 NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT); 344 345 // Check the alignment and size are pageSize aligned 346 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(align, pageSize), NV_ERR_INVALID_ARGUMENT); 347 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(size, pageSize), NV_ERR_INVALID_ARGUMENT); 348 349 status = _fabricvaspaceBindInstBlk(pFabricVAS); 350 if (status != NV_OK) 351 { 352 NV_PRINTF(LEVEL_ERROR, "Failed to bind instance block for fabric vaspace." 353 " Alloc failed\n"); 354 return status; 355 } 356 357 // Adjust rangeLo and rangeHi 358 rangeLo = NV_ALIGN_DOWN(rangeLo, pageSize); 359 rangeHi = NV_ALIGN_UP(rangeHi, pageSize); 360 361 // 362 // Allocate VA space of the size and alignment requested. 363 // RM_PAGE_SIZE_HUGE is passed since FLA->PA page size is 2MB or 512MB. 
364 // 365 status = vaspaceAlloc(pFabricVAS->pGVAS, size, align, rangeLo, rangeHi, 366 RM_PAGE_SIZE_HUGE | RM_PAGE_SHIFT_512M, flags, pAddr); 367 if (status != NV_OK) 368 { 369 NV_PRINTF(LEVEL_ERROR, "Failed to allocate vaspace\n"); 370 goto failed; 371 } 372 373 // Assert that the address returned is pageSize aligned 374 NV_ASSERT(NV_IS_ALIGNED64(*pAddr, pageSize)); 375 376 pFabricVAS->ucFabricFreeSize -= size; 377 pFabricVAS->ucFabricInUseSize += size; 378 379 return NV_OK; 380 381 failed: 382 383 _fabricvaspaceUnbindInstBlk(pFabricVAS); 384 385 return status; 386 } 387 388 NV_STATUS 389 fabricvaspaceAllocNonContiguous_IMPL 390 ( 391 FABRIC_VASPACE *pFabricVAS, 392 NvU64 size, 393 NvU64 align, 394 NvU64 rangeLo, 395 NvU64 rangeHi, 396 NvU64 pageSize, 397 VAS_ALLOC_FLAGS flags, 398 NvU64 **ppAddr, 399 NvU32 *pNumAddr 400 ) 401 { 402 NV_STATUS status = NV_OK; 403 NvU64 freeSize = 0; 404 NvU32 pageCount = (size / pageSize); 405 NvU64 addr; 406 NvU32 idx; 407 NvBool bDefaultAllocMode; 408 409 // Sanity check the input parameters. 410 NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND); 411 NV_ASSERT_OR_RETURN(ppAddr != NULL, NV_ERR_INVALID_ARGUMENT); 412 NV_ASSERT_OR_RETURN(pNumAddr != NULL, NV_ERR_INVALID_ARGUMENT); 413 NV_ASSERT_OR_RETURN(pageSize >= RM_PAGE_SIZE_HUGE, NV_ERR_INVALID_ARGUMENT); 414 NV_ASSERT_OR_RETURN(align != 0, NV_ERR_INVALID_ARGUMENT); 415 NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT); 416 417 // Check the alignment and size are pageSize aligned. 418 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(align, pageSize), NV_ERR_INVALID_ARGUMENT); 419 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(size, pageSize), NV_ERR_INVALID_ARGUMENT); 420 421 // Check if heap can satisfy the request. 
    NV_ASSERT_OK_OR_RETURN(fabricvaspaceGetFreeHeap(pFabricVAS, &freeSize));
    if (freeSize < size)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Not enough memory in eheap, size requested = 0x%llx, "
                  "free memory = 0x%llx\n",
                  size, freeSize);
        return NV_ERR_NO_MEMORY;
    }

    if (flags.bForceNonContig && flags.bForceContig)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Forcing both contiguous and noncontiguous is not allowed\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Default mode: try contiguous first, then fall back to per-page chunks.
    bDefaultAllocMode = (!flags.bForceNonContig && !flags.bForceContig);

    // Adjust rangeLo and rangeHi.
    rangeLo = NV_ALIGN_DOWN(rangeLo, pageSize);
    rangeHi = NV_ALIGN_UP(rangeHi, pageSize);

    // Caller-owned output array of chunk base addresses (worst case: one per page).
    *ppAddr = portMemAllocNonPaged(sizeof(NvU64) * pageCount);
    if (*ppAddr == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }
    portMemSet(*ppAddr, 0, sizeof(NvU64) * pageCount);

    // Lazily bind the fabric VAS instance block before any VA activity.
    status = _fabricvaspaceBindInstBlk(pFabricVAS);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to bind instance block for fabric vaspace."
                  " Alloc failed\n");
        goto failed;
    }

    // Initialize number of addresses to 0
    *pNumAddr = 0;

    //
    // Attempt to allocate VA space of the size and alignment requested.
    // RM_PAGE_SIZE_HUGE is passed since FLA->PA page size is 2MB.
    //
    if (flags.bForceContig || bDefaultAllocMode)
    {
        status = vaspaceAlloc(pFabricVAS->pGVAS, size, align, rangeLo, rangeHi,
                              RM_PAGE_SIZE_HUGE, flags, &addr);
        if (status == NV_OK)
        {
            (*ppAddr)[0] = addr;
            *pNumAddr = 1;
        }
        else if (flags.bForceContig)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to allocate contig vaspace\n");
            goto failed;
        }
    }

    //
    // If size could not be allocated in one memblock, break size into
    // multiple pageSize chunks. RM_PAGE_SIZE_HUGE is passed since
    // FLA->PA page size is 2MB.
    //
    if (flags.bForceNonContig || (bDefaultAllocMode && (status != NV_OK)))
    {
        for (idx = 0; idx < pageCount; idx++)
        {
            status = vaspaceAlloc(pFabricVAS->pGVAS, pageSize, align, rangeLo,
                                  rangeHi, RM_PAGE_SIZE_HUGE, flags, &addr);
            if (status == NV_OK)
            {
                // Assert that the address returned is pageSize aligned
                NV_ASSERT(NV_IS_ALIGNED64(addr, pageSize));

                (*ppAddr)[idx] = addr;
                *pNumAddr = *pNumAddr + 1;
            }
            else
            {
                NV_PRINTF(LEVEL_ERROR, "Failed to allocate vaspace\n");
                goto failed;
            }
        }
    }

    // Track uncached (UC) fabric VA usage.
    pFabricVAS->ucFabricFreeSize  -= size;
    pFabricVAS->ucFabricInUseSize += size;

    return NV_OK;

failed:

    // Free whatever chunks were allocated so far, then release the array.
    fabricvaspaceBatchFree(pFabricVAS, *ppAddr, *pNumAddr, 1);
    portMemFree(*ppAddr);
    *ppAddr = NULL;
    *pNumAddr = 0;

    return status;
}

//
// Free one fabric VA allocation, flush outstanding writes, invalidate the
// NVLink TLB, and lazily restore the legacy FLA VAS instance block if this
// was the last allocation. UC accounting is updated only for addresses that
// fall in the UC FLA window.
//
NV_STATUS
fabricvaspaceFree_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64           vAddr
)
{
    OBJVASPACE *pVAS       = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU     *pGpu       = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU64       blockSize;
    NvBool      bUcFla;

    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);

    //
    // NOTE(review): this range check treats the UC FLA limit as exclusive (<),
    // while fabricvaspaceGetGpaMemdesc treats it as inclusive (> limit is
    // out-of-range). Confirm which bound convention is intended.
    //
    bUcFla = (vAddr >= fabricvaspaceGetUCFlaStart(pFabricVAS) &&
              vAddr < fabricvaspaceGetUCFlaLimit(pFabricVAS));

    NV_ASSERT(vaspaceFreeV2(pFabricVAS->pGVAS, vAddr, &blockSize) == NV_OK);

    // Flush pending writes before the PTEs disappear.
    kbusFlush_HAL(pGpu, pKernelBus, (BUS_FLUSH_VIDEO_MEMORY |
                                     BUS_FLUSH_SYSTEM_MEMORY |
                                     BUS_FLUSH_USE_PCIE_READ));

    fabricvaspaceInvalidateTlb(pFabricVAS, pGpu, PTE_DOWNGRADE);

    _fabricvaspaceUnbindInstBlk(pFabricVAS);

    if (bUcFla)
    {
        pFabricVAS->ucFabricFreeSize  += blockSize;
        pFabricVAS->ucFabricInUseSize -= blockSize;
    }

    return NV_OK;
}

// Direct map/unmap is not supported on a fabric VAS; use
// fabricvaspaceMapPhysMemdesc/UnmapPhysMemdesc instead.
NV_STATUS
fabricvaspaceMap_IMPL
(
    FABRIC_VASPACE       *pFabricVAS,
    OBJGPU               *pGpu,
    const NvU64
                          vaLo,
    const NvU64           vaHi,
    const MMU_MAP_TARGET *pTarget,
    const VAS_MAP_FLAGS   flags
)
{
    // Not supported; mappings go through fabricvaspaceMapPhysMemdesc.
    return NV_ERR_NOT_SUPPORTED;
}

// Counterpart stub to fabricvaspaceMap_IMPL; intentionally a no-op.
void
fabricvaspaceUnmap_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU         *pGpu,
    const NvU64     vaLo,
    const NvU64     vaHi
)
{
    return;
}

// Default alignment policy is not applicable to the fabric VAS.
NV_STATUS
fabricvaspaceApplyDefaultAlignment_IMPL
(
    FABRIC_VASPACE      *pFabricVAS,
    const FB_ALLOC_INFO *pAllocInfo,
    NvU64               *pAlign,
    NvU64               *pSize,
    NvU64               *pPageSizeLockMask
)
{
    return NV_ERR_NOT_SUPPORTED;
}

// VA-caps query is not applicable to the fabric VAS.
NV_STATUS
fabricvaspaceGetVasInfo_IMPL
(
    FABRIC_VASPACE                               *pFabricVAS,
    NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS *pParams
)
{
    return NV_ERR_NOT_SUPPORTED;
}

// Pin the root page directory of the backing GVAS on the given GPU.
NV_STATUS
fabricvaspacePinRootPageDir_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU         *pGpu
)
{
    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);

    return vaspacePinRootPageDir(pFabricVAS->pGVAS, pGpu);
}

// Unpin the root page directory of the backing GVAS on the given GPU.
void
fabricvaspaceUnpinRootPageDir_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU         *pGpu
)
{
    NV_ASSERT(pFabricVAS->pGVAS != NULL);

    vaspaceUnpinRootPageDir(pFabricVAS->pGVAS, pGpu);
}

// Report the remaining uncached (UC) fabric VA, in bytes.
NV_STATUS
fabricvaspaceGetFreeHeap_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64          *freeSize
)
{
    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_ASSERT_OR_RETURN(freeSize != NULL, NV_ERR_INVALID_ARGUMENT);

    *freeSize = pFabricVAS->ucFabricFreeSize;
    return NV_OK;
}

//
// Free numAddr allocations from pAddr[], visiting every stride-th entry.
// Performs a single flush/TLB-invalidate/instance-block-restore for the
// whole batch, then updates UC accounting with the sum of the freed UC
// block sizes.
//
void
fabricvaspaceBatchFree_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64          *pAddr,
    NvU32           numAddr,
    NvU32           stride
)
{
    OBJVASPACE *pVAS       = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU     *pGpu       = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU64       totalFreeSize = 0;
    NvU64       freeSize;
    NvU32
                count = 0;
    NvU32       idx   = 0;
    NvBool      bUcFla;

    for (count = 0; count < numAddr; count++)
    {
        // Same exclusive-limit convention as fabricvaspaceFree_IMPL.
        bUcFla = (pAddr[idx] >= fabricvaspaceGetUCFlaStart(pFabricVAS) &&
                  pAddr[idx] < fabricvaspaceGetUCFlaLimit(pFabricVAS));

        NV_ASSERT(vaspaceFreeV2(pFabricVAS->pGVAS, pAddr[idx], &freeSize) == NV_OK);

        idx += stride;

        // Only UC FLA allocations contribute to the UC accounting.
        if (bUcFla)
            totalFreeSize += freeSize;
    }

    // One flush + TLB invalidate for the whole batch.
    kbusFlush_HAL(pGpu, pKernelBus, (BUS_FLUSH_VIDEO_MEMORY |
                                     BUS_FLUSH_SYSTEM_MEMORY |
                                     BUS_FLUSH_USE_PCIE_READ));

    fabricvaspaceInvalidateTlb(pFabricVAS, pGpu, PTE_DOWNGRADE);

    _fabricvaspaceUnbindInstBlk(pFabricVAS);

    pFabricVAS->ucFabricFreeSize  += totalFreeSize;
    pFabricVAS->ucFabricInUseSize -= totalFreeSize;
}

// Invalidate TLB entries for the backing GVAS on the given GPU.
void
fabricvaspaceInvalidateTlb_IMPL
(
    FABRIC_VASPACE      *pFabricVAS,
    OBJGPU              *pGpu,
    VAS_PTE_UPDATE_TYPE  type
)
{
    vaspaceInvalidateTlb(pFabricVAS->pGVAS, pGpu, type);
}

//
// Return a memdesc suitable for mapping on pMappingGpu. If the fabric memory
// was exported by the mapping GPU itself and NVLink loopback is unavailable,
// substitute a sub-memdesc of the underlying vidmem (GVA->PA) so the mapping
// bypasses the fabric. Otherwise the original memdesc is returned unchanged.
// Caller releases the result via fabricvaspacePutGpaMemdesc.
//
NV_STATUS
fabricvaspaceGetGpaMemdesc_IMPL
(
    FABRIC_VASPACE     *pFabricVAS,
    MEMORY_DESCRIPTOR  *pFabricMemdesc,
    OBJGPU             *pMappingGpu,
    MEMORY_DESCRIPTOR **ppAdjustedMemdesc
)
{
    KernelNvlink      *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMappingGpu);
    MEMORY_DESCRIPTOR *pRootMemDesc  = NULL;
    NODE              *pNode         = NULL;
    NV_STATUS          status        = NV_OK;
    NvU64              rootOffset    = 0;
    NvBool             bLoopbackSupported = NV_FALSE;

    NV_ASSERT_OR_RETURN(ppAdjustedMemdesc != NULL, NV_ERR_INVALID_ARGUMENT);

    {
        bLoopbackSupported = pKernelNvlink != NULL &&
                             (knvlinkIsP2pLoopbackSupported(pMappingGpu, pKernelNvlink) ||
                              knvlinkIsForcedConfig(pMappingGpu, pKernelNvlink));
    }

    // Non-fabric memory, or loopback available: use the memdesc as-is.
    if (memdescGetAddressSpace(pFabricMemdesc) != ADDR_FABRIC_V2 ||
        bLoopbackSupported)
    {
        *ppAdjustedMemdesc = pFabricMemdesc;
        return NV_OK;
    }

    pRootMemDesc = memdescGetRootMemDesc(pFabricMemdesc, &rootOffset);

    RmPhysAddr *pteArray = memdescGetPteArray(pRootMemDesc, AT_GPU);

    // Check if pteArray[0] is within the VAS range for the mapping GPU.
    if ((pteArray[0] < fabricvaspaceGetUCFlaStart(pFabricVAS)) ||
        (pteArray[0] > fabricvaspaceGetUCFlaLimit(pFabricVAS)))
    {
        *ppAdjustedMemdesc = pFabricMemdesc;
        return NV_OK;
    }

    //
    // If the address space is of type ADDR_FABRIC_V2 then determine if the FLA import
    // is on the mapping GPU. If FLA import is on the mapping GPU and NVLink P2P over
    // loopback is not supported, then map GVA->PA directly. For discontiguous fabric
    // memory allocation, searching for the first entry in the pteArray should be fine
    // to determine if FLA import is on the mapping GPU.
    //
    NV_ASSERT_OK_OR_RETURN(btreeSearch(pteArray[0], &pNode, pFabricVAS->pFabricVaToGpaMap));

    FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode = (FABRIC_VA_TO_GPA_MAP_NODE *)pNode->Data;

    //
    // Create a sub-memdesc for the offset into the vidMemDesc where the GVA would be
    // mapped. Note this includes two offsets:
    // 1. Offset into the fabric memdesc where the GVA is mapped.
    // 2. Offset into the physical vidmem memdesc where the fabric memory is mapped.
    //
    status = memdescCreateSubMem(ppAdjustedMemdesc, pFabricNode->pVidMemDesc, pMappingGpu,
                                 rootOffset + pFabricNode->offset,
                                 memdescGetSize(pFabricMemdesc));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to create submMemdesc for the GVA->PA mapping\n");
        return status;
    }

    return NV_OK;
}

//
// Release a memdesc returned by fabricvaspaceGetGpaMemdesc. memdescDestroy
// is a ref-counted release, so this is correct both for a created sub-memdesc
// and for the pass-through case.
//
void
fabricvaspacePutGpaMemdesc_IMPL
(
    FABRIC_VASPACE    *pFabricVAS,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    memdescDestroy(pMemDesc);
}

// Remove (and free) the fabric-VA -> GPA tracking node for vAddr, if present.
void
fabricvaspaceVaToGpaMapRemove_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64           vAddr
)
{
    FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode = NULL;
    NODE                      *pNode       = NULL;

    if (btreeSearch(vAddr, &pNode, pFabricVAS->pFabricVaToGpaMap) == NV_OK)
    {
        pFabricNode = (FABRIC_VA_TO_GPA_MAP_NODE *)pNode->Data;

        btreeUnlink(&pFabricNode->Node, &pFabricVAS->pFabricVaToGpaMap);

        portMemFree(pFabricNode);
    }
}

//
// Record that fabric VA vAddr is backed by pVidMemDesc at the given offset.
// The node is keyed by vAddr; the tree owns the node memory until
// fabricvaspaceVaToGpaMapRemove_IMPL frees it.
//
NV_STATUS
fabricvaspaceVaToGpaMapInsert_IMPL
(
    FABRIC_VASPACE    *pFabricVAS,
    NvU64              vAddr,
    MEMORY_DESCRIPTOR *pVidMemDesc,
    NvU64              offset
)
{
    FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode = NULL;
    NV_STATUS                  status      = NV_OK;

    pFabricNode = portMemAllocNonPaged(sizeof(FABRIC_VA_TO_GPA_MAP_NODE));
    if (pFabricNode == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(pFabricNode, 0, sizeof(FABRIC_VA_TO_GPA_MAP_NODE));

    pFabricNode->pVidMemDesc   = pVidMemDesc;
    pFabricNode->offset        = offset;
    pFabricNode->Node.keyStart = vAddr;
    pFabricNode->Node.keyEnd   = vAddr;
    pFabricNode->Node.Data     = pFabricNode;

    // Insert into the btree tracking memory fabric allocations for this GPU.
    status = btreeInsert(&pFabricNode->Node, &pFabricVAS->pFabricVaToGpaMap);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to insert addr 0x%llx into the memory fabric tree\n",
                  pFabricNode->Node.keyStart);

        // Node was not linked into the tree; free it here.
        portMemFree(pFabricNode);
        return status;
    }

    return NV_OK;
}

//
// Allocate a multicast fabric VA region at exactly [base, base + size).
// The allocation range is pinned to the requested base, so a successful
// vaspaceAlloc must return base (asserted below).
//
NV_STATUS
fabricvaspaceAllocMulticast_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64           pageSize,
    NvU64           alignment,
    VAS_ALLOC_FLAGS flags,
    NvU64           base,
    NvU64           size
)
{
    NvU64     rangeLo;
    NvU64     rangeHi;
    NvU64     addr = 0;
    NV_STATUS status;

    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_ASSERT_OR_RETURN(pageSize >= RM_PAGE_SIZE_HUGE, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(alignment != 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(alignment, pageSize), NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(base, pageSize), NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(size, pageSize), NV_ERR_INVALID_ARGUMENT);

    // Constrain the search window to exactly the requested region.
    rangeLo = base;
    rangeHi = base + size - 1;

    status = vaspaceAlloc(pFabricVAS->pGVAS, size, alignment, rangeLo,
                          rangeHi, pageSize, flags, &addr);

    // With rangeLo == base and size filling the window, addr must equal base.
    NV_ASSERT(addr == base);

    return status;
}

//
// Validate offsets/length for mapping physical memory into a fabric
// allocation: offsets must be physPageSize-aligned and inside their
// respective allocations; the map length must be non-zero, aligned, and
// fit in both the physical and fabric allocations.
//
static NV_STATUS
_fabricVaspaceValidateMapAttrs
(
    NvU64  fabricOffset,
    NvU64  fabricAllocSize,
    NvU64  fabricPageSize,
    NvU64  physMapOffset,
    NvU64  physMapLength,
    NvU64  physAllocSize,
    NvU64  physPageSize
)
{
    // Fabric mem offset should be at least phys page size aligned.
    if (!NV_IS_ALIGNED64(fabricOffset, physPageSize) ||
        (fabricOffset >= fabricAllocSize))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Invalid offset passed for the fabric handle\n");

        return NV_ERR_INVALID_OFFSET;
    }

    if (!NV_IS_ALIGNED64(physMapOffset, physPageSize) ||
        (physMapOffset >= physAllocSize))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Invalid offset passed for the physmem handle\n");

        return NV_ERR_INVALID_OFFSET;
    }

    if ((physMapLength == 0) ||
        (!NV_IS_ALIGNED64(physMapLength, physPageSize)) ||
        (physMapLength > (physAllocSize - physMapOffset)) ||
        (physMapLength > (fabricAllocSize - fabricOffset)))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Invalid map length passed for the physmem handle\n");

        return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}

// One contiguous piece of a mapping request, in fabric-VA offset terms.
typedef struct FABRIC_VASPACE_MAPPING_REGION
{
    NvU64 offset;
    NvU64 length;
} FABRIC_VASPACE_MAPPING_REGION;

//
// In worst case, we can have three regions to map. Two partially filled fabric
// pages and one (or more) fully filled fabric page(s).
//
#define FABRIC_VASPACE_MAPPING_REGIONS_MAX 3

typedef struct FABRIC_VASPACE_MAPPING_REGIONS
{
    FABRIC_VASPACE_MAPPING_REGION r[FABRIC_VASPACE_MAPPING_REGIONS_MAX];
} FABRIC_VASPACE_MAPPING_REGIONS;

//
// Split [fabricOffset, fabricOffset + physMapLength) into at most three
// regions: an unaligned head up to the next fabricPageSize boundary, a run
// of whole fabric pages, and an unaligned tail. Empty regions are omitted;
// *pNumRegions receives the count (0..FABRIC_VASPACE_MAPPING_REGIONS_MAX).
//
static void
_fabricvaspaceGetMappingRegions
(
    NvU64                           fabricOffset,
    NvU64                           fabricPageSize,
    NvU64                           physMapLength,
    FABRIC_VASPACE_MAPPING_REGIONS *pRegions,
    NvU32                          *pNumRegions
)
{
    NvU64 fabricOffsetAligned = NV_ALIGN_UP64(fabricOffset, fabricPageSize);
    NvU64 mapLengthAligned    = NV_ALIGN_DOWN64(physMapLength, fabricPageSize);

    *pNumRegions = 0;

    // Head: partial fabric page before the first aligned boundary.
    if ((fabricOffset < fabricOffsetAligned) &&
        (physMapLength >= (fabricOffsetAligned - fabricOffset)))
    {
        pRegions->r[*pNumRegions].offset = fabricOffset;
        pRegions->r[*pNumRegions].length = fabricOffsetAligned - fabricOffset;

        fabricOffset += pRegions->r[*pNumRegions].length;
        physMapLength -= pRegions->r[*pNumRegions].length;
        // Recompute after consuming the head.
        mapLengthAligned = NV_ALIGN_DOWN64(physMapLength, fabricPageSize);

        (*pNumRegions)++;
    }

    if (physMapLength == 0)
        return;

    // Middle: whole fabric pages.
    if ((fabricOffset == fabricOffsetAligned) &&
        (mapLengthAligned >= fabricPageSize))
    {
        pRegions->r[*pNumRegions].offset = fabricOffset;
        pRegions->r[*pNumRegions].length = mapLengthAligned;

        fabricOffset += pRegions->r[*pNumRegions].length;
        physMapLength -= pRegions->r[*pNumRegions].length;

        (*pNumRegions)++;
    }

    if (physMapLength == 0)
        return;

    // Tail: whatever remains (less than one fabric page).
    pRegions->r[*pNumRegions].offset = fabricOffset;
    pRegions->r[*pNumRegions].length = physMapLength;

    (*pNumRegions)++;
}

//
// Unmap physMapLength bytes of pPhysMemDesc from the fabric allocation
// pFabricMemDesc starting at fabricOffset, region by region (see
// _fabricvaspaceGetMappingRegions), then invalidate the TLB once.
//
void
fabricvaspaceUnmapPhysMemdesc_IMPL
(
    FABRIC_VASPACE    *pFabricVAS,
    MEMORY_DESCRIPTOR *pFabricMemDesc,
    NvU64              fabricOffset,
    MEMORY_DESCRIPTOR *pPhysMemDesc,
    NvU64              physMapLength
)
{
    OBJGPU *pGpu = pPhysMemDesc->pGpu;
    NvU32   fabricPageCount;
    NvU64   fabricAddr;
    NvU64   fabricPageSize;
    NvU32   i, j;
    NvU64   mapLength;
    FABRIC_VASPACE_MAPPING_REGIONS regions;
    NvU32   numRegions;

    fabricPageSize = memdescGetPageSize64(pFabricMemDesc, AT_GPU);

    // The physical memory's GPU must own this fabric VAS.
    NV_ASSERT_OR_RETURN_VOID(dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE) == \
                             pFabricVAS);

    _fabricvaspaceGetMappingRegions(fabricOffset, fabricPageSize, physMapLength,
                                    &regions, &numRegions);
    NV_ASSERT_OR_RETURN_VOID(numRegions != 0);

    for (i = 0; i < numRegions; i++)
    {
        // One VA chunk per fabric page unless the region is sub-page or the
        // fabric memdesc is contiguous (single PTE array entry).
        fabricPageCount = ((memdescGetPteArraySize(pFabricMemDesc, AT_GPU) == 1) ||
                           (regions.r[i].length < fabricPageSize)) ? \
                          1 : (regions.r[i].length / fabricPageSize);
        mapLength = (fabricPageCount == 1) ? regions.r[i].length : fabricPageSize;
        fabricOffset = regions.r[i].offset;

        for (j = 0; j < fabricPageCount; j++)
        {
            if (fabricPageCount == 1)
            {
                // Contiguous case: offset from the single base PTE.
                fabricAddr = pFabricMemDesc->_pteArray[0] + fabricOffset;
            }
            else
            {
                // Discontiguous case: index the PTE for this page.
                fabricAddr = pFabricMemDesc->_pteArray[fabricOffset / pFabricMemDesc->pageArrayGranularity];
            }

            vaspaceUnmap(pFabricVAS->pGVAS, pPhysMemDesc->pGpu, fabricAddr, \
                         fabricAddr + mapLength - 1);

            fabricOffset = fabricOffset + mapLength;
        }
    }

    fabricvaspaceInvalidateTlb(pFabricVAS, pPhysMemDesc->pGpu, PTE_DOWNGRADE);
}

//
// Map physMapLength bytes of pPhysMemDesc (starting at physOffset) into the
// fabric allocation pFabricMemDesc at fabricOffset. The request is split into
// head/middle/tail regions (see _fabricvaspaceGetMappingRegions) and each
// fabric page is mapped via dmaUpdateVASpace_HAL. Honors
// FABRIC_VASPACE_MAP_FLAGS_READ_ONLY. On a partial failure, regions mapped so
// far are unmapped before returning. Returns NV_OK without mapping when
// allocations are RPC'ed to the host (bRpcAlloc).
//
NV_STATUS
fabricvaspaceMapPhysMemdesc_IMPL
(
    FABRIC_VASPACE    *pFabricVAS,
    MEMORY_DESCRIPTOR *pFabricMemDesc,
    NvU64              fabricOffset,
    MEMORY_DESCRIPTOR *pPhysMemDesc,
    NvU64              physOffset,
    NvU64              physMapLength,
    NvU32              flags
)
{
    OBJGPU           *pGpu = pPhysMemDesc->pGpu;
    VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);
    MemoryManager    *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_STATUS         status;
    DMA_PAGE_ARRAY    pageArray;
    NvU32             kind;
    COMPR_INFO        comprInfo;
    NvU32             mapFlags = DMA_UPDATE_VASPACE_FLAGS_UPDATE_ALL |
                                 DMA_UPDATE_VASPACE_FLAGS_SKIP_4K_PTE_CHECK;
    NvU32             fabricPageCount;
    NvU64             fabricAddr;
    NvU64             physPageSize;
    NvU64             fabricPageSize;
    NvU64             physAddr;
    NvU32             i, j;
    NvU64             mapLength;
    NvBool            bReadOnly = !!(flags & FABRIC_VASPACE_MAP_FLAGS_READ_ONLY);
    FABRIC_VASPACE_MAPPING_REGIONS regions;
    NvU32             numRegions;
    MEMORY_DESCRIPTOR *pTempMemdesc;
    NvU32             aperture;

    NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pPhysMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);

    mapFlags |= bReadOnly ? DMA_UPDATE_VASPACE_FLAGS_READ_ONLY : 0;

    NV_ASSERT_OR_RETURN(dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE) == pFabricVAS,
                        NV_ERR_INVALID_ARGUMENT);

    physPageSize   = memdescGetPageSize64(pPhysMemDesc, AT_GPU);
    fabricPageSize = memdescGetPageSize64(pFabricMemDesc, AT_GPU);

    // Validate alignment/range of the request before touching any PTEs.
    status = _fabricVaspaceValidateMapAttrs(fabricOffset,
                                            memdescGetSize(pFabricMemDesc),
                                            fabricPageSize,
                                            physOffset,
                                            physMapLength,
                                            memdescGetSize(pPhysMemDesc),
                                            physPageSize);
    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);

    // SR-IOV heavy: mapping is performed by the host via RPC; nothing to do.
    if (pFabricVAS->bRpcAlloc)
        return NV_OK;

    status = memmgrGetKindComprFromMemDesc(pMemoryManager, pPhysMemDesc,
                                           physOffset, &kind, &comprInfo);
    NV_ASSERT_OK_OR_RETURN(status);

    _fabricvaspaceGetMappingRegions(fabricOffset, fabricPageSize, physMapLength,
                                    &regions, &numRegions);
    NV_ASSERT_OR_RETURN(numRegions != 0, NV_ERR_INVALID_ARGUMENT);

    for (i = 0; i < numRegions; i++)
    {
        // Same per-region page decomposition as the unmap path.
        fabricPageCount = ((memdescGetPteArraySize(pFabricMemDesc, AT_GPU) == 1) ||
                           (regions.r[i].length < fabricPageSize)) ? \
                          1 : (regions.r[i].length / fabricPageSize);
        mapLength = (fabricPageCount == 1) ? regions.r[i].length : fabricPageSize;
        fabricOffset = regions.r[i].offset;

        portMemSet(&pageArray, 0, sizeof(DMA_PAGE_ARRAY));
        pageArray.count = (memdescGetPteArraySize(pPhysMemDesc, AT_GPU) == 1) ? \
                          1 : (mapLength / pPhysMemDesc->pageArrayGranularity);

        for (j = 0; j < fabricPageCount; j++)
        {
            if (fabricPageCount == 1)
            {
                // Contiguous fabric memdesc: offset from the base PTE.
                fabricAddr = pFabricMemDesc->_pteArray[0] + fabricOffset;
            }
            else
            {
                // Discontiguous: look up the PTE for this fabric page.
                fabricAddr = pFabricMemDesc->_pteArray[fabricOffset / pFabricMemDesc->pageArrayGranularity];
            }

            if (pageArray.count == 1)
            {
                // Contiguous physical memdesc: single physical address.
                physAddr = pPhysMemDesc->_pteArray[0] + physOffset;
                pageArray.pData = &physAddr;
            }
            else
            {
                // Point at the slice of the physical PTE array for this chunk.
                pageArray.pData = &pPhysMemDesc->_pteArray[physOffset / pPhysMemDesc->pageArrayGranularity];
            }

            //
            // When physPageSize is greater than fabricPageSize, to avoid fabric
            // VAs getting aligned using physPageSize by dmaUpdateVASpace_HAL,
            // create a tempMemdesc and override its pageSize.
            //
            if (fabricPageSize < physPageSize)
            {
                status = memdescCreateSubMem(&pTempMemdesc, pPhysMemDesc,
                                             pPhysMemDesc->pGpu,
                                             physOffset, mapLength);
                if (status != NV_OK)
                    goto fail;

                memdescSetPageSize(pTempMemdesc, AT_GPU, fabricPageSize);
            }
            else
            {
                pTempMemdesc = pPhysMemDesc;
            }

            // Select the PTE aperture from the physical memory's address space.
            if (memdescGetAddressSpace(pPhysMemDesc) == ADDR_FBMEM)
            {
                aperture = NV_MMU_PTE_APERTURE_VIDEO_MEMORY;
            }
            else if (memdescGetAddressSpace(pPhysMemDesc) == ADDR_SYSMEM)
            {
                if (memdescGetCpuCacheAttrib(pPhysMemDesc) == NV_MEMORY_CACHED)
                {
                    aperture = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
                }
                else
                {
                    aperture = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;
                }
            }
            else
            {
                NV_PRINTF(LEVEL_ERROR, "Unsupported aperture\n");
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }

            // Map the memory fabric object at the given physical memory offset.
            status = dmaUpdateVASpace_HAL(pGpu, pDma, pFabricVAS->pGVAS, pTempMemdesc,
                                          NULL, fabricAddr, fabricAddr + mapLength - 1,
                                          mapFlags, &pageArray, 0, &comprInfo, 0,
                                          NV_MMU_PTE_VALID_TRUE,
                                          aperture,
                                          BUS_INVALID_PEER, NVLINK_INVALID_FABRIC_ADDR,
                                          DMA_DEFER_TLB_INVALIDATE, NV_FALSE,
                                          memdescGetPageSize64(pTempMemdesc, AT_GPU));

            // Drop the temporary sub-memdesc, if one was created.
            if (pTempMemdesc != pPhysMemDesc)
                memdescDestroy(pTempMemdesc);

            if (status != NV_OK)
                goto fail;

            physOffset   = physOffset + mapLength;
            fabricOffset = fabricOffset + mapLength;
        }
    }

    fabricvaspaceInvalidateTlb(pFabricVAS, pPhysMemDesc->pGpu, PTE_UPGRADE);

    return NV_OK;

fail:
    //
    // Unwind fully-mapped regions. NOTE(review): pages already mapped inside
    // the failing region i are not unwound here — confirm that is acceptable.
    //
    for (j = 0; j < i; j++)
        fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc,
                                      regions.r[j].offset, pPhysMemDesc,
                                      regions.r[j].length);

    return status;
}

NV_STATUS
fabricvaspaceInitUCRange_IMPL
(
    FABRIC_VASPACE
*pFabricVAS, 1228 OBJGPU *pGpu, 1229 NvU64 fabricBase, 1230 NvU64 fabricSize 1231 ) 1232 { 1233 if (fabricvaspaceGetUCFlaLimit(pFabricVAS) != 0) 1234 return NV_ERR_IN_USE; 1235 1236 if (fabricSize != 0) 1237 { 1238 NV_PRINTF(LEVEL_INFO, "Setting UC Base: %llx, size: %llx \n", 1239 fabricBase, fabricSize); 1240 pFabricVAS->ucFabricBase = fabricBase; 1241 pFabricVAS->ucFabricLimit = fabricBase + fabricSize - 1; 1242 pFabricVAS->ucFabricInUseSize = 0; 1243 pFabricVAS->ucFabricFreeSize = fabricSize; 1244 } 1245 1246 return NV_OK; 1247 } 1248 1249