/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*!
 * @file
 * @brief Memory descriptor handling utility routines.
 */

#include "gpu/mem_mgr/mem_desc.h"

#include "gpu/bif/kernel_bif.h"

#include "os/os.h"

#include "gpu_mgr/gpu_mgr.h"
#include "core/locks.h"
#include "mem_mgr/io_vaspace.h"
#include "mem_mgr/virt_mem_mgr.h"
#include "core/system.h"

#include "gpu/mem_mgr/virt_mem_allocator.h"

#include "rmconfig.h"
#include "vgpu/rpc.h"
#include "mem_mgr/mem.h"

#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/mem_utils.h"

#include "gpu/mem_mgr/heap.h"

#include "gpu/mem_sys/kern_mem_sys.h"
#include "mem_mgr/video_mem.h"

#include "mem_mgr/ctx_buf_pool.h"

#include "nvrm_registry.h" // For memdescOverrideInstLoc*()

#include "deprecated/rmapi_deprecated.h"
#include "rmapi/rmapi.h"
#include "rmapi/rs_utils.h"
#include "class/cl0071.h" // NV01_MEMORY_SYSTEM_OS_DESCRIPTOR

#include "gpu/bus/kern_bus.h"

// Structure for keeping track of BAR1 mappings
typedef struct
{
    NvU64 FbAperture;
    NvU64 FbApertureLen;
    NvP64 pPriv;
} FB_MAPPING_INFO;

//
// Common address space lists
//
const NV_ADDRESS_SPACE ADDRLIST_FBMEM_PREFERRED[] = {ADDR_FBMEM, ADDR_SYSMEM, ADDR_UNKNOWN};
const NV_ADDRESS_SPACE ADDRLIST_SYSMEM_PREFERRED[] = {ADDR_SYSMEM, ADDR_FBMEM, ADDR_UNKNOWN};
const NV_ADDRESS_SPACE ADDRLIST_FBMEM_ONLY[] = {ADDR_FBMEM, ADDR_UNKNOWN};
const NV_ADDRESS_SPACE ADDRLIST_SYSMEM_ONLY[] = {ADDR_SYSMEM, ADDR_UNKNOWN};

// XXX These could probably encode the whole list in the u32 bits.
NvU32 memdescAddrSpaceListToU32(const NV_ADDRESS_SPACE *addrlist)
{
    if (addrlist == ADDRLIST_FBMEM_PREFERRED)
        return 1;
    else if (addrlist == ADDRLIST_SYSMEM_PREFERRED)
        return 2;
    else if (addrlist == ADDRLIST_FBMEM_ONLY)
        return 3;
    else if (addrlist == ADDRLIST_SYSMEM_ONLY)
        return 4;
    else
        return 0;
}

const NV_ADDRESS_SPACE *memdescU32ToAddrSpaceList(NvU32 index)
{
    switch (index)
    {
        case 1: return ADDRLIST_FBMEM_PREFERRED;
        case 2: return ADDRLIST_SYSMEM_PREFERRED;
        case 3: return ADDRLIST_FBMEM_ONLY;
        case 4: return ADDRLIST_SYSMEM_ONLY;
        default:
            return NULL;
    }
}

/*
 * @brief Setting MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE also requires
 *        initializing pHeap and the bUsingSuballocator flag
 */
static NV_STATUS _memdescSetSubAllocatorFlag
(
    OBJGPU *pGpu,
    PMEMORY_DESCRIPTOR pMemDesc,
    NvBool bSet
)
{
    NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);

    if (!bSet)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unsetting MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE not supported\n");
        NV_ASSERT(0);
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ASSERT(!(pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL));

    // Set flag forcing the allocation to fall into suballocator
    pMemDesc->_flags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;

    {
        Heap *pHeap = pMemDesc->pHeap;
        NvBool bForceSubheap = NV_FALSE;

        NV_ASSERT(pHeap == NULL || pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR);

        if (pMemDesc->_flags & MEMDESC_FLAGS_FORCE_ALLOC_FROM_SUBHEAP)
        {
            bForceSubheap = NV_TRUE;
        }

        if (pHeap == NULL)
            pHeap = memmgrGetDeviceSuballocator(GPU_GET_MEMORY_MANAGER(pGpu), bForceSubheap);

        if (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
        {
            NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &pMemDesc->gfid));
            pMemDesc->bUsingSuballocator = NV_TRUE;
        }
    }

    return NV_OK;
}

/*!
 * @brief Initializing GFID for guest allocated memdescs
 */
static NV_STATUS _memdescSetGuestAllocatedFlag
(
    OBJGPU *pGpu,
    PMEMORY_DESCRIPTOR pMemDesc,
    NvBool bSet
)
{
    // for VGPU header scrubbing in Open Orin package

    NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);

    if (!bSet)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unsetting MEMDESC_FLAGS_GUEST_ALLOCATED not supported\n");
        NV_ASSERT(0);
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &pMemDesc->gfid));
    pMemDesc->_flags |= MEMDESC_FLAGS_GUEST_ALLOCATED;

    return NV_OK;
}

/*!
 * @brief Allocate and initialize a new empty memory descriptor
 *
 * Allocate a new memory descriptor. This allocates the memory descriptor
 * only. memdescAlloc or memdescDescribe are later used to allocate or associate
 * memory to the memory descriptor.
 *
 * This routine takes the size and the physical contiguity of the future
 * allocation in order to size the PTE array for non-contiguous requests.
 *
 * memdescDestroy should be called to free a memory descriptor.
 *
 * If MEMDESC_FLAGS_PRE_ALLOCATED is specified, use the memory descriptor
 * supplied by the client instead of allocating a new one.
 *
 * @param[out] ppMemDesc             Return pointer to new memory descriptor
 * @param[in]  pGpu
 * @param[in]  Size                  Size of memory descriptor in bytes.
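 * @param[in]  Alignment             Alignment requirement in bytes (0 when the caller has none; see memdescCreateExisting).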
 * @param[in]  PhysicallyContiguous  Need physical contig or can it be scattered?
 * @param[in]  AddressSpace          NV_ADDRESS_SPACE requested
 * @param[in]  CpuCacheAttrib        CPU cacheability requested
 * @param[in]  Flags                 MEMDESC_FLAGS_*
 *
 * @returns NV_OK on success
 */
NV_STATUS
memdescCreate
(
    MEMORY_DESCRIPTOR **ppMemDesc,
    OBJGPU *pGpu,
    NvU64 Size,
    NvU64 Alignment,
    NvBool PhysicallyContiguous,
    NV_ADDRESS_SPACE AddressSpace,
    NvU32 CpuCacheAttrib,
    NvU64 Flags
)
{
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU64 allocSize, MdSize, PageCount;
    NvU32 gpuCacheAttrib = NV_MEMORY_UNCACHED;
    NV_STATUS status = NV_OK;

    allocSize = Size;

    //
    // this memdesc may have gotten forced to sysmem if no carveout,
    // but for VPR it needs to be in vidmem, so check and re-direct here,
    // unless running with zero-FB
    //
    if ((AddressSpace != ADDR_UNKNOWN) &&
        (Flags & MEMDESC_ALLOC_FLAGS_PROTECTED) &&
        (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
         gpuIsCacheOnlyModeEnabled(pGpu)))
    {
        AddressSpace = ADDR_FBMEM;
    }

    if (pGpu != NULL)
    {
        MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

        if (((AddressSpace == ADDR_SYSMEM) || (AddressSpace == ADDR_UNKNOWN)) &&
            !(Flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL))
        {
            if (pMemoryManager && pMemoryManager->sysmemPageSize)
            {
                allocSize = RM_ALIGN_UP(allocSize, pMemoryManager->sysmemPageSize);
                if (allocSize < Size)
                {
                    return NV_ERR_INVALID_ARGUMENT;
                }
            }
        }

        if (RMCFG_FEATURE_PLATFORM_MODS || IsT194(pGpu) || IsT234(pGpu))
        {
            if ((AddressSpace == ADDR_FBMEM) &&
                !(Flags & MEMDESC_ALLOC_FLAGS_PROTECTED) &&
                memmgrGetUsableMemSizeMB_HAL(pGpu, pMemoryManager) == 0 &&
                gpuIsUnifiedMemorySpaceEnabled(pGpu))
            {
                // On Tegra, force sysmem if carveout and SMMU are not available
                AddressSpace = ADDR_SYSMEM;
                if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
                {
                    CpuCacheAttrib = pGpu->instCacheOverride;
                }
            }

            //
            // Support for aligned contiguous SYSMEM allocations.
            //
            if ((AddressSpace == ADDR_SYSMEM || AddressSpace == ADDR_UNKNOWN) &&
                PhysicallyContiguous && (Alignment > RM_PAGE_SIZE))
            {
                if (!portSafeAddU64(allocSize, (Alignment - RM_PAGE_SIZE), &allocSize))
                {
                    return NV_ERR_INVALID_ARGUMENT;
                }
            }
        }
    }

    //
    // Note that we allocate one extra PTE, since we don't know what the PteAdjust
    // is yet; if the PteAdjust is zero, we simply won't use it. This is in the
    // MEMORY_DESCRIPTOR structure definition.
    //
    // RM_PAGE_SIZE is 4k and RM_PAGE_SHIFT is 12, so the shift expression can be
    // rewritten from ((allocSize + RM_PAGE_SIZE - 1) >> RM_PAGE_SHIFT) to the
    // form below (4k >> 12 = 1). This avoids overflowing allocSize in case the
    // caller passes the highest value of NvU64.
    //
    // If allocSize is passed as 0, PageCount should be returned as 0.
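    //
    // Worked example: allocSize = 0x3001 gives
    // PageCount = ((0x3001 - 1) >> 12) + 1 = 3 + 1 = 4 pages, whereas the
    // naive ((allocSize + RM_PAGE_SIZE - 1) >> RM_PAGE_SHIFT) form would wrap
    // around to 0 for allocSize = NV_U64_MAX.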
    //
    if (allocSize == 0)
    {
        PageCount = 0;
    }
    else
    {
        PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
    }

    if (PhysicallyContiguous)
    {
        MdSize = sizeof(MEMORY_DESCRIPTOR);
    }
    else
    {
        MdSize = sizeof(MEMORY_DESCRIPTOR) +
            (sizeof(RmPhysAddr) * PageCount);
        NV_ASSERT(MdSize <= 0xffffffffULL);
        if (MdSize > 0xffffffffULL)
            return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    if (Flags & MEMDESC_FLAGS_PAGED_SYSMEM)
    {
        // The flag MEMDESC_FLAGS_PAGED_SYSMEM is only for Windows
        return NV_ERR_NOT_SUPPORTED;
    }

    if (Flags & MEMDESC_FLAGS_PRE_ALLOCATED)
    {
        // Only fixed-size (physically contiguous) memdescs can be supported
        if (PhysicallyContiguous == NV_FALSE)
        {
            return NV_ERR_BUFFER_TOO_SMALL;
        }

        NV_ASSERT_OR_RETURN(*ppMemDesc, NV_ERR_NOT_SUPPORTED);

        pMemDesc = *ppMemDesc;
    }
    else
    {
        pMemDesc = portMemAllocNonPaged((NvU32)MdSize);
        if (pMemDesc == NULL)
        {
            return NV_ERR_NO_MEMORY;
        }
    }

    portMemSet(pMemDesc, 0, (NvU32)MdSize);

    // Fill in initial non-zero parameters
    pMemDesc->pGpu                 = pGpu;
    pMemDesc->Size                 = Size;
    pMemDesc->PageCount            = PageCount;
    pMemDesc->ActualSize           = allocSize;
    pMemDesc->_addressSpace        = AddressSpace;
    pMemDesc->RefCount             = 1;
    pMemDesc->DupCount             = 1;
    pMemDesc->_subDeviceAllocCount = 1;
    pMemDesc->_flags               = Flags;
    pMemDesc->_gpuCacheAttrib      = gpuCacheAttrib;
    pMemDesc->_gpuP2PCacheAttrib   = NV_MEMORY_UNCACHED;
    pMemDesc->Alignment            = Alignment;
    pMemDesc->gfid                 = GPU_GFID_PF;
    pMemDesc->bUsingSuballocator   = NV_FALSE;
    pMemDesc->bDeferredFree        = NV_FALSE;

    pMemDesc->libosRegionHandle    = 0;
    pMemDesc->baseVirtualAddress   = 0;

    // parameter to determine page granularity
    pMemDesc->pageArrayGranularity = RM_PAGE_SIZE;

    memdescSetCpuCacheAttrib(pMemDesc, CpuCacheAttrib);

    // Set any additional flags
    pMemDesc->_flags |= MEMDESC_FLAGS_KERNEL_MODE;
    if (PhysicallyContiguous)
        pMemDesc->_flags |= MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS;
    else
        pMemDesc->_flags &= ~MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS;

    // OBJHEAP may not be created at this time and pMemDesc->pHeap may be NULL after this if-else
    if (Flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL)
    {
        pMemDesc->_flags |= MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL;
        pMemDesc->_flags &= ~MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
    }
    else if (Flags & MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE)
    {
        NV_ASSERT_OK_OR_GOTO(status, _memdescSetSubAllocatorFlag(pGpu, pMemDesc, NV_TRUE), failed);
    }

    // In case of guest allocated memory, just initialize GFID
    if (Flags & MEMDESC_FLAGS_GUEST_ALLOCATED)
    {
        NV_ASSERT_OK_OR_GOTO(status, _memdescSetGuestAllocatedFlag(pGpu, pMemDesc, NV_TRUE), failed);
    }

failed:
    if (status != NV_OK)
    {
        if (!(Flags & MEMDESC_FLAGS_PRE_ALLOCATED))
        {
            portMemFree(pMemDesc);
        }
    }
    else
    {
        *ppMemDesc = pMemDesc;
    }

    return status;
}
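
/*
 * Minimal lifecycle sketch for reference (hypothetical caller, not code in
 * this file): create the descriptor, back it with memory, then tear down.
 *
 *   MEMORY_DESCRIPTOR *pMemDesc = NULL;
 *   NV_ASSERT_OK_OR_RETURN(memdescCreate(&pMemDesc, pGpu, size, 0, NV_TRUE,
 *                                        ADDR_SYSMEM, NV_MEMORY_UNCACHED,
 *                                        MEMDESC_FLAGS_NONE));
 *   NV_ASSERT_OK_OR_RETURN(memdescAlloc(pMemDesc));
 *   // ... use the memory ...
 *   memdescFree(pMemDesc);
 *   memdescDestroy(pMemDesc);
 */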

/*!
 * @brief Initialize a caller-allocated memory descriptor
 *
 * Helper to make it easier to get the memDesc **, and typically used
 * with memdescDescribe.
 *
 * Can only be used for physically contiguous regions with a fixed
 * size PTE array.
 *
 * memdescDestroy should be called to free a memory descriptor.
 *
 * If MEMDESC_FLAGS_PRE_ALLOCATED is specified, use the memory descriptor
 * supplied by the client instead of allocating a new one.
 *
 * @param[out] pMemDesc        Return pointer to new memory descriptor
 * @param[in]  pGpu
 * @param[in]  Size            Size of memory descriptor in bytes
 * @param[in]  AddressSpace    NV_ADDRESS_SPACE requested
 * @param[in]  CpuCacheAttrib  CPU cacheability requested
 * @param[in]  Flags           MEMDESC_FLAGS_*
 *
 * @returns void; since there is no malloc there should be no failure cases
 */
void
memdescCreateExisting
(
    MEMORY_DESCRIPTOR *pMemDesc,
    OBJGPU *pGpu,
    NvU64 Size,
    NV_ADDRESS_SPACE AddressSpace,
    NvU32 CpuCacheAttrib,
    NvU64 Flags
)
{
    NV_STATUS status;
    status = memdescCreate(&pMemDesc, pGpu, Size, 0, NV_TRUE, AddressSpace,
                           CpuCacheAttrib,
                           Flags | MEMDESC_FLAGS_PRE_ALLOCATED | MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
    NV_ASSERT(status == NV_OK);
}
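
/*
 * Usage sketch (hypothetical values, not code in this file): wrap an
 * existing physical range so the rest of RM can treat it like any other
 * memdesc, then bind the physical address with memdescDescribe.
 *
 *   MEMORY_DESCRIPTOR memDesc;
 *   memdescCreateExisting(&memDesc, pGpu, RM_PAGE_SIZE, ADDR_FBMEM,
 *                         NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
 *   memdescDescribe(&memDesc, ADDR_FBMEM, physAddr, RM_PAGE_SIZE);
 *   // ... use, then ...
 *   memdescDestroy(&memDesc);
 */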

/*!
 * Increment ref count
 */
void memdescAddRef
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(pMemDesc != NULL);
    ++(pMemDesc->RefCount);
}

/*!
 * Decrement ref count
 */
void memdescRemoveRef
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL);
    --(pMemDesc->RefCount);
}

//
// Destroy all IOMMU mappings under this memdesc, including child
// mappings for root memdescs.
//
// TODO: merge the new IOMMU paths with the SMMU paths (see bug 1625121).
//
static void
_memdescFreeIommuMappings(PMEMORY_DESCRIPTOR pMemDesc)
{
#if (RMCFG_FEATURE_PLATFORM_UNIX || RMCFG_FEATURE_PLATFORM_MODS) && !NVCPU_IS_ARM
    PIOVAMAPPING pIovaMapping = pMemDesc->_pIommuMappings;

    if (!pIovaMapping)
        return;

    if (memdescIsSubMemoryMemDesc(pMemDesc))
    {
        iovaMappingDestroy(pIovaMapping);
        return;
    }

    while (pIovaMapping)
    {
        PIOVAMAPPING pTmpIovaMapping = pIovaMapping->pNext;
        iovaMappingDestroy(pIovaMapping);
        pIovaMapping = pTmpIovaMapping;
    }

    pMemDesc->_pIommuMappings = NULL;
#endif
}

/*!
 * Destroy a memory descriptor if the last reference is released
 *
 * If the memory descriptor is down to one reference, we need
 * to check with the bus code whether that reference needs
 * to be reclaimed.
 *
 * @param[in] pMemDesc Memory descriptor to be destroyed
 *
 * @returns None
 */
void
memdescDestroy
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    // Allow null frees
    if (!pMemDesc)
    {
        return;
    }

    memdescRemoveRef(pMemDesc);

    // If there are still references to pMemDesc (pMemDesc->RefCount != 0), then bail out.

    if (pMemDesc->RefCount == 0)
    {
        MEM_DESC_DESTROY_CALLBACK *pCb = memdescGetDestroyCallbackList(pMemDesc);
        MEM_DESC_DESTROY_CALLBACK *pNext;

        if (pMemDesc->_flags & MEMDESC_FLAGS_DUMMY_TOPLEVEL)
        {
            // When called from RmFreeFrameBuffer() and memdescFree could not do it because it is unallocated.
            pMemDesc->_pNext = NULL;
            pMemDesc->_subDeviceAllocCount = 1;
        }

        NV_ASSERT(pMemDesc->childDescriptorCnt == 0);
        NV_ASSERT(pMemDesc->_addressSpace == ADDR_FBMEM || pMemDesc->pHeap == NULL);

        //
        // If there is private memdata, use the CB to free
        //
        if (pMemDesc->_pMemData && pMemDesc->_pMemDataReleaseCallback)
        {
            pMemDesc->_pMemDataReleaseCallback(pMemDesc);
        }

        if (pMemDesc->bDeferredFree)
        {
            memdescFree(pMemDesc);
        }
        else if (pMemDesc->Allocated != 0)
        {
            //
            // The caller forgot to free the actual memory before destroying the memdesc.
            // Please fix this by calling memdescFree().
            // To prevent memory leaks, we explicitly free here until it's fixed elsewhere.
            //
            NV_PRINTF(LEVEL_ERROR, "Destroying unfreed memory %p\n", pMemDesc);
            NV_PRINTF(LEVEL_ERROR, "Please call memdescFree()\n");
            memdescFree(pMemDesc);
            NV_ASSERT(!pMemDesc->Allocated);
        }

        if (memdescGetStandbyBuffer(pMemDesc))
        {
            memdescFree(memdescGetStandbyBuffer(pMemDesc));
            memdescDestroy(memdescGetStandbyBuffer(pMemDesc));
            memdescSetStandbyBuffer(pMemDesc, NULL);
        }

        //
        // Submemory descriptors will be destroyed without going through a free
        // path, so we need to make sure that we remove the IOMMU submapping
        // here. For root descriptors, we should already have removed all the
        // associated IOVA mappings.
        //
        // However, for memory descriptors that weren't allocated by the RM,
        // (e.g., were created from a user allocation), we won't go through a
        // free path at all. In this case, mappings for other GPUs may still be
        // attached to this root memory descriptor, so release them now.
        //
        _memdescFreeIommuMappings(pMemDesc);

        // Notify all interested parties of destruction
        while (pCb)
        {
            pNext = pCb->pNext;
            pCb->destroyCallback(pMemDesc->pGpu, pCb->pObject, pMemDesc);
            // pCb is now invalid
            pCb = pNext;
        }

        portMemFree(pMemDesc->pPteSpaMappings);
        pMemDesc->pPteSpaMappings = NULL;
        portMemFree(pMemDesc->pSubMemDescList);
        pMemDesc->pSubMemDescList = NULL;

        if (pMemDesc->_pParentDescriptor)
        {
            if ((pMemDesc->_flags & MEMDESC_FLAGS_PRE_ALLOCATED) == 0)
                pMemDesc->_pParentDescriptor->childDescriptorCnt--;
            memdescDestroy(pMemDesc->_pParentDescriptor);
            pMemDesc->_pParentDescriptor = NULL;
        }

        // Verify memdesc is not top
        NV_ASSERT(memdescHasSubDeviceMemDescs(pMemDesc) == NV_FALSE);

        if ((pMemDesc->_flags & MEMDESC_FLAGS_PRE_ALLOCATED) == 0)
        {
            portMemFree(pMemDesc);
        }
    }
}

/*!
 * @brief Frees the subdevice memory descriptors. If there are no subdevice
 * memory descriptors, the function simply resets the memdesc structure.
 * The top-level memory descriptor is not destroyed.
 *
 * @param[in,out] pMemDesc Top level memory descriptor.
 *
 * @returns None
 */
static void
_memSubDeviceFreeAndDestroy
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    MEMORY_DESCRIPTOR *pSubDevMemDesc = pMemDesc->_pNext;
    MEMORY_DESCRIPTOR *pNextMemDesc;
    OBJGPU *pGpu = pMemDesc->pGpu;
    NvBool bBcState;

    // No subdevice memdescs
    if (pSubDevMemDesc == NULL || pGpu == NULL)
    {
        return;
    }

    bBcState = gpumgrGetBcEnabledStatus(pGpu);
    gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);

    do
    {
        pNextMemDesc = pSubDevMemDesc->_pNext;
        pSubDevMemDesc->_pNext = NULL;
        memdescFree(pSubDevMemDesc);
        memdescDestroy(pSubDevMemDesc);
        pSubDevMemDesc = pNextMemDesc;
    } while (pSubDevMemDesc != NULL);

    gpumgrSetBcEnabledStatus(pGpu, bBcState);
}

/*!
 * @brief Lower memdesc allocation layer for the special case of allocation
 *        in the VPR region when MODS is managing it.
 *
 * @param[in] pMemDesc Memory descriptor to allocate
 *
 * @returns NV_OK on successful allocation.
 *          NV_ERR_NOT_SUPPORTED if not supported
 */
static NV_STATUS
_memdescAllocVprRegion
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    return NV_ERR_NOT_SUPPORTED;
}

/*!
 * @brief Lower memdesc allocation layer. Provides underlying allocation
 * functionality.
 *
 * @param[in,out] pMemDesc Memory descriptor to allocate
 *
 * @returns NV_OK on successful allocation. Various NV_ERR_GENERIC codes otherwise.
 */
static NV_STATUS
_memdescAllocInternal
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    OBJGPU *pGpu = pMemDesc->pGpu;
    NV_STATUS status = NV_OK;
    FB_ALLOC_INFO *pFbAllocInfo = NULL;
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;

    if (pMemDesc->Allocated)
    {
        NV_ASSERT(!pMemDesc->Allocated);
        return NV_ERR_INVALID_OBJECT_BUFFER;
    }

    // Special case of an allocation request in MODS managed VPR region.
    status = _memdescAllocVprRegion(pMemDesc);
    if (status != NV_ERR_NOT_SUPPORTED)
        goto done;

    switch (pMemDesc->_addressSpace)
    {
        case ADDR_SYSMEM:
            // System memory can be obtained from osAllocPages
            status = osAllocPages(pMemDesc);
            if (status != NV_OK)
            {
                goto done;
            }

            //
            // The pages have been allocated, so mark the descriptor as
            // allocated. The IOMMU-mapping code needs the memdesc to be
            // allocated in order to create the mapping.
            //
            pMemDesc->Allocated = 1;

            //
            // TODO: merge new IOMMU paths with the SMMU paths below (see bug
            // 1625121). For now they are parallel, and only one will be
            // used.
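            //
            // The IOVA mapping created just below is torn down again by
            // _memdescFreeIommuMappings() when this memdesc is freed or
            // destroyed.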
            //
            if (!memdescGetFlag(pMemDesc, MEMDESC_FLAGS_CPU_ONLY) &&
                !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_SKIP_IOMMU_MAPPING))
            {
                status = memdescMapIommu(pMemDesc, pGpu->busInfo.iovaspaceId);
                if (status != NV_OK)
                {
                    pMemDesc->Allocated = 0;
                    osFreePages(pMemDesc);
                    goto done;
                }
            }

            if (pMemDesc->_flags & MEMDESC_FLAGS_PROVIDE_IOMMU_MAP)
            {
                NV_PRINTF(LEVEL_ERROR, "SMMU mapping allocation is not supported for ARMv7.\n");
                NV_ASSERT(0);
                status = NV_ERR_NOT_SUPPORTED;
                goto done;
            }
            else if ((pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) &&
                     RMCFG_FEATURE_PLATFORM_MODS)
            {
                if (pMemDesc->Alignment > RM_PAGE_SIZE)
                {
                    RmPhysAddr addr = memdescGetPhysAddr(pMemDesc, AT_CPU, 0);
                    NvU64 offset;

                    NV_ASSERT((addr & (RM_PAGE_SIZE - 1)) == 0);

                    NV_ASSERT((pMemDesc->Alignment & (pMemDesc->Alignment - 1)) == 0);
                    offset = addr & (pMemDesc->Alignment - 1);

                    if (offset)
                    {
                        NV_ASSERT((pMemDesc->PageCount * pMemDesc->pageArrayGranularity - pMemDesc->Size) >= offset);
                        NV_ASSERT(pMemDesc->PteAdjust == 0);
                        pMemDesc->PteAdjust += NvU64_LO32(pMemDesc->Alignment - offset);
                    }
                }
            }

            break;
        case ADDR_EGM:
        {
            MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
            NvU64 i;

            NV_ASSERT_OK_OR_GOTO(status, osAllocPages(pMemDesc), done);

            pMemDesc->Allocated = NV_TRUE;

            //
            // The EGM address in the GMMU PTE should be a zero-based address,
            // so the EGM base address is subtracted from the allocated EGM
            // address. HSHUB later adds the socket-local EGM base system
            // physical address (SPA) before sending the transaction to TH500.
            // A zero-based address is required in passthrough virtualization,
            // where the guest physical address is programmed in the GMMU PTE
            // and the SPA is available only in the HSHUB registers.
            //
            // A zero-based address is possible only when the EGM base address
            // is available. There are platforms (like off-DUT MODS and dGPU
            // MODS) where this is not available; there the full address is
            // populated in the GMMU PTE itself and HSHUB is not programmed to
            // add the SPA.
            //
            if (pMemoryManager->localEgmBasePhysAddr != 0)
            {
                for (i = 0; i < memdescGetPteArraySize(pMemDesc, AT_GPU); i++)
                {
                    RmPhysAddr addr = memdescGetPhysAddr(pMemDesc, AT_GPU, i * pMemDesc->pageArrayGranularity);
                    NV_ASSERT_TRUE_OR_GOTO(status, addr > pMemoryManager->localEgmBasePhysAddr,
                                           NV_ERR_INVALID_STATE, done);
                    memdescSetPte(pMemDesc, AT_GPU, i, addr - pMemoryManager->localEgmBasePhysAddr);
                    NV_PRINTF(LEVEL_INFO, "EGM allocation. pte index: %lld addr: 0x%llx zero-base addr: 0x%llx\n",
                              i, addr, addr - pMemoryManager->localEgmBasePhysAddr);
                }
            }

            if ((pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) &&
                RMCFG_FEATURE_PLATFORM_MODS)
            {
                if (pMemDesc->Alignment > RM_PAGE_SIZE)
                {
                    RmPhysAddr addr = memdescGetPhysAddr(pMemDesc, AT_CPU, 0);
                    NvU64 offset;

                    NV_ASSERT((addr & (RM_PAGE_SIZE - 1)) == 0);

                    NV_ASSERT((pMemDesc->Alignment & (pMemDesc->Alignment - 1)) == 0);
                    offset = addr & (pMemDesc->Alignment - 1);

                    if (offset)
                    {
                        NV_ASSERT((pMemDesc->PageCount * pMemDesc->pageArrayGranularity - pMemDesc->Size) >= offset);
                        NV_ASSERT(pMemDesc->PteAdjust == 0);
                        pMemDesc->PteAdjust += NvU64_LO32(pMemDesc->Alignment - offset);
                    }
                }
            }

            break;
        }
        case ADDR_FBMEM:
        {
            Heap *pHeap = pMemDesc->pHeap;

            if (RMCFG_FEATURE_PMA &&
                (pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL))
            {
                CTX_BUF_POOL_INFO *pCtxBufPool = NULL;
                pCtxBufPool = memdescGetCtxBufPool(pMemDesc);
                NV_ASSERT_TRUE_OR_GOTO(status, pCtxBufPool != NULL, NV_ERR_INVALID_STATE, done);

                // If the pool is set up then allocate from the pool
                NV_ASSERT_OK_OR_GOTO(status, ctxBufPoolAllocate(pCtxBufPool, pMemDesc), done);
            }
            else
            {
                // XXX Hack!
                MEMORY_ALLOCATION_REQUEST allocRequest = {0};
                NV_MEMORY_ALLOCATION_PARAMS allocData = {0};
                MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
                const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
                    kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
                NvU64 requestedSize = pMemDesc->Size;

                allocRequest.pUserParams = &allocData;

                // Don't allow FB allocations if FB is broken unless running in L2 cache only mode
                if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
                    !gpuIsCacheOnlyModeEnabled(pGpu))
                {
                    DBG_BREAKPOINT();
                    status = NV_ERR_BROKEN_FB;
                    goto done;
                }

                allocData.owner = HEAP_OWNER_RM_CHANNEL_CTX_BUFFER;
                allocData.type = NVOS32_TYPE_IMAGE;
                allocData.flags = NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE;

                // remove the "grows_down" flag when bReservedMemAtBottom is set so as to move RM memory to the bottom.
                if (pMemorySystemConfig != NULL && !pMemorySystemConfig->bReservedMemAtBottom)
                {
                    allocData.flags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;
                }

                // Allocate in high priority memory?
                if (pMemDesc->_flags & MEMDESC_FLAGS_HIGH_PRIORITY)
                {
                    allocData.attr2 |= DRF_DEF(OS32, _ATTR2, _PRIORITY, _HIGH);
                }
                else if (pMemDesc->_flags & MEMDESC_FLAGS_LOW_PRIORITY)
                {
                    allocData.attr2 |= DRF_DEF(OS32, _ATTR2, _PRIORITY, _LOW);
                }

                allocData.size = pMemDesc->Size;
                allocData.alignment = pMemDesc->Alignment;
                allocRequest.pMemDesc = pMemDesc;

                if (pMemDesc->_pageSize == RM_PAGE_SIZE)
                {
                    allocData.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
                }
                else if (pMemDesc->_pageSize == RM_PAGE_SIZE_64K ||
                         pMemDesc->_pageSize == RM_PAGE_SIZE_128K)
                {
                    allocData.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
                }

                allocData.flags |= pMemDesc->Alignment ?
                    NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE :
                    NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE;

                if (pMemDesc->_flags & MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE)
                {
                    allocData.flags |= NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
                    allocData.offset = pMemDesc->_pteArray[0];
                }

                if (pMemDesc->_gpuCacheAttrib == NV_MEMORY_CACHED)
                {
                    allocData.attr2 |= DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES);
                }
                else
                {
                    // Force internal allocations to uncached unless explicitly requested.
                    allocData.attr2 |= DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _NO);
                }

                allocData.attr2 = FLD_SET_DRF(OS32, _ATTR2, _INTERNAL, _YES, allocData.attr2);

                if (pMemDesc->_flags & MEMDESC_ALLOC_FLAGS_PROTECTED)
                {
                    allocData.flags |= NVOS32_ALLOC_FLAGS_PROTECTED;
                }

                //
                // Assume all RM internal allocations go into protected (CPR)
                // video memory unless specified otherwise explicitly
                //
                if (gpuIsCCFeatureEnabled(pGpu))
                {
                    if (pMemDesc->_flags & MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY)
                    {
                        //
                        // CC-TODO: Remove this check after non-CPR region is
                        // created. Not sure if RM will ever need to use non-CPR
                        // region for itself
                        //
                        NV_PRINTF(LEVEL_ERROR, "Non-CPR region still not created\n");
                        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
                    }
                    else
                    {
                        allocData.flags |= NVOS32_ALLOC_FLAGS_PROTECTED;
                    }
                }

                allocData.attr |= DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);

                pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
                NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);

                pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
                NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);

                portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
                portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
                pFbAllocInfo->pageFormat = pFbAllocPageFormat;

                memUtilsInitFBAllocInfo(&allocData, pFbAllocInfo, 0, 0); // Client/device N/A

                status = memmgrAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo);
                if (status != NV_OK)
                    goto done;

                status = vidmemAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo, pHeap);
                if (status != NV_OK)
                    goto done;

                pMemDesc->Alignment = allocData.alignment;

                // Update MemDesc GPU cacheability with results of allocation
                if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, allocData.attr2) == NVOS32_ATTR2_GPU_CACHEABLE_YES)
                {
                    pMemDesc->_gpuCacheAttrib = NV_MEMORY_CACHED;
                }
                else
                {
                    pMemDesc->_gpuCacheAttrib = NV_MEMORY_UNCACHED;
                }

                //
                // Adjust size to the requested size, not the heap rounded size. A number of callers
                // depend on this. In the future we will have the PageCount be accurate.
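                //
                // For example, a 5 KB request that the heap rounds up to 8 KB
                // still reports Size = 5 KB here, and with a 4 KB page array
                // granularity the formula below yields PageCount = 2.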
                //
                pMemDesc->Size = requestedSize;
                pMemDesc->PageCount = ((pMemDesc->Size + pMemDesc->PteAdjust + pMemDesc->pageArrayGranularity - 1) >>
                                       BIT_IDX_32(pMemDesc->pageArrayGranularity));
            }
            // We now have the memory
            pMemDesc->Allocated = 1;

            // If the allocation succeeds and it's PhysMemSubAlloc, increment the refCount
            if ((status == NV_OK) && (pHeap != NULL) &&
                (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR))
            {
                heapAddRef(pHeap);
            }
            break;
        }

        default:
            // Don't know how to do any other types of memory yet
            DBG_BREAKPOINT();
            status = NV_ERR_GENERIC;
            goto done;
    }

done:
    if (status == NV_OK)
        memdescPrintMemdesc(pMemDesc, NV_TRUE, MAKE_NV_PRINTF_STR("memdesc allocated"));

    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return status;
}

/*!
 * @brief Upper memdesc allocation layer. Provides support for per-subdevice
 * sysmem buffers and lockless sysmem allocation.
 *
 * @param[in,out] pMemDesc Memory descriptor to allocate
 *
 * @returns NV_OK on successful allocation. Various NV_ERR_GENERIC codes otherwise.
 */
NV_STATUS
memdescAlloc
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    OBJGPU *pGpu = pMemDesc->pGpu;
    NV_STATUS status = NV_OK;
    NvBool bcState = NV_FALSE;
    OBJSYS *pSys = SYS_GET_INSTANCE();
    NvBool reAcquire;
    NvU32 gpuMask = 0;

    NV_ASSERT_OR_RETURN(!pMemDesc->Allocated, NV_ERR_INVALID_OBJECT_BUFFER);

    switch (pMemDesc->_addressSpace)
    {
        case ADDR_SYSMEM:
        case ADDR_EGM:
            // Can't alloc sysmem on GSP firmware.
            if (RMCFG_FEATURE_PLATFORM_GSP && !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GUEST_ALLOCATED))
            {
                //
                // TO DO: Make this an error once existing allocations are cleaned up.
                // After that pHeap selection can be moved to memdescAllocInternal()
                //
                NV_PRINTF(LEVEL_WARNING,
                          "WARNING sysmem alloc on GSP firmware\n");
                pMemDesc->_addressSpace = ADDR_FBMEM;
                pMemDesc->pHeap = GPU_GET_HEAP(pGpu);
            }
            //
            // If AMD SEV is enabled but CC or APM is not enabled on the GPU,
            // all RM and client allocations must go to unprotected sysmem,
            // so we override any unprotected/protected flag set by either RM
            // or the client.
            // If APM is enabled and RM is allocating sysmem for its internal
            // use, such memory has to be unprotected, as protected sysmem is
            // not accessible to the GPU.
            //
            if ((sysGetStaticConfig(pSys))->bOsCCEnabled)
            {
                if (!gpuIsCCorApmFeatureEnabled(pGpu) ||
                    (gpuIsApmFeatureEnabled(pGpu) &&
                     !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_SYSMEM_OWNED_BY_CLIENT)))
                {
                    memdescSetFlag(pMemDesc,
                                   MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY, NV_TRUE);
                }
            }
            else
            {
                //
                // This flag has no meaning on non-SEV systems. So, unset it. The
                // OS layer currently honours this flag irrespective of whether
                // SEV is enabled or not
                //
                memdescSetFlag(pMemDesc,
                               MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY, NV_FALSE);
            }

            break;
        case ADDR_FBMEM:
        {
            //
            // When APM is enabled, all RM internal vidmem allocations go to
            // unprotected memory. There is an underlying assumption that
            // memdescAlloc won't be directly called in the client vidmem alloc
            // codepath. Note that memdescAlloc still gets called in the client
            // sysmem alloc codepath. See CONFCOMP-529
            //
            if (gpuIsApmFeatureEnabled(pGpu))
            {
                memdescSetFlag(pMemDesc,
                               MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY, NV_TRUE);
            }
            // If FB is broken then don't allow the allocation, unless running in L2 cache only mode
            if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
                !gpuIsCacheOnlyModeEnabled(pGpu))
            {
                status = NV_ERR_BROKEN_FB;
                NV_PRINTF(LEVEL_ERROR, "Unsupported FB bound allocation on broken FB(0FB) platform\n");
                DBG_BREAKPOINT();
            }

            NV_ASSERT(pMemDesc->pHeap == NULL);
            // Set the pHeap based on who owns this allocation
            if (pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL)
            {
                //
                // pHeap is not required in the memdesc for ctx buf pools
                // because each ctx buf pool is tied to PMA, and this pool is
                // cached inside the memdesc.
                //
                CTX_BUF_POOL_INFO *pCtxBufPool = memdescGetCtxBufPool(pMemDesc);
                NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_STATE);
            }
            else if (pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE)
            {
                NvBool bForceSubheap = NV_FALSE;

                if (pMemDesc->_flags & MEMDESC_FLAGS_FORCE_ALLOC_FROM_SUBHEAP)
                {
                    bForceSubheap = NV_TRUE;
                }

                pMemDesc->pHeap = memmgrGetDeviceSuballocator(GPU_GET_MEMORY_MANAGER(pGpu), bForceSubheap);
            }
            else if (GPU_GET_MEMORY_MANAGER(pGpu) != NULL &&
                     RMCFG_MODULE_HEAP &&
                     pMemDesc->_addressSpace == ADDR_FBMEM)
            {
                pMemDesc->pHeap = GPU_GET_HEAP(pGpu);
            }

            break;
        }
        default:
            // Don't know how to do any other types of memory yet
            DBG_BREAKPOINT();
            return NV_ERR_GENERIC;
    }

    if (status != NV_OK)
    {
        return status;
    }

    if (gpumgrGetBcEnabledStatus(pGpu))
    {
        // Broadcast memdescAlloc call with flag set to allocate per subdevice.
        if (pMemDesc->_flags & MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE)
        {
            NvU32 i;
            MEMORY_DESCRIPTOR *pSubDevMemDesc = pMemDesc;
            MEMORY_DESCRIPTOR *pPrev = pMemDesc;
            OBJGPU *pGpuChild;

            pMemDesc->_subDeviceAllocCount = NumSubDevices(pGpu);

            for (i = 0; i < pMemDesc->_subDeviceAllocCount; i++)
            {
                // Get pGpu for this subdeviceinst
                pGpuChild = gpumgrGetGpuFromSubDeviceInst(gpuGetDeviceInstance(pGpu), i);
                if (NULL == pGpuChild)
                {
                    NV_ASSERT(0);
                    status = NV_ERR_OBJECT_NOT_FOUND;
                    goto subdeviceAlloc_failed;
                }

                //
                // We are accessing the fields of the top level desc here directly without using the
                // accessor routines on purpose.
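                // Each per-subdevice clone below drops
                // MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE from its flags, so the
                // recursive memdescAlloc() call takes the unicast path.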
                //
                status = memdescCreate(&pSubDevMemDesc, pGpuChild, pMemDesc->Size, pMemDesc->Alignment,
                                       !!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS),
                                       pMemDesc->_addressSpace,
                                       pMemDesc->_cpuCacheAttrib,
                                       pMemDesc->_flags & ~MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE);

                if (status != NV_OK)
                {
                    NV_ASSERT(0);
                    goto subdeviceAlloc_failed;
                }

                pSubDevMemDesc->_gpuCacheAttrib = pMemDesc->_gpuCacheAttrib;
                pSubDevMemDesc->_pageSize = pMemDesc->_pageSize;

                // Force broadcast state to false when allocating a subdevice memdesc
                gpumgrSetBcEnabledStatus(pGpuChild, NV_FALSE);

                status = memdescAlloc(pSubDevMemDesc);

                if (pMemDesc->_addressSpace == ADDR_FBMEM)
                {
                    //
                    // The top level memdesc could have flags that don't reflect contiguity, which
                    // is set after memdescAlloc.
                    //
                    pMemDesc->Alignment = pSubDevMemDesc->Alignment;
                    pMemDesc->_flags = pSubDevMemDesc->_flags | MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
                    pMemDesc->ActualSize = pSubDevMemDesc->ActualSize;
                }

                // Restore broadcast state to true after allocating a subdevice memdesc
                gpumgrSetBcEnabledStatus(pGpuChild, NV_TRUE);

                if (status != NV_OK)
                {
                    memdescDestroy(pSubDevMemDesc);
                    NV_ASSERT(0);
                    goto subdeviceAlloc_failed;
                }

                //
                // Check that the previously allocated subdev matches the current one.
                // If subdev0 ~ subdev1 and subdev1 ~ subdev2, then subdev0 ~ subdev2,
                // and so on; this checks symmetry across all subdev allocations.
                //
                if (i > 0)
                {
                    NV_ASSERT(pPrev->Size == pSubDevMemDesc->Size);
                    NV_ASSERT(pPrev->PteAdjust == pSubDevMemDesc->PteAdjust);
                    NV_ASSERT(pPrev->_addressSpace == pSubDevMemDesc->_addressSpace);
                    NV_ASSERT(pPrev->_flags == pSubDevMemDesc->_flags);
                    NV_ASSERT(pPrev->_pteKind == pSubDevMemDesc->_pteKind);
                    NV_ASSERT(pPrev->_pteKindCompressed == pSubDevMemDesc->_pteKindCompressed);
                    NV_ASSERT(pPrev->pHeap != pSubDevMemDesc->pHeap);
                }

                pPrev->_pNext = pSubDevMemDesc;
                pPrev = pSubDevMemDesc;
            }
            pMemDesc->Allocated = 1;
            return NV_OK;
        }
        else if (pMemDesc->_addressSpace == ADDR_FBMEM)
        {
            // Broadcast memdescAlloc call on vidmem *without* flag set to allocate per subdevice
            NV_ASSERT(0);
        }
    }

    // Unicast memdescAlloc call but with flag set to allocate per subdevice.
    NV_ASSERT(!((pMemDesc->_flags & MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE) && !gpumgrGetBcEnabledStatus(pGpu)));

    reAcquire = NV_FALSE;
    bcState = NV_FALSE;

    if ((pMemDesc->_flags & MEMDESC_FLAGS_LOCKLESS_SYSMEM_ALLOC) && (pMemDesc->_addressSpace != ADDR_FBMEM))
    {
        bcState = gpumgrGetBcEnabledStatus(pGpu);
        if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL)
        {
            //
            // There is no equivalent routine for osCondReleaseRmSema in
            // the new basic lock model.
            //
            // However, we can't drop the RM system semaphore in this
            // path because on non-windows platforms (i.e. MODS) it
            // has undesirable consequences. So for now we must
            // bracket this section with a reference to the feature
            // flag until we can rework this interface.
            //
            // Check to make sure we own the lock and that we are
            // not at elevated IRQL; this models the behavior
            // of osCondReleaseRmSema.
            //
            if (!osIsRaisedIRQL() &&
                (rmGpuGroupLockIsOwner(pGpu->gpuInstance, GPU_LOCK_GRP_DEVICE, &gpuMask) ||
                 rmGpuGroupLockIsOwner(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE, &gpuMask)))
            {
                //
                // Release all owned gpu locks rather than just the
                // device-related locks because the caller may be holding more
                // than the required device locks. All currently owned
                // locks will be re-acquired before returning.
                //
                // This prevents potential GPU locking violations (e.g., if the
                // caller is holding all the gpu locks but only releases the
                // first of two device locks, then attempting to re-acquire
                // the first device lock will be a locking violation with
                // respect to the second device lock.)
                //
                gpuMask = rmGpuLocksGetOwnedMask();
                rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
                reAcquire = NV_TRUE;
            }
        }
        else
        {
            reAcquire = osCondReleaseRmSema(pSys->pSema);
        }
    }

    // Actually allocate the memory
    NV_CHECK_OK(status, LEVEL_ERROR, _memdescAllocInternal(pMemDesc));

    if (status != NV_OK)
    {
        pMemDesc->pHeap = NULL;
    }

    if (reAcquire)
    {
        if (osAcquireRmSema(pSys->pSema) != NV_OK)
        {
            DBG_BREAKPOINT();
        }

        if (rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_MASK,
                                  GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM,
                                  &gpuMask) != NV_OK)
        {
            DBG_BREAKPOINT();
        }
        // Releasing the semaphore allows another thread to enter RM and
        // modify broadcast state. We need to set it back (see bug 368643)
        gpumgrSetBcEnabledStatus(pGpu, bcState);
    }

    return status;

subdeviceAlloc_failed:
    _memSubDeviceFreeAndDestroy(pMemDesc);
    pMemDesc->_subDeviceAllocCount = 1;
    pMemDesc->_pNext = NULL;
    return status;
}

/*!
 * Allocate memory from one of the possible locations specified in pList.
 *
 * @param[in,out] pMemDesc Memory descriptor to allocate
 * @param[in]     pList    List of NV_ADDRESS_SPACE values. Terminated
 *                         by an ADDR_UNKNOWN entry.
 *
 * @returns NV_OK on successful allocation. Various NV_ERR_GENERIC codes otherwise.
 */
NV_STATUS
memdescAllocList
(
    MEMORY_DESCRIPTOR *pMemDesc,
    const NV_ADDRESS_SPACE *pList
)
{
    NV_STATUS status = NV_ERR_INVALID_ARGUMENT;
    NvU32 i = 0;

    if (!pList)
    {
        return status;
    }

    //
    // this memdesc may have gotten forced to sysmem if no carveout,
    // but for VPR it needs to be in vidmem, so check and re-direct here
    //
    if (pMemDesc->_flags & MEMDESC_ALLOC_FLAGS_PROTECTED)
    {
        OBJGPU *pGpu = pMemDesc->pGpu;

        // Only force to vidmem if not running with zero-FB.
        if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
            gpuIsCacheOnlyModeEnabled(pGpu))
        {
            pList = ADDRLIST_FBMEM_ONLY;
        }
    }

    while (pList[i] != ADDR_UNKNOWN)
    {
        pMemDesc->_addressSpace = pList[i];
        status = memdescAlloc(pMemDesc);

        if (status == NV_OK)
        {
            return status;
        }

        i++;
    }

    return status;
}
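
/*
 * Illustrative fallback usage (hypothetical caller): try vidmem first and
 * quietly fall back to sysmem when the FB heap cannot satisfy the request.
 *
 *   status = memdescAllocList(pMemDesc, ADDRLIST_FBMEM_PREFERRED);
 *   // on success, pMemDesc->_addressSpace reflects the space that won
 */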

/*!
 * @brief Lower memdesc free layer. Provides underlying free
 * functionality.
 *
 * @param[in,out] pMemDesc Memory descriptor to free
 *
 * @returns None
 */
static void
_memdescFreeInternal
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    MEM_DESC_DESTROY_CALLBACK *pCb, *pNext;
    NvU64 oldSize;

    // Allow null frees
    if (!pMemDesc)
    {
        return;
    }

    pCb = memdescGetDestroyCallbackList(pMemDesc);

    // Notify all interested parties of destruction
    while (pCb)
    {
        pNext = pCb->pNext;
        pCb->destroyCallback(pMemDesc->pGpu, pCb->pObject, pMemDesc);
        // pCb is now invalid
        pCb = pNext;
    }

    if (memdescHasSubDeviceMemDescs(pMemDesc))
        return;

    memdescPrintMemdesc(pMemDesc, NV_FALSE, MAKE_NV_PRINTF_STR("memdesc being freed"));

    // Bail out early in case this memdesc describes a MODS managed VPR region.
    if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_VPR_REGION_CLIENT_MANAGED))
        return;

    switch (pMemDesc->_addressSpace)
    {
        case ADDR_SYSMEM:
        case ADDR_EGM:
            // invalidate if memory is cached in FB L2 cache.
            if (pMemDesc->_gpuCacheAttrib == NV_MEMORY_CACHED)
            {
                OBJGPU *pGpu = pMemDesc->pGpu;

                //
                // If this memdesc managed to outlive its pGpu getting detached,
                // we're plenty off the rails already, but avoid using the pGpu
                // and carry on as best we can
                //
                if (gpumgrIsGpuPointerValid(pGpu))
                {
                    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
                    {
                        KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
                        NV_ASSERT_OK(kmemsysCacheOp_HAL(pGpu, pKernelMemorySystem, pMemDesc,
                                                        FB_CACHE_SYSTEM_MEMORY,
                                                        FB_CACHE_INVALIDATE));
                    }
                    SLI_LOOP_END
                }
                else
                {
                    NV_ASSERT_FAILED("Sysmemdesc outlived its attached pGpu");
                }
            }

            oldSize = pMemDesc->Size;
            pMemDesc->Size = pMemDesc->ActualSize;
            pMemDesc->PageCount = ((pMemDesc->ActualSize + pMemDesc->pageArrayGranularity - 1) >> BIT_IDX_64(pMemDesc->pageArrayGranularity));

            osFreePages(pMemDesc);

            pMemDesc->Size = oldSize;
            pMemDesc->PageCount = ((oldSize + pMemDesc->pageArrayGranularity - 1) >> BIT_IDX_64(pMemDesc->pageArrayGranularity));

            break;

        case ADDR_FBMEM:
        {
            Heap *pHeap = pMemDesc->pHeap;
            NV_STATUS status = NV_OK;
            OBJGPU *pGpu = pMemDesc->pGpu;

            if (RMCFG_FEATURE_PMA &&
                (pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL))
            {
                CTX_BUF_POOL_INFO *pCtxBufPool = memdescGetCtxBufPool(pMemDesc);
                if (pCtxBufPool == NULL)
                {
                    DBG_BREAKPOINT();
                    NV_PRINTF(LEVEL_ERROR, "ctx buf pool not found\n");
                    return;
                }
                status = ctxBufPoolFree(pCtxBufPool, pMemDesc);
                if (status != NV_OK)
                {
                    DBG_BREAKPOINT();
                    NV_PRINTF(LEVEL_ERROR, "Failed to free memdesc from context buffer pool\n");
                }
            }
            else
            {
                MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

                NV_ASSERT(pHeap != NULL);

                if (!pHeap)
                    return;

                status = memmgrFree(pGpu, pMemoryManager, pHeap, 0x0, 0x0, 0x0,
                                    HEAP_OWNER_RM_CHANNEL_CTX_BUFFER,
                                    pMemDesc);
                NV_ASSERT(status == NV_OK);
            }

            // If this heap is being used to manage PMSA memory, reduce the refcount accordingly
            if ((status == NV_OK) && (pHeap != NULL) &&
                (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR))
            {
                heapRemoveRef(pHeap);
            }
            break;
        }

        default:
            // Don't know how to do any other types of memory yet
            DBG_BREAKPOINT();
    }
}

/*!
 * @brief Upper memdesc free layer. Provides support for per-subdevice
 * sysmem buffers and lockless sysmem allocation. Because SLI and subdevice
 * submem allocations (refer to the submem chart) are supported, if the memory
 * has never been allocated this function just unlinks the subdevice structure
 * and destroys the subdevice descriptors.
 *
 * @param[in,out] pMemDesc Memory descriptor to free
 *
 * @returns None
 */
void
memdescFree
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    // Allow null frees
    if (!pMemDesc)
    {
        return;
    }

    if (memdescIsSubMemoryMemDesc(pMemDesc))
    {
        NV_ASSERT(!pMemDesc->_pInternalMapping);

        if (pMemDesc->_addressSpace == ADDR_SYSMEM)
        {
            // The memdesc is being freed so destroy all of its IOMMU mappings.
            _memdescFreeIommuMappings(pMemDesc);
        }

        if (pMemDesc->_addressSpace != ADDR_FBMEM &&
            pMemDesc->_addressSpace != ADDR_SYSMEM)
        {
            return;
        }

        _memSubDeviceFreeAndDestroy(pMemDesc);
    }
    else
    {
        //
        // In case RM attempts to free memory that has more than 1 refcount, the free is deferred until refcount reaches 0
        //
        // Bug 3307574 RM crashes when clients specify a sysmem UserD location.
        // RM attempts to peek at the client allocated UserD when waiting for a channel to go idle.
        //
        if (pMemDesc->RefCount > 1 && pMemDesc->Allocated == 1)
        {
            pMemDesc->bDeferredFree = NV_TRUE;
            return;
        }

        if (!pMemDesc->Allocated)
        {
            /*
             * For sysmem not allocated by RM but only registered to it, we
             * would need to update the shared sysmem pfn bitmap here
             */
            return;
        }
        pMemDesc->Allocated--;
        if (0 != pMemDesc->Allocated)
        {
            return;
        }

        // If standbyBuffer memory was allocated then free it
        if (pMemDesc->_pStandbyBuffer)
        {
            memdescFree(pMemDesc->_pStandbyBuffer);
            memdescDestroy(pMemDesc->_pStandbyBuffer);
            pMemDesc->_pStandbyBuffer = NULL;
        }

        NV_ASSERT(!pMemDesc->_pInternalMapping);

        if (pMemDesc->_addressSpace == ADDR_SYSMEM)
        {
            // The memdesc is being freed so destroy all of its IOMMU mappings.
            _memdescFreeIommuMappings(pMemDesc);
        }

        if (pMemDesc->_addressSpace != ADDR_FBMEM &&
            pMemDesc->_addressSpace != ADDR_EGM &&
            pMemDesc->_addressSpace != ADDR_SYSMEM)
        {
            return;
        }

        _memSubDeviceFreeAndDestroy(pMemDesc);

        _memdescFreeInternal(pMemDesc);
    }

    // Reset tracking state
    pMemDesc->_pNext = NULL;
    pMemDesc->_subDeviceAllocCount = 1;

    //
    // Reset tracking state of parent
    // Why it is needed:
    // When a submemory toplevel memdesc with subdevices is freed,
    // the subdevice memdescs and their parent are destroyed or their
    // refcount decreased.
    // When the parent subdevice descriptors are destroyed, their
    // top level descriptor is left alone and has a dangling
    // _pNext pointer
    //
    if ((pMemDesc->_pParentDescriptor != NULL) &&
        (memdescHasSubDeviceMemDescs(pMemDesc->_pParentDescriptor)) &&
        (pMemDesc->_pParentDescriptor->RefCount == 1))
    {
        pMemDesc->_pParentDescriptor->_pNext = NULL;
        pMemDesc->_pParentDescriptor->_subDeviceAllocCount = 1;
    }
}
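
/*
 * Lock/unlock usage sketch for MEMDESC_FLAGS_PAGED_SYSMEM descriptors
 * (hypothetical caller; the flag is Windows-only, and memdescCreate
 * rejects it elsewhere):
 *
 *   NV_ASSERT_OK(memdescLock(pMemDesc));     // pin the pageable backing
 *   // ... access the memory ...
 *   NV_ASSERT_OK(memdescUnlock(pMemDesc));
 */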

/*!
 * @brief Lock the paged virtual memory described by the memory descriptor
 *
 * @param[in] pMemDesc Memory descriptor to lock
 *
 * @returns NV_OK on success. Various NV_ERR_GENERIC codes otherwise.
 */
NV_STATUS
memdescLock
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    if (!(pMemDesc->_flags & MEMDESC_FLAGS_PAGED_SYSMEM))
    {
        return NV_ERR_ILLEGAL_ACTION;
    }

    return osLockMem(pMemDesc);
}

/*!
 * @brief Unlock the paged virtual memory described by the memory descriptor
 *
 * @param[in] pMemDesc Memory descriptor to unlock
 *
 * @returns NV_OK on success. Various NV_ERR_GENERIC codes otherwise.
 */
NV_STATUS
memdescUnlock
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    if (!(pMemDesc->_flags & MEMDESC_FLAGS_PAGED_SYSMEM))
    {
        return NV_ERR_ILLEGAL_ACTION;
    }

    return osUnlockMem(pMemDesc);
}

/*!
 * @brief Get a CPU mapping to the memory described by a memory descriptor
 *
 * This is for memory descriptors used by RM clients, not by the RM itself.
 * For internal mappings the busMapRmAperture() hal routines are used.
 *
 * @param[in]  pMemDesc Memory descriptor to map
 * @param[in]  Offset   Offset into memory descriptor to start map
 * @param[in]  Size     Size of mapping
 * @param[in]  Kernel   Kernel or user address space
 * @param[in]  Protect  NV_PROTECT_*
 * @param[out] pAddress Return address
 * @param[out] pPriv    Return cookie to be passed back to memdescUnmap
 *
 * @returns NV_STATUS
 */
NV_STATUS
memdescMapOld
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64 Offset,
    NvU64 Size,
    NvBool Kernel,
    NvU32 Protect,
    void **pAddress,
    void **pPriv
)
{
    NvP64 pAddressP64 = NV_PTR_TO_NvP64(*pAddress);
    NvP64 pPrivP64 = NV_PTR_TO_NvP64(*pPriv);
    NV_STATUS status;

#if !defined(NV_64_BITS)
    NV_ASSERT(Kernel);
#endif

    status = memdescMap(pMemDesc,
                        Offset,
                        Size,
                        Kernel,
                        Protect,
                        &pAddressP64,
                        &pPrivP64);

    *pAddress = NvP64_VALUE(pAddressP64);
    *pPriv = NvP64_VALUE(pPrivP64);

    return status;
}

NV_STATUS
memdescMap
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64 Offset,
    NvU64 Size,
    NvBool Kernel,
    NvU32 Protect,
    NvP64 *pAddress,
    NvP64 *pPriv
)
{
    NV_STATUS status = NV_OK;
    NvU64 rootOffset = 0;

    NV_ASSERT_OR_RETURN(((Offset + Size) <= memdescGetSize(pMemDesc)), NV_ERR_INVALID_ARGUMENT);

    pMemDesc = memdescGetRootMemDesc(pMemDesc, &rootOffset);
    Offset += rootOffset;

    if (pMemDesc->PteAdjust &&
        (pMemDesc->Alignment > RM_PAGE_SIZE) &&
        (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) &&
        RMCFG_FEATURE_PLATFORM_MODS)
    {
        Offset += pMemDesc->PteAdjust;
    }

    NV_ASSERT_OR_RETURN(!memdescHasSubDeviceMemDescs(pMemDesc), NV_ERR_INVALID_OBJECT_BUFFER);

    switch (pMemDesc->_addressSpace)
    {
        case ADDR_SYSMEM:
        case ADDR_EGM:
        {
            status = osMapSystemMemory(pMemDesc, Offset, Size,
                                       Kernel, Protect, pAddress, pPriv);
            if (status != NV_OK)
            {
                return status;
            }
            break;
        }

        case ADDR_FBMEM:
        {
            OBJGPU *pGpu = pMemDesc->pGpu;
            NvU32 mode = NV_MEMORY_WRITECOMBINED;
            KernelBus *pKernelBus;
            FB_MAPPING_INFO *pMapping;
            RmPhysAddr bar1PhysAddr;
            NvBool bCoherentCpuMapping;

            NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);

            pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
            bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);

            // Need struct to keep track of the info for this mapping
            pMapping = portMemAllocNonPaged(sizeof(FB_MAPPING_INFO));
            if (pMapping == NULL)
            {
                return NV_ERR_NO_MEMORY;
            }

            if (bCoherentCpuMapping)
            {
                NV_ASSERT(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);

                if (Kernel)
                {
                    NvP64 tempCpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
                    if (tempCpuPtr == NULL)
                    {
                        status = NV_ERR_GENERIC;
                    }
                    else
                    {
                        status = NV_OK;
                        tempCpuPtr = NvP64_PLUS_OFFSET(tempCpuPtr, Offset);
                    }
                    *pAddress = tempCpuPtr;
                }
                else
                {
                    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
                    NvU64 fbOffset = pMemDesc->_pteArray[0] +
                                     pMemDesc->PteAdjust + Offset;
                    bar1PhysAddr = pKernelMemorySystem->coherentCpuFbBase + fbOffset;
                    mode = NV_MEMORY_CACHED;

                    status = osMapPciMemoryUser(pGpu->pOsGpuInfo, bar1PhysAddr,
                                                Size, Protect, pAddress,
                                                &pMapping->pPriv,
                                                mode);
                }

                if (status != NV_OK)
                {
                    portMemFree(pMapping);
                    return status;
                }

                NV_PRINTF(LEVEL_INFO, "Allocating coherent link mapping. VA: %p PA: 0x%llx size: 0x%llx\n",
                          NvP64_VALUE(*pAddress),
                          memdescGetPhysAddr(pMemDesc, AT_GPU, Offset), Size);

                *pPriv = NV_PTR_TO_NvP64(pMapping);
                break;
            }

            // Determine where in BAR1 the mapping will go
            pMapping->FbApertureLen = Size;
            status = kbusMapFbAperture_HAL(pGpu, pKernelBus,
                                           pMemDesc, Offset,
                                           &pMapping->FbAperture,
                                           &pMapping->FbApertureLen,
                                           BUS_MAP_FB_FLAGS_MAP_UNICAST,
                                           NULL);
            if (status != NV_OK)
            {
                portMemFree(pMapping);
                return status;
            }

            bar1PhysAddr = gpumgrGetGpuPhysFbAddr(pGpu) + pMapping->FbAperture;
            mode = NV_MEMORY_WRITECOMBINED;

            // Create the mapping
            if (Kernel)
            {
                status = osMapPciMemoryKernel64(pGpu, bar1PhysAddr,
                                                Size, Protect, pAddress,
                                                mode);
            }
            else
            {
                status = osMapPciMemoryUser(pGpu->pOsGpuInfo, bar1PhysAddr,
                                            Size, Protect, pAddress,
                                            &pMapping->pPriv,
                                            mode);
            }

            if (status != NV_OK)
            {
                if (!bCoherentCpuMapping)
                {
                    kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pMemDesc,
                                            pMapping->FbAperture,
                                            pMapping->FbApertureLen,
                                            BUS_MAP_FB_FLAGS_MAP_UNICAST);
                }
                portMemFree(pMapping);
                return status;
            }

            *pPriv = NV_PTR_TO_NvP64(pMapping);
            break;
        }

        default:
            // Don't know how to do any other types of memory yet
            DBG_BREAKPOINT();
            return NV_ERR_GENERIC;
    }
    return NV_OK;
}

void
memdescUnmapOld
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvBool Kernel,
    NvU32 ProcessId,
    void *Address,
    void *Priv
)
{
    memdescUnmap(pMemDesc,
                 Kernel,
                 ProcessId,
                 NV_PTR_TO_NvP64(Address),
                 NV_PTR_TO_NvP64(Priv));
}
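
/*
 * Map/unmap usage sketch (hypothetical kernel-space caller; NvP64_NULL and
 * NV_PROTECT_READ_WRITE come from the usual NV SDK headers):
 *
 *   NvP64 va   = NvP64_NULL;
 *   NvP64 priv = NvP64_NULL;
 *   NV_ASSERT_OK_OR_RETURN(memdescMap(pMemDesc, 0, memdescGetSize(pMemDesc),
 *                                     NV_TRUE, NV_PROTECT_READ_WRITE,
 *                                     &va, &priv));
 *   // ... access the memory through va ...
 *   memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(), va, priv);
 */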
1958 * @brief Remove a mapping for the memory descriptor, reversing memdescMap 1959 * 1960 * @param[in] pMemDesc Memory descriptor to unmap 1961 * @param[in] Kernel Kernel or user address space 1962 * @param[in] ProcessId Process ID if user space 1963 * @param[in] Address Mapped address 1964 * @param[in] Priv Return priv cookie from memdescMap 1965 * 1966 * @returns None 1967 */ 1968 void 1969 memdescUnmap 1970 ( 1971 MEMORY_DESCRIPTOR *pMemDesc, 1972 NvBool Kernel, 1973 NvU32 ProcessId, 1974 NvP64 Address, 1975 NvP64 Priv 1976 ) 1977 { 1978 // Allow null unmaps 1979 if (!Address) 1980 return; 1981 1982 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 1983 1984 1985 // find first allocated parent descriptor 1986 while (!pMemDesc->Allocated && pMemDesc->_pParentDescriptor) 1987 { 1988 pMemDesc = pMemDesc->_pParentDescriptor; 1989 } 1990 1991 switch (pMemDesc->_addressSpace) 1992 { 1993 case ADDR_SYSMEM: 1994 { 1995 osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv); 1996 break; 1997 } 1998 1999 case ADDR_FBMEM: 2000 { 2001 OBJGPU *pGpu = pMemDesc->pGpu; 2002 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 2003 FB_MAPPING_INFO *pMapping = (FB_MAPPING_INFO *)NvP64_VALUE(Priv); 2004 NvBool bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING); 2005 NvU64 Size = pMapping->FbApertureLen; 2006 2007 NV_ASSERT(!(pMemDesc->_flags & MEMDESC_FLAGS_CPU_ONLY)); 2008 2009 if (bCoherentCpuMapping) 2010 { 2011 if (Kernel) 2012 { 2013 kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc); 2014 } 2015 else 2016 { 2017 osUnmapPciMemoryUser(pGpu->pOsGpuInfo, Address, Size, 2018 pMapping->pPriv); 2019 } 2020 2021 portMemFree(pMapping); 2022 break; 2023 } 2024 2025 kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pMemDesc, 2026 pMapping->FbAperture, 2027 Size, 2028 BUS_MAP_FB_FLAGS_MAP_UNICAST); 2029 if (Kernel) 2030 { 2031 osUnmapPciMemoryKernel64(pGpu, Address); 2032 } 2033 else 2034 { 2035 osUnmapPciMemoryUser(pGpu->pOsGpuInfo, Address, Size, 2036 pMapping->pPriv); 2037 } 2038 2039 portMemFree(pMapping); 2040 break; 2041 } 2042 2043 default: 2044 // Don't know how to do any other types of memory yet 2045 DBG_BREAKPOINT(); 2046 } 2047 } 2048 2049 typedef enum 2050 { 2051 MEMDESC_MAP_INTERNAL_TYPE_GSP, // On GSP, use a pre-existing mapping 2052 MEMDESC_MAP_INTERNAL_TYPE_COHERENT_FBMEM, // For NVLINK, use a pre-existing mapping for fbmem 2053 MEMDESC_MAP_INTERNAL_TYPE_BAR2, // Use BAR2 (fbmem or reflected sysmem) 2054 MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT, // Use OS to map sysmem 2055 } MEMDESC_MAP_INTERNAL_TYPE; 2056 2057 static MEMDESC_MAP_INTERNAL_TYPE 2058 memdescGetMapInternalType 2059 ( 2060 OBJGPU *pGpu, 2061 MEMORY_DESCRIPTOR *pMemDesc 2062 ) 2063 { 2064 if (RMCFG_FEATURE_PLATFORM_GSP) 2065 { 2066 return MEMDESC_MAP_INTERNAL_TYPE_GSP; 2067 } 2068 else if (pMemDesc->_addressSpace == ADDR_FBMEM && 2069 pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING)) 2070 { 2071 // Temporary hack to keep the same behavior on GV100F (dVOLTA & DFPGA) 2072 if (IsDFPGA(pGpu)) 2073 return MEMDESC_MAP_INTERNAL_TYPE_BAR2; 2074 2075 return MEMDESC_MAP_INTERNAL_TYPE_COHERENT_FBMEM; 2076 } 2077 else 2078 { 2079 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 2080 NvBool bUseDirectMap = NV_FALSE; 2081 NV_STATUS status; 2082 2083 status = kbusUseDirectSysmemMap_HAL(pGpu, pKernelBus, pMemDesc, &bUseDirectMap); 2084 NV_ASSERT_OR_RETURN(status == NV_OK, MEMDESC_MAP_INTERNAL_TYPE_BAR2); 2085 2086 return bUseDirectMap ? 
MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT : MEMDESC_MAP_INTERNAL_TYPE_BAR2;
    }

    // Not reached -- every branch above returns.
    return MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT;
}

void
memdescFlushGpuCaches
(
    OBJGPU *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);

    if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
    {
        //
        // Only the non-coherent memory path is available, so write back and
        // invalidate the GPU L2
        //
        kmemsysCacheOp_HAL(pGpu, pKernelMemorySystem, pMemDesc, FB_CACHE_MEM_UNDEFINED, FB_CACHE_EVICT);
    }
}

void
memdescFlushCpuCaches
(
    OBJGPU *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    // Flush WC to get the data written to this mapping out to memory
    osFlushCpuWriteCombineBuffer();

    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    // Special care is needed on SOC, where the GPU cannot snoop the CPU L2
    if ((pKernelBif != NULL) &&
        !kbifIsSnoopDmaCapable(pGpu, pKernelBif) &&
        (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED))
    {
        // Flush CPU L2 so that the GPU will see any changes the CPU made
        osFlushCpuCache();
    }
}

/*
 * @brief Map a memory descriptor for internal (RM) access
 *
 * @param[in] flags Subset of TRANSFER_FLAGS_*
 */
void*
memdescMapInternal
(
    OBJGPU *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 flags
)
{
    MEMDESC_MAP_INTERNAL_TYPE mapType;
    NV_STATUS status;

    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL);

    if (pMemDesc->_addressSpace == ADDR_FBMEM)
    {
        pMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu);
    }

    mapType = memdescGetMapInternalType(pGpu, pMemDesc);

    // We need to flush & invalidate the GPU L2 cache only for direct sysmem mappings.
    // Reflected (BAR) mappings access memory via the GPU, and hence go through the GPU L2 cache.
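    // (Sketch of the rationale: a direct sysmem mapping bypasses the GPU, so any
    // dirty GPU L2 lines for this memory must be evicted first; BAR accesses are
    // issued through the GPU and therefore already see the GPU L2 contents.)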
2160 if (mapType == MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT) 2161 memdescFlushGpuCaches(pGpu, pMemDesc); 2162 2163 if (pMemDesc->_pInternalMapping != NULL) 2164 { 2165 NV_ASSERT(pMemDesc->_internalMappingRefCount); 2166 2167 // Existing BAR2 mapping may be invalid due to GPU reset 2168 if (mapType == MEMDESC_MAP_INTERNAL_TYPE_BAR2) 2169 { 2170 pMemDesc->_pInternalMapping = kbusValidateBar2ApertureMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc, 2171 pMemDesc->_pInternalMapping); 2172 NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemDesc->_pInternalMapping != NULL, NULL); 2173 } 2174 2175 pMemDesc->_internalMappingRefCount++; 2176 return pMemDesc->_pInternalMapping; 2177 } 2178 2179 switch (mapType) 2180 { 2181 case MEMDESC_MAP_INTERNAL_TYPE_GSP: 2182 NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemDesc->_pInternalMapping != NULL, NULL); 2183 break; 2184 case MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT: 2185 { 2186 status = memdescMapOld(pMemDesc, 0, pMemDesc->Size, NV_TRUE, NV_PROTECT_READ_WRITE, 2187 &pMemDesc->_pInternalMapping, &pMemDesc->_pInternalMappingPriv); 2188 NV_CHECK_OR_RETURN(LEVEL_ERROR, status == NV_OK, NULL); 2189 break; 2190 } 2191 case MEMDESC_MAP_INTERNAL_TYPE_COHERENT_FBMEM: 2192 { 2193 NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED)); 2194 pMemDesc->_pInternalMapping = kbusMapCoherentCpuMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc); 2195 NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemDesc->_pInternalMapping != NULL, NULL); 2196 break; 2197 } 2198 case MEMDESC_MAP_INTERNAL_TYPE_BAR2: 2199 pMemDesc->_pInternalMapping = kbusMapBar2Aperture_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc, flags); 2200 NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemDesc->_pInternalMapping != NULL, NULL); 2201 break; 2202 2203 default: 2204 DBG_BREAKPOINT(); 2205 } 2206 2207 pMemDesc->_internalMappingRefCount = 1; 2208 return pMemDesc->_pInternalMapping; 2209 } 2210 2211 void memdescUnmapInternal 2212 ( 2213 OBJGPU *pGpu, 2214 MEMORY_DESCRIPTOR *pMemDesc, 2215 NvU32 flags 2216 ) 2217 { 2218 MEMDESC_MAP_INTERNAL_TYPE mapType; 2219 2220 NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL); 2221 NV_ASSERT_OR_RETURN_VOID(pMemDesc->_pInternalMapping != NULL && pMemDesc->_internalMappingRefCount != 0); 2222 2223 if (pMemDesc->_addressSpace == ADDR_FBMEM) 2224 { 2225 pMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu); 2226 } 2227 2228 mapType = memdescGetMapInternalType(pGpu, pMemDesc); 2229 2230 if (mapType == MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT || mapType == MEMDESC_MAP_INTERNAL_TYPE_BAR2) 2231 { 2232 memdescFlushCpuCaches(pGpu, pMemDesc); 2233 } 2234 2235 if (--pMemDesc->_internalMappingRefCount == 0) 2236 { 2237 switch (mapType) 2238 { 2239 case MEMDESC_MAP_INTERNAL_TYPE_GSP: 2240 break; 2241 case MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT: 2242 memdescUnmapOld(pMemDesc, NV_TRUE, 0, 2243 pMemDesc->_pInternalMapping, pMemDesc->_pInternalMappingPriv); 2244 break; 2245 2246 case MEMDESC_MAP_INTERNAL_TYPE_COHERENT_FBMEM: 2247 { 2248 kbusUnmapCoherentCpuMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc); 2249 break; 2250 } 2251 case MEMDESC_MAP_INTERNAL_TYPE_BAR2: 2252 { 2253 NvU8 *p = (NvU8 *)pMemDesc->_pInternalMapping; 2254 2255 kbusUnmapBar2ApertureWithFlags_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc, &p, flags); 2256 break; 2257 } 2258 2259 default: 2260 DBG_BREAKPOINT(); 2261 } 2262 2263 pMemDesc->_pInternalMapping = NULL; 2264 pMemDesc->_pInternalMappingPriv = NULL; 2265 pMemDesc->_internalMappingRefCount = 0; 2266 } 2267 2268 // Flush for direct mappings too to keep the behavior 2269 if (((flags & 
TRANSFER_FLAGS_DEFER_FLUSH) == 0) && 2270 (mapType == MEMDESC_MAP_INTERNAL_TYPE_SYSMEM_DIRECT || mapType == MEMDESC_MAP_INTERNAL_TYPE_BAR2)) 2271 { 2272 kbusFlush_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), 2273 kbusGetFlushAperture(GPU_GET_KERNEL_BUS(pGpu), memdescGetAddressSpace(pMemDesc)) | BUS_FLUSH_USE_PCIE_READ); 2274 } 2275 } 2276 2277 /*! 2278 * Describe an existing region of memory in a memory descriptor 2279 * 2280 * Memory must be physically contiguous. 2281 * 2282 * The memory descriptor must be initialized with 2283 * memdescCreate*(), typically memdescCreateExisting() 2284 * prior to calling memdescDescribe. 2285 * 2286 * memdescDescribe() now only updates the fields needed in the call. 2287 * 2288 * @param[out] pMemDesc Memory descriptor to fill 2289 * @param[in] AddressSpace Address space of memory 2290 * @param[in] Base Physical address of region 2291 * @param[in] Size Size of region 2292 * 2293 * @returns None 2294 */ 2295 void 2296 memdescDescribe 2297 ( 2298 MEMORY_DESCRIPTOR *pMemDesc, 2299 NV_ADDRESS_SPACE AddressSpace, 2300 RmPhysAddr Base, 2301 NvU64 Size 2302 ) 2303 { 2304 // Some sanity checks to see if we went through MemCreate*() first 2305 NV_ASSERT((pMemDesc->RefCount == 1) && 2306 (memdescGetDestroyCallbackList(pMemDesc) == NULL) && 2307 (pMemDesc->PteAdjust == 0)); 2308 2309 NV_ASSERT(pMemDesc->_pIommuMappings == NULL); 2310 NV_ASSERT(pMemDesc->Allocated == 0); 2311 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 2312 2313 // 2314 // Check if the base address accounts for the DMA window start address 2315 // (always in the high, unaddressable bits of the address) and add it 2316 // if necessary. On most platforms, the DMA window start address will 2317 // simply be 0. 2318 // 2319 // This is most likely to happen in cases where the Base address is 2320 // read directly from a register or MMU entry, which does not already 2321 // account for the DMA window. 2322 // 2323 if (pMemDesc->pGpu == NULL) 2324 { 2325 NV_PRINTF(LEVEL_WARNING, 2326 "unable to check Base 0x%016llx for DMA window\n", Base); 2327 } 2328 else if (AddressSpace == ADDR_SYSMEM) 2329 { 2330 OBJGPU *pGpu = pMemDesc->pGpu; 2331 if (pGpu) 2332 { 2333 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 2334 NvU32 physAddrWidth = gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM); 2335 if ((Base & ~(NVBIT64(physAddrWidth) - 1)) == 0) 2336 { 2337 Base += pKernelBif->dmaWindowStartAddress; 2338 } 2339 } 2340 } 2341 2342 if (pMemDesc->Alignment != 0) 2343 { 2344 NV_ASSERT(NV_FLOOR_TO_QUANTA(Base, pMemDesc->Alignment) == Base); 2345 } 2346 2347 pMemDesc->Size = Size; 2348 pMemDesc->ActualSize = Size; 2349 pMemDesc->_flags |= MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS; 2350 pMemDesc->_addressSpace = AddressSpace; 2351 pMemDesc->_pteArray[0] = Base & ~RM_PAGE_MASK; 2352 pMemDesc->_subDeviceAllocCount = 1; 2353 pMemDesc->PteAdjust = NvU64_LO32(Base) & RM_PAGE_MASK; 2354 pMemDesc->PageCount = ((Size + pMemDesc->PteAdjust + RM_PAGE_SIZE - 1) >> RM_PAGE_SHIFT); 2355 pMemDesc->_pParentDescriptor = NULL; 2356 pMemDesc->childDescriptorCnt = 0; 2357 } 2358 2359 /*! 2360 * Static helper called from memdescFillPages. 2361 * When dynamic granularity memdescs are enabled. We only need to copy over the pages 2362 * without worrying about converting them to 4K. 
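 *
 * For example, with pageSize = 64K and pageCount = 4, the four 64K page
 * addresses land in _pteArray[pageIndex .. pageIndex + 3] as-is, instead of
 * being expanded into sixty-four 4K entries as memdescFillPages() would
 * otherwise do.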
2363 * 2364 * @param[in] pMemDesc Memory descriptor to fill 2365 * @param[in] pageIndex Index into memory descriptor to fill from 2366 * @param[in] pPages Array of physical addresses 2367 * @param[in] pageCount Number of entries in pPages 2368 * @param[in] pageSize Size of each page in pPages 2369 * 2370 * @returns None 2371 */ 2372 static void 2373 _memdescFillPagesAtNativeGranularity 2374 ( 2375 MEMORY_DESCRIPTOR *pMemDesc, 2376 NvU32 pageIndex, 2377 NvU64 *pPages, 2378 NvU32 pageCount, 2379 NvU64 pageSize 2380 ) 2381 { 2382 NV_STATUS status; 2383 2384 NV_ASSERT(pageIndex + pageCount < pMemDesc->PageCount); 2385 2386 status = memdescSetPageArrayGranularity(pMemDesc, pageSize); 2387 if (status != NV_OK) 2388 { 2389 return; 2390 } 2391 2392 for (NvU32 i = 0; i < pageCount; i++) 2393 { 2394 pMemDesc->_pteArray[pageIndex + i] = pPages[i]; 2395 } 2396 2397 pMemDesc->ActualSize = pageCount * pageSize; 2398 } 2399 2400 /*! 2401 * Fill the PTE array of a memory descriptor with an array of addresses 2402 * returned by pmaAllocatePages(). 2403 * 2404 * Memory must be physically discontiguous. For the contiguous case 2405 * memdescDescribe() is more apt. 2406 * 2407 * The memory descriptor must be initialized with memdescCreate*(), 2408 * typically memdescCreateExisting() prior to calling 2409 * memdescFillPages(). 2410 * 2411 * @param[in] pMemDesc Memory descriptor to fill 2412 * @param[in] pageIndex Index into memory descriptor to fill from 2413 * @param[in] pPages Array of physical addresses 2414 * @param[in] pageCount Number of entries in pPages 2415 * @param[in] pageSize Size of each page in pPages 2416 * 2417 * @returns None 2418 */ 2419 void 2420 memdescFillPages 2421 ( 2422 MEMORY_DESCRIPTOR *pMemDesc, 2423 NvU32 pageIndex, 2424 NvU64 *pPages, 2425 NvU32 pageCount, 2426 NvU64 pageSize 2427 ) 2428 { 2429 OBJGPU *pGpu = gpumgrGetSomeGpu(); 2430 NvU32 i, j, k; 2431 NvU32 numChunks4k = pageSize / RM_PAGE_SIZE; 2432 NvU32 offset4k = numChunks4k * pageIndex; 2433 NvU32 pageCount4k = numChunks4k * pageCount; 2434 NvU32 result4k, limit4k; 2435 NvU64 addr; 2436 2437 NV_ASSERT(pMemDesc != NULL); 2438 2439 if (GPU_GET_MEMORY_MANAGER(pGpu)->bEnableDynamicGranularityPageArrays) 2440 { 2441 _memdescFillPagesAtNativeGranularity(pMemDesc, pageIndex, pPages, pageCount, pageSize); 2442 return; 2443 } 2444 2445 NV_ASSERT(offset4k < pMemDesc->PageCount); 2446 NV_ASSERT(portSafeAddU32(offset4k, pageCount4k, &result4k)); 2447 2448 // 2449 // There is a possibility that the pMemDesc was created using 4K aligned 2450 // allocSize, but the actual memory allocator could align up the allocation 2451 // size based on its supported pageSize, (e.g. PMA supports 64K pages). In 2452 // that case, pageCount4k would be greater than pMemdesc->pageCount. We 2453 // limit pageCount4k to stay within pMemdesc->pageCount in that case. 2454 // 2455 if (result4k > pMemDesc->PageCount) 2456 pageCount4k = pMemDesc->PageCount - offset4k; 2457 2458 NV_ASSERT(pageSize > 0); 2459 NV_ASSERT(0 == (pageSize & (RM_PAGE_SIZE - 1))); 2460 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 2461 2462 // Fill _pteArray array using numChunks4k as a stride 2463 for (i = 0, j = offset4k; i < pageCount; i++, j += numChunks4k) 2464 { 2465 pMemDesc->_pteArray[j] = addr = pPages[i]; 2466 2467 // Fill _pteArray at 4K granularity 2468 limit4k = NV_MIN(j + numChunks4k, pageCount4k); 2469 2470 addr += RM_PAGE_SIZE; 2471 for (k = j + 1; k < limit4k; k++, addr += RM_PAGE_SIZE) 2472 pMemDesc->_pteArray[k] = addr; 2473 } 2474 } 2475 2476 /*! 
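 * Illustrative use (a sketch; the conditions checked below gate success):
 *
 *     if (memdescAcquireRmExclusiveUse(pMemDesc))
 *     {
 *         // pMemDesc had no parent, no prior exclusive claim, and
 *         // DupCount == 1; it is now marked bRmExclusiveUse.
 *     }
 *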
 * @brief Acquire exclusive use of the memdesc for RM.
 *
 * @param[inout] pMemDesc Memory descriptor
 *
 * @returns Boolean indicating whether we successfully acquired the memdesc for exclusive use
 */
NvBool
memdescAcquireRmExclusiveUse
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemDesc->_pParentDescriptor == NULL &&
                                    !pMemDesc->bRmExclusiveUse &&
                                    pMemDesc->DupCount == 1,
                       NV_FALSE);

    pMemDesc->bRmExclusiveUse = NV_TRUE;
    return NV_TRUE;
}

//
// SubMemory per subdevice chart: (MD - Memory Descriptor, SD - subdevice)
//
// If we try to create submemory of a descriptor which has subdevices:
//
//                  [Top level MD]
//                   ^          |
//                   |          +--------> [ Subdevice 0 MD ] --------> [Subdevice 1 MD]
//                   |                              ^                          ^
//                   |                              |                          |
//     [SubMemory top level MD]                     |                          |
//                   |                              |                          |
//                   +--------> [Subdevice 0 SubMemory MD] --------> [Subdevice 1 SubMemory MD]
//
// Top Level MD             : parent of SubMemoryTopLevelMD; has subdescriptors
//                            for two subdevices
// SubMemory top level MD   : has pointer to parent memory descriptor; has two
//                            subdevice MDs
// Subdevice 0 MD           : subdevice MD of topLevelMD and parent of SD0
//                            submemory descriptor; has pointer to next in the
//                            list of subdevice MDs
// Subdevice 0 SubMemory MD : submemory of subdevice 0 MD; has pointer to
//                            parent, subdevice 0 MD and to next in list of
//                            submemory subdevice memory descriptors
//

/*!
 * @brief Create a new memory descriptor that is a subset of pMemDesc. If
 * pMemDesc has subdevice memory descriptors, subMemory descriptors are created
 * for all subdevices and the new memory descriptor is the top level for them
 * (see the chart above).
 *
 * @param[out] ppMemDescNew New memory descriptor
 * @param[in]  pMemDesc     Original memory descriptor
 * @param[in]  pGpu         The GPU that this memory will be mapped to
 * @param[in]  Offset       Sub memory descriptor starts at pMemdesc+Offset
 * @param[in]  Size         For Size bytes
 *
 * @returns NV_STATUS
 */
NV_STATUS
memdescCreateSubMem
(
    MEMORY_DESCRIPTOR **ppMemDescNew,
    MEMORY_DESCRIPTOR *pMemDesc,
    OBJGPU *pGpu,
    NvU64 Offset,
    NvU64 Size
)
{
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pMemDescNew;
    NvU32 subDevInst;
    NvU64 tmpSize = Size;
    MEMORY_DESCRIPTOR *pLast;
    MEMORY_DESCRIPTOR *pNew;
    OBJGPU *pGpuChild;

    // Default to the original memdesc's GPU if none is specified
    if (pGpu == NULL)
    {
        pGpu = pMemDesc->pGpu;
    }

    // Allocation size should be adjusted for the memory descriptor _pageSize.
    // Also note that the first 4k page may not be at a _pageSize boundary, so at
    // the time of the mapping we may be overmapping at the beginning or end of
    // the descriptor. To fix it in the right way, the memory descriptor needs to
    // be further cleaned up. Do not round to page size if client specifies so.
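    // Example (illustrative, assuming the parent's pages start 64K-aligned):
    // with _pageSize = 64K, Offset = 0x1000 and Size = 0x2000, pageOffset
    // computes to 0x1000 and tmpSize rounds up to 0x10000, so the submemory
    // covers the whole 64K page.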
2568 if (!(pMemDesc->_flags & MEMDESC_FLAGS_PAGE_SIZE_ALIGN_IGNORE) && 2569 pMemDesc->_pageSize != 0) 2570 { 2571 PMEMORY_DESCRIPTOR pTempMemDesc = pMemDesc; 2572 NvU64 pageOffset; 2573 2574 if (memdescHasSubDeviceMemDescs(pMemDesc)) 2575 { 2576 NV_ASSERT(pGpu); 2577 pTempMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu); 2578 } 2579 2580 pageOffset = memdescGetPhysAddr(pTempMemDesc, AT_CPU, Offset) & 2581 (pTempMemDesc->_pageSize - 1); 2582 2583 // Check for integer overflow 2584 if (!portSafeAddU64(pageOffset, Size, &tmpSize)) 2585 { 2586 return NV_ERR_INVALID_ARGUMENT; 2587 } 2588 2589 tmpSize = RM_ALIGN_UP(pageOffset + Size, pTempMemDesc->_pageSize); 2590 2591 // Check for integer overflow 2592 if (tmpSize < pageOffset + Size) 2593 { 2594 return NV_ERR_INVALID_ARGUMENT; 2595 } 2596 } 2597 2598 // Allocate the new MEMORY_DESCRIPTOR 2599 status = memdescCreate(&pMemDescNew, pGpu, tmpSize, 0, 2600 !!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS), 2601 pMemDesc->_addressSpace, 2602 pMemDesc->_cpuCacheAttrib, 2603 ((pMemDesc->_flags & ~MEMDESC_FLAGS_PRE_ALLOCATED) | MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE)); 2604 2605 if (status != NV_OK) 2606 { 2607 return status; 2608 } 2609 2610 // Fill in various fields as best we can; XXX this can get sort of sketchy 2611 // in places, which should be all the more motivation to rip some of these 2612 // fields out of the MEMORY_DESCRIPTOR. 2613 if (pMemDesc->_flags & MEMDESC_FLAGS_KERNEL_MODE) 2614 pMemDescNew->_flags |= MEMDESC_FLAGS_KERNEL_MODE; 2615 else 2616 pMemDescNew->_flags &= ~MEMDESC_FLAGS_KERNEL_MODE; 2617 2618 pMemDescNew->Size = Size; 2619 pMemDescNew->_pteKind = pMemDesc->_pteKind; 2620 pMemDescNew->_hwResId = pMemDesc->_hwResId; 2621 if (pMemDesc->_flags & MEMDESC_FLAGS_ENCRYPTED) 2622 pMemDescNew->_flags |= MEMDESC_FLAGS_ENCRYPTED; 2623 else 2624 pMemDescNew->_flags &= ~MEMDESC_FLAGS_ENCRYPTED; 2625 pMemDescNew->_pageSize = pMemDesc->_pageSize; 2626 pMemDescNew->_gpuCacheAttrib = pMemDesc->_gpuCacheAttrib; 2627 pMemDescNew->_gpuP2PCacheAttrib = pMemDesc->_gpuP2PCacheAttrib; 2628 pMemDescNew->gfid = pMemDesc->gfid; 2629 pMemDescNew->bUsingSuballocator = pMemDesc->bUsingSuballocator; 2630 pMemDescNew->_pParentDescriptor = pMemDesc; 2631 pMemDesc->childDescriptorCnt++; 2632 pMemDescNew->bRmExclusiveUse = pMemDesc->bRmExclusiveUse; 2633 2634 pMemDescNew->subMemOffset = Offset; 2635 2636 // increase refCount of parent descriptor 2637 memdescAddRef(pMemDesc); 2638 2639 // Fill in the PteArray and PteAdjust 2640 if ((pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || 2641 (pMemDesc->PageCount == 1)) 2642 { 2643 // Compute the base address, then fill it in 2644 RmPhysAddr Base = pMemDesc->_pteArray[0] + pMemDesc->PteAdjust + Offset; 2645 pMemDescNew->_pteArray[0] = Base & ~RM_PAGE_MASK; 2646 pMemDescNew->PteAdjust = NvU64_LO32(Base) & RM_PAGE_MASK; 2647 } 2648 else 2649 { 2650 // More complicated... 
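        // (Discontiguous case: locate the 4K page containing PteAdjust + Offset,
        //  then copy the parent's PTEs from that page onward, as done below.)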
2651 RmPhysAddr Adjust; 2652 NvU32 PageIndex, i; 2653 2654 // We start this many bytes into the memory alloc 2655 Adjust = pMemDesc->PteAdjust + Offset; 2656 2657 // Break it down into pages (PageIndex) and bytes (PteAdjust) 2658 PageIndex = (NvU32)(Adjust >> RM_PAGE_SHIFT); 2659 pMemDescNew->PteAdjust = NvU64_LO32(Adjust) & RM_PAGE_MASK; 2660 2661 // Fill in the PTEs; remember to copy the extra PTE, in case we need it 2662 if (pMemDesc->PageCount) 2663 { 2664 for (i = 0; i < pMemDescNew->PageCount+1; i++) 2665 { 2666 NvU32 j = i + PageIndex; 2667 if (j < pMemDesc->PageCount) 2668 { 2669 pMemDescNew->_pteArray[i] = pMemDesc->_pteArray[j]; 2670 } 2671 else 2672 { 2673 // 2674 // This case can happen with page size greater than 4KB. 2675 // Since pages are always tracked at 4KB granularity the 2676 // subset description may overflow the parent memdesc. 2677 // 2678 // In this case the best we can do is describe the contiguous 2679 // memory after the last 4KB page in the sub-memdesc. 2680 // 2681 // TODO: Tracking memdesc pages at native page size would 2682 // remove the need for several hacks, including this one. 2683 // 2684 NV_ASSERT(i > 0); 2685 pMemDescNew->_pteArray[i] = pMemDescNew->_pteArray[i - 1] + pMemDescNew->pageArrayGranularity; 2686 } 2687 } 2688 } 2689 } 2690 2691 if ((pMemDesc->_addressSpace == ADDR_SYSMEM) && 2692 !memdescGetFlag(memdescGetMemDescFromGpu(pMemDesc, pGpu), MEMDESC_FLAGS_CPU_ONLY) && 2693 !memdescGetFlag(memdescGetMemDescFromGpu(pMemDesc, pGpu), MEMDESC_FLAGS_MAP_SYSCOH_OVER_BAR1) && 2694 !memdescGetFlag(memdescGetMemDescFromGpu(pMemDesc, pGpu), MEMDESC_FLAGS_SKIP_IOMMU_MAPPING)) 2695 { 2696 // 2697 // For different IOVA spaces, the IOMMU mapping will often not be a 2698 // subrange of the original mapping. 2699 // 2700 // Request the submapping to be associated with the submemdesc. 2701 // 2702 // TODO: merge the new IOMMU paths with the SMMU path above (see bug 2703 // 1625121). 2704 // 2705 status = memdescMapIommu(pMemDescNew, pGpu->busInfo.iovaspaceId); 2706 if (status != NV_OK) 2707 { 2708 memdescDestroy(pMemDescNew); 2709 return status; 2710 } 2711 } 2712 2713 // Support for SLI submemory per-subdevice allocations (refer to chart) 2714 if (memdescHasSubDeviceMemDescs(pMemDesc)) 2715 { 2716 NvBool bBcState = gpumgrGetBcEnabledStatus(pGpu); 2717 2718 if (gpumgrGetBcEnabledStatus(pGpu) && (pMemDesc->_addressSpace == ADDR_FBMEM)) 2719 { 2720 NV_ASSERT(!!(pMemDesc->_flags & MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE)); 2721 gpumgrSetBcEnabledStatus(pGpu, NV_FALSE); 2722 } 2723 pLast = pMemDescNew; 2724 2725 pMemDescNew->_subDeviceAllocCount = pMemDesc->_subDeviceAllocCount; 2726 2727 for (subDevInst = 0; subDevInst < pMemDesc->_subDeviceAllocCount; subDevInst++) 2728 { 2729 pGpuChild = gpumgrGetGpuFromSubDeviceInst(gpuGetDeviceInstance(pGpu), subDevInst); 2730 status = memdescCreateSubMem(&pNew, memdescGetMemDescFromGpu(pMemDesc, pGpuChild), pGpuChild, Offset, Size); 2731 2732 if (status != NV_OK) 2733 { 2734 while (NULL != pMemDescNew) 2735 { 2736 pNew = pMemDescNew; 2737 pMemDescNew = pMemDescNew->_pNext; 2738 memdescDestroy(pNew); 2739 } 2740 return status; 2741 } 2742 2743 pLast->_pNext = pNew; 2744 pLast = pNew; 2745 } 2746 2747 gpumgrSetBcEnabledStatus(pGpu, bBcState); 2748 } 2749 2750 *ppMemDescNew = pMemDescNew; 2751 2752 return NV_OK; 2753 } 2754 2755 /*! 
2756 * Given a memdesc, this checks if the allocated memory falls under subheap or in GPA address space 2757 */ 2758 static NvBool 2759 _memIsSriovMappingsEnabled 2760 ( 2761 PMEMORY_DESCRIPTOR pMemDesc 2762 ) 2763 { 2764 return gpuIsSriovEnabled(pMemDesc->pGpu) && 2765 (((pMemDesc->_flags & MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE) && pMemDesc->bUsingSuballocator) || 2766 (pMemDesc->_flags & MEMDESC_FLAGS_GUEST_ALLOCATED)); 2767 } 2768 2769 /*! 2770 * Fills pGpaEntries with numEntries GPAs from pMemDesc->_pteArray starting at 2771 * the given starting index. For physically contiguous memdescs, fills with 2772 * RM_PAGE_SIZE strides. 2773 */ 2774 static void 2775 _memdescFillGpaEntriesForSpaTranslation 2776 ( 2777 PMEMORY_DESCRIPTOR pMemDesc, 2778 RmPhysAddr *pGpaEntries, 2779 NvU32 start, 2780 NvU32 numEntries 2781 ) 2782 { 2783 if (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) 2784 { 2785 NvU32 i; 2786 2787 for (i = 0; i < numEntries; i++) 2788 { 2789 pGpaEntries[i] = pMemDesc->_pteArray[0] + (((RmPhysAddr) (start + i)) * pMemDesc->pageArrayGranularity); 2790 } 2791 } 2792 else 2793 { 2794 portMemCopy(&pGpaEntries[0], numEntries * sizeof(pGpaEntries[0]), 2795 &pMemDesc->_pteArray[start], numEntries * sizeof(pGpaEntries[0])); 2796 } 2797 } 2798 2799 /*! 2800 * This function translates GPA -> SPA for a given memdesc and updates pPteSpaMappings with list of SPA addresses. 2801 * If memdesc is contiguous and if the translated SPA count > 1, this function fails for now. 2802 */ 2803 NV_STATUS 2804 _memdescUpdateSpaArray 2805 ( 2806 PMEMORY_DESCRIPTOR pMemDesc 2807 ) 2808 { 2809 NV_STATUS status = NV_OK; 2810 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pMemDesc->pGpu); 2811 NvU32 allocCnt; 2812 NvU32 i; 2813 NV2080_CTRL_INTERNAL_VMMU_GET_SPA_FOR_GPA_ENTRIES_PARAMS *pParams = NULL; 2814 2815 if ((pMemDesc->pPteSpaMappings) || (!pMemDesc->PageCount)) 2816 { 2817 status = NV_OK; 2818 goto _memUpdateSpArray_exit; 2819 } 2820 2821 allocCnt = memdescGetPteArraySize(pMemDesc, AT_PA); 2822 2823 // Allocate the array to hold pages up to PageCount 2824 pMemDesc->pPteSpaMappings = portMemAllocNonPaged(sizeof(RmPhysAddr) * allocCnt); 2825 if (pMemDesc->pPteSpaMappings == NULL) 2826 { 2827 status = NV_ERR_NO_MEMORY; 2828 goto _memUpdateSpArray_exit; 2829 } 2830 2831 pParams = portMemAllocStackOrHeap(sizeof(*pParams)); 2832 if (pParams == NULL) 2833 { 2834 status = NV_ERR_NO_MEMORY; 2835 goto _memUpdateSpArray_exit; 2836 } 2837 portMemSet(pParams, 0, sizeof(*pParams)); 2838 2839 pParams->gfid = pMemDesc->gfid; 2840 2841 for (i = 0; i < allocCnt; i += NV2080_CTRL_INTERNAL_VMMU_MAX_SPA_FOR_GPA_ENTRIES) 2842 { 2843 NvU32 numEntries = NV_MIN(allocCnt - i, NV2080_CTRL_INTERNAL_VMMU_MAX_SPA_FOR_GPA_ENTRIES); 2844 pParams->numEntries = numEntries; 2845 2846 _memdescFillGpaEntriesForSpaTranslation(pMemDesc, &pParams->gpaEntries[0], 2847 i, numEntries); 2848 2849 status = pRmApi->Control(pRmApi, 2850 pMemDesc->pGpu->hInternalClient, 2851 pMemDesc->pGpu->hInternalSubdevice, 2852 NV2080_CTRL_CMD_INTERNAL_VMMU_GET_SPA_FOR_GPA_ENTRIES, 2853 pParams, 2854 sizeof(*pParams)); 2855 if (status != NV_OK) 2856 { 2857 NV_PRINTF(LEVEL_ERROR, "Getting SPA for GPA failed: GFID=%u, GPA=0x%llx\n", 2858 pMemDesc->gfid, pMemDesc->_pteArray[i]); 2859 goto _memUpdateSpArray_exit; 2860 } 2861 2862 portMemCopy(&pMemDesc->pPteSpaMappings[i], numEntries * sizeof(pParams->spaEntries[0]), 2863 &pParams->spaEntries[0], numEntries * sizeof(pParams->spaEntries[0])); 2864 } 2865 2866 _memUpdateSpArray_exit: 2867 if (status != NV_OK) 2868 { 
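        // The translation failed part-way; drop any SPA entries gathered so far.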
2869 portMemFree(pMemDesc->pPteSpaMappings); 2870 pMemDesc->pPteSpaMappings = NULL; 2871 } 2872 portMemFreeStackOrHeap(pParams); 2873 2874 return status; 2875 } 2876 2877 /*! 2878 * @brief Return the physical addresses of pMemdesc 2879 * 2880 * @param[in] pMemDesc Memory descriptor used 2881 * @param[in] pGpu GPU to return the addresses for 2882 * @param[in] addressTranslation Address translation identifier 2883 * @param[in] offset Offset into memory descriptor 2884 * @param[in] stride How much to advance the offset for each 2885 * consecutive address 2886 * @param[in] count How many addresses to retrieve 2887 * @param[out] pAddresses Returned array of addresses 2888 * 2889 */ 2890 void memdescGetPhysAddrsForGpu(MEMORY_DESCRIPTOR *pMemDesc, 2891 OBJGPU *pGpu, 2892 ADDRESS_TRANSLATION addressTranslation, 2893 NvU64 offset, 2894 NvU64 stride, 2895 NvU64 count, 2896 RmPhysAddr *pAddresses) 2897 { 2898 // 2899 // Get the PTE array that we should use for phys addr lookups based on the 2900 // MMU context. (see bug 1625121) 2901 // 2902 NvU64 i; 2903 NvU64 pageIndex; 2904 RmPhysAddr *pteArray = memdescGetPteArrayForGpu(pMemDesc, pGpu, addressTranslation); 2905 const NvBool contiguous = (memdescGetPteArraySize(pMemDesc, addressTranslation) == 1); 2906 2907 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 2908 offset += pMemDesc->PteAdjust; 2909 2910 for (i = 0; i < count; ++i) 2911 { 2912 if (contiguous) 2913 { 2914 pAddresses[i] = pteArray[0] + offset; 2915 } 2916 else 2917 { 2918 pageIndex = offset >> RM_PAGE_SHIFT; 2919 NV_CHECK_OR_RETURN_VOID(LEVEL_ERROR, pageIndex < pMemDesc->PageCount); 2920 pAddresses[i] = pteArray[pageIndex] + (offset & RM_PAGE_MASK); 2921 } 2922 2923 offset += stride; 2924 } 2925 } 2926 2927 2928 /*! 2929 * @brief Return the physical addresses of pMemdesc 2930 * 2931 * @param[in] pMemDesc Memory descriptor used 2932 * @param[in] addressTranslation Address translation identifier 2933 * @param[in] offset Offset into memory descriptor 2934 * @param[in] stride How much to advance the offset for each 2935 * consecutive address 2936 * @param[in] count How many addresses to retrieve 2937 * @param[out] pAddresses Returned array of addresses 2938 * 2939 */ 2940 void memdescGetPhysAddrs(MEMORY_DESCRIPTOR *pMemDesc, 2941 ADDRESS_TRANSLATION addressTranslation, 2942 NvU64 offset, 2943 NvU64 stride, 2944 NvU64 count, 2945 RmPhysAddr *pAddresses) 2946 { 2947 memdescGetPhysAddrsForGpu(pMemDesc, pMemDesc->pGpu, addressTranslation, offset, stride, count, pAddresses); 2948 } 2949 2950 /*! 2951 * @brief Return the physical address of pMemdesc+Offset 2952 * 2953 * "long description" 2954 * 2955 * @param[in] pMemDesc Memory descriptor used 2956 * @param[in] addressTranslation Address translation identifier 2957 * @param[in] offset Offset into memory descriptor 2958 * 2959 * @returns A physical address 2960 */ 2961 RmPhysAddr 2962 memdescGetPhysAddr 2963 ( 2964 MEMORY_DESCRIPTOR *pMemDesc, 2965 ADDRESS_TRANSLATION addressTranslation, 2966 NvU64 offset 2967 ) 2968 { 2969 RmPhysAddr addr; 2970 memdescGetPhysAddrs(pMemDesc, addressTranslation, offset, 0, 1, &addr); 2971 return addr; 2972 } 2973 2974 /*! 
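 * Relationship to memdescGetPhysAddr() (illustrative; the two agree when
 * PteAdjust is zero):
 *
 *     RmPhysAddr a = memdescGetPhysAddr(pMemDesc, AT_GPU, (NvU64)i << RM_PAGE_SHIFT);
 *     RmPhysAddr b = memdescGetPte(pMemDesc, AT_GPU, i);   // same page address
 *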
 * @brief Return physical address for page specified by PteIndex
 *
 * @param[in] pMemDesc           Memory descriptor to use
 * @param[in] addressTranslation Address translation identifier
 * @param[in] PteIndex           Look up this PteIndex
 *
 * @returns A physical address
 */
RmPhysAddr
memdescGetPte
(
    PMEMORY_DESCRIPTOR pMemDesc,
    ADDRESS_TRANSLATION addressTranslation,
    NvU32 PteIndex
)
{
    //
    // Get the PTE array that we should use for phys addr lookups based on the
    // MMU context. (see bug 1625121)
    //
    RmPhysAddr *pteArray = memdescGetPteArray(pMemDesc, addressTranslation);
    RmPhysAddr PhysAddr;

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    if (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS)
    {
        PhysAddr = pteArray[0] + (PteIndex << RM_PAGE_SHIFT);
    }
    else
    {
        PhysAddr = pteArray[PteIndex];
    }

    return PhysAddr;
}

/*!
 * @brief Set the physical address for the page specified by PteIndex
 *
 * @param[in] pMemDesc           Memory descriptor to use
 * @param[in] addressTranslation Address translation identifier
 * @param[in] PteIndex           Update this PteIndex
 * @param[in] PhysAddr           New PTE address
 *
 * @returns None
 */
void
memdescSetPte
(
    PMEMORY_DESCRIPTOR pMemDesc,
    ADDRESS_TRANSLATION addressTranslation,
    NvU32 PteIndex,
    RmPhysAddr PhysAddr
)
{
    //
    // Get the PTE array that we should use for phys addr lookups based on the
    // MMU context. (see bug 1625121)
    //
    RmPhysAddr *pteArray = memdescGetPteArray(pMemDesc, addressTranslation);
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    if (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS)
    {
        NV_ASSERT_OR_RETURN_VOID(PteIndex == 0);
    }

    pteArray[PteIndex] = PhysAddr;

    // Invalidate any cached SPA translations
    portMemFree(pMemDesc->pPteSpaMappings);
    pMemDesc->pPteSpaMappings = NULL;
}

/*!
 * @brief Return the page array size based on the MMU context.
 *        For SRIOV, the host context (AT_PA) will have a discontiguous view
 *        of the GPA in SPA space. This is treated like a discontiguous memdesc.
 *
 * @param[in] pMemDesc           Memory descriptor to use
 * @param[in] addressTranslation Address translation identifier
 *
 * @returns Page array size
 */
NvU32 memdescGetPteArraySize(MEMORY_DESCRIPTOR *pMemDesc, ADDRESS_TRANSLATION addressTranslation)
{
    // Contiguous allocations in the SPA domain can be non-contiguous at VMMU-segment
    // granularity. Hence treat SPA domain allocations as non-contiguous by default.
    if (!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) ||
        ((addressTranslation == AT_PA) && (pMemDesc->_addressSpace == ADDR_FBMEM) && _memIsSriovMappingsEnabled(pMemDesc)))
    {
        return NvU64_LO32(pMemDesc->PageCount);
    }
    return 1;
}

/*!
 * @brief Return the page array
 *
 * @param[in] pMemDesc           Memory descriptor to use
 * @param[in] pGpu               GPU to get the PTE array for.
 * @param[in] addressTranslation Address translation identifier
 *
 * @returns The PTE array
 */
RmPhysAddr *
memdescGetPteArrayForGpu
(
    PMEMORY_DESCRIPTOR pMemDesc,
    OBJGPU *pGpu,
    ADDRESS_TRANSLATION addressTranslation
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    switch (AT_VALUE(addressTranslation))
    {
        //
        // In SRIOV systems, an access from a guest has to go through the
        // following translations:
        //     GVA -> GPA -> SPA
        //
        // Given that the HOST manages channel/memory management for the guest,
        // certain code paths expect VA -> GPA translations and some may need
        // GPA -> SPA translations. We use addressTranslation to differentiate
        // between these cases.
        // Since GPA -> SPA is very similar to IOMMU translation, and since the
        // existing AT_PA is used only in SYSMEM allocations, we decided to reuse
        // the AT_PA addressTranslation to fetch GPA -> SPA translations.
        // On non-SRIOV systems, using AT_PA falls back to AT_GPU or the default
        // context.
        //
        // pMemDesc->_pteArray       tracks GVA -> GPA translations
        // pMemDesc->pPteSpaMappings tracks GPA -> SPA translations
        //

        case AT_VALUE(AT_PA):
        {
            if (pGpu != NULL)
            {
                if (pMemDesc->_addressSpace == ADDR_FBMEM)
                {
                    if (_memIsSriovMappingsEnabled(pMemDesc))
                    {
                        if (!pMemDesc->pPteSpaMappings)
                            _memdescUpdateSpaArray(pMemDesc);

                        return pMemDesc->pPteSpaMappings;
                    }
                }
            }
        }
        // Otherwise fall through to the AT_GPU handling
        case AT_VALUE(AT_GPU):
        {
            // Imported ADDR_FABRIC_V2 memdescs are device-less.
            if (pGpu != NULL)
            {
                PIOVAMAPPING pIovaMap = memdescGetIommuMap(pMemDesc, pGpu->busInfo.iovaspaceId);
                if (pIovaMap != NULL)
                {
                    return pIovaMap->iovaArray;
                }
            }

            //
            // If no IOMMU mapping exists in the default IOVASPACE, fall
            // through and use the physical memory descriptor instead.
            //
        }
        default:
        {
            return pMemDesc->_pteArray;
        }
    }
}

/*!
 * @brief Convert an aperture into a descriptive string.
 *
 * @param[in] addressSpace
 *
 * @returns String
 */
const char *
memdescGetApertureString
(
    NV_ADDRESS_SPACE addressSpace
)
{
    static NV_PRINTF_STRING_SECTION const char ADDR_FBMEM_STR[]  = "VIDEO MEMORY";
    static NV_PRINTF_STRING_SECTION const char ADDR_SYSMEM_STR[] = "SYSTEM MEMORY";

    if (addressSpace == ADDR_FBMEM)
    {
        return ADDR_FBMEM_STR;
    }

    if (addressSpace == ADDR_SYSMEM)
    {
        return ADDR_SYSMEM_STR;
    }

    return NULL;
}

/*!
 * @brief Compare two memory descriptors to see if they describe the same memory
 *
 * @param[in] pMemDescOne
 * @param[in] pMemDescTwo
 *
 * @returns NV_TRUE if the memory descriptors refer to the same memory
 */
NvBool
memdescDescIsEqual
(
    MEMORY_DESCRIPTOR *pMemDescOne,
    MEMORY_DESCRIPTOR *pMemDescTwo
)
{
    if ((pMemDescOne == NULL) || (pMemDescTwo == NULL))
        return NV_FALSE;

    if (pMemDescOne->_addressSpace != pMemDescTwo->_addressSpace)
        return NV_FALSE;

    // All the physical memory views should match.
    if ((memdescGetPhysAddr(pMemDescOne, AT_CPU, 0) != memdescGetPhysAddr(pMemDescTwo, AT_CPU, 0)) ||
        (memdescGetPhysAddr(pMemDescOne, AT_GPU, 0) != memdescGetPhysAddr(pMemDescTwo, AT_GPU, 0)))
        return NV_FALSE;

    if (memdescGetCpuCacheAttrib(pMemDescOne) != memdescGetCpuCacheAttrib(pMemDescTwo))
        return NV_FALSE;

    if (pMemDescOne->Size != pMemDescTwo->Size)
        return NV_FALSE;

    if (pMemDescOne->Alignment != pMemDescTwo->Alignment)
        return NV_FALSE;

    if (pMemDescOne->_pageSize != pMemDescTwo->_pageSize)
        return NV_FALSE;

    return NV_TRUE;
}

/*!
 * @brief Add a callback block to the destroy callback queue
 *
 * @param[in] pMemDesc Memory descriptor to update
 * @param[in] pCb      Callee allocated block with callback func/arg
 *
 * @returns nothing
 */
void
memdescAddDestroyCallback
(
    MEMORY_DESCRIPTOR *pMemDesc,
    MEM_DESC_DESTROY_CALLBACK *pCb
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pCb->pNext = memdescGetDestroyCallbackList(pMemDesc);
    memdescSetDestroyCallbackList(pMemDesc, pCb);
}

/*!
 * @brief Remove a callback block from the destroy callback queue
 *
 * @param[in] pMemDesc  Memory descriptor to update
 * @param[in] pRemoveCb Callee allocated block with callback func/arg
 *
 * @returns nothing
 */
void
memdescRemoveDestroyCallback
(
    MEMORY_DESCRIPTOR *pMemDesc,
    MEM_DESC_DESTROY_CALLBACK *pRemoveCb
)
{
    MEM_DESC_DESTROY_CALLBACK *pCb = memdescGetDestroyCallbackList(pMemDesc);
    MEM_DESC_DESTROY_CALLBACK *pPrev = NULL;

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    while (pCb)
    {
        if (pCb == pRemoveCb)
        {
            if (pPrev == NULL)
            {
                memdescSetDestroyCallbackList(pMemDesc, pCb->pNext);
            }
            else
            {
                pPrev->pNext = pCb->pNext;
            }
            break;
        }
        pPrev = pCb;
        pCb = pCb->pNext;
    }
}

/*!
 * @brief Retrieves a subdevice's memory descriptor by subdevice instance
 *
 * Subdevice memory descriptors are memory descriptors that describe
 * per-subdevice memory buffers. This functionality is required by our current
 * SLI programming model as our memdescAlloc() calls are primarily broadcast
 * operations. A singular memdesc works for video memory as the
 * heaps are symmetric. However, we run into trouble when dealing with system
 * memory as both GPUs then share the same address space and symmetric
 * addressing is no longer possible.
 *
 * N.B. The rationale for exposing this routine is that it keeps SLI-isms out of
 * most of the RM -- the alternative approach would've been to pass in the
 * subdevice or a pGpu for all memdesc methods, which would require more code
 * changes solely for SLI. Long term, hopefully we can transition to a unicast
 * allocation model (SLI loops above memdescAlloc()/memdescCreate()) and the
 * subdevice support in memdesc can (easily) be deleted. This approach also
 * provides a safety net against misuse, e.g., if we added pGpu to
 * memdescGetPhysAddr, current code which utilizes that routine outside an SLI loop
 * would execute cleanly even though it's incorrect.
 *
 * @param[in] pMemDesc      Memory descriptor to query
 * @param[in] subDeviceInst SLI subdevice instance (subdevice - 1)
 *
 * @returns Memory descriptor if one exists for the subdevice.
 *          NULL if none is found.
 */
MEMORY_DESCRIPTOR *
memdescGetMemDescFromSubDeviceInst(MEMORY_DESCRIPTOR *pMemDesc, NvU32 subDeviceInst)
{
    if (!memdescHasSubDeviceMemDescs(pMemDesc))
    {
        return pMemDesc;
    }
    else
    {
        return memdescGetMemDescFromIndex(pMemDesc, subDeviceInst);
    }
}

/*!
 * @brief Retrieves a subdevice's memory descriptor by GPU object
 *
 * See memdescGetMemDescFromSubDeviceInst for an explanation of subdevice memory
 * descriptors
 *
 * @param[in] pMemDesc Memory descriptor to query
 * @param[in] pGpu
 *
 * @returns Memory descriptor if one exists for the GPU.
 *          NULL if none is found.
 */
MEMORY_DESCRIPTOR *
memdescGetMemDescFromGpu(MEMORY_DESCRIPTOR *pMemDesc, OBJGPU *pGpu)
{
    NvU32 subDeviceInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    return memdescGetMemDescFromSubDeviceInst(pMemDesc, subDeviceInst);
}

/*!
 * @brief Retrieves a subdevice's memory descriptor by memdesc index.
 *
 * See memdescGetMemDescFromSubDeviceInst for an explanation of subdevice memory
 * descriptors
 *
 * @param[in] pMemDesc Memory descriptor to query
 * @param[in] index    Index into the array of memdescs
 *
 * @returns Memory descriptor if one exists for the index.
 *          NULL if none is found.
 */
MEMORY_DESCRIPTOR *
memdescGetMemDescFromIndex(MEMORY_DESCRIPTOR *pMemDesc, NvU32 index)
{
    if (!memdescHasSubDeviceMemDescs(pMemDesc))
    {
        return pMemDesc;
    }
    else
    {
        MEMORY_DESCRIPTOR *pSubDevMemDesc = pMemDesc->_pNext;

        NV_ASSERT(pSubDevMemDesc);

        while (index--)
        {
            pSubDevMemDesc = pSubDevMemDesc->_pNext;

            if (!pSubDevMemDesc)
            {
                NV_ASSERT(0);
                return NULL;
            }
        }

        return pSubDevMemDesc;
    }
}

/*!
 * @brief Set the address for a fixed heap allocation.
 *
 * The offset must refer to the heap. A later memdescAlloc() will
 * force this offset.
 *
 * @param[in] pMemDesc Memory descriptor to update
 * @param[in] fbOffset Offset to refer to
 *
 * @returns nothing
 */
void
memdescSetHeapOffset
(
    MEMORY_DESCRIPTOR *pMemDesc,
    RmPhysAddr fbOffset
)
{
    NV_ASSERT(pMemDesc->_addressSpace == ADDR_FBMEM);
    NV_ASSERT(pMemDesc->Allocated == NV_FALSE);

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_flags |= MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE;
    pMemDesc->_pteArray[0] = fbOffset;
}

/*!
 * @brief Set GPU cacheability
 *
 * A later memdescAlloc() will use this setting.
 *
 * @param[in] pMemDesc    Memory descriptor to update
 * @param[in] cacheAttrib Set memory to GPU cacheable
 *
 * @returns nothing
 */
void memdescSetGpuCacheAttrib
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 cacheAttrib
)
{
    NV_ASSERT(pMemDesc->Allocated == NV_FALSE);

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_gpuCacheAttrib = cacheAttrib;
}

/*!
 * @brief Get GPU P2P cache attributes
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Current GPU P2P cache attributes
 */
NvU32 memdescGetGpuP2PCacheAttrib
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_gpuP2PCacheAttrib;
}

/*!
 * @brief Set GPU P2P cacheability
 *
 * A later memdescAlloc() will use this setting.
 *
 * @param[in] pMemDesc    Memory descriptor to update
 * @param[in] cacheAttrib Set memory to GPU P2P cacheable
 *
 * @returns nothing
 */
void memdescSetGpuP2PCacheAttrib
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 cacheAttrib
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_gpuP2PCacheAttrib = cacheAttrib;
}

/*!
 * @brief Set CPU cacheability
 *
 * A later memdescAlloc() will use this setting.
 *
 * @param[in] pMemDesc       Memory descriptor to update
 * @param[in] cpuCacheAttrib Set memory to CPU cacheable
 *
 * @returns nothing
 */
void memdescSetCpuCacheAttrib
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 cpuCacheAttrib
)
{
    //
    // When running 64-bit MODS on ARM v8, we need to force all CPU mappings as WC.
    // This seems to be an issue with glibc. See bug 1556221.
    //
    // Ideally, this should have been set based on a Core Logic (CL) property.
    // But chipset initialization will only happen during bifStateInit().
    // RM can make sysmem CPU mappings before bifStateInit().
    //
    if (RMCFG_FEATURE_PLATFORM_MODS && NVCPU_IS_AARCH64)
    {
        if (cpuCacheAttrib == NV_MEMORY_UNCACHED)
        {
            cpuCacheAttrib = NV_MEMORY_WRITECOMBINED;
        }
    }

    pMemDesc->_cpuCacheAttrib = cpuCacheAttrib;
}

/*!
 * @brief Print the contents of a MEMORY_DESCRIPTOR in a human readable format.
 *
 * @param[in] pMemDesc              Memory Descriptor to print
 * @param[in] bPrintIndividualPages Individual pages will also be printed
 *                                  iff they are discontiguous
 * @param[in] pPrefixMessage        Message that will be printed before the
 *                                  contents of the Memory Descriptor
 *
 * @returns nothing
 */
void memdescPrintMemdesc
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvBool bPrintIndividualPages,
    const char *pPrefixMessage
)
{
#if 0
    NvU32 i;

    if ((DBG_RMMSG_CHECK(DBG_LEVEL_INFO) == 0) || (pPrefixMessage == NULL) || (pMemDesc == NULL))
    {
        return;
    }

    NV_PRINTF(LEVEL_INFO,
              "%s Aperture %s starting at 0x%llx and of size 0x%llx\n",
              pPrefixMessage,
              memdescGetApertureString(pMemDesc->_addressSpace),
              memdescGetPhysAddr(pMemDesc, AT_CPU, 0),
              pMemDesc->Size);

    if ((bPrintIndividualPages == NV_TRUE) &&
        (pMemDesc->PageCount > 1) &&
        (!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS)))
    {
        for (i = 0; i < pMemDesc->PageCount; i++)
        {
            NV_PRINTF(LEVEL_INFO,
                      "   contains page starting @0x%llx\n",
                      pMemDesc->_pteArray[i]);
        }
    }

    // TODO: merge with SMMU path above (see bug 1625121).
    if (pMemDesc->_pIommuMappings != NULL)
    {
        if (!memdescIsSubMemoryMemDesc(pMemDesc))
        {
            PIOVAMAPPING pIovaMap = pMemDesc->_pIommuMappings;
            while (pIovaMap != NULL)
            {
                NV_PRINTF(LEVEL_INFO,
                          "Has additional IOMMU mapping for IOVA space 0x%x starting @ 0x%llx\n",
                          pIovaMap->iovaspaceId,
                          pIovaMap->iovaArray[0]);
                pIovaMap = pIovaMap->pNext;
            }
        }
        else
        {
            NV_PRINTF(LEVEL_INFO,
                      "Has additional IOMMU mapping starting @ 0x%llx\n",
                      memdescGetPhysAddr(pMemDesc, AT_PA, 0));
        }
    }
#endif
}

/*!
 * @brief Return the page offset of a MEMORY_DESCRIPTOR for an arbitrary power of two page size
 *
 * PteAdjust covers the 4KB alignment, but the offset must also include bits
 * from the address for big pages.
 *
 * @param[in] pMemDesc Memory Descriptor to use
 * @param[in] pageSize Page size (4096, 64K, 128K, etc.)
 *
 * @returns The offset into the page
 */
NvU64 memdescGetPageOffset
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64 pageSize
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return (pMemDesc->PteAdjust + pMemDesc->_pteArray[0]) & (pageSize - 1);
}

/*!
 * @brief Get PTE kind using GPU
 *
 * @param[in] pMemDesc Memory descriptor pointer
 * @param[in] pGpu     GPU used to get the supported kind
 *
 * @returns Current PTE kind value.
 */
NvU32 memdescGetPteKindForGpu
(
    PMEMORY_DESCRIPTOR pMemDesc,
    OBJGPU *pGpu
)
{
    return memmgrGetHwPteKindFromSwPteKind_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), pMemDesc->_pteKind);
}

/*!
 * @brief Set PTE kind using GPU.
 *
 * @param[in] pMemDesc Memory descriptor pointer
 * @param[in] pGpu     GPU used to set the supported kind
 * @param[in] pteKind  New PTE kind
 *
 * @returns nothing
 */
void memdescSetPteKindForGpu
(
    PMEMORY_DESCRIPTOR pMemDesc,
    OBJGPU *pGpu,
    NvU32 pteKind
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pteKind = memmgrGetSwPteKindFromHwPteKind_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), pteKind);
    memdescSetFlag(pMemDesc, MEMDESC_FLAGS_SET_KIND, NV_TRUE);
}

/*!
 * @brief Set PTE kind compressed value.
 *
 * @param[in] pMemDesc    Memory descriptor pointer
 * @param[in] pteKindCmpr New PTE kind compressed value
 *
 * @returns nothing
 */
void memdescSetPteKindCompressed
(
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU32 pteKindCmpr
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pteKindCompressed = pteKindCmpr;
}

/*!
 * @brief Get PTE kind compressed value.
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Current PTE kind compressed value.
 */
NvU32 memdescGetPteKindCompressed
(
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_pteKindCompressed;
}

/*!
 * @brief Get kernel mapping
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Current kernel mapping
 */
NvP64 memdescGetKernelMapping
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_kernelMapping;
}

/*!
 * @brief Set kernel mapping
 *
 * @param[in] pMemDesc      Memory descriptor pointer
 * @param[in] kernelMapping New kernel mapping
 *
 * @returns nothing
 */
void memdescSetKernelMapping
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvP64 kernelMapping
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_kernelMapping = kernelMapping;
}

/*!
 * @brief Get privileged kernel mapping
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Current privileged kernel mapping
 */
NvP64 memdescGetKernelMappingPriv
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_kernelMappingPriv;
}

/*!
 * @brief Set privileged kernel mapping
 *
 * @param[in] pMemDesc          Memory descriptor pointer
 * @param[in] kernelMappingPriv New privileged kernel mapping
 *
 * @returns nothing
 */
void memdescSetKernelMappingPriv
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvP64 kernelMappingPriv
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_kernelMappingPriv = kernelMappingPriv;
}

/*!
 * @brief Get standby buffer memory descriptor
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Pointer to standby buffer memory descriptor
 */
MEMORY_DESCRIPTOR *memdescGetStandbyBuffer
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_pStandbyBuffer;
}

/*!
 * @brief Set standby buffer memory descriptor
 *
 * @param[in] pMemDesc       Memory descriptor pointer
 * @param[in] pStandbyBuffer Standby buffer memory descriptor pointer
 *
 * @returns nothing
 */
void memdescSetStandbyBuffer
(
    MEMORY_DESCRIPTOR *pMemDesc,
    MEMORY_DESCRIPTOR *pStandbyBuffer
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pStandbyBuffer = pStandbyBuffer;
}

/*!
 * @brief Set the mem destroy callback list pointer
 *
 * @param[in] pMemDesc Memory descriptor pointer
 * @param[in] pCb      Memory destroy callback list pointer
 *
 * @returns nothing
 */
void memdescSetDestroyCallbackList
(
    MEMORY_DESCRIPTOR *pMemDesc,
    MEM_DESC_DESTROY_CALLBACK *pCb
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pMemDestroyCallbackList = pCb;
}

/*!
 * @brief Get the guest ID for the specified memory descriptor
 *
 * @param[in] pMemDesc Memory descriptor pointer
 *
 * @returns Guest ID value
 */
NvU64 memdescGetGuestId
(
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_guestId;
}

/*!
3803 * @brief Set guest ID for memory descriptor 3804 * 3805 * @param[in] pMemDesc Memory descriptor pointer 3806 * @param[in] guestId New guest ID 3807 * 3808 * @returns nothing 3809 */ 3810 void memdescSetGuestId 3811 ( 3812 MEMORY_DESCRIPTOR *pMemDesc, 3813 NvU64 guestId 3814 ) 3815 { 3816 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3817 pMemDesc->_guestId = guestId; 3818 } 3819 3820 /*! 3821 * @brief Get value of specified flag 3822 * 3823 * @param[in] pMemDesc Memory descriptor pointer 3824 * @param[in] flag MEMDESC_FLAGS_* value 3825 * 3826 * @returns Boolean value of specified flag 3827 */ 3828 NvBool memdescGetFlag 3829 ( 3830 MEMORY_DESCRIPTOR *pMemDesc, 3831 NvU64 flag 3832 ) 3833 { 3834 // For checking contiguity, use the memdescGetContiguity() api 3835 NV_ASSERT(flag != MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS); 3836 // GPU_IN_RESET is set/got from top level memdesc always. 3837 if (flag != MEMDESC_FLAGS_GPU_IN_RESET) 3838 { 3839 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3840 } 3841 return !!(pMemDesc->_flags & flag); 3842 } 3843 3844 /*! 3845 * @brief Set value of specified flag 3846 * 3847 * @param[in] pMemDesc Memory descriptor pointer 3848 * @param[in] flag MEMDESC_FLAGS_* value 3849 * @param[in] bValue Boolean value of flag 3850 * 3851 * @returns nothing 3852 */ 3853 void memdescSetFlag 3854 ( 3855 MEMORY_DESCRIPTOR *pMemDesc, 3856 NvU64 flag, 3857 NvBool bValue 3858 ) 3859 { 3860 // For setting contiguity, use the memdescSetContiguity() api 3861 NV_ASSERT(flag != MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS); 3862 3863 // GPU_IN_RESET is set/got from top level memdesc always. 3864 if (flag != MEMDESC_FLAGS_GPU_IN_RESET) 3865 { 3866 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3867 } 3868 3869 if (flag == MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE) 3870 { 3871 NV_ASSERT_OK(_memdescSetSubAllocatorFlag(pMemDesc->pGpu, pMemDesc, bValue)); 3872 return; 3873 } 3874 else if (flag == MEMDESC_FLAGS_GUEST_ALLOCATED) 3875 { 3876 NV_ASSERT_OK(_memdescSetGuestAllocatedFlag(pMemDesc->pGpu, pMemDesc, bValue)); 3877 return; 3878 } 3879 3880 if (bValue) 3881 pMemDesc->_flags |= flag; 3882 else 3883 pMemDesc->_flags &= ~flag; 3884 } 3885 3886 /*! 3887 * @brief Return memory descriptor address pointer 3888 * 3889 * The address value is returned by osAllocPages 3890 * 3891 * @param[in] pMemDesc Memory descriptor pointer 3892 * 3893 * @returns Memory descriptor address pointer 3894 */ 3895 NvP64 memdescGetAddress 3896 ( 3897 MEMORY_DESCRIPTOR *pMemDesc 3898 ) 3899 { 3900 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3901 return pMemDesc->_address; 3902 } 3903 3904 /*! 3905 * @brief Set memory descriptor address pointer 3906 * 3907 * The address value is returned by osAllocPages 3908 * 3909 * @param[in] pMemDesc Memory descriptor pointer 3910 * @param[in] pAddress Pointer to address information 3911 * 3912 * @returns nothing 3913 */ 3914 void memdescSetAddress 3915 ( 3916 MEMORY_DESCRIPTOR *pMemDesc, 3917 NvP64 pAddress 3918 ) 3919 { 3920 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3921 pMemDesc->_address = pAddress; 3922 } 3923 3924 /*! 3925 * @brief Get memory descriptor os-specific memory data pointer 3926 * 3927 * The pMemData value is returned by osAllocPages 3928 * 3929 * @param[in] pMemDesc Memory descriptor pointer 3930 * 3931 * @returns Memory data pointer 3932 */ 3933 void *memdescGetMemData 3934 ( 3935 MEMORY_DESCRIPTOR *pMemDesc 3936 ) 3937 { 3938 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc)); 3939 return pMemDesc->_pMemData; 3940 } 3941 3942 /*! 
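 * Illustrative pairing with memdescGetMemData() (a sketch; pMyOsData and
 * myReleaseCb are hypothetical, caller-owned objects):
 *
 *     memdescSetMemData(pMemDesc, pMyOsData, myReleaseCb);
 *     // ... later, typically in the OS layer:
 *     void *pData = memdescGetMemData(pMemDesc);
 *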

/*!
 * @brief Set memory descriptor OS-specific memory data pointer
 *
 * The pMemData value is the one returned by osAllocPages
 *
 * @param[in]  pMemDesc                 Memory descriptor pointer
 * @param[in]  pMemData                 Pointer to new OS-specific memory data
 * @param[in]  pMemDataReleaseCallback  Pointer to callback to be called when
 *                                      the memdesc is freed.
 *
 * @returns nothing
 */
void memdescSetMemData
(
    MEMORY_DESCRIPTOR          *pMemDesc,
    void                       *pMemData,
    MEM_DATA_RELEASE_CALL_BACK *pMemDataReleaseCallback
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pMemData = pMemData;
    pMemDesc->_pMemDataReleaseCallback = pMemDataReleaseCallback;
}

/*!
 * @brief Return memory descriptor volatile attribute
 *
 * @param[in]  pMemDesc  Memory descriptor pointer
 *
 * @returns Volatile or not
 */
NvBool memdescGetVolatility
(
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    NvBool bVolatile = NV_FALSE;

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    if (pMemDesc->_addressSpace == ADDR_SYSMEM)
    {
        bVolatile = (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED);
    }
    else
    {
        NV_ASSERT(pMemDesc->_addressSpace == ADDR_FBMEM);
    }

    return bVolatile;
}

/*!
 * @brief Quick check whether the memory is contiguous or not
 *
 * @param[in]  pMemDesc            Memory descriptor used
 * @param[in]  addressTranslation  Address translation identifier
 *
 * @returns NV_TRUE if contiguous
 */
NvBool memdescGetContiguity(PMEMORY_DESCRIPTOR pMemDesc, ADDRESS_TRANSLATION addressTranslation)
{
    return !!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);
}

/*!
 * @brief Detailed check whether the memory is contiguous or not
 *
 * Unlike memdescGetContiguity(), which only tests the contiguity flag, this
 * walks the page array and verifies that each page physically follows the
 * previous one.
 *
 * @param[in]  pMemDesc            Memory descriptor used
 * @param[in]  addressTranslation  Address translation identifier
 *
 * @returns NV_TRUE if contiguous
 */
NvBool memdescCheckContiguity(PMEMORY_DESCRIPTOR pMemDesc, ADDRESS_TRANSLATION addressTranslation)
{
    NvU32 i;

    if (!(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS))
    {
        for (i = 0; i < (pMemDesc->PageCount - 1); i++)
        {
            if ((memdescGetPte(pMemDesc, addressTranslation, i) + pMemDesc->pageArrayGranularity) !=
                memdescGetPte(pMemDesc, addressTranslation, i + 1))
                return NV_FALSE;
        }
    }

    return NV_TRUE;
}

/*!
 * @brief Set the contiguity of the memory descriptor
 *
 * @param[in]  pMemDesc            Memory descriptor used
 * @param[in]  addressTranslation  Address translation identifier
 * @param[in]  isContiguous        Contiguity value
 *
 * @returns nothing
 */
void memdescSetContiguity(PMEMORY_DESCRIPTOR pMemDesc, ADDRESS_TRANSLATION addressTranslation, NvBool isContiguous)
{
    NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL);

    if (isContiguous)
        pMemDesc->_flags |= MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS;
    else
        pMemDesc->_flags &= ~MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS;
}
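
//
// Illustrative sketch (not part of the driver): memdescGetContiguity() is a
// cheap flag test, while memdescCheckContiguity() walks the PTE array; a
// descriptor flagged noncontiguous whose pages happen to be adjacent passes
// the detailed check but not the quick one.
//
//     if (!memdescGetContiguity(pMemDesc, AT_GPU) &&
//         memdescCheckContiguity(pMemDesc, AT_GPU))
//     {
//         // Pages are physically adjacent even though the descriptor was
//         // created (and flagged) as noncontiguous.
//     }
//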

/*!
 * @brief Get the address space of the memory descriptor
 *
 * @param[in]  pMemDesc  Memory descriptor used
 *
 * @returns Address space
 */
NV_ADDRESS_SPACE memdescGetAddressSpace(PMEMORY_DESCRIPTOR pMemDesc)
{
    NV_ASSERT_OR_RETURN(pMemDesc != NULL, 0);
    return pMemDesc->_addressSpace;
}

/*!
 * @brief Get page size
 *
 * @param[in]  pMemDesc            Memory descriptor pointer
 * @param[in]  addressTranslation  Address translation identifier
 *
 * @returns Current page size.
 */
NvU64 memdescGetPageSize
(
    PMEMORY_DESCRIPTOR  pMemDesc,
    ADDRESS_TRANSLATION addressTranslation
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    return pMemDesc->_pageSize;
}

/*!
 * @brief Set page size
 *
 * @param[in]  pMemDesc            Memory descriptor pointer
 * @param[in]  addressTranslation  Address translation identifier
 * @param[in]  pageSize            New page size
 *
 * @returns nothing
 */
void memdescSetPageSize
(
    PMEMORY_DESCRIPTOR  pMemDesc,
    ADDRESS_TRANSLATION addressTranslation,
    NvU64               pageSize
)
{
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    pMemDesc->_pageSize = pageSize;
}

/*!
 * @brief Get the root memory descriptor.
 *
 * The root memory descriptor is the top-level memory descriptor with no
 * parent, from which this memory descriptor was derived.
 *
 * This can also be used to get the offset of this descriptor within the root.
 *
 * @param[in]   pMemDesc     Pointer to memory descriptor.
 * @param[out]  pRootOffset  Pointer to the root offset parameter.
 *
 * @returns the root memory descriptor.
 */
PMEMORY_DESCRIPTOR memdescGetRootMemDesc
(
    PMEMORY_DESCRIPTOR  pMemDesc,
    NvU64              *pRootOffset
)
{
    NvU64 offset = 0;

    // Find the top-level parent descriptor
    while (pMemDesc->_pParentDescriptor)
    {
        // Sanity check: none of the child descriptors should be allocated
        NV_ASSERT(!pMemDesc->Allocated);
        offset += pMemDesc->subMemOffset;
        pMemDesc = pMemDesc->_pParentDescriptor;
    }

    if (pRootOffset)
    {
        *pRootOffset = offset;
    }

    return pMemDesc;
}
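
//
// Illustrative sketch (not part of the driver): for a chain of submemory
// descriptors, memdescGetRootMemDesc() accumulates subMemOffset at every
// level, so the returned offset is relative to the root allocation.
//
//     NvU64 rootOffset = 0;
//     MEMORY_DESCRIPTOR *pRoot = memdescGetRootMemDesc(pSubMemDesc, &rootOffset);
//
//     // Physical address of the submemdesc's first byte, computed from
//     // the root descriptor:
//     RmPhysAddr pa = memdescGetPhysAddr(pRoot, AT_GPU, rootOffset);
//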

/*!
 * @brief Sets the CUSTOM_HEAP flag of MEMDESC.
 *
 * A memory descriptor can be allocated in the ACR region; in that case this
 * flag needs to be set, since the custom ACR heap is used.
 *
 * @param[in]  pMemDesc  Pointer to memory descriptor.
 *
 * @returns void.
 */
void
memdescSetCustomHeap
(
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    NV_ASSERT(0);
}

/*!
 * @brief Returns the ACR CUSTOM_HEAP flag.
 *
 * @param[in]  pMemDesc  Pointer to memory descriptor.
 *
 * @returns NV_TRUE if flag MEMDESC_FLAGS_CUSTOM_HEAP_ACR is SET.
 */
NvBool
memdescGetCustomHeap
(
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    return NV_FALSE;
}

PIOVAMAPPING memdescGetIommuMap
(
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU32              iovaspaceId
)
{
    PIOVAMAPPING pIommuMap = pMemDesc->_pIommuMappings;
    while (pIommuMap != NULL)
    {
        if (pIommuMap->iovaspaceId == iovaspaceId)
        {
            break;
        }

        pIommuMap = pIommuMap->pNext;
    }

    return pIommuMap;
}

NV_STATUS memdescAddIommuMap
(
    PMEMORY_DESCRIPTOR pMemDesc,
    PIOVAMAPPING       pIommuMap
)
{
    NV_ASSERT_OR_RETURN((pMemDesc->_pIommuMappings == NULL) ||
                        (!memdescIsSubMemoryMemDesc(pMemDesc)), NV_ERR_INVALID_ARGUMENT);

    //
    // Only root physical memdescs can have multiple IOMMU mappings.
    // Submemdescs can only have one, and the list linkage is used
    // instead to link it as a child of the root IOMMU mapping, so we
    // don't want to overwrite that here.
    //
    if (!memdescIsSubMemoryMemDesc(pMemDesc))
    {
        pIommuMap->pNext = pMemDesc->_pIommuMappings;
    }

    pMemDesc->_pIommuMappings = pIommuMap;

    return NV_OK;
}

void memdescRemoveIommuMap
(
    PMEMORY_DESCRIPTOR pMemDesc,
    PIOVAMAPPING       pIommuMap
)
{
    //
    // Only root physical memdescs can have multiple IOMMU mappings.
    // Submemdescs can only have one, and its list linkage links it as a
    // child of the root IOMMU mapping, so there is no list to walk here.
    //
    if (!memdescIsSubMemoryMemDesc(pMemDesc))
    {
        PIOVAMAPPING *ppTmpIommuMap = &pMemDesc->_pIommuMappings;
        while ((*ppTmpIommuMap != NULL) && (*ppTmpIommuMap != pIommuMap))
        {
            ppTmpIommuMap = &(*ppTmpIommuMap)->pNext;
        }

        if (*ppTmpIommuMap != NULL)
        {
            *ppTmpIommuMap = pIommuMap->pNext;
        }
        else
        {
            // The mapping was not found in this descriptor's list.
            NV_ASSERT(*ppTmpIommuMap != NULL);
        }
    }
    else if (pMemDesc->_pIommuMappings == pIommuMap)
    {
        pMemDesc->_pIommuMappings = NULL;
    }
    else
    {
        //
        // Trying to remove a submemory mapping that doesn't belong to this
        // descriptor?
        //
        NV_ASSERT(pMemDesc->_pIommuMappings == pIommuMap);
    }
}
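
//
// Illustrative sketch (not part of the driver): the per-descriptor IOMMU
// mapping list is keyed by IOVA space ID, so a caller can test for an
// existing mapping before acquiring a new one.
//
//     if (memdescGetIommuMap(pMemDesc, iovaspaceId) == NULL)
//     {
//         // No mapping for this IOVA space yet; memdescMapIommu() (below)
//         // will acquire one.
//     }
//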

NV_STATUS memdescMapIommu
(
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU32              iovaspaceId
)
{
#if (RMCFG_FEATURE_PLATFORM_UNIX || RMCFG_FEATURE_PLATFORM_MODS) && !NVCPU_IS_ARM
    if (iovaspaceId != NV_IOVA_DOMAIN_NONE)
    {
        NV_ADDRESS_SPACE addrSpace = memdescGetAddressSpace(pMemDesc);
        OBJGPU *pMappingGpu = gpumgrGetGpuFromId(iovaspaceId);
        PMEMORY_DESCRIPTOR pRootMemDesc = memdescGetRootMemDesc(pMemDesc, NULL);
        if ((addrSpace == ADDR_SYSMEM) || gpumgrCheckIndirectPeer(pMappingGpu, pRootMemDesc->pGpu))
        {
            NV_STATUS status;
            OBJIOVASPACE *pIOVAS = iovaspaceFromId(iovaspaceId);
            NV_ASSERT_OR_RETURN(pIOVAS, NV_ERR_OBJECT_NOT_FOUND);

            status = iovaspaceAcquireMapping(pIOVAS, pMemDesc);
            NV_ASSERT_OR_RETURN(status == NV_OK, status);
        }
    }
#endif

    //
    // Verify that the final physical addresses are indeed addressable by the
    // GPU. We only need to do this for internally allocated sysmem (RM owned)
    // as well as externally allocated/mapped sysmem. Note that addresses for
    // peer (P2P mailbox registers) BARs are actually not handled by the GMMU
    // and support a full 64-bit address width, hence validation is not needed.
    //
    if ((pMemDesc->Allocated ||
         memdescGetFlag(pMemDesc, MEMDESC_FLAGS_EXT_PAGE_ARRAY_MEM) ||
         memdescGetFlag(pMemDesc, MEMDESC_FLAGS_PEER_IO_MEM)) &&
        memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
    {
        // TODO: This should look up the GPU corresponding to the IOVAS instead.
        OBJGPU *pGpu = pMemDesc->pGpu;
        RmPhysAddr dmaWindowStartAddr = gpuGetDmaStartAddress(pGpu);
        RmPhysAddr dmaWindowEndAddr = gpuGetDmaEndAddress_HAL(pGpu);
        RmPhysAddr physAddr;

        if (memdescGetContiguity(pMemDesc, AT_GPU))
        {
            physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
            if ((physAddr < dmaWindowStartAddr) ||
                (physAddr + pMemDesc->Size - 1 > dmaWindowEndAddr))
            {
                NV_PRINTF(LEVEL_ERROR,
                          "0x%llx-0x%llx is not addressable by GPU 0x%x [0x%llx-0x%llx]\n",
                          physAddr, physAddr + pMemDesc->Size - 1,
                          pGpu->gpuId, dmaWindowStartAddr, dmaWindowEndAddr);
                memdescUnmapIommu(pMemDesc, iovaspaceId);
                return NV_ERR_INVALID_ADDRESS;
            }
        }
        else
        {
            NvU32 i;
            for (i = 0; i < pMemDesc->PageCount; i++)
            {
                physAddr = memdescGetPte(pMemDesc, AT_GPU, i);
                if ((physAddr < dmaWindowStartAddr) ||
                    (physAddr + (pMemDesc->pageArrayGranularity - 1) > dmaWindowEndAddr))
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "0x%llx is not addressable by GPU 0x%x [0x%llx-0x%llx]\n",
                              physAddr, pGpu->gpuId, dmaWindowStartAddr,
                              dmaWindowEndAddr);
                    memdescUnmapIommu(pMemDesc, iovaspaceId);
                    return NV_ERR_INVALID_ADDRESS;
                }
            }
        }
    }

    return NV_OK;
}

void memdescUnmapIommu
(
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU32              iovaspaceId
)
{
#if (RMCFG_FEATURE_PLATFORM_UNIX || RMCFG_FEATURE_PLATFORM_MODS) && !NVCPU_IS_ARM
    PIOVAMAPPING pIovaMapping;
    OBJIOVASPACE *pIOVAS;

    if (iovaspaceId == NV_IOVA_DOMAIN_NONE)
        return;

    pIovaMapping = memdescGetIommuMap(pMemDesc, iovaspaceId);
    NV_ASSERT(pIovaMapping);

    pIOVAS = iovaspaceFromMapping(pIovaMapping);
    iovaspaceReleaseMapping(pIOVAS, pIovaMapping);
#endif
}
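
//
// Illustrative sketch (not part of the driver): memdescMapIommu() both
// acquires the IOVA mapping and validates that the resulting addresses fall
// inside the GPU's DMA window, so callers should treat a failure as
// "not DMA-able" and balance the call with memdescUnmapIommu() only on the
// success path.
//
//     NV_STATUS status = memdescMapIommu(pMemDesc, iovaspaceId);
//     if (status == NV_OK)
//     {
//         // ... program DMA using memdescGetPhysAddr()/memdescGetPte() ...
//         memdescUnmapIommu(pMemDesc, iovaspaceId);
//     }
//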

void memdescCheckSubDevicePageSizeConsistency
(
    OBJGPU             *pGpu,
    PMEMORY_DESCRIPTOR  pMemDesc,
    OBJVASPACE         *pVAS,
    NvU64               pageSize,
    NvU64               pageOffset
)
{
    NvU64 tempPageSize, tempPageOffset;
    PMEMORY_DESCRIPTOR pTempMemDesc = NULL;

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
        pTempMemDesc   = memdescGetMemDescFromGpu(pMemDesc, pGpu);
        tempPageSize   = memdescGetPageSize(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS));
        tempPageOffset = memdescGetPhysAddr(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS), 0) & (tempPageSize - 1);

        // Assert if inconsistent
        NV_ASSERT(pageSize == tempPageSize);
        NV_ASSERT(pageOffset == tempPageOffset);
    SLI_LOOP_END
}

void memdescCheckSubDeviceMemContiguityConsistency
(
    OBJGPU             *pGpu,
    PMEMORY_DESCRIPTOR  pMemDesc,
    OBJVASPACE         *pVAS,
    NvBool              bIsMemContiguous
)
{
    NvBool bTempIsMemContiguous = NV_FALSE;

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
        bTempIsMemContiguous = memdescGetContiguity(memdescGetMemDescFromGpu(pMemDesc, pGpu), VAS_ADDRESS_TRANSLATION(pVAS));
        // Assert if inconsistent
        NV_ASSERT(bIsMemContiguous == bTempIsMemContiguous);
    SLI_LOOP_END
}

NV_STATUS memdescCheckSubDeviceKindComprConsistency
(
    OBJGPU            *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc,
    OBJVASPACE        *pVAS,
    NvU32              kind,
    COMPR_INFO        *pComprInfo
)
{
    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
    {
        MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
        NvU32          tempKind;
        COMPR_INFO     tempComprInfo = {0};
        NV_STATUS      status;

        status = memmgrGetKindComprFromMemDesc(pMemoryManager,
                                               memdescGetMemDescFromGpu(pMemDesc, pGpu),
                                               0,
                                               &tempKind, &tempComprInfo);

        if (NV_OK != status)
            SLI_LOOP_RETURN(status);

        // Assert if inconsistent
        NV_ASSERT(kind == tempKind);
        NV_ASSERT(tempComprInfo.compPageShift         == pComprInfo->compPageShift         &&
                  tempComprInfo.kind                  == pComprInfo->kind                  &&
                  tempComprInfo.compPageIndexLo       == pComprInfo->compPageIndexLo       &&
                  tempComprInfo.compPageIndexHi       == pComprInfo->compPageIndexHi       &&
                  tempComprInfo.compTagLineMin        == pComprInfo->compTagLineMin        &&
                  tempComprInfo.compTagLineMultiplier == pComprInfo->compTagLineMultiplier);
    }
    SLI_LOOP_END

    return NV_OK;
}

/*!
 * @brief Get GPAs (guest physical addresses) for given GPU physical addresses.
 *
 * @param[in]      pGpu       GPU for which GPAs are needed
 * @param[in]      pageCount  Size of the array. Should be 1 for contiguous mappings
 * @param[in,out]  pGpa       Array of GPU PAs to be converted to guest PAs
 *
 * @returns NV_STATUS
 */
NV_STATUS memdescGetNvLinkGpa
(
    OBJGPU     *pGpu,
    NvU64       pageCount,
    RmPhysAddr *pGpa
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);

    NV_ASSERT_OR_RETURN(pGpa, NV_ERR_INVALID_ARGUMENT);

    NvU64 pageIndex;
    // For each page, do the GPU PA to GPA conversion
    for (pageIndex = 0; pageIndex < pageCount; pageIndex++)
    {
        pGpa[pageIndex] += pKernelMemorySystem->coherentCpuFbBase;
    }

    return NV_OK;
}

NV_STATUS
memdescSetCtxBufPool
(
    PMEMORY_DESCRIPTOR pMemDesc,
    CTX_BUF_POOL_INFO *pCtxBufPool
)
{
    NV_ASSERT_OR_RETURN(!pMemDesc->Allocated, NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(!memdescHasSubDeviceMemDescs(pMemDesc), NV_ERR_INVALID_ARGUMENT);

    pMemDesc->pCtxBufPool = pCtxBufPool;
    return NV_OK;
}

CTX_BUF_POOL_INFO*
memdescGetCtxBufPool
(
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    NV_ASSERT_OR_RETURN(!memdescHasSubDeviceMemDescs(pMemDesc), NULL);
    return pMemDesc->pCtxBufPool;
}
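
//
// Illustrative sketch (not part of the driver): the context buffer pool must
// be attached before allocation, since memdescSetCtxBufPool() rejects an
// already allocated descriptor.
//
//     NV_ASSERT_OK_OR_RETURN(memdescSetCtxBufPool(pMemDesc, pCtxBufPool));
//     NV_ASSERT_OK_OR_RETURN(memdescAlloc(pMemDesc)); // draws from the pool
//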

/*!
 * @brief Override the registry INST_LOC two-bit enum to an aperture (list) + CPU attr.
 *
 * The caller must set initial default values.
 */
void
memdescOverrideInstLocList
(
    NvU32                     instLoc,        // NV_REG_STR_RM_INST_LOC
    const char               *name,
    const NV_ADDRESS_SPACE  **ppAllocList,
    NvU32                    *pCpuMappingAttr
)
{
    switch (instLoc)
    {
        case NV_REG_STR_RM_INST_LOC_COH:
            NV_PRINTF(LEVEL_INFO, "using coh system memory for %s\n", name);
            *ppAllocList = ADDRLIST_SYSMEM_ONLY;
            *pCpuMappingAttr = NV_MEMORY_CACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_NCOH:
            NV_PRINTF(LEVEL_INFO, "using ncoh system memory for %s\n", name);
            *ppAllocList = ADDRLIST_SYSMEM_ONLY;
            *pCpuMappingAttr = NV_MEMORY_UNCACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_VID:
            NV_PRINTF(LEVEL_INFO, "using video memory for %s\n", name);
            *ppAllocList = ADDRLIST_FBMEM_ONLY;
            *pCpuMappingAttr = NV_MEMORY_WRITECOMBINED;
            break;
        case NV_REG_STR_RM_INST_LOC_DEFAULT:
        default:
            // Do not update parameters
            break;
    }
}

/*!
 * @brief Override wrapper for callers that need a single aperture
 */
void
memdescOverrideInstLoc
(
    NvU32             instLoc,
    const char       *name,
    NV_ADDRESS_SPACE *pAddrSpace,
    NvU32            *pCpuMappingAttr
)
{
    const NV_ADDRESS_SPACE *pAllocList = NULL;

    memdescOverrideInstLocList(instLoc, name, &pAllocList, pCpuMappingAttr);
    if (pAllocList != NULL)
        *pAddrSpace = pAllocList[0];
}

/*!
 * @brief Override the physical address width
 *
 * @param[in]  pGpu
 * @param[in]  pMemDesc      Memory descriptor to update
 * @param[in]  addressWidth  New address width, in bits
 *
 * @returns nothing
 */
void
memdescOverridePhysicalAddressWidthWindowsWAR
(
    OBJGPU            *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              addressWidth
)
{
    return;
}
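
//
// Illustrative sketch (not part of the driver): a typical caller seeds the
// defaults and only then applies the registry override. The instLoc value
// here is a hypothetical placeholder standing in for a decoded
// NV_REG_STR_RM_INST_LOC registry field for the buffer in question.
//
//     const NV_ADDRESS_SPACE *pAllocList = ADDRLIST_FBMEM_PREFERRED; // default
//     NvU32 cpuMappingAttr = NV_MEMORY_UNCACHED;                     // default
//
//     memdescOverrideInstLocList(instLoc, "instance block",
//                                &pAllocList, &cpuMappingAttr);
//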

/*!
 * @brief Register MEMDESC to GSP
 *
 * The registration lasts until memdescDeregisterFromGSP() is called, which
 * always happens when the memory is freed.
 *
 * <GSP-TODO> Have argument as pMemory*; Move to NVOC
 *
 * @param[in]  pGpu
 * @param[in]  hClient   NvHandle
 * @param[in]  hParent   NvHandle
 * @param[in]  hMemory   NvHandle
 *
 * @returns NV_STATUS
 */
NV_STATUS
memdescRegisterToGSP
(
    OBJGPU   *pGpu,
    NvHandle  hClient,
    NvHandle  hParent,
    NvHandle  hMemory
)
{
    NV_STATUS          status     = NV_OK;
    Memory            *pMemory    = NULL;
    RsResourceRef     *pMemoryRef = NULL;
    MEMORY_DESCRIPTOR *pMemDesc   = NULL;
    NvU32              hClass;

    // Nothing to do without GSP
    if (!IS_GSP_CLIENT(pGpu))
    {
        return NV_OK;
    }

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, serverutilGetResourceRef(hClient, hMemory, &pMemoryRef));

    pMemory = dynamicCast(pMemoryRef->pResource, Memory);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemory != NULL, NV_ERR_INVALID_OBJECT);

    pMemDesc = pMemory->pMemDesc;

    // Check: memory already registered
    if ((pMemDesc->_flags & MEMDESC_FLAGS_REGISTERED_TO_GSP) != 0)
    {
        return NV_OK;
    }

    // Check: no subdevice memDescs
    NV_CHECK_OR_RETURN(LEVEL_ERROR,
                       !memdescHasSubDeviceMemDescs(pMemDesc),
                       NV_ERR_INVALID_STATE);

    // Check: SYSMEM or FBMEM only
    if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
        hClass = NV01_MEMORY_LIST_FBMEM;
    else if (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
        hClass = NV01_MEMORY_LIST_SYSTEM;
    else
        return NV_ERR_INVALID_STATE;

    NvU32 os02Flags = 0;

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
                          RmDeprecatedConvertOs32ToOs02Flags(pMemory->Attr,
                                                             pMemory->Attr2,
                                                             pMemory->Flags,
                                                             &os02Flags));
    NV_RM_RPC_ALLOC_MEMORY(pGpu,
                           hClient,
                           hParent,
                           hMemory,
                           hClass,
                           os02Flags,
                           pMemDesc,
                           status);

    if (status == NV_OK)
    {
        // Mark memory as registered in GSP
        pMemDesc->_flags |= MEMDESC_FLAGS_REGISTERED_TO_GSP;
    }

    return status;
}
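
//
// Illustrative sketch (not part of the driver): registration is idempotent
// (the MEMDESC_FLAGS_REGISTERED_TO_GSP check returns early), so callers can
// register unconditionally and rely on memdescDeregisterFromGSP() at free
// time to undo it.
//
//     NV_ASSERT_OK(memdescRegisterToGSP(pGpu, hClient, hParent, hMemory));
//     // ... use the memory ...
//     NV_ASSERT_OK(memdescDeregisterFromGSP(pGpu, hClient, hParent, hMemory));
//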

/*!
 * @brief Deregister MEMDESC from GSP
 *
 * This is always called when the memory is freed.
 *
 * <GSP-TODO> Have argument as pMemory*; Move to NVOC
 *
 * @param[in]  pGpu
 * @param[in]  hClient   NvHandle
 * @param[in]  hParent   NvHandle
 * @param[in]  hMemory   NvHandle
 *
 * @returns NV_STATUS
 */
NV_STATUS
memdescDeregisterFromGSP
(
    OBJGPU   *pGpu,
    NvHandle  hClient,
    NvHandle  hParent,
    NvHandle  hMemory
)
{
    NV_STATUS          status     = NV_OK;
    Memory            *pMemory    = NULL;
    RsResourceRef     *pMemoryRef = NULL;
    MEMORY_DESCRIPTOR *pMemDesc   = NULL;

    // Nothing to do without GSP
    if ((pGpu == NULL) ||
        !IS_GSP_CLIENT(pGpu))
    {
        return NV_OK;
    }

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, serverutilGetResourceRef(hClient, hMemory, &pMemoryRef));

    pMemory = dynamicCast(pMemoryRef->pResource, Memory);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pMemory != NULL, NV_ERR_INVALID_OBJECT);

    pMemDesc = pMemory->pMemDesc;

    // Nothing to do if memory is not registered to GSP
    if ((pMemDesc == NULL) ||
        (pMemDesc->_flags & MEMDESC_FLAGS_REGISTERED_TO_GSP) == 0)
    {
        return NV_OK;
    }

    NV_RM_RPC_FREE(pGpu,
                   hClient,
                   hParent,
                   hMemory,
                   status);

    if (status == NV_OK)
    {
        // Mark memory as no longer registered in GSP
        pMemDesc->_flags &= ~MEMDESC_FLAGS_REGISTERED_TO_GSP;
    }

    return status;
}

void
memdescSetName(OBJGPU *pGpu, MEMORY_DESCRIPTOR *pMemDesc, const char *name, const char *suffix)
{
    return;
}

NV_STATUS
memdescSendMemDescToGSP(OBJGPU *pGpu, MEMORY_DESCRIPTOR *pMemDesc, NvHandle *pHandle)
{
    NV_STATUS                        status          = NV_OK;
    MemoryManager                   *pMemoryManager  = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU32                            flags           = 0;
    NvU32                            index           = 0;
    NvU32                            hClass;
    NvU64                           *pageNumberList  = NULL;
    RM_API                          *pRmApi          = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_MEMORY_LIST_ALLOCATION_PARAMS listAllocParams = {0};

    // Nothing to do without GSP
    if (!IS_GSP_CLIENT(pGpu))
    {
        return NV_OK;
    }

    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            hClass = NV01_MEMORY_LIST_FBMEM;
            break;

        case ADDR_SYSMEM:
            hClass = NV01_MEMORY_LIST_SYSTEM;
            break;

        default:
            return NV_ERR_NOT_SUPPORTED;
    }

    // Initialize parameters with pMemDesc information
    listAllocParams.pteAdjust = pMemDesc->PteAdjust;
    listAllocParams.format    = memdescGetPteKind(pMemDesc);
    listAllocParams.size      = pMemDesc->Size;
    listAllocParams.hClient   = NV01_NULL_OBJECT;
    listAllocParams.hParent   = NV01_NULL_OBJECT;
    listAllocParams.hObject   = NV01_NULL_OBJECT;
    listAllocParams.limit     = pMemDesc->Size - 1;
    listAllocParams.flagsOs02 = (DRF_DEF(OS02, _FLAGS, _MAPPING, _NO_MAP) |
                                 (flags & DRF_SHIFTMASK(NVOS02_FLAGS_COHERENCY)));

    // Handle pageCount based on pMemDesc contiguity
    if (!memdescGetContiguity(pMemDesc, AT_GPU))
    {
        listAllocParams.flagsOs02 |= DRF_DEF(OS02, _FLAGS, _PHYSICALITY, _NONCONTIGUOUS);
        listAllocParams.pageCount = pMemDesc->PageCount;
    }
    else
    {
        listAllocParams.pageCount = 1;
    }

    // Initialize pageNumberList
    pageNumberList = portMemAllocNonPaged(sizeof(NvU64) * listAllocParams.pageCount);
    NV_ASSERT_OR_RETURN(pageNumberList != NULL, NV_ERR_NO_MEMORY);
    for (index = 0; index < listAllocParams.pageCount; index++)
        pageNumberList[index] = memdescGetPte(pMemDesc, AT_GPU, index) >> RM_PAGE_SHIFT;
    listAllocParams.pageNumberList = pageNumberList;

    // Create MemoryList object
    NV_ASSERT_OK_OR_GOTO(status,
                         pRmApi->Alloc(pRmApi,
                                       pMemoryManager->hClient,
                                       pMemoryManager->hSubdevice,
                                       pHandle,
                                       hClass,
                                       &listAllocParams,
                                       sizeof(listAllocParams)),
                         end);

    // Register MemoryList object to GSP
    NV_ASSERT_OK_OR_GOTO(status,
                         memdescRegisterToGSP(pGpu,
                                              pMemoryManager->hClient,
                                              pMemoryManager->hSubdevice,
                                              *pHandle),
                         end);

end:
    if ((status != NV_OK) && (*pHandle != NV01_NULL_OBJECT))
        pRmApi->Free(pRmApi, pMemoryManager->hClient, *pHandle);

    if (pageNumberList != NULL)
        portMemFree(pageNumberList);

    return status;
}

NV_STATUS
memdescSetPageArrayGranularity
(
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64              pageArrayGranularity
)
{
    // Make sure pageArrayGranularity is a power-of-2 value.
    NV_ASSERT_OR_RETURN((pageArrayGranularity & (pageArrayGranularity - 1)) == 0, NV_ERR_INVALID_ARGUMENT);

    // Allow setting the same granularity.
    if (pMemDesc->pageArrayGranularity == pageArrayGranularity)
    {
        return NV_OK;
    }

    // Make sure setting the page array granularity happens before the pteArray is populated.
    NV_ASSERT_OR_RETURN(pMemDesc->_pteArray[0] == 0, NV_ERR_INVALID_STATE);

    pMemDesc->pageArrayGranularity = pageArrayGranularity;

    return NV_OK;
}
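
//
// Illustrative sketch (not part of the driver): the page array granularity
// must be set while the descriptor is still empty, i.e. between creation and
// allocation. The memdescCreate() argument list below follows this file's
// conventions but is reproduced from memory; treat it as an assumption.
//
//     MEMORY_DESCRIPTOR *pMemDesc = NULL;
//
//     NV_ASSERT_OK(memdescCreate(&pMemDesc, pGpu, size, 0, NV_TRUE,
//                                ADDR_SYSMEM, NV_MEMORY_UNCACHED,
//                                MEMDESC_FLAGS_NONE));
//     NV_ASSERT_OK(memdescSetPageArrayGranularity(pMemDesc, RM_PAGE_SIZE_64K));
//     NV_ASSERT_OK(memdescAlloc(pMemDesc));
//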