1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/mem_mgr/mem_mgr.h" 25 #include "gpu/mem_mgr/heap_base.h" 26 #include "gpu/mem_mgr/mem_utils.h" 27 #include "gpu/mem_mgr/virt_mem_allocator_common.h" 28 #include "os/nv_memory_type.h" 29 #include "core/locks.h" 30 #include "ctrl/ctrl2080.h" 31 #include "rmapi/rs_utils.h" 32 33 #include "gpu/bus/kern_bus.h" 34 35 // Memory copy block size for if we need to cut up a mapping 36 #define MEMORY_COPY_BLOCK_SIZE 1024 * 1024 37 38 /* ------------------------ Private functions --------------------------------------- */ 39 40 /*! 
 * @brief This utility routine helps in determining the appropriate
 *        memory transfer technique to be used
 *
 * @param[in] pMemoryManager MemoryManager object
 * @param[in] pDst           TRANSFER_SURFACE for the destination (NULL for pure reads)
 * @param[in] pSrc           TRANSFER_SURFACE for the source (NULL for memset/writes)
 *
 * @return TRANSFER_TYPE selected for the requested operation
 */
static TRANSFER_TYPE
memmgrGetMemTransferType
(
    MemoryManager *pMemoryManager,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc
)
{
    TRANSFER_TYPE transferType = TRANSFER_TYPE_PROCESSOR;
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

    //
    // In case of copy, both dest and src will be passed
    // In case of memset/memread/memwrite either dest or src will be passed
    //
    if ((pDst != NULL) && (pSrc != NULL) &&
        (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) &&
        (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM))
    {
        // sysmem <-> sysmem: the CPU can always access both endpoints
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (((pDst != NULL) &&
              (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)) ||
             ((pSrc != NULL) &&
              (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)))
    {
        // At least one endpoint is in sysmem: still CPU-accessible
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (kbusIsBarAccessBlocked(pKernelBus))
    {
        //
        // All endpoints are in vidmem and BAR access is blocked, so the
        // transfer has to be routed through GSP
        //
        transferType = TRANSFER_TYPE_GSP_DMA;
    }
    return transferType;
}

/*!
 * @brief Allocates, CPU-maps and zero-fills a sysmem staging surface in
 *        unprotected memory.
 *
 * On failure all three output pointers are reset to NULL and any partially
 * created descriptor is freed.
 *
 * @param[in]  pGpu      OBJGPU pointer
 * @param[in]  size      Size in bytes of the surface
 * @param[out] ppMemDesc Newly created memory descriptor
 * @param[out] ppMap     CPU mapping of the surface
 * @param[out] ppPriv    Opaque mapping cookie required later by memdescUnmapOld()
 */
static NV_STATUS
_memmgrAllocAndMapSurface
(
    OBJGPU *pGpu,
    NvU64 size,
    MEMORY_DESCRIPTOR **ppMemDesc,
    void **ppMap,
    void **ppPriv
)
{
    NV_STATUS status;
    NvU64 flags = 0;

    NV_ASSERT_OR_RETURN(ppMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppMap != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppPriv != NULL, NV_ERR_INVALID_ARGUMENT);

    // Staging surface lives in unprotected sysmem (see flag name);
    // presumably so it remains accessible to GSP — callers use it as a
    // bounce buffer for GSP-driven transfers.
    flags = MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    NV_ASSERT_OK_OR_RETURN(
        memdescCreate(ppMemDesc, pGpu, size, RM_PAGE_SIZE, NV_TRUE,
                      ADDR_SYSMEM, NV_MEMORY_UNCACHED, flags));

    NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(*ppMemDesc), failed);

    NV_ASSERT_OK_OR_GOTO(status,
        memdescMapOld(*ppMemDesc, 0, size, NV_TRUE, NV_PROTECT_READ_WRITE,
                      ppMap, ppPriv),
        failed);

    // Clear surface before use
    portMemSet(*ppMap, 0, size);

    return NV_OK;
failed:
    // memdescFree()/memdescDestroy() tolerate a descriptor that was only
    // partially set up, so a single cleanup path suffices.
    memdescFree(*ppMemDesc);
    memdescDestroy(*ppMemDesc);

    *ppMemDesc = NULL;
    *ppMap = NULL;
    *ppPriv = NULL;

    return status;
}

/*!
 * @brief Unmaps and frees a surface created by _memmgrAllocAndMapSurface().
 *
 * @param[in] pMemDesc Memory descriptor of the surface
 * @param[in] pMap     CPU mapping returned at allocation time
 * @param[in] pPriv    Mapping cookie returned at allocation time
 */
static void
_memmgrUnmapAndFreeSurface
(
    MEMORY_DESCRIPTOR *pMemDesc,
    void *pMap,
    void *pPriv
)
{
    memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
}

/*!
 * @brief This function is used for writing/reading data to/from a client
 *        provided buffer from/to some source region in vidmem
 *
 * The transfer is staged through an unprotected sysmem bounce buffer and
 * executed by GSP via a control call.
 *
 * @param[in] pGpu  OBJGPU pointer
 * @param[in] pDst  TRANSFER_SURFACE info for the vidmem region
 * @param[in] pBuf  Client provided buffer
 * @param[in] size  Size in bytes of the memory transfer
 * @param[in] bRead TRUE for read and FALSE for write
 */
static NV_STATUS
_memmgrMemReadOrWriteWithGsp
(
    OBJGPU *pGpu,
    TRANSFER_SURFACE *pDst,
    void *pBuf,
    NvU64 size,
    NvBool bRead
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect GSP to be used for reading/writing from/to sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    // Allocate and map the staging buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Copy the data to staging buffer before poking GSP for copying
    if (!bRead)
        portMemCopy(pStagingBufMap, size, pBuf, size);

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (bRead)
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pDst->pMemDesc);
        gspParams.src.offset = pDst->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pDst->pMemDesc);

        // Destination surface in unprotected sysmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pStagingBuf);
        gspParams.dst.offset = 0;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
    }
    else
    {
        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Read contents from staging buffer after GSP is done copying
    if (bRead)
        portMemCopy(pBuf, size, pStagingBufMap, size);

failed:
    // On the success path `status` holds NV_OK from the Control call above,
    // so falling through the label returns the correct value.
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

/*!
 * @brief This function is used for copying data b/w two memory regions
 *        using GSP.
 *
 * @param[in] pGpu  OBJGPU pointer
 * @param[in] pDst  TRANSFER_SURFACE info for destination region
 * @param[in] pSrc  TRANSFER_SURFACE info for source region
 * @param[in] size  Size in bytes of the memory transfer
 */
static NV_STATUS
_memmgrMemcpyWithGsp
(
    OBJGPU *pGpu,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc,
    NvU64 size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    NvU8 *pMap = NULL;
    void *pPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    //
    // Do not expect GSP to be used for copying data b/w two surfaces
    // in sysmem. For SPT, there is no non-CPR vidmem. So, allow vidmem
    // to vidmem copies in plain text. For copies b/w CPR and non-CPR
    // vidmem, encryption/decryption needs to happen at the endpoints.
    //
    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM &&
        memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // Allocate and map the bounce buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)
    {
        //
        // NOTE(review): the mapping below covers [0, size) of the source
        // descriptor, but the copy reads at pMap + pSrc->offset — this
        // assumes pSrc->offset == 0 or that the mapping extends past `size`;
        // TODO confirm against callers.
        //
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pSrc->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        // Copy to staging buffer
        portMemCopy(pStagingBufMap, size, pMap + pSrc->offset, size);

        memdescUnmapOld(pSrc->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);

        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }
    else
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pSrc->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pSrc->pMemDesc);
        gspParams.src.offset = pSrc->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pSrc->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pSrc->pMemDesc);

        if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_FBMEM)
        {
            // Destination surface in vidmem
            gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
            gspParams.dst.offset = pDst->offset;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
            gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
        }
        else
        {
            // Destination surface in unprotected sysmem; GSP writes into the
            // staging buffer and the CPU copies it out after the control call.
            gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pStagingBuf);
            gspParams.dst.offset = 0;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
            gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
        }
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Copy from staging buffer to destination
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        //
        // NOTE(review): same mapping-length-vs-offset assumption as the
        // source path above — TODO confirm.
        //
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pDst->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        portMemCopy(pMap + pDst->offset, size, pStagingBufMap, size);

        memdescUnmapOld(pDst->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);
    }

failed:
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

/*!
 * @brief Memsets a vidmem region to a constant byte value via a GSP
 *        control call.
 *
 * @param[in] pGpu  OBJGPU pointer
 * @param[in] pDst  TRANSFER_SURFACE info for the destination region
 * @param[in] value Value to write to the region
 * @param[in] size  Size in bytes of the region
 */
static NV_STATUS
_memmgrMemsetWithGsp
(
    OBJGPU *pGpu,
    TRANSFER_SURFACE *pDst,
    NvU32 value,
    NvU64 size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect to use GSP to memset surfaces in sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMSET;
    gspParams.transferSize = size;
    gspParams.value = value;
    gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
    gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
    gspParams.dst.offset = pDst->offset;
    gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
    gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);

    // Send the control call
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)));

    return NV_OK;
}

/*!
 * @brief This function is used for copying data b/w two memory regions
 * using the specified memory transfer technique. Both memory regions
 * can be in the same aperture or in different apertures.
411 * 412 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 413 * @param[in] pSrcInfo TRANSFER_SURFACE info for source region 414 * @param[in] size Size in bytes of the memory transfer 415 * @param[in] transferType Memory transfer technique to be used 416 * @param[in] flags Flags 417 */ 418 static NV_STATUS 419 memmgrMemCopyWithTransferType 420 ( 421 MemoryManager *pMemoryManager, 422 TRANSFER_SURFACE *pDstInfo, 423 TRANSFER_SURFACE *pSrcInfo, 424 NvU32 size, 425 TRANSFER_TYPE transferType, 426 NvU32 flags 427 ) 428 { 429 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 430 NvU8 *pSrc; 431 NvU8 *pDst; 432 433 // Sanitize the input 434 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 435 NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT); 436 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 437 NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 438 NV_ASSERT_OR_RETURN(!memdescDescIsEqual(pDstInfo->pMemDesc, pSrcInfo->pMemDesc), 439 NV_ERR_INVALID_ARGUMENT); 440 441 switch (transferType) 442 { 443 case TRANSFER_TYPE_PROCESSOR: 444 pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE); 445 NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 446 pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 447 if (pSrc == NULL) 448 { 449 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, 0); 450 NV_ASSERT_OR_RETURN(0, NV_ERR_INSUFFICIENT_RESOURCES); 451 } 452 453 portMemCopy(pDst + pDstInfo->offset, size, pSrc + pSrcInfo->offset, size); 454 455 memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 456 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags); 457 break; 458 case TRANSFER_TYPE_GSP_DMA: 459 if (IS_GSP_CLIENT(pGpu)) 460 { 461 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 462 NV_ASSERT_OK_OR_RETURN( 463 _memmgrMemcpyWithGsp(pGpu, pDstInfo, pSrcInfo, size)); 464 } 465 else 466 { 467 NV_ASSERT_OR_RETURN(0, 
NV_ERR_INVALID_ARGUMENT); 468 } 469 break; 470 case TRANSFER_TYPE_CE: 471 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 472 break; 473 } 474 475 return NV_OK; 476 } 477 478 /*! 479 * @brief This function is used for setting a memory region to a constant state 480 * using a specified memory transfer technique 481 * 482 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 483 * @param[in] value Value to be written to the region 484 * @param[in] size Size in bytes of the memory to be initialized 485 * @param[in] transferType Memory transfer technique to be used 486 * @param[in] flags Flags 487 */ 488 static NV_STATUS 489 memmgrMemSetWithTransferType 490 ( 491 MemoryManager *pMemoryManager, 492 TRANSFER_SURFACE *pDstInfo, 493 NvU32 value, 494 NvU32 size, 495 TRANSFER_TYPE transferType, 496 NvU32 flags 497 ) 498 { 499 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 500 NvU8 *pDst; 501 502 // Sanitize the input 503 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 504 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 505 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 506 NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 507 508 switch (transferType) 509 { 510 case TRANSFER_TYPE_PROCESSOR: 511 pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE); 512 NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 513 514 portMemSet(pDst + pDstInfo->offset, value, size); 515 516 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags); 517 break; 518 case TRANSFER_TYPE_GSP_DMA: 519 if (IS_GSP_CLIENT(pGpu)) 520 { 521 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 522 NV_ASSERT_OK_OR_RETURN( 523 _memmgrMemsetWithGsp(pGpu, pDstInfo, value, size)); 524 } 525 else 526 { 527 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 528 } 529 break; 530 case TRANSFER_TYPE_CE: 531 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 532 break; 533 } 534 535 return NV_OK; 536 } 
537 538 /*! 539 * @brief This function is used to map the appropriate memory descriptor, 540 * copy the memory from the given buffer, and then unmap. 541 * 542 * @param[in] pMemDesc Memory descriptor of buffer to write 543 * @param[in] pBuf Buffer allocated by caller 544 * @param[in] offset Offset of buffer to write 545 * @param[in] size Size in bytes of the buffer 546 * @param[in] flags Flags 547 */ 548 static NV_STATUS 549 memmgrMemWriteMapAndCopy 550 ( 551 MemoryManager *pMemoryManager, 552 MEMORY_DESCRIPTOR *pMemDesc, 553 void *pBuf, 554 NvU64 offset, 555 NvU64 size, 556 NvU32 flags 557 ) 558 { 559 NvU8 *pDst = NULL; 560 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 561 562 pDst = memdescMapInternal(pGpu, pMemDesc, TRANSFER_FLAGS_NONE); 563 NV_CHECK_OR_RETURN(LEVEL_SILENT, pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 564 565 portMemCopy(pDst + offset, size, pBuf, size); 566 memdescUnmapInternal(pGpu, pMemDesc, flags); 567 568 return NV_OK; 569 } 570 571 /*! 572 * @brief This function is used for writing data placed in a caller passed buffer 573 * to a given memory region while only mapping regions as large as the given 574 * block size. 
575 * 576 * @param[in] pMemDesc Memory descriptor of buffer to write 577 * @param[in] pBuf Buffer allocated by caller 578 * @param[in] baseOffset Offset of entire buffer to write 579 * @param[in] size Size in bytes of the buffer 580 * @param[in] flags Flags 581 * @param[in] blockSize Maximum size of a mapping to use 582 */ 583 static NV_STATUS 584 memmgrMemWriteInBlocks 585 ( 586 MemoryManager *pMemoryManager, 587 MEMORY_DESCRIPTOR *pMemDesc, 588 void *pBuf, 589 NvU64 baseOffset, 590 NvU64 size, 591 NvU32 flags, 592 NvU32 blockSize 593 ) 594 { 595 NV_STATUS status = NV_OK; 596 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 597 NvU64 remaining = size; 598 NvU64 offset = 0; 599 600 while ((remaining > 0) && (status == NV_OK)) 601 { 602 MEMORY_DESCRIPTOR *pSubMemDesc = NULL; 603 NvU32 mapSize = NV_MIN(blockSize, remaining); 604 605 NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, memdescCreateSubMem(&pSubMemDesc, pMemDesc, pGpu, offset + baseOffset, mapSize)); 606 607 // Set the offset to 0, as the sub descriptor already starts at the offset 608 status = memmgrMemWriteMapAndCopy(pMemoryManager, pSubMemDesc, (NvU8 *)pBuf + offset, 609 0, mapSize, flags); 610 611 memdescFree(pSubMemDesc); 612 memdescDestroy(pSubMemDesc); 613 614 offset += mapSize; 615 remaining -= mapSize; 616 } 617 618 return status; 619 } 620 621 /*! 
622 * @brief This function is used for writing data placed in a caller passed buffer 623 * to a given memory region using the specified memory transfer technique 624 * 625 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 626 * @param[in] pBuf Buffer allocated by caller 627 * @param[in] size Size in bytes of the buffer 628 * @param[in] transferType Memory transfer technique to be used 629 * @param[in] flags Flags 630 */ 631 static NV_STATUS 632 memmgrMemWriteWithTransferType 633 ( 634 MemoryManager *pMemoryManager, 635 TRANSFER_SURFACE *pDstInfo, 636 void *pBuf, 637 NvU64 size, 638 TRANSFER_TYPE transferType, 639 NvU32 flags 640 ) 641 { 642 NvU8 *pMapping = memdescGetKernelMapping(pDstInfo->pMemDesc); 643 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 644 645 // Sanitize the input 646 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 647 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 648 NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT); 649 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 650 NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 651 652 if (pMapping != NULL) 653 { 654 portMemCopy(pMapping + pDstInfo->offset, size, pBuf, size); 655 return NV_OK; 656 } 657 658 switch (transferType) 659 { 660 case TRANSFER_TYPE_PROCESSOR: 661 if (memmgrMemWriteMapAndCopy(pMemoryManager, pDstInfo->pMemDesc, pBuf, pDstInfo->offset, size, flags) != NV_OK) 662 { 663 // If we fail to map a block large enough for the entire transfer, split up the mapping. 
664 NV_ASSERT_OK_OR_RETURN(memmgrMemWriteInBlocks(pMemoryManager, pDstInfo->pMemDesc, pBuf, 665 pDstInfo->offset, size, flags, MEMORY_COPY_BLOCK_SIZE)); 666 } 667 break; 668 case TRANSFER_TYPE_GSP_DMA: 669 if (IS_GSP_CLIENT(pGpu)) 670 { 671 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 672 NV_ASSERT_OK_OR_RETURN( 673 _memmgrMemReadOrWriteWithGsp(pGpu, pDstInfo, pBuf, size, 674 NV_FALSE /* bRead */)); 675 } 676 else 677 { 678 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 679 } 680 break; 681 case TRANSFER_TYPE_CE: 682 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 683 break; 684 } 685 686 return NV_OK; 687 } 688 689 /*! 690 * @brief This function is used for reading specified number of bytes from 691 * a source memory region into a caller passed buffer using a specified 692 * memory transfer technique 693 * 694 * @param[in] pSrcInfo TRANSFER_SURFACE info for the source region 695 * @param[in] pBuf Caller allocated buffer 696 * @param[in] size Size in bytes of the buffer 697 * @param[in] transferType Memory transfer technique to be used 698 * @param[in] flags Flags 699 */ 700 static NV_STATUS 701 memmgrMemReadWithTransferType 702 ( 703 MemoryManager *pMemoryManager, 704 TRANSFER_SURFACE *pSrcInfo, 705 void *pBuf, 706 NvU64 size, 707 TRANSFER_TYPE transferType, 708 NvU32 flags 709 ) 710 { 711 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 712 NvU8 *pSrc; 713 NvU8 *pMapping = memdescGetKernelMapping(pSrcInfo->pMemDesc); 714 715 716 // Sanitize the input 717 NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT); 718 NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 719 NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT); 720 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 721 NV_ASSERT_OR_RETURN(pSrcInfo->offset + size <= pSrcInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 722 723 if (pMapping != NULL) 724 { 725 portMemCopy(pBuf, size, pMapping + pSrcInfo->offset, size); 726 return NV_OK; 727 } 728 729 switch (transferType) 
730 { 731 case TRANSFER_TYPE_PROCESSOR: 732 pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 733 NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 734 735 portMemCopy(pBuf, size, pSrc + pSrcInfo->offset, size); 736 737 memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, 0); 738 break; 739 case TRANSFER_TYPE_GSP_DMA: 740 if (IS_GSP_CLIENT(pGpu)) 741 { 742 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 743 NV_ASSERT_OK_OR_RETURN( 744 _memmgrMemReadOrWriteWithGsp(pGpu, pSrcInfo, pBuf, size, 745 NV_TRUE /* bRead */)); 746 } 747 else 748 { 749 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 750 } 751 break; 752 case TRANSFER_TYPE_CE: 753 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 754 break; 755 } 756 757 return NV_OK; 758 } 759 760 /* ------------------------ Public functions --------------------------------------- */ 761 762 NvU64 memUtilsLeastCommonAlignment(NvU64 align1, NvU64 align2) 763 { 764 NvU64 a, b; // For Euclid's algorithm 765 NvU64 lcm; // Least Common Multiple of align1 and align2 766 NvU64 maxAlignment = NV_U64_MAX; 767 768 // WOLOG, make sure align1 >= align2. 769 // 770 if (align2 > align1) 771 { 772 NvU64 tmp = align1; 773 align1 = align2; 774 align2 = tmp; 775 } 776 777 // If align2 is 0, return min(align1, maxAlignment) 778 // 779 if (align2 == 0) 780 { 781 return align1 < maxAlignment ? align1 : maxAlignment; 782 } 783 784 // Use Euclid's algorithm (GCD(a, b) = GCD(b, a % b)) to find the 785 // GCD of the two alignments, and use the GCD to find the LCM. 786 // 787 a = align1; 788 b = align2; 789 while (b != 0) 790 { 791 NvU64 old_a = a; 792 a = b; 793 b = old_a % b; 794 NV_ASSERT(a > b); // Ensure termination. Should never fail. 795 } 796 lcm = align1 * (align2 / a); // May overflow 797 798 // Return min(lcm, maxAlignment). Also return maxAlignment if the 799 // lcm calculation overflowed, since that means it must have been 800 // much bigger than maxAlignment. 
801 // 802 if (lcm > maxAlignment || lcm < align1 || 803 0 != (lcm % align1) || 0 != (lcm % align2)) 804 { 805 NV_CHECK_FAILED(LEVEL_ERROR, "Alignment limit exceeded"); 806 return maxAlignment; 807 } 808 return lcm; 809 } 810 811 void memUtilsInitFBAllocInfo 812 ( 813 NV_MEMORY_ALLOCATION_PARAMS *pAllocParams, 814 FB_ALLOC_INFO *pFbAllocInfo, 815 NvHandle hClient, 816 NvHandle hDevice 817 ) 818 { 819 pFbAllocInfo->pageFormat->type = pAllocParams->type; 820 pFbAllocInfo->owner = pAllocParams->owner; 821 pFbAllocInfo->hwResId = 0; 822 pFbAllocInfo->pad = 0; 823 pFbAllocInfo->alignPad = 0; 824 pFbAllocInfo->height = pAllocParams->height; 825 pFbAllocInfo->width = pAllocParams->width; 826 pFbAllocInfo->pitch = pAllocParams->pitch; 827 pFbAllocInfo->size = pAllocParams->size; 828 pFbAllocInfo->origSize = pAllocParams->size; 829 pFbAllocInfo->adjustedSize = pAllocParams->size; 830 pFbAllocInfo->offset = ~0; 831 pFbAllocInfo->pageFormat->flags = pAllocParams->flags; 832 pFbAllocInfo->pageFormat->attr = pAllocParams->attr; 833 pFbAllocInfo->retAttr = pAllocParams->attr; 834 pFbAllocInfo->pageFormat->attr2 = pAllocParams->attr2; 835 pFbAllocInfo->retAttr2 = pAllocParams->attr2; 836 pFbAllocInfo->format = pAllocParams->format; 837 pFbAllocInfo->comprCovg = pAllocParams->comprCovg; 838 pFbAllocInfo->zcullCovg = 0; 839 pFbAllocInfo->ctagOffset = pAllocParams->ctagOffset; 840 pFbAllocInfo->bIsKernelAlloc = NV_FALSE; 841 pFbAllocInfo->internalflags = 0; 842 pFbAllocInfo->hClient = hClient; 843 pFbAllocInfo->hDevice = hDevice; 844 845 if ((pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) || 846 (pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE)) 847 pFbAllocInfo->align = pAllocParams->alignment; 848 else 849 pFbAllocInfo->align = RM_PAGE_SIZE; 850 851 if (pAllocParams->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) 852 { 853 pFbAllocInfo->offset = pAllocParams->offset; 854 pFbAllocInfo->desiredOffset = pAllocParams->offset; 855 } 856 } 857 858 859 
MEMORY_DESCRIPTOR * 860 memmgrMemUtilsGetMemDescFromHandle_IMPL 861 ( 862 MemoryManager *pMemoryManager, 863 NvHandle hClient, 864 NvHandle hMemory 865 ) 866 { 867 RsResourceRef *pMemoryRef; 868 Memory *pMemory; 869 870 if (serverutilGetResourceRef(hClient, hMemory, &pMemoryRef) != NV_OK) 871 { 872 return NULL; 873 } 874 875 pMemory = dynamicCast(pMemoryRef->pResource, Memory); 876 if (pMemory == NULL) 877 { 878 return NULL; 879 } 880 return pMemory->pMemDesc; 881 } 882 883 /*! 884 * @brief This function is used for copying data b/w two memory regions 885 * Both memory regions can be in the same aperture of different apertures 886 * 887 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 888 * @param[in] pSrcInfo TRANSFER_SURFACE info for source region 889 * @param[in] size Size in bytes of the memory transfer 890 * @param[in] flags Flags 891 */ 892 NV_STATUS 893 memmgrMemCopy_IMPL 894 ( 895 MemoryManager *pMemoryManager, 896 TRANSFER_SURFACE *pDstInfo, 897 TRANSFER_SURFACE *pSrcInfo, 898 NvU32 size, 899 NvU32 flags 900 ) 901 { 902 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 903 pDstInfo, pSrcInfo); 904 905 return memmgrMemCopyWithTransferType(pMemoryManager, pDstInfo, pSrcInfo, 906 size, transferType, flags); 907 } 908 909 /*! 
910 * @brief This function is used for setting a memory region to a constant state 911 * 912 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 913 * @param[in] value Value to be written to the region 914 * @param[in] size Size in bytes of the memory to be initialized 915 * @param[in] flags Flags 916 */ 917 NV_STATUS 918 memmgrMemSet_IMPL 919 ( 920 MemoryManager *pMemoryManager, 921 TRANSFER_SURFACE *pDstInfo, 922 NvU32 value, 923 NvU32 size, 924 NvU32 flags 925 ) 926 { 927 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 928 pDstInfo, NULL); 929 930 return memmgrMemSetWithTransferType(pMemoryManager, pDstInfo, value, 931 size, transferType, flags); 932 } 933 934 /*! 935 * @brief This function is used for setting a memory region to a constant state 936 * 937 * @param[in] pMemDesc Memory descriptor to end transfer to 938 * @param[in] value Value to be written to the region 939 * @param[in] flags Flags 940 */ 941 NV_STATUS 942 memmgrMemDescMemSet_IMPL 943 ( 944 MemoryManager *pMemoryManager, 945 MEMORY_DESCRIPTOR *pMemDesc, 946 NvU32 value, 947 NvU32 flags 948 ) 949 { 950 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 951 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 952 &transferSurface, NULL); 953 954 return memmgrMemSetWithTransferType(pMemoryManager, &transferSurface, value, 955 (NvU32)memdescGetSize(pMemDesc), 956 transferType, flags); 957 } 958 959 /*! 
960 * @brief This function is used for writing data placed in a user buffer 961 * to a given memory region 962 * 963 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 964 * @param[in] pBuf Buffer allocated by caller 965 * @param[in] size Size in bytes of the buffer 966 * @param[in] flags Flags 967 */ 968 NV_STATUS 969 memmgrMemWrite_IMPL 970 ( 971 MemoryManager *pMemoryManager, 972 TRANSFER_SURFACE *pDstInfo, 973 void *pBuf, 974 NvU64 size, 975 NvU32 flags 976 ) 977 { 978 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 979 pDstInfo, NULL); 980 981 return memmgrMemWriteWithTransferType(pMemoryManager, pDstInfo, pBuf, 982 size, transferType, flags); 983 } 984 985 /*! 986 * @brief This function is used for reading specified number of bytes from 987 * a source memory region into a caller passed buffer 988 * 989 * @param[in] pSrcInfo TRANSFER_SURFACE info for the source region 990 * @param[in] pBuf Caller allocated buffer 991 * @param[in] size Size in bytes of the buffer 992 * @param[in] flags Flags 993 */ 994 NV_STATUS 995 memmgrMemRead_IMPL 996 ( 997 MemoryManager *pMemoryManager, 998 TRANSFER_SURFACE *pSrcInfo, 999 void *pBuf, 1000 NvU64 size, 1001 NvU32 flags 1002 ) 1003 { 1004 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 1005 NULL, pSrcInfo); 1006 1007 return memmgrMemReadWithTransferType(pMemoryManager, pSrcInfo, pBuf, 1008 size, transferType, flags); 1009 } 1010 1011 /*! 
 * @brief This helper function can be used to begin transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
 *
 * @return CPU pointer the caller may read/write, or NULL on failure. The
 *         pointer is also stashed as the memdesc's kernel mapping and must be
 *         released via memmgrMemEndTransfer() with matching arguments.
 */
NvU8 *
memmgrMemBeginTransfer_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pTransferInfo,
    NvU64             shadowBufSize,
    NvU32             flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU8              *pPtr         = NULL;
    NvU64              memSz        = 0;

    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL);
    // The shadow buffer cannot be larger than the surface itself.
    NV_ASSERT_OR_RETURN((memSz = memdescGetSize(pMemDesc)) >= shadowBufSize, NULL);
    // Only one transfer may be outstanding per memdesc: the kernel-mapping
    // slot is used below to record the pointer handed back to the caller.
    NV_ASSERT_OR_RETURN(memdescGetKernelMapping(pMemDesc) == NULL, NULL);

    // shadowBufSize == 0 means "operate on the whole surface".
    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                NvP64 pPriv;
                NvU32 protect = NV_PROTECT_READ_WRITE;

                if (flags & TRANSFER_FLAGS_MAP_PROTECT_READABLE)
                {
                    protect = NV_PROTECT_READABLE;
                }
                else if (flags & TRANSFER_FLAGS_MAP_PROTECT_WRITEABLE)
                {
                    protect = NV_PROTECT_WRITEABLE;
                }

                // NOTE(review): maps memSz bytes starting at 'offset'; when
                // offset != 0 and shadowBufSize == 0, offset + memSz can
                // exceed the surface size — confirm callers never do this.
                NV_ASSERT_OR_RETURN(memdescMap(pMemDesc, offset, memSz, NV_TRUE, protect,
                                    (NvP64*) &pPtr, &pPriv) == NV_OK, NULL);
                // Save the map priv so memmgrMemEndTransfer can unmap later.
                memdescSetKernelMappingPriv(pMemDesc, pPriv);
                break;
            }
            // Non-BAR1 processor path: internal mapping of the whole surface,
            // then advance to the requested offset.
            NV_ASSERT_OR_RETURN((pPtr = memdescMapInternal(pGpu, pMemDesc, flags)) != NULL, NULL);
            pPtr = &pPtr[offset];

            break;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            // No direct CPU access: optionally hand out a sysmem shadow
            // buffer, which memmgrMemEndTransfer flushes back to the surface.
            if (flags & TRANSFER_FLAGS_SHADOW_ALLOC)
            {
                NV_ASSERT_OR_RETURN((pPtr = portMemAllocNonPaged(memSz)), NULL);
                if (flags & TRANSFER_FLAGS_SHADOW_INIT_MEM)
                {
                    // Pre-fill the shadow with the surface's current contents.
                    NV_ASSERT_OK(memmgrMemRead(pMemoryManager, pTransferInfo, pPtr, memSz, flags));
                }
            }
            break;
        default:
            NV_ASSERT(0);
    }
    // Record the mapping (possibly NULL) so EndTransfer knows what to undo.
    memdescSetKernelMapping(pMemDesc, pPtr);
    return pPtr;
}

/*!
 * @brief This helper function can be used to end transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
 */
void
memmgrMemEndTransfer_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pTransferInfo,
    NvU64             shadowBufSize,
    NvU32             flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU64              memSz        = 0;
    NvU8              *pMapping     = NULL;

    NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL);
    // Retrieve whatever memmgrMemBeginTransfer stashed (map or shadow buf).
    pMapping = memdescGetKernelMapping(pMemDesc);

    NV_ASSERT_OR_RETURN_VOID((memSz = memdescGetSize(pMemDesc)) >= (shadowBufSize + offset) );
    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    // Clear the slot first so the memdesc is reusable even on early return.
    memdescSetKernelMapping(pMemDesc, NULL);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                // Undo the memdescMap done at begin-transfer time.
                NvP64 pPriv = memdescGetKernelMappingPriv(pMemDesc);
                memdescSetKernelMappingPriv(pMemDesc, NULL);
                if (pMapping != NULL)
                {
                    memdescUnmap(pMemDesc, NV_TRUE, 0, pMapping, pPriv);
                }
                return;
            }
            memdescUnmapInternal(pGpu, pMemDesc, flags);
            return;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            if (pMapping != NULL)
            {
                // Flush the shadow buffer contents back to the surface, then
                // release the shadow allocation.
                NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, pTransferInfo, pMapping, memSz, flags));
                portMemFree(pMapping);
            }
            return;
        default:
            NV_ASSERT(0);
    }
    return;
}

/*!
1147 * @brief Helper function that ends transfers to a memdesc with default offset/size 1148 * 1149 * @param[in] pMemDesc Memory descriptor to end transfer to 1150 * @param[in] flags Flags 1151 */ 1152 void 1153 memmgrMemDescEndTransfer_IMPL 1154 ( 1155 MemoryManager *pMemoryManager, 1156 MEMORY_DESCRIPTOR *pMemDesc, 1157 NvU32 flags 1158 ) 1159 { 1160 if (pMemDesc == NULL) 1161 { 1162 return; 1163 } 1164 1165 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 1166 memmgrMemEndTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags); 1167 } 1168 1169 /*! 1170 * @brief Helper function that begins transfers to a memdesc with default offset/size 1171 * 1172 * @param[in] pMemDesc Memory descriptor to begin transfer to 1173 * @param[in] flags Flags 1174 */ 1175 NvU8 * 1176 memmgrMemDescBeginTransfer_IMPL 1177 ( 1178 MemoryManager *pMemoryManager, 1179 MEMORY_DESCRIPTOR *pMemDesc, 1180 NvU32 flags 1181 ) 1182 { 1183 NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL); 1184 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 1185 return memmgrMemBeginTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags); 1186 } 1187 1188 /*! 1189 * @brief This function is used to allocate common resources across memory 1190 * classes, and must be used before memory-specific resource alloc. 
 *
 * @param[in/out] pAllocRequest  User-provided alloc request struct
 * @param[in/out] pFbAllocInfo   Initialized FB_ALLOC_INFO struct to alloc
 *
 * @return NV_OK on success; on failure any HW resources reserved here are
 *         released before returning the error.
 */
NV_STATUS
memmgrAllocResources_IMPL
(
    OBJGPU                    *pGpu,
    MemoryManager             *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    FB_ALLOC_INFO             *pFbAllocInfo
)
{
    NV_STATUS                    status        = NV_OK;
    NvU64                        alignment     = 0;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    NV_ADDRESS_SPACE             addrSpace     = memmgrAllocGetAddrSpace(pMemoryManager, pVidHeapAlloc->flags,
                                                                         pFbAllocInfo->retAttr);

    NvU64                        pageSize      = 0;
    NvBool                       bAllocedHwRes = NV_FALSE;

    // IRQL TEST: must be running at equivalent of passive-level
    IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());

    //
    // Check for valid size.
    //
    if (pVidHeapAlloc->size == 0)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // Ensure a valid allocation pVidHeapAlloc->type was passed in
    //
    if (pVidHeapAlloc->type > NVOS32_NUM_MEM_TYPES - 1)
        return NV_ERR_INVALID_ARGUMENT;

    if (ADDR_VIRTUAL != addrSpace)
    {
        // If vidmem not requested explicitly, decide on the physical location.
        if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr) ||
            FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr))
        {
            if (ADDR_FBMEM == addrSpace)
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, pFbAllocInfo->retAttr);
            }
            else
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr);
            }
        }
    }
    else // Virtual
    {
        // Clear location to ANY since virtual does not associate with location.
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr);
    }

    // Fetch RM page size (may also rewrite retAttr/retAttr2 page-size fields).
    pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
                                       pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
                                       &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
    // AMODEL simulation is exempt from the page-size requirement.
    if (!IsAMODEL(pGpu) && pageSize == 0)
    {
        status = NV_ERR_INVALID_STATE;
        NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
        goto failed;
    }

    // Fetch memory alignment (may grow pFbAllocInfo->size and align).
    status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
                                               pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
                                               pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
        goto failed;
    }

    //
    // Call into HAL to reserve any hardware resources for
    // the specified memory pVidHeapAlloc->type.
    // If the alignment was changed due to a HW limitation, and the
    // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
    // will be passed back from the HAL
    //
    status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    // Mark for cleanup even on failure: the 'failed' path frees HW resources.
    bAllocedHwRes = NV_TRUE;

    // Reflect the (possibly updated) attributes back to the user params
    // before checking status, so callers see what was attempted.
    pVidHeapAlloc->attr  = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
    pVidHeapAlloc->format = pFbAllocInfo->format;
    pVidHeapAlloc->comprCovg = pFbAllocInfo->comprCovg;
    pVidHeapAlloc->zcullCovg = pFbAllocInfo->zcullCovg;

    if (status != NV_OK)
    {
        //
        // probably means we passed in a bogus pVidHeapAlloc->type or no tiling resources available
        // when tiled memory attribute was set to REQUIRED
        //
        NV_PRINTF(LEVEL_ERROR, "fbAlloc failure!\n");
        goto failed;
    }

    // call HAL to set resources
    status = memmgrSetAllocParameters_HAL(pGpu, pMemoryManager, pFbAllocInfo);

    if (status != NV_OK)
    {
        //
        // Two possibilties: either some attribute was set to REQUIRED, ran out of resources,
        // or unaligned address / size was passed down. Free up memory and fail this call.
        // heapFree will fix up heap pointers.
        //
        goto failed;
    }

    //
    // for fixed allocation check if the alignment needs to adjusted.
    // some hardware units request allocation aligned to smaller than
    // page sizes which can be handled through alignPad
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        //
        // is our desired offset suitably aligned?
        // if not adjust alignment using alignPad(offset into a page), the
        // allocation is page size aligned as required for swizzling.
        // (align is stored as alignment-1, i.e. a mask, hence the +1.)
        //
        if (pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1))
        {
           pFbAllocInfo->alignPad = pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1);
           pFbAllocInfo->desiredOffset -= pFbAllocInfo->alignPad;
        }
    }

    //
    // Refresh search parameters.
    //
    pFbAllocInfo->adjustedSize = pFbAllocInfo->size - pFbAllocInfo->alignPad;
    pVidHeapAlloc->height = pFbAllocInfo->height;
    pVidHeapAlloc->pitch  = pFbAllocInfo->pitch;

    //
    // The api takes alignment-1 (used to be a mask).
    //
    alignment = pFbAllocInfo->align + 1;
    pVidHeapAlloc->alignment = pFbAllocInfo->align + 1;  // convert mask to size

    //
    // Allow caller to request host page alignment to make it easier
    // to move things around with host os VM subsystem
    //
    if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE) &&
        (addrSpace == ADDR_FBMEM))
    {
        OBJSYS *pSys = SYS_GET_INSTANCE();
        NvU64 hostPageSize = pSys->cpuInfo.hostPageSize;

        // hostPageSize *should* always be set, but....
        if (hostPageSize == 0)
            hostPageSize = RM_PAGE_SIZE;

        // Bump alignment to satisfy both the HW and the host page size.
        alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
    }

    // Publish the final alignment: size form to the API, mask form internally.
    pVidHeapAlloc->alignment = alignment;
    pFbAllocInfo->align = alignment - 1;

    return status;

failed:
    // Release any HW resources reserved by memmgrAllocHwResources above.
    if (bAllocedHwRes)
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    }

    return status;
}

/*!
 * @brief This function is used to create a memory descriptor if needed.
 *
 * @param[in/out] pAllocRequest     User-provided alloc request struct
 * @param[in/out] pFbAllocInfo      Initialized FB_ALLOC_INFO struct to alloc
 * @param[out]    ppMemDesc         Double pointer to created descriptor
 * @param[in]     pHeap             Heap pointer to store in descriptor
 * @param[in]     addrSpace         Address space identifier
 * @param[in]     bContig           NV_TRUE to request a contiguous allocation
 * @param[out]    bAllocedMemDesc   NV_TRUE if a descriptor was created
 */
NV_STATUS
memUtilsAllocMemDesc
(
    OBJGPU                    *pGpu,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    FB_ALLOC_INFO             *pFbAllocInfo,
    MEMORY_DESCRIPTOR        **ppMemDesc,
    Heap                      *pHeap,
    NV_ADDRESS_SPACE           addrSpace,
    NvBool                     bContig,
    NvBool                    *bAllocedMemDesc
)
{
    NV_STATUS status = NV_OK;

    //
    // Allocate a memory descriptor if needed. We do this after the fbHwAllocResources() call
    // so we have the updated size information. Linear callers like memdescAlloc() can live with
    // only having access to the requested size in bytes, but block linear callers really do
    // need to allocate after fbAlloc() rounding takes place.
    //
    if (pAllocRequest->pMemDesc == NULL)
    {
        NvU64 memDescFlags = MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE;

        //
        // Allocate a contig vidmem descriptor now; if needed we'll
        // allocate a new noncontig memdesc later
        //
        status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, pFbAllocInfo->adjustedSize, 0, bContig,
                               addrSpace, NV_MEMORY_UNCACHED, memDescFlags);

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
            return status;
        }

        // Tell the caller we created the descriptor (so they own cleanup).
        *bAllocedMemDesc = NV_TRUE;
    }

    *ppMemDesc = pAllocRequest->pMemDesc;
    (*ppMemDesc)->pHeap = pHeap;

    // Set attributes tracked by the memdesc
    memdescSetPteKind(*ppMemDesc, pFbAllocInfo->format);
    memdescSetHwResId(*ppMemDesc, pFbAllocInfo->hwResId);

    return status;
}

/*!
 * Memsets the memory for the given memory descriptor with the given value.
 * This function assumes that BAR2 is not yet available. Thus either the BAR0
 * window to FB or a memmap to SYSMEM will be used, depending on the memory
 * location.
 *
 * @param[in] pGpu     GPU object pointer
 * @param[in] pMemDesc Memory descriptor for the memory to memset
 * @param[in] value    Value to memset to.
 */
NV_STATUS
memUtilsMemSetNoBAR2(OBJGPU *pGpu, PMEMORY_DESCRIPTOR pMemDesc, NvU8 value)
{
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU8      *pMap       = NULL;
    void      *pPriv      = NULL;
    RmPhysAddr physAddr;
    RmPhysAddr physAddrOrig;
    NvU64      sizeInDWord;
    NvU32      sizeOfDWord = sizeof(NvU32);
    NvU32      bar0Addr;
    NvU32      i;

    // Surface must be a whole number of 32-bit words (BAR0 writes are 32-bit).
    NV_ASSERT((pMemDesc != NULL) &&
              (pMemDesc->Size & (sizeOfDWord-1)) == 0);
    sizeInDWord = pMemDesc->Size / sizeOfDWord;

    //
    // BAR2 is not yet initialized. Thus use either the BAR0 window or
    // memmap to initialize the given surface.
    //
    NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL);
    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
            {
                // No BAR0 window either: fall back to an RM aperture mapping.
                // (shadows the outer pMap intentionally — scoped to this branch)
                NvU8 *pMap = kbusMapRmAperture_HAL(pGpu, pMemDesc);
                NV_ASSERT_OR_RETURN(pMap != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
                portMemSet(pMap, value, pMemDesc->Size);
                kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pMap, NV_TRUE);

                break;
            }
            //
            // Set the BAR0 window to encompass the given surface while
            // saving off the location to where the BAR0 window was
            // previously pointing.
            //
            physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
            NV_ASSERT((physAddr & (sizeOfDWord-1)) == 0);

            physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
            // Window base must be 64KB-aligned; the remainder is folded into
            // bar0Addr below.
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu,
                                               pKernelBus,
                                               physAddr & ~0xffffULL));
            bar0Addr =
                NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) +
                          (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)));

            //
            // Iterate and initialize the given surface with BAR0
            // writes.
            //
            for (i = 0; i < sizeInDWord; i++)
            {
                // NOTE(review): 'value' is an NvU8 written as a 32-bit
                // register value; only byte 0 of each dword is set to
                // 'value' (upper bytes are zero) — confirm callers rely on
                // value == 0 here.
                GPU_REG_WR32(pGpu,
                             bar0Addr + (sizeOfDWord * i),
                             value);
            }

            //
            // Restore where the BAR0 window was previously pointing
            // to.
            //
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, physAddrOrig));

            break;

        case ADDR_SYSMEM:
            // Plain old memmap.
            NV_ASSERT_OK_OR_RETURN(
                memdescMapOld(pMemDesc, 0,
                              pMemDesc->Size,
                              NV_TRUE, // kernel,
                              NV_PROTECT_READ_WRITE,
                              (void **)&pMap,
                              &pPriv));
            portMemSet(pMap, value, NvU64_LO32(pMemDesc->Size));
            memdescUnmapOld(pMemDesc, 1, 0, pMap, pPriv);
            break;

        default:
            // Should not happen.
            NV_ASSERT(0);
            break;
    }

    return NV_OK;
}