1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/mem_mgr/mem_mgr.h" 25 #include "gpu/mem_mgr/heap_base.h" 26 #include "gpu/mem_mgr/mem_utils.h" 27 #include "gpu/mem_mgr/virt_mem_allocator_common.h" 28 #include "os/nv_memory_type.h" 29 #include "core/locks.h" 30 #include "ctrl/ctrl2080.h" 31 32 #include "gpu/bus/kern_bus.h" 33 34 // Memory copy block size for if we need to cut up a mapping 35 #define MEMORY_COPY_BLOCK_SIZE 1024 * 1024 36 37 /* ------------------------ Private functions --------------------------------------- */ 38 39 /*! 
40 * @brief This utility routine helps in determining the appropriate 41 * memory transfer technique to be used 42 */ 43 static TRANSFER_TYPE 44 memmgrGetMemTransferType 45 ( 46 MemoryManager *pMemoryManager, 47 TRANSFER_SURFACE *pDst, 48 TRANSFER_SURFACE *pSrc 49 ) 50 { 51 TRANSFER_TYPE transferType = TRANSFER_TYPE_PROCESSOR; 52 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 53 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 54 55 // 56 // In case of copy, both dest and src will be passed 57 // In case of memset/memread/memwrite either dest or src will be passed 58 // 59 if ((pDst != NULL) && (pSrc != NULL) && 60 (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) && 61 (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)) 62 { 63 transferType = TRANSFER_TYPE_PROCESSOR; 64 } 65 else if (((pDst != NULL) && 66 (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)) || 67 ((pSrc != NULL) && 68 (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM))) 69 { 70 transferType = TRANSFER_TYPE_PROCESSOR; 71 } 72 else if (kbusIsBarAccessBlocked(pKernelBus)) 73 { 74 transferType = TRANSFER_TYPE_GSP_DMA; 75 } 76 return transferType; 77 } 78 79 static NV_STATUS 80 _memmgrAllocAndMapSurface 81 ( 82 OBJGPU *pGpu, 83 NvU64 size, 84 MEMORY_DESCRIPTOR **ppMemDesc, 85 void **ppMap, 86 void **ppPriv 87 ) 88 { 89 NV_STATUS status; 90 NvU64 flags = 0; 91 92 NV_ASSERT_OR_RETURN(ppMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 93 NV_ASSERT_OR_RETURN(ppMap != NULL, NV_ERR_INVALID_ARGUMENT); 94 NV_ASSERT_OR_RETURN(ppPriv != NULL, NV_ERR_INVALID_ARGUMENT); 95 96 flags = MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY; 97 98 NV_ASSERT_OK_OR_RETURN( 99 memdescCreate(ppMemDesc, pGpu, size, RM_PAGE_SIZE, NV_TRUE, 100 ADDR_SYSMEM, NV_MEMORY_UNCACHED, flags)); 101 102 NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(*ppMemDesc), failed); 103 104 NV_ASSERT_OK_OR_GOTO(status, 105 memdescMapOld(*ppMemDesc, 0, size, NV_TRUE, NV_PROTECT_READ_WRITE, 106 ppMap, ppPriv), 107 failed); 108 109 // Clear surface before 
use 110 portMemSet(*ppMap, 0, size); 111 112 return NV_OK; 113 failed: 114 memdescFree(*ppMemDesc); 115 memdescDestroy(*ppMemDesc); 116 117 *ppMemDesc = NULL; 118 *ppMap = NULL; 119 *ppPriv = NULL; 120 121 return status; 122 } 123 124 static void 125 _memmgrUnmapAndFreeSurface 126 ( 127 MEMORY_DESCRIPTOR *pMemDesc, 128 void *pMap, 129 void *pPriv 130 ) 131 { 132 memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv); 133 134 memdescFree(pMemDesc); 135 memdescDestroy(pMemDesc); 136 } 137 138 /*! 139 * @brief This function is used for writing/reading data to/from a client 140 * provided buffer from/to some source region in vidmem 141 * 142 * @param[in] pDst TRANSFER_SURFACE info for destination region 143 * @param[in] pBuf Client provided buffer 144 * @param[in] size Size in bytes of the memory transfer 145 * @param[in] bRead TRUE for read and FALSE for write 146 */ 147 static NV_STATUS 148 _memmgrMemReadOrWriteWithGsp 149 ( 150 OBJGPU *pGpu, 151 TRANSFER_SURFACE *pDst, 152 void *pBuf, 153 NvU64 size, 154 NvBool bRead 155 ) 156 { 157 NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams; 158 NV_STATUS status; 159 MEMORY_DESCRIPTOR *pStagingBuf = NULL; 160 void *pStagingBufMap = NULL; 161 void *pStagingBufPriv = NULL; 162 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 163 164 // Do not expect GSP to be used for reading/writing from/to sysmem 165 if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) 166 return NV_ERR_NOT_SUPPORTED; 167 168 // Allocate and map the staging buffer 169 NV_ASSERT_OK_OR_RETURN( 170 _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap, 171 &pStagingBufPriv)); 172 173 // Copy the data to staging buffer before poking GSP for copying 174 if (!bRead) 175 portMemCopy(pStagingBufMap, size, pBuf, size); 176 177 // Setup control call params 178 portMemSet(&gspParams, 0, sizeof(gspParams)); 179 180 gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY; 181 gspParams.transferSize = size; 182 183 if (bRead) 184 { 185 // 
Source surface in vidmem 186 gspParams.src.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0); 187 gspParams.src.size = memdescGetSize(pDst->pMemDesc); 188 gspParams.src.offset = pDst->offset; 189 gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc); 190 gspParams.src.aperture = memdescGetAddressSpace(pDst->pMemDesc); 191 192 // Destination surface in unprotected sysmem 193 gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0); 194 gspParams.dst.size = memdescGetSize(pStagingBuf); 195 gspParams.dst.offset = 0; 196 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf); 197 gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf); 198 } 199 else 200 { 201 // Source surface in unprotected sysmem 202 gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0); 203 gspParams.src.size = memdescGetSize(pStagingBuf); 204 gspParams.src.offset = 0; 205 gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf); 206 gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf); 207 208 // Destination surface in vidmem 209 gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0); 210 gspParams.dst.size = memdescGetSize(pDst->pMemDesc); 211 gspParams.dst.offset = pDst->offset; 212 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc); 213 gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc); 214 } 215 216 // Send the control call 217 NV_ASSERT_OK_OR_GOTO(status, 218 pRmApi->Control(pRmApi, 219 pGpu->hInternalClient, 220 pGpu->hInternalSubdevice, 221 NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP, 222 &gspParams, 223 sizeof(gspParams)), 224 failed); 225 226 // Read contents from staging buffer after GSP is done copying 227 if (bRead) 228 portMemCopy(pBuf, size, pStagingBufMap, size); 229 230 failed: 231 _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv); 232 return status; 233 } 234 235 /*! 
236 * @brief This function is used for copying data b/w two memory regions 237 * using GSP. 238 * 239 * @param[in] pDst TRANSFER_SURFACE info for destination region 240 * @param[in] pSrc TRANSFER_SURFACE info for source region 241 * @param[in] size Size in bytes of the memory transfer 242 */ 243 static NV_STATUS 244 _memmgrMemcpyWithGsp 245 ( 246 OBJGPU *pGpu, 247 TRANSFER_SURFACE *pDst, 248 TRANSFER_SURFACE *pSrc, 249 NvU64 size 250 ) 251 { 252 NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams; 253 NV_STATUS status; 254 MEMORY_DESCRIPTOR *pStagingBuf = NULL; 255 void *pStagingBufMap = NULL; 256 void *pStagingBufPriv = NULL; 257 NvU8 *pMap = NULL; 258 void *pPriv = NULL; 259 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 260 261 // 262 // Do not expect GSP to be used for copying data b/w two surfaces 263 // in sysmem. For SPT, there is no non-CPR vidmem. So, allow vidmem 264 // to vidmem copies in plain text. For copies b/w CPR and non-CPR 265 // vidmem, encryption/decryption needs to happen at the endpoints. 
266 // 267 if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM && 268 memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) 269 { 270 return NV_ERR_NOT_SUPPORTED; 271 } 272 273 // Allocate and map the bounce buffer 274 NV_ASSERT_OK_OR_RETURN( 275 _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap, 276 &pStagingBufPriv)); 277 278 // Setup control call params 279 portMemSet(&gspParams, 0, sizeof(gspParams)); 280 281 gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY; 282 gspParams.transferSize = size; 283 284 if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM) 285 { 286 NV_ASSERT_OK_OR_GOTO(status, 287 memdescMapOld(pSrc->pMemDesc, 0, size, NV_TRUE, 288 NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv), 289 failed); 290 291 // Copy to staging buffer 292 portMemCopy(pStagingBufMap, size, pMap + pSrc->offset, size); 293 294 memdescUnmapOld(pSrc->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv); 295 296 // Source surface in unprotected sysmem 297 gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0); 298 gspParams.src.size = memdescGetSize(pStagingBuf); 299 gspParams.src.offset = 0; 300 gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf); 301 gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf); 302 303 // Destination surface in vidmem 304 gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0); 305 gspParams.dst.size = memdescGetSize(pDst->pMemDesc); 306 gspParams.dst.offset = pDst->offset; 307 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc); 308 gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc); 309 } 310 else 311 { 312 // Source surface in vidmem 313 gspParams.src.baseAddr = memdescGetPhysAddr(pSrc->pMemDesc, AT_GPU, 0); 314 gspParams.src.size = memdescGetSize(pSrc->pMemDesc); 315 gspParams.src.offset = pSrc->offset; 316 gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pSrc->pMemDesc); 317 gspParams.src.aperture = 
memdescGetAddressSpace(pSrc->pMemDesc); 318 319 if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_FBMEM) 320 { 321 // Destination surface in vidmem 322 gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0); 323 gspParams.dst.size = memdescGetSize(pDst->pMemDesc); 324 gspParams.dst.offset = pDst->offset; 325 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc); 326 gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc); 327 } 328 else 329 { 330 // Destination surface in unprotected sysmem 331 gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0); 332 gspParams.dst.size = memdescGetSize(pStagingBuf); 333 gspParams.dst.offset = 0; 334 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf); 335 gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf); 336 } 337 } 338 339 // Send the control call 340 NV_ASSERT_OK_OR_GOTO(status, 341 pRmApi->Control(pRmApi, 342 pGpu->hInternalClient, 343 pGpu->hInternalSubdevice, 344 NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP, 345 &gspParams, 346 sizeof(gspParams)), 347 failed); 348 349 // Copy from staging buffer to destination 350 if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) 351 { 352 NV_ASSERT_OK_OR_GOTO(status, 353 memdescMapOld(pDst->pMemDesc, 0, size, NV_TRUE, 354 NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv), 355 failed); 356 357 portMemCopy(pMap + pDst->offset, size, pStagingBufMap, size); 358 359 memdescUnmapOld(pDst->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv); 360 } 361 362 failed: 363 _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv); 364 return status; 365 } 366 367 static NV_STATUS 368 _memmgrMemsetWithGsp 369 ( 370 OBJGPU *pGpu, 371 TRANSFER_SURFACE *pDst, 372 NvU32 value, 373 NvU64 size 374 ) 375 { 376 NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams; 377 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 378 379 // Do not expect to use GSP to memset surfaces in sysmem 380 if 
(memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) 381 return NV_ERR_NOT_SUPPORTED; 382 383 portMemSet(&gspParams, 0, sizeof(gspParams)); 384 385 gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMSET; 386 gspParams.transferSize = size; 387 gspParams.value = value; 388 gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0); 389 gspParams.dst.size = memdescGetSize(pDst->pMemDesc); 390 gspParams.dst.offset = pDst->offset; 391 gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc); 392 gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc); 393 394 // Send the control call 395 NV_ASSERT_OK_OR_RETURN( 396 pRmApi->Control(pRmApi, 397 pGpu->hInternalClient, 398 pGpu->hInternalSubdevice, 399 NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP, 400 &gspParams, 401 sizeof(gspParams))); 402 403 return NV_OK; 404 } 405 406 /*! 407 * @brief This function is used for copying data b/w two memory regions 408 * using the specified memory transfer technique. Both memory regions 409 * can be in the same aperture or in different apertures. 
410 * 411 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 412 * @param[in] pSrcInfo TRANSFER_SURFACE info for source region 413 * @param[in] size Size in bytes of the memory transfer 414 * @param[in] transferType Memory transfer technique to be used 415 * @param[in] flags Flags 416 */ 417 static NV_STATUS 418 memmgrMemCopyWithTransferType 419 ( 420 MemoryManager *pMemoryManager, 421 TRANSFER_SURFACE *pDstInfo, 422 TRANSFER_SURFACE *pSrcInfo, 423 NvU32 size, 424 TRANSFER_TYPE transferType, 425 NvU32 flags 426 ) 427 { 428 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 429 NvU8 *pSrc; 430 NvU8 *pDst; 431 432 // Sanitize the input 433 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 434 NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT); 435 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 436 NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 437 NV_ASSERT_OR_RETURN(!memdescDescIsEqual(pDstInfo->pMemDesc, pSrcInfo->pMemDesc), 438 NV_ERR_INVALID_ARGUMENT); 439 440 switch (transferType) 441 { 442 case TRANSFER_TYPE_PROCESSOR: 443 pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE); 444 NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 445 pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 446 if (pSrc == NULL) 447 { 448 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, 0); 449 NV_ASSERT_OR_RETURN(0, NV_ERR_INSUFFICIENT_RESOURCES); 450 } 451 452 portMemCopy(pDst + pDstInfo->offset, size, pSrc + pSrcInfo->offset, size); 453 454 memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 455 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags); 456 break; 457 case TRANSFER_TYPE_GSP_DMA: 458 if (IS_GSP_CLIENT(pGpu)) 459 { 460 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 461 NV_ASSERT_OK_OR_RETURN( 462 _memmgrMemcpyWithGsp(pGpu, pDstInfo, pSrcInfo, size)); 463 } 464 else 465 { 466 NV_ASSERT_OR_RETURN(0, 
NV_ERR_INVALID_ARGUMENT); 467 } 468 break; 469 case TRANSFER_TYPE_CE: 470 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 471 break; 472 } 473 474 return NV_OK; 475 } 476 477 /*! 478 * @brief This function is used for setting a memory region to a constant state 479 * using a specified memory transfer technique 480 * 481 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 482 * @param[in] value Value to be written to the region 483 * @param[in] size Size in bytes of the memory to be initialized 484 * @param[in] transferType Memory transfer technique to be used 485 * @param[in] flags Flags 486 */ 487 static NV_STATUS 488 memmgrMemSetWithTransferType 489 ( 490 MemoryManager *pMemoryManager, 491 TRANSFER_SURFACE *pDstInfo, 492 NvU32 value, 493 NvU32 size, 494 TRANSFER_TYPE transferType, 495 NvU32 flags 496 ) 497 { 498 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 499 NvU8 *pDst; 500 501 // Sanitize the input 502 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 503 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 504 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 505 NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 506 507 switch (transferType) 508 { 509 case TRANSFER_TYPE_PROCESSOR: 510 pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE); 511 NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 512 513 portMemSet(pDst + pDstInfo->offset, value, size); 514 515 memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags); 516 break; 517 case TRANSFER_TYPE_GSP_DMA: 518 if (IS_GSP_CLIENT(pGpu)) 519 { 520 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 521 NV_ASSERT_OK_OR_RETURN( 522 _memmgrMemsetWithGsp(pGpu, pDstInfo, value, size)); 523 } 524 else 525 { 526 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 527 } 528 break; 529 case TRANSFER_TYPE_CE: 530 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 531 break; 532 } 533 534 return NV_OK; 535 } 
536 537 /*! 538 * @brief This function is used to map the appropriate memory descriptor, 539 * copy the memory from the given buffer, and then unmap. 540 * 541 * @param[in] pMemDesc Memory descriptor of buffer to write 542 * @param[in] pBuf Buffer allocated by caller 543 * @param[in] offset Offset of buffer to write 544 * @param[in] size Size in bytes of the buffer 545 * @param[in] flags Flags 546 */ 547 static NV_STATUS 548 memmgrMemWriteMapAndCopy 549 ( 550 MemoryManager *pMemoryManager, 551 MEMORY_DESCRIPTOR *pMemDesc, 552 void *pBuf, 553 NvU64 offset, 554 NvU64 size, 555 NvU32 flags 556 ) 557 { 558 NvU8 *pDst = NULL; 559 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 560 561 pDst = memdescMapInternal(pGpu, pMemDesc, TRANSFER_FLAGS_NONE); 562 NV_CHECK_OR_RETURN(LEVEL_SILENT, pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 563 564 portMemCopy(pDst + offset, size, pBuf, size); 565 memdescUnmapInternal(pGpu, pMemDesc, flags); 566 567 return NV_OK; 568 } 569 570 /*! 571 * @brief This function is used for writing data placed in a caller passed buffer 572 * to a given memory region while only mapping regions as large as the given 573 * block size. 
574 * 575 * @param[in] pMemDesc Memory descriptor of buffer to write 576 * @param[in] pBuf Buffer allocated by caller 577 * @param[in] baseOffset Offset of entire buffer to write 578 * @param[in] size Size in bytes of the buffer 579 * @param[in] flags Flags 580 * @param[in] blockSize Maximum size of a mapping to use 581 */ 582 static NV_STATUS 583 memmgrMemWriteInBlocks 584 ( 585 MemoryManager *pMemoryManager, 586 MEMORY_DESCRIPTOR *pMemDesc, 587 void *pBuf, 588 NvU64 baseOffset, 589 NvU64 size, 590 NvU32 flags, 591 NvU32 blockSize 592 ) 593 { 594 NV_STATUS status = NV_OK; 595 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 596 NvU64 remaining = size; 597 NvU64 offset = 0; 598 599 while ((remaining > 0) && (status == NV_OK)) 600 { 601 MEMORY_DESCRIPTOR *pSubMemDesc = NULL; 602 NvU32 mapSize = NV_MIN(blockSize, remaining); 603 604 NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, memdescCreateSubMem(&pSubMemDesc, pMemDesc, pGpu, offset + baseOffset, mapSize)); 605 606 // Set the offset to 0, as the sub descriptor already starts at the offset 607 status = memmgrMemWriteMapAndCopy(pMemoryManager, pSubMemDesc, (NvU8 *)pBuf + offset, 608 0, mapSize, flags); 609 610 memdescFree(pSubMemDesc); 611 memdescDestroy(pSubMemDesc); 612 613 offset += mapSize; 614 remaining -= mapSize; 615 } 616 617 return status; 618 } 619 620 /*! 
621 * @brief This function is used for writing data placed in a caller passed buffer 622 * to a given memory region using the specified memory transfer technique 623 * 624 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 625 * @param[in] pBuf Buffer allocated by caller 626 * @param[in] size Size in bytes of the buffer 627 * @param[in] transferType Memory transfer technique to be used 628 * @param[in] flags Flags 629 */ 630 static NV_STATUS 631 memmgrMemWriteWithTransferType 632 ( 633 MemoryManager *pMemoryManager, 634 TRANSFER_SURFACE *pDstInfo, 635 void *pBuf, 636 NvU64 size, 637 TRANSFER_TYPE transferType, 638 NvU32 flags 639 ) 640 { 641 NvU8 *pMapping = memdescGetKernelMapping(pDstInfo->pMemDesc); 642 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 643 644 // Sanitize the input 645 NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT); 646 NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 647 NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT); 648 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 649 NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 650 651 if (pMapping != NULL) 652 { 653 portMemCopy(pMapping + pDstInfo->offset, size, pBuf, size); 654 return NV_OK; 655 } 656 657 switch (transferType) 658 { 659 case TRANSFER_TYPE_PROCESSOR: 660 if (memmgrMemWriteMapAndCopy(pMemoryManager, pDstInfo->pMemDesc, pBuf, pDstInfo->offset, size, flags) != NV_OK) 661 { 662 // If we fail to map a block large enough for the entire transfer, split up the mapping. 
663 NV_ASSERT_OK_OR_RETURN(memmgrMemWriteInBlocks(pMemoryManager, pDstInfo->pMemDesc, pBuf, 664 pDstInfo->offset, size, flags, MEMORY_COPY_BLOCK_SIZE)); 665 } 666 break; 667 case TRANSFER_TYPE_GSP_DMA: 668 if (IS_GSP_CLIENT(pGpu)) 669 { 670 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 671 NV_ASSERT_OK_OR_RETURN( 672 _memmgrMemReadOrWriteWithGsp(pGpu, pDstInfo, pBuf, size, 673 NV_FALSE /* bRead */)); 674 } 675 else 676 { 677 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 678 } 679 break; 680 case TRANSFER_TYPE_CE: 681 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 682 break; 683 } 684 685 return NV_OK; 686 } 687 688 /*! 689 * @brief This function is used for reading specified number of bytes from 690 * a source memory region into a caller passed buffer using a specified 691 * memory transfer technique 692 * 693 * @param[in] pSrcInfo TRANSFER_SURFACE info for the source region 694 * @param[in] pBuf Caller allocated buffer 695 * @param[in] size Size in bytes of the buffer 696 * @param[in] transferType Memory transfer technique to be used 697 * @param[in] flags Flags 698 */ 699 static NV_STATUS 700 memmgrMemReadWithTransferType 701 ( 702 MemoryManager *pMemoryManager, 703 TRANSFER_SURFACE *pSrcInfo, 704 void *pBuf, 705 NvU64 size, 706 TRANSFER_TYPE transferType, 707 NvU32 flags 708 ) 709 { 710 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 711 NvU8 *pSrc; 712 NvU8 *pMapping = memdescGetKernelMapping(pSrcInfo->pMemDesc); 713 714 715 // Sanitize the input 716 NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT); 717 NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT); 718 NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT); 719 NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT); 720 NV_ASSERT_OR_RETURN(pSrcInfo->offset + size <= pSrcInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT); 721 722 if (pMapping != NULL) 723 { 724 portMemCopy(pBuf, size, pMapping + pSrcInfo->offset, size); 725 return NV_OK; 726 } 727 728 switch (transferType) 
729 { 730 case TRANSFER_TYPE_PROCESSOR: 731 pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE); 732 NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 733 734 portMemCopy(pBuf, size, pSrc + pSrcInfo->offset, size); 735 736 memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, 0); 737 break; 738 case TRANSFER_TYPE_GSP_DMA: 739 if (IS_GSP_CLIENT(pGpu)) 740 { 741 NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n"); 742 NV_ASSERT_OK_OR_RETURN( 743 _memmgrMemReadOrWriteWithGsp(pGpu, pSrcInfo, pBuf, size, 744 NV_TRUE /* bRead */)); 745 } 746 else 747 { 748 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 749 } 750 break; 751 case TRANSFER_TYPE_CE: 752 NV_PRINTF(LEVEL_INFO, "Add call to CE\n"); 753 break; 754 } 755 756 return NV_OK; 757 } 758 759 /* ------------------------ Public functions --------------------------------------- */ 760 761 NvU64 memUtilsLeastCommonAlignment(NvU64 align1, NvU64 align2) 762 { 763 NvU64 a, b; // For Euclid's algorithm 764 NvU64 lcm; // Least Common Multiple of align1 and align2 765 NvU64 maxAlignment = NV_U64_MAX; 766 767 // WOLOG, make sure align1 >= align2. 768 // 769 if (align2 > align1) 770 { 771 NvU64 tmp = align1; 772 align1 = align2; 773 align2 = tmp; 774 } 775 776 // If align2 is 0, return min(align1, maxAlignment) 777 // 778 if (align2 == 0) 779 { 780 return align1 < maxAlignment ? align1 : maxAlignment; 781 } 782 783 // Use Euclid's algorithm (GCD(a, b) = GCD(b, a % b)) to find the 784 // GCD of the two alignments, and use the GCD to find the LCM. 785 // 786 a = align1; 787 b = align2; 788 while (b != 0) 789 { 790 NvU64 old_a = a; 791 a = b; 792 b = old_a % b; 793 NV_ASSERT(a > b); // Ensure termination. Should never fail. 794 } 795 lcm = align1 * (align2 / a); // May overflow 796 797 // Return min(lcm, maxAlignment). Also return maxAlignment if the 798 // lcm calculation overflowed, since that means it must have been 799 // much bigger than maxAlignment. 
800 // 801 if (lcm > maxAlignment || lcm < align1 || 802 0 != (lcm % align1) || 0 != (lcm % align2)) 803 { 804 NV_CHECK_FAILED(LEVEL_ERROR, "Alignment limit exceeded"); 805 return maxAlignment; 806 } 807 return lcm; 808 } 809 810 void memUtilsInitFBAllocInfo 811 ( 812 NV_MEMORY_ALLOCATION_PARAMS *pAllocParams, 813 FB_ALLOC_INFO *pFbAllocInfo, 814 NvHandle hClient, 815 NvHandle hDevice 816 ) 817 { 818 pFbAllocInfo->pageFormat->type = pAllocParams->type; 819 pFbAllocInfo->owner = pAllocParams->owner; 820 pFbAllocInfo->hwResId = 0; 821 pFbAllocInfo->pad = 0; 822 pFbAllocInfo->alignPad = 0; 823 pFbAllocInfo->height = pAllocParams->height; 824 pFbAllocInfo->width = pAllocParams->width; 825 pFbAllocInfo->pitch = pAllocParams->pitch; 826 pFbAllocInfo->size = pAllocParams->size; 827 pFbAllocInfo->origSize = pAllocParams->size; 828 pFbAllocInfo->adjustedSize = pAllocParams->size; 829 pFbAllocInfo->offset = ~0; 830 pFbAllocInfo->pageFormat->flags = pAllocParams->flags; 831 pFbAllocInfo->pageFormat->attr = pAllocParams->attr; 832 pFbAllocInfo->retAttr = pAllocParams->attr; 833 pFbAllocInfo->pageFormat->attr2 = pAllocParams->attr2; 834 pFbAllocInfo->retAttr2 = pAllocParams->attr2; 835 pFbAllocInfo->format = pAllocParams->format; 836 pFbAllocInfo->comprCovg = pAllocParams->comprCovg; 837 pFbAllocInfo->zcullCovg = 0; 838 pFbAllocInfo->ctagOffset = pAllocParams->ctagOffset; 839 pFbAllocInfo->bIsKernelAlloc = NV_FALSE; 840 pFbAllocInfo->internalflags = 0; 841 pFbAllocInfo->hClient = hClient; 842 pFbAllocInfo->hDevice = hDevice; 843 844 if ((pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) || 845 (pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE)) 846 pFbAllocInfo->align = pAllocParams->alignment; 847 else 848 pFbAllocInfo->align = RM_PAGE_SIZE; 849 850 if (pAllocParams->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) 851 { 852 pFbAllocInfo->offset = pAllocParams->offset; 853 pFbAllocInfo->desiredOffset = pAllocParams->offset; 854 } 855 } 856 857 /*! 
858 * @brief This function is used for copying data b/w two memory regions 859 * Both memory regions can be in the same aperture of different apertures 860 * 861 * @param[in] pDstInfo TRANSFER_SURFACE info for destination region 862 * @param[in] pSrcInfo TRANSFER_SURFACE info for source region 863 * @param[in] size Size in bytes of the memory transfer 864 * @param[in] flags Flags 865 */ 866 NV_STATUS 867 memmgrMemCopy_IMPL 868 ( 869 MemoryManager *pMemoryManager, 870 TRANSFER_SURFACE *pDstInfo, 871 TRANSFER_SURFACE *pSrcInfo, 872 NvU32 size, 873 NvU32 flags 874 ) 875 { 876 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 877 pDstInfo, pSrcInfo); 878 879 return memmgrMemCopyWithTransferType(pMemoryManager, pDstInfo, pSrcInfo, 880 size, transferType, flags); 881 } 882 883 /*! 884 * @brief This function is used for setting a memory region to a constant state 885 * 886 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 887 * @param[in] value Value to be written to the region 888 * @param[in] size Size in bytes of the memory to be initialized 889 * @param[in] flags Flags 890 */ 891 NV_STATUS 892 memmgrMemSet_IMPL 893 ( 894 MemoryManager *pMemoryManager, 895 TRANSFER_SURFACE *pDstInfo, 896 NvU32 value, 897 NvU32 size, 898 NvU32 flags 899 ) 900 { 901 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 902 pDstInfo, NULL); 903 904 return memmgrMemSetWithTransferType(pMemoryManager, pDstInfo, value, 905 size, transferType, flags); 906 } 907 908 /*! 
909 * @brief This function is used for setting a memory region to a constant state 910 * 911 * @param[in] pMemDesc Memory descriptor to end transfer to 912 * @param[in] value Value to be written to the region 913 * @param[in] flags Flags 914 */ 915 NV_STATUS 916 memmgrMemDescMemSet_IMPL 917 ( 918 MemoryManager *pMemoryManager, 919 MEMORY_DESCRIPTOR *pMemDesc, 920 NvU32 value, 921 NvU32 flags 922 ) 923 { 924 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 925 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 926 &transferSurface, NULL); 927 928 return memmgrMemSetWithTransferType(pMemoryManager, &transferSurface, value, 929 (NvU32)memdescGetSize(pMemDesc), 930 transferType, flags); 931 } 932 933 /*! 934 * @brief This function is used for writing data placed in a user buffer 935 * to a given memory region 936 * 937 * @param[in] pDstInfo TRANSFER_SURFACE info for the destination region 938 * @param[in] pBuf Buffer allocated by caller 939 * @param[in] size Size in bytes of the buffer 940 * @param[in] flags Flags 941 */ 942 NV_STATUS 943 memmgrMemWrite_IMPL 944 ( 945 MemoryManager *pMemoryManager, 946 TRANSFER_SURFACE *pDstInfo, 947 void *pBuf, 948 NvU64 size, 949 NvU32 flags 950 ) 951 { 952 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 953 pDstInfo, NULL); 954 955 return memmgrMemWriteWithTransferType(pMemoryManager, pDstInfo, pBuf, 956 size, transferType, flags); 957 } 958 959 /*! 
960 * @brief This function is used for reading specified number of bytes from 961 * a source memory region into a caller passed buffer 962 * 963 * @param[in] pSrcInfo TRANSFER_SURFACE info for the source region 964 * @param[in] pBuf Caller allocated buffer 965 * @param[in] size Size in bytes of the buffer 966 * @param[in] flags Flags 967 */ 968 NV_STATUS 969 memmgrMemRead_IMPL 970 ( 971 MemoryManager *pMemoryManager, 972 TRANSFER_SURFACE *pSrcInfo, 973 void *pBuf, 974 NvU64 size, 975 NvU32 flags 976 ) 977 { 978 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 979 NULL, pSrcInfo); 980 981 return memmgrMemReadWithTransferType(pMemoryManager, pSrcInfo, pBuf, 982 size, transferType, flags); 983 } 984 985 /*! 986 * @brief This helper function can be used to begin transfers 987 * 988 * @param[in] pTransferInfo Transfer information 989 * @param[in] shadowBufSize Size of allocated shadow buffer in case of shadow mapping 990 * @param[in] flags Flags 991 */ 992 NvU8 * 993 memmgrMemBeginTransfer_IMPL 994 ( 995 MemoryManager *pMemoryManager, 996 TRANSFER_SURFACE *pTransferInfo, 997 NvU64 shadowBufSize, 998 NvU32 flags 999 ) 1000 { 1001 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 1002 pTransferInfo, NULL); 1003 MEMORY_DESCRIPTOR *pMemDesc = pTransferInfo->pMemDesc; 1004 NvU64 offset = pTransferInfo->offset; 1005 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 1006 NvU8 *pPtr = NULL; 1007 NvU64 memSz = 0; 1008 1009 NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL); 1010 NV_ASSERT_OR_RETURN((memSz = memdescGetSize(pMemDesc)) >= shadowBufSize, NULL); 1011 NV_ASSERT_OR_RETURN(memdescGetKernelMapping(pMemDesc) == NULL, NULL); 1012 1013 memSz = shadowBufSize == 0 ? 
memSz : shadowBufSize; 1014 1015 switch (transferType) 1016 { 1017 case TRANSFER_TYPE_PROCESSOR: 1018 if (flags & TRANSFER_FLAGS_USE_BAR1) 1019 { 1020 NvP64 pPriv; 1021 NvU32 protect = NV_PROTECT_READ_WRITE; 1022 1023 if (flags & TRANSFER_FLAGS_MAP_PROTECT_READABLE) 1024 { 1025 protect = NV_PROTECT_READABLE; 1026 } 1027 else if (flags & TRANSFER_FLAGS_MAP_PROTECT_WRITEABLE) 1028 { 1029 protect = NV_PROTECT_WRITEABLE; 1030 } 1031 1032 NV_ASSERT_OR_RETURN(memdescMap(pMemDesc, offset, memSz, NV_TRUE, protect, 1033 (NvP64*) &pPtr, &pPriv) == NV_OK, NULL); 1034 memdescSetKernelMappingPriv(pMemDesc, pPtr); 1035 break; 1036 } 1037 NV_ASSERT_OR_RETURN((pPtr = memdescMapInternal(pGpu, pMemDesc, flags)) != NULL, NULL); 1038 pPtr = &pPtr[offset]; 1039 1040 break; 1041 case TRANSFER_TYPE_GSP_DMA: 1042 case TRANSFER_TYPE_CE: 1043 if (flags & TRANSFER_FLAGS_SHADOW_ALLOC) 1044 { 1045 NV_ASSERT_OR_RETURN((pPtr = portMemAllocNonPaged(memSz)), NULL); 1046 if (flags & TRANSFER_FLAGS_SHADOW_INIT_MEM) 1047 { 1048 NV_ASSERT_OK(memmgrMemRead(pMemoryManager, pTransferInfo, pPtr, memSz, flags)); 1049 } 1050 } 1051 break; 1052 default: 1053 NV_ASSERT(0); 1054 } 1055 memdescSetKernelMapping(pMemDesc, pPtr); 1056 return pPtr; 1057 } 1058 1059 /*! 
1060 * @brief This helper function can be used to end transfers 1061 * 1062 * @param[in] pTransferInfo Transfer information 1063 * @param[in] shadowBufSize Size of allocated shadow buffer in case of shadow mapping 1064 * @param[in] flags Flags 1065 */ 1066 void 1067 memmgrMemEndTransfer_IMPL 1068 ( 1069 MemoryManager *pMemoryManager, 1070 TRANSFER_SURFACE *pTransferInfo, 1071 NvU64 shadowBufSize, 1072 NvU32 flags 1073 ) 1074 { 1075 TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager, 1076 pTransferInfo, NULL); 1077 MEMORY_DESCRIPTOR *pMemDesc = pTransferInfo->pMemDesc; 1078 NvU64 offset = pTransferInfo->offset; 1079 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager); 1080 NvU64 memSz = 0; 1081 NvU8 *pMapping = memdescGetKernelMapping(pMemDesc); 1082 1083 NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL); 1084 NV_ASSERT_OR_RETURN_VOID((memSz = memdescGetSize(pMemDesc)) >= (shadowBufSize + offset) ); 1085 1086 memSz = shadowBufSize == 0 ? memSz : shadowBufSize; 1087 1088 memdescSetKernelMapping(pMemDesc, NULL); 1089 1090 switch (transferType) 1091 { 1092 case TRANSFER_TYPE_PROCESSOR: 1093 if (flags & TRANSFER_FLAGS_USE_BAR1) 1094 { 1095 NvP64 pPriv = memdescGetKernelMappingPriv(pMemDesc); 1096 memdescSetKernelMappingPriv(pMemDesc, NULL); 1097 memdescUnmap(pMemDesc, NV_TRUE, 0, pMapping, pPriv); 1098 return; 1099 } 1100 memdescUnmapInternal(pGpu, pMemDesc, flags); 1101 return; 1102 case TRANSFER_TYPE_GSP_DMA: 1103 case TRANSFER_TYPE_CE: 1104 if (pMapping != NULL) 1105 { 1106 NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, pTransferInfo, pMapping, memSz, flags)); 1107 portMemFree(pMapping); 1108 } 1109 return; 1110 default: 1111 NV_ASSERT(0); 1112 } 1113 return; 1114 } 1115 1116 /*! 
1117 * @brief Helper function that ends transfers to a memdesc with default offset/size 1118 * 1119 * @param[in] pMemDesc Memory descriptor to end transfer to 1120 * @param[in] flags Flags 1121 */ 1122 void 1123 memmgrMemDescEndTransfer_IMPL 1124 ( 1125 MemoryManager *pMemoryManager, 1126 MEMORY_DESCRIPTOR *pMemDesc, 1127 NvU32 flags 1128 ) 1129 { 1130 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 1131 memmgrMemEndTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags); 1132 } 1133 1134 /*! 1135 * @brief Helper function that begins transfers to a memdesc with default offset/size 1136 * 1137 * @param[in] pMemDesc Memory descriptor to begin transfer to 1138 * @param[in] flags Flags 1139 */ 1140 NvU8 * 1141 memmgrMemDescBeginTransfer_IMPL 1142 ( 1143 MemoryManager *pMemoryManager, 1144 MEMORY_DESCRIPTOR *pMemDesc, 1145 NvU32 flags 1146 ) 1147 { 1148 TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc}; 1149 return memmgrMemBeginTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags); 1150 } 1151 1152 /*! 1153 * @brief This function is used to allocate common resources across memory 1154 * classes, and must be used before memory-specific resource alloc. 
 *
 * Determines the final memory location, page size and alignment for the
 * request, reserves any HW resources (comptags etc.), and copies the
 * resolved attributes back into the user params. On failure after HW
 * resources were reserved, they are released before returning.
 *
 * @param[in/out] pAllocRequest User-provided alloc request struct
 * @param[in/out] pFbAllocInfo  Initialized FB_ALLOC_INFO struct to alloc
 *
 * @return NV_OK on success; NV_ERR_INVALID_ARGUMENT on bad size/type,
 *         or the failing sub-call's status otherwise.
 */
NV_STATUS
memmgrAllocResources_IMPL
(
    OBJGPU                      *pGpu,
    MemoryManager               *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST   *pAllocRequest,
    FB_ALLOC_INFO               *pFbAllocInfo
)
{
    NV_STATUS                    status        = NV_OK;
    NvU64                        alignment     = 0;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    NV_ADDRESS_SPACE             addrSpace     = memmgrAllocGetAddrSpace(pMemoryManager, pVidHeapAlloc->flags,
                                                                         pFbAllocInfo->retAttr);

    NvU64                        pageSize      = 0;
    NvBool                       bAllocedHwRes = NV_FALSE;

    // IRQL TEST: must be running at equivalent of passive-level
    IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());

    //
    // Check for valid size.
    //
    if (pVidHeapAlloc->size == 0)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // Ensure a valid allocation pVidHeapAlloc->type was passed in
    //
    if (pVidHeapAlloc->type > NVOS32_NUM_MEM_TYPES - 1)
        return NV_ERR_INVALID_ARGUMENT;

    if (ADDR_VIRTUAL != addrSpace)
    {
        // If vidmem not requested explicitly, decide on the physical location.
        if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr) ||
            FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr))
        {
            if (ADDR_FBMEM == addrSpace)
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, pFbAllocInfo->retAttr);
            }
            else
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr);
            }
        }
    }
    else // Virtual
    {
        // Clear location to ANY since virtual does not associate with location.
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr);
    }

    // Fetch RM page size (may also update retAttr/retAttr2 page-size fields)
    pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
                                       pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
                                       &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
    // AMODEL simulation has no real page-size constraints, so 0 is tolerated there.
    if (!IsAMODEL(pGpu) && pageSize == 0)
    {
        status = NV_ERR_INVALID_STATE;
        NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
        goto failed;
    }

    // Fetch memory alignment (may also grow pFbAllocInfo->size for HW rounding)
    status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
                                               pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
                                               pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
        goto failed;
    }

    //
    // Call into HAL to reserve any hardware resources for
    // the specified memory pVidHeapAlloc->type.
    // If the alignment was changed due to a HW limitation, and the
    // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
    // will be passed back from the HAL
    //
    status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    // HW resources may be partially reserved even on failure; remember to free.
    bAllocedHwRes = NV_TRUE;

    // Copy resolved attributes back to the user params BEFORE checking status,
    // so callers see what was attempted even on failure.
    pVidHeapAlloc->attr  = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
    pVidHeapAlloc->format = pFbAllocInfo->format;
    pVidHeapAlloc->comprCovg = pFbAllocInfo->comprCovg;
    pVidHeapAlloc->zcullCovg = pFbAllocInfo->zcullCovg;

    if (status != NV_OK)
    {
        //
        // probably means we passed in a bogus pVidHeapAlloc->type or no tiling resources available
        // when tiled memory attribute was set to REQUIRED
        //
        NV_PRINTF(LEVEL_ERROR, "fbAlloc failure!\n");
        goto failed;
    }

    // call HAL to set resources
    status = memmgrSetAllocParameters_HAL(pGpu, pMemoryManager, pFbAllocInfo);

    if (status != NV_OK)
    {
        //
        // Two possibilties: either some attribute was set to REQUIRED, ran out of resources,
        // or unaligned address / size was passed down. Free up memory and fail this call.
        // heapFree will fix up heap pointers.
        //
        goto failed;
    }

    //
    // for fixed allocation check if the alignment needs to adjusted.
    // some hardware units request allocation aligned to smaller than
    // page sizes which can be handled through alignPad
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        //
        // is our desired offset suitably aligned?
        // if not adjust alignment using alignPad (offset into a page), the
        // allocation is page size aligned as required for swizzling.
        // Note: pFbAllocInfo->align is a mask, so align + 1 is the alignment size.
        //
        if (pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1))
        {
            pFbAllocInfo->alignPad = pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1);
            pFbAllocInfo->desiredOffset -= pFbAllocInfo->alignPad;
        }
    }

    //
    // Refresh search parameters.
    //
    pFbAllocInfo->adjustedSize = pFbAllocInfo->size - pFbAllocInfo->alignPad;
    pVidHeapAlloc->height = pFbAllocInfo->height;
    pVidHeapAlloc->pitch  = pFbAllocInfo->pitch;

    //
    // The api takes alignment-1 (used to be a mask).
    //
    alignment = pFbAllocInfo->align + 1;
    pVidHeapAlloc->alignment = pFbAllocInfo->align + 1; // convert mask to size

    //
    // Allow caller to request host page alignment to make it easier
    // to move things around with host os VM subsystem
    //
    if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE) &&
        (addrSpace == ADDR_FBMEM))
    {
        OBJSYS *pSys = SYS_GET_INSTANCE();
        NvU64   hostPageSize = pSys->cpuInfo.hostPageSize;

        // hostPageSize *should* always be set, but....
        if (hostPageSize == 0)
            hostPageSize = RM_PAGE_SIZE;

        alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
    }

    // Publish final alignment (size form to the API, mask form internally).
    pVidHeapAlloc->alignment = alignment;
    pFbAllocInfo->align      = alignment - 1;

    return status;

failed:
    if (bAllocedHwRes)
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    }

    return status;
}

/*!
 * @brief This function is used to create a memory descriptor if needed.
 *
 * @param[in/out] pAllocRequest   User-provided alloc request struct
 * @param[in/out] pFbAllocInfo    Initialized FB_ALLOC_INFO struct to alloc
 * @param[out]    ppMemDesc       Double pointer to created descriptor
 * @param[in]     pHeap           Heap pointer to store in descriptor
 * @param[in]     addrSpace       Address space identifier
 * @param[in]     bContig         NV_TRUE to request a contiguous descriptor
 * @param[out]    bAllocedMemDesc Set to NV_TRUE only if a descriptor was
 *                                created here (left untouched otherwise)
 *
 * @return NV_OK, or the memdescCreate() failure status.
 */
NV_STATUS
memUtilsAllocMemDesc
(
    OBJGPU                     *pGpu,
    MEMORY_ALLOCATION_REQUEST  *pAllocRequest,
    FB_ALLOC_INFO              *pFbAllocInfo,
    MEMORY_DESCRIPTOR         **ppMemDesc,
    Heap                       *pHeap,
    NV_ADDRESS_SPACE            addrSpace,
    NvBool                      bContig,
    NvBool                     *bAllocedMemDesc
)
{
    NV_STATUS status = NV_OK;

    //
    // Allocate a memory descriptor if needed. We do this after the fbHwAllocResources() call
    // so we have the updated size information. Linear callers like memdescAlloc() can live with
    // only having access to the requested size in bytes, but block linear callers really do
    // need to allocate after fbAlloc() rounding takes place.
    //
    if (pAllocRequest->pMemDesc == NULL)
    {
        NvU64 memDescFlags = MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE;

        //
        // Allocate a contig vidmem descriptor now; if needed we'll
        // allocate a new noncontig memdesc later
        //
        status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, pFbAllocInfo->adjustedSize, 0, bContig,
                               addrSpace, NV_MEMORY_UNCACHED, memDescFlags);

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
            return status;
        }

        *bAllocedMemDesc = NV_TRUE;
    }

    *ppMemDesc = pAllocRequest->pMemDesc;
    (*ppMemDesc)->pHeap = pHeap;

    // Set attributes tracked by the memdesc
    memdescSetPteKind(*ppMemDesc, pFbAllocInfo->format);
    memdescSetHwResId(*ppMemDesc, pFbAllocInfo->hwResId);

    return status;
}

/*!
 * Memsets the memory for the given memory descriptor with the given value.
 * This function assumes that BAR2 is not yet available. Thus either the BAR0
 * window to FB or a memmap to SYSMEM will be used, depending on the memory
 * location.
 *
 * NOTE(review): for FBMEM via the BAR0 window, each 32-bit write stores
 * `value` as a DWORD (0x000000VV), NOT the byte replicated four times as the
 * portMemSet() paths do — presumably callers only pass value == 0; confirm
 * before using with a nonzero value.
 *
 * @param[in] pGpu     GPU object pointer
 * @param[in] pMemDesc Memory descriptor for the memory to memset
 *                     (size must be a multiple of 4 bytes)
 * @param[in] value    Value to memset to.
 */
NV_STATUS
memUtilsMemSetNoBAR2(OBJGPU *pGpu, PMEMORY_DESCRIPTOR pMemDesc, NvU8 value)
{
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU8       *pMap  = NULL;
    void       *pPriv = NULL;
    RmPhysAddr  physAddr;
    RmPhysAddr  physAddrOrig;
    NvU64       sizeInDWord;
    NvU32       sizeOfDWord = sizeof(NvU32);
    NvU32       bar0Addr;
    NvU32       i;

    // Surface size must be DWORD-aligned for the BAR0 register-write path.
    NV_ASSERT((pMemDesc != NULL) &&
              (pMemDesc->Size & (sizeOfDWord-1)) == 0);
    sizeInDWord = pMemDesc->Size / sizeOfDWord;

    //
    // BAR2 is not yet initialized. Thus use either the BAR0 window or
    // memmap to initialize the given surface.
    //
    NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL);
    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
            {
                // No PRAMIN window: map through the RM aperture instead.
                // (Intentionally shadows the outer pMap; this one is local
                // to the aperture mapping.)
                NvU8 *pMap = kbusMapRmAperture_HAL(pGpu, pMemDesc);
                NV_ASSERT_OR_RETURN(pMap != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
                portMemSet(pMap, value, pMemDesc->Size);
                kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pMap, NV_TRUE);

                break;
            }
            //
            // Set the BAR0 window to encompass the given surface while
            // saving off the location to where the BAR0 window was
            // previously pointing.
            //
            physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
            NV_ASSERT((physAddr & (sizeOfDWord-1)) == 0);

            physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
            // Window base must be 64KB-aligned; mask off the low 16 bits.
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu,
                                               pKernelBus,
                                               physAddr & ~0xffffULL));
            bar0Addr =
                NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) +
                          (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)));

            //
            // Iterate and initialize the given surface with BAR0
            // writes.
            //
            for (i = 0; i < sizeInDWord; i++)
            {
                GPU_REG_WR32(pGpu,
                             bar0Addr + (sizeOfDWord * i),
                             value);
            }

            //
            // Restore where the BAR0 window was previously pointing
            // to.
            //
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, physAddrOrig));

            break;

        case ADDR_SYSMEM:
            // Plain old memmap.
            NV_ASSERT_OK_OR_RETURN(
                memdescMapOld(pMemDesc, 0,
                              pMemDesc->Size,
                              NV_TRUE, // kernel,
                              NV_PROTECT_READ_WRITE,
                              (void **)&pMap,
                              &pPriv));
            portMemSet(pMap, value, NvU64_LO32(pMemDesc->Size));
            memdescUnmapOld(pMemDesc, 1, 0, pMap, pPriv); // 1 == kernel mapping
            break;

        default:
            // Should not happen.
            NV_ASSERT(0);
            break;
    }

    return NV_OK;
}