/*
 * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include "core/core.h"
#include "gpu/ce/kernel_ce.h"
#include "gpu/bus/kern_bus.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/fifo/kernel_fifo.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/ce_utils.h"
#include "gpu/subdevice/subdevice.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"

#include "class/clb0b5.h" // MAXWELL_DMA_COPY_A
#include "class/clc0b5.h" // PASCAL_DMA_COPY_A
#include "class/clc1b5.h" // PASCAL_DMA_COPY_B
#include "class/clc3b5.h" // VOLTA_DMA_COPY_A
#include "class/clc5b5.h" // TURING_DMA_COPY_A
#include "class/clc8b5.h" // HOPPER_DMA_COPY_A
#include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A

#include "class/cl0080.h"

NV_STATUS
ceutilsConstruct_IMPL
(
    CeUtils                      *pCeUtils,
    OBJGPU                       *pGpu,
    KERNEL_MIG_GPU_INSTANCE      *pKernelMIGGPUInstance,
    NV0050_ALLOCATION_PARAMETERS *pAllocParams
)
{
    NV_STATUS status = NV_OK;
    NvU64 allocFlags = pAllocParams->flags;
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);

    NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    // Allocate channel with RM internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    if (pChannel == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    if (pCeUtils->hClient == NV01_NULL_OBJECT)
    {
        // Allocate client
        status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                         NV01_NULL_OBJECT, NV01_ROOT, &pCeUtils->hClient,
                                         sizeof(pCeUtils->hClient));
        NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
    }

    pChannel->hClient = pCeUtils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    NV_ASSERT_OR_GOTO(pClient != NULL, free_client);

    status = serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    status = clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;

    pChannel->deviceId = pCeUtils->hDevice;
    pChannel->subdeviceId = pCeUtils->hSubdevice;

    pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance;

    // We'll allocate a new VAS for now. Sharing the client VAS will be added later.
    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _VIRTUAL_MODE, _TRUE, allocFlags);

    // Detect whether we can enable fast scrub on this channel
    status = memmgrMemUtilsGetCopyEngineClass_HAL(pGpu, pMemoryManager, &pCeUtils->hTdCopyClass);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    if ((pCeUtils->hTdCopyClass == HOPPER_DMA_COPY_A) && !pChannel->bUseVasForCeCopy)
    {
        pChannel->type = FAST_SCRUBBER_CHANNEL;
        NV_PRINTF(LEVEL_INFO, "Enabled fast scrubber in construct.\n");
    }
    else
    {
        pChannel->type = CE_SCRUBBER_CHANNEL;
    }

    // Set up various channel resources
    status = channelSetupIDs(pChannel, pGpu, pChannel->bUseVasForCeCopy, bMIGInUse);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    channelSetupChannelBufferSizes(pChannel);

    status = memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for ceUtils\n");
    pCeUtils->pChannel = pChannel;

    // Allocate CE states
    status = memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    pCeUtils->pGpu = pGpu;

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

    if (pAllocParams->hVaspace != NV01_NULL_OBJECT)
    {
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    }
free_client:
    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _FALSE, allocFlags))
    {
        // Only free the client if RM allocated it; externally provided clients are freed by their owner
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    }

cleanup:
    portMemFree(pChannel);
    return status;
}
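/*
 * Illustrative construction sketch (not part of this file's control flow):
 * internal callers typically instantiate CeUtils through objCreate(), as
 * ceutilsapiConstruct_IMPL() does below. pParent and the flag choice here
 * are hypothetical.
 *
 *     NV0050_ALLOCATION_PARAMETERS allocParams = {0};
 *     CeUtils *pCeUtils = NULL;
 *
 *     allocParams.flags = 0;  // physical-mode, RM-internal client
 *     NV_STATUS status = objCreate(&pCeUtils, pParent, CeUtils,
 *                                  pGpu, NULL, &allocParams);
 *     // ... use pCeUtils, then objDelete(pCeUtils) when done
 */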
void
ceutilsDestruct_IMPL
(
    CeUtils *pCeUtils
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    OBJGPU *pGpu = pCeUtils->pGpu;
    MemoryManager *pMemoryManager;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // Sanity checks
    if ((pGpu == NULL) || (pChannel == NULL))
    {
        NV_PRINTF(LEVEL_WARNING, "Possible double-free of CeUtils!\n");
        return;
    }
    else if (pGpu != pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Bad state during ceUtils teardown!\n");
        return;
    }

    // Only safe to dereference pGpu after the sanity checks above
    pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL))
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // When PCIE is blocked, mappings should be created, used, and
            // torn down at each point of use, so a mapping that survives to
            // destruction indicates a leak.
            //
            NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from ceUtils!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pControlGPFifo = NULL;
        }
    }

    if (pChannel->pbCpuVA != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pbCpuVA = NULL;
        }
    }

    if (pChannel->pTokenFromNotifier != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pTokenFromNotifier = NULL;
        }
    }

    // Resource server makes sure no leak can occur
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}

void
ceutilsServiceInterrupts_IMPL(CeUtils *pCeUtils)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // Yielding while holding the rmDeviceGpuLock can lead to deadlock. Instead,
    // if the lock is held, service any interrupts on the owned CE to make progress.
    // Bug 2527660 is filed to remove this change.
    //
    // pChannel is NULL when PMA scrub requests are handled in the vGPU plugin.
    // In that case the vGPU plugin allocates the scrubber channel in the PF
    // domain, so the above-mentioned deadlock does not apply.
    //
    if ((pChannel != NULL) && rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance))
    {
        channelServiceScrubberInterrupts(pChannel);
    }
    else
    {
        osSchedule();
    }
}


static NvBool
_ceUtilsFastScrubEnabled
(
    POBJCHANNEL      pChannel,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    OBJGPU *pGpu = pChannel->pGpu;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    if (!memmgrIsFastScrubberEnabled(pMemoryManager))
    {
        return NV_FALSE;
    }

    //
    // Enable memory fast scrubbing only when:
    //  - the channel was allocated as a fast-scrub channel
    //  - the operation is a memset (not a memcopy)
    //  - the memset pattern is 0
    //  - DstPhysMode.target == LOCAL_FB
    //  - the address is 4KB aligned
    //  - the line length is 4KB aligned
    //
    return ((pChannel->type == FAST_SCRUBBER_CHANNEL) &&
            (!pChannelPbInfo->bCeMemcopy) &&
            (pChannelPbInfo->pattern == 0) &&
            (pChannelPbInfo->dstAddressSpace == ADDR_FBMEM) &&
            (NV_IS_ALIGNED64(pChannelPbInfo->dstAddr, MEMUTIL_SCRUB_OFFSET_ALIGNMENT)) &&
            (NV_IS_ALIGNED(pChannelPbInfo->size, MEMUTIL_SCRUB_LINE_LENGTH_ALIGNMENT)));
}
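/*
 * Worked example of the eligibility check above (values are illustrative):
 * a memset with pattern == 0 to ADDR_FBMEM at dstAddr = 0x20000000 with
 * size = 0x10000 qualifies, since both the address and the length are 4KB
 * aligned. Moving dstAddr to 0x20000100, or shrinking size to 0xFF00,
 * fails the NV_IS_ALIGNED checks and falls back to the regular CE path.
 */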
//
// Helper to deal with CE_MAX_BYTES_PER_LINE.
// This function may modify some fields in pChannelPbInfo.
//
static NV_STATUS
_ceutilsSubmitPushBuffer
(
    POBJCHANNEL      pChannel,
    NvBool           bPipelined,
    NvBool           bInsertFinishPayload,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvU32 methodsLength, putIndex = 0;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
    NvBool bReleaseMapping = NV_FALSE;

    //
    // Use BAR1 if CPU access is allowed; otherwise allocate and initialize a
    // shadow buffer for DMA access.
    //
    NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1 |
                           TRANSFER_FLAGS_SHADOW_ALLOC |
                           TRANSFER_FLAGS_SHADOW_INIT_MEM);
    NV_PRINTF(LEVEL_INFO, "Actual size of the copy to be pushed: %x\n", pChannelPbInfo->size);

    status = channelWaitForFreeEntry(pChannel, &putIndex);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
        return status;
    }

    if (pChannel->pbCpuVA == NULL)
    {
        pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                                                       transferFlags);
        bReleaseMapping = NV_TRUE;
    }
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    if (_ceUtilsFastScrubEnabled(pChannel, pChannelPbInfo))
    {
        methodsLength = channelFillPbFastScrub(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo);
    }
    else
    {
        methodsLength = channelFillCePb(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo);
    }

    if (bReleaseMapping)
    {
        memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
        pChannel->pbCpuVA = NULL;
    }

    if (methodsLength == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
        return NV_ERR_NO_FREE_FIFOS;
    }

    //
    // The pushbuffer can be written in a batch, but the GPFIFO and doorbell
    // require careful ordering, so we do each write one-by-one.
    //
    status = channelFillGpFifo(pChannel, putIndex, methodsLength);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Channel operation failures during memcopy\n");
        return status;
    }

    pChannel->lastSubmittedEntry = putIndex;

    return status;
}
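/*
 * The BeginTransfer/EndTransfer pairing above is the general pattern for
 * CPU access to a memdesc in this file; a minimal sketch, assuming the
 * mapping pointer type and the memset of the whole surface are illustrative:
 *
 *     NvU8 *pVa = memmgrMemDescBeginTransfer(pMemoryManager, pMemDesc,
 *                                            TRANSFER_FLAGS_USE_BAR1);
 *     if (pVa != NULL)
 *     {
 *         portMemSet(pVa, 0, memdescGetSize(pMemDesc));  // CPU-side writes
 *         memmgrMemDescEndTransfer(pMemoryManager, pMemDesc,
 *                                  TRANSFER_FLAGS_USE_BAR1);
 *     }
 */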
NV_STATUS
ceutilsMemset_IMPL
(
    CeUtils *pCeUtils,
    CEUTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMSET_FLAGS_PIPELINED;

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for CeUtils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) ||
        (pMemDesc->pGpu != pCeUtils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_PRINTF(LEVEL_INFO, "CeUtils Args to memset - offset: %llx, length: %llx\n",
              pParams->offset, pParams->length);

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_FALSE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.pattern = pParams->pattern;
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

    do
    {
        //
        // Split the memset into chunks that are physically contiguous and no
        // larger than CE_MAX_BYTES_PER_LINE.
        //
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), CE_MAX_BYTES_PER_LINE);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memset dstAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        channelPbInfo.size = memsetSizeContig;
        status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, memsetSizeContig == memsetLength, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        //
        // Allow _LAUNCH_DMA methods that belong to the same memset operation
        // to be pipelined after each other, as there are no dependencies.
        //
        bPipelined = NV_TRUE;

        memsetLength -= memsetSizeContig;
        offset       += memsetSizeContig;
    } while (memsetLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Check semaProgress and then timeout
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}
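/*
 * Illustrative caller-side sketch for ceutilsMemset() (the memdesc is
 * assumed to exist and error handling is elided):
 *
 *     CEUTILS_MEMSET_PARAMS params = {0};
 *     params.pMemDesc = pMemDesc;                  // FBMEM memdesc
 *     params.offset   = 0;
 *     params.length   = memdescGetSize(pMemDesc);
 *     params.pattern  = 0;
 *     params.flags    = NV0050_CTRL_MEMSET_FLAGS_ASYNC;
 *     status = ceutilsMemset(pCeUtils, &params);
 *     // params.submittedWorkId can later be compared against
 *     // ceutilsUpdateProgress() to poll for completion
 */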
NV_STATUS
ceutilsMemcopy_IMPL
(
    CeUtils *pCeUtils,
    CEUTILS_MEMCOPY_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU64 srcSize, dstSize, copyLength, srcPageGranularity, dstPageGranularity;
    NvBool bSrcContig, bDstContig;

    CHANNEL_PB_INFO channelPbInfo = {0};
    MEMORY_DESCRIPTOR *pDstMemDesc = pParams->pDstMemDesc;
    MEMORY_DESCRIPTOR *pSrcMemDesc = pParams->pSrcMemDesc;

    NvU64 length = pParams->length;
    NvU64 srcOffset = pParams->srcOffset;
    NvU64 dstOffset = pParams->dstOffset;

    NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED;

    // Validate params
    if ((pSrcMemDesc == NULL) || (pDstMemDesc == NULL))
    {
        NV_PRINTF(LEVEL_ERROR, "Src/Dst memory descriptor should be valid.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pSrcMemDesc) != ADDR_FBMEM) &&
        (memdescGetAddressSpace(pDstMemDesc) != ADDR_FBMEM))
    {
        NV_PRINTF(LEVEL_ERROR, "Either Dst or Src memory should be in vidmem.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pSrcMemDesc->pGpu != pCeUtils->pChannel->pGpu) ||
        (pDstMemDesc->pGpu != pCeUtils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils does not support p2p copies right now.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    srcSize = memdescGetSize(pSrcMemDesc);
    dstSize = memdescGetSize(pDstMemDesc);

    if ((srcOffset >= srcSize) || (dstOffset >= dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the src/dst memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((length == 0) ||
        (srcOffset + length > srcSize) || (dstOffset + length > dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memcopy length.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_TRUE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.srcAddressSpace = memdescGetAddressSpace(pSrcMemDesc);
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pDstMemDesc);

    channelPbInfo.srcCpuCacheAttrib = pSrcMemDesc->_cpuCacheAttrib;
    channelPbInfo.dstCpuCacheAttrib = pDstMemDesc->_cpuCacheAttrib;

    srcPageGranularity = pSrcMemDesc->pageArrayGranularity;
    dstPageGranularity = pDstMemDesc->pageArrayGranularity;
    bSrcContig = memdescGetContiguity(pSrcMemDesc, AT_GPU);
    bDstContig = memdescGetContiguity(pDstMemDesc, AT_GPU);

    copyLength = length;

    do
    {
        //
        // This algorithm finds the maximum physically contiguous region
        // common to both src and dst for each chunk, and iterates until the
        // whole range has been submitted to the CE.
        //
        NvU64 maxContigSizeSrc = bSrcContig ? copyLength : (srcPageGranularity - srcOffset % srcPageGranularity);
        NvU64 maxContigSizeDst = bDstContig ? copyLength : (dstPageGranularity - dstOffset % dstPageGranularity);
        NvU32 copySizeContig = (NvU32)NV_MIN(NV_MIN(copyLength, NV_MIN(maxContigSizeSrc, maxContigSizeDst)), CE_MAX_BYTES_PER_LINE);

        channelPbInfo.srcAddr = memdescGetPhysAddr(pSrcMemDesc, AT_GPU, srcOffset);
        channelPbInfo.dstAddr = memdescGetPhysAddr(pDstMemDesc, AT_GPU, dstOffset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memcopy dstAddr: %llx, srcAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, channelPbInfo.srcAddr, copySizeContig);

        channelPbInfo.size = copySizeContig;
        status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, copySizeContig == copyLength, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memcopy.\n");
            return status;
        }

        //
        // Allow _LAUNCH_DMA methods that belong to the same copy operation
        // to be pipelined after each other, as there are no dependencies.
        //
        bPipelined = NV_TRUE;

        copyLength -= copySizeContig;
        srcOffset  += copySizeContig;
        dstOffset  += copySizeContig;
    } while (copyLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMCOPY_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memcopy payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Check semaProgress and then timeout
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}
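/*
 * Worked example of the chunking math above (illustrative values): with
 * srcPageGranularity = dstPageGranularity = 0x1000, srcOffset = 0x1800,
 * dstOffset = 0x2000 and copyLength = 0x3000, the first iteration computes
 * maxContigSizeSrc = 0x1000 - 0x800 = 0x800 and maxContigSizeDst = 0x1000,
 * so copySizeContig is 0x800. The next iteration starts at srcOffset =
 * 0x2000 and dstOffset = 0x2800; because the two offsets stay mutually
 * misaligned by 0x800, every chunk is capped at 0x800 and the copy
 * completes in six submissions.
 */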
// This function updates pCeUtils->lastCompletedPayload and handles wrap-around
NvU64
ceutilsUpdateProgress_IMPL
(
    CeUtils *pCeUtils
)
{
    NV_ASSERT((pCeUtils != NULL) && (pCeUtils->pChannel != NULL));

    NvU32 hwCurrentCompletedPayload = 0;
    NvU64 swLastCompletedPayload = pCeUtils->lastCompletedPayload;

    //
    // CeUtils uses a 64-bit index to track the work submitted, but HW supports
    // only a 32-bit semaphore. The current completed ID is calculated here
    // from the lastSubmittedPayload and the current HW semaphore value.
    //
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pCeUtils->pChannel);

    // No work has been completed since we last checked
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
    swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
    swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;

    pCeUtils->lastCompletedPayload = swLastCompletedPayload;
    return swLastCompletedPayload;
}
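/*
 * Worked wrap-around example (illustrative values): if lastCompletedPayload
 * was 0x00000001FFFFFFF0 and the 32-bit HW semaphore now reads 0x00000010,
 * the HW value is numerically smaller than the low 32 bits of the SW value,
 * so the upper 32 bits are incremented and the low bits replaced, giving
 * 0x0000000200000010.
 */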
#if defined(DEBUG) || defined(DEVELOP)
NV_STATUS
ceutilsapiCtrlCmdCheckProgress_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_CHECK_PROGRESS_PARAMS *pParams
)
{
    if (pParams->submittedWorkId <= ceutilsUpdateProgress(pCeUtilsApi->pCeUtils))
    {
        pParams->result = NV0050_CTRL_CHECK_PROGRESS_RESULT_FINISHED;
    }

    return NV_OK;
}

NV_STATUS
ceutilsapiConstruct_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    NV0050_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;

    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _TRUE, pAllocParams->flags))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils: unsupported flags = 0x%llx\n", pAllocParams->flags);
        return NV_ERR_NOT_SUPPORTED;
    }

    return objCreate(&pCeUtilsApi->pCeUtils, pCeUtilsApi, CeUtils, GPU_RES_GET_GPU(pCeUtilsApi), NULL, pAllocParams);
}

void
ceutilsapiDestruct_IMPL
(
    CeUtilsApi *pCeUtilsApi
)
{
    objDelete(pCeUtilsApi->pCeUtils);
}

NV_STATUS
ceutilsapiCtrlCmdMemset_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_MEMSET_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pPhysmemRef;
    Memory *pMemory;
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    CEUTILS_MEMSET_PARAMS internalParams = {0};

    if (pParams->hMemory == 0)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hMemory, &pPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }

    // Guard the cast: the handle may not refer to a Memory object
    pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
    NV_ASSERT_OR_RETURN(pMemory != NULL, NV_ERR_INVALID_OBJECT);
    pMemDesc = pMemory->pMemDesc;

    internalParams.pMemDesc = pMemDesc;
    internalParams.offset = pParams->offset;
    internalParams.length = pParams->length;
    internalParams.pattern = pParams->pattern;
    internalParams.flags = pParams->flags;

    status = ceutilsMemset(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}
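/*
 * Illustrative client-side control call for this DEBUG/DEVELOP-only path;
 * the NV0050_CTRL_CMD_MEMSET command name and all handles here are assumed:
 *
 *     NV0050_CTRL_MEMSET_PARAMS params = {0};
 *     params.hMemory = hPhysMem;   // hypothetical vidmem handle
 *     params.offset  = 0;
 *     params.length  = 0x100000;
 *     params.pattern = 0;
 *     params.flags   = NV0050_CTRL_MEMSET_FLAGS_ASYNC;
 *     status = pRmApi->Control(pRmApi, hClient, hCeUtils,
 *                              NV0050_CTRL_CMD_MEMSET,
 *                              &params, sizeof(params));
 */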
NV_STATUS
ceutilsapiCtrlCmdMemcopy_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_MEMCOPY_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pSrcPhysmemRef;
    RsResourceRef *pDstPhysmemRef;
    Memory *pSrcMemory;
    Memory *pDstMemory;
    MEMORY_DESCRIPTOR *pSrcMemDesc = NULL;
    MEMORY_DESCRIPTOR *pDstMemDesc = NULL;
    CEUTILS_MEMCOPY_PARAMS internalParams = {0};

    if ((pParams->hSrcMemory == 0) || (pParams->hDstMemory == 0))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hDstMemory, &pDstPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for dst memory handle.\n");
        return status;
    }

    // Guard the casts: the handles may not refer to Memory objects
    pDstMemory = dynamicCast(pDstPhysmemRef->pResource, Memory);
    NV_ASSERT_OR_RETURN(pDstMemory != NULL, NV_ERR_INVALID_OBJECT);
    pDstMemDesc = pDstMemory->pMemDesc;

    status = serverutilGetResourceRef(hClient, pParams->hSrcMemory, &pSrcPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for src memory handle.\n");
        return status;
    }

    pSrcMemory = dynamicCast(pSrcPhysmemRef->pResource, Memory);
    NV_ASSERT_OR_RETURN(pSrcMemory != NULL, NV_ERR_INVALID_OBJECT);
    pSrcMemDesc = pSrcMemory->pMemDesc;

    internalParams.pSrcMemDesc = pSrcMemDesc;
    internalParams.pDstMemDesc = pDstMemDesc;
    internalParams.srcOffset = pParams->srcOffset;
    internalParams.dstOffset = pParams->dstOffset;
    internalParams.length = pParams->length;
    internalParams.flags = pParams->flags;

    status = ceutilsMemcopy(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}
#endif // defined(DEBUG) || defined(DEVELOP)