/*
 * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "gpu/ce/kernel_ce.h"
#include "gpu/bus/kern_bus.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/fifo/kernel_fifo.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/ce_utils.h"
#include "gpu/subdevice/subdevice.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"

#include "class/clb0b5.h" // MAXWELL_DMA_COPY_A
#include "class/clc0b5.h" // PASCAL_DMA_COPY_A
#include "class/clc1b5.h" // PASCAL_DMA_COPY_B
#include "class/clc3b5.h" // VOLTA_DMA_COPY_A
#include "class/clc5b5.h" // TURING_DMA_COPY_A
#include "class/clc8b5.h" // HOPPER_DMA_COPY_A
#include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A

#include "class/cl0080.h"

NV_STATUS
ceutilsConstruct_IMPL
(
    CeUtils *pCeUtils,
    OBJGPU *pGpu,
    NV0050_ALLOCATION_PARAMETERS *pParams
)
{
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);
    return ceutilsInitialize(pCeUtils, pGpu, pParams);
}
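
//
// Illustrative sketch (not part of the driver): internal users typically
// instantiate this class through objCreate(), which runs
// ceutilsConstruct_IMPL() above; ceutilsapiConstruct_IMPL() at the bottom of
// this file does the same for the NV0050 API object. pParent and pGpu below
// are placeholders for the caller's owning object and GPU.
//
//     NV0050_ALLOCATION_PARAMETERS ceUtilsParams = {0};
//     CeUtils *pCeUtils = NULL;
//
//     ceUtilsParams.flags = 0;  // physical-mode copies, RM-internal client
//     NV_ASSERT_OK_OR_RETURN(
//         objCreate(&pCeUtils, pParent, CeUtils, pGpu, &ceUtilsParams));
//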

// This is used by internal callsites that do not go through resource server
NV_STATUS
ceutilsInitialize
(
    CeUtils *pCeUtils,
    OBJGPU *pGpu,
    NV0050_ALLOCATION_PARAMETERS *pAllocParams
)
{
    NV_STATUS status = NV_OK;
    NvU64 allocFlags = pAllocParams->flags;
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);

    NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    // Allocate the channel with an RM-internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    if (pChannel == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    if (pCeUtils->hClient == NV01_NULL_OBJECT)
    {
        // Allocate client
        status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                         NV01_NULL_OBJECT, NV01_ROOT, &pCeUtils->hClient,
                                         sizeof(pCeUtils->hClient));
        NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
    }

    pChannel->hClient = pCeUtils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    NV_ASSERT_OR_GOTO(pClient != NULL, free_client);

    status = serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    status = clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;

    pChannel->deviceId = pCeUtils->hDevice;
    pChannel->subdeviceId = pCeUtils->hSubdevice;

    // We'll allocate a new VAS for now. Sharing the client VAS will be added later.
    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _VIRTUAL_MODE, _TRUE, allocFlags);

    // Detect whether fast scrub can be enabled on this channel
    status = memmgrMemUtilsGetCopyEngineClass_HAL(pGpu, pMemoryManager, &pCeUtils->hTdCopyClass);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    if ((pCeUtils->hTdCopyClass == HOPPER_DMA_COPY_A) && !pChannel->bUseVasForCeCopy)
    {
        pChannel->type = FAST_SCRUBBER_CHANNEL;
        NV_PRINTF(LEVEL_INFO, "Enabled fast scrubber in construct.\n");
    }

    // Set up various channel resources
    status = channelSetupIDs(pChannel, pGpu, pChannel->bUseVasForCeCopy, bMIGInUse);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    channelSetupChannelBufferSizes(pChannel);

    if (pCeUtils->pKernelMIGGPUInstance != NULL)
    {
        pChannel->pKernelMIGGpuInstance = pCeUtils->pKernelMIGGPUInstance;
    }

    status = memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for ceUtils\n");
    pCeUtils->pChannel = pChannel;

    // Allocate CE states
    status = memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    pCeUtils->pGpu = pGpu;

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

    if (pAllocParams->hVaspace != NV01_NULL_OBJECT)
    {
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    }
free_client:
    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _FALSE, allocFlags))
    {
        // Only free the client if RM allocated it; an externally provided client is owned by the caller
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    }

cleanup:
    portMemFree(pChannel);
    return status;
}

void
ceutilsDeinit
(
    CeUtils *pCeUtils
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    OBJGPU *pGpu = pCeUtils->pGpu;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // Sanity checks
    if ((pGpu == NULL) || (pChannel == NULL))
    {
        NV_PRINTF(LEVEL_WARNING, "Possible double-free of CeUtils!\n");
        return;
    }
    else if (pGpu != pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Bad state during ceUtils teardown!\n");
        return;
    }

    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdPhys);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->pushBufferId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdVirt);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->deviceId);

    // Resource server makes sure no leak can occur
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}

void
ceutilsDestruct_IMPL
(
    CeUtils *pCeUtils
)
{
    ceutilsDeinit(pCeUtils);
}

void
ceutilsServiceInterrupts(CeUtils *pCeUtils)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // Yielding while holding the rmDeviceGpuLock can lead to deadlock. Instead,
    // if the lock is held, service any interrupts on the owned CE to make progress.
    // Bug 2527660 is filed to remove this change.
    //
    // pChannel is NULL when PMA scrub requests are handled in the vGPU plugin.
    // In that case the vGPU plugin allocates the scrubber channel in the PF
    // domain, so the deadlock mentioned above does not apply.
    //
    if ((pChannel != NULL) && (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance)))
    {
        Intr *pIntr = GPU_GET_INTR(pChannel->pGpu);
        intrServiceStallSingle_HAL(pChannel->pGpu, pIntr, MC_ENGINE_IDX_CE(pChannel->ceId), NV_FALSE);
    }
    else
    {
        osSchedule();
    }
}


static NvBool
_ceUtilsFastScrubEnabled
(
    POBJCHANNEL pChannel,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    OBJGPU *pGpu = pChannel->pGpu;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    if (!memmgrIsFastScrubberEnabled(pMemoryManager))
    {
        return NV_FALSE;
    }

    //
    // Enable memory fast scrubbing only when:
    //  - the channel was allocated as a fast-scrub channel
    //  - the operation is a memset
    //  - the memset pattern is 0
    //  - DstPhysMode.target == LOCAL_FB
    //  - the destination address is 4KB aligned
    //  - the line length is 4KB aligned
    //
    return ((pChannel->type == FAST_SCRUBBER_CHANNEL) &&
            (!pChannelPbInfo->bCeMemcopy) &&
            (pChannelPbInfo->pattern == 0) &&
            (pChannelPbInfo->dstAddressSpace == ADDR_FBMEM) &&
            (NV_IS_ALIGNED64(pChannelPbInfo->dstAddr, MEMUTIL_SCRUB_OFFSET_ALIGNMENT)) &&
            (NV_IS_ALIGNED(pChannelPbInfo->size, MEMUTIL_SCRUB_LINE_LENGTH_ALIGNMENT)));
}
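
//
// Illustrative examples of the checks above (values are hypothetical):
//  - A memset with pattern 0 over a 4KB-aligned vidmem range submitted on a
//    FAST_SCRUBBER_CHANNEL takes the fast-scrub path (channelFillPbFastScrub).
//  - The same request with a nonzero pattern, an unaligned offset, or any
//    memcopy falls back to the regular CE path (channelFillPb).
//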

//
// Helper to deal with CE_MAX_BYTES_PER_LINE
// This function may modify some fields in pChannelPbInfo
//
static NV_STATUS
_ceutilsSubmitPushBuffer
(
    POBJCHANNEL pChannel,
    NvU64 opLength,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvBool bFirstIteration = NV_TRUE;
    NvBool bInsertFinishPayload = NV_FALSE;
    NvU32 methodsLength, tempSize, putIndex = 0;
    NvU64 remainingLength = opLength;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(opLength != 0, NV_ERR_INVALID_ARGUMENT);

    do
    {
        tempSize = (NvU32)NV_MIN(remainingLength, CE_MAX_BYTES_PER_LINE);
        pChannelPbInfo->size = tempSize;
        bInsertFinishPayload = (remainingLength == tempSize);
        NV_PRINTF(LEVEL_INFO, "Actual size of the chunk to be pushed: %x \n", tempSize);

        status = channelWaitForFreeEntry(pChannel, &putIndex);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
            return status;
        }

        if (_ceUtilsFastScrubEnabled(pChannel, pChannelPbInfo))
        {
            methodsLength = channelFillPbFastScrub(pChannel, putIndex, bFirstIteration, bInsertFinishPayload, pChannelPbInfo);
        }
        else
        {
            methodsLength = channelFillPb(pChannel, putIndex, bFirstIteration, bInsertFinishPayload, pChannelPbInfo);
        }
        if (methodsLength == 0)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
            return NV_ERR_NO_FREE_FIFOS;
        }

        status = channelFillGpFifo(pChannel, putIndex, methodsLength);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Channel operation failure during memcopy\n");
            return status;
        }

        pChannel->lastSubmittedEntry = putIndex;
        remainingLength -= tempSize;

        pChannelPbInfo->dstAddr += tempSize;
        pChannelPbInfo->srcAddr += tempSize;

        bFirstIteration = NV_FALSE;
    } while (remainingLength > 0);

    return status;
}
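
//
// Illustrative walk-through of the loop above (sizes are hypothetical): if
// opLength is CE_MAX_BYTES_PER_LINE + 0x1000, two pushes are issued. The first
// covers CE_MAX_BYTES_PER_LINE bytes with bInsertFinishPayload == NV_FALSE;
// the second covers the remaining 0x1000 bytes and carries the finish payload,
// so the semaphore release signals only once the whole request is complete.
//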

NV_STATUS
ceutilsMemset_IMPL
(
    CeUtils *pCeUtils,
    CEUTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for CeUtils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) ||
        (pMemDesc->pGpu != pCeUtils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    NV_PRINTF(LEVEL_INFO, "CeUtils Args to memset - offset: %llx, length: %llx \n",
              pParams->offset, pParams->length);

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_FALSE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.pattern = pParams->pattern;
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

    do
    {
        // Submit at most one physically contiguous run per push
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU64 memsetSizeContig = NV_MIN(memsetLength, maxContigSize);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memset dstAddr: %llx, size: %llx\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        status = _ceutilsSubmitPushBuffer(pChannel, memsetSizeContig, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        memsetLength -= memsetSizeContig;
        offset       += memsetSizeContig;
    } while (memsetLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Check semaProgress and then timeout
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}
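
//
// Illustrative sketch (not compiled): a synchronous zero-fill of an FB memdesc
// using ceutilsMemset() above. pCeUtils and pMemDesc stand for objects the
// caller already owns.
//
//     CEUTILS_MEMSET_PARAMS params = {0};
//
//     params.pMemDesc = pMemDesc;
//     params.offset   = 0;
//     params.length   = memdescGetSize(pMemDesc);
//     params.pattern  = 0;
//     params.flags    = 0;  // no ASYNC flag: wait for the finish payload
//
//     NV_ASSERT_OK_OR_RETURN(ceutilsMemset(pCeUtils, &params));
//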

NV_STATUS
ceutilsMemcopy_IMPL
(
    CeUtils *pCeUtils,
    CEUTILS_MEMCOPY_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU64 srcSize, dstSize, copyLength, srcPageGranularity, dstPageGranularity;
    NvBool bSrcContig, bDstContig;

    CHANNEL_PB_INFO channelPbInfo = {0};
    MEMORY_DESCRIPTOR *pDstMemDesc = pParams->pDstMemDesc;
    MEMORY_DESCRIPTOR *pSrcMemDesc = pParams->pSrcMemDesc;

    NvU64 length = pParams->length;
    NvU64 srcOffset = pParams->srcOffset;
    NvU64 dstOffset = pParams->dstOffset;

    // Validate params
    if ((pSrcMemDesc == NULL) || (pDstMemDesc == NULL))
    {
        NV_PRINTF(LEVEL_ERROR, "Src/Dst memory descriptor should be valid.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pSrcMemDesc) != ADDR_FBMEM) &&
        (memdescGetAddressSpace(pDstMemDesc) != ADDR_FBMEM))
    {
        NV_PRINTF(LEVEL_ERROR, "Either Dst or Src memory should be in vidmem.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pSrcMemDesc->pGpu != pCeUtils->pChannel->pGpu) ||
        (pDstMemDesc->pGpu != pCeUtils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils does not support p2p copies right now.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    srcSize = memdescGetSize(pSrcMemDesc);
    dstSize = memdescGetSize(pDstMemDesc);

    if ((srcOffset >= srcSize) || (dstOffset >= dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the src/dst memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((length == 0) ||
        (srcOffset + length > srcSize) || (dstOffset + length > dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memcopy length.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_TRUE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.srcAddressSpace = memdescGetAddressSpace(pSrcMemDesc);
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pDstMemDesc);

    channelPbInfo.srcCpuCacheAttrib = pSrcMemDesc->_cpuCacheAttrib;
    channelPbInfo.dstCpuCacheAttrib = pDstMemDesc->_cpuCacheAttrib;

    srcPageGranularity = pSrcMemDesc->pageArrayGranularity;
    dstPageGranularity = pDstMemDesc->pageArrayGranularity;
    bSrcContig = memdescGetContiguity(pSrcMemDesc, AT_GPU);
    bDstContig = memdescGetContiguity(pDstMemDesc, AT_GPU);

    copyLength = length;

    do
    {
        //
        // This algorithm finds the largest physically contiguous region common
        // to both src and dst for each copy, and iterates until the whole
        // range has been submitted to the CE.
        //
        NvU64 maxContigSizeSrc = bSrcContig ? copyLength : (srcPageGranularity - srcOffset % srcPageGranularity);
        NvU64 maxContigSizeDst = bDstContig ? copyLength : (dstPageGranularity - dstOffset % dstPageGranularity);
        NvU64 copySizeContig = NV_MIN(copyLength, NV_MIN(maxContigSizeSrc, maxContigSizeDst));

        channelPbInfo.srcAddr = memdescGetPhysAddr(pSrcMemDesc, AT_GPU, srcOffset);
        channelPbInfo.dstAddr = memdescGetPhysAddr(pDstMemDesc, AT_GPU, dstOffset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memcopy dstAddr: %llx, srcAddr: %llx, size: %llx\n",
                  channelPbInfo.dstAddr, channelPbInfo.srcAddr, copySizeContig);

        status = _ceutilsSubmitPushBuffer(pChannel, copySizeContig, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memcopy.\n");
            return status;
        }

        copyLength -= copySizeContig;
        srcOffset  += copySizeContig;
        dstOffset  += copySizeContig;
    } while (copyLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memcopy payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Check semaProgress and then timeout
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}
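
//
// Illustrative sketch (not compiled): an asynchronous copy submitted through
// ceutilsMemcopy() above and later polled for completion, mirroring
// ceutilsapiCtrlCmdCheckProgress_IMPL() below. pCeUtils, pSrcMemDesc and
// pDstMemDesc stand for objects the caller already owns; the async flag bit is
// the same one tested above.
//
//     CEUTILS_MEMCOPY_PARAMS params = {0};
//
//     params.pSrcMemDesc = pSrcMemDesc;
//     params.pDstMemDesc = pDstMemDesc;
//     params.length      = memdescGetSize(pSrcMemDesc);
//     params.flags       = NV0050_CTRL_MEMSET_FLAGS_ASYNC;
//
//     NV_ASSERT_OK_OR_RETURN(ceutilsMemcopy(pCeUtils, &params));
//
//     // Later: the work is complete once the 64-bit progress counter has
//     // caught up with the returned work ID.
//     NvBool bDone = (params.submittedWorkId <= ceutilsUpdateProgress(pCeUtils));
//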

// This function updates pCeUtils->lastCompletedPayload and handles wrap-around
NvU64
ceutilsUpdateProgress_IMPL
(
    CeUtils *pCeUtils
)
{
    NV_ASSERT((pCeUtils != NULL) && (pCeUtils->pChannel != NULL));

    NvU32 hwCurrentCompletedPayload = 0;
    NvU64 swLastCompletedPayload = pCeUtils->lastCompletedPayload;

    //
    // CeUtils uses a 64-bit index to track the work submitted, but HW supports
    // only a 32-bit semaphore. The currently completed ID is computed here from
    // the last completed payload and the current HW semaphore value.
    //
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pCeUtils->pChannel);

    // No work has been completed since we checked last time
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
    swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
    swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;

    pCeUtils->lastCompletedPayload = swLastCompletedPayload;
    return swLastCompletedPayload;
}
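
//
// Worked example of the wrap-around handling above (values are hypothetical):
// with lastCompletedPayload == 0x1FFFFFFFE and the HW semaphore reading
// 0x00000003, the lower 32 bits have wrapped (0x3 < 0xFFFFFFFE), so the upper
// 32 bits are incremented and the updated payload becomes 0x200000003.
//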

void
ceutilsRegisterGPUInstance
(
    CeUtils *pCeUtils,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance
)
{
    pCeUtils->pKernelMIGGPUInstance = pKernelMIGGPUInstance;
}

#if defined(DEBUG) || defined (DEVELOP)
NV_STATUS
ceutilsapiCtrlCmdCheckProgress_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_CHECK_PROGRESS_PARAMS *pParams
)
{
    if (pParams->submittedWorkId <= ceutilsUpdateProgress(pCeUtilsApi->pCeUtils))
    {
        pParams->result = NV0050_CTRL_CHECK_PROGRESS_RESULT_FINISHED;
    }

    return NV_OK;
}

NV_STATUS
ceutilsapiConstruct_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    NV0050_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;

    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _TRUE, pAllocParams->flags))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils: unsupported flags = 0x%llx\n", pAllocParams->flags);
        return NV_ERR_NOT_SUPPORTED;
    }

    return objCreate(&pCeUtilsApi->pCeUtils, pCeUtilsApi, CeUtils, GPU_RES_GET_GPU(pCeUtilsApi), pAllocParams);
}

void
ceutilsapiDestruct_IMPL
(
    CeUtilsApi *pCeUtilsApi
)
{
    objDelete(pCeUtilsApi->pCeUtils);
}

NV_STATUS
ceutilsapiCtrlCmdMemset_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_MEMSET_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pPhysmemRef;
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    CEUTILS_MEMSET_PARAMS internalParams = {0};

    if (pParams->hMemory == 0)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hMemory, &pPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;

    internalParams.pMemDesc = pMemDesc;
    internalParams.offset = pParams->offset;
    internalParams.length = pParams->length;
    internalParams.pattern = pParams->pattern;
    internalParams.flags = pParams->flags;

    status = ceutilsMemset(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}

NV_STATUS
ceutilsapiCtrlCmdMemcopy_IMPL
(
    CeUtilsApi *pCeUtilsApi,
    NV0050_CTRL_MEMCOPY_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pSrcPhysmemRef;
    RsResourceRef *pDstPhysmemRef;
    MEMORY_DESCRIPTOR *pSrcMemDesc = NULL;
    MEMORY_DESCRIPTOR *pDstMemDesc = NULL;
    CEUTILS_MEMCOPY_PARAMS internalParams = {0};

    if ((pParams->hSrcMemory == 0) || (pParams->hDstMemory == 0))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hDstMemory, &pDstPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pDstMemDesc = (dynamicCast(pDstPhysmemRef->pResource, Memory))->pMemDesc;

    status = serverutilGetResourceRef(hClient, pParams->hSrcMemory, &pSrcPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pSrcMemDesc = (dynamicCast(pSrcPhysmemRef->pResource, Memory))->pMemDesc;

    internalParams.pSrcMemDesc = pSrcMemDesc;
    internalParams.pDstMemDesc = pDstMemDesc;
    internalParams.srcOffset = pParams->srcOffset;
    internalParams.dstOffset = pParams->dstOffset;
    internalParams.length = pParams->length;
    internalParams.flags = pParams->flags;

    status = ceutilsMemcopy(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}
#endif // defined(DEBUG) || defined (DEVELOP)