1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 25 #include "core/core.h" 26 #include "gpu/ce/kernel_ce.h" 27 #include "gpu/bus/kern_bus.h" 28 #include "kernel/gpu/intr/intr.h" 29 #include "kernel/gpu/fifo/kernel_fifo.h" 30 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h" 31 #include "kernel/gpu/mem_mgr/channel_utils.h" 32 #include "rmapi/rs_utils.h" 33 #include "utils/nvassert.h" 34 #include "core/prelude.h" 35 #include "core/locks.h" 36 #include "gpu/mem_mgr/ce_utils.h" 37 #include "kernel/gpu/mem_mgr/ce_utils_sizes.h" 38 #include "vgpu/rpc_headers.h" 39 40 #include "class/clb0b5.h" // MAXWELL_DMA_COPY_A 41 #include "class/clc0b5.h" // PASCAL_DMA_COPY_A 42 #include "class/clc1b5.h" // PASCAL_DMA_COPY_B 43 #include "class/clc3b5.h" // VOLTA_DMA_COPY_A 44 #include "class/clc5b5.h" // TURING_DMA_COPY_A 45 #include "class/clc8b5.h" // HOPPER_DMA_COPY_A 46 #include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A 47 48 #include "class/cl0080.h" 49 50 NV_STATUS 51 ceutilsConstruct_IMPL 52 ( 53 CeUtils *pCeUtils, 54 OBJGPU *pGpu, 55 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance, 56 NV0050_ALLOCATION_PARAMETERS *pAllocParams 57 ) 58 { 59 NV_STATUS status = NV_OK; 60 NvU64 allocFlags = pAllocParams->flags; 61 NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE); 62 63 NvBool bMIGInUse = IS_MIG_IN_USE(pGpu); 64 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 65 66 pCeUtils->pGpu = pGpu; 67 68 if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _FIFO_LITE, _TRUE, allocFlags)) 69 { 70 return NV_ERR_NOT_SUPPORTED; 71 } 72 73 // Allocate channel with RM internal client 74 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 75 RmClient *pClient; 76 77 OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL)); 78 if (pChannel == NULL) 79 { 80 return NV_ERR_INSUFFICIENT_RESOURCES; 81 } 82 83 portMemSet(pChannel, 0, sizeof(OBJCHANNEL)); 84 85 if (pCeUtils->hClient == NV01_NULL_OBJECT) 86 { 87 // Allocate client 88 status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT, 89 NV01_NULL_OBJECT, NV01_ROOT, &pCeUtils->hClient, 90 sizeof(pCeUtils->hClient)); 91 NV_ASSERT_OR_GOTO(status == NV_OK, cleanup); 92 } 93 94 pChannel->hClient = pCeUtils->hClient; 95 pClient = serverutilGetClientUnderLock(pChannel->hClient); 96 NV_ASSERT_OR_GOTO(pClient != NULL, free_client); 97 98 status = serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient); 99 NV_ASSERT_OR_GOTO(status == NV_OK, free_client); 100 101 if (IS_VIRTUAL(pGpu)) 102 { 103 NV_ASSERT_OK_OR_GOTO( 104 status, 105 clientSetHandleGenerator(staticCast(pClient, RsClient), RS_UNIQUE_HANDLE_BASE, 106 RS_UNIQUE_HANDLE_RANGE/2 - VGPU_RESERVED_HANDLE_RANGE), 107 free_client); 108 } 109 else 110 { 111 NV_ASSERT_OK_OR_GOTO( 112 status, 113 clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U), 114 free_client); 115 } 116 117 pChannel->bClientAllocated = NV_TRUE; 118 pChannel->pGpu = pGpu; 119 120 pChannel->deviceId = pCeUtils->hDevice; 121 pChannel->subdeviceId = pCeUtils->hSubdevice; 122 123 pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance; 124 125 // We'll allocate new VAS for now. Sharing client VAS will be added later 126 pChannel->hVASpaceId = NV01_NULL_OBJECT; 127 pChannel->bUseVasForCeCopy = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _VIRTUAL_MODE, _TRUE, allocFlags); 128 129 // Detect if we can enable fast scrub on this channel 130 status = memmgrMemUtilsGetCopyEngineClass_HAL(pGpu, pMemoryManager, &pCeUtils->hTdCopyClass); 131 NV_ASSERT_OR_GOTO(status == NV_OK, free_channel); 132 133 if (((pCeUtils->hTdCopyClass == HOPPER_DMA_COPY_A) 134 ) && !pChannel->bUseVasForCeCopy) 135 { 136 pChannel->type = FAST_SCRUBBER_CHANNEL; 137 NV_PRINTF(LEVEL_INFO, "Enabled fast scrubber in construct.\n"); 138 } 139 else 140 { 141 pChannel->type = CE_SCRUBBER_CHANNEL; 142 } 143 144 // For self-hosted Hopper, we can only use VA copy or faster scrubber 145 if (pMemoryManager->bCePhysicalVidmemAccessNotSupported) 146 { 147 if (!pChannel->bUseVasForCeCopy && 148 (pChannel->type != FAST_SCRUBBER_CHANNEL)) 149 { 150 status = NV_ERR_NOT_SUPPORTED; 151 goto free_channel; 152 } 153 } 154 155 // Set up various channel resources 156 status = channelSetupIDs(pChannel, pGpu, pChannel->bUseVasForCeCopy, bMIGInUse); 157 NV_ASSERT_OR_GOTO(status == NV_OK, free_client); 158 159 channelSetupChannelBufferSizes(pChannel); 160 161 status = memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel); 162 NV_ASSERT_OR_GOTO(status == NV_OK, free_channel); 163 164 NV_PRINTF(LEVEL_INFO, "Channel alloc successful for ceUtils\n"); 165 pCeUtils->pChannel = pChannel; 166 167 // Allocate CE states 168 status = memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel); 169 NV_ASSERT_OR_GOTO(status == NV_OK, free_channel); 170 171 return status; 172 173 free_channel: 174 pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId); 175 176 if (pAllocParams->hVaspace != NV01_NULL_OBJECT) 177 { 178 pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId); 179 } 180 free_client: 181 if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _FALSE, allocFlags)) 182 { 183 // If client allocated client, we should not free it in RM 184 pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient); 185 } 186 187 cleanup: 188 portMemFree(pChannel); 189 return status; 190 } 191 192 void 193 ceutilsDestruct_IMPL 194 ( 195 CeUtils *pCeUtils 196 ) 197 { 198 OBJCHANNEL *pChannel = pCeUtils->pChannel; 199 OBJGPU *pGpu = pCeUtils->pGpu; 200 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 201 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 202 203 if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL)) 204 { 205 if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu))) 206 { 207 // 208 // When PCIE is blocked, mappings should be created, used and torn 209 // down when they are used 210 // 211 NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from ceUtils!\n"); 212 } 213 else 214 { 215 memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1); 216 pChannel->pControlGPFifo = NULL; 217 } 218 } 219 220 if (pChannel->pbCpuVA != NULL) 221 { 222 if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu))) 223 { 224 NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n"); 225 } 226 else 227 { 228 memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1); 229 pChannel->pbCpuVA = NULL; 230 } 231 } 232 233 if (pChannel->pTokenFromNotifier != NULL) 234 { 235 if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu))) 236 { 237 NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n"); 238 } 239 else 240 { 241 memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1); 242 pChannel->pTokenFromNotifier = NULL; 243 } 244 } 245 246 // Resource server makes sure no leak can occur 247 pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient); 248 portMemFree(pChannel); 249 } 250 251 void 252 ceutilsServiceInterrupts_IMPL(CeUtils *pCeUtils) 253 { 254 OBJCHANNEL *pChannel = pCeUtils->pChannel; 255 256 // 257 // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock in the 258 // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path. 259 // Yielding while holding the rmDeviceGpuLock can lead to deadlock. Instead, 260 // if the lock is held, service any interrupts on the owned CE to make progress. 261 // Bug 2527660 is filed to remove this change. 262 // 263 if (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance)) 264 { 265 channelServiceScrubberInterrupts(pChannel); 266 } 267 else 268 { 269 osSchedule(); 270 } 271 } 272 273 274 static NvBool 275 _ceUtilsFastScrubEnabled 276 ( 277 OBJCHANNEL *pChannel, 278 CHANNEL_PB_INFO *pChannelPbInfo 279 ) 280 { 281 OBJGPU *pGpu = pChannel->pGpu; 282 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 283 284 if (!memmgrIsFastScrubberEnabled(pMemoryManager)) 285 { 286 return NV_FALSE; 287 } 288 289 // 290 // Enable the memory fast scrubbing only when 291 // Channel was allocated as fastScrub channel 292 // We are doing a memset operation 293 // Memset pattern is 0 294 // DstPhysMode.target == LOCAL_FB 295 // Address is 4KB aligned 296 // LineLength is 4KB aligned 297 // 298 299 return ((pChannel->type == FAST_SCRUBBER_CHANNEL) && 300 (!pChannelPbInfo->bCeMemcopy) && 301 (pChannelPbInfo->pattern == 0) && 302 (pChannelPbInfo->dstAddressSpace == ADDR_FBMEM) && 303 (NV_IS_ALIGNED64(pChannelPbInfo->dstAddr, MEMUTIL_SCRUB_OFFSET_ALIGNMENT)) && 304 (NV_IS_ALIGNED(pChannelPbInfo->size, MEMUTIL_SCRUB_LINE_LENGTH_ALIGNMENT))); 305 } 306 307 308 // 309 // Helper to deal with CE_MAX_BYTES_PER_LINE 310 // This function may modify some fileds in pChannelPbInfo 311 // 312 static NV_STATUS 313 _ceutilsSubmitPushBuffer 314 ( 315 OBJCHANNEL *pChannel, 316 NvBool bPipelined, 317 NvBool bInsertFinishPayload, 318 CHANNEL_PB_INFO * pChannelPbInfo 319 ) 320 { 321 NV_STATUS status = NV_OK; 322 NvU32 methodsLength, putIndex = 0; 323 324 NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT); 325 NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT); 326 327 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu); 328 NvBool bReleaseMapping = NV_FALSE; 329 330 // 331 // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow 332 // buffer for DMA access 333 // 334 NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1 | 335 TRANSFER_FLAGS_SHADOW_ALLOC | 336 TRANSFER_FLAGS_SHADOW_INIT_MEM); 337 NV_PRINTF(LEVEL_INFO, "Actual size of copying to be pushed: %x\n", pChannelPbInfo->size); 338 339 status = channelWaitForFreeEntry(pChannel, &putIndex); 340 if (status != NV_OK) 341 { 342 NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n"); 343 return status; 344 } 345 346 if (pChannel->pbCpuVA == NULL) 347 { 348 pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, 349 transferFlags); 350 bReleaseMapping = NV_TRUE; 351 } 352 NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC); 353 354 if (_ceUtilsFastScrubEnabled(pChannel, pChannelPbInfo)) 355 { 356 methodsLength = channelFillPbFastScrub(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo); 357 } 358 else 359 { 360 if (pMemoryManager->bCePhysicalVidmemAccessNotSupported) 361 { 362 // Self-hosted Hopper only supports VA copy or fast scrubber 363 NV_ASSERT_OR_RETURN(pChannel->bUseVasForCeCopy, NV_ERR_NOT_SUPPORTED); 364 } 365 366 methodsLength = channelFillCePb(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo); 367 } 368 369 if (bReleaseMapping) 370 { 371 memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags); 372 pChannel->pbCpuVA = NULL; 373 } 374 375 if (methodsLength == 0) 376 { 377 NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n"); 378 return NV_ERR_NO_FREE_FIFOS; 379 } 380 381 // 382 // Pushbuffer can be written in a batch, but GPFIFO and doorbell require 383 // careful ordering so we do each write one-by-one 384 // 385 status = channelFillGpFifo(pChannel, putIndex, methodsLength); 386 if (status != NV_OK) 387 { 388 NV_PRINTF(LEVEL_ERROR, "Channel operation failures during memcopy\n"); 389 return status; 390 } 391 392 pChannel->lastSubmittedEntry = putIndex; 393 394 return status; 395 } 396 397 398 NV_STATUS 399 ceutilsMemset_IMPL 400 ( 401 CeUtils *pCeUtils, 402 CEUTILS_MEMSET_PARAMS *pParams 403 ) 404 { 405 OBJCHANNEL *pChannel = pCeUtils->pChannel; 406 NV_STATUS status = NV_OK; 407 408 NvU32 pteArraySize; 409 NvU64 offset, memsetLength, size, pageGranularity; 410 NvBool bContiguous; 411 412 MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc; 413 CHANNEL_PB_INFO channelPbInfo = {0}; 414 415 NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMSET_FLAGS_PIPELINED; 416 417 if (pMemDesc == NULL) 418 { 419 NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for CeUtils memset.\n"); 420 return NV_ERR_INVALID_ARGUMENT; 421 } 422 423 if (pMemDesc->pGpu != pCeUtils->pChannel->pGpu) 424 { 425 NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n"); 426 return NV_ERR_INVALID_ARGUMENT; 427 } 428 429 size = memdescGetSize(pMemDesc); 430 pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU); 431 bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1); 432 433 if (pParams->offset >= size) 434 { 435 NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n"); 436 return NV_ERR_INVALID_ARGUMENT; 437 } 438 439 NV_PRINTF(LEVEL_INFO, "CeUtils Args to memset - offset: %llx, size: %llx \n", 440 pParams->offset, pParams->length); 441 442 if ((pParams->length == 0) || (pParams->length > (size - pParams->offset))) 443 { 444 NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n"); 445 return NV_ERR_INVALID_ARGUMENT; 446 } 447 448 channelPbInfo.bCeMemcopy = NV_FALSE; 449 channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1; 450 pCeUtils->lastSubmittedPayload = channelPbInfo.payload; 451 452 channelPbInfo.pattern = pParams->pattern; 453 channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc); 454 channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib; 455 456 pageGranularity = pMemDesc->pageArrayGranularity; 457 memsetLength = pParams->length; 458 offset = pParams->offset; 459 460 do 461 { 462 NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity); 463 NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), CE_MAX_BYTES_PER_LINE); 464 465 channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset); 466 467 NV_PRINTF(LEVEL_INFO, "CeUtils Memset dstAddr: %llx, size: %x\n", 468 channelPbInfo.dstAddr, memsetSizeContig); 469 470 channelPbInfo.size = memsetSizeContig; 471 status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, memsetSizeContig == memsetLength, &channelPbInfo); 472 if (status != NV_OK) 473 { 474 NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n"); 475 return status; 476 } 477 478 // Allow _LAUNCH_DMA methods that belong to the same memset operation to be pipelined after each other, as there are no dependencies 479 bPipelined = NV_TRUE; 480 481 memsetLength -= memsetSizeContig; 482 offset += memsetSizeContig; 483 } while (memsetLength != 0); 484 485 if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC) 486 { 487 NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload); 488 pParams->submittedWorkId = channelPbInfo.payload; 489 } 490 else 491 { 492 // Check semaProgress and then timeout 493 status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload); 494 if (status == NV_OK) 495 { 496 NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload); 497 } 498 } 499 500 return status; 501 } 502 503 NV_STATUS 504 ceutilsMemcopy_IMPL 505 ( 506 CeUtils *pCeUtils, 507 CEUTILS_MEMCOPY_PARAMS *pParams 508 ) 509 { 510 OBJCHANNEL *pChannel = pCeUtils->pChannel; 511 NV_STATUS status = NV_OK; 512 513 NvU64 srcSize, dstSize, copyLength, srcPageGranularity, dstPageGranularity; 514 NvBool bSrcContig, bDstContig; 515 516 CHANNEL_PB_INFO channelPbInfo = {0}; 517 MEMORY_DESCRIPTOR *pDstMemDesc = pParams->pDstMemDesc; 518 MEMORY_DESCRIPTOR *pSrcMemDesc = pParams->pSrcMemDesc; 519 520 NvU64 length = pParams->length; 521 NvU64 srcOffset = pParams->srcOffset; 522 NvU64 dstOffset = pParams->dstOffset; 523 524 NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED; 525 526 // Validate params 527 if ((pSrcMemDesc == NULL) || (pDstMemDesc == NULL)) 528 { 529 NV_PRINTF(LEVEL_ERROR, "Src/Dst Memory descriptor should be valid.\n"); 530 return NV_ERR_INVALID_ARGUMENT; 531 } 532 533 if ((pSrcMemDesc->pGpu != pCeUtils->pChannel->pGpu) || 534 (pDstMemDesc->pGpu != pCeUtils->pChannel->pGpu)) 535 { 536 NV_PRINTF(LEVEL_ERROR, "CeUtils does not support p2p copies right now. \n"); 537 return NV_ERR_INVALID_ARGUMENT; 538 } 539 540 srcSize = memdescGetSize(pSrcMemDesc); 541 dstSize = memdescGetSize(pDstMemDesc); 542 543 if ((srcOffset >= srcSize) || (dstOffset >= dstSize)) 544 { 545 NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the src/dst memdesc.\n"); 546 return NV_ERR_INVALID_ARGUMENT; 547 } 548 549 if ((length == 0) || 550 (srcOffset + length > srcSize) || (dstOffset + length > dstSize)) 551 { 552 NV_PRINTF(LEVEL_ERROR, "Invalid memcopy length.\n"); 553 return NV_ERR_INVALID_ARGUMENT; 554 } 555 556 channelPbInfo.bCeMemcopy = NV_TRUE; 557 channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1; 558 pCeUtils->lastSubmittedPayload = channelPbInfo.payload; 559 560 channelPbInfo.srcAddressSpace = memdescGetAddressSpace(pSrcMemDesc); 561 channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pDstMemDesc); 562 563 channelPbInfo.srcCpuCacheAttrib = pSrcMemDesc->_cpuCacheAttrib; 564 channelPbInfo.dstCpuCacheAttrib = pDstMemDesc->_cpuCacheAttrib; 565 566 srcPageGranularity = pSrcMemDesc->pageArrayGranularity; 567 dstPageGranularity = pDstMemDesc->pageArrayGranularity; 568 bSrcContig = memdescGetContiguity(pSrcMemDesc, AT_GPU); 569 bDstContig = memdescGetContiguity(pDstMemDesc, AT_GPU); 570 571 copyLength = length; 572 573 do 574 { 575 // 576 // This algorithm finds the maximum contig region from both src and dst 577 // for each copy and iterate until we submitted the whole range to CE 578 // 579 NvU64 maxContigSizeSrc = bSrcContig ? copyLength : (srcPageGranularity - srcOffset % srcPageGranularity); 580 NvU64 maxContigSizeDst = bDstContig ? copyLength : (dstPageGranularity - dstOffset % dstPageGranularity); 581 NvU32 copySizeContig = (NvU32)NV_MIN(NV_MIN(copyLength, NV_MIN(maxContigSizeSrc, maxContigSizeDst)), CE_MAX_BYTES_PER_LINE); 582 583 channelPbInfo.srcAddr = memdescGetPhysAddr(pSrcMemDesc, AT_GPU, srcOffset); 584 channelPbInfo.dstAddr = memdescGetPhysAddr(pDstMemDesc, AT_GPU, dstOffset); 585 586 NV_PRINTF(LEVEL_INFO, "CeUtils Memcopy dstAddr: %llx, srcAddr: %llx, size: %x\n", 587 channelPbInfo.dstAddr, channelPbInfo.srcAddr, copySizeContig); 588 589 channelPbInfo.size = copySizeContig; 590 status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, copySizeContig == copyLength, &channelPbInfo); 591 if (status != NV_OK) 592 { 593 NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memcopy.\n"); 594 return status; 595 } 596 597 // Allow _LAUNCH_DMA methods that belong to the same copy operation to be pipelined after each other, as there are no dependencies 598 bPipelined = NV_TRUE; 599 600 copyLength -= copySizeContig; 601 srcOffset += copySizeContig; 602 dstOffset += copySizeContig; 603 } while (copyLength != 0); 604 605 if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC) 606 { 607 NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload); 608 pParams->submittedWorkId = channelPbInfo.payload; 609 } 610 else 611 { 612 // Check semaProgress and then timeout 613 status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload); 614 if (status == NV_OK) 615 { 616 NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload); 617 } 618 } 619 620 return status; 621 } 622 623 624 // This function updates pCeUtils->lastCompletedPayload and handles wrap-around 625 NvU64 626 ceutilsUpdateProgress_IMPL 627 ( 628 CeUtils *pCeUtils 629 ) 630 { 631 NV_ASSERT((pCeUtils != NULL) && (pCeUtils->pChannel != NULL)); 632 633 NvU32 hwCurrentCompletedPayload = 0; 634 NvU64 swLastCompletedPayload = pCeUtils->lastCompletedPayload; 635 636 // 637 // CeUtils uses 64 bit index to track the work submitted. But HW supports 638 // only 32 bit semaphore. The current completed Id is calculated here, based 639 // on the lastSubmittedPayload and current HW semaphore value. 640 // 641 hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pCeUtils->pChannel); 642 643 // No work has been completed since we checked last time 644 if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload) 645 { 646 return swLastCompletedPayload; 647 } 648 649 // Check for wrap around case. Increment the upper 32 bits 650 if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload) 651 { 652 swLastCompletedPayload += 0x100000000ULL; 653 } 654 655 // Update lower 32 bits regardless if wrap-around happened 656 swLastCompletedPayload &= 0xFFFFFFFF00000000ULL; 657 swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload; 658 659 pCeUtils->lastCompletedPayload = swLastCompletedPayload; 660 return swLastCompletedPayload; 661 } 662 663 NV_STATUS 664 ceutilsapiCtrlCmdCheckProgress_IMPL 665 ( 666 CeUtilsApi *pCeUtilsApi, 667 NV0050_CTRL_CHECK_PROGRESS_PARAMS *pParams 668 ) 669 { 670 if (pParams->submittedWorkId <= ceutilsUpdateProgress(pCeUtilsApi->pCeUtils)) 671 { 672 pParams->result = NV0050_CTRL_CHECK_PROGRESS_RESULT_FINISHED; 673 } 674 675 return NV_OK; 676 } 677 678 NV_STATUS 679 ceutilsapiConstruct_IMPL 680 ( 681 CeUtilsApi *pCeUtilsApi, 682 CALL_CONTEXT *pCallContext, 683 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 684 ) 685 { 686 NV0050_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams; 687 688 if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _TRUE, pAllocParams->flags)) 689 { 690 NV_PRINTF(LEVEL_ERROR, "CeUtils: unsupported flags = 0x%llx\n", pAllocParams->flags); 691 return NV_ERR_NOT_SUPPORTED; 692 } 693 694 return objCreate(&pCeUtilsApi->pCeUtils, pCeUtilsApi, CeUtils, GPU_RES_GET_GPU(pCeUtilsApi), NULL, pAllocParams); 695 } 696 697 void 698 ceutilsapiDestruct_IMPL 699 ( 700 CeUtilsApi *pCeUtilsApi 701 ) 702 { 703 objDelete(pCeUtilsApi->pCeUtils); 704 } 705 706 NV_STATUS 707 ceutilsapiCtrlCmdMemset_IMPL 708 ( 709 CeUtilsApi *pCeUtilsApi, 710 NV0050_CTRL_MEMSET_PARAMS *pParams 711 ) 712 { 713 NV_STATUS status = NV_OK; 714 NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi); 715 RsResourceRef *pPhysmemRef; 716 MEMORY_DESCRIPTOR *pMemDesc = NULL; 717 CEUTILS_MEMSET_PARAMS internalParams = {0}; 718 719 if (pParams->hMemory == 0) 720 { 721 return NV_ERR_INVALID_ARGUMENT; 722 } 723 724 status = serverutilGetResourceRef(hClient, pParams->hMemory, &pPhysmemRef); 725 if (status != NV_OK) 726 { 727 NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n"); 728 return status; 729 } 730 pMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc; 731 732 internalParams.pMemDesc = pMemDesc; 733 internalParams.offset = pParams->offset; 734 internalParams.length = pParams->length; 735 internalParams.pattern = pParams->pattern; 736 internalParams.flags = pParams->flags; 737 738 status = ceutilsMemset(pCeUtilsApi->pCeUtils, &internalParams); 739 if (status == NV_OK) 740 { 741 pParams->submittedWorkId = internalParams.submittedWorkId; 742 } 743 744 return status; 745 } 746 747 NV_STATUS 748 ceutilsapiCtrlCmdMemcopy_IMPL 749 ( 750 CeUtilsApi *pCeUtilsApi, 751 NV0050_CTRL_MEMCOPY_PARAMS *pParams 752 ) 753 { 754 NV_STATUS status = NV_OK; 755 NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi); 756 RsResourceRef *pSrcPhysmemRef; 757 RsResourceRef *pDstPhysmemRef; 758 MEMORY_DESCRIPTOR *pSrcMemDesc = NULL; 759 MEMORY_DESCRIPTOR *pDstMemDesc = NULL; 760 CEUTILS_MEMCOPY_PARAMS internalParams = {0}; 761 762 if ((pParams->hSrcMemory == 0) || (pParams->hDstMemory == 0)) 763 { 764 return NV_ERR_INVALID_ARGUMENT; 765 } 766 767 status = serverutilGetResourceRef(hClient, pParams->hDstMemory, &pDstPhysmemRef); 768 if (status != NV_OK) 769 { 770 NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n"); 771 return status; 772 } 773 pDstMemDesc = (dynamicCast(pDstPhysmemRef->pResource, Memory))->pMemDesc; 774 775 status = serverutilGetResourceRef(hClient, pParams->hSrcMemory, &pSrcPhysmemRef); 776 if (status != NV_OK) 777 { 778 NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n"); 779 return status; 780 } 781 pSrcMemDesc = (dynamicCast(pSrcPhysmemRef->pResource, Memory))->pMemDesc; 782 783 internalParams.pSrcMemDesc = pSrcMemDesc; 784 internalParams.pDstMemDesc = pDstMemDesc; 785 internalParams.srcOffset = pParams->srcOffset; 786 internalParams.dstOffset = pParams->dstOffset; 787 internalParams.length = pParams->length; 788 internalParams.flags = pParams->flags; 789 790 status = ceutilsMemcopy(pCeUtilsApi->pCeUtils, &internalParams); 791 if (status == NV_OK) 792 { 793 pParams->submittedWorkId = internalParams.submittedWorkId; 794 } 795 796 return status; 797 } 798