/*
 * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/sec2_utils.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "nvrm_registry.h"
#include "platform/chipset/chipset.h"
#include "gpu/mem_mgr/heap.h"

#include "class/clcba2.h" // HOPPER_SEC2_WORK_LAUNCH_A
#include "class/cl003e.h" // NV01_MEMORY_SYSTEM
#include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL

#include "class/cl0080.h"

#include "gpu/conf_compute/conf_compute.h"
#include "gpu/conf_compute/ccsl.h"

static NV_STATUS
_sec2GetClass(OBJGPU *pGpu, NvU32 *pClass)
{
    NV_STATUS status;
    NvU32 numClasses = 0;
    NvU32 *pClassList = NULL;
    NvU32 class = 0;

    NV_ASSERT_OR_RETURN(pClass != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, ENG_SEC2));
    NV_ASSERT_OR_RETURN((numClasses != 0), NV_ERR_NOT_SUPPORTED);

    pClassList = portMemAllocNonPaged(sizeof(*pClassList) * numClasses);
    NV_ASSERT_OR_RETURN((pClassList != NULL), NV_ERR_INSUFFICIENT_RESOURCES);

    status = gpuGetClassList(pGpu, &numClasses, pClassList, ENG_SEC2);
    if (status == NV_OK)
    {
        for (NvU32 i = 0; i < numClasses; i++)
        {
            class = NV_MAX(class, pClassList[i]);
        }
        if (class == 0)
        {
            status = NV_ERR_INVALID_STATE;
        }
        *pClass = class;
    }
    portMemFree(pClassList);
    return status;
}

static NV_STATUS
_sec2AllocAndMapBuffer
(
    Sec2Utils *pSec2Utils,
    NvU32 size,
    SEC2UTILS_BUFFER_INFO *pSec2Buf
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pSec2Utils->pGpu);

    pSec2Buf->size = size;

    // allocate the physmem
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type = NVOS32_TYPE_IMAGE;
    memAllocParams.size = pSec2Buf->size;
    memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                          DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
    memAllocParams.attr2 = DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED);
    memAllocParams.flags = 0;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;
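
    //
    // Note: these buffers hold method-stream auth tags, so they are placed in
    // unprotected, uncached sysmem where both the CPU and SEC2 can access
    // them while confidential compute is enabled.
    //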
    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hPhysMem,
                                NV01_MEMORY_SYSTEM,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // allocate Virtual Memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type = NVOS32_TYPE_IMAGE;
    memAllocParams.size = pSec2Buf->size;
    memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2 = NVOS32_ATTR2_NONE;
    memAllocParams.flags |= NVOS32_ALLOC_FLAGS_VIRTUAL;
    memAllocParams.hVASpace = pSec2Buf->hVASpace;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hVirtMem,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // map the buffer
    OBJSYS *pSys = SYS_GET_INSTANCE();
    OBJCL *pCl = SYS_GET_CL(pSys);
    NvU32 cacheSnoopFlag = 0;
    if (pCl->getProperty(pCl, PDB_PROP_CL_IS_CHIPSET_IO_COHERENT))
    {
        cacheSnoopFlag = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
    }
    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->Map(pRmApi, pSec2Utils->hClient, pSec2Utils->hDevice,
                    pSec2Buf->hVirtMem, pSec2Buf->hPhysMem, 0, pSec2Buf->size,
                    DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) | cacheSnoopFlag,
                    &pSec2Buf->gpuVA));

    pSec2Buf->pMemDesc = memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pSec2Utils->hClient, pSec2Buf->hPhysMem);
    return NV_OK;
}

static NV_STATUS
_sec2InitBuffers
(
    Sec2Utils *pSec2Utils
)
{
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->scrubMthdAuthTagBuf));

    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->semaMthdAuthTagBuf));
    return NV_OK;
}
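
//
// Each auth tag buffer is RM_PAGE_SIZE_64K bytes and is consumed in
// SHA_256_HASH_SIZE_BYTES (32-byte) slots, i.e. 64K / 32 = 2048 slots per
// buffer: scrubMthdAuthTagBuf carries the tags for the scrub (decrypt)
// method streams and semaMthdAuthTagBuf the tags for the semaphore methods.
//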
NV_STATUS
sec2utilsConstruct_IMPL
(
    Sec2Utils *pSec2Utils,
    OBJGPU *pGpu,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance
)
{
    NV_STATUS status = NV_OK;
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
    NV_ASSERT_OR_RETURN(((pConfCompute != NULL) &&
                         (pConfCompute->getProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_CC_FEATURE_ENABLED))),
                        NV_ERR_NOT_SUPPORTED);

    pSec2Utils->pGpu = pGpu;

    // Allocate channel with RM internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient = NULL;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    // Allocate client
    NV_ASSERT_OK_OR_GOTO(status, pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                                         NV01_NULL_OBJECT, NV01_ROOT, &pSec2Utils->hClient,
                                                         sizeof(pSec2Utils->hClient)), cleanup);

    pChannel->hClient = pSec2Utils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    NV_ASSERT_OR_GOTO(pClient != NULL, free_client);

    NV_ASSERT_OK_OR_GOTO(status, serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient), free_client);

    NV_ASSERT_OK_OR_GOTO(status, clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U), free_client);

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;

    pChannel->deviceId = NV01_NULL_OBJECT;
    pChannel->subdeviceId = NV01_NULL_OBJECT;

    pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance;

    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = NV_FALSE;

    pChannel->type = SWL_SCRUBBER_CHANNEL;
    pChannel->engineType = RM_ENGINE_TYPE_SEC2;

    pChannel->bSecure = NV_TRUE;

    // Detect the highest supported SEC2 class to use on this channel
    NV_ASSERT_OK_OR_GOTO(status, _sec2GetClass(pGpu, &pSec2Utils->sec2Class), free_client);
    pChannel->sec2Class = pSec2Utils->sec2Class;

    // Set up various channel resources
    NV_ASSERT_OK_OR_GOTO(status, channelSetupIDs(pChannel, pGpu, NV_FALSE, IS_MIG_IN_USE(pGpu)), free_client);

    channelSetupChannelBufferSizes(pChannel);

    // Save the original values
    NvU32 instLocOverrides4 = pGpu->instLocOverrides4;
    NvU32 instLocOverrides = pGpu->instLocOverrides;

    // Force PB, GPFIFO, notifier and USERD to sysmem before allocating the channel
    pGpu->instLocOverrides4 = FLD_SET_DRF(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, _NCOH, pGpu->instLocOverrides4);
    pGpu->instLocOverrides = FLD_SET_DRF(_REG_STR_RM, _INST_LOC, _USERD, _NCOH, pGpu->instLocOverrides);

    pChannel->engineType = NV2080_ENGINE_TYPE_SEC2;

    NV_ASSERT_OK_OR_GOTO(status, channelAllocSubdevice(pGpu, pChannel), free_client);

    pMemoryManager->bScrubChannelSetupInProgress = NV_TRUE;
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel), free_channel);
    pMemoryManager->bScrubChannelSetupInProgress = NV_FALSE;

    pSec2Utils->hDevice = pChannel->deviceId;
    pSec2Utils->hSubdevice = pChannel->subdeviceId;

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for Sec2Utils\n");
    pSec2Utils->pChannel = pChannel;

    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsSec2CtxInit_HAL(pGpu, pMemoryManager, pChannel), free_channel);

    // Restore the original values
    pGpu->instLocOverrides4 = instLocOverrides4;
    pGpu->instLocOverrides = instLocOverrides;

    pSec2Utils->lastSubmittedPayload = 0;
    pSec2Utils->lastCompletedPayload = 0;
    pSec2Utils->authTagPutIndex = 0;
    pSec2Utils->authTagGetIndex = 0;

    NV_ASSERT_OK_OR_GOTO(status, _sec2InitBuffers(pSec2Utils), free_channel);

    NV_ASSERT_OK_OR_GOTO(status, ccslContextInitViaChannel(&pSec2Utils->pCcslCtx, pSec2Utils->hClient, pChannel->channelId), free_channel);

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

free_client:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);

cleanup:
    portMemFree(pChannel);
    return status;
}
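
//
// The error labels above unwind in reverse order of construction:
// free_channel releases the channel object, free_client frees the RM client
// (the resource server then reclaims any surviving children), and cleanup
// releases the OBJCHANNEL tracking allocation.
//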
void
sec2utilsDestruct_IMPL
(
    Sec2Utils *pSec2Utils
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    OBJGPU *pGpu = pSec2Utils->pGpu;
    MemoryManager *pMemoryManager = NULL;
    RM_API *pRmApi = NULL;

    // Sanity checks
    if ((pGpu == NULL) || (pChannel == NULL))
    {
        NV_PRINTF(LEVEL_WARNING, "Possible double-free of Sec2Utils!\n");
        return;
    }
    else if (pGpu != pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Bad state during Sec2Utils teardown!\n");
        return;
    }

    pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    ccslContextClear(pSec2Utils->pCcslCtx);

    if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL))
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // When PCIE is blocked, mappings should be created, used and torn
            // down when they are used
            //
            NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from Sec2Utils!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pControlGPFifo = NULL;
        }
    }

    if (pChannel->pbCpuVA != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pbCpuVA = NULL;
        }
    }

    if (pChannel->pTokenFromNotifier != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pTokenFromNotifier = NULL;
        }
    }

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdPhys);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->pushBufferId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdVirt);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->deviceId);

    // Resource server makes sure no leak can occur
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}
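
//
// The mappings torn down above (pControlGPFifo, pbCpuVA, pTokenFromNotifier)
// are persistent BAR1 mappings created through memmgrMemDescBeginTransfer.
// When BAR access is blocked, such mappings are expected to be created and
// destroyed around each individual use, so finding one still live here is
// reported as a leak rather than silently unmapped.
//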
void
sec2utilsServiceInterrupts_IMPL(Sec2Utils *pSec2Utils)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // This can result in RM waiting for scrubbing to complete and yielding while
    // holding the rmDeviceGpuLock, which can lead to deadlock.
    // Instead, if the lock is held, service any interrupts on SEC2 to help the
    // engine make progress. Bug 2527660 is filed to remove this change.
    //
    // pChannel is NULL when PMA scrub requests are handled in the vGPU plugin.
    // In that case the vGPU plugin allocates the scrubber channel in the PF
    // domain, so the above-mentioned deadlock is not present.
    //
    if ((pChannel != NULL) && (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance)))
    {
        channelServiceScrubberInterrupts(pChannel);
    }
    else
    {
        osSchedule();
    }
}

static NvU32
_sec2utilsUpdateGetPtr
(
    Sec2Utils *pSec2Utils
)
{
    return channelReadChannelMemdesc(pSec2Utils->pChannel, pSec2Utils->pChannel->authTagBufSemaOffset);
}

static NV_STATUS
_sec2utilsGetNextAuthTagSlot
(
    Sec2Utils *pSec2Utils
)
{
    NvU32 totalSlots = pSec2Utils->scrubMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextPut = (pSec2Utils->authTagPutIndex + 1) % totalSlots;

    // Check whether slots have freed up
    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;
    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextPut == pSec2Utils->authTagGetIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Timed out waiting for next auth tag buf slot to free up: nextPut = %d get = %d\n",
                      nextPut, _sec2utilsUpdateGetPtr(pSec2Utils));
            return status;
        }
        pSec2Utils->authTagGetIndex = _sec2utilsUpdateGetPtr(pSec2Utils);
        osSpinLoop();
    }

    pSec2Utils->authTagPutIndex = nextPut;
    return NV_OK;
}
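
//
// Example: with a 64KB auth tag buffer and 32-byte SHA-256 slots, totalSlots
// is 2048. When authTagPutIndex is 2047, nextPut wraps to 0 and the loop
// above spins (bounded by the GPU timeout) until the get index read back from
// the channel moves off slot 0.
//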
static NV_STATUS
_sec2utilsSubmitPushBuffer
(
    Sec2Utils *pSec2Utils,
    OBJCHANNEL *pChannel,
    NvBool bInsertFinishPayload,
    NvU32 nextIndex,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvU32 methodsLength = 0;
    NvU32 putIndex = 0;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
    NvBool bReleaseMapping = NV_FALSE;

    //
    // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow
    // buffer for DMA access
    //
    NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1 |
                           TRANSFER_FLAGS_SHADOW_ALLOC |
                           TRANSFER_FLAGS_SHADOW_INIT_MEM);
    NV_PRINTF(LEVEL_INFO, "Actual size of the copy to be pushed: %x\n", pChannelPbInfo->size);

    status = channelWaitForFreeEntry(pChannel, &putIndex);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
        return status;
    }

    if (pChannel->pbCpuVA == NULL)
    {
        pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                                                       transferFlags);
        bReleaseMapping = NV_TRUE;
    }
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    NV_ASSERT_OK_OR_RETURN(_sec2utilsGetNextAuthTagSlot(pSec2Utils));
    NV_ASSERT_OK_OR_RETURN(channelFillSec2Pb(pChannel, putIndex, bInsertFinishPayload, pChannelPbInfo, pSec2Utils->pCcslCtx,
                                             pSec2Utils->scrubMthdAuthTagBuf.pMemDesc, pSec2Utils->semaMthdAuthTagBuf.pMemDesc,
                                             pSec2Utils->scrubMthdAuthTagBuf.gpuVA, pSec2Utils->authTagPutIndex,
                                             pSec2Utils->semaMthdAuthTagBuf.gpuVA, nextIndex, &methodsLength));

    if (bReleaseMapping)
    {
        memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
        pChannel->pbCpuVA = NULL;
    }

    if (methodsLength == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
        return NV_ERR_NO_FREE_FIFOS;
    }

    //
    // The pushbuffer can be written as a batch, but GPFIFO and doorbell writes
    // require careful ordering, so we do each write one-by-one
    //
    status = channelFillGpFifo(pChannel, putIndex, methodsLength);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Channel operation failure during memcopy\n");
        return status;
    }

    pChannel->lastSubmittedEntry = putIndex;

    return status;
}
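
//
// Illustrative usage sketch for the memset path below (hypothetical caller,
// not part of this file; assumes the NVOC-generated sec2utilsMemset wrapper):
// populate SEC2UTILS_MEMSET_PARAMS, submit, then poll completion against the
// returned work id.
//
//     SEC2UTILS_MEMSET_PARAMS params = {0};
//     params.pMemDesc = pMemDesc;                  // FB memory to scrub
//     params.offset   = 0;
//     params.length   = memdescGetSize(pMemDesc);
//     NV_ASSERT_OK(sec2utilsMemset(pSec2Utils, &params));
//     while (sec2utilsUpdateProgress(pSec2Utils) < params.submittedWorkId)
//         osSpinLoop();                            // ideally with a timeout
//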
NV_STATUS
sec2utilsMemset_IMPL
(
    Sec2Utils *pSec2Utils,
    SEC2UTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for Sec2Utils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) ||
        (pMemDesc->pGpu != pSec2Utils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Make sure free auth tag buffers are available
    NvU32 totalSlots = pSec2Utils->semaMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextIndex = (pSec2Utils->lastSubmittedPayload + 1) % totalSlots;
    NvU32 lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
    NvU32 currentIndex = lastCompleted % totalSlots;

    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextIndex == currentIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to finish previous scrub op before re-using method stream auth tag buf: lastCompleted = %d lastSubmitted = %lld\n",
                      lastCompleted, pSec2Utils->lastSubmittedPayload);
            return status;
        }
        lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
        currentIndex = lastCompleted % totalSlots;
        osSpinLoop();
    }

    channelPbInfo.payload = pSec2Utils->lastSubmittedPayload + 1;
    pSec2Utils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

    do
    {
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "Sec2Utils Memset dstAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        channelPbInfo.size = memsetSizeContig;

        status = _sec2utilsSubmitPushBuffer(pSec2Utils, pChannel, memsetSizeContig == memsetLength, nextIndex, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        memsetLength -= memsetSizeContig;
        offset += memsetSizeContig;
    } while (memsetLength != 0);

    NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
    pParams->submittedWorkId = channelPbInfo.payload;

    return status;
}

// This function updates pSec2Utils->lastCompletedPayload and handles wrap-around
NvU64
sec2utilsUpdateProgress_IMPL
(
    Sec2Utils *pSec2Utils
)
{
    NV_ASSERT((pSec2Utils != NULL) && (pSec2Utils->pChannel != NULL));

    NvU32 hwCurrentCompletedPayload = 0;
    NvU64 swLastCompletedPayload = pSec2Utils->lastCompletedPayload;

    //
    // Sec2Utils uses a 64-bit index to track the work submitted, but HW supports
    // only a 32-bit semaphore. The currently completed id is calculated here,
    // based on the lastSubmittedPayload and the current HW semaphore value.
    //
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pSec2Utils->pChannel);

    // No work has been completed since we last checked
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
    swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
    swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;

    pSec2Utils->lastCompletedPayload = swLastCompletedPayload;
    return swLastCompletedPayload;
}
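
//
// Worked example of the wrap-around handling above: if the SW-tracked payload
// is 0x1FFFFFFF0 (lower 32 bits 0xFFFFFFF0) and the HW semaphore now reads
// 0x00000005, then 0x00000005 < 0xFFFFFFF0, so the upper 32 bits are bumped
// to 0x2 and the reconstructed completed payload is 0x200000005.
//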