/*
 * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include "core/core.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/sec2_utils.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "nvrm_registry.h"
#include "platform/chipset/chipset.h"
#include "gpu/mem_mgr/heap.h"


#include "class/clcba2.h" // HOPPER_SEC2_WORK_LAUNCH_A
#include "class/cl003e.h" // NV01_MEMORY_SYSTEM
#include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL

#include "class/cl0080.h" // NV01_DEVICE_0

#include "gpu/conf_compute/conf_compute.h"
#include "gpu/conf_compute/ccsl.h"

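//
// Return the highest-numbered (i.e. most recent) SEC2 class supported by this
// GPU. Uses the standard two-call pattern: query the class count with a NULL
// list first, then allocate the list and fill it with a second call.
//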
static NV_STATUS
_sec2GetClass(OBJGPU *pGpu, NvU32 *pClass)
{
    NV_STATUS status;
    NvU32 numClasses = 0;
    NvU32 *pClassList = NULL;
    NvU32 class = 0;

    NV_ASSERT_OR_RETURN(pClass != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, ENG_SEC2));
    NV_ASSERT_OR_RETURN((numClasses != 0), NV_ERR_NOT_SUPPORTED);

    pClassList = portMemAllocNonPaged(sizeof(*pClassList) * numClasses);
    NV_ASSERT_OR_RETURN((pClassList != NULL), NV_ERR_INSUFFICIENT_RESOURCES);

    status = gpuGetClassList(pGpu, &numClasses, pClassList, ENG_SEC2);
    if (status == NV_OK)
    {
        for (NvU32 i = 0; i < numClasses; i++)
        {
            class = NV_MAX(class, pClassList[i]);
        }
        if (class == 0)
        {
            status = NV_ERR_INVALID_STATE;
        }
        *pClass = class;
    }
    portMemFree(pClassList);
    return status;
}

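//
// Allocate one auth tag buffer: an unprotected, uncached sysmem physical
// allocation plus a matching virtual allocation in the channel's VA space,
// mapped together with a kernel mapping (and cache snooping on IO-coherent
// chipsets).
//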
static NV_STATUS
_sec2AllocAndMapBuffer
(
    Sec2Utils *pSec2Utils,
    NvU32 size,
    SEC2UTILS_BUFFER_INFO *pSec2Buf
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pSec2Utils->pGpu);

    pSec2Buf->size = size;

    // Allocate the physical memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type      = NVOS32_TYPE_IMAGE;
    memAllocParams.size      = pSec2Buf->size;
    memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION,  _PCI) |
                               DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
    memAllocParams.attr2     = DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED);
    memAllocParams.flags     = 0;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hPhysMem,
                                NV01_MEMORY_SYSTEM,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // Allocate the virtual memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type      = NVOS32_TYPE_IMAGE;
    memAllocParams.size      = pSec2Buf->size;
    memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2     = NVOS32_ATTR2_NONE;
    memAllocParams.flags    |= NVOS32_ALLOC_FLAGS_VIRTUAL;
    memAllocParams.hVASpace  = pSec2Buf->hVASpace;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hVirtMem,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // Map the physical allocation into the virtual range
    OBJSYS *pSys = SYS_GET_INSTANCE();
    OBJCL  *pCl = SYS_GET_CL(pSys);
    NvU32 cacheSnoopFlag = 0;
    if (pCl->getProperty(pCl, PDB_PROP_CL_IS_CHIPSET_IO_COHERENT))
    {
        cacheSnoopFlag = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
    }
    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->Map(pRmApi, pSec2Utils->hClient, pSec2Utils->hDevice,
                    pSec2Buf->hVirtMem, pSec2Buf->hPhysMem, 0, pSec2Buf->size,
                    DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) | cacheSnoopFlag,
                    &pSec2Buf->gpuVA));

    pSec2Buf->pMemDesc = memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pSec2Utils->hClient, pSec2Buf->hPhysMem);
    return NV_OK;
}

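//
// Allocate and map the two method-stream auth tag buffers: one for the scrub
// methods and one for the semaphore methods. Each buffer is 64KB, giving
// 64KB / SHA_256_HASH_SIZE_BYTES ring slots.
//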
static NV_STATUS
_sec2InitBuffers
(
    Sec2Utils *pSec2Utils
)
{
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->scrubMthdAuthTagBuf));

    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->semaMthdAuthTagBuf));
    return NV_OK;
}

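//
// Construct the SEC2 scrubber utility: create an internal RM client, allocate
// a SEC2 channel with its pushbuffer and USERD forced into sysmem, initialize
// the SEC2 context and auth tag buffers, and set up the CCSL encryption
// context for the channel. Requires the Confidential Compute feature to be
// enabled.
//
// Minimal usage sketch (hypothetical caller; local names are illustrative):
//
//     SEC2UTILS_MEMSET_PARAMS params = {0};
//     params.pMemDesc = pFbMemDesc;                  // FBMEM range to scrub
//     params.length   = memdescGetSize(pFbMemDesc);
//     NV_ASSERT_OK(sec2utilsMemset(pSec2Utils, &params));
//     // The scrub is asynchronous: poll sec2utilsUpdateProgress(pSec2Utils)
//     // until it reaches params.submittedWorkId.
//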
NV_STATUS
sec2utilsConstruct_IMPL
(
    Sec2Utils                    *pSec2Utils,
    OBJGPU                       *pGpu,
    KERNEL_MIG_GPU_INSTANCE      *pKernelMIGGPUInstance
)
{
    NV_STATUS status = NV_OK;
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
    NV_ASSERT_OR_RETURN(((pConfCompute != NULL) && (pConfCompute->getProperty(pCC, PDB_PROP_CONFCOMPUTE_CC_FEATURE_ENABLED))),
                          NV_ERR_NOT_SUPPORTED);

    pSec2Utils->pGpu = pGpu;

    // Allocate channel with RM internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient = NULL;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    // Allocate client
    NV_ASSERT_OK_OR_GOTO(status, pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                                         NV01_NULL_OBJECT, NV01_ROOT, &pSec2Utils->hClient,
                                                         sizeof(pSec2Utils->hClient)), cleanup);

    pChannel->hClient = pSec2Utils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    if (pClient == NULL)
    {
        // Propagate a real error; a bare assert-and-goto would leave status == NV_OK
        status = NV_ERR_INVALID_STATE;
        goto free_client;
    }

    NV_ASSERT_OK_OR_GOTO(status, serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient), free_client);

    NV_ASSERT_OK_OR_GOTO(status, clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U), free_client);

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;

    pChannel->deviceId = NV01_NULL_OBJECT;
    pChannel->subdeviceId = NV01_NULL_OBJECT;

    pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance;

    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = NV_FALSE;

    pChannel->type = SWL_SCRUBBER_CHANNEL;
    pChannel->engineType = RM_ENGINE_TYPE_SEC2;

    pChannel->bSecure = NV_TRUE;

    // Pick the SEC2 class to use for this channel
    NV_ASSERT_OK_OR_GOTO(status, _sec2GetClass(pGpu, &pSec2Utils->sec2Class), free_client);
    pChannel->sec2Class = pSec2Utils->sec2Class;

    // Set up various channel resources
    NV_ASSERT_OK_OR_GOTO(status, channelSetupIDs(pChannel, pGpu, NV_FALSE, IS_MIG_IN_USE(pGpu)), free_client);

    channelSetupChannelBufferSizes(pChannel);

    // Save the original instance location overrides
    NvU32 instLocOverrides4 = pGpu->instLocOverrides4;
    NvU32 instLocOverrides = pGpu->instLocOverrides;

    // Force the pushbuffer and USERD into non-coherent sysmem before allocating the channel
    pGpu->instLocOverrides4 = FLD_SET_DRF(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, _NCOH, pGpu->instLocOverrides4);
    pGpu->instLocOverrides  = FLD_SET_DRF(_REG_STR_RM, _INST_LOC, _USERD, _NCOH, pGpu->instLocOverrides);

    NV_ASSERT_OK_OR_GOTO(status, channelAllocSubdevice(pGpu, pChannel), free_client);

    pMemoryManager->bScrubChannelSetupInProgress = NV_TRUE;
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel), free_channel);
    pMemoryManager->bScrubChannelSetupInProgress = NV_FALSE;

    pSec2Utils->hDevice = pChannel->deviceId;
    pSec2Utils->hSubdevice = pChannel->subdeviceId;

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for Sec2Utils\n");
    pSec2Utils->pChannel = pChannel;

    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsSec2CtxInit_HAL(pGpu, pMemoryManager, pChannel), free_channel);

    // Restore the original instance location overrides
    pGpu->instLocOverrides4 = instLocOverrides4;
    pGpu->instLocOverrides = instLocOverrides;

    pSec2Utils->lastSubmittedPayload = 0;
    pSec2Utils->lastCompletedPayload = 0;
    pSec2Utils->authTagPutIndex = 0;
    pSec2Utils->authTagGetIndex = 0;

    NV_ASSERT_OK_OR_GOTO(status, _sec2InitBuffers(pSec2Utils), free_channel);

    NV_ASSERT_OK_OR_GOTO(status, ccslContextInitViaChannel(&pSec2Utils->pCcslCtx, pSec2Utils->hClient, pChannel->channelId), free_channel);

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

free_client:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);

cleanup:
    portMemFree(pChannel);
    return status;
}

void
sec2utilsDestruct_IMPL
(
    Sec2Utils *pSec2Utils
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    OBJGPU *pGpu = pSec2Utils->pGpu;
    MemoryManager *pMemoryManager = NULL;
    RM_API *pRmApi = NULL;

    // Sanity checks
    if ((pGpu == NULL) || (pChannel == NULL))
    {
        NV_PRINTF(LEVEL_WARNING, "Possible double-free of Sec2Utils!\n");
        return;
    }
    else if (pGpu != pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Bad state during Sec2Utils teardown!\n");
        return;
    }

    pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    ccslContextClear(pSec2Utils->pCcslCtx);

    if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL))
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // When PCIE is blocked, mappings should be created, used, and torn
            // down within the scope of each use
            //
            NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from Sec2Utils!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pControlGPFifo = NULL;
        }
    }

    if (pChannel->pbCpuVA != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pbCpuVA = NULL;
        }
    }

    if (pChannel->pTokenFromNotifier != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pTokenFromNotifier = NULL;
        }
    }

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdPhys);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->pushBufferId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdVirt);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->deviceId);

    // Freeing the client releases any remaining per-client resources in Resource Server
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}

void
sec2utilsServiceInterrupts_IMPL(Sec2Utils *pSec2Utils)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock held in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // This can result in RM waiting for scrubbing to complete and yielding while
    // holding the rmDeviceGpuLock, which can lead to deadlock.
    // Instead, if the lock is held, service any interrupts on SEC2 to help the
    // engine make progress. Bug 2527660 is filed to remove this change.
    //
    // pChannel is NULL when PMA scrub requests are handled in the vGPU plugin.
    // In that case the vGPU plugin allocates the scrubber channel in the PF
    // domain, so the above-mentioned deadlock does not apply.
    //
    if ((pChannel != NULL) && (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance)))
    {
        channelServiceScrubberInterrupts(pChannel);
    }
    else
    {
        osSchedule();
    }
}

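//
// Read back the auth tag ring GET index that SEC2 publishes (via a semaphore
// write at authTagBufSemaOffset) into the channel buffer.
//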
static NvU32
_sec2utilsUpdateGetPtr
(
    Sec2Utils *pSec2Utils
)
{
    return channelReadChannelMemdesc(pSec2Utils->pChannel, pSec2Utils->pChannel->authTagBufSemaOffset);
}

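//
// Reserve the next slot in the scrub-method auth tag ring, waiting for SEC2
// to drain entries if the ring is full. One slot is always left empty so that
// put == get unambiguously means "empty": e.g. with a 64KB buffer and 32-byte
// (SHA-256) tags there are 2048 slots, and the writer stalls whenever
// advancing put would make it equal to get.
//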
static NV_STATUS
_sec2utilsGetNextAuthTagSlot
(
    Sec2Utils *pSec2Utils
)
{
    NvU32 totalSlots = pSec2Utils->scrubMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextPut = (pSec2Utils->authTagPutIndex + 1) % totalSlots;

    // Poll (with a timeout) until a slot frees up
    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;
    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextPut == pSec2Utils->authTagGetIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Timed out waiting for the next auth tag buffer slot to free up: nextPut = %u get = %u\n",
                nextPut, _sec2utilsUpdateGetPtr(pSec2Utils));
            return status;
        }
        pSec2Utils->authTagGetIndex = _sec2utilsUpdateGetPtr(pSec2Utils);
        osSpinLoop();
    }

    pSec2Utils->authTagPutIndex = nextPut;
    return NV_OK;
}

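//
// Write one pushbuffer entry's worth of encrypted SEC2 methods and publish it
// through the GPFIFO. The pushbuffer is CPU-mapped through BAR1 where
// possible; otherwise a shadow buffer is allocated for DMA access.
//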
static NV_STATUS
_sec2utilsSubmitPushBuffer
(
    Sec2Utils        *pSec2Utils,
    OBJCHANNEL       *pChannel,
    NvBool            bInsertFinishPayload,
    NvU32             nextIndex,
    CHANNEL_PB_INFO  *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvU32 methodsLength = 0;
    NvU32 putIndex = 0;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
    NvBool bReleaseMapping = NV_FALSE;

    //
    // Use BAR1 if CPU access is allowed, otherwise allocate and init a shadow
    // buffer for DMA access
    //
    NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1     |
                           TRANSFER_FLAGS_SHADOW_ALLOC |
                           TRANSFER_FLAGS_SHADOW_INIT_MEM);
    NV_PRINTF(LEVEL_INFO, "Actual size of the submission to be pushed: 0x%x\n", pChannelPbInfo->size);

    status = channelWaitForFreeEntry(pChannel, &putIndex);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
        return status;
    }

    if (pChannel->pbCpuVA == NULL)
    {
        pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                                                       transferFlags);
        bReleaseMapping = NV_TRUE;
    }
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    NV_ASSERT_OK_OR_RETURN(_sec2utilsGetNextAuthTagSlot(pSec2Utils));
    NV_ASSERT_OK_OR_RETURN(channelFillSec2Pb(pChannel, putIndex, bInsertFinishPayload, pChannelPbInfo, pSec2Utils->pCcslCtx,
                                             pSec2Utils->scrubMthdAuthTagBuf.pMemDesc, pSec2Utils->semaMthdAuthTagBuf.pMemDesc,
                                             pSec2Utils->scrubMthdAuthTagBuf.gpuVA, pSec2Utils->authTagPutIndex,
                                             pSec2Utils->semaMthdAuthTagBuf.gpuVA, nextIndex, &methodsLength));

    if (bReleaseMapping)
    {
        memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
        pChannel->pbCpuVA = NULL;
    }

    if (methodsLength == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
        return NV_ERR_NO_FREE_FIFOS;
    }

    //
    // The pushbuffer can be written in a batch, but the GPFIFO and doorbell
    // require careful ordering, so we do each write one-by-one
    //
    status = channelFillGpFifo(pChannel, putIndex, methodsLength);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to write the GPFIFO entry.\n");
        return status;
    }

    pChannel->lastSubmittedEntry = putIndex;

    return status;
}


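//
// Scrub (memset) a range of an FBMEM allocation using the SEC2 engine. The
// range is split into physically contiguous chunks, each capped at
// NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES, and each chunk is submitted as its own
// pushbuffer entry. The submission is asynchronous: pParams->submittedWorkId
// receives the payload value to poll for via sec2utilsUpdateProgress().
//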
NV_STATUS
sec2utilsMemset_IMPL
(
    Sec2Utils *pSec2Utils,
    SEC2UTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    NV_STATUS   status = NV_OK;
    RMTIMEOUT   timeout;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for Sec2Utils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) ||
        (pMemDesc->pGpu != pSec2Utils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Make sure free auth tag buffer slots are available
    NvU32 totalSlots = pSec2Utils->semaMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextIndex = (pSec2Utils->lastSubmittedPayload + 1) % totalSlots;
    NvU32 lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
    NvU32 currentIndex = lastCompleted % totalSlots;

    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextIndex == currentIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to finish previous scrub op before re-using method stream auth tag buf: lastCompleted = %u lastSubmitted = %llu\n",
               lastCompleted, pSec2Utils->lastSubmittedPayload);
            return status;
        }
        lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
        currentIndex = lastCompleted % totalSlots;
        osSpinLoop();
    }

    channelPbInfo.payload = pSec2Utils->lastSubmittedPayload + 1;
    pSec2Utils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

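    //
    // Split the range into physically contiguous chunks. Worked example with
    // hypothetical values: for a discontiguous memdesc with 4KB granularity,
    // offset = 0x1800 and length = 0x3000, the chunks are 0x800 (up to the
    // first page boundary), 0x1000, 0x1000 and 0x800 bytes, each additionally
    // capped at NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES.
    //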
    do
    {
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "Sec2Utils memset dstAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        channelPbInfo.size = memsetSizeContig;

        status = _sec2utilsSubmitPushBuffer(pSec2Utils, pChannel, memsetSizeContig == memsetLength, nextIndex, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        memsetLength -= memsetSizeContig;
        offset       += memsetSizeContig;
    } while (memsetLength != 0);

    NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
    pParams->submittedWorkId = channelPbInfo.payload;

    return status;
}

625 
626 // This function updates pSec2Utils->lastCompletedPayload and handles wrap-around
627 NvU64
628 sec2utilsUpdateProgress_IMPL
629 (
630     Sec2Utils *pSec2Utils
631 )
632 {
633     NV_ASSERT((pSec2Utils != NULL) && (pSec2Utils->pChannel != NULL));
634 
635     NvU32 hwCurrentCompletedPayload = 0;
636     NvU64 swLastCompletedPayload = pSec2Utils->lastCompletedPayload;
637 
638     //
639     // Sec2Utils uses 64 bit index to track the work submitted. But HW supports
640     // only 32 bit semaphore. The current completed Id is calculated here, based
641     // on the lastSubmittedPayload and current HW semaphore value.
642     //
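    //
    // Worked example with hypothetical values: if lastCompletedPayload is
    // 0x1FFFFFFF0 (upper word 0x1, lower word 0xFFFFFFF0) and the 32-bit
    // semaphore now reads 0x10, the semaphore has wrapped, so the new value
    // becomes ((0x1 + 1) << 32) | 0x10 = 0x200000010.
    //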
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pSec2Utils->pChannel);

    // No work has been completed since we last checked
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
    swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
    swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;

    pSec2Utils->lastCompletedPayload = swLastCompletedPayload;
    return swLastCompletedPayload;
}