/*
 * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include "core/core.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/sec2_utils.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "nvrm_registry.h"
#include "platform/chipset/chipset.h"
#include "gpu/mem_mgr/heap.h"


#include "class/clcba2.h" // HOPPER_SEC2_WORK_LAUNCH_A
#include "class/cl003e.h" // NV01_MEMORY_SYSTEM
#include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL

#include "class/cl0080.h"

#include "gpu/conf_compute/conf_compute.h"
#include "gpu/conf_compute/ccsl.h"

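// Return the highest SEC2 class reported by gpuGetClassList() for ENG_SEC2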
static NV_STATUS
_sec2GetClass(OBJGPU *pGpu, NvU32 *pClass)
{
    NV_STATUS status;
    NvU32 numClasses = 0;
    NvU32 *pClassList = NULL;
    NvU32 class = 0;

    NV_ASSERT_OR_RETURN(pClass != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, ENG_SEC2));
    NV_ASSERT_OR_RETURN((numClasses != 0), NV_ERR_NOT_SUPPORTED);

    pClassList = portMemAllocNonPaged(sizeof(*pClassList) * numClasses);
    NV_ASSERT_OR_RETURN((pClassList != NULL), NV_ERR_INSUFFICIENT_RESOURCES);

    status = gpuGetClassList(pGpu, &numClasses, pClassList, ENG_SEC2);
    if (status == NV_OK)
    {
        for (NvU32 i = 0; i < numClasses; i++)
        {
            class = NV_MAX(class, pClassList[i]);
        }
        if (class == 0)
        {
            status = NV_ERR_INVALID_STATE;
        }
        *pClass = class;
    }
    portMemFree(pClassList);
    return status;
}

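//
// Allocate an unprotected sysmem buffer of the requested size, create a matching
// virtual memory object in pSec2Buf->hVASpace, and map the two together.
// The physical memdesc and GPU VA are returned through pSec2Buf.
//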
static NV_STATUS
_sec2AllocAndMapBuffer
(
    Sec2Utils *pSec2Utils,
    NvU32 size,
    SEC2UTILS_BUFFER_INFO *pSec2Buf
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pSec2Utils->pGpu);

    pSec2Buf->size = size;

    // Allocate the physical memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type      = NVOS32_TYPE_IMAGE;
    memAllocParams.size      = pSec2Buf->size;
    memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION,  _PCI) |
                               DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
    memAllocParams.attr2     = DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED);
    memAllocParams.flags     = 0;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hPhysMem,
                                NV01_MEMORY_SYSTEM,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // Allocate the virtual memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type      = NVOS32_TYPE_IMAGE;
    memAllocParams.size      = pSec2Buf->size;
    memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2     = NVOS32_ATTR2_NONE;
    memAllocParams.flags    |= NVOS32_ALLOC_FLAGS_VIRTUAL;
    memAllocParams.hVASpace  = pSec2Buf->hVASpace;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pSec2Utils->hClient,
                                pSec2Utils->hDevice,
                                pSec2Buf->hVirtMem,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // Map the buffer
    OBJSYS *pSys = SYS_GET_INSTANCE();
    OBJCL  *pCl = SYS_GET_CL(pSys);
    NvU32 cacheSnoopFlag = 0;
    if (pCl->getProperty(pCl, PDB_PROP_CL_IS_CHIPSET_IO_COHERENT))
    {
        cacheSnoopFlag = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
    }
    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->Map(pRmApi, pSec2Utils->hClient, pSec2Utils->hDevice,
                    pSec2Buf->hVirtMem, pSec2Buf->hPhysMem, 0, pSec2Buf->size,
                    DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) | cacheSnoopFlag,
                    &pSec2Buf->gpuVA));

    pSec2Buf->pMemDesc = memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pSec2Utils->hClient, pSec2Buf->hPhysMem);
    return NV_OK;
}

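// Allocate and map the method-stream auth tag buffers used for scrub and semaphore methods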
static NV_STATUS
_sec2InitBuffers
(
    Sec2Utils *pSec2Utils
)
{
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->scrubMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->scrubMthdAuthTagBuf));

    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hPhysMem));
    NV_ASSERT_OK_OR_RETURN(serverutilGenResourceHandle(pSec2Utils->hClient, &pSec2Utils->semaMthdAuthTagBuf.hVirtMem));
    NV_ASSERT_OK_OR_RETURN(_sec2AllocAndMapBuffer(pSec2Utils, RM_PAGE_SIZE_64K, &pSec2Utils->semaMthdAuthTagBuf));
    return NV_OK;
}

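//
// Construct a Sec2Utils instance: allocate an internal RM client, set up a SEC2
// scrubber channel with its buffers forced to sysmem, initialize the SEC2 context
// and auth tag buffers, and create the CCSL context used to encrypt the method
// stream. Only supported when the Confidential Computing feature is enabled.
//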
NV_STATUS
sec2utilsConstruct_IMPL
(
    Sec2Utils                    *pSec2Utils,
    OBJGPU                       *pGpu,
    KERNEL_MIG_GPU_INSTANCE      *pKernelMIGGPUInstance
)
{
    NV_STATUS status = NV_OK;
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
    NV_ASSERT_OR_RETURN(((pConfCompute != NULL) && (pConfCompute->getProperty(pCC, PDB_PROP_CONFCOMPUTE_CC_FEATURE_ENABLED))),
                          NV_ERR_NOT_SUPPORTED);

    pSec2Utils->pGpu = pGpu;

    // Allocate channel with RM internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient = NULL;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    // Allocate client
    NV_ASSERT_OK_OR_GOTO(status, pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                                         NV01_NULL_OBJECT, NV01_ROOT, &pSec2Utils->hClient,
                                                         sizeof(pSec2Utils->hClient)), cleanup);

    pChannel->hClient = pSec2Utils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    NV_ASSERT_OR_GOTO(pClient != NULL, free_client);

    NV_ASSERT_OK_OR_GOTO(status, serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient), free_client);

    NV_ASSERT_OK_OR_GOTO(status, clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U), free_client);

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;

    pChannel->deviceId = NV01_NULL_OBJECT;
    pChannel->subdeviceId = NV01_NULL_OBJECT;

    pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance;

    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = NV_FALSE;

    pChannel->type = SWL_SCRUBBER_CHANNEL;
    pChannel->engineType = RM_ENGINE_TYPE_SEC2;

    // Determine the SEC2 class to use for this channel
    NV_ASSERT_OK_OR_GOTO(status, _sec2GetClass(pGpu, &pSec2Utils->sec2Class), free_client);
    pChannel->sec2Class = pSec2Utils->sec2Class;

    // Set up various channel resources
    NV_ASSERT_OK_OR_GOTO(status, channelSetupIDs(pChannel, pGpu, NV_FALSE, IS_MIG_IN_USE(pGpu)), free_client);

    channelSetupChannelBufferSizes(pChannel);

    // Save original values
    NvU32 instLocOverrides4 = pGpu->instLocOverrides4;
    NvU32 instLocOverrides = pGpu->instLocOverrides;

    // Force PB, GPFIFO, notifier and USERD to sysmem before allocating the channel
    pGpu->instLocOverrides4 = FLD_SET_DRF(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, _NCOH, pGpu->instLocOverrides4);
    pGpu->instLocOverrides  = FLD_SET_DRF(_REG_STR_RM, _INST_LOC, _USERD, _NCOH, pGpu->instLocOverrides);

    pChannel->engineType = NV2080_ENGINE_TYPE_SEC2;

    pMemoryManager->bScrubChannelSetupInProgress = NV_TRUE;
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel), free_channel);
    pMemoryManager->bScrubChannelSetupInProgress = NV_FALSE;

    pSec2Utils->hDevice = pChannel->deviceId;
    pSec2Utils->hSubdevice = pChannel->subdeviceId;

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for Sec2Utils\n");
    pSec2Utils->pChannel = pChannel;

    NV_ASSERT_OK_OR_GOTO(status, memmgrMemUtilsSec2CtxInit_HAL(pGpu, pMemoryManager, pChannel), free_channel);

    // Restore original values
    pGpu->instLocOverrides4 = instLocOverrides4;
    pGpu->instLocOverrides = instLocOverrides;

    pSec2Utils->lastSubmittedPayload = 0;
    pSec2Utils->lastCompletedPayload = 0;
    pSec2Utils->authTagPutIndex = 0;
    pSec2Utils->authTagGetIndex = 0;

    NV_ASSERT_OK_OR_GOTO(status, _sec2InitBuffers(pSec2Utils), free_channel);

    NV_ASSERT_OK_OR_GOTO(status, ccslContextInitViaChannel(&pSec2Utils->pCcslCtx, pSec2Utils->hClient, pChannel->channelId), free_channel);

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

free_client:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);

cleanup:
    portMemFree(pChannel);
    return status;
}

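//
// Tear down a Sec2Utils instance: release any outstanding BAR1 mappings, free the
// auth tag buffers, the channel resources, and the internal RM client.
//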
void
sec2utilsDestruct_IMPL
(
    Sec2Utils *pSec2Utils
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    OBJGPU *pGpu = pSec2Utils->pGpu;
    MemoryManager *pMemoryManager = NULL;
    RM_API *pRmApi = NULL;

    // Sanity checks
    if ((pGpu == NULL) || (pChannel == NULL))
    {
        NV_PRINTF(LEVEL_WARNING, "Possible double-free of Sec2Utils!\n");
        return;
    }
    else if (pGpu != pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Bad state during sec2Utils teardown!\n");
        return;
    }

    pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    ccslContextClear(pSec2Utils->pCcslCtx);

    if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL))
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // When PCIe access is blocked, mappings should be created, used, and
            // torn down at the point of use
            //
            NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from Sec2Utils!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pControlGPFifo = NULL;
        }
    }

    if (pChannel->pbCpuVA != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pbCpuVA = NULL;
        }
    }

    if (pChannel->pTokenFromNotifier != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pTokenFromNotifier = NULL;
        }
    }

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->scrubMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hPhysMem);
    pRmApi->Free(pRmApi, pSec2Utils->hClient, pSec2Utils->semaMthdAuthTagBuf.hVirtMem);

    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdPhys);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->pushBufferId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->errNotifierIdVirt);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->deviceId);

    // Resource server makes sure no leak can occur
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}

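// Help the scrubber channel make progress: service SEC2 interrupts if this thread
// owns the GPU lock, otherwise yield the CPU.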
void
sec2utilsServiceInterrupts_IMPL(Sec2Utils *pSec2Utils)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock held in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // This can result in RM waiting for scrubbing to complete and yielding while
    // holding the rmDeviceGpuLock, which can lead to deadlock.
    // Instead, if the lock is held, service any interrupts on SEC2 to help the engine make progress.
    // Bug 2527660 is filed to remove this change.
    //
    // pChannel is NULL when PMA scrub requests are handled in the vGPU plugin.
    // In that case the vGPU plugin allocates the scrubber channel in the PF domain,
    // so the above-mentioned deadlock is not present there.
    //
    if ((pChannel != NULL) && (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance)))
    {
        channelServiceScrubberInterrupts(pChannel);
    }
    else
    {
        osSchedule();
    }
}

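// Read the auth tag buffer GET index that SEC2 reports through the channel semaphore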
static NvU32
_sec2utilsUpdateGetPtr
(
    Sec2Utils *pSec2Utils
)
{
    return channelReadChannelMemdesc(pSec2Utils->pChannel, pSec2Utils->pChannel->authTagBufSemaOffset);
}

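//
// Advance the auth tag buffer PUT index, waiting (with a timeout) until the next
// slot has been consumed by SEC2.
//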
static NV_STATUS
_sec2utilsGetNextAuthTagSlot
(
    Sec2Utils *pSec2Utils
)
{
    NvU32 totalSlots = pSec2Utils->scrubMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextPut = (pSec2Utils->authTagPutIndex + 1) % totalSlots;

    // Check if slots have freed up
    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout;
    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextPut == pSec2Utils->authTagGetIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Timed out waiting for next auth tag buf slot to free up: nextPut = %d get = %d\n",
                nextPut, _sec2utilsUpdateGetPtr(pSec2Utils));
            return status;
        }
        pSec2Utils->authTagGetIndex = _sec2utilsUpdateGetPtr(pSec2Utils);
        osSpinLoop();
    }

    pSec2Utils->authTagPutIndex = nextPut;
    return NV_OK;
}

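//
// Fill the next pushbuffer entry with SEC2 methods (encrypted via the CCSL
// context) for the requested operation and submit it by writing the
// corresponding GPFIFO entry.
//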
static NV_STATUS
_sec2utilsSubmitPushBuffer
(
    Sec2Utils        *pSec2Utils,
    OBJCHANNEL       *pChannel,
    NvBool            bInsertFinishPayload,
    NvU32             nextIndex,
    CHANNEL_PB_INFO  *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvU32 methodsLength = 0;
    NvU32 putIndex = 0;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
    NvBool bReleaseMapping = NV_FALSE;

    //
    // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow
    // buffer for DMA access
    //
    NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1     |
                           TRANSFER_FLAGS_SHADOW_ALLOC |
                           TRANSFER_FLAGS_SHADOW_INIT_MEM);
    NV_PRINTF(LEVEL_INFO, "Actual size of the copy to be pushed: %x\n", pChannelPbInfo->size);

    status = channelWaitForFreeEntry(pChannel, &putIndex);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
        return status;
    }

    if (pChannel->pbCpuVA == NULL)
    {
        pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                                                       transferFlags);
        bReleaseMapping = NV_TRUE;
    }
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    NV_ASSERT_OK_OR_RETURN(_sec2utilsGetNextAuthTagSlot(pSec2Utils));
    NV_ASSERT_OK_OR_RETURN(channelFillSec2Pb(pChannel, putIndex, bInsertFinishPayload, pChannelPbInfo, pSec2Utils->pCcslCtx,
                                             pSec2Utils->scrubMthdAuthTagBuf.pMemDesc, pSec2Utils->semaMthdAuthTagBuf.pMemDesc,
                                             pSec2Utils->scrubMthdAuthTagBuf.gpuVA, pSec2Utils->authTagPutIndex,
                                             pSec2Utils->semaMthdAuthTagBuf.gpuVA, nextIndex, &methodsLength));

    if (bReleaseMapping)
    {
        memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
        pChannel->pbCpuVA = NULL;
    }

    if (methodsLength == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
        return NV_ERR_NO_FREE_FIFOS;
    }

    //
    // Pushbuffer can be written in a batch, but GPFIFO and doorbell require
    // careful ordering so we do each write one-by-one
    //
    status = channelFillGpFifo(pChannel, putIndex, methodsLength);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Channel operation failure during memset\n");
        return status;
    }

    pChannel->lastSubmittedEntry = putIndex;

    return status;
}

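//
// Perform a memset over a range of an FB memory descriptor using the SEC2 scrubber
// channel, splitting the request into physically contiguous chunks of at most
// NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES. The payload of the last submitted work item
// is returned in pParams->submittedWorkId for later progress tracking.
//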
NV_STATUS
sec2utilsMemset_IMPL
(
    Sec2Utils *pSec2Utils,
    SEC2UTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pSec2Utils->pChannel;
    NV_STATUS   status = NV_OK;
    RMTIMEOUT   timeout;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for Sec2Utils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) ||
        (pMemDesc->pGpu != pSec2Utils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Make sure free auth tag buffers are available
    NvU32 totalSlots = pSec2Utils->semaMthdAuthTagBuf.size / SHA_256_HASH_SIZE_BYTES;
    NvU32 nextIndex = (pSec2Utils->lastSubmittedPayload + 1) % totalSlots;
    NvU32 lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
    NvU32 currentIndex = lastCompleted % totalSlots;

    gpuSetTimeout(pSec2Utils->pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
    while (nextIndex == currentIndex)
    {
        status = gpuCheckTimeout(pSec2Utils->pGpu, &timeout);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to finish previous scrub op before re-using method stream auth tag buf: lastCompleted = %d lastSubmitted = %lld\n",
               lastCompleted, pSec2Utils->lastSubmittedPayload);
            return status;
        }
        lastCompleted = sec2utilsUpdateProgress(pSec2Utils);
        currentIndex = lastCompleted % totalSlots;
        osSpinLoop();
    }

    channelPbInfo.payload = pSec2Utils->lastSubmittedPayload + 1;
    pSec2Utils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

    do
    {
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "Sec2Utils Memset dstAddr: %llx,  size: %x\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        channelPbInfo.size = memsetSizeContig;

        status = _sec2utilsSubmitPushBuffer(pSec2Utils, pChannel, memsetSizeContig == memsetLength, nextIndex, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        memsetLength -= memsetSizeContig;
        offset       += memsetSizeContig;
    } while (memsetLength != 0);

    NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
    pParams->submittedWorkId = channelPbInfo.payload;

    return status;
}

// This function updates pSec2Utils->lastCompletedPayload and handles wrap-around
NvU64
sec2utilsUpdateProgress_IMPL
(
    Sec2Utils *pSec2Utils
)
{
    NV_ASSERT((pSec2Utils != NULL) && (pSec2Utils->pChannel != NULL));

    NvU32 hwCurrentCompletedPayload = 0;
    NvU64 swLastCompletedPayload = pSec2Utils->lastCompletedPayload;

    //
    // Sec2Utils uses a 64-bit index to track submitted work, but HW supports only
    // a 32-bit semaphore. The currently completed ID is calculated here based on
    // lastCompletedPayload and the current HW semaphore value.
    //
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pSec2Utils->pChannel);

    // No work has been completed since we checked last time
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
    swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
    swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;

    pSec2Utils->lastCompletedPayload = swLastCompletedPayload;
    return swLastCompletedPayload;
}