1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "kernel/gpu/intr/intr.h"
25 #include "kernel/gpu/fifo/kernel_fifo.h"
26 #include "kernel/gpu/fifo/kernel_channel.h"
27 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
28 #include "utils/nvassert.h"
29 #include "core/locks.h"
30 #include "gpu/mem_mgr/mem_mgr.h"
31 
32 #include "kernel/gpu/mem_mgr/ce_utils_sizes.h"
33 #include "kernel/gpu/mem_mgr/channel_utils.h"
34 #include "class/clcba2.h"
35 
36 #define SEC2_WL_METHOD_ARRAY_SIZE 16
37 #define SHA_256_HASH_SIZE_BYTE  32
38 
39 /* Static helper functions */
40 static NvU32 channelPushMemoryProperties(OBJCHANNEL *pChannel, CHANNEL_PB_INFO *pChannelPbInfo, NvU32 **ppPtr);
41 static void channelPushMethod(OBJCHANNEL *pChannel, CHANNEL_PB_INFO *pChannelPbInfo,
42                               NvBool bPipelined, NvBool bInsertFinishPayload,
43                               NvU32 launchType, NvU32 semaValue, NvU32 **ppPtr);
44 
45 /* Public APIs */
46 NV_STATUS
47 channelSetupIDs
48 (
49     OBJCHANNEL *pChannel,
50     OBJGPU     *pGpu,
51     NvBool      bUseVasForCeCopy,
52     NvBool      bMIGInUse
53 )
54 {
55     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
56                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->physMemId));
57 
58     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
59                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->channelId));
60 
61     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
62                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->errNotifierIdVirt));
63 
64     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
65                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->errNotifierIdPhys));
66 
67     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
68                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->engineObjectId));
69 
70     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
71                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->eventId));
72 
73     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
74                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->pushBufferId));
75 
76     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
77                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->doorbellRegionHandle));
78 
79     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
80                           serverutilGenResourceHandle(pChannel->hClient, &pChannel->hUserD));
81 
82     // For physical CE channels, we will use RM internal VAS to map channel buffers
83     NV_ASSERT(pChannel->hVASpaceId == NV01_NULL_OBJECT);
84 
85     if (bUseVasForCeCopy || (IS_GSP_CLIENT(pGpu) && bMIGInUse))
86     {
87         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
88                               serverutilGenResourceHandle(pChannel->hClient, &pChannel->hVASpaceId));
89     }
90 
91     return NV_OK;
92 }
93 
94 void
95 channelSetupChannelBufferSizes
96 (
97     OBJCHANNEL *pChannel
98 )
99 {
100     NV_ASSERT_OR_RETURN_VOID(pChannel != NULL);
101     NV_ASSERT_OR_RETURN_VOID(pChannel->type < MAX_CHANNEL_TYPE);
102 
103     if (pChannel->type == SWL_SCRUBBER_CHANNEL)
104     {
105         pChannel->methodSizePerBlock = SEC2_METHOD_SIZE_PER_BLOCK;
106     }
107     else
108     {
109         pChannel->methodSizePerBlock = CE_METHOD_SIZE_PER_BLOCK;
110         if (pChannel->type == FAST_SCRUBBER_CHANNEL)
111         {
112             pChannel->methodSizePerBlock = FAST_SCRUBBER_METHOD_SIZE_PER_BLOCK;
113         }
114     }
115     pChannel->channelNotifierSize = CHANNEL_NOTIFIER_SIZE;
116     pChannel->channelNumGpFifioEntries = NUM_COPY_BLOCKS;
117 
    // These sizes depend on the size of each block of pushbuffer methods
119     pChannel->channelPbSize = pChannel->methodSizePerBlock * NUM_COPY_BLOCKS;
120     pChannel->channelSize = pChannel->channelPbSize + GPFIFO_SIZE +
121                             CHANNEL_HOST_SEMAPHORE_SIZE + CHANNEL_ENGINE_SEMAPHORE_SIZE;
122     if (pChannel->type == SWL_SCRUBBER_CHANNEL)
123     {
124         pChannel->channelSize = pChannel->channelSize + SEC2_AUTH_TAG_BUF_SEMAPHORE_SIZE;
125     }
126     pChannel->semaOffset = pChannel->channelPbSize + GPFIFO_SIZE;
127     pChannel->finishPayloadOffset = pChannel->semaOffset + CHANNEL_HOST_SEMAPHORE_SIZE;
128     pChannel->authTagBufSemaOffset = pChannel->finishPayloadOffset + CHANNEL_ENGINE_SEMAPHORE_SIZE;
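
    //
    // Resulting layout of the channel allocation (derived from the offsets
    // computed above):
    //
    //   0 ..................... pushbuffer (methodSizePerBlock * NUM_COPY_BLOCKS)
    //   channelPbSize ......... GPFIFO ring (GPFIFO_SIZE)
    //   semaOffset ............ host semaphore (CHANNEL_HOST_SEMAPHORE_SIZE)
    //   finishPayloadOffset ... engine semaphore (CHANNEL_ENGINE_SEMAPHORE_SIZE)
    //   authTagBufSemaOffset .. SEC2 auth tag semaphore (SWL scrubber channels only)
    //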
129 }
130 
131 NvU32
132 channelReadChannelMemdesc
133 (
134     OBJCHANNEL *pChannel,
135     NvU32       offset
136 )
137 {
138     NV_ASSERT_OR_RETURN(pChannel != NULL, 0);
139     NV_ASSERT_OR_RETURN(pChannel->pGpu != NULL, 0);
140 
141     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
142     NvBool bReleaseMapping = NV_FALSE;
143     NvU32 result = 0;
144 
145     //
146     // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow
147     // buffer for DMA access
148     //
149     NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1     |
150                            TRANSFER_FLAGS_SHADOW_ALLOC |
151                            TRANSFER_FLAGS_SHADOW_INIT_MEM);
152 
153     if (pChannel->pbCpuVA == NULL)
154     {
155         pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
156                                                        transferFlags);
157         bReleaseMapping = NV_TRUE;
158     }
159 
160     NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
161 
162     result = MEM_RD32((NvU8*)pChannel->pbCpuVA + offset);
163 
164     if (bReleaseMapping)
165     {
166         memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
167         pChannel->pbCpuVA = NULL;
168     }
169 
170     return result;
171 }
172 
/*!
 * Service any interrupts that may block the CE or SEC2 scrubber associated
 * with this channel from making forward progress. This includes interrupts
 * from the CE/SEC2 engine itself as well as from host/esched, which processes
 * the pushbuffer.
 */
179 void
180 channelServiceScrubberInterrupts(
181     OBJCHANNEL *pChannel
182 )
183 {
184     OBJGPU *pGpu = pChannel->pGpu;
185     Intr *pIntr = GPU_GET_INTR(pGpu);
186     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
187     const NvU32 ceId = pChannel->ceId;
188     MC_ENGINE_BITVECTOR engines;
189     NvU32 mcIndex;
190     NvU32 engineType;
191 
192     if (pChannel->type == SWL_SCRUBBER_CHANNEL)
193     {
194         mcIndex = MC_ENGINE_IDX_SEC2;
195         engineType = RM_ENGINE_TYPE_SEC2;
196     }
197     else
198     {
        mcIndex = MC_ENGINE_IDX_CE(ceId);
200         engineType = RM_ENGINE_TYPE_COPY(ceId);
201     }
202 
203     bitVectorClrAll(&engines);
204     bitVectorSet(&engines, mcIndex);
205     if (kfifoIsHostEngineExpansionSupported(pKernelFifo))
206     {
207         NvU32 runlistId;
208         NV_ASSERT_OK(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
209             ENGINE_INFO_TYPE_RM_ENGINE_TYPE, engineType,
210             ENGINE_INFO_TYPE_RUNLIST, &runlistId));
211         bitVectorSet(&engines, MC_ENGINE_IDX_ESCHEDn(runlistId));
212     }
213     else
214     {
215         bitVectorSet(&engines, MC_ENGINE_IDX_FIFO);
216     }
217     intrServiceStallList_HAL(pGpu, pIntr, &engines, NV_FALSE);
218 }
219 
220 NV_STATUS
221 channelWaitForFinishPayload
222 (
223     OBJCHANNEL *pChannel,
224     NvU64       targetPayload
225 )
226 {
227     NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_STATE);
228 
229     OBJGPU *pGpu = pChannel->pGpu;
230     RMTIMEOUT timeout;
231     NV_STATUS status = NV_OK;
232 
233     gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
234     while (1)
235     {
236         if (READ_CHANNEL_PAYLOAD_SEMA(pChannel) >= targetPayload)
237         {
238             break;
239         }
240 
241         status = gpuCheckTimeout(pGpu, &timeout);
242         if (status == NV_ERR_TIMEOUT)
243         {
244             break;
245         }
246 
247         if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
248         {
249             if (!kfifoIsLiteModeEnabled_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu)))
250             {
251                 channelServiceScrubberInterrupts(pChannel);
252             }
253         }
254         else
255         {
256             osSchedule();
257         }
258     }
259 
260     return status;
261 }
262 
263 
/** Helper function which waits for a PB & GPFIFO entry to be read by HOST.
 *  After HOST reads the GPFIFO and PB entry, the semaphore is released.
 */
267 NV_STATUS
268 channelWaitForFreeEntry
269 (
270     OBJCHANNEL *pChannel,
271     NvU32      *pPutIndex
272 )
273 {
274     NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_STATE);
275 
276     OBJGPU *pGpu = pChannel->pGpu;
277     RMTIMEOUT timeout;
278     NV_STATUS status = NV_OK;
279 
280     NvU32 putIndex = 0;
281     NvU32 getIndex = 0;
282 
283     putIndex = (pChannel->lastSubmittedEntry + 1) % pChannel->channelNumGpFifioEntries;
284 
285     gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
286     do
287     {
288         getIndex = READ_CHANNEL_PB_SEMA(pChannel);
289 
290         NV_PRINTF(LEVEL_INFO, "Get Index: %x, PayloadIndex: %x\n", getIndex,
291                               READ_CHANNEL_PAYLOAD_SEMA(pChannel));
292 
293         if (getIndex != putIndex)
294         {
295             break;
296         }
297 
298         status = gpuCheckTimeout(pGpu, &timeout);
299         if (status == NV_ERR_TIMEOUT)
300         {
301             break;
302         }
303 
304         if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
305         {
306             if (!kfifoIsLiteModeEnabled_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu)))
307             {
308                 channelServiceScrubberInterrupts(pChannel);
309             }
310         }
311         else
312         {
313             osSchedule();
314         }
315     } while(1);
316 
317     if (status == NV_OK)
318     {
319         NV_ASSERT_OR_RETURN(pPutIndex != NULL, NV_ERR_INVALID_STATE);
320         *pPutIndex = putIndex;
321     }
322 
323     return status;
324 }
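
//
// Typical submission flow (an illustrative sketch of how the helpers in this
// file are combined by a caller; CHANNEL_PB_INFO setup and error handling are
// elided):
//
//     NvU32 putIndex = 0;
//     NV_ASSERT_OK_OR_RETURN(channelWaitForFreeEntry(pChannel, &putIndex));
//     NvU32 methodsLength = channelFillCePb(pChannel, putIndex,
//                                           NV_FALSE /* bPipelined */,
//                                           NV_TRUE  /* bInsertFinishPayload */,
//                                           &pbInfo);
//     NV_ASSERT_OR_RETURN(methodsLength != 0, NV_ERR_GENERIC);
//     NV_ASSERT_OK_OR_RETURN(channelFillGpFifo(pChannel, putIndex, methodsLength));
//     pChannel->lastSubmittedEntry = putIndex;
//     NV_ASSERT_OK_OR_RETURN(channelWaitForFinishPayload(pChannel, pbInfo.payload));
//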
325 
326 
/** Helper function to fill a GPFIFO entry with a pushbuffer segment and kick
 *  off execution by HOST.
 */
330 NV_STATUS
331 channelFillGpFifo
332 (
333     OBJCHANNEL *pChannel,
334     NvU32       putIndex,
335     NvU32       methodsLength
336 )
337 {
338     NvU32  *pGpEntry;
339     NvU32   GpEntry0;
340     NvU32   GpEntry1;
341     NvU64   pbPutOffset;
342     OBJGPU *pGpu;
343     KernelBus *pKernelBus;
344     MemoryManager *pMemoryManager;
345     NvBool bReleaseMapping = NV_FALSE;
346 
347     //
348     // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow
349     // buffer for DMA access
350     //
351     NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1     |
352                            TRANSFER_FLAGS_SHADOW_ALLOC |
353                            TRANSFER_FLAGS_SHADOW_INIT_MEM);
354 
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(putIndex < pChannel->channelNumGpFifioEntries, NV_ERR_INVALID_STATE);
357 
358     pGpu = pChannel->pGpu;
359     NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_STATE);
360 
361     pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
362     pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
363 
364     if (pChannel->pbCpuVA == NULL)
365     {
366         pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
367                                                        transferFlags);
368         bReleaseMapping = NV_TRUE;
369     }
370 
371     NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
372 
373     pbPutOffset = (pChannel->pbGpuVA + (putIndex * pChannel->methodSizePerBlock));
374 
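    //
    // GP entry encoding (NV906F GP_ENTRY0/1): GET holds bits 31:2 of the
    // pushbuffer segment address, GET_HI the upper 32 bits, and LENGTH the
    // size of the method segment in dwords.
    //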
375     GpEntry0 = DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
376                DRF_NUM(906F, _GP_ENTRY0, _GET, NvU64_LO32(pbPutOffset) >> 2);
377 
378     GpEntry1 = DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(pbPutOffset)) |
379                DRF_NUM(906F, _GP_ENTRY1, _LENGTH, methodsLength >> 2) |
380                DRF_DEF(906F, _GP_ENTRY1, _LEVEL, _MAIN);
381 
382     pGpEntry = (NvU32 *)(((NvU8 *)pChannel->pbCpuVA) + pChannel->channelPbSize +
383                 (pChannel->lastSubmittedEntry * NV906F_GP_ENTRY__SIZE));
384 
385     MEM_WR32(&pGpEntry[0], GpEntry0);
386     MEM_WR32(&pGpEntry[1], GpEntry1);
387 
388     if (bReleaseMapping)
389     {
390         memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
391                                  transferFlags);
392         pChannel->pbCpuVA = NULL;
393     }
394 
395     osFlushCpuWriteCombineBuffer();
396 
397     // write GP put
398     if (pChannel->pControlGPFifo == NULL)
399     {
400         pChannel->pControlGPFifo =
401             (void *)memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pUserdMemdesc,
402                                                transferFlags);
403         NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_INVALID_STATE);
404         bReleaseMapping = NV_TRUE;
405     }
406 
407     MEM_WR32(&pChannel->pControlGPFifo->GPPut, putIndex);
408 
409     if (bReleaseMapping)
410     {
411         memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, transferFlags);
412         pChannel->pControlGPFifo = NULL;
413     }
414 
415     osFlushCpuWriteCombineBuffer();
416 
417     //
    // On some architectures, if the doorbell is mapped via BAR0, we need to
    // send an extra flush
420     //
421     if (kbusFlushPcieForBar0Doorbell_HAL(pGpu, pKernelBus) != NV_OK)
422     {
        NV_PRINTF(LEVEL_ERROR, "Bus flush failed in channelFillGpFifo\n");
424         return NV_ERR_GENERIC;
425     }
426 
427     // Update doorbell with work submission token
428     if (pChannel->bUseDoorbellRegister)
429     {
430         if (pChannel->pTokenFromNotifier == NULL)
431         {
432             NvU8 *pErrNotifierCpuVA =
433                 (void *)memmgrMemDescBeginTransfer(pMemoryManager,
434                     pChannel->pErrNotifierMemdesc, transferFlags);
435 
436             NV_ASSERT_OR_RETURN(pErrNotifierCpuVA != NULL, NV_ERR_INVALID_STATE);
437 
438             pChannel->pTokenFromNotifier =
439                 (NvNotification *)(pErrNotifierCpuVA +
440                                (NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
441                                 sizeof(NvNotification)));
442             bReleaseMapping = NV_TRUE;
443         }
444 
445         // Use the token from notifier memory for VM migration support.
446         MEM_WR32(pChannel->pDoorbellRegisterOffset,
447                  MEM_RD32(&(pChannel->pTokenFromNotifier->info32)));
448 
449         if (bReleaseMapping)
450         {
451             memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, transferFlags);
452             pChannel->pTokenFromNotifier = NULL;
453         }
454     }
455 
456     return NV_OK;
457 }
458 
459 NvU32
460 channelFillPbFastScrub
461 (
462     OBJCHANNEL      *pChannel,
463     NvU32            putIndex,
464     NvBool           bPipelined,
465     NvBool           bInsertFinishPayload,
466     CHANNEL_PB_INFO *pChannelPbInfo
467 )
468 {
469     NvU32   pipelinedValue = 0;
470     NvU32   flushValue     = 0;
471     NvU32  *pPtr           = (NvU32 *)((NvU8*)pChannel->pbCpuVA + (putIndex * pChannel->methodSizePerBlock));
472     NvU32  *pStartPtr      = pPtr;
473     NvU32   semaValue      = 0;
474     NvU32   data           = 0;
475     NvU64   pSemaAddr      = 0;
476 
477     NV_PRINTF(LEVEL_INFO, "PutIndex: %x, PbOffset: %x\n", putIndex,
478                putIndex * pChannel->methodSizePerBlock);
479     // SET OBJECT
480     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC86F_SET_OBJECT, pChannel->classEngineID);
481 
482     // Set Pattern for Memset
483     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_SET_REMAP_CONST_A, pChannelPbInfo->pattern);
484     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_SET_REMAP_CONST_B, pChannelPbInfo->pattern);
485 
486     // Set Component Size to 1
487     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_SET_REMAP_COMPONENTS,
488                   DRF_DEF(C8B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A)          |
489                   DRF_DEF(C8B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_B)          |
490                   DRF_DEF(C8B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, _ONE)     |
491                   DRF_DEF(C8B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, _ONE));
492 
493     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_SET_DST_PHYS_MODE,
494                   DRF_DEF(C8B5, _SET_DST_PHYS_MODE, _TARGET, _LOCAL_FB));
495 
496     semaValue = (bInsertFinishPayload) ?
497         DRF_DEF(C8B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _RELEASE_ONE_WORD_SEMAPHORE) : 0;
498 
499     if (bPipelined)
500         pipelinedValue = DRF_DEF(C8B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED);
501     else
502         pipelinedValue = DRF_DEF(C8B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _NON_PIPELINED);
503 
504     if (bInsertFinishPayload)
505         flushValue = DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE);
506     else
507         flushValue = DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _FALSE);
508 
509     NV_PUSH_INC_2U(RM_SUBCHANNEL,
510         NVC8B5_OFFSET_OUT_UPPER, NvU64_HI32(pChannelPbInfo->dstAddr),
511         NVC8B5_OFFSET_OUT_LOWER, NvU64_LO32(pChannelPbInfo->dstAddr));
512 
513     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_LINE_LENGTH_IN, pChannelPbInfo->size);
514 
515     if (semaValue)
516     {
517         NV_PUSH_INC_3U(RM_SUBCHANNEL,
518             NVC8B5_SET_SEMAPHORE_A,       NvU64_HI32(pChannel->pbGpuVA + pChannel->finishPayloadOffset),
519             NVC8B5_SET_SEMAPHORE_B,       NvU64_LO32(pChannel->pbGpuVA + pChannel->finishPayloadOffset),
520             NVC8B5_SET_SEMAPHORE_PAYLOAD, pChannelPbInfo->payload);
521     }
522 
523     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_SET_MEMORY_SCRUB_PARAMETERS,
524                        DRF_DEF(C8B5, _SET_MEMORY_SCRUB_PARAMETERS, _DISCARDABLE, _FALSE));
525 
526     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC8B5_LAUNCH_DMA,
527             DRF_DEF(C8B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH)    |
528             DRF_DEF(C8B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH)    |
529             DRF_DEF(C8B5, _LAUNCH_DMA, _REMAP_ENABLE, _FALSE)         |
530             DRF_DEF(C8B5, _LAUNCH_DMA, _MULTI_LINE_ENABLE, _FALSE)    |
531             DRF_DEF(C8B5, _LAUNCH_DMA, _MEMORY_SCRUB_ENABLE, _TRUE)   |
532             DRF_DEF(C8B5, _LAUNCH_DMA, _DISABLE_PLC, _TRUE)           |
533             DRF_DEF(C8B5, _LAUNCH_DMA, _DST_TYPE, _PHYSICAL)          |
534             DRF_DEF(C8B5, _LAUNCH_DMA, _SRC_TYPE, _PHYSICAL)          |
535             pipelinedValue                                            |
536             flushValue                                                |
537             semaValue);
538 
539     //
    // This should always be at the bottom of the pushbuffer segment, since it
    // denotes that HOST has read all the methods needed for this memory
    // operation, so this GPFIFO and PB entry can safely be reused.
543     //
544     data =  DRF_DEF(C86F, _SEM_EXECUTE, _OPERATION, _RELEASE) |
545             DRF_DEF(C86F, _SEM_EXECUTE, _PAYLOAD_SIZE, _32BIT) |
546             DRF_DEF(C86F, _SEM_EXECUTE, _RELEASE_WFI, _DIS);
547 
    pSemaAddr = (pChannel->pbGpuVA + pChannel->semaOffset);
549 
550     NV_PUSH_INC_4U(RM_SUBCHANNEL,
551             NVC86F_SEM_ADDR_LO,    NvU64_LO32(pSemaAddr),
552             NVC86F_SEM_ADDR_HI,    NvU64_HI32(pSemaAddr),
553             NVC86F_SEM_PAYLOAD_LO, putIndex,
554             NVC86F_SEM_PAYLOAD_HI, 0);
555 
556     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVC86F_SEM_EXECUTE, data);
557 
558     NvU32 methodSize = (NvU32)((NvU8*)pPtr - (NvU8*)pStartPtr);
559     NV_ASSERT_OR_RETURN(methodSize <= pChannel->methodSizePerBlock, 0);
560     return methodSize;
561 }
562 
563 static void
564 channelAddHostSema
565 (
566     OBJCHANNEL *pChannel,
567     NvU32       putIndex,
568     NvU32     **ppPtr
569 )
570 {
571     NvU32  *pPtr = *ppPtr;
572     NvU32  data       = 0;
573     NvU64  pSemaAddr  = 0;
574 
    // Release the HOST semaphore after the gpfifo/pb segments are processed by esched
576     data = DRF_DEF(906F, _SEMAPHORED, _OPERATION, _RELEASE) |
577            DRF_DEF(906F, _SEMAPHORED, _RELEASE_SIZE, _4BYTE) |
578            DRF_DEF(906F, _SEMAPHORED, _RELEASE_WFI, _DIS);
579 
580     pSemaAddr = (pChannel->pbGpuVA + pChannel->semaOffset);
581 
582     //
    // This should always be at the bottom of the pushbuffer segment, since it
    // denotes that HOST has read all the methods needed for this memory
    // operation, so this GPFIFO and PB entry can safely be reused.
586     //
587     NV_PUSH_INC_4U(RM_SUBCHANNEL,
588                    NV906F_SEMAPHOREA, NvU64_HI32(pSemaAddr),
589                    NV906F_SEMAPHOREB, NvU64_LO32(pSemaAddr),
590                    NV906F_SEMAPHOREC, putIndex,
591                    NV906F_SEMAPHORED, data);
592     *ppPtr = pPtr;
593 }
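
//
// Note: the payload released above is the GPFIFO put index itself, which is
// what channelWaitForFreeEntry() polls via READ_CHANNEL_PB_SEMA to decide
// when a GPFIFO/PB entry can be reused.
//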
594 
/** Helper function to fill the pushbuffer with the methods needed for a
 *  memset or memcopy using CE. Decoupling the mem(set/copy) operation from
 *  channel resource management keeps this path efficient.
 **/
599 
600 NvU32
601 channelFillCePb
602 (
603     OBJCHANNEL      *pChannel,
604     NvU32            putIndex,
605     NvBool           bPipelined,
606     NvBool           bInsertFinishPayload,
607     CHANNEL_PB_INFO *pChannelPbInfo
608 )
609 {
610     NvU32  launchType = 0;
611     NvU32 *pPtr       = (NvU32 *)((NvU8 *)pChannel->pbCpuVA + (putIndex * pChannel->methodSizePerBlock));
612     NvU32 *pStartPtr  = pPtr;
613     NvU32  semaValue  = 0;
614 
615     NV_PRINTF(LEVEL_INFO, "PutIndex: %x, PbOffset: %x\n", putIndex, putIndex * pChannel->methodSizePerBlock);
616 
617     NV_PUSH_INC_1U(RM_SUBCHANNEL, NV906F_SET_OBJECT, pChannel->classEngineID);
618 
    // Side effect: pushes target addresses, apertures and, for memset, the REMAP methods
620     launchType = channelPushMemoryProperties(pChannel, pChannelPbInfo, &pPtr);
621 
622     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_LINE_LENGTH_IN, pChannelPbInfo->size);
623 
624     if (bInsertFinishPayload)
625     {
626         semaValue = DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _RELEASE_ONE_WORD_SEMAPHORE);
627 
628         // Do not support client semaphore for now
629         NV_ASSERT(pChannelPbInfo->clientSemaAddr == 0);
630 
631         NV_PUSH_INC_3U(RM_SUBCHANNEL,
632             NVB0B5_SET_SEMAPHORE_A,       NvU64_HI32(pChannel->pbGpuVA + pChannel->finishPayloadOffset),
633             NVB0B5_SET_SEMAPHORE_B,       NvU64_LO32(pChannel->pbGpuVA + pChannel->finishPayloadOffset),
634             NVB0B5_SET_SEMAPHORE_PAYLOAD, pChannelPbInfo->payload);
635     }
636 
    // Side effect: pushes the LAUNCH_DMA method
638     channelPushMethod(pChannel, pChannelPbInfo, bPipelined, bInsertFinishPayload, launchType, semaValue, &pPtr);
639 
640     channelAddHostSema(pChannel, putIndex, &pPtr);
641 
642     NvU32 methodSize = (NvU32)((NvU8*)pPtr - (NvU8*)pStartPtr);
643     NV_ASSERT_OR_RETURN(methodSize <= pChannel->methodSizePerBlock, 0);
644     return methodSize;
645 }
646 
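/*
 * Append one (method, data) pair to the flat staging array used for SEC2
 * method-stream signing. Entries are laid out as [cmd0, data0, cmd1, data1, ...],
 * so the array holds up to SEC2_WL_METHOD_ARRAY_SIZE pairs.
 */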
647 static NV_STATUS
648 addMethodsToMethodBuf
649 (
650     NvU32 command,
651     NvU32 data,
652     NvU32 *pMethodBuf,
653     NvU32  index
654 )
655 {
656 
657     NV_ASSERT_OR_RETURN((index < SEC2_WL_METHOD_ARRAY_SIZE), NV_ERR_INVALID_ARGUMENT);
658     pMethodBuf[index*2 + 0] = command;
659     pMethodBuf[index*2 + 1] = data;
660     return NV_OK;
661 }
662 
/** Helper function to fill the pushbuffer with the methods needed for a
 *  memset using SEC2. Decoupling the memset operation from channel resource
 *  management keeps this path efficient.
 **/
667 NV_STATUS
668 channelFillSec2Pb
669 (
670     OBJCHANNEL        *pChannel,
671     NvU32              putIndex,
672     NvBool             bInsertFinishPayload,
673     CHANNEL_PB_INFO   *pChannelPbInfo,
674     CCSL_CONTEXT      *pCcslCtx,
675     MEMORY_DESCRIPTOR *pAuthTagBufMemDesc,
676     MEMORY_DESCRIPTOR *pSemaMemDesc,
677     NvU64              scrubMthdAuthTagBufGpuVA,
678     NvU32              scrubAuthTagBufIndex,
679     NvU64              semaMthdAuthTagBufGpuVA,
680     NvU32              semaAuthTagBufIndex,
681     NvU32             *pMethodLength
682 )
683 {
684     NvU32          *pPtr                  = NULL;
685     NvU32          *pStartPtr             = NULL;
686     NvU32           execute               = 0;
687     NvU32           methodIdx             = 0;
688     NvU32          *pMethods              = NULL;
689     NV_STATUS       status                = NV_OK;
690     MemoryManager  *pMemoryManager        = NULL;
691     NvU32           scrubAuthTagBufoffset = scrubAuthTagBufIndex * SHA_256_HASH_SIZE_BYTE;
692     NvU32           semaAuthTagBufoffset  = semaAuthTagBufIndex * SHA_256_HASH_SIZE_BYTE;
693 
694     NV_ASSERT_OR_RETURN((pChannel != NULL), NV_ERR_INVALID_ARGUMENT);
695     NV_ASSERT_OR_RETURN((pChannelPbInfo != NULL), NV_ERR_INVALID_ARGUMENT);
696     NV_ASSERT_OR_RETURN((pCcslCtx != NULL), NV_ERR_INVALID_ARGUMENT);
697     NV_ASSERT_OR_RETURN((pAuthTagBufMemDesc != NULL), NV_ERR_INVALID_ARGUMENT);
698     NV_ASSERT_OR_RETURN((pSemaMemDesc != NULL), NV_ERR_INVALID_ARGUMENT);
699     NV_ASSERT_OR_RETURN((pMethodLength != NULL), NV_ERR_INVALID_ARGUMENT);
700 
701     pPtr = (NvU32 *)((NvU8 *)pChannel->pbCpuVA + (putIndex * pChannel->methodSizePerBlock));
702     pStartPtr = pPtr;
703     pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
704 
705     NV_PRINTF(LEVEL_INFO, "PutIndex: %x, PbOffset: %x\n", putIndex, putIndex * pChannel->methodSizePerBlock);
706     NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1     |
707                            TRANSFER_FLAGS_SHADOW_ALLOC |
708                            TRANSFER_FLAGS_SHADOW_INIT_MEM);
709     NvU8* pScrubMethdAuthTagBuf = (NvU8 *)memmgrMemDescBeginTransfer(pMemoryManager, pAuthTagBufMemDesc, transferFlags);
710     NV_ASSERT_OR_RETURN((pScrubMethdAuthTagBuf != NULL), NV_ERR_INVALID_STATE);
711 
    NvU8* pSemaAuthTagBuf = (NvU8 *)memmgrMemDescBeginTransfer(pMemoryManager, pSemaMemDesc, transferFlags);
    // Fail through cleanup so the mapping established above is released
    NV_ASSERT_OR_ELSE(pSemaAuthTagBuf != NULL,
                      status = NV_ERR_INVALID_STATE; goto cleanup);

    pMethods = (NvU32*)portMemAllocNonPaged(sizeof(NvU32) * SEC2_WL_METHOD_ARRAY_SIZE * 2);
    NV_ASSERT_OR_ELSE(pMethods != NULL,
                      status = NV_ERR_NO_MEMORY; goto cleanup);
    portMemSet(pMethods, 0, (sizeof(NvU32) * SEC2_WL_METHOD_ARRAY_SIZE * 2));
718 
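    //
    // Stage the (method, data) pairs in the scratch buffer first: the method
    // stream is signed with ccslSign() below and the resulting HMAC is placed
    // in the method stream auth tag buffer, which SEC2 uses to validate the
    // stream before executing it.
    //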
719     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NV906F_SET_OBJECT, pChannel->classEngineID, pMethods, methodIdx++), cleanup);
720     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_DECRYPT_COPY_DST_ADDR_HI, NvU64_HI32(pChannelPbInfo->dstAddr), pMethods, methodIdx++), cleanup);
721     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_DECRYPT_COPY_DST_ADDR_LO, NvU64_LO32(pChannelPbInfo->dstAddr), pMethods, methodIdx++), cleanup);
722     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_DECRYPT_COPY_SIZE, pChannelPbInfo->size, pMethods, methodIdx++), cleanup);
723     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI, NvU64_HI32(scrubMthdAuthTagBufGpuVA + scrubAuthTagBufoffset), pMethods, methodIdx++), cleanup);
724     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO, NvU64_LO32(scrubMthdAuthTagBufGpuVA + scrubAuthTagBufoffset), pMethods, methodIdx++), cleanup);
725 
726     // Use this semaphore release to track the current method stream auth tag buffer being used
727     execute |= FLD_SET_DRF(CBA2, _EXECUTE, _NOTIFY, _ENABLE, execute);
728     execute |= FLD_SET_DRF(CBA2, _EXECUTE, _NOTIFY_ON, _END, execute);
729     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SEMAPHORE_A, NvU64_HI32(pChannel->pbGpuVA + pChannel->authTagBufSemaOffset), pMethods, methodIdx++), cleanup);
730     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SEMAPHORE_B, NvU64_LO32(pChannel->pbGpuVA + pChannel->authTagBufSemaOffset), pMethods, methodIdx++), cleanup);
731     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER, scrubAuthTagBufIndex, pMethods, methodIdx++), cleanup);
732 
733     execute |= FLD_SET_DRF(CBA2, _EXECUTE, _PHYSICAL_SCRUBBER, _ENABLE, execute);
734     NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_EXECUTE, execute, pMethods, methodIdx++), cleanup);
735 
736     NvU32 hmacBufferSizeBytes = 2 * methodIdx * sizeof(NvU32);
737     NvU8  hmacDigest[SHA_256_HASH_SIZE_BYTE] = {0};
738     NvU8* pBufScrub = &pScrubMethdAuthTagBuf[scrubAuthTagBufoffset];
739 
740     ccslSign(pCcslCtx, hmacBufferSizeBytes, (NvU8 *)pMethods, hmacDigest);
741 
742     portMemCopy((void*)pBufScrub, SHA_256_HASH_SIZE_BYTE, (const void*)&hmacDigest[0], SHA_256_HASH_SIZE_BYTE);
743 
744     if (methodIdx > SEC2_WL_METHOD_ARRAY_SIZE)
745     {
746         status = NV_ERR_INVALID_ARGUMENT;
747         goto cleanup;
748     }
749 
750     for (NvU32 i = 0; i < methodIdx; i++)
751     {
752         NV_PUSH_INC_1U(RM_SUBCHANNEL, pMethods[i*2 + 0], pMethods[i*2 + 1]);
753     }
754 
    // Insert a standalone semaphore release to track sub-operation completion
756     if (bInsertFinishPayload)
757     {
758         NvU32 semaD = 0;
759         methodIdx = 0;
760         portMemSet(pMethods, 0, (sizeof(NvU32) * SEC2_WL_METHOD_ARRAY_SIZE * 2));
        semaD = FLD_SET_DRF(CBA2, _SEMAPHORE_D, _FLUSH_DISABLE, _FALSE, semaD);
762         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_HI, NvU64_HI32(semaMthdAuthTagBufGpuVA + semaAuthTagBufoffset), pMethods, methodIdx++), cleanup);
763         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_METHOD_STREAM_AUTH_TAG_ADDR_LO, NvU64_LO32(semaMthdAuthTagBufGpuVA + semaAuthTagBufoffset), pMethods, methodIdx++), cleanup);
764         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SEMAPHORE_A, NvU64_HI32(pChannel->pbGpuVA + pChannel->finishPayloadOffset), pMethods, methodIdx++), cleanup);
765         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SEMAPHORE_B, NvU64_LO32(pChannel->pbGpuVA + pChannel->finishPayloadOffset), pMethods, methodIdx++), cleanup);
766         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SET_SEMAPHORE_PAYLOAD_LOWER, pChannelPbInfo->payload, pMethods, methodIdx++), cleanup);
767         NV_ASSERT_OK_OR_GOTO(status, addMethodsToMethodBuf(NVCBA2_SEMAPHORE_D, semaD, pMethods, methodIdx++), cleanup);
768 
769         hmacBufferSizeBytes = 2 * methodIdx * sizeof(NvU32);
770         portMemSet(&hmacDigest[0], 0, SHA_256_HASH_SIZE_BYTE);
771         NvU8* pBufSema = &pSemaAuthTagBuf[semaAuthTagBufoffset];
772 
773         ccslSign(pCcslCtx, hmacBufferSizeBytes, (NvU8 *)pMethods, hmacDigest);
774 
775         portMemCopy((void*)pBufSema, SHA_256_HASH_SIZE_BYTE, (const void*)&hmacDigest[0], SHA_256_HASH_SIZE_BYTE);
776 
777         if (methodIdx > SEC2_WL_METHOD_ARRAY_SIZE)
778         {
779             status = NV_ERR_INVALID_ARGUMENT;
780             goto cleanup;
781         }
782 
783         for (NvU32 i = 0; i < methodIdx; i++)
784         {
785             NV_PUSH_INC_1U(RM_SUBCHANNEL, pMethods[i*2 + 0], pMethods[i*2 + 1]);
786         }
787     }
788 
789     channelAddHostSema(pChannel, putIndex, &pPtr);
790 
    *pMethodLength = 0;
    NvU32 methodSize = (NvU32)((NvU8*)pPtr - (NvU8*)pStartPtr);
    // Fail through cleanup so the mappings are released and pMethods is freed
    NV_ASSERT_OR_ELSE(methodSize <= pChannel->methodSizePerBlock,
                      status = NV_ERR_INVALID_STATE; goto cleanup);
    *pMethodLength = methodSize;
795 
796 cleanup:
797     if (pSemaAuthTagBuf != NULL)
798     {
799         memmgrMemDescEndTransfer(pMemoryManager, pSemaMemDesc, transferFlags);
800     }
801     if (pScrubMethdAuthTagBuf != NULL)
802     {
803         memmgrMemDescEndTransfer(pMemoryManager, pAuthTagBufMemDesc, transferFlags);
804     }
805     portMemFree(pMethods);
806     return status;
807 }
808 
/*** Implementations of static helper functions ***/
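
/*
 * Push the source/destination physical-mode (aperture) methods, the transfer
 * addresses and, for memset, the REMAP pattern methods. Returns the
 * LAUNCH_DMA SRC/DST _TYPE flags (virtual vs. physical) that the caller must
 * OR into the LAUNCH_DMA method data.
 */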
810 static NvU32
811 channelPushMemoryProperties
812 (
813     OBJCHANNEL      *pChannel,
814     CHANNEL_PB_INFO *pChannelPbInfo,
815     NvU32           **ppPtr
816 )
817 {
818     NV_ADDRESS_SPACE dstAddressSpace = pChannelPbInfo->dstAddressSpace;
819     NV_ADDRESS_SPACE srcAddressSpace = pChannelPbInfo->srcAddressSpace;
820     NvU32 dstCpuCacheAttrib = pChannelPbInfo->dstCpuCacheAttrib;
821     NvU32 srcCpuCacheAttrib = pChannelPbInfo->srcCpuCacheAttrib;
822     NvU64 dstAddr = pChannelPbInfo->dstAddr;
823     NvU64 srcAddr = pChannelPbInfo->srcAddr;
824 
825     NvU32 data = 0;
826     NvU32 retVal = 0;
827     NvU32 *pPtr = *ppPtr;
828 
829     if (!pChannelPbInfo->bCeMemcopy)
830     {
831         // If memset, push remap components
832         NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_SET_REMAP_CONST_A, pChannelPbInfo->pattern);
833         NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_SET_REMAP_COMPONENTS,
834                        DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A)          |
835                        DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, _ONE)     |
836                        DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, _ONE));
837     }
838     else
839     {
840         // If memcopy, push src addr properties
841         if (srcAddressSpace == ADDR_FBMEM)
842         {
843             data = DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _LOCAL_FB);
844         }
845         else if (srcCpuCacheAttrib == NV_MEMORY_CACHED)
846         {
847             data = DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _COHERENT_SYSMEM);
848         }
849         else
850         {
851             data = DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _NONCOHERENT_SYSMEM);
852         }
853 
854         NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_SET_SRC_PHYS_MODE, data);
855 
856         if (pChannel->bUseVasForCeCopy && srcAddressSpace == ADDR_FBMEM)
857         {
858             srcAddr = srcAddr + pChannel->fbAliasVA - pChannel->startFbOffset;
859             retVal |= DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_TYPE, _VIRTUAL);
860         }
861         else
862         {
863             retVal |= DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_TYPE, _PHYSICAL);
864         }
865 
866         NV_PUSH_INC_2U(RM_SUBCHANNEL,
867             NVB0B5_OFFSET_IN_UPPER, NvU64_HI32(srcAddr),
868             NVB0B5_OFFSET_IN_LOWER, NvU64_LO32(srcAddr));
869     }
870 
871     // Push dst addr properties
872     if (dstAddressSpace == ADDR_FBMEM)
873     {
874         data = DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _LOCAL_FB);
875     }
876     else if (dstCpuCacheAttrib == NV_MEMORY_CACHED)
877     {
878         data = DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _COHERENT_SYSMEM);
879     }
880     else
881     {
882         data = DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _NONCOHERENT_SYSMEM);
883     }
884 
885     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_SET_DST_PHYS_MODE, data);
886 
887     if (pChannel->bUseVasForCeCopy && dstAddressSpace == ADDR_FBMEM)
888     {
889         dstAddr = dstAddr + pChannel->fbAliasVA - pChannel->startFbOffset;
890         retVal |= DRF_DEF(B0B5, _LAUNCH_DMA, _DST_TYPE, _VIRTUAL);
891     }
892     else
893     {
894         retVal |= DRF_DEF(B0B5, _LAUNCH_DMA, _DST_TYPE, _PHYSICAL);
895     }
896 
897     NV_PUSH_INC_2U(RM_SUBCHANNEL,
898         NVB0B5_OFFSET_OUT_UPPER, NvU64_HI32(dstAddr),
899         NVB0B5_OFFSET_OUT_LOWER, NvU64_LO32(dstAddr));
900 
901     *ppPtr = pPtr;
902     return retVal;
903 }
904 
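/*
 * Push the final LAUNCH_DMA method, combining the aperture flags returned by
 * channelPushMemoryProperties() with the pipelining, flush, remap and
 * semaphore options selected by the caller.
 */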
905 static void
906 channelPushMethod
907 (
908     OBJCHANNEL      *pChannel,
909     CHANNEL_PB_INFO *pChannelPbInfo,
910     NvBool           bPipelined,
911     NvBool           bInsertFinishPayload,
912     NvU32            launchType,
913     NvU32            semaValue,
914     NvU32          **ppPtr
915 )
916 {
917     NvU32 pipelinedValue = 0;
918     NvU32 flushValue = 0;
919     NvU32 disablePlcKind = 0;
920     NvU32 launchParams = 0;
921     NvU32 *pPtr = *ppPtr;
922 
923     if (bPipelined)
924     {
925         pipelinedValue = DRF_DEF(B0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED);
926     }
927     else
928     {
929         pipelinedValue = DRF_DEF(B0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _NON_PIPELINED);
930     }
931 
932     if (bInsertFinishPayload)
933     {
934         flushValue = DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE);
935     }
936     else
937     {
938         flushValue = DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _FALSE);
939     }
940 
941     if (!pChannelPbInfo->bCeMemcopy)
942     {
943         switch (pChannel->hTdCopyClass)
944         {
945             case MAXWELL_DMA_COPY_A:
946             case PASCAL_DMA_COPY_A:
947             case PASCAL_DMA_COPY_B:
948             case VOLTA_DMA_COPY_A:
949                 disablePlcKind = 0;
950                 break;
            default: // For Turing and later, disable PLC
                disablePlcKind = DRF_DEF(C5B5, _LAUNCH_DMA, _DISABLE_PLC, _TRUE);
                break;
954         }
955 
956         launchParams =  DRF_DEF(B0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE) | disablePlcKind;
957     }
958 
959     NV_PUSH_INC_1U(RM_SUBCHANNEL, NVB0B5_LAUNCH_DMA,
960                    launchParams |
961                    DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH) |
962                    DRF_DEF(B0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH) |
963                    DRF_DEF(B0B5, _LAUNCH_DMA, _MULTI_LINE_ENABLE, _FALSE) |
964                    launchType |
965                    pipelinedValue |
966                    flushValue |
967                    semaValue);
968     *ppPtr = pPtr;
969 }
970