/*
 * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include "core/core.h"
#include "gpu/ce/kernel_ce.h"
#include "gpu/bus/kern_bus.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/fifo/kernel_fifo.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "kernel/gpu/mem_mgr/channel_utils.h"
#include "rmapi/rs_utils.h"
#include "utils/nvassert.h"
#include "core/prelude.h"
#include "core/locks.h"
#include "gpu/mem_mgr/ce_utils.h"
#include "kernel/gpu/mem_mgr/ce_utils_sizes.h"
#include "vgpu/rpc_headers.h"
#include "gpu/device/device.h"

#include "class/clb0b5.h" // MAXWELL_DMA_COPY_A
#include "class/clc0b5.h" // PASCAL_DMA_COPY_A
#include "class/clc1b5.h" // PASCAL_DMA_COPY_B
#include "class/clc3b5.h" // VOLTA_DMA_COPY_A
#include "class/clc5b5.h" // TURING_DMA_COPY_A
#include "class/clc8b5.h" // HOPPER_DMA_COPY_A
#include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A

#include "class/cl0080.h"
static NV_STATUS _memUtilsGetCe
(
    OBJGPU   *pGpu,
    NvHandle  hClient,
    NvHandle  hDevice,
    NvU32    *pCeInstance
)
{
    if (IS_MIG_IN_USE(pGpu))
    {
        RsClient *pClient;
        Device   *pDevice;

        NV_ASSERT_OK_OR_RETURN(
            serverGetClientUnderLock(&g_resServ, hClient, &pClient));

        NV_ASSERT_OK_OR_RETURN(
            deviceGetByHandle(pClient, hDevice, &pDevice));

        NV_ASSERT_OK_OR_RETURN(kmigmgrGetGPUInstanceScrubberCe(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu), pDevice, pCeInstance));
        return NV_OK;
    }
    else
    {
        KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, gpuUpdateEngineTable(pGpu));

        KernelCE *pKCe = NULL;

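        //
        // Pick the first usable asynchronous CE: the engine must be present
        // in HW, must not be a GRCE, and must be exposed in the GPU engine
        // table.
        //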
        KCE_ITER_ALL_BEGIN(pGpu, pKCe, 0)
            if (kbusCheckEngine_HAL(pGpu, pKernelBus, ENG_CE(pKCe->publicID)) &&
                !ceIsCeGrce(pGpu, RM_ENGINE_TYPE_COPY(pKCe->publicID)) &&
                gpuCheckEngineTable(pGpu, RM_ENGINE_TYPE_COPY(pKCe->publicID)))
            {
                *pCeInstance = pKCe->publicID;
                return NV_OK;
            }
        KCE_ITER_END
    }

    return NV_ERR_INSUFFICIENT_RESOURCES;
}

NV_STATUS
ceutilsConstruct_IMPL
(
    CeUtils                      *pCeUtils,
    OBJGPU                       *pGpu,
    KERNEL_MIG_GPU_INSTANCE      *pKernelMIGGPUInstance,
    NV0050_ALLOCATION_PARAMETERS *pAllocParams
)
{
    NV_STATUS status = NV_OK;
    NvU64 allocFlags = pAllocParams->flags;
    NvBool bForceCeId = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _FORCE_CE_ID, _TRUE, allocFlags);
    NV_ASSERT_OR_RETURN(pGpu, NV_ERR_INVALID_STATE);

    NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    pCeUtils->pGpu = pGpu;

    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _FIFO_LITE, _TRUE, allocFlags))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // Allocate channel with RM internal client
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RmClient *pClient;

    OBJCHANNEL *pChannel = (OBJCHANNEL *) portMemAllocNonPaged(sizeof(OBJCHANNEL));
    if (pChannel == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    portMemSet(pChannel, 0, sizeof(OBJCHANNEL));

    if (pCeUtils->hClient == NV01_NULL_OBJECT)
    {
        // Allocate client
        status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                         NV01_NULL_OBJECT, NV01_ROOT, &pCeUtils->hClient,
                                         sizeof(pCeUtils->hClient));
        NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
    }

    pChannel->hClient = pCeUtils->hClient;
    pClient = serverutilGetClientUnderLock(pChannel->hClient);
    NV_ASSERT_OR_GOTO(pClient != NULL, free_client);

    status = serverGetClientUnderLock(&g_resServ, pChannel->hClient, &pChannel->pRsClient);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

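    //
    // Constrain the client's handle generator to a known range; on vGPU,
    // part of the range is carved out for handles reserved by the vGPU
    // stack (VGPU_RESERVED_HANDLE_RANGE).
    //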
    if (IS_VIRTUAL(pGpu))
    {
        NV_ASSERT_OK_OR_GOTO(
            status,
            clientSetHandleGenerator(staticCast(pClient, RsClient), RS_UNIQUE_HANDLE_BASE,
                                     RS_UNIQUE_HANDLE_RANGE/2 - VGPU_RESERVED_HANDLE_RANGE),
            free_client);
    }
    else
    {
        NV_ASSERT_OK_OR_GOTO(
            status,
            clientSetHandleGenerator(staticCast(pClient, RsClient), 1U, ~0U - 1U),
            free_client);
    }

    pChannel->bClientAllocated = NV_TRUE;
    pChannel->pGpu = pGpu;
    pChannel->pKernelMIGGpuInstance = pKernelMIGGPUInstance;

    // We'll allocate a new VAS for now; sharing the client VAS will be added later
    pChannel->hVASpaceId = NV01_NULL_OBJECT;
    pChannel->bUseVasForCeCopy = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _VIRTUAL_MODE, _TRUE, allocFlags);

    pChannel->bSecure = FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _CC_SECURE, _TRUE, allocFlags);

    // Detect if we can enable fast scrub on this channel
    status = memmgrMemUtilsGetCopyEngineClass_HAL(pGpu, pMemoryManager, &pCeUtils->hTdCopyClass);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    if (((pCeUtils->hTdCopyClass == HOPPER_DMA_COPY_A)
        ) && !pChannel->bUseVasForCeCopy)
    {
        pChannel->type = FAST_SCRUBBER_CHANNEL;
        NV_PRINTF(LEVEL_INFO, "Enabled fast scrubber in construct.\n");
    }
    else
    {
        pChannel->type = CE_SCRUBBER_CHANNEL;
    }

    // For self-hosted Hopper, we can only use VA copy or the fast scrubber
    if (pMemoryManager->bCePhysicalVidmemAccessNotSupported)
    {
        if (!pChannel->bUseVasForCeCopy &&
            (pChannel->type != FAST_SCRUBBER_CHANNEL))
        {
            status = NV_ERR_NOT_SUPPORTED;
            goto free_channel;
        }
    }

    // Set up various channel resources
    status = channelSetupIDs(pChannel, pGpu, pChannel->bUseVasForCeCopy, bMIGInUse);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_client);

    channelSetupChannelBufferSizes(pChannel);

    NV_ASSERT_OK_OR_GOTO(status, channelAllocSubdevice(pGpu, pChannel), free_client);

    if (bForceCeId)
    {
        pChannel->ceId = pAllocParams->forceCeId;
    }
    else
    {
        NV_ASSERT_OK_OR_GOTO(status,
            _memUtilsGetCe(pGpu, pChannel->hClient, pChannel->deviceId, &pChannel->ceId),
            free_client);
    }

    status = memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    NV_PRINTF(LEVEL_INFO, "Channel alloc successful for ceUtils\n");
    pCeUtils->pChannel = pChannel;

    // Allocate CE states
    status = memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel);
    NV_ASSERT_OR_GOTO(status == NV_OK, free_channel);

    return status;

free_channel:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->channelId);

    if (pAllocParams->hVaspace != NV01_NULL_OBJECT)
    {
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hVASpaceId);
    }
free_client:
    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _FALSE, allocFlags))
    {
        // Only free the client if RM allocated it, not if an external caller supplied it
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    }

cleanup:
    portMemFree(pChannel);
    return status;
}

void
ceutilsDestruct_IMPL
(
    CeUtils *pCeUtils
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    OBJGPU *pGpu = pCeUtils->pGpu;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL))
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // When PCIe access is blocked, mappings should be created, used,
            // and torn down within each use, so any mapping still live here
            // has leaked
            //
            NV_PRINTF(LEVEL_ERROR, "Leaked USERD mapping from ceUtils!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pUserdMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pControlGPFifo = NULL;
        }
    }

    if (pChannel->pbCpuVA != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked pushbuffer mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pbCpuVA = NULL;
        }
    }

    if (pChannel->pTokenFromNotifier != NULL)
    {
        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            NV_PRINTF(LEVEL_ERROR, "Leaked notifier mapping!\n");
        }
        else
        {
            memmgrMemDescEndTransfer(pMemoryManager, pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            pChannel->pTokenFromNotifier = NULL;
        }
    }

    // Freeing the client also frees all of its children; Resource Server makes sure no leak can occur
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    portMemFree(pChannel);
}

void
ceutilsServiceInterrupts_IMPL(CeUtils *pCeUtils)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;

    //
    // FIXME: Bug 2463959: objmemscrub is called with the rmDeviceGpuLock in the
    // heapFree_IMPL->_stdmemPmaFree->pmaFreePages->scrubSubmitPages path.
    // Yielding while holding the rmDeviceGpuLock can lead to deadlock. Instead,
    // if the lock is held, service any interrupts on the owned CE to make progress.
    // Bug 2527660 is filed to remove this change.
    //
    if (rmDeviceGpuLockIsOwner(pChannel->pGpu->gpuInstance))
    {
        channelServiceScrubberInterrupts(pChannel);
    }
    else
    {
        osSchedule();
    }
}


static NvBool
_ceUtilsFastScrubEnabled
(
    OBJCHANNEL      *pChannel,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    OBJGPU *pGpu = pChannel->pGpu;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

    if (!memmgrIsFastScrubberEnabled(pMemoryManager))
    {
        return NV_FALSE;
    }

    //
    // Enable the memory fast scrubbing only when
    //   Channel was allocated as fastScrub channel
    //   We are doing a memset operation
    //   Memset pattern is 0
    //   DstPhysMode.target == LOCAL_FB
    //   Address is 4KB aligned
    //   LineLength is 4KB aligned
    //

    return ((pChannel->type == FAST_SCRUBBER_CHANNEL) &&
            (!pChannelPbInfo->bCeMemcopy) &&
            (pChannelPbInfo->pattern == 0) &&
            (pChannelPbInfo->dstAddressSpace == ADDR_FBMEM) &&
            (NV_IS_ALIGNED64(pChannelPbInfo->dstAddr, MEMUTIL_SCRUB_OFFSET_ALIGNMENT)) &&
            (NV_IS_ALIGNED(pChannelPbInfo->size, MEMUTIL_SCRUB_LINE_LENGTH_ALIGNMENT)));
}


//
// Helper to deal with CE_MAX_BYTES_PER_LINE
// This function may modify some fields in pChannelPbInfo
//
static NV_STATUS
_ceutilsSubmitPushBuffer
(
    OBJCHANNEL      *pChannel,
    NvBool           bPipelined,
    NvBool           bInsertFinishPayload,
    CHANNEL_PB_INFO *pChannelPbInfo
)
{
    NV_STATUS status = NV_OK;
    NvU32 methodsLength, putIndex = 0;

    NV_ASSERT_OR_RETURN(pChannelPbInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pChannel->pGpu);
    NvBool bReleaseMapping = NV_FALSE;

    //
    // Use BAR1 if CPU access is allowed, otherwise allocate and init shadow
    // buffer for DMA access
    //
    NvU32 transferFlags = (TRANSFER_FLAGS_USE_BAR1 |
                           TRANSFER_FLAGS_SHADOW_ALLOC |
                           TRANSFER_FLAGS_SHADOW_INIT_MEM);
394 NV_PRINTF(LEVEL_INFO, "Actual size of copying to be pushed: %x\n", pChannelPbInfo->size);

    status = channelWaitForFreeEntry(pChannel, &putIndex);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot get putIndex.\n");
        return status;
    }

    if (pChannel->pbCpuVA == NULL)
    {
        pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                                                       transferFlags);
        bReleaseMapping = NV_TRUE;
    }
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    if (_ceUtilsFastScrubEnabled(pChannel, pChannelPbInfo))
    {
        methodsLength = channelFillPbFastScrub(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo);
    }
    else
    {
        if (pMemoryManager->bCePhysicalVidmemAccessNotSupported)
        {
            // Self-hosted Hopper only supports VA copy or fast scrubber
            NV_ASSERT_OR_RETURN(pChannel->bUseVasForCeCopy, NV_ERR_NOT_SUPPORTED);
        }

        methodsLength = channelFillCePb(pChannel, putIndex, bPipelined, bInsertFinishPayload, pChannelPbInfo);
    }

    if (bReleaseMapping)
    {
        memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc, transferFlags);
        pChannel->pbCpuVA = NULL;
    }

    if (methodsLength == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Cannot push methods to channel.\n");
        return NV_ERR_NO_FREE_FIFOS;
    }

    //
    // Pushbuffer can be written in a batch, but GPFIFO and doorbell require
    // careful ordering so we do each write one-by-one
    //
    status = channelFillGpFifo(pChannel, putIndex, methodsLength);
    if (status != NV_OK)
    {
445 NV_PRINTF(LEVEL_ERROR, "Channel operation failures during memcopy\n");
        return status;
    }

    pChannel->lastSubmittedEntry = putIndex;

    return status;
}


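//
// Illustrative usage sketch (not part of this file): a caller fills
// CEUTILS_MEMSET_PARAMS and submits the work, then either polls the returned
// work id (_ASYNC) or relies on the synchronous wait performed below.
// pMemDesc here stands for any valid memory descriptor owned by the caller.
//
//     CEUTILS_MEMSET_PARAMS params = {0};
//     params.pMemDesc = pMemDesc;
//     params.offset   = 0;
//     params.length   = memdescGetSize(pMemDesc);
//     params.pattern  = 0;
//     params.flags    = NV0050_CTRL_MEMSET_FLAGS_ASYNC;
//     NV_ASSERT_OK(ceutilsMemset(pCeUtils, &params));
//     // Later: compare params.submittedWorkId against
//     // ceutilsUpdateProgress(pCeUtils) to detect completion.
//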
NV_STATUS
ceutilsMemset_IMPL
(
    CeUtils               *pCeUtils,
    CEUTILS_MEMSET_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU32 pteArraySize;
    NvU64 offset, memsetLength, size, pageGranularity;
    NvBool bContiguous;

    MEMORY_DESCRIPTOR *pMemDesc = pParams->pMemDesc;
    CHANNEL_PB_INFO channelPbInfo = {0};

    NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMSET_FLAGS_PIPELINED;

    if (pMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memdesc for CeUtils memset.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pMemDesc->pGpu != pCeUtils->pChannel->pGpu)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memory descriptor passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    size = memdescGetSize(pMemDesc);
    pteArraySize = memdescGetPteArraySize(pMemDesc, AT_GPU);
    bContiguous = (pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS) || (pteArraySize == 1);

    if (pParams->offset >= size)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

496 NV_PRINTF(LEVEL_INFO, "CeUtils Args to memset - offset: %llx, size: %llx \n",
497 pParams->offset, pParams->length);

    if ((pParams->length == 0) || (pParams->length > (size - pParams->offset)))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memset length passed.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_FALSE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.pattern = pParams->pattern;
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pMemDesc);
    channelPbInfo.dstCpuCacheAttrib = pMemDesc->_cpuCacheAttrib;

    pageGranularity = pMemDesc->pageArrayGranularity;
    memsetLength = pParams->length;
    offset = pParams->offset;

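    //
    // Worked example (hypothetical values): for a non-contiguous memdesc with
    // a 4 KB pageArrayGranularity, offset = 0x1800 and length = 0x3000 are
    // split into chunks of 0x800, 0x1000, 0x1000 and 0x800 bytes, so that no
    // chunk crosses a page boundary or exceeds CE_MAX_BYTES_PER_LINE.
    //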
    do
    {
        NvU64 maxContigSize = bContiguous ? memsetLength : (pageGranularity - offset % pageGranularity);
        NvU32 memsetSizeContig = (NvU32)NV_MIN(NV_MIN(memsetLength, maxContigSize), CE_MAX_BYTES_PER_LINE);

        channelPbInfo.dstAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memset dstAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, memsetSizeContig);

        channelPbInfo.size = memsetSizeContig;
        status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, memsetSizeContig == memsetLength, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memset.\n");
            return status;
        }

        //
        // Allow _LAUNCH_DMA methods that belong to the same memset operation
        // to be pipelined after each other, as there are no dependencies
        //
        bPipelined = NV_TRUE;

        memsetLength -= memsetSizeContig;
        offset += memsetSizeContig;
    } while (memsetLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMSET_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memset payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Wait (with timeout) for the semaphore to report the finish payload
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}

NV_STATUS
ceutilsMemcopy_IMPL
(
    CeUtils                *pCeUtils,
    CEUTILS_MEMCOPY_PARAMS *pParams
)
{
    OBJCHANNEL *pChannel = pCeUtils->pChannel;
    NV_STATUS status = NV_OK;

    NvU64 srcSize, dstSize, copyLength, srcPageGranularity, dstPageGranularity;
    NvBool bSrcContig, bDstContig;

    CHANNEL_PB_INFO channelPbInfo = {0};
    MEMORY_DESCRIPTOR *pDstMemDesc = pParams->pDstMemDesc;
    MEMORY_DESCRIPTOR *pSrcMemDesc = pParams->pSrcMemDesc;

    NvU64 length = pParams->length;
    NvU64 srcOffset = pParams->srcOffset;
    NvU64 dstOffset = pParams->dstOffset;

    NvBool bPipelined = pParams->flags & NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED;

    // Validate params
    if ((pSrcMemDesc == NULL) || (pDstMemDesc == NULL))
    {
        NV_PRINTF(LEVEL_ERROR, "Src/Dst Memory descriptor should be valid.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((pSrcMemDesc->pGpu != pCeUtils->pChannel->pGpu) ||
        (pDstMemDesc->pGpu != pCeUtils->pChannel->pGpu))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils does not support p2p copies right now.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    srcSize = memdescGetSize(pSrcMemDesc);
    dstSize = memdescGetSize(pDstMemDesc);

    if ((srcOffset >= srcSize) || (dstOffset >= dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid offset passed for the src/dst memdesc.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if ((length == 0) ||
        (srcOffset + length > srcSize) || (dstOffset + length > dstSize))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid memcopy length.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    channelPbInfo.bCeMemcopy = NV_TRUE;
    channelPbInfo.payload = pCeUtils->lastSubmittedPayload + 1;
    pCeUtils->lastSubmittedPayload = channelPbInfo.payload;

    channelPbInfo.srcAddressSpace = memdescGetAddressSpace(pSrcMemDesc);
    channelPbInfo.dstAddressSpace = memdescGetAddressSpace(pDstMemDesc);

    channelPbInfo.srcCpuCacheAttrib = pSrcMemDesc->_cpuCacheAttrib;
    channelPbInfo.dstCpuCacheAttrib = pDstMemDesc->_cpuCacheAttrib;

    channelPbInfo.bSecureCopy = pParams->bSecureCopy;
    channelPbInfo.bEncrypt = pParams->bEncrypt;
    channelPbInfo.authTagAddr = pParams->authTagAddr;
    channelPbInfo.encryptIvAddr = pParams->encryptIvAddr;

    srcPageGranularity = pSrcMemDesc->pageArrayGranularity;
    dstPageGranularity = pDstMemDesc->pageArrayGranularity;
    bSrcContig = memdescGetContiguity(pSrcMemDesc, AT_GPU);
    bDstContig = memdescGetContiguity(pDstMemDesc, AT_GPU);

    copyLength = length;

    do
    {
        //
        // This algorithm finds the maximum contiguous region of both src and
        // dst for each copy, iterating until the whole range has been
        // submitted to the CE
        //
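        //
        // Worked example (hypothetical values): with a non-contiguous source
        // of 4 KB granularity at srcOffset = 0xC00 and a contiguous
        // destination, the first chunk is capped at 0x400 bytes by the source
        // page boundary; subsequent chunks are limited only by the remaining
        // length, the destination contiguity and CE_MAX_BYTES_PER_LINE.
        //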
        NvU64 maxContigSizeSrc = bSrcContig ? copyLength : (srcPageGranularity - srcOffset % srcPageGranularity);
        NvU64 maxContigSizeDst = bDstContig ? copyLength : (dstPageGranularity - dstOffset % dstPageGranularity);
        NvU32 copySizeContig = (NvU32)NV_MIN(NV_MIN(copyLength, NV_MIN(maxContigSizeSrc, maxContigSizeDst)), CE_MAX_BYTES_PER_LINE);

        channelPbInfo.srcAddr = memdescGetPhysAddr(pSrcMemDesc, AT_GPU, srcOffset);
        channelPbInfo.dstAddr = memdescGetPhysAddr(pDstMemDesc, AT_GPU, dstOffset);

        NV_PRINTF(LEVEL_INFO, "CeUtils Memcopy dstAddr: %llx, srcAddr: %llx, size: %x\n",
                  channelPbInfo.dstAddr, channelPbInfo.srcAddr, copySizeContig);

        channelPbInfo.size = copySizeContig;
        status = _ceutilsSubmitPushBuffer(pChannel, bPipelined, copySizeContig == copyLength, &channelPbInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Cannot submit push buffer for memcopy.\n");
            return status;
        }

        //
        // Allow _LAUNCH_DMA methods that belong to the same copy operation
        // to be pipelined after each other, as there are no dependencies
        //
        bPipelined = NV_TRUE;

        copyLength -= copySizeContig;
        srcOffset += copySizeContig;
        dstOffset += copySizeContig;
    } while (copyLength != 0);

    if (pParams->flags & NV0050_CTRL_MEMCOPY_FLAGS_ASYNC)
    {
        NV_PRINTF(LEVEL_INFO, "Async memcopy payload returned: 0x%x\n", channelPbInfo.payload);
        pParams->submittedWorkId = channelPbInfo.payload;
    }
    else
    {
        // Wait (with timeout) for the semaphore to report the finish payload
        status = channelWaitForFinishPayload(pChannel, channelPbInfo.payload);
        if (status == NV_OK)
        {
            NV_PRINTF(LEVEL_INFO, "Work was done from RM PoV lastSubmitted = 0x%x\n", channelPbInfo.payload);
        }
    }

    return status;
}


// This function updates pCeUtils->lastCompletedPayload and handles wrap-around
NvU64
ceutilsUpdateProgress_IMPL
(
    CeUtils *pCeUtils
)
{
    NV_ASSERT((pCeUtils != NULL) && (pCeUtils->pChannel != NULL));

    NvU32 hwCurrentCompletedPayload = 0;
    NvU64 swLastCompletedPayload = pCeUtils->lastCompletedPayload;

    //
    // CeUtils uses a 64-bit index to track the work submitted, but HW supports
    // only a 32-bit semaphore. The current completed id is calculated here,
    // based on lastCompletedPayload and the current HW semaphore value.
    //
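    //
    // Worked example (illustrative values): if lastCompletedPayload is
    // 0x1FFFFFFFE and the HW semaphore now reads 0x00000005, the 32-bit
    // comparison below detects the wrap (0x5 < 0xFFFFFFFE), the upper 32 bits
    // are incremented, and the result is 0x200000005.
    //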
    hwCurrentCompletedPayload = READ_CHANNEL_PAYLOAD_SEMA(pCeUtils->pChannel);

    // No work has been completed since we checked last time
    if (hwCurrentCompletedPayload == (NvU32)swLastCompletedPayload)
    {
        return swLastCompletedPayload;
    }

    // Check for the wrap-around case and increment the upper 32 bits
    if (hwCurrentCompletedPayload < (NvU32)swLastCompletedPayload)
    {
        swLastCompletedPayload += 0x100000000ULL;
    }

    // Update the lower 32 bits regardless of whether wrap-around happened
718 swLastCompletedPayload &= 0xFFFFFFFF00000000ULL;
719 swLastCompletedPayload |= (NvU64)hwCurrentCompletedPayload;
720
721 pCeUtils->lastCompletedPayload = swLastCompletedPayload;
722 return swLastCompletedPayload;
723 }
724
725 NV_STATUS
ceutilsapiCtrlCmdCheckProgress_IMPL(CeUtilsApi * pCeUtilsApi,NV0050_CTRL_CHECK_PROGRESS_PARAMS * pParams)726 ceutilsapiCtrlCmdCheckProgress_IMPL
727 (
728 CeUtilsApi *pCeUtilsApi,
729 NV0050_CTRL_CHECK_PROGRESS_PARAMS *pParams
730 )
731 {
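    //
    // Work ids increase monotonically, so the work is complete once the
    // 64-bit completed payload has caught up with the submitted id;
    // pParams->result is left unchanged while the work is still pending.
    //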
    if (pParams->submittedWorkId <= ceutilsUpdateProgress(pCeUtilsApi->pCeUtils))
    {
        pParams->result = NV0050_CTRL_CHECK_PROGRESS_RESULT_FINISHED;
    }

    return NV_OK;
}

NV_STATUS
ceutilsapiConstruct_IMPL
(
    CeUtilsApi                   *pCeUtilsApi,
    CALL_CONTEXT                 *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    NV0050_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;

    if (FLD_TEST_DRF(0050_CEUTILS, _FLAGS, _EXTERNAL, _TRUE, pAllocParams->flags))
    {
        NV_PRINTF(LEVEL_ERROR, "CeUtils: unsupported flags = 0x%llx\n", pAllocParams->flags);
        return NV_ERR_NOT_SUPPORTED;
    }

    return objCreate(&pCeUtilsApi->pCeUtils, pCeUtilsApi, CeUtils, GPU_RES_GET_GPU(pCeUtilsApi), NULL, pAllocParams);
}

void
ceutilsapiDestruct_IMPL
(
    CeUtilsApi *pCeUtilsApi
)
{
    objDelete(pCeUtilsApi->pCeUtils);
}

NV_STATUS
ceutilsapiCtrlCmdMemset_IMPL
(
    CeUtilsApi                *pCeUtilsApi,
    NV0050_CTRL_MEMSET_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pPhysmemRef;
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    CEUTILS_MEMSET_PARAMS internalParams = {0};

    if (pParams->hMemory == 0)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hMemory, &pPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;

    internalParams.pMemDesc = pMemDesc;
    internalParams.offset = pParams->offset;
    internalParams.length = pParams->length;
    internalParams.pattern = pParams->pattern;
    internalParams.flags = pParams->flags;

    status = ceutilsMemset(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}

NV_STATUS
ceutilsapiCtrlCmdMemcopy_IMPL
(
    CeUtilsApi                 *pCeUtilsApi,
    NV0050_CTRL_MEMCOPY_PARAMS *pParams
)
{
    NV_STATUS status = NV_OK;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pCeUtilsApi);
    RsResourceRef *pSrcPhysmemRef;
    RsResourceRef *pDstPhysmemRef;
    MEMORY_DESCRIPTOR *pSrcMemDesc = NULL;
    MEMORY_DESCRIPTOR *pDstMemDesc = NULL;
    CEUTILS_MEMCOPY_PARAMS internalParams = {0};

    if ((pParams->hSrcMemory == 0) || (pParams->hDstMemory == 0))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = serverutilGetResourceRef(hClient, pParams->hDstMemory, &pDstPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pDstMemDesc = (dynamicCast(pDstPhysmemRef->pResource, Memory))->pMemDesc;

    status = serverutilGetResourceRef(hClient, pParams->hSrcMemory, &pSrcPhysmemRef);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get resource in resource server for physical memory handle.\n");
        return status;
    }
    pSrcMemDesc = (dynamicCast(pSrcPhysmemRef->pResource, Memory))->pMemDesc;

    internalParams.pSrcMemDesc = pSrcMemDesc;
    internalParams.pDstMemDesc = pDstMemDesc;
    internalParams.srcOffset = pParams->srcOffset;
    internalParams.dstOffset = pParams->dstOffset;
    internalParams.length = pParams->length;
    internalParams.flags = pParams->flags;

    status = ceutilsMemcopy(pCeUtilsApi->pCeUtils, &internalParams);
    if (status == NV_OK)
    {
        pParams->submittedWorkId = internalParams.submittedWorkId;
    }

    return status;
}