/*
 * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "gpu/gpu.h"
#include "gpu/device/device.h"
#include "os/os.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/heap.h"
#include "gpu/mem_mgr/mem_scrub.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "gpu/ce/kernel_ce.h"
#include "gpu/ce/kernel_ce_private.h"
#include "mem_mgr/gpu_vaspace.h"
#include "core/locks.h"
#include "nvRmReg.h"
#include "rmapi/rs_utils.h"
#include "mem_mgr/ctx_buf_pool.h"
#include "vgpu/rpc.h"
#include "kernel/gpu/fifo/kernel_channel.h"
#include "platform/chipset/chipset.h"
#include "platform/sli/sli.h"

#include "class/clc0b5sw.h"
#include "class/cla06fsubch.h" // NVA06F_SUBCHANNEL_COPY_ENGINE
#include "class/cl003e.h"      // NV01_MEMORY_SYSTEM
#include "class/cl0040.h"      // NV01_MEMORY_LOCAL_USER
#include "class/cl50a0.h"      // NV50_MEMORY_VIRTUAL
#include "class/cl00c2.h"      // NV01_MEMORY_LOCAL_PHYSICAL
#include "class/clb0b5.h"      // MAXWELL_DMA_COPY_A
#include "class/clc8b5.h"      // HOPPER_DMA_COPY_A
#include "class/cl90f1.h"      // FERMI_VASPACE_A

#define NONSTALL_METHOD_SIZE            8
#define SEMAPHORE_ONLY_METHOD_SIZE      32
#define MAX_EXTRA_PAYLOAD               (NONSTALL_METHOD_SIZE + SEMAPHORE_ONLY_METHOD_SIZE)
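//
// Space-accounting sketch (derived from the method-push logic later in this
// file): each scrub/copy block occupies methodSizePerBlock bytes of methods
// and may be followed by a non-stall interrupt (NONSTALL_METHOD_SIZE) and an
// extra semaphore release (SEMAPHORE_ONLY_METHOD_SIZE), so the fill loop
// reserves methodSizePerBlock + MAX_EXTRA_PAYLOAD bytes per block.
//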
static NV_STATUS _memUtilsChannelAllocatePB_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel);
static NV_STATUS _memUtilsAllocateChannel(OBJGPU *pGpu, MemoryManager *pMemoryManager, NvHandle hClientId,
                                          NvHandle hDeviceId, NvHandle hChannelId, NvHandle hObjectError,
                                          NvHandle hObjectBuffer, OBJCHANNEL *pChannel);
static NV_STATUS _memUtilsAllocCe_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
                                        NvHandle hClientId, NvHandle hDeviceId, NvHandle hChannelId, NvHandle hCopyObjectId);
static NV_STATUS _memUtilsAllocateUserD(OBJGPU *pGpu, MemoryManager *pMemoryManager, NvHandle hClientId,
                                        NvHandle hDeviceId, OBJCHANNEL *pChannel);
static NV_STATUS _memUtilsMapUserd_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager,
                                         OBJCHANNEL *pChannel, NvHandle hClientId, NvHandle hDeviceId,
                                         NvHandle hChannelId, NvBool bUseRmApiForBar1);
static NV_STATUS _memUtilsAllocateReductionSema(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel);
static NvU32 _ceChannelScheduleWork_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
                                          RmPhysAddr src, NV_ADDRESS_SPACE srcAddressSpace, NvU32 srcCpuCacheAttrib,
                                          RmPhysAddr dst, NV_ADDRESS_SPACE dstAddressSpace, NvU32 dstCpuCacheAttrib,
                                          NvU64 size, NvBool blocking, NvBool insertFinishPayload, NvBool bMemcopy);
static void _ceChannelUpdateGpFifo_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
                                         NvU32 gpOffset, NvU32 gpSize);
static NvU32 _ceChannelPushMethodsBlock_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
                                              RmPhysAddr src, NV_ADDRESS_SPACE srcAddressSpace, NvU32 srcCpuCacheAttrib,
                                              RmPhysAddr dst, NV_ADDRESS_SPACE dstAddressSpace, NvU32 dstCpuCacheAttrib,
                                              NvU64 size, NvU32 **pPtr, NvBool addPayloadSema,
                                              NvBool addNonStallIntr, NvBool addFinishPayload, NvBool bMemcopy);
static NvU32 _getSpaceInPb(OBJCHANNEL *pChannel);
static NvBool _checkSynchronization(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel, NvU32 block);

static NV_STATUS
_memUtilsAllocateReductionSema
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    NV_STATUS                   rmStatus;
    NvU32                       i;
    NV_STATUS                   lockStatus;
    RM_API                     *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

    // allocate physical memory for a bitmap semaphore
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type  = NVOS32_TYPE_IMAGE;
    memAllocParams.size  = (((pChannel->blockCount + 31)/32)*4);
    memAllocParams.attr  = 0;
    memAllocParams.attr  = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    // memAllocParams.attr |= NVOS32_ATTR_COHERENCY_WRITE_COMBINE;
    memAllocParams.attr2 = NVOS32_ATTR2_NONE;
    memAllocParams.flags = 0;

    //
    // When the APM feature is enabled, all RM internal sysmem allocations must
    // be in unprotected memory.
    // When Hopper CC is enabled, RM internal sysmem allocations that need to
    // be accessed from the GPU should be in unprotected memory, but sysmem
    // allocations that the GPU never accesses should be in protected memory.
    //
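    //
    // Sizing sketch: the bitmap semaphore holds one bit per scrub block,
    // rounded up to whole 32-bit words, i.e. ceil(blockCount / 32) words * 4
    // bytes. For example, blockCount = 100 gives 4 words = 16 bytes.
    //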
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                pChannel->deviceId,
                                pChannel->bitMapSemPhysId,
                                NV01_MEMORY_SYSTEM,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // allocate virtual memory for a bitmap semaphore
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner  = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type   = NVOS32_TYPE_IMAGE;
    memAllocParams.size   = (((pChannel->blockCount + 31)/32)*4);
    memAllocParams.attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2  = NVOS32_ATTR2_NONE;
    memAllocParams.flags  = 0;
    memAllocParams.flags |= NVOS32_ALLOC_FLAGS_VIRTUAL;

    NV_ASSERT_OK_OR_RETURN(
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                pChannel->deviceId,
                                pChannel->bitMapSemVirtId,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    lockStatus = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM);
    if (lockStatus != NV_OK)
    {
        NV_ASSERT_FAILED("Could not get back lock after allocating reduction sema");
        return NV_ERR_GENERIC;
    }

    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        pRmApi->Map(pRmApi,
                    pChannel->hClient,
                    pChannel->deviceId,
                    pChannel->bitMapSemVirtId,
                    pChannel->bitMapSemPhysId, // hMemory
                    0,
                    (((pChannel->blockCount + 31)/32)*4),
                    NV04_MAP_MEMORY_FLAGS_NONE,
                    &pChannel->pbGpuBitMapVA),
        exit_sema_creation);

    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        pRmApi->MapToCpu(pRmApi,
                         pChannel->hClient,
                         pChannel->deviceId,
                         pChannel->bitMapSemPhysId,
                         0,
                         (((pChannel->blockCount + 31)/32)*4),
                         (void **)&pChannel->pbBitMapVA,
                         0),
        exit_sema_creation);

    // zero-initialize the bitmap, one 32-bit word at a time
    for (i = 0; i < (((pChannel->blockCount + 31) / 32) * 4);)
    {
        MEM_WR32((NvU8*)pChannel->pbBitMapVA + (i), 0);
        i = i + 4;
    }

    return NV_OK;

exit_sema_creation:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    NV_PRINTF(LEVEL_INFO, "end NV_STATUS=0x%08x\n", rmStatus);
    return rmStatus;
}

static NV_STATUS
_memUtilsChannelAllocatePB_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel

    // OBJMEMUTILS *to be added here
)
{
    NV_STATUS                   rmStatus = NV_OK;
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    NvHandle                    hDevice;
    NvHandle                    hPhysMem;
    NvU64                       size;
    NvHandle                    hVirtMem;
    NvU32                       hClass;
    NvU32                       attr;
    NvU32                       flags        = 0;
    NvU32                       attrNotifier = NVOS32_ATTR_NONE;
    RM_API                     *pRmApi       = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // Apply registry overrides to the channel pushbuffer.
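    //
    // NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER selects the pushbuffer
    // aperture: _VID places it in vidmem (reserved heap unless MIG is in
    // use), _COH/_NCOH in cached/uncached sysmem, and _DEFAULT in uncached
    // sysmem with a cached notifier (see the comment in the default case).
    //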
    switch (DRF_VAL(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, pGpu->instLocOverrides4))
    {
        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_VID:
            hClass = NV01_MEMORY_LOCAL_USER;
            attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
                     DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);

            flags = NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM;
            if (!IS_MIG_IN_USE(pGpu))
            {
                attr |= DRF_DEF(OS32, _ATTR, _ALLOCATE_FROM_RESERVED_HEAP, _YES);
            }
            attrNotifier = attr;
            break;

        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_COH:
            hClass = NV01_MEMORY_SYSTEM;
            attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                     DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED);
            attrNotifier = attr;
            break;

        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_NCOH:
            hClass = NV01_MEMORY_SYSTEM;
            attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                     DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
            attrNotifier = attr;
            break;

        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_DEFAULT:
        default:
            hClass = NV01_MEMORY_SYSTEM;
            attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                     DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);

            //
            // The work-submit token is read from notifier memory to support
            // VM migration for the memory scrubber. The token is read from
            // notifier memory every time the scrubber submits work, so
            // changing the notifier memory from the default setting to
            // cached helps performance.
            //
            attrNotifier = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                           DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED);
            break;
    }

    hDevice  = pChannel->deviceId;
    hPhysMem = pChannel->physMemId;
    hVirtMem = pChannel->pushBufferId;
    size     = pChannel->channelSize;

    LOCK_ASSERT_AND_RETURN(!rmGpuLockIsOwner());

    // allocate the physical memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner         = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type          = NVOS32_TYPE_IMAGE;
    memAllocParams.size          = size;
    memAllocParams.attr          = attr;
    memAllocParams.attr2         = NVOS32_ATTR2_NONE;
    memAllocParams.flags         = flags;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

    //
    // When APM is enabled, all RM internal allocations must go to
    // unprotected memory, irrespective of vidmem or sysmem.
    // When Hopper CC is enabled, RM internal sysmem allocations that need to
    // be accessed from the GPU should be in unprotected memory, but all
    // vidmem allocations must go to protected memory.
    //
    if (gpuIsApmFeatureEnabled(pGpu) ||
        FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, memAllocParams.attr))
    {
        memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION,
                                        _UNPROTECTED);
    }

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                hDevice,
                                hPhysMem,
                                hClass,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // allocate the virtual memory
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner    = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type     = NVOS32_TYPE_IMAGE;
    memAllocParams.size     = size;
    memAllocParams.attr     = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2    = NVOS32_ATTR2_NONE;
    memAllocParams.flags   |= NVOS32_ALLOC_FLAGS_VIRTUAL;
    memAllocParams.hVASpace = pChannel->hVASpaceId;
    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                hDevice,
                                hVirtMem,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // allocate the physical memory for the notifier
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner         = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type          = NVOS32_TYPE_IMAGE;
    memAllocParams.size          = pChannel->channelNotifierSize;
    memAllocParams.attr          = attrNotifier;
    memAllocParams.attr2         = NVOS32_ATTR2_NONE;
    memAllocParams.flags         = 0;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

    //
    // When APM is enabled, all RM internal allocations must go to
    // unprotected memory, irrespective of vidmem or sysmem.
    // When Hopper CC is enabled, RM internal sysmem allocations that need to
    // be accessed from the GPU should be in unprotected memory, but all
    // vidmem allocations must go to protected memory.
    //
    if (gpuIsApmFeatureEnabled(pGpu) ||
        FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, memAllocParams.attr))
    {
        memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION,
                                        _UNPROTECTED);
    }

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                hDevice,
                                pChannel->errNotifierIdPhys,
                                hClass,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    // allocate virtual memory for the notifier
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner    = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type     = NVOS32_TYPE_IMAGE;
    memAllocParams.size     = pChannel->channelNotifierSize;
    memAllocParams.attr     = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
    memAllocParams.attr2    = NVOS32_ATTR2_NONE;
    memAllocParams.flags   |= NVOS32_ALLOC_FLAGS_VIRTUAL;
    memAllocParams.hVASpace = pChannel->hVASpaceId;

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                pChannel->hClient,
                                hDevice,
                                pChannel->errNotifierIdVirt,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    return rmStatus;
}

NV_STATUS
memmgrMemUtilsChannelInitialize_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    NV_STATUS        rmStatus;
    NV_STATUS        lockStatus;
    RsClient        *pRsClient         = pChannel->pRsClient;
    NvHandle         hClient           = pChannel->hClient;
    NvHandle         hDevice           = pChannel->deviceId;
    NvHandle         hPhysMem          = pChannel->physMemId;
    NvU64            size              = pChannel->channelSize;
    NvHandle         hChannel          = pChannel->channelId;
    NvHandle         hErrNotifierVirt  = pChannel->errNotifierIdVirt;
    NvHandle         hErrNotifierPhys  = pChannel->errNotifierIdPhys;
    NvHandle         hPushBuffer       = pChannel->pushBufferId;
    RM_API          *pRmApi            = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NvBool           bMIGInUse         = IS_MIG_IN_USE(pGpu);
    NvU8            *pErrNotifierCpuVA = NULL;
    NV_ADDRESS_SPACE userdAddrSpace;
    NV_ADDRESS_SPACE pushBuffAddrSpace;
    NV_ADDRESS_SPACE gpFifoAddrSpace;
    OBJSYS          *pSys              = SYS_GET_INSTANCE();
    OBJCL           *pCl               = SYS_GET_CL(pSys);
    NvU32            cacheSnoopFlag    = 0;
    NvBool           bUseRmApiForBar1  = NV_FALSE;

    //
    // Heap-allocate one chunk of memory to hold all of our alloc parameters,
    // to reduce stack usage.
    //
    union
    {
        NV_VASPACE_ALLOCATION_PARAMETERS va;
        NV_MEMORY_ALLOCATION_PARAMS      mem;
    } *pParams = NULL;

    if (pCl->getProperty(pCl, PDB_PROP_CL_IS_CHIPSET_IO_COHERENT))
    {
        cacheSnoopFlag = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
    }
    pParams = portMemAllocNonPaged(sizeof(*pParams));
    if (pParams == NULL)
    {
        rmStatus = NV_ERR_NO_MEMORY;
        goto exit_free_client;
    }

    //
    // Client-allocated USERD is only supported on Volta+.
    // TODO: Use a property to check if client-allocated USERD is supported.
    //
    pChannel->bClientUserd = IsVOLTAorBetter(pGpu);

    //
    // We need to allocate a VAS to use for CE copies, but also for
    // GSP-RM + MIG, so that it doesn't get the device default VAS during
    // channel bind (which is not properly handled by split VAS in MIG
    // currently). We only need the identity mapping when actually using
    // the VAS for copies.
    //
    if (pChannel->bUseVasForCeCopy ||
        (IS_GSP_CLIENT(pGpu) && bMIGInUse))
    {
        NvBool bAcquireLock  = NV_FALSE;
        NvU64  startFbOffset = GPU_GET_HEAP(pGpu)->base;
        NvU64  fbSize        = GPU_GET_HEAP(pGpu)->total;
        NvU64  vaStartOffset = startFbOffset;

        NV_PRINTF(LEVEL_INFO, "Channel VAS heap base: %llx total: %llx \n", GPU_GET_HEAP(pGpu)->base,
                  GPU_GET_HEAP(pGpu)->total);

        pChannel->startFbOffset = startFbOffset;
        pChannel->fbSize        = fbSize;

        if (pChannel->bUseVasForCeCopy)
        {
            NV_ASSERT_OK_OR_GOTO(rmStatus,
                clientGenResourceHandle(pRsClient, &pChannel->hFbAlias),
                exit_free_client);

            rmStatus = memmgrMemUtilsCreateMemoryAlias_HAL(pGpu, pMemoryManager, pChannel);
            if (rmStatus != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Setting identity mapping failed, status: %x\n", rmStatus);
                goto exit_free_client;
            }
        }

        {
            NV_VASPACE_ALLOCATION_PARAMETERS *pVa = &pParams->va;

            portMemSet(pVa, 0, sizeof(*pVa));
            pVa->index  = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW;
            pVa->vaBase = pChannel->startFbOffset;

            //
            // How large should we go here? We definitely need more than
            // heapSize to allocate other metadata related to the channel.
            // We also need to account for the discontiguous VA range for
            // split VAS, where we allocate 4GB to (4GB + 512MB) for the
            // server VAS (mirrored). A rough VASpace layout will be
            // documented here:
            //
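            //
            // Sizing sketch for the additions below: the VAS must hold the
            // FB identity map (fbSize), the channel buffers (channelSize),
            // scratch space (SCRUBBER_VASPACE_BUFFER_SIZE), and, with split
            // VAS, the server-RM-managed range as well.
            //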
            if (gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
            {
                pVa->vaSize += (SPLIT_VAS_SERVER_RM_MANAGED_VA_START +
                                SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE);
            }
            pVa->vaSize += fbSize + pChannel->channelSize + SCRUBBER_VASPACE_BUFFER_SIZE;

            //
            // We definitely need ALLOW_ZERO_ADDRESS, but SKIP_SCRUB_MEMPOOL is a patch
            // until we figure out the right place for scrubber page tables.
            //
            pVa->flags |= NV_VASPACE_ALLOCATION_FLAGS_ALLOW_ZERO_ADDRESS |
                          NV_VASPACE_ALLOCATION_FLAGS_SKIP_SCRUB_MEMPOOL |
                          NV_VASPACE_ALLOCATION_FLAGS_OPTIMIZE_PTETABLE_MEMPOOL_USAGE;

            if (!IS_MIG_IN_USE(pGpu))
            {
                pVa->flags |= NV_VASPACE_ALLOCATION_FLAGS_PTETABLE_HEAP_MANAGED;
            }

            if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
            {
                rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
                bAcquireLock = NV_TRUE;
                pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
            }

            rmStatus = pRmApi->AllocWithHandle(pRmApi, hClient, pChannel->deviceId,
                                               pChannel->hVASpaceId, FERMI_VASPACE_A,
                                               pVa, sizeof(*pVa));
        }

        if (bAcquireLock)
        {
            NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM));
            bAcquireLock = NV_FALSE;
            pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
        }

        if (rmStatus != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "failed allocating scrubber vaspace, status=0x%x\n",
                      rmStatus);
            goto exit_free_client;
        }

        rmStatus = vaspaceGetByHandleOrDeviceDefault(pRsClient,
                                                     pChannel->deviceId,
                                                     pChannel->hVASpaceId,
                                                     &pChannel->pVAS);
        if (rmStatus != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "failed getting the scrubber vaspace from handle, status=0x%x\n",
                      rmStatus);
            goto exit_free_client;
        }

        if (pChannel->bUseVasForCeCopy)
        {
            if (!gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
            {
                rmStatus = vaspacePinRootPageDir(pChannel->pVAS, pGpu);
                if (rmStatus != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR, "failed pinning down scrubber VAS, status=0x%x\n",
                              rmStatus);
                    goto exit_free_client;
                }
            }

            NV_ASSERT_OK_OR_GOTO(rmStatus,
                clientGenResourceHandle(pRsClient, &pChannel->hFbAliasVA), exit_free_client);
        }

        if (gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            OBJGVASPACE *pGVAS = dynamicCast(pChannel->pVAS, OBJGVASPACE);
            vaStartOffset += pGVAS->vaLimitServerRMOwned + 1;
            pChannel->vaStartOffset = vaStartOffset;
        }

        if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
        {
            rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
            bAcquireLock = NV_TRUE;
            pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
        }
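        //
        // Identity-mapping sketch: the virtual allocation below is placed at
        // a fixed offset (vaStartOffset) spanning fbSize and is then mapped
        // 1:1 onto the FB alias, so that within the scrubber VAS a GPU VA
        // corresponds directly to its FB offset.
        //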
        // Allocate virtual memory for the identity mapping
        if (pChannel->bUseVasForCeCopy)
        {
            NV_MEMORY_ALLOCATION_PARAMS *pMem = &pParams->mem;
            portMemSet(pMem, 0, sizeof(*pMem));
            pMem->owner    = NVOS32_TYPE_OWNER_RM;
            pMem->type     = NVOS32_TYPE_IMAGE;
            pMem->size     = pChannel->fbSize;
            pMem->attr     = (DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) |
                              DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG));
            pMem->attr2    = NVOS32_ATTR2_NONE;
            pMem->offset   = vaStartOffset;
            pMem->flags    = 0;
            pMem->flags   |= NVOS32_ALLOC_FLAGS_VIRTUAL |
                             NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE |
                             NVOS32_ALLOC_FLAGS_LAZY;
            pMem->hVASpace = pChannel->hVASpaceId;

            rmStatus = pRmApi->AllocWithHandle(pRmApi,
                                               hClient,
                                               pChannel->deviceId,
                                               pChannel->hFbAliasVA,
                                               NV50_MEMORY_VIRTUAL,
                                               pMem,
                                               sizeof(*pMem));
        }

        if (bAcquireLock)
        {
            NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM));
            bAcquireLock = NV_FALSE;
            pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
        }

        if (rmStatus != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Allocating VASpace for (base, size): (%llx, %llx) failed,"
                      " with status: %x\n", vaStartOffset, pChannel->fbSize, rmStatus);
            goto exit_free_client;
        }

        // set up the VA -> PA mapping
        if (pChannel->bUseVasForCeCopy)
        {
            NV_CHECK_OK_OR_GOTO(
                rmStatus,
                LEVEL_ERROR,
                pRmApi->Map(pRmApi,
                            hClient,
                            pChannel->deviceId,
                            pChannel->hFbAliasVA,
                            pChannel->hFbAlias,
                            0,
                            pChannel->fbSize,
                            DRF_DEF(OS46, _FLAGS, _ACCESS, _READ_WRITE) |
                            DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _BIG) |
                            DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE),
                            &pChannel->fbAliasVA),
                exit_free_client);

            NV_PRINTF(LEVEL_INFO, "Scrubber VAS :%x identity mapped with start addr: %llx, size: %llx\n",
                      pChannel->hFbAliasVA, pChannel->fbAliasVA, pChannel->fbSize);
        }
    }

    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

    //
    // Fetch the physical location of the pushbuffer.
    //
    // Bug 3434881 filed to track the following:
    // a. Implementation of a utility function to parse the
    //    pushbuffer and USERD regkeys.
    // b. Replace all instances of pushbuffer/USERD regkey
    //    parsing with the utility function.
    //
    switch (DRF_VAL(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, pGpu->instLocOverrides4))
    {
        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_VID:
            pushBuffAddrSpace = ADDR_FBMEM;
            break;

        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_COH:
        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_NCOH:
        case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_DEFAULT:
        default:
            pushBuffAddrSpace = ADDR_SYSMEM;
            break;
    }

    gpFifoAddrSpace = pushBuffAddrSpace;

    // Fetch the physical location of USERD
    switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides))
    {
        case NV_REG_STR_RM_INST_LOC_USERD_NCOH:
        case NV_REG_STR_RM_INST_LOC_USERD_COH:
            userdAddrSpace = ADDR_SYSMEM;
            break;

        case NV_REG_STR_RM_INST_LOC_USERD_VID:
        case NV_REG_STR_RM_INST_LOC_USERD_DEFAULT:
        default:
            userdAddrSpace = ADDR_FBMEM;
            break;
    }

    //
    // RM WAR for Bug 3313719:
    // disallow USERD in sysmem with (GPFIFO or pushbuffer) in vidmem.
    //
    rmStatus = kfifoCheckChannelAllocAddrSpaces_HAL(GPU_GET_KERNEL_FIFO(pGpu),
                                                    userdAddrSpace,
                                                    pushBuffAddrSpace,
                                                    gpFifoAddrSpace);
    if (rmStatus != NV_OK)
    {
        NV_ASSERT_FAILED("USERD in sysmem and PushBuffer/GPFIFO in vidmem not allowed");
        goto exit_free_client;
    }

    _memUtilsChannelAllocatePB_GM107(pGpu, pMemoryManager, pChannel);
    lockStatus = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM);
    if (lockStatus != NV_OK)
    {
        NV_ASSERT_FAILED("Could not get back lock after allocating Push Buffer");
        goto exit_free_client;
    }

    // map the pushbuffer
    rmStatus = pRmApi->Map(pRmApi, hClient, hDevice,
                           hPushBuffer,
                           hPhysMem, // hMemory
                           0,
                           size,
                           cacheSnoopFlag,
                           &pChannel->pbGpuVA);
    // map the error notifier
    rmStatus = pRmApi->Map(pRmApi, hClient, hDevice,
                           hErrNotifierVirt,
                           hErrNotifierPhys, // hMemory
                           0,
                           pChannel->channelNotifierSize,
                           DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) | cacheSnoopFlag,
                           &pChannel->pbGpuNotifierVA);

    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        _memUtilsAllocateChannel(pGpu,
                                 pMemoryManager,
                                 hClient,
                                 hDevice,
                                 hChannel,
                                 hErrNotifierVirt,
                                 hPushBuffer,
                                 pChannel),
        exit_free_client);

    // _memUtilsMapUserd
    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        _memUtilsMapUserd_GM107(pGpu, pMemoryManager, pChannel,
                                hClient, hDevice, hChannel, bUseRmApiForBar1),
        exit_free_client);

    // Set up the pushbuffer and semaphore memdesc and memset the buffer
    pChannel->pChannelBufferMemdesc =
        memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hPhysMem);
    NV_ASSERT_OR_GOTO(pChannel->pChannelBufferMemdesc != NULL, exit_free_client);

    // Set up the notifier memory
    pChannel->pErrNotifierMemdesc =
        memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hErrNotifierPhys);
    NV_ASSERT_OR_GOTO(pChannel->pErrNotifierMemdesc != NULL, exit_free_client);

    if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
    {
        rmStatus = memmgrMemDescMemSet(pMemoryManager, pChannel->pChannelBufferMemdesc, 0,
                                       (TRANSFER_FLAGS_SHADOW_ALLOC | TRANSFER_FLAGS_SHADOW_INIT_MEM));
        NV_ASSERT_OR_GOTO(rmStatus == NV_OK, exit_free_client);

        pChannel->pbCpuVA            = NULL;
        pChannel->pTokenFromNotifier = NULL;
    }
    else
    {
        if (bUseRmApiForBar1)
        {
            NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
                pRmApi->MapToCpu(pRmApi, hClient, hDevice, hPhysMem, 0, size,
                                 (void **)&pChannel->pbCpuVA, 0),
                exit_free_client);

            NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
                pRmApi->MapToCpu(pRmApi, hClient, hDevice, hErrNotifierPhys, 0,
                                 pChannel->channelNotifierSize, (void **)&pErrNotifierCpuVA, 0),
                exit_free_client);
        }
        else
        {
            //
            // Most use cases can migrate to the internal memdescMap path for
            // BAR1, and it is preferred because the external path will not
            // work with CC.
            //
            pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
                                    pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
            NV_ASSERT_OR_GOTO(pChannel->pbCpuVA != NULL, exit_free_client);

            pErrNotifierCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
                                    pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
            NV_ASSERT_OR_GOTO(pErrNotifierCpuVA != NULL, exit_free_client);
        }

        portMemSet(pChannel->pbCpuVA, 0, (NvLength)size);

        pChannel->pTokenFromNotifier =
            (NvNotification *)(pErrNotifierCpuVA +
                               (NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
                                sizeof(NvNotification)));
    }

    //
    // Allocate and map the doorbell region for use in scrub-on-free.
    // The doorbell register is left disabled on pre-Volta chips, which
    // do not support it.
    //
    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        memmgrScrubMapDoorbellRegion_HAL(pGpu, pMemoryManager, pChannel),
        exit_free_client);

    portMemFree(pParams);
    return NV_OK;

exit_free_client:
    if (!pChannel->bClientAllocated)
    {
        pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    }
    portMemFree(pParams);
    NV_PRINTF(LEVEL_INFO, "end NV_STATUS=0x%08x\n", rmStatus);
    return rmStatus;
}
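//
// Typical bring-up sequence for a scrubber channel (sketch based on the
// callers in this file, e.g. memmgrMemUtilsAllocateEccScrubber_GM107):
//
//   memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel);
//   memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel);
//   // ...then submit work with memmgrMemUtilsMemSet*_GM107()
//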
/** memmgrMemUtilsCreateMemoryAlias_GM107
 *
 * @brief Creates an alias for the FB region.
 *        This function doesn't allocate any memory; it just creates a memory
 *        handle that refers to the FB range. This call is supported on both
 *        bare-metal and vGPU.
 *
 * @param[in] pChannel CHANNEL pointer
 *
 * @returns NV_OK on success
 */
NV_STATUS
memmgrMemUtilsCreateMemoryAlias_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_STATUS status = NV_OK;

    NV_PHYSICAL_MEMORY_ALLOCATION_PARAMS physMemParams = {0};

    memmgrGetPteKindForScrubber_HAL(pMemoryManager, &physMemParams.format);

    status = pRmApi->AllocWithHandle(pRmApi,
                                     pChannel->hClient,
                                     pChannel->deviceId,
                                     pChannel->hFbAlias,
                                     NV01_MEMORY_LOCAL_PHYSICAL,
                                     &physMemParams,
                                     sizeof(physMemParams));
    if (status != NV_OK)
    {
        NV_CHECK_OK_FAILED(LEVEL_WARNING, "Aliasing FbListMem", status);
        return status;
    }

    NV_PRINTF(LEVEL_INFO, "Allocating FbAlias: %x for size: %llx, kind: %x\n", pChannel->hFbAlias,
              pChannel->fbSize, physMemParams.format);

    return NV_OK;
}

NV_STATUS
memmgrMemUtilsCopyEngineInitialize_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    NV_STATUS rmStatus = NV_OK;
    RM_API   *pRmApi   = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // allocate the CE object
    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        _memUtilsAllocCe_GM107(pGpu,
                               pMemoryManager,
                               pChannel,
                               pChannel->hClient,
                               pChannel->deviceId,
                               pChannel->channelId,
                               pChannel->engineObjectId),
        exit_free);

    NV_CHECK_OK_OR_GOTO(
        rmStatus,
        LEVEL_ERROR,
        memmgrMemUtilsChannelSchedulingSetup(pGpu, pMemoryManager, pChannel), exit_free);

    return NV_OK;

exit_free:
    pRmApi->Free(pRmApi, pChannel->hClient, pChannel->hClient);
    NV_PRINTF(LEVEL_INFO, "end NV_STATUS=0x%08x\n", rmStatus);
    return rmStatus;
}

static NV_STATUS _memUtilsAllocCe_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    NvHandle       hClientId,
    NvHandle       hDeviceId,
    NvHandle       hChannelId,
    NvHandle       hCopyObjectId

)
{
    NVC0B5_ALLOCATION_PARAMETERS createParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    createParams.version    = NVC0B5_ALLOCATION_PARAMETERS_VERSION_1;
    createParams.engineType = NV2080_ENGINE_TYPE_COPY(pChannel->ceId);
    memmgrMemUtilsGetCopyEngineClass_HAL(pGpu, pMemoryManager, &pChannel->hTdCopyClass);
    pChannel->engineType = gpuGetRmEngineType(createParams.engineType);

    if (!pChannel->hTdCopyClass)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to determine CE's engine class.\n");
        return NV_ERR_GENERIC;
    }

    NV_CHECK_OK_OR_RETURN(
        LEVEL_ERROR,
        pRmApi->AllocWithHandle(pRmApi,
                                hClientId,
                                hChannelId,
                                hCopyObjectId,
                                pChannel->hTdCopyClass,
                                &createParams,
                                sizeof(createParams)));

    return NV_OK;
}

static NV_STATUS
_memUtilsMapUserd_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    NvHandle       hClientId,
    NvHandle       hDeviceId,
    NvHandle       hChannelId,
    NvBool         bUseRmApiForBar1
)
{
    //
    // The memTransfer API only works for client-allocated USERD,
    // because otherwise we would be calling MapToCpu using the channel
    // handle instead.
    //
    if (pChannel->bClientUserd && !bUseRmApiForBar1)
    {
        pChannel->pUserdMemdesc =
            memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, hClientId, pChannel->hUserD);
        NV_ASSERT_OR_RETURN(pChannel->pUserdMemdesc != NULL, NV_ERR_GENERIC);

        if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
        {
            //
            // GPFIFO access will not be set up, in order to facilitate the
            // memTransfer APIs, which will use GSP-DMA/CE with shadow buffers.
            //
            pChannel->pControlGPFifo = NULL;
        }
        else
        {
            pChannel->pControlGPFifo =
                (void *)memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pUserdMemdesc,
                                                   TRANSFER_FLAGS_USE_BAR1);
            NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
        }
    }
    else
    {
        NvU32   userdSize = 0;
        RM_API *pRmApi    = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
        kfifoGetUserdSizeAlign_HAL(GPU_GET_KERNEL_FIFO(pGpu), &userdSize, NULL);

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            pRmApi->MapToCpu(pRmApi, hClientId, hDeviceId,
                             pChannel->bClientUserd ? pChannel->hUserD : hChannelId, 0,
                             userdSize, (void **)&pChannel->pControlGPFifo, 0));
    }
    return NV_OK;
}

static NV_STATUS
_memUtilsAllocateUserD
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvHandle       hClientId,
    NvHandle       hDeviceId,
    OBJCHANNEL    *pChannel
)
{
    NV_STATUS   rmStatus    = NV_OK;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    RM_API     *pRmApi      = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    NvU32       userdMemClass = NV01_MEMORY_LOCAL_USER;

    // Ensure that the call is not made with the lock held
    LOCK_ASSERT_AND_RETURN(!rmGpuLockIsOwner());

    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));

    memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
    kfifoGetUserdSizeAlign_HAL(pKernelFifo, (NvU32 *)&memAllocParams.size, NULL);
    memAllocParams.type          = NVOS32_TYPE_IMAGE;
    memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

    // Apply registry overrides to USERD.
    switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides))
    {
        case NV_REG_STR_RM_INST_LOC_USERD_NCOH:
        case NV_REG_STR_RM_INST_LOC_USERD_COH:
            userdMemClass = NV01_MEMORY_SYSTEM;
            memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
            break;

        case NV_REG_STR_RM_INST_LOC_USERD_VID:
        case NV_REG_STR_RM_INST_LOC_USERD_DEFAULT:
            userdMemClass = NV01_MEMORY_LOCAL_USER;
            memAllocParams.attr   = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
            memAllocParams.flags |= NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM;
            if (!IS_MIG_IN_USE(pGpu))
            {
                memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _ALLOCATE_FROM_RESERVED_HEAP, _YES);
            }
            break;
    }

    //
    // When APM is enabled, all RM internal allocations must go to
    // unprotected memory, irrespective of vidmem or sysmem.
    // When Hopper CC is enabled, RM internal sysmem allocations that need to
    // be accessed from the GPU should be in unprotected memory, but all
    // vidmem allocations must go to protected memory.
    //
    if (gpuIsApmFeatureEnabled(pGpu) ||
        FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, memAllocParams.attr))
    {
        memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _MEMORY_PROTECTION,
                                        _UNPROTECTED);
    }

    NV_ASSERT_OK_OR_RETURN(pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId,
                                                   pChannel->hUserD,
                                                   userdMemClass,
                                                   &memAllocParams,
                                                   sizeof(memAllocParams)));

    return rmStatus;
}

static NV_STATUS
_memUtilsAllocateChannel
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvHandle       hClientId,
    NvHandle       hDeviceId,
    NvHandle       hChannelId,
    NvHandle       hObjectError,
    NvHandle       hObjectBuffer,
    OBJCHANNEL    *pChannel
)
{
    NV_CHANNEL_ALLOC_PARAMS channelGPFIFOAllocParams;
    NV_STATUS               rmStatus  = NV_OK;
    NvU32                   hClass;
    RM_API                 *pRmApi    = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NvBool                  bMIGInUse = IS_MIG_IN_USE(pGpu);
    NvU32                   flags     = DRF_DEF(OS04, _FLAGS, _CHANNEL_SKIP_SCRUBBER, _TRUE);
    RM_ENGINE_TYPE          engineType = (pChannel->type == SWL_SCRUBBER_CHANNEL) ?
                                         RM_ENGINE_TYPE_SEC2 : RM_ENGINE_TYPE_COPY(pChannel->ceId);

    if (pChannel->bSecure)
    {
        flags |= DRF_DEF(OS04, _FLAGS, _CC_SECURE, _TRUE);
    }

    portMemSet(&channelGPFIFOAllocParams, 0, sizeof(NV_CHANNEL_ALLOC_PARAMS));
    channelGPFIFOAllocParams.hObjectError  = hObjectError;
    channelGPFIFOAllocParams.hObjectBuffer = hObjectBuffer;
    channelGPFIFOAllocParams.gpFifoOffset  = pChannel->pbGpuVA + pChannel->channelPbSize;
    channelGPFIFOAllocParams.gpFifoEntries = pChannel->channelNumGpFifioEntries;
    channelGPFIFOAllocParams.hContextShare = NV01_NULL_OBJECT;
    channelGPFIFOAllocParams.flags         = flags;
    channelGPFIFOAllocParams.hVASpace      = pChannel->hVASpaceId;

    //
    // Use the GPU instance local Id if MIG is enabled.
    // TODO: Maybe we need a VAS for each GPU instance?
    //
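    //
    // Translation sketch: under MIG, the NV2080 engine type passed to channel
    // allocation is local to the GPU instance, so the global
    // RM_ENGINE_TYPE_COPY(ceId) is converted with
    // kmigmgrGetGlobalToLocalEngineType() against the GI-only reference below.
    //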
    if (bMIGInUse && (pChannel->pKernelMIGGpuInstance != NULL))
    {
        KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
        MIG_INSTANCE_REF  ref;
        RM_ENGINE_TYPE    localCe;
        RsClient         *pClient;
        Device           *pDevice;

        NV_ASSERT_OK_OR_RETURN(
            serverGetClientUnderLock(&g_resServ, hClientId, &pClient));

        NV_ASSERT_OK_OR_RETURN(
            deviceGetByHandle(pClient, hDeviceId, &pDevice));

        NV_ASSERT_OK_OR_RETURN(
            kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));

        // Clear the compute instance portion, if present
        ref = kmigmgrMakeGIReference(ref.pKernelMIGGpuInstance);
        NV_ASSERT_OK_OR_RETURN(
            kmigmgrGetGlobalToLocalEngineType(pGpu, pKernelMIGManager, ref,
                                              engineType,
                                              &localCe));
        channelGPFIFOAllocParams.engineType = gpuGetNv2080EngineType(localCe);
    }
    else
    {
        channelGPFIFOAllocParams.engineType = gpuGetNv2080EngineType(engineType);
    }

    hClass = kfifoGetChannelClassId(pGpu, GPU_GET_KERNEL_FIFO(pGpu));
    if (!hClass)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to determine CE's channel class.\n");
        return NV_ERR_GENERIC;
    }

    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

    if (pChannel->bClientUserd)
    {
        NV_ASSERT_OK_OR_GOTO(
            rmStatus,
            _memUtilsAllocateUserD(pGpu,
                                   pMemoryManager,
                                   hClientId,
                                   hDeviceId,
                                   pChannel),
            cleanup);

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
        channelGPFIFOAllocParams.hUserdMemory[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = pChannel->hUserD;
        channelGPFIFOAllocParams.userdOffset[gpumgrGetSubDeviceInstanceFromGpu(pGpu)]  = 0;
        SLI_LOOP_END
    }

    NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(
        rmStatus,
        pRmApi->AllocWithHandle(pRmApi,
                                hClientId,
                                hDeviceId,
                                hChannelId,
                                hClass,
                                &channelGPFIFOAllocParams,
                                sizeof(channelGPFIFOAllocParams)));

cleanup:
    NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
        rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM));

    return rmStatus;
}
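//
// Usage sketch for the memset entry points below (hypothetical values):
//
//   NvU32 blocks;
//   NV_STATUS status = memmgrMemUtilsMemSet_GM107(pGpu, pMemoryManager,
//                          pChannel, base, size, freeToken, &blocks);
//
// size must be a multiple of pChannel->minBlockSize, and the channel must
// already have a CPU-mapped pushbuffer and GPFIFO.
//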
/*!
 * Do a non-blocking memset
 *
 * @param[in]  pChannel   OBJCHANNEL pointer
 * @param[in]  base       Offset in FB
 * @param[in]  size       Size to scrub
 * @param[in]  freeToken  Will be returned as a semaphore
 * @param[out] pNumBlocks Returns the number of blocks that were scrubbed
 * @returns NV_STATUS
 */
NV_STATUS
memmgrMemUtilsMemSet_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    RmPhysAddr     base,
    NvU64          size,
    NvU32          freeToken,
    NvU32         *pNumBlocks
)
{
    NvU32 blocksPushed = 0;

    if ((size % pChannel->minBlockSize) != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Size should be a multiple of %d\n",
                  pChannel->minBlockSize);
        return NV_ERR_GENERIC;
    }

    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
    NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);

    if (pChannel->isProgressChecked)
    {
        // if progress is checked, insert the semaphore with freeToken as payload
        pChannel->finishPayload = freeToken;
        _ceChannelScheduleWork_GM107(pGpu, pMemoryManager, pChannel,
                                     0, 0, 0,             // src parameters
                                     base, ADDR_FBMEM, 0, // dst parameters
                                     size,
                                     NV_FALSE,            // blocking
                                     NV_TRUE,             // insertFinishPayload
                                     NV_FALSE);           // memcopy
    }
    else
    {
        // issue a standard async scrub
        blocksPushed = _ceChannelScheduleWork_GM107(pGpu, pMemoryManager, pChannel,
                                                    0, 0, 0,             // src parameters
                                                    base, ADDR_FBMEM, 0, // dst parameters
                                                    size,
                                                    NV_FALSE,            // blocking
                                                    NV_FALSE,            // insertFinishPayload
                                                    NV_FALSE);           // memcopy
    }
    *pNumBlocks = blocksPushed;
    return NV_OK;
}

/*!
 * Do a blocking memset
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @param[in] base     Offset in FB
 * @param[in] size     Size to scrub
 * @returns NV_STATUS
 */
NV_STATUS
memmgrMemUtilsMemSetBlocking_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    RmPhysAddr     base,
    NvU64          size
)
{
    NvU32 blocksPushed = 0;

    if ((size % pChannel->minBlockSize) != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Size should be a multiple of %d\n",
                  pChannel->minBlockSize);
        DBG_BREAKPOINT();
        return NV_ERR_GENERIC;
    }

    NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);

    blocksPushed = _ceChannelScheduleWork_GM107(pGpu, pMemoryManager, pChannel,
                                                0, 0, 0,             // src parameters
                                                base, ADDR_FBMEM, 0, // dst parameters
                                                size,
                                                NV_TRUE,             // blocking
                                                NV_FALSE,            // insertFinishPayload
                                                NV_FALSE);           // memcopy

    if (blocksPushed > 0)
    {
        NvU8     *semAddr       = pChannel->pbCpuVA + pChannel->semaOffset;
        NV_STATUS timeoutStatus = NV_OK;
        RMTIMEOUT timeout;

        gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);

        while (MEM_RD32(semAddr) != pChannel->lastPayloadPushed)
        {
            NV_PRINTF(LEVEL_INFO, "Semaphore Payload is 0x%x last is 0x%x\n",
                      MEM_RD32(semAddr), pChannel->lastPayloadPushed);

            if (timeoutStatus == NV_ERR_TIMEOUT)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Timed out waiting for CE semaphore\n");

                NV_PRINTF(LEVEL_ERROR,
                          "GET=0x%x, PUT=0x%x, GPGET=0x%x, GPPUT=0x%x\n",
                          pChannel->pControlGPFifo->Get,
                          pChannel->pControlGPFifo->Put,
                          pChannel->pControlGPFifo->GPGet,
                          pChannel->pControlGPFifo->GPPut);
                DBG_BREAKPOINT_REASON(NV_ERR_TIMEOUT);
                return NV_ERR_GENERIC;
            }

            timeoutStatus = gpuCheckTimeout(pGpu, &timeout);
        }
    }

    return NV_OK;
}

/*!
 * This function allocates the ECC scrubber
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @returns NV_STATUS
 */
NV_STATUS
memmgrMemUtilsAllocateEccScrubber_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    NV_ASSERT_OK_OR_RETURN(channelAllocSubdevice(pGpu, pChannel));

    memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pChannel);

    memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pChannel);

    _memUtilsAllocateReductionSema(pGpu, pMemoryManager, pChannel);

    return NV_OK;
}

/*!
 * This function allocates the ECC allocation scrubber and
 * dupes the bitmap semaphore, which is used for synchronization
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @returns NV_STATUS
 */
NV_STATUS
memmgrMemUtilsAllocateEccAllocScrubber_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel
)
{
    OBJSCRUB   *pEccTD           = &pMemoryManager->eccScrubberState;
    OBJCHANNEL *pEccSyncChannel  = &pEccTD->allocationScrubberState;
    OBJCHANNEL *pEccAsyncChannel = &pEccTD->tdHeapState;
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
    NV_STATUS   lockStatus;
    RM_API     *pRmApi           = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_ASSERT_OK_OR_RETURN(channelAllocSubdevice(pGpu, pChannel));

    memmgrMemUtilsChannelInitialize_HAL(pGpu, pMemoryManager, pEccSyncChannel);
    memmgrMemUtilsCopyEngineInitialize_HAL(pGpu, pMemoryManager, pEccSyncChannel);

    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

    // dup the reduction sema bitmap object
    NV_ASSERT_OK(
        pRmApi->DupObject(pRmApi,
                          pEccSyncChannel->hClient,
                          pEccSyncChannel->deviceId,
                          &pEccSyncChannel->bitMapSemPhysId,
                          pEccAsyncChannel->hClient,
                          pEccAsyncChannel->bitMapSemPhysId,
                          0));

    // allocate virtual memory for a bitmap semaphore
    portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
    memAllocParams.owner  = HEAP_OWNER_RM_CLIENT_GENERIC;
    memAllocParams.type   = NVOS32_TYPE_IMAGE;
    memAllocParams.size   = (((pEccSyncChannel->blockCount + 31)/32)*4);
    memAllocParams.attr   = NVOS32_ATTR_NONE;
    memAllocParams.attr2  = NVOS32_ATTR2_NONE;
    memAllocParams.flags  = 0;
    memAllocParams.flags |= NVOS32_ALLOC_FLAGS_VIRTUAL;

    NV_ASSERT_OK(
        pRmApi->AllocWithHandle(pRmApi,
                                pEccSyncChannel->hClient,
                                pEccSyncChannel->deviceId,
                                pEccSyncChannel->bitMapSemVirtId,
                                NV50_MEMORY_VIRTUAL,
                                &memAllocParams,
                                sizeof(memAllocParams)));

    lockStatus = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM);

    if (lockStatus != NV_OK)
    {
        NV_ASSERT_FAILED("Could not get back lock after allocating reduction sema");
        return NV_ERR_GENERIC;
    }

    NV_ASSERT_OK(
        pRmApi->Map(pRmApi,
                    pEccSyncChannel->hClient,
                    pEccSyncChannel->deviceId,
                    pEccSyncChannel->bitMapSemVirtId,
                    pEccSyncChannel->bitMapSemPhysId, // hMemory
                    0,
                    (((pEccSyncChannel->blockCount + 31) / 32) * 4),
                    NV04_MAP_MEMORY_FLAGS_NONE,
                    &pEccSyncChannel->pbGpuBitMapVA));

    pEccSyncChannel->pbBitMapVA = pEccAsyncChannel->pbBitMapVA;

    return NV_OK;
}
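//
// Worked example for _getSpaceInPb() below (hypothetical numbers): with
// channelPbSize = 0x1000, channelPutOffset = 0x300 and a semaphore GET of
// 0x100, filledSpace = 0x200 and the available space is 0xE00; once PUT
// wraps behind GET, the available space is simply GET - PUT.
//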
/*!
 * Function that calculates the available space in the PB.
 * This is based on reading the semaphore that holds the previous
 * PUT pointer where methods were inserted.
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @returns size
 */
static NvU32
_getSpaceInPb(OBJCHANNEL *pChannel)
{
    NvU32 filledSpace;
    NvU32 avlblSpace;

    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);

    if (pChannel->channelPutOffset >= MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset))
    {
        filledSpace = (pChannel->channelPutOffset - MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset));
        avlblSpace  = pChannel->channelPbSize - filledSpace;
    }
    else
    {
        avlblSpace = (MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset) - pChannel->channelPutOffset);
    }

    NV_PRINTF(LEVEL_INFO, "Space in PB is %d\n", avlblSpace);

    return avlblSpace;
}

/*!
 * This function manages the pushbuffer.
 * It will insert methods into the PB, manage wrap-around,
 * and decide when we need to add non-stall interrupts
 * and extra token semaphores.
 *
 * @param[in] pChannel            OBJCHANNEL pointer
 * @param[in] src                 Offset of src to copy from
 * @param[in] srcAddressSpace     source surface address space type
 * @param[in] srcCpuCacheAttrib   source surface address space attributes
 * @param[in] dst                 Offset of dst to scrub/copy to
 * @param[in] dstAddressSpace     destination surface address space type
 * @param[in] dstCpuCacheAttrib   destination surface address space attributes
 * @param[in] size                size to scrub/copy
 * @param[in] blocking            blocking will not insert non-stall interrupts
 * @param[in] insertFinishPayload will insert a token for the last block submitted
 * @param[in] bMemcopy            NV_TRUE for memory copy / NV_FALSE for scrubbing
 * @returns the number of blocks pushed
 */
static NvU32
_ceChannelScheduleWork_GM107
(
    OBJGPU          *pGpu,
    MemoryManager   *pMemoryManager,
    OBJCHANNEL      *pChannel,
    RmPhysAddr       src,
    NV_ADDRESS_SPACE srcAddressSpace,
    NvU32            srcCpuCacheAttrib,
    RmPhysAddr       dst,
    NV_ADDRESS_SPACE dstAddressSpace,
    NvU32            dstCpuCacheAttrib,
    NvU64            size,
    NvBool           blocking,
    NvBool           insertFinishPayload,
    NvBool           bMemcopy
)
{
    RMTIMEOUT timeout;
    NvU32     spaceInPb;
    NvU32     numBytes;
    NvU32    *ptr;
    NvU32     gpBase;
    NvU32     semaCount       = 0;
    NvBool    addNonStallIntr = NV_FALSE;
    NvU32     blocksPushed    = 0;
    NvBool    addFinishPayload;
    NvU32     blockSize       = 0;

    NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
    NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, 0);

    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);

    spaceInPb = _getSpaceInPb(pChannel);

    NV_PRINTF(LEVEL_INFO, "Space in PB is %d and starting fill at 0x%x\n",
              spaceInPb, pChannel->channelPutOffset);

    ptr      = (NvU32 *)(pChannel->pbCpuVA + pChannel->channelPutOffset);
    gpBase   = pChannel->channelPutOffset;
    numBytes = 0;
    do
    {
        // while we have space for more than one block
        while ((spaceInPb > (pChannel->methodSizePerBlock + MAX_EXTRA_PAYLOAD)))
        {
            // if inserting one more block would exceed the PB size, wrap around to the beginning
            if ((pChannel->channelPutOffset + (pChannel->methodSizePerBlock + MAX_EXTRA_PAYLOAD)) > pChannel->channelPbSize)
            {
                NV_PRINTF(LEVEL_INFO, "Wrap numBytes %d\n", numBytes);
                // submit to the GPFIFO with numBytes and wrap around the PUT offset
                if (numBytes > 0)
                {
                    _ceChannelUpdateGpFifo_GM107(pGpu, pMemoryManager, pChannel, (gpBase), numBytes);
                }
                pChannel->channelPutOffset = 0;
                ptr      = (NvU32 *)(pChannel->pbCpuVA + pChannel->channelPutOffset);
                gpBase   = 0;
                numBytes = 0;
                // update the available space
                spaceInPb = _getSpaceInPb(pChannel);
                NV_PRINTF(LEVEL_INFO, "Wrapping PB around\n");
                continue;
            }

            blockSize = (size > pChannel->maxBlockSize) ?
                        pChannel->maxBlockSize : (NvU32) size;

            //
            // Add a non-stall interrupt whenever semaCount exceeds 1/8th of
            // the PB size, or when we insert the last block.
            //
            if ((semaCount > (pChannel->channelPbSize >> 3)) || (size <= pChannel->maxBlockSize))
            {
                addNonStallIntr = NV_TRUE;
                semaCount       = 0;
            }
            else
            {
                addNonStallIntr = NV_FALSE;
            }

            //
            // The finish payload corresponds to inserting a token for every
            // call to scrub that finishes.
            //
            if ((insertFinishPayload) && (size <= pChannel->maxBlockSize))
            {
                addFinishPayload = NV_TRUE;
                NV_PRINTF(LEVEL_INFO, "Inserting finish payload\n");
            }
            else
            {
                addFinishPayload = NV_FALSE;
            }

            if (_checkSynchronization(pGpu, pMemoryManager, pChannel, BLOCK_INDEX_FROM_ADDR(dst, pChannel->blockShift)))
            {
                NvU32 bytesPushed = _ceChannelPushMethodsBlock_GM107(pGpu, pMemoryManager, pChannel,
                    src, srcAddressSpace, srcCpuCacheAttrib, // src parameters
                    dst, dstAddressSpace, dstCpuCacheAttrib, // dst parameters
                    blockSize, &ptr, NV_TRUE, (addNonStallIntr && !blocking),
                    addFinishPayload, bMemcopy);
                spaceInPb = spaceInPb - bytesPushed;
                numBytes  = numBytes + bytesPushed;
                semaCount = semaCount + bytesPushed;
                blocksPushed++;
                // we are done pushing all methods
            }

            dst += (NvU64) blockSize;
            if (bMemcopy)
                src += (NvU64) blockSize;
            size -= (NvU64) blockSize;

            if (size == 0)
            {
                _ceChannelUpdateGpFifo_GM107(pGpu, pMemoryManager, pChannel, gpBase, numBytes);
                return blocksPushed;
            }
        }

        spaceInPb = _getSpaceInPb(pChannel);
        if (spaceInPb <= (pChannel->methodSizePerBlock + MAX_EXTRA_PAYLOAD))
        {
            // no space in the PB to push all blocks, so submit what we have and wait for space
            if (numBytes > 0)
            {
                _ceChannelUpdateGpFifo_GM107(pGpu, pMemoryManager, pChannel, gpBase, numBytes);
            }
            gpBase   = pChannel->channelPutOffset;
            numBytes = 0;
        }

        if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
        {
            NV_ASSERT_FAILED("Timed out waiting for space in PB!");
            return NV_ERR_GENERIC;
        }
    } while (1);
}
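//
// Synchronization sketch: each scrub block owns one bit in each bitmap, and
// BLOCK_INDEX_FROM_ADDR() derives that bit from the destination FB address
// using pChannel->blockShift. A block is pushed only when it is neither
// pending (CPU-updated bitmap) nor already done (GPU-updated bitmap); see
// _checkSynchronization() below.
//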
/*!
 * This function checks if the block has already been submitted
 * or scrubbed, based on two bitmaps. One is a pending bitmap
 * updated by the CPU, and one is a "finished" bitmap updated by
 * the GPU.
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @param[in] block    block number
 *
 * @returns Bool
 */
static NvBool
_checkSynchronization
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    NvU32          block
)
{
    NvU32 blockSema;

    if (!pChannel->isChannelSynchronized)
    {
        // synchronization is not required for this channel
        return NV_TRUE;
    }

    blockSema = MEM_RD32((NvU8*)pChannel->pbBitMapVA + ((block/32)*4));

    if (((blockSema) & (1 << (block%32))) == 0)
    {
        if (((pChannel->pBlockPendingState[block / 32] & (1 << (block % 32))) == 0) &&
            ((pChannel->pBlockDoneState[block / 32]    & (1 << (block % 32))) == 0))
        {
            pChannel->pBlockPendingState[block / 32] |= (1 << (block % 32));
            return NV_TRUE;
        }
    }

    return NV_FALSE;
}

/*!
 * Updates the GPFIFO with the methods in the PB for
 * the given channel.
 *
 * @param[in] pChannel OBJCHANNEL pointer
 * @param[in] gpOffset Offset in the PB
 * @param[in] gpSize   Size of segment
 * @returns None
 */
static void
_ceChannelUpdateGpFifo_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    NvU32          gpOffset,
    NvU32          gpSize

)
{
    RMTIMEOUT      timeout;
    NvU32          GPPut;
    NvU32          GPGet;
    NvU64          get;
    NvU32          length;
    NvU32         *pGpEntry;
    NvU32          GpEntry0;
    NvU32          GpEntry1;
    NvU32          GPPutNext;
    NvU32          workSubmitToken = 0;
    KernelChannel *pFifoKernelChannel;
    KernelFifo    *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);

    NV_ASSERT_OR_RETURN_VOID(pChannel->pbCpuVA != NULL);
    NV_ASSERT_OR_RETURN_VOID(pChannel->pControlGPFifo != NULL);

    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
    GPPut = MEM_RD32(&pChannel->pControlGPFifo->GPPut);
    GPGet = MEM_RD32(&pChannel->pControlGPFifo->GPGet);

    GPPutNext = (GPPut + 1) % pChannel->channelNumGpFifioEntries;

    NV_PRINTF(LEVEL_INFO, "Put %d Get %d PutNext %d\n", GPPut, GPGet,
              GPPutNext);

    NV_PRINTF(LEVEL_INFO, "gp Base 0x%x, Size %d\n", (NvU32)(gpOffset),
              gpSize);

    // if the size passed is zero, do not update GPPut
    if (gpSize == 0)
        return;

    if (GPPut >= pChannel->channelNumGpFifioEntries)
    {
        // if the PUT pointer is invalid, the GPU is likely inaccessible
        NV_PRINTF(LEVEL_INFO, "invalid Put %u >= %u\n", GPPut,
                  pChannel->channelNumGpFifioEntries);
        return;
    }

    while (GPPutNext == GPGet)
    {
        // need to wait for space
        GPGet = MEM_RD32(&pChannel->pControlGPFifo->GPGet);

        if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
        {
            NV_ASSERT_FAILED("Timed out waiting for space in GPFIFO!");
            return;
        }
        else if (GPGet >= pChannel->channelNumGpFifioEntries)
        {
            // if the GET pointer is invalid, the GPU is likely inaccessible
            NV_PRINTF(LEVEL_INFO, "invalid Get %u >= %u\n", GPGet,
                      pChannel->channelNumGpFifioEntries);
            return;
        }
    }
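    //
    // GP entry encoding sketch (per the NV906F GP_ENTRY fields used below):
    // the 64-bit pushbuffer GPU VA is split into GET (low bits, shifted
    // right by 2 since entries are 4-byte aligned) and GET_HI, while LENGTH
    // is the method-segment size in 32-bit words (gpSize >> 2).
    //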
    get    = pChannel->pbGpuVA + gpOffset;
    length = gpSize;

    GpEntry0 =
        DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
        DRF_NUM(906F, _GP_ENTRY0, _GET, NvU64_LO32(get) >> 2);
    GpEntry1 =
        DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(get)) |
        DRF_NUM(906F, _GP_ENTRY1, _LENGTH, length >> 2) |
        DRF_DEF(906F, _GP_ENTRY1, _PRIV, _USER) |
        DRF_DEF(906F, _GP_ENTRY1, _LEVEL, _MAIN);

    pGpEntry = (NvU32 *)(((NvU8*)pChannel->pbCpuVA) + pChannel->channelPbSize +
                         GPPut*NV906F_GP_ENTRY__SIZE);

    MEM_WR32(&pGpEntry[0], GpEntry0);
    MEM_WR32(&pGpEntry[1], GpEntry1);

    // need to flush the write-combine buffer
    osFlushCpuWriteCombineBuffer();

    // write GPPut
    MEM_WR32(&pChannel->pControlGPFifo->GPPut, GPPutNext);
    osFlushCpuWriteCombineBuffer();

    if (kfifoIsLiteModeEnabled_HAL(pGpu, pKernelFifo))
    {
        NV_ASSERT_OR_RETURN_VOID(0);
    }
    else
    {
        workSubmitToken = pChannel->workSubmitToken;
        NV_ASSERT_OR_RETURN_VOID(CliGetKernelChannelWithDevice(pChannel->pRsClient,
                                                               pChannel->deviceId, pChannel->channelId,
                                                               &pFifoKernelChannel) == NV_OK);
    }

    if (!kchannelIsRunlistSet(pGpu, pFifoKernelChannel))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "FAILED Channel 0x%x is not assigned to a runlist yet\n",
                  kchannelGetDebugTag(pFifoKernelChannel));
        return;
    }

    // update the doorbell register
    kfifoUpdateUsermodeDoorbell_HAL(pGpu, pKernelFifo, workSubmitToken, kchannelGetRunlistId(pFifoKernelChannel));
}

/*!
 * Inserts methods into the pushbuffer to select the source and
 * destination apertures for one block.
 *
 * @param[in] pChannel          OBJCHANNEL pointer
 * @param[in] srcAddressSpace   source surface address space type
 * @param[in] srcCpuCacheAttrib source surface address space attributes
 * @param[in] dstAddressSpace   destination surface address space type
 * @param[in] dstCpuCacheAttrib destination surface address space attributes
 * @param[in] pPtr              double pointer to the PB offset
 * @returns None
 */
static void
_ceChannelPushMethodAperture_GM107
(
    OBJCHANNEL      *pChannel,
    NV_ADDRESS_SPACE srcAddressSpace,
    NvU32            srcCpuCacheAttrib,
    NV_ADDRESS_SPACE dstAddressSpace,
    NvU32            dstCpuCacheAttrib,
    NvU32          **pPtr
)
{
    NvU32 *ptr  = *pPtr;
    NvU32  data = 0;

    // Set source parameters
    data = ((srcAddressSpace == ADDR_FBMEM) ? DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _LOCAL_FB) :
            (srcCpuCacheAttrib == NV_MEMORY_CACHED) ? DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _COHERENT_SYSMEM) :
            DRF_DEF(B0B5, _SET_SRC_PHYS_MODE, _TARGET, _NONCOHERENT_SYSMEM));

    PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_SET_SRC_PHYS_MODE, data);

    // Set destination parameters
    data = ((dstAddressSpace == ADDR_FBMEM) ? DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _LOCAL_FB) :
            (dstCpuCacheAttrib == NV_MEMORY_CACHED) ? DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _COHERENT_SYSMEM) :
            DRF_DEF(B0B5, _SET_DST_PHYS_MODE, _TARGET, _NONCOHERENT_SYSMEM));

    PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_SET_DST_PHYS_MODE, data);

    *pPtr = ptr;
}
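//
// Note on PUSH_PAIR (an assumption inferred from its usage in this file, not
// a new definition): it appends a (method, data) pair for the given
// subchannel at the current PB cursor and advances it, which is why these
// helpers thread the cursor around as NvU32 **pPtr.
//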
/*!
 * Inserts methods into the push buffer for one block
 *
 * @param[in]     pChannel           OBJCHANNEL pointer
 * @param[in]     src                Offset of src to copy from
 * @param[in]     srcAddressSpace    source surface address space type
 * @param[in]     srcCpuCacheAttrib  source surface address space attributes
 * @param[in]     dst                Offset of dst to scrub/copy to
 * @param[in]     dstAddressSpace    destination surface address space type
 * @param[in]     dstCpuCacheAttrib  destination surface address space attributes
 * @param[in]     size               size of the region to scrub/copy
 * @param[in]     pPtr               Double pointer to PB offset
 * @param[in]     addPayloadSema     Bool to add the default payload semaphore release
 * @param[in]     addNonStallIntr    Bool to add a non-stall interrupt at the end
 * @param[in]     addFinishPayload   Bool to add an extra sema release for the token
 * @param[in]     bMemcopy           NV_TRUE for memcopy / NV_FALSE for scrubbing
 * @returns the size, in bytes, of the methods inserted
 */
static NvU32
_ceChannelPushMethodsBlock_GM107
(
    OBJGPU           *pGpu,
    MemoryManager    *pMemoryManager,
    OBJCHANNEL       *channel,
    RmPhysAddr        src,
    NV_ADDRESS_SPACE  srcAddressSpace,
    NvU32             srcCpuCacheAttrib,
    RmPhysAddr        dst,
    NV_ADDRESS_SPACE  dstAddressSpace,
    NvU32             dstCpuCacheAttrib,
    NvU64             size,
    NvU32           **pPtr,
    NvBool            addPayloadSema,
    NvBool            addNonStallIntr,
    NvBool            addFinishPayload,
    NvBool            bMemcopy
)
{
    NvU32  launchParams       = 0;
    NvU32 *ptr                = *pPtr;
    NvU32 *pStartPtr          = ptr;
    NvBool addReductionOp     = channel->isChannelSynchronized;
    NvBool bMemoryScrubEnable = NV_FALSE;
    NvU32  remapConstB        = 0;
    NvU32  remapComponentSize = 0;

    NV_PRINTF(LEVEL_INFO, "Base = 0x%llx, Size = 0x%llx, PB location = %p\n",
              dst, size, ptr);

    PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NV906F_SET_OBJECT, channel->classEngineID);

    if (size > 0)
    {
        NvU32 payLoad = channel->channelPutOffset + channel->methodSizePerBlock;

        if (addNonStallIntr)  payLoad = payLoad + NONSTALL_METHOD_SIZE;
        if (addReductionOp)   payLoad = payLoad + SEMAPHORE_ONLY_METHOD_SIZE;
        if (addFinishPayload) payLoad = payLoad + SEMAPHORE_ONLY_METHOD_SIZE;

        if (addPayloadSema)
        {
            memmgrChannelPushSemaphoreMethodsBlock_HAL(pMemoryManager,
                NVA06F_SUBCHANNEL_COPY_ENGINE,
                channel->pbGpuVA+channel->semaOffset, payLoad, &ptr);

            NV_PRINTF(LEVEL_INFO, "Pushing Semaphore Payload 0x%x\n", payLoad);
            channel->lastPayloadPushed = payLoad;
        }
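        //
        // Note on the accounting above: payLoad is the pushbuffer offset the
        // engine will have consumed once every method pushed for this block
        // has executed, so each optional trailer appended below (reduction
        // release, finish release, non-stall interrupt) must be counted in
        // before the release is pushed. For example, with addNonStallIntr
        // and addFinishPayload set and no reduction op, the released value
        // is channelPutOffset + methodSizePerBlock + NONSTALL_METHOD_SIZE +
        // SEMAPHORE_ONLY_METHOD_SIZE.
        //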
        if (IS_SIMULATION(pGpu))
        {
            //
            // Fmodel CE is slow (compared to emulation), so we don't bother
            // scrubbing the whole block. Fmodel already scrubs memory via
            // ramif, so we'll never get exceptions.
            //
            size = NV_MIN(size, 0x20);
        }

        memmgrChannelPushAddressMethodsBlock_HAL(pMemoryManager, NV_FALSE,
            NVA06F_SUBCHANNEL_COPY_ENGINE, dst, &ptr);

        if (bMemcopy)
        {
            memmgrChannelPushAddressMethodsBlock_HAL(pMemoryManager, NV_TRUE,
                NVA06F_SUBCHANNEL_COPY_ENGINE, src, &ptr);

            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LINE_LENGTH_IN, NvU64_LO32(size));
        }
        else
        {
            bMemoryScrubEnable = memmgrMemUtilsCheckMemoryFastScrubEnable_HAL(pGpu,
                                                                              pMemoryManager,
                                                                              channel->hTdCopyClass,
                                                                              channel->bUseVasForCeCopy,
                                                                              dst,
                                                                              NvU64_LO32(size),
                                                                              dstAddressSpace);
            if (bMemoryScrubEnable)
            {
                NV_PRINTF(LEVEL_INFO, "Using Fast memory scrubber\n");
                remapConstB = DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_B);
                PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_SET_REMAP_CONST_B, 0x00000000);

                remapComponentSize = DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, _ONE);
                PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LINE_LENGTH_IN, NvU64_LO32(size));
            }
            else
            {
                remapComponentSize = DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, _FOUR);
                PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LINE_LENGTH_IN, NvU64_LO32(size >> 2));
            }

            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_SET_REMAP_COMPONENTS,
                      DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A)          |
                      DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _NUM_SRC_COMPONENTS, _ONE) |
                      DRF_DEF(B0B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, _ONE) |
                      remapComponentSize                                              |
                      remapConstB);

            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_SET_REMAP_CONST_A, 0x00000000);

            NV_ASSERT(srcAddressSpace == 0);
            NV_ASSERT(dstAddressSpace == ADDR_FBMEM);

            srcAddressSpace = ADDR_FBMEM;
        }

        PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LINE_COUNT, 1);

        _ceChannelPushMethodAperture_GM107(channel, srcAddressSpace, srcCpuCacheAttrib, dstAddressSpace, dstCpuCacheAttrib, &ptr);

        launchParams = DRF_DEF(B0B5, _LAUNCH_DMA, _INTERRUPT_TYPE, _NONE)     |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH) |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_TYPE, _PHYSICAL)       |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH) |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DST_TYPE, _PHYSICAL)       |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED);

        if (addPayloadSema)
        {
            launchParams |= DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _RELEASE_ONE_WORD_SEMAPHORE) |
                            DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE);
        }
        else
        {
            launchParams |= DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _NONE);
        }

        if (bMemoryScrubEnable)
        {
            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVC8B5_SET_MEMORY_SCRUB_PARAMETERS,
                      DRF_DEF(C8B5, _SET_MEMORY_SCRUB_PARAMETERS, _DISCARDABLE, _FALSE));

            launchParams |= DRF_DEF(C8B5, _LAUNCH_DMA, _MEMORY_SCRUB_ENABLE, _TRUE);
            launchParams |= DRF_DEF(C8B5, _LAUNCH_DMA, _REMAP_ENABLE, _FALSE);

            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVC8B5_LAUNCH_DMA, launchParams);
        }
        else
        {
            if (!bMemcopy)
            {
                launchParams |= DRF_DEF(B0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE);
            }

            PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LAUNCH_DMA, launchParams);
        }
    }
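    //
    // Note on the two scrub encodings above: the fast scrubber uses one-byte
    // remap components, so LINE_LENGTH_IN carries the full byte count, while
    // the fallback uses four-byte components and passes size >> 2. E.g.,
    // scrubbing a 0x1000-byte block pushes LINE_LENGTH_IN = 0x1000 on the
    // fast path and 0x400 on the fallback path; both cover the same bytes.
    //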
    if (addReductionOp)
    {
        NvU32 currentBlock = BLOCK_INDEX_FROM_ADDR((dst), channel->blockShift);
        NvU32 blockOffset;
        NvU32 bitFlip;

        blockOffset = (currentBlock / 32) * 4;
        bitFlip     = ((NvU32)1 << (currentBlock % 32));
        memmgrChannelPushSemaphoreMethodsBlock_HAL(pMemoryManager,
            NVA06F_SUBCHANNEL_COPY_ENGINE,
            channel->pbGpuBitMapVA+(blockOffset), bitFlip, &ptr);

        launchParams = DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _RELEASE_ONE_WORD_SEMAPHORE) |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _INTERRUPT_TYPE, _NONE)                       |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE)                         |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE)                         |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH)                   |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH)                   |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_REDUCTION_ENABLE, _TRUE)           |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_REDUCTION_SIGN, _UNSIGNED)         |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_REDUCTION, _IOR)                   |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _NONE);
        // Push only the second semaphore release.
        PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LAUNCH_DMA, launchParams);
    }

    if (addFinishPayload)
    {
        memmgrChannelPushSemaphoreMethodsBlock_HAL(pMemoryManager,
            NVA06F_SUBCHANNEL_COPY_ENGINE,
            channel->pbGpuVA+channel->finishPayloadOffset,
            channel->finishPayload, &ptr);

        launchParams = DRF_DEF(B0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _RELEASE_ONE_WORD_SEMAPHORE) |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _INTERRUPT_TYPE, _NONE)                       |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE)                         |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE)                         |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH)                   |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH)                   |
                       DRF_DEF(B0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _NONE);
        PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NVB0B5_LAUNCH_DMA, launchParams);
        NV_PRINTF(LEVEL_INFO, "Pushing Finishing Semaphore Payload 0x%x\n",
                  channel->finishPayload);
    }

    if (addNonStallIntr)
    {
        PUSH_PAIR(NVA06F_SUBCHANNEL_COPY_ENGINE, NV906F_NON_STALL_INTERRUPT, 0);
    }

    channel->channelPutOffset = (NvU32)((NvU8 *)ptr - (NvU8 *)channel->pbCpuVA);
    *pPtr = ptr;

    // Return the length of the methods inserted, in bytes.
    return (NvU32)((NvU8*)ptr - (NvU8*)pStartPtr);
}
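//
// Worked example for the reduction release in _ceChannelPushMethodsBlock_GM107
// above: if dst falls in block 37, then blockOffset = (37 / 32) * 4 = 4 and
// bitFlip = 1 << (37 % 32) = 1 << 5, so the CE's IOR reduction sets bit 5 of
// the second word of the bitmap at pbGpuBitMapVA; that is the same word and
// bit that _checkSynchronization() tests through the CPU mapping
// (pbBitMapVA), assuming both point at the same "finished" bitmap surface.
//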
/*!
 * Gets the Copy Engine class to use
 *
 * @param[in]     pGpu     OBJGPU pointer
 * @param[out]    pClass   pointer to the returned class
 */
NV_STATUS
memmgrMemUtilsGetCopyEngineClass_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32         *pClass
)
{
    NV_STATUS  status;
    NvU32      numClasses;
    NvU32     *pClassList = NULL;
    NvU32      i;
    NvU32      class = 0;
    NvU32      eng;

    //
    // Pascal+ chips will have any combination of the 6 CEs
    // available. Loop over all the CEs to get the CE class
    // for the first available CE instead of using ENG_CE(0).
    //
    for (eng = 0; eng < ENG_CE__SIZE_1; eng++)
    {
        NV_ASSERT_OK_OR_ELSE(
            status,
            gpuGetClassList(pGpu, &numClasses, NULL, ENG_CE(eng)),
            return status);

        if (numClasses > 0)
        {
            break;
        }
    }

    pClassList = portMemAllocNonPaged(sizeof(*pClassList) * numClasses);
    NV_ASSERT_OR_RETURN((pClassList != NULL), NV_ERR_NO_MEMORY);

    if (NV_OK == gpuGetClassList(pGpu, &numClasses, pClassList, ENG_CE(eng)))
    {
        // Keep the highest-numbered class reported.
        for (i = 0; i < numClasses; i++)
        {
            class = NV_MAX(class, pClassList[i]);
        }
    }

    NV_ASSERT(class != 0);
    portMemFree(pClassList);
    *pClass = class;

    return NV_OK;
}
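//
// Note on the NV_MAX selection above: it assumes DMA copy class numbers grow
// with newer architectures (e.g., NVC8B5/HOPPER_DMA_COPY_A is numerically
// larger than NVB0B5/MAXWELL_DMA_COPY_A among the classes referenced in this
// file), so the loop reports the newest copy class exposed by the first CE
// that advertises any classes at all.
//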