1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "gpu/gpu_user_shared_data.h"
25 #include "gpu/mem_mgr/mem_mgr.h"
26 #include "gpu/mem_mgr/heap.h"
27 #include "gpu/mem_sys/kern_mem_sys.h"
28 #include "gpu/mem_mgr/mem_utils.h"
29 #include "gpu/mem_mgr/ce_utils.h"
30 #include "mem_mgr/video_mem.h"
31 #include "gpu/mem_mgr/fbsr.h"
32 #include "gpu/mmu/kern_gmmu.h"
33 #include "gpu/bus/kern_bus.h"
34 #include "gpu/bif/kernel_bif.h"
35 #include "core/locks.h"
36 #include "vgpu/vgpu_util.h"
37 #include "virtualization/kernel_vgpu_mgr.h"
38 #include "vgpu/rpc.h"
39 #include "core/thread_state.h"
40 #include "nvRmReg.h"
41 #include "gpu/fsp/kern_fsp.h"
42 #include "gpu/pmu/kern_pmu.h"
43 #include "gpu/mem_mgr/phys_mem_allocator/numa.h"
44 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
45 #include "kernel/rmapi/rs_utils.h"
46 #include "rmapi/rmapi_utils.h"
47 #include "mmu/gmmu_fmt.h"
48 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
49 #include "class/cl503c.h"
50 #include "class/cl906f.h" // GF100_CHANNEL_GPFIFO
51 #include "os/os.h"
52 #include "gpu/gsp/kernel_gsp.h"
53 #include "gpu/conf_compute/conf_compute.h"
54 #include "platform/sli/sli.h"
55
56 #include "class/cl0050.h"
57
58 static NV_STATUS _memmgrCreateFBSR(MemoryManager *pMemoryManager, NvU32);
59 static NV_STATUS _memmgrCreateChildObjects(MemoryManager *pMemoryManager);
60 static void _memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager);
61 static NV_STATUS _memmgrInitMIGMemoryPartitionHeap(OBJGPU *pGpu, MemoryManager *pMemoryManager,
62 NvU32 swizzId, NV_RANGE *pAddrRange,
63 Heap **ppMemoryPartitionHeap);
64 static NV_STATUS _memmgrAllocInternalClientObjects(OBJGPU *pGpu,
65 MemoryManager *pMemoryManager);
66 static void _memmgrFreeInternalClientObjects(MemoryManager *pMemoryManager);
67 static void _memmgrInitRUSDHeapSize(OBJGPU *pGpu, MemoryManager *pMemoryManager);
68
69 #define MEMUTILS_CHANNEL_GPFIFO_SIZE (NV906F_GP_ENTRY__SIZE * MEMUTILS_NUM_GPFIFIO_ENTRIES)
70
71 NV_STATUS
memmgrConstructEngine_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,ENGDESCRIPTOR engDesc)72 memmgrConstructEngine_IMPL
73 (
74 OBJGPU *pGpu,
75 MemoryManager *pMemoryManager,
76 ENGDESCRIPTOR engDesc
77 )
78 {
79 NV_STATUS rmStatus;
80
81 pMemoryManager->overrideInitHeapMin = 0;
82 pMemoryManager->overrideHeapMax = ~0ULL;
83 pMemoryManager->Ram.fbOverrideSizeMb = ~0ULL;
84
85 // Create the children
86 rmStatus = _memmgrCreateChildObjects(pMemoryManager);
87 if (rmStatus != NV_OK)
88 return rmStatus;
89
90 pMemoryManager->MIGMemoryPartitioningInfo.hClient = NV01_NULL_OBJECT;
91 pMemoryManager->MIGMemoryPartitioningInfo.hDevice = NV01_NULL_OBJECT;
92 pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice = NV01_NULL_OBJECT;
93 pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;
94
95 return NV_OK;
96 }
97
98 void
memmgrDestruct_IMPL(MemoryManager * pMemoryManager)99 memmgrDestruct_IMPL
100 (
101 MemoryManager *pMemoryManager
102 )
103 {
104 NvU32 i;
105
106 for (i = 0; i < NUM_FBSR_TYPES; i++)
107 {
108 objDelete(pMemoryManager->pFbsr[i]);
109 pMemoryManager->pFbsr[i] = NULL;
110 }
111
112 objDelete(pMemoryManager->pHeap);
113 pMemoryManager->pHeap = NULL;
114
115 pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;
116 }
117
/*!
 * @brief Apply registry-key overrides to the MemoryManager configuration.
 *
 * Reads a series of regkeys and adjusts FB size override, scrub-on-free,
 * fast scrubber, sysmem page size, FBSR save/restore modes, PMA enablement,
 * PMA-managed page tables, and global CeUtils enablement.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pMemoryManager MemoryManager pointer
 */
static void
_memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager)
{
    NvU32 data32;

    // Check for ram size override.
    if ((osReadRegistryDword(pGpu, NV_REG_STR_OVERRIDE_FB_SIZE, &data32) == NV_OK) &&
        (data32 != 0))
    {
        NV_PRINTF(LEVEL_WARNING, "Regkey %s = %dM\n",
                  NV_REG_STR_OVERRIDE_FB_SIZE, data32);
        // Used to override heap sizing at create
        pMemoryManager->Ram.fbOverrideSizeMb = data32;
    }
    else
    {
        // ~0ULL means "no override requested".
        pMemoryManager->Ram.fbOverrideSizeMb = ~0ULL;
    }

    //
    // Scrub on Free is enabled by default for GK110+
    // The reg key will be used to disable the scrub on free
    //
    if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_SCRUB_ON_FREE,
                             &data32) == NV_OK) && data32)
    {
        pMemoryManager->bScrubOnFreeEnabled = NV_FALSE;
    }

    // Regkey disables the fast scrubber path (opt-out, like scrub-on-free above).
    if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_FAST_SCRUBBER,
                             &data32) == NV_OK) && data32)
    {
        pMemoryManager->bFastScrubberEnabled = NV_FALSE;
    }

    // Sysmem page size override: only the listed sizes are accepted.
    if (NV_OK == osReadRegistryDword(pGpu, NV_REG_STR_RM_SYSMEM_PAGE_SIZE, &data32))
    {
        switch (data32)
        {
            case RM_PAGE_SIZE:
            case RM_PAGE_SIZE_64K:
            case RM_PAGE_SIZE_HUGE:
            case RM_PAGE_SIZE_512M:
                break;
            default:
                // Unsupported value: assert in debug builds, fall back to 4KB.
                NV_ASSERT(0);
                NV_PRINTF(LEVEL_ERROR,
                          "Sysmem page size 0x%x not supported! Defaulting to 4KB\n",
                          data32);
                data32 = RM_PAGE_SIZE;
        }
        pMemoryManager->sysmemPageSize = data32;
    }
    else
    {
        pMemoryManager->sysmemPageSize = RM_PAGE_SIZE;

    }

    // Opt-in for huge-page sysmem allocations.
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ALLOW_SYSMEM_LARGE_PAGES, &data32) == NV_OK)
    {
        pMemoryManager->bAllowSysmemHugePages = data32 ? NV_TRUE : NV_FALSE;
    }
    else
    {
        pMemoryManager->bAllowSysmemHugePages = NV_FALSE;
    }

    // This key should not be used on physical (GSP) RM.
    if (!RMCFG_FEATURE_PLATFORM_GSP)
    {
        // Allow user to increase size of RM reserved heap via a regkey
        if (osReadRegistryDword(pGpu, NV_REG_STR_RM_INCREASE_RSVD_MEMORY_SIZE_MB,
                                &data32) == NV_OK)
        {
            // Regkey value is in MB; stored as bytes.
            pMemoryManager->rsvdMemorySizeIncrement = (NvU64)data32 << 20;
            NV_PRINTF(LEVEL_ERROR,
                      "User specified increase in reserved size = %d MBs\n",
                      data32);
        }
    }

    // Regkey may force all allocations to be physically contiguous.
    if (osReadRegistryDword(pGpu,
                            NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION,
                            &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION_TRUE)
        {
            pMemoryManager->bAllowNoncontiguousAllocation = NV_FALSE;
        }
    }

    // FBSR paged-DMA save/restore mode toggle.
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_PAGED_DMA, &data32) == NV_OK)
    {
        pMemoryManager->bEnableFbsrPagedDma = !!data32;
    }

    // FBSR file-backed mode; only honored on UNIX platforms.
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_FILE_MODE, &data32) == NV_OK)
    {
        if (data32 && RMCFG_FEATURE_PLATFORM_UNIX)
        {
            pMemoryManager->bEnableFbsrFileMode = NV_TRUE;
        }
    }

    //
    // Override PMA enable. PDB_PROP_FB_PMA_ENABLED is reconciled with
    // PDB_PROP_FB_PLATFORM_PMA_SUPPORT to decide whether to enable PMA.
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA, &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_ENABLE_PMA_YES)
        {
            pMemoryManager->bPmaEnabled = NV_TRUE;
        }
        else
        {
            pMemoryManager->bPmaEnabled = NV_FALSE;
        }
    }

    // On Windows outside TCC mode, default FBSR to the WDDM flavor.
    if (RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
    {
        pMemoryManager->bFbsrWddmModeEnabled = NV_TRUE;
    }

    // Regkey can force WDDM mode on or off, overriding the default above.
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_WDDM_MODE, &data32) == NV_OK)
    {
        pMemoryManager->bFbsrWddmModeEnabled = !!data32;
    }

    //
    // Override PMA managed client page tables.
    // NOTE: This is WAR for bug #s 1946145 and 1971628.
    // This should be removed as part of heap removal and PMA refactor.
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES,
                            &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES_NO)
        {
            memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
        }
    }

    // Regkey opt-out for the global CeUtils instance.
    if (osReadRegistryDword(pGpu, NV_REG_STR_DISABLE_GLOBAL_CE_UTILS, &data32) == NV_OK &&
        data32 == NV_REG_STR_DISABLE_GLOBAL_CE_UTILS_YES)
    {
        pMemoryManager->bDisableGlobalCeUtils = NV_TRUE;
    }

    // Self-hosted parts cannot use CE for physical vidmem access.
    pMemoryManager->bCePhysicalVidmemAccessNotSupported = gpuIsSelfHosted(pGpu);
}
271
/*!
 * @brief StatePreInit (locked): set up FB regions (GSP-client/virtual only)
 *        and determine the RM reserved-memory size.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pMemoryManager MemoryManager pointer
 *
 * @returns NV_OK on success; asserts and propagates failures otherwise.
 */
NV_STATUS
memmgrStatePreInitLocked_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
    {
        //
        // Temporary hack to get OpenRM working without breaking SLI
        // After fixing CORERM-4078, memmgrInitFbRegions() call should be removed from memsysStateInitLocked()
        // and only left here
        //
        NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegions(pGpu, pMemoryManager));
    }

    // Determine the size of reserved memory
    NV_ASSERT_OK_OR_RETURN(memmgrPreInitReservedMemory_HAL(pGpu, pMemoryManager));

    return NV_OK;
}
294
/*!
 * @brief Smoke-test the CeUtils instance by round-tripping data through FB.
 *
 * Writes distinct patterns to a vidmem and a sysmem surface, CE-copies the
 * vidmem surface over the sysmem one, reads it back, and verifies the
 * readback matches the vidmem pattern.
 *
 * @returns NV_OK if the copy/readback succeeds and data matches;
 *          error status otherwise.
 */
static NV_STATUS
memmgrTestCeUtils
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    MEMORY_DESCRIPTOR *pVidMemDesc = NULL;
    MEMORY_DESCRIPTOR *pSysMemDesc = NULL;
    TRANSFER_SURFACE   vidSurface  = {0};
    TRANSFER_SURFACE   sysSurface  = {0};
    NvU32              vidmemData  = 0xAABBCCDD;
    NvU32              sysmemData  = 0x11223345;
    NV_STATUS          status;

    NV_ASSERT_OR_RETURN(pMemoryManager->pCeUtils != NULL, NV_ERR_INVALID_STATE);

    if (pMemoryManager->pCeUtils->pLiteKernelChannel != NULL)
    {
        //
        // BUG 4167899: Temporarily skip test in case of lite mode
        // It sometimes fails when called from acrGatherWprInformation_GM200()
        // However, ACR is initialized without issues
        //
        return NV_OK;
    }

    // FB-backed surface that will hold the reference pattern.
    NV_ASSERT_OK_OR_GOTO(status,
        memdescCreate(&pVidMemDesc, pGpu, sizeof vidmemData, RM_PAGE_SIZE, NV_TRUE, ADDR_FBMEM,
                      NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE),
        failed);
    memdescTagAlloc(status,
                    NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_19, pVidMemDesc);
    NV_ASSERT_OK_OR_GOTO(status, status, failed);
    vidSurface.pMemDesc = pVidMemDesc;

    // Sysmem surface used as copy destination and readback buffer.
    NV_ASSERT_OK_OR_GOTO(status,
        memdescCreate(&pSysMemDesc, pGpu, sizeof sysmemData, 0, NV_TRUE, ADDR_SYSMEM,
                      NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE),
        failed);
    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_138,
                    pSysMemDesc);
    NV_ASSERT_OK_OR_GOTO(status, status, failed);
    sysSurface.pMemDesc = pSysMemDesc;

    // Write both patterns, CE-copy vidmem -> sysmem, read back, and compare.
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemWrite(pMemoryManager, &vidSurface, &vidmemData, sizeof vidmemData, TRANSFER_FLAGS_NONE), failed);
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemWrite(pMemoryManager, &sysSurface, &sysmemData, sizeof sysmemData, TRANSFER_FLAGS_NONE), failed);
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemCopy (pMemoryManager, &sysSurface, &vidSurface, sizeof vidmemData, TRANSFER_FLAGS_PREFER_CE), failed);
    NV_ASSERT_OK_OR_GOTO(status, memmgrMemRead (pMemoryManager, &sysSurface, &sysmemData, sizeof sysmemData, TRANSFER_FLAGS_NONE), failed);
    NV_ASSERT_TRUE_OR_GOTO(status, sysmemData == vidmemData, NV_ERR_INVALID_STATE, failed);

failed:
    // Cleanup runs on both success and failure paths (memdesc APIs take NULL).
    memdescFree(pVidMemDesc);
    memdescDestroy(pVidMemDesc);
    memdescFree(pSysMemDesc);
    memdescDestroy(pSysMemDesc);

    return status;
}
354
/*!
 * @brief Bring up MemoryManager's internal channels after scheduling enables.
 *
 * Re-arms the scrubber and, on supported configurations, creates the global
 * CeUtils instance used for internal CE transfers.
 *
 * @returns NV_OK (also when CeUtils creation is intentionally skipped).
 */
NV_STATUS
memmgrInitInternalChannels_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePostSchedulingEnable_HAL(pGpu, pMemoryManager));

    // Platforms/configs where a global CeUtils instance is not supported.
    if (pMemoryManager->bDisableGlobalCeUtils ||
        pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
        pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) ||
        pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB) ||
        gpuIsCacheOnlyModeEnabled(pGpu) ||
        (IS_VIRTUAL(pGpu) && !IS_VIRTUAL_WITH_FULL_SRIOV(pGpu)) ||
        !IS_SILICON(pGpu) ||
        IsDFPGA(pGpu))
    {
        NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation (unsupported platform)\n");

        return NV_OK;
    }

    // Configurations where creation is known to fail; see bug note below.
    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU) ||
        !memmgrIsPmaInitialized(pMemoryManager) ||
        RMCFG_FEATURE_PLATFORM_GSP ||
        IS_MIG_ENABLED(pGpu) ||
        gpuIsCCorApmFeatureEnabled(pGpu) ||
        IsSLIEnabled(pGpu) ||
        IsUnlinkedSLIEnabled(pGpu) ||
        gpuIsSelfHosted(pGpu) ||
        NVCPU_IS_PPC64LE)
    {
        // BUG 4167899: Temporarily skip CeUtils creation on platforms where it fails
        NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation\n");

        return NV_OK;
    }

    NV_PRINTF(LEVEL_INFO, "Initializing global CeUtils instance\n");

    NV_ASSERT_OK_OR_RETURN(memmgrInitCeUtils(pMemoryManager, NV_FALSE));

    return NV_OK;
}
400
/*!
 * @brief Tear down MemoryManager's internal channels before scheduling
 *        disables: destroy the global CeUtils instance, then quiesce the
 *        scrubber.
 *
 * @returns NV_OK on success; asserts and propagates scrubber failures.
 */
NV_STATUS
memmgrDestroyInternalChannels_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_PRINTF(LEVEL_INFO, "Destroying global CeUtils instance\n");

    memmgrDestroyCeUtils(pMemoryManager, NV_FALSE);

    NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePreSchedulingDisable_HAL(pGpu, pMemoryManager));

    return NV_OK;
}
416
417 static NV_STATUS
memmgrPostSchedulingEnableHandler(OBJGPU * pGpu,void * pUnusedData)418 memmgrPostSchedulingEnableHandler
419 (
420 OBJGPU *pGpu,
421 void *pUnusedData
422 )
423 {
424 return memmgrInitInternalChannels(pGpu, GPU_GET_MEMORY_MANAGER(pGpu));
425 }
426
427 static NV_STATUS
memmgrPreSchedulingDisableHandler(OBJGPU * pGpu,void * pUnusedData)428 memmgrPreSchedulingDisableHandler
429 (
430 OBJGPU *pGpu,
431 void *pUnusedData
432 )
433 {
434 return memmgrDestroyInternalChannels(pGpu, GPU_GET_MEMORY_MANAGER(pGpu));
435 }
436
/*!
 * @brief StateInit (locked): reserved memory, registry overrides, heap,
 *        page-level pools, FBSR engines, RUSD, and internal client objects.
 *
 * Steps are order-dependent: registry overrides and dynamic page offlining
 * must precede heap creation; internal client objects come last so a
 * failure can unwind via memmgrStateDestroy().
 *
 * @returns NV_OK on success, error status otherwise.
 */
NV_STATUS
memmgrStateInitLocked_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS status = NV_OK;
    NvU32     i;
    NvBool    bDynamicPageOffliningDisable = NV_FALSE;

    NV_ASSERT_OK_OR_RETURN(memmgrInitReservedMemory_HAL(pGpu, pMemoryManager, pMemoryManager->Ram.fbAddrSpaceSizeMb << 20));

    _memmgrInitRegistryOverrides(pGpu, pMemoryManager);

    //
    // Enable dynamic page blacklisting at this point before we call CreateHeap
    // since it internally calls heapGetBlacklistPages which depends on this property
    //
    if (!bDynamicPageOffliningDisable)
        memmgrEnableDynamicPageOfflining_HAL(pGpu, pMemoryManager);

    memmgrScrubRegistryOverrides_HAL(pGpu, pMemoryManager);

    memmgrScrubInit_HAL(pGpu, pMemoryManager);
    // Register the scheduling-enable/disable handlers that manage internal channels.
    NV_ASSERT_OK_OR_RETURN(kfifoAddSchedulingHandler(pGpu,
                           GPU_GET_KERNEL_FIFO(pGpu),
                           memmgrPostSchedulingEnableHandler, NULL,
                           memmgrPreSchedulingDisableHandler, NULL));

    //
    // Allocate framebuffer heap. All memory must be allocated from here to keep the world
    // consistent (N.B. the heap size has been reduced by the amount of instance memory).
    //
    status = memmgrCreateHeap(pMemoryManager);
    if (status != NV_OK)
    {
        return status;
    }

    //
    // Just set up the memory pool now (basic init stuff). Actual physical
    // frames are *NOT* added to the pool at this stage.
    //
    status = memmgrPageLevelPoolsCreate(pGpu, pMemoryManager);
    if (status != NV_OK)
    {
        return status;
    }

    // RMCONFIG: only if FBSR engine is enabled
    if (RMCFG_MODULE_FBSR)
    {
        //
        // If a configuration is not supported, do not initialize
        // the corresponding fbsr engine.
        //
        // Pick the first (preferred) FBSR mode; lower-priority modes follow.
        if (pMemoryManager->bFbsrWddmModeEnabled)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_WDDM_FAST_DMA_DEFERRED_NONPAGED;
        }
        else if (pMemoryManager->bEnableFbsrPagedDma)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_PAGED_DMA;
        }
        else if (pMemoryManager->bEnableFbsrFileMode)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_FILE;
        }
        else
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_PERSISTENT;
        }

        // Initialize the chosen mode plus every fallback after it.
        for (i = pMemoryManager->fbsrStartMode; i < NUM_FBSR_TYPES; i++)
        {
            if (!pMemoryManager->bPersistentStandbyBuffer &&
                (i == FBSR_TYPE_PERSISTENT))
            {
                continue;
            }

            // DMA-based schemes need working FB.
            if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
                (i == FBSR_TYPE_PAGED_DMA || i == FBSR_TYPE_DMA))
            {
                continue;
            }

            status = fbsrInit_HAL(pGpu, pMemoryManager->pFbsr[i]);

            //
            // If one fbsr scheme failed, proceed to initializing the other
            // fallback options.
            //
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_WARNING,
                          "fbsrInit failed for supported type %d suspend-resume scheme\n",
                          i);
                continue;
            }
        }
    }

    status = gpuCreateRusdMemory_HAL(pGpu);
    if (status != NV_OK)
    {
        return status;
    }

    if (memmgrIsPmaInitialized(pMemoryManager))
    {
        _memmgrInitRUSDHeapSize(pGpu, pMemoryManager);
    }

    status = _memmgrAllocInternalClientObjects(pGpu, pMemoryManager);
    if (status != NV_OK)
    {
        //
        // TODO: Bug 3482892: Need a way to roll back StateInit
        //  steps in case of a failure
        // WAR for now is to cleanup with memmgrStateDestroy().
        //
        memmgrStateDestroy(pGpu, pMemoryManager);
        return status;
    }

    return NV_OK;
}
566
/*!
 * @brief Verify GSP-backed DMA read/write works when BAR access is blocked.
 *
 * Writes a known value into FB through memmgrMemWrite() and reads it back
 * through memmgrMemRead(), comparing the result. Skipped (NV_OK) when CPU
 * BAR access to vidmem is available, since GSP DMA is not needed then.
 *
 * @returns NV_OK on success or when the check is skipped;
 *          NV_ERR_INVALID_STATE on data mismatch; other errors propagated.
 */
NV_STATUS
memmgrVerifyGspDmaOps_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NV_STATUS status = NV_OK;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU8 *pTestBuffer;
    NvU32 testData = 0xdeadbeef;
    TRANSFER_SURFACE surf = {0};

    //
    // Return early if CPU access to CPR vidmem is allowed as GSP DMA
    // is not needed in this case
    //
    if (!kbusIsBarAccessBlocked(pKernelBus))
        return NV_OK;

    pTestBuffer = portMemAllocNonPaged(4096);
    NV_ASSERT_OR_RETURN(pTestBuffer != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

    portMemSet(pTestBuffer, 0, 4096);

    status = memdescCreate(&pMemDesc, pGpu, RM_PAGE_SIZE, RM_PAGE_SIZE,
                           NV_TRUE, ADDR_FBMEM, NV_MEMORY_UNCACHED, 0);
    // NOTE(review): early return here leaks pTestBuffer on failure — confirm.
    NV_ASSERT_OR_RETURN(status == NV_OK, status);

    memdescTagAlloc(status,
                    NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_20, pMemDesc);
    NV_ASSERT_OR_GOTO(status == NV_OK, failed);

    surf.pMemDesc = pMemDesc;
    surf.offset = sizeof(NvU32); // Choosing a random offset

    // Write test data to FB using GSP
    status = memmgrMemWrite(pMemoryManager, &surf, &testData, sizeof(NvU32),
                            TRANSFER_FLAGS_NONE);
    NV_ASSERT_OR_GOTO(status == NV_OK, failed);

    // Read the same location using GSP and confirm that GSP read is also working fine
    status = memmgrMemRead(pMemoryManager, &surf, pTestBuffer, sizeof(NvU32),
                           TRANSFER_FLAGS_NONE);
    NV_ASSERT_OR_GOTO(status == NV_OK, failed);

    if (((NvU32*)pTestBuffer)[0] != testData)
    {
        NV_PRINTF(LEVEL_ERROR, "####################################################\n");
        NV_PRINTF(LEVEL_ERROR, "    Read back of data using GSP shows mismatch\n");
        NV_PRINTF(LEVEL_ERROR, "    Test data: 0x%x Read Data: 0x%x\n", testData, ((NvU32*)pTestBuffer)[0]);
        NV_PRINTF(LEVEL_ERROR, "####################################################\n");
        status = NV_ERR_INVALID_STATE;
        // Deliberate assert-and-jump: status was just set to an error above.
        NV_ASSERT_OR_GOTO(status == NV_OK, failed);
    }
    else
    {
        NV_PRINTF(LEVEL_INFO, "####################################################\n");
        NV_PRINTF(LEVEL_INFO, "    Read back of data using GSP confirms write\n");
        NV_PRINTF(LEVEL_INFO, "####################################################\n");
    }

failed:
    // Shared cleanup for success and failure paths.
    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
    portMemFree(pTestBuffer);

    return status;
}
637
/*!
 * @brief StateLoad: finalize FB size overrides and re-run init scrubs on
 *        power-preserving, non-GC6 resume paths.
 *
 * @param[in] flags GPU_STATE_FLAGS_* transition flags
 *
 * @returns NV_OK
 */
NV_STATUS
memmgrStateLoad_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 flags
)
{
    // If fbOverrideSizeMb is set, finish setting up the FB parameters now that state init has finished
    memmgrFinishHandleSizeOverrides_HAL(pGpu, pMemoryManager);

    if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
        !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
    {
        //
        // Only do initialization scrubs (i.e. RM reserved region) on
        // non-GC6 transitions since GC6 cycles leave FB powered.
        //
        memmgrScrubInit_HAL(pGpu, pMemoryManager);
    }

    // Dump FB regions
    memmgrDumpFbRegions(pGpu, pMemoryManager);

    return NV_OK;
}
664
/*!
 * @brief StatePreUnload: drop CeUtils ahead of FBSR and notify PMA about
 *        NUMA memory going offline.
 *
 * @param[in] flags GPU_STATE_FLAGS_* transition flags
 *
 * @returns NV_OK
 */
NV_STATUS
memmgrStatePreUnload_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 flags
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);

    // ZBC surfaces must already be gone unless FB contents are being preserved.
    NV_ASSERT((flags & GPU_STATE_FLAGS_PRESERVING) || pMemoryManager->zbcSurfaces == 0);

    if ((flags & GPU_STATE_FLAGS_PRESERVING))
    {
        //
        // fifo won't send a PreSchedulingDisable callback on StateUnload
        // destroy the channel manually, so that a CeUtils lite instance can be created for FBSR
        //
        memmgrDestroyCeUtils(pMemoryManager, !IS_VIRTUAL(pGpu));
    }

    if (memmgrIsPmaEnabled(pMemoryManager) &&
        memmgrIsPmaSupportedOnPlatform(pMemoryManager) &&
        osNumaOnliningEnabled(pGpu->pOsGpuInfo) &&
        pKernelMemorySystem->memPartitionNumaInfo[0].bInUse)
    {
        // Tell PMA that the NUMA-onlined FB memory is being taken offline.
        pmaNumaOfflined(&pMemoryManager->pHeap->pmaObject);
    }

    return NV_OK;
}
696
/*!
 * @brief StateUnload: destroy the scrubber on power-preserving, non-GC6
 *        transitions (mirrors the scrub init done in memmgrStateLoad).
 *
 * @param[in] flags GPU_STATE_FLAGS_* transition flags
 *
 * @returns NV_OK
 */
NV_STATUS
memmgrStateUnload_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 flags
)
{
    if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
        !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
    {
        //
        // Initialization scrubs only happen during StateLoad on non-GC6
        // transitions.
        //
        memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
    }

    return NV_OK;
}
717
/*!
 * @brief StateDestroy: release internal clients, RUSD, page-level pools,
 *        heap, FBSR reserved ranges/engines, EGM peer id, scheduling
 *        handlers, and the scrubber.
 */
void
memmgrStateDestroy_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    Heap               *pHeap               = MEMORY_MANAGER_GET_HEAP(pMemoryManager);
    NvU32               i;

    _memmgrFreeInternalClientObjects(pMemoryManager);

    gpuDestroyRusdMemory(pGpu);

    // Destroys the SW state of the page level pools
    memmgrPageLevelPoolsDestroy(pGpu, pMemoryManager);

    // Destroy the heap entirely, and all associated structures
    if (pHeap)
    {
        kmemsysPreHeapDestruct_HAL(pGpu, pKernelMemorySystem);

        objDelete(pHeap);
        pMemoryManager->pHeap = NULL;
    }

    // RMCONFIG: only if FBSR engine is enabled
    if (RMCFG_MODULE_FBSR)
    {
        // Cleanup fbsrReservedRanges
        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]);

        // Null out each slot so later teardown paths cannot double-destroy.
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE]  = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]   = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]        = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]     = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]         = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]   = NULL;

        for (i = 0; i < NUM_FBSR_TYPES; i++)
        {
            fbsrDestroy_HAL(pGpu, pMemoryManager->pFbsr[i]);
        }
    }
    if (memmgrIsLocalEgmEnabled(pMemoryManager))
    {
        if (!IS_VIRTUAL_WITH_SRIOV(pGpu))
        {
            // Give back the peer id reserved for local EGM access.
            kbusUnreserveP2PPeerIds_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), NVBIT(pMemoryManager->localEgmPeerId));
        }
        pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
        pMemoryManager->bLocalEgmEnabled = NV_FALSE;
    }

    // Unregister the scheduling handlers installed during StateInit.
    kfifoRemoveSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
                                 memmgrPostSchedulingEnableHandler, NULL,
                                 memmgrPreSchedulingDisableHandler, NULL);
    memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
}
794
795 static NV_STATUS
_memmgrCreateChildObjects(MemoryManager * pMemoryManager)796 _memmgrCreateChildObjects
797 (
798 MemoryManager *pMemoryManager
799 )
800 {
801 NV_STATUS status = NV_OK;
802
803 // RMCONFIG: only if FBSR engine is enabled
804 if (RMCFG_MODULE_FBSR)
805 {
806 NvU32 i;
807
808 // Create FBSR object for every type RM supports.
809 for (i = 0; i < NUM_FBSR_TYPES; i++)
810 {
811 status = _memmgrCreateFBSR(pMemoryManager, i);
812 if (status != NV_OK)
813 {
814 return status;
815 }
816 }
817 }
818
819 return status;
820 }
821
/*!
 * @brief Create and initialize the FB heap, including PMA setup, region
 *        registration, FSP/PMU reservations, and page blacklisting.
 *
 * Heap size is the FB address-space size minus reserved memory (and vGPU
 * FB tax). The heap is only created when that size is non-zero or scanout
 * from sysmem is enabled.
 *
 * @returns NV_OK on success, error status otherwise.
 */
NV_STATUS
memmgrCreateHeap_IMPL
(
    MemoryManager *pMemoryManager
)
{
    Heap               *newHeap;
    OBJGPU             *pGpu                = ENG_GET_GPU(pMemoryManager);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NvU64               rsvdSize;
    NvU64               size;
    NV_STATUS           status              = NV_OK;

    // If we're using FB regions then rsvd memory is already marked as a reserved region
    if ((pMemoryManager->Ram.numFBRegions == 0) || (IS_VIRTUAL_WITH_SRIOV(pGpu)))
    {
        if (pMemoryManager->bReservedMemAtBottom)
        {
            // rsvd memory is already accounted for in heapStart
            rsvdSize = 0;
        }
        else
        {
            rsvdSize = pMemoryManager->rsvdMemorySize;
        }
    }
    else
        rsvdSize = 0;

    // for vGPU, add extra FB tax incurred by host RM to reserved size
    rsvdSize += memmgrGetFbTaxSize_HAL(pGpu, pMemoryManager);

    //
    // Fix up region descriptions to match with any FB override size
    //
    memmgrHandleSizeOverrides_HAL(pGpu, pMemoryManager);

    //
    // Calculate the FB heap size as the address space size, then deduct any reserved memory
    //
    size = pMemoryManager->Ram.fbAddrSpaceSizeMb << 20;
    // NV_MIN guards against underflow when reserved size exceeds FB size.
    size -= NV_MIN(size, rsvdSize);

    if((size != 0) || (pMemoryManager->bScanoutSysmem))
    {
        status = objCreate(&newHeap, pMemoryManager, Heap);
        if (status != NV_OK)
        {
            return status;
        }

        pMemoryManager->pHeap = newHeap;

        if (memmgrIsPmaEnabled(pMemoryManager) &&
            memmgrIsPmaSupportedOnPlatform(pMemoryManager))
        {
            // PMA object must be zeroed and initialized before heapInit.
            portMemSet(&pMemoryManager->pHeap->pmaObject, 0, sizeof(pMemoryManager->pHeap->pmaObject));
            status = memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryManager->pHeap->pmaObject);
            NV_ASSERT_OR_RETURN(status == NV_OK, status);
        }

        // Heap manages [heapStartOffset, size); the offset is excluded from its span.
        status = heapInit(pGpu, newHeap,
                          pMemoryManager->heapStartOffset,
                          size - pMemoryManager->heapStartOffset, HEAP_TYPE_RM_GLOBAL, GPU_GFID_PF, NULL);
        NV_ASSERT_OK_OR_RETURN(status);

        if ((memmgrIsPmaInitialized(pMemoryManager)) && (pMemoryManager->pHeap->bHasFbRegions))
        {
            status = memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryManager->pHeap,
                                              &pMemoryManager->pHeap->pmaObject);
            NV_ASSERT_OR_RETURN(status == NV_OK, status);
        }

        NV_ASSERT_OK_OR_RETURN(memmgrValidateFBEndReservation_HAL(pGpu, pMemoryManager));

        NV_ASSERT_OK_OR_RETURN(memmgrReserveMemoryForPmu_HAL(pGpu, pMemoryManager));

        // Reserve vidmem for FSP usage, including FRTS, WPR2
        status = memmgrReserveMemoryForFsp(pGpu, pMemoryManager);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to reserve vidmem for WPR and FRTS.\n");
            return status;
        }

        if (!IsSLIEnabled(pGpu))
        {
            // Do the actual blacklisting of pages from the heap
            if (newHeap->blackListAddresses.count != 0)
            {
                status = heapBlackListPages(pGpu, newHeap);

                if (status != NV_OK)
                {
                    // Warn and continue
                    NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
                              status);
                }
            }
        }

        kmemsysPostHeapCreate_HAL(pGpu, pKernelMemorySystem);
    }

    return status;
}
928
929 /*
930 * @brief Gets per-device suballocator. If it is not available, get shared heap.
931 *
932 * @param[in] pMemoryManager MemoryManager pointer
933 */
934 Heap *
memmgrGetDeviceSuballocator_IMPL(MemoryManager * pMemoryManager,NvBool bForceSubheap)935 memmgrGetDeviceSuballocator_IMPL
936 (
937 MemoryManager *pMemoryManager,
938 NvBool bForceSubheap
939 )
940 {
941
942 if (!bForceSubheap)
943 {
944 // If no suballocator found, use heap
945 return MEMORY_MANAGER_GET_HEAP(pMemoryManager);
946 }
947
948 return NULL;
949 }
950
951 static NV_STATUS
_memmgrCreateFBSR(MemoryManager * pMemoryManager,NvU32 type)952 _memmgrCreateFBSR
953 (
954 MemoryManager *pMemoryManager,
955 NvU32 type
956 )
957 {
958 OBJFBSR *pFbsr;
959 NV_STATUS status;
960
961 status = objCreate(&pFbsr, pMemoryManager, OBJFBSR);
962 if (status != NV_OK)
963 {
964 return status;
965 }
966
967 NV_ASSERT(pFbsr);
968 pMemoryManager->pFbsr[type] = pFbsr;
969
970 fbsrObjectInit(pFbsr, type);
971
972 return NV_OK;
973 }
974
975 static void
_memmgrFreeInternalClientObjects(MemoryManager * pMemoryManager)976 _memmgrFreeInternalClientObjects
977 (
978 MemoryManager *pMemoryManager
979 )
980 {
981 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
982
983 if (pMemoryManager->hThirdPartyP2P != 0)
984 {
985 pRmApi->Free(pRmApi, pMemoryManager->hClient,
986 pMemoryManager->hThirdPartyP2P);
987 pMemoryManager->hThirdPartyP2P = 0;
988 }
989
990 if (pMemoryManager->hClient != 0)
991 {
992 rmapiutilFreeClientAndDeviceHandles(pRmApi,
993 &pMemoryManager->hClient,
994 &pMemoryManager->hDevice,
995 &pMemoryManager->hSubdevice);
996 }
997 }
998
999 static NV_STATUS
_memmgrAllocInternalClientObjects(OBJGPU * pGpu,MemoryManager * pMemoryManager)1000 _memmgrAllocInternalClientObjects
1001 (
1002 OBJGPU *pGpu,
1003 MemoryManager *pMemoryManager
1004 )
1005 {
1006 NV_STATUS status;
1007 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1008
1009 status = rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
1010 &pMemoryManager->hClient,
1011 &pMemoryManager->hDevice,
1012 &pMemoryManager->hSubdevice);
1013 if (status != NV_OK)
1014 {
1015 goto failed;
1016 }
1017
1018 {
1019 NV503C_ALLOC_PARAMETERS params;
1020 NvHandle hThirdPartyP2P = 0;
1021
1022 NV_ASSERT_OK_OR_GOTO(status, serverutilGenResourceHandle(pMemoryManager->hClient,
1023 &hThirdPartyP2P),
1024 failed);
1025
1026 portMemSet(¶ms, 0, sizeof(params));
1027 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
1028 {
1029 params.flags = NV503C_ALLOC_PARAMETERS_FLAGS_TYPE_NVLINK;
1030 }
1031 else
1032 {
1033 params.flags = NV503C_ALLOC_PARAMETERS_FLAGS_TYPE_BAR1;
1034 }
1035 status = pRmApi->AllocWithHandle(pRmApi,
1036 pMemoryManager->hClient,
1037 pMemoryManager->hSubdevice,
1038 hThirdPartyP2P,
1039 NV50_THIRD_PARTY_P2P,
1040 ¶ms,
1041 sizeof(params));
1042 if (status != NV_OK)
1043 {
1044 NV_PRINTF(LEVEL_WARNING, "Error creating internal ThirdPartyP2P object: %x\n",
1045 status);
1046 pMemoryManager->hThirdPartyP2P = 0;
1047 }
1048 else
1049 {
1050 pMemoryManager->hThirdPartyP2P = hThirdPartyP2P;
1051 }
1052
1053 }
1054
1055 return NV_OK;
1056
1057 failed:
1058 _memmgrFreeInternalClientObjects(pMemoryManager);
1059
1060 return status;
1061 }
1062
1063 /*!
1064 * @brief Determine size of FB RAM which is used for RM internal allocations
1065 * and PMA.
1066 *
1067 * @param[out] pFbUsedSize FB used memory size
1068 *
1069 * @returns NV_OK
1070 */
1071 NV_STATUS
memmgrGetUsedRamSize_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 * pFbUsedSize)1072 memmgrGetUsedRamSize_IMPL
1073 (
1074 OBJGPU *pGpu,
1075 MemoryManager *pMemoryManager,
1076 NvU64 *pFbUsedSize
1077 )
1078 {
1079 Heap *pHeap = GPU_GET_HEAP(pGpu);
1080 NvU64 heapFreeSpace, heapTotalSpace, pmaFreeSpace;
1081
1082 //
1083 // Determine free memory in FB and substract with total FB memory.
1084 // If PMA is initialized, then use the free memory size in PMA and
1085 // heap otherwise only use heap free memory for calculation.
1086 //
1087 heapGetFree(pHeap, &heapFreeSpace);
1088 heapGetSize(pHeap, &heapTotalSpace);
1089 if (memmgrIsPmaInitialized(pMemoryManager))
1090 {
1091 pmaGetFreeMemory(&pHeap->pmaObject, &pmaFreeSpace);
1092 *pFbUsedSize = heapTotalSpace - heapFreeSpace - pmaFreeSpace;
1093 }
1094 else
1095 {
1096 *pFbUsedSize = heapTotalSpace - heapFreeSpace;
1097 }
1098
1099 //
1100 // GSP's WPR region has its own save/restore mechanism and does not need
1101 // to be accounted for in total FB size used - which is needed to find out
1102 // how much SYSMEM needs to be allocated to save all FB memory
1103 //
1104 if (IS_GSP_CLIENT(pGpu))
1105 {
1106 KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);
1107 NvU64 gspWprRegionSize = pKernelGsp->pWprMeta->gspFwWprEnd - pKernelGsp->pWprMeta->gspFwWprStart;
1108
1109 *pFbUsedSize = *pFbUsedSize - gspWprRegionSize;
1110 }
1111
1112 return NV_OK;
1113 }
1114
1115 NV_STATUS
memmgrAllocHwResources_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,FB_ALLOC_INFO * pFbAllocInfo)1116 memmgrAllocHwResources_IMPL
1117 (
1118 OBJGPU *pGpu,
1119 MemoryManager *pMemoryManager,
1120 FB_ALLOC_INFO *pFbAllocInfo
1121 )
1122 {
1123 MemoryManager *pMemoryManagerLoop;
1124 FB_ALLOC_INFO *pTempInfo = NULL;
1125 NvU32 skipFlag = (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC);
1126 NV_STATUS rmStatus = NV_OK;
1127
1128 pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1129 if (pTempInfo == NULL)
1130 {
1131 NV_ASSERT(0);
1132 return NV_ERR_NO_MEMORY;
1133 }
1134
1135 // vGPU:
1136 //
1137 // Since vGPU does all real hardware management in the
1138 // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
1139 // we can skip the resource allocation step.
1140 //
1141
1142 if (IS_VIRTUAL(pGpu) && !vgpuIsGuestManagedHwAlloc(pGpu))
1143 {
1144 pFbAllocInfo->pageFormat->flags |= NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
1145 }
1146
1147 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1148 {
1149 NV_STATUS tempStatus;
1150 *pTempInfo = *pFbAllocInfo; // struct copy
1151
1152 pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);
1153
1154 tempStatus = memmgrAllocHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo);
1155 // be sure to return an intermediate error
1156 if (NV_OK == rmStatus)
1157 rmStatus = tempStatus;
1158 }
1159 SLI_LOOP_END
1160
1161 *pFbAllocInfo = *pTempInfo; // struct copy
1162 portMemFree(pTempInfo);
1163
1164 pFbAllocInfo->pageFormat->flags &= ~NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
1165 pFbAllocInfo->pageFormat->flags |= skipFlag;
1166
1167 return rmStatus;
1168 }
1169
1170 NV_STATUS
memmgrFreeHwResources_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,FB_ALLOC_INFO * pFbAllocInfo)1171 memmgrFreeHwResources_IMPL
1172 (
1173 OBJGPU *pGpu,
1174 MemoryManager *pMemoryManager,
1175 FB_ALLOC_INFO *pFbAllocInfo
1176 )
1177 {
1178 MemoryManager *pMemoryManagerLoop;
1179 NV_STATUS rmStatus = NV_OK;
1180 RMTIMEOUT timeout;
1181 FB_ALLOC_INFO *pTempInfo = NULL;
1182
1183 pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1184 if (pTempInfo == NULL)
1185 {
1186 NV_ASSERT(0);
1187 return NV_ERR_NO_MEMORY;
1188 }
1189
1190 gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
1191
1192 if (IS_VIRTUAL(pGpu) && !vgpuIsGuestManagedHwAlloc(pGpu))
1193 {
1194 pFbAllocInfo->pageFormat->flags |= NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
1195 }
1196
1197 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1198 {
1199 NV_STATUS tempStatus;
1200 pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);
1201
1202 *pTempInfo = *pFbAllocInfo;
1203
1204 tempStatus = memmgrFreeHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo, &timeout);
1205 // be sure to return an intermediate error
1206 if (NV_OK == rmStatus)
1207 rmStatus = tempStatus;
1208
1209 }
1210 SLI_LOOP_END
1211
1212 *pFbAllocInfo = *pTempInfo;
1213 portMemFree(pTempInfo);
1214
1215 return rmStatus;
1216 }
1217
1218 NvBool
memmgrLargePageSupported_IMPL(MemoryManager * pMemoryManager,NV_ADDRESS_SPACE addrSpace)1219 memmgrLargePageSupported_IMPL
1220 (
1221 MemoryManager *pMemoryManager,
1222 NV_ADDRESS_SPACE addrSpace
1223 )
1224 {
1225 NvBool isSupported = NV_FALSE;
1226
1227 if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
1228 {
1229 isSupported = NV_TRUE;
1230 }
1231 else if (addrSpace == ADDR_SYSMEM)
1232 {
1233 isSupported = (pMemoryManager->sysmemPageSize != RM_PAGE_SIZE);
1234 }
1235 else
1236 {
1237 NV_ASSERT(0);
1238 }
1239
1240 return isSupported;
1241 }
1242
1243 NvBool
memmgrComprSupported_IMPL(MemoryManager * pMemoryManager,NV_ADDRESS_SPACE addrSpace)1244 memmgrComprSupported_IMPL
1245 (
1246 MemoryManager *pMemoryManager,
1247 NV_ADDRESS_SPACE addrSpace
1248 )
1249 {
1250 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
1251 NvBool isSupported = NV_FALSE;
1252
1253 if (GPU_GET_KERNEL_GMMU(pGpu) != NULL)
1254 {
1255 if (memmgrLargePageSupported(pMemoryManager, addrSpace) ||
1256 pMemoryManager->bSmallPageCompression)
1257 {
1258 if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
1259 {
1260 isSupported = NV_TRUE;
1261 }
1262 else if (addrSpace == ADDR_SYSMEM)
1263 {
1264 // Compression is allowed on vidmem or unified aperture (vidmem/sysmem is same w.r.t HW)
1265 isSupported = (gpuIsUnifiedMemorySpaceEnabled(pGpu) &&
1266 pMemoryManager->bSysmemCompressionSupportDef);
1267 NV_PRINTF(LEVEL_INFO, "isSupported=%s\n",
1268 isSupported ? "NV_TRUE" : "NV_FALSE");
1269 }
1270 else
1271 {
1272 NV_ASSERT(0);
1273 }
1274 }
1275 }
1276
1277 return isSupported;
1278 }
1279
1280 NV_ADDRESS_SPACE
memmgrAllocGetAddrSpace_IMPL(MemoryManager * pMemoryManager,NvU32 flags,NvU32 attr)1281 memmgrAllocGetAddrSpace_IMPL
1282 (
1283 MemoryManager *pMemoryManager,
1284 NvU32 flags,
1285 NvU32 attr
1286 )
1287 {
1288 NV_ADDRESS_SPACE addrSpace = ADDR_UNKNOWN;
1289
1290 if (flags & NVOS32_ALLOC_FLAGS_VIRTUAL)
1291 {
1292 addrSpace = ADDR_VIRTUAL;
1293 }
1294 else if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, attr))
1295 {
1296 addrSpace = ADDR_FBMEM;
1297 }
1298 else
1299 {
1300 // In case location is SYSMEM or ANY, allocate in vidmem if protected flag is set.
1301 if (flags & NVOS32_ALLOC_FLAGS_PROTECTED)
1302 {
1303 addrSpace = ADDR_FBMEM;
1304 }
1305 else
1306 {
1307 addrSpace = ADDR_SYSMEM;
1308 }
1309 }
1310
1311 return addrSpace;
1312 }
1313
1314 NvU32
memmgrGetMappableRamSizeMb_IMPL(MemoryManager * pMemoryManager)1315 memmgrGetMappableRamSizeMb_IMPL(MemoryManager *pMemoryManager)
1316 {
1317 return NvU64_LO32(pMemoryManager->Ram.mapRamSizeMb);
1318 }
1319 //
1320 // ZBC clear create/destroy routines.
1321 //
1322
1323 NV_STATUS
memmgrFillMemdescForPhysAttr_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,PMEMORY_DESCRIPTOR pMemDesc,ADDRESS_TRANSLATION addressTranslation,NvU64 * pOffset,NvU32 * pMemAperture,NvU32 * pMemKind,NvU32 * pZCullId,NvU32 * pGpuCacheAttr,NvU32 * pGpuP2PCacheAttr,NvU64 * contigSegmentSize)1324 memmgrFillMemdescForPhysAttr_IMPL
1325 (
1326 OBJGPU *pGpu,
1327 MemoryManager *pMemoryManager,
1328 PMEMORY_DESCRIPTOR pMemDesc,
1329 ADDRESS_TRANSLATION addressTranslation,
1330 NvU64 *pOffset,
1331 NvU32 *pMemAperture,
1332 NvU32 *pMemKind,
1333 NvU32 *pZCullId,
1334 NvU32 *pGpuCacheAttr,
1335 NvU32 *pGpuP2PCacheAttr,
1336 NvU64 *contigSegmentSize
1337 )
1338 {
1339 NvU64 surfOffset = *pOffset, surfBase, surfLimit;
1340 NvU32 zcbitmap;
1341
1342 surfBase = memdescGetPhysAddr(pMemDesc, addressTranslation, 0);
1343 surfLimit = surfBase + pMemDesc->Size - 1;
1344 *pMemKind = memdescGetPteKind(pMemDesc);
1345
1346 *pOffset = memdescGetPhysAddr(pMemDesc, addressTranslation, surfOffset);
1347
1348 if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM )
1349 *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_VIDMEM;
1350 else if (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
1351 *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_SYSMEM;
1352 else if (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
1353 *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_SYSMEM;
1354 else if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL )
1355 {
1356 //
1357 // XXX we could theoretically find whatever phys mem object is plugged
1358 // in at surfOffset w/in the virt object... that'd mean scanning
1359 // pMemory->DmaMappingList
1360 //
1361 return NV_ERR_NOT_SUPPORTED;
1362 }
1363 else
1364 return NV_ERR_GENERIC;
1365
1366 if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
1367 {
1368 *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
1369 }
1370 else if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
1371 {
1372 *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
1373 }
1374 else
1375 {
1376 *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
1377 }
1378
1379 if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
1380 {
1381 *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
1382 }
1383 else if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
1384 {
1385 *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
1386 }
1387 else
1388 {
1389 *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
1390 }
1391
1392 zcbitmap = FB_HWRESID_ZCULL_VAL_FERMI(memdescGetHwResId(pMemDesc)); //bitmap form... need a scalar
1393 for ( *pZCullId = 0; zcbitmap; zcbitmap >>= 1, *pZCullId += 1) {;;;}
1394 *pZCullId -= 1; // side effect if there is no zcull id of setting ~0
1395
1396 *contigSegmentSize = surfLimit - (surfBase + surfOffset) + 1;
1397
1398 if ( !memdescGetContiguity(pMemDesc, addressTranslation))
1399 {
1400 // XXX overly conservative. we could scan the PTEs to find out if more pages are contig.
1401 NvU64 surfOffsetLimitSame4KBPage = (4*1024)*((surfBase + surfOffset)/(4*1024)) + (4*1024) - 1;
1402 if ( surfLimit >= surfOffsetLimitSame4KBPage )
1403 *contigSegmentSize = surfOffsetLimitSame4KBPage - (surfBase + surfOffset) + 1;
1404 }
1405
1406 return NV_OK;
1407 }
1408
1409 NvU64
memmgrDeterminePageSize_IMPL(MemoryManager * pMemoryManager,NvHandle hClient,NvU64 memSize,NvU32 memFormat,NvU32 pageFormatFlags,NvU32 * pRetAttr,NvU32 * pRetAttr2)1410 memmgrDeterminePageSize_IMPL
1411 (
1412 MemoryManager *pMemoryManager,
1413 NvHandle hClient,
1414 NvU64 memSize,
1415 NvU32 memFormat,
1416 NvU32 pageFormatFlags,
1417 NvU32 *pRetAttr,
1418 NvU32 *pRetAttr2
1419 )
1420 {
1421 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
1422 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
1423 NV_ADDRESS_SPACE addrSpace;
1424 NvBool bIsBigPageSupported;
1425 RM_ATTR_PAGE_SIZE pageSizeAttr;
1426 NvU64 pageSize = 0;
1427
1428 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_TEGRA_SOC_NVDISPLAY) || (pKernelGmmu == NULL))
1429 {
1430 pageSize = RM_PAGE_SIZE;
1431 }
1432 // Sanity check the arguments.
1433 else if (pRetAttr == NULL || pRetAttr2 == NULL)
1434 {
1435 NV_ASSERT_OR_RETURN(0, 0);
1436 }
1437 else
1438 {
1439 addrSpace = memmgrAllocGetAddrSpace(pMemoryManager, pageFormatFlags, *pRetAttr);
1440
1441 //
1442 // Bug 4270864: Temp hack until sysmem supports higher order allocations.
1443 // We allow EGM to get allocated at higher page size.
1444 //
1445 if (memmgrIsLocalEgmEnabled(pMemoryManager) &&
1446 addrSpace == ADDR_SYSMEM &&
1447 FLD_TEST_DRF(OS32, _ATTR2, _FIXED_NUMA_NODE_ID, _YES, *pRetAttr2) &&
1448 //
1449 // Bug 4270868: MODS has test cases which pass FIXED_NUMA_NODE_ID,
1450 // but invalid node_id. Will remove once MODS tests get fixed.
1451 //
1452 !RMCFG_FEATURE_MODS_FEATURES)
1453 {
1454 bIsBigPageSupported = NV_TRUE;
1455 }
1456 else
1457 {
1458 bIsBigPageSupported = memmgrLargePageSupported(pMemoryManager, addrSpace);
1459 }
1460 pageSizeAttr = dmaNvos32ToPageSizeAttr(*pRetAttr, *pRetAttr2);
1461
1462 //
1463 // Precedence in page size selection
1464 // 1. CACHE_ONLY mode -> SMALL
1465 // 2. !BigPageSupport (Sysmem && GpuSmmuOff ) -> SMALL
1466 // 3. Client page size override -> Use override
1467 // 4. HugePageSupported && size >= HugePageSize -> HUGE
1468 // 5. Block-linear || size >= minSizeForBigPage || hClient || GpuSmmuOn -> BIG
1469 // 6. none of the above -> SMALL
1470 //
1471 // On Tegra, we don't have a carveout/FB in production. So, we're
1472 // not guaranteed to get BIG page sized or contiguous allocations
1473 // from OS. But we need BIG page sized allocations for efficient Big GPU
1474 // operation. We use the SMMU unit within the Tegra Memory Contoller (MC),
1475 // to construct BIG pages from the 4KB small page allocations from OS.
1476 // SMMU will linearize the discontiguous 4KB allocations into what will
1477 // appear to the GPU as a large contiguous physical allocation.
1478 //
1479 // RM will eventually decide whether a SYSMEM allocation needs BIG page
1480 // via GPU SMMU mapping. Right now, we give an option for RM clients to
1481 // force it, via the SMMU_ON_GPU attribute.
1482 //
1483 if (gpuIsCacheOnlyModeEnabled(pGpu))
1484 {
1485 pageSize = RM_PAGE_SIZE;
1486 }
1487 else if (!bIsBigPageSupported)
1488 {
1489 if (RM_ATTR_PAGE_SIZE_BIG == pageSizeAttr ||
1490 RM_ATTR_PAGE_SIZE_HUGE == pageSizeAttr ||
1491 RM_ATTR_PAGE_SIZE_512MB == pageSizeAttr)
1492 {
1493 NV_PRINTF(LEVEL_ERROR,
1494 "Big/Huge/512MB page size not supported in sysmem.\n");
1495
1496 NV_ASSERT_OR_RETURN(0, 0);
1497 }
1498 else
1499 {
1500 pageSize = RM_PAGE_SIZE;
1501 }
1502 }
1503 else
1504 {
1505 switch (pageSizeAttr)
1506 {
1507 case RM_ATTR_PAGE_SIZE_INVALID:
1508 NV_PRINTF(LEVEL_ERROR, "invalid page size attr\n");
1509 NV_ASSERT_OR_RETURN(0, 0);
1510
1511 case RM_ATTR_PAGE_SIZE_DEFAULT:
1512 {
1513 NvBool bUseDefaultHugePagesize = NV_TRUE;
1514 // WDDMV2 Windows it expect default page size to be 4K /64KB /128KB
1515 if (bUseDefaultHugePagesize &&
1516 kgmmuIsHugePageSupported(pKernelGmmu) &&
1517 (memSize >= RM_PAGE_SIZE_HUGE) && (addrSpace != ADDR_SYSMEM ||
1518 pMemoryManager->sysmemPageSize == RM_PAGE_SIZE_HUGE))
1519 {
1520 pageSize = RM_PAGE_SIZE_HUGE;
1521 break;
1522 }
1523 else if ((memFormat != NVOS32_ATTR_FORMAT_PITCH) ||
1524 (memSize >= kgmmuGetMinBigPageSize(pKernelGmmu)) || hClient ||
1525 FLD_TEST_DRF(OS32, _ATTR2, _SMMU_ON_GPU, _ENABLE, *pRetAttr2))
1526 {
1527 pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
1528 break;
1529 }
1530
1531 pageSize = RM_PAGE_SIZE;
1532 break;
1533 }
1534
1535 case RM_ATTR_PAGE_SIZE_4KB:
1536 pageSize = RM_PAGE_SIZE;
1537 break;
1538
1539 case RM_ATTR_PAGE_SIZE_BIG:
1540 pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
1541 break;
1542
1543 case RM_ATTR_PAGE_SIZE_HUGE:
1544 if (kgmmuIsHugePageSupported(pKernelGmmu))
1545 {
1546 pageSize = RM_PAGE_SIZE_HUGE;
1547 }
1548 else
1549 {
1550 NV_ASSERT_OR_RETURN(0, 0);
1551 }
1552 break;
1553
1554 case RM_ATTR_PAGE_SIZE_512MB:
1555 if (kgmmuIsPageSize512mbSupported(pKernelGmmu))
1556 {
1557 pageSize = RM_PAGE_SIZE_512M;
1558 }
1559 else
1560 {
1561 NV_ASSERT_OR_RETURN(0, 0);
1562 }
1563 break;
1564
1565 default:
1566 NV_ASSERT(0);
1567 }
1568 }
1569 }
1570
1571 switch (pageSize)
1572 {
1573 case RM_PAGE_SIZE:
1574 *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, *pRetAttr);
1575 break;
1576
1577 case RM_PAGE_SIZE_64K:
1578 case RM_PAGE_SIZE_128K:
1579 *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _BIG, *pRetAttr);
1580 break;
1581
1582 case RM_PAGE_SIZE_HUGE:
1583 *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
1584 *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB, *pRetAttr2);
1585 break;
1586
1587 case RM_PAGE_SIZE_512M:
1588 *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
1589 *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB, *pRetAttr2);
1590 break;
1591
1592 default:
1593 NV_ASSERT(0);
1594 }
1595
1596 return pageSize;
1597 }
1598
1599 /*!
1600 * Identify if platform's current configuration supports PMA
1601 */
1602 NV_STATUS
memmgrSetPlatformPmaSupport_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)1603 memmgrSetPlatformPmaSupport_IMPL
1604 (
1605 OBJGPU *pGpu,
1606 MemoryManager *pMemoryManager
1607 )
1608 {
1609 //
1610 // KMD in WDDM mode will not support pma managed client page tables as
1611 // in both cases client / OS manges it.
1612 //
1613 if (RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
1614 {
1615 memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
1616 }
1617
1618 //
1619 // FB management should use PMA on Unix/Linux/Mods/Windows
1620 //
1621 if (RMCFG_FEATURE_PLATFORM_UNIX
1622 || RMCFG_FEATURE_PLATFORM_MODS
1623 || RMCFG_FEATURE_PLATFORM_WINDOWS)
1624 {
1625 pMemoryManager->bPmaSupportedOnPlatform = NV_TRUE;
1626 }
1627
1628 //
1629 // PMA memory management is not currently supported in non SRIOV VGPU environment.
1630 // The RPC mechanism needs to be expanded to distinguish allocation types.
1631 // Bug #1735412
1632 //
1633 // TODO : Remove these constraints.
1634 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
1635 {
1636 pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
1637 }
1638
1639 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU))
1640 {
1641 if (pMemoryManager->bVgpuPmaSupport)
1642 {
1643 memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
1644 }
1645 else
1646 {
1647 pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
1648 }
1649 }
1650 return (NV_OK);
1651 }
1652
1653 /*!
1654 * Allocate console region in CPU-RM based on region table passed from Physical RM
1655 */
1656 NV_STATUS
memmgrAllocateConsoleRegion_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,FB_REGION_DESCRIPTOR * pConsoleFbRegion)1657 memmgrAllocateConsoleRegion_IMPL
1658 (
1659 OBJGPU *pGpu,
1660 MemoryManager *pMemoryManager,
1661 FB_REGION_DESCRIPTOR *pConsoleFbRegion
1662 )
1663 {
1664
1665 NV_STATUS status = NV_OK;
1666 NvU32 consoleRegionId = 0x0;
1667 NvU64 regionSize;
1668
1669 if (pMemoryManager->Ram.ReservedConsoleDispMemSize > 0)
1670 {
1671 pConsoleFbRegion->base = pMemoryManager->Ram.fbRegion[consoleRegionId].base;
1672 pConsoleFbRegion->limit = pMemoryManager->Ram.fbRegion[consoleRegionId].limit;
1673
1674 regionSize = pConsoleFbRegion->limit - pConsoleFbRegion->base + 1;
1675
1676 // Once the console is reserved, we don't expect to reserve it again
1677 NV_ASSERT_OR_RETURN(pMemoryManager->Ram.pReservedConsoleMemDesc == NULL,
1678 NV_ERR_STATE_IN_USE);
1679
1680 status = memdescCreate(&pMemoryManager->Ram.pReservedConsoleMemDesc, pGpu,
1681 regionSize, RM_PAGE_SIZE_64K, NV_TRUE, ADDR_FBMEM,
1682 NV_MEMORY_UNCACHED,
1683 MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
1684 if (status != NV_OK)
1685 {
1686 pConsoleFbRegion->base = pConsoleFbRegion->limit = 0;
1687 return status;
1688 }
1689
1690 memdescDescribe(pMemoryManager->Ram.pReservedConsoleMemDesc, ADDR_FBMEM,
1691 pConsoleFbRegion->base, regionSize);
1692 memdescSetPageSize(pMemoryManager->Ram.pReservedConsoleMemDesc,
1693 AT_GPU, RM_PAGE_SIZE);
1694
1695
1696 NV_PRINTF(LEVEL_INFO, "Allocating console region of size: %llx, at base : %llx \n ",
1697 regionSize, pConsoleFbRegion->base);
1698 }
1699
1700 return status;
1701 }
1702
1703 void
memmgrReleaseConsoleRegion_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)1704 memmgrReleaseConsoleRegion_IMPL
1705 (
1706 OBJGPU *pGpu,
1707 MemoryManager *pMemoryManager
1708 )
1709 {
1710 memdescDestroy(pMemoryManager->Ram.pReservedConsoleMemDesc);
1711 pMemoryManager->Ram.pReservedConsoleMemDesc = NULL;
1712 }
1713
1714 PMEMORY_DESCRIPTOR
memmgrGetReservedConsoleMemDesc_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)1715 memmgrGetReservedConsoleMemDesc_IMPL
1716 (
1717 OBJGPU *pGpu,
1718 MemoryManager *pMemoryManager
1719 )
1720 {
1721 return pMemoryManager->Ram.pReservedConsoleMemDesc;
1722 }
1723
1724 /*!
1725 * Reserve FB for allocating BAR2 Page Dirs and Page Tables
1726 */
1727 void
memmgrReserveBar2BackingStore_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 * pAddr)1728 memmgrReserveBar2BackingStore_IMPL
1729 (
1730 OBJGPU *pGpu,
1731 MemoryManager *pMemoryManager,
1732 NvU64 *pAddr
1733 )
1734 {
1735 NvU64 tmpAddr = *pAddr;
1736 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1737
1738 NvU32 pageDirsSize = kbusGetSizeOfBar2PageDirs_HAL(pGpu, pKernelBus);
1739 NvU32 pageTblsSize = kbusGetSizeOfBar2PageTables_HAL(pGpu, pKernelBus);
1740
1741 // Reserve space for BAR2 Page Dirs
1742 if (pKernelBus->PDEBAR2Aperture == ADDR_FBMEM)
1743 {
1744 tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1745 pKernelBus->bar2[GPU_GFID_PF].pdeBase = tmpAddr;
1746 tmpAddr += pageDirsSize;
1747 }
1748
1749 // Reserve space for BAR2 Page Tables
1750 if (pKernelBus->PTEBAR2Aperture == ADDR_FBMEM)
1751 {
1752 tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1753 pKernelBus->bar2[GPU_GFID_PF].pteBase = tmpAddr;
1754 tmpAddr += pageTblsSize;
1755 }
1756
1757 NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page dirs offset = 0x%llx size = 0x%x\n",
1758 pKernelBus->bar2[GPU_GFID_PF].pdeBase, pageDirsSize);
1759
1760 NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page tables offset = 0x%llx size = 0x%x\n",
1761 pKernelBus->bar2[GPU_GFID_PF].pteBase, pageTblsSize);
1762
1763 *pAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1764 }
1765
1766 /*!
1767 * Calculate the Vista reserved memory requirement per FB region for mixed type/density
1768 */
1769 void
memmgrCalcReservedFbSpace_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)1770 memmgrCalcReservedFbSpace_IMPL
1771 (
1772 OBJGPU *pGpu,
1773 MemoryManager *pMemoryManager
1774 )
1775 {
1776 NvU64 rsvdFastSize = 0;
1777 NvU64 rsvdSlowSize = 0;
1778 NvU64 rsvdISOSize = 0;
1779 NvU32 i;
1780 NvU32 idxISORegion = 0;
1781 NvU32 idxFastRegion = 0;
1782 NvU32 idxSlowRegion = 0;
1783 NvBool bAllocProtected = NV_FALSE;
1784
1785 bAllocProtected = gpuIsCCFeatureEnabled(pGpu);
1786
1787 //
1788 // This is a hack solely for Vista (on Vista the OS controls the majority of heap).
1789 // Linux and Mac don't have reserved memory and doesn't use this function.
1790 //
1791 // On Vista, Fermi's instance memory is not reserved by RM anymore.
1792 // KMD has to reserve enough instance memory for driver private data.
1793 // This function does the calculation of needed space. See bug 642233.
1794 // While it returns the result in Mb, the calculation is made with byte
1795 //
1796
1797 // If we have no usable memory then we can't reserve any.
1798 if (!pMemoryManager->Ram.fbUsableMemSize)
1799 return;
1800
1801 memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);
1802
1803 // If we have regions defined, fill in the per-segment reserved memory requirement
1804 if (pMemoryManager->Ram.numFBRegions > 0)
1805 {
1806 FB_REGION_DESCRIPTOR *pFbRegion = NULL;
1807 NvU64 regionSize = 0;
1808
1809 //
1810 // Find the fastest and ISO regions. This search makes a soft assumption that
1811 // region #0 is not reserved, fastest, and supports ISO -- that would be stupid
1812 //
1813 for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
1814 {
1815 pFbRegion = &pMemoryManager->Ram.fbRegion[i];
1816 regionSize = (pFbRegion->limit - pFbRegion->base +1);
1817
1818 // Check only non-reserved regions (which are typically unpopulated blackholes in address space)
1819 if ((!pFbRegion->bRsvdRegion) &&
1820 (bAllocProtected || !pFbRegion->bProtected) &&
1821 (regionSize >= (rsvdFastSize + rsvdSlowSize + rsvdISOSize)))
1822 {
1823 // Find the fastest region
1824 if ((pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxFastRegion].performance)
1825 || pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion
1826 || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected))
1827 {
1828 idxFastRegion = i;
1829 }
1830 // Find the slowest region
1831 if ((pFbRegion->performance < pMemoryManager->Ram.fbRegion[idxSlowRegion].performance)
1832 || pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion
1833 || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected))
1834 {
1835 idxSlowRegion = i;
1836 }
1837 // Find the fastest ISO region
1838 if (pFbRegion->bSupportISO)
1839 {
1840 if ((!pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO) ||
1841 (pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxISORegion].performance)
1842 || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxISORegion].bProtected))
1843 {
1844 idxISORegion = i;
1845 }
1846 }
1847 }
1848 }
1849
1850 // There should *ALWAYS* be a region that supports ISO, even if we have no display
1851 NV_ASSERT(pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO);
1852
1853 // There should *ALWAYS* be a non-reserved region that is faster than reserved and supports ISO
1854 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bRsvdRegion);
1855 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion);
1856 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion);
1857
1858 //
1859 // Whenever Hopper CC is enabled, it is mandatory to put allocations
1860 // like page tables, CBC and fault buffers in CPR region. Cannot put
1861 // reserved memory in protected region in non CC cases
1862 //
1863 if (!bAllocProtected)
1864 {
1865 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bProtected);
1866 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected);
1867 NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected);
1868 }
1869
1870 //
1871 // Vista expects to be able to VidHeapControl allocate a cursor in ISO
1872 //
1873 // For mixed density reserved memory should be split between "fast" and
1874 // "slow" memory. Fast memory should also support ISO. The policy to
1875 // prefer "slow" vs "fast" memory is platform dependent.
1876 //
1877 pMemoryManager->Ram.fbRegion[idxISORegion].rsvdSize += rsvdISOSize;
1878 pMemoryManager->Ram.fbRegion[idxSlowRegion].rsvdSize += rsvdSlowSize;
1879 pMemoryManager->Ram.fbRegion[idxFastRegion].rsvdSize += rsvdFastSize;
1880 }
1881 }
1882
/*!
 * Initialize the channel buffer sizes and offsets.
 *
 * @param[in]     pChannel       OBJCHANNEL pointer
 * @param[in]     numCopyBlocks  Number of copies that should fit in the push buffer
 */
1891 void
memmgrMemUtilsSetupChannelBufferSizes_IMPL(MemoryManager * pMemoryManager,OBJCHANNEL * pChannel,NvU32 numCopyBlocks)1892 memmgrMemUtilsSetupChannelBufferSizes_IMPL
1893 (
1894 MemoryManager *pMemoryManager,
1895 OBJCHANNEL *pChannel,
1896 NvU32 numCopyBlocks
1897 )
1898 {
1899 // set channel specific sizes
1900 pChannel->channelPbSize = numCopyBlocks * MEMUTILS_SIZE_PER_BLOCK_INBYTES;
1901 pChannel->channelNotifierSize = MEMUTILS_CHANNEL_NOTIFIER_SIZE;
1902 pChannel->channelNumGpFifioEntries = MEMUTILS_NUM_GPFIFIO_ENTRIES;
1903 pChannel->methodSizePerBlock = MEMUTILS_SIZE_PER_BLOCK_INBYTES;
1904 pChannel->channelSize = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE + MEMUTILS_CHANNEL_SEMAPHORE_SIZE;
1905 pChannel->semaOffset = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE;
1906 pChannel->finishPayloadOffset = pChannel->semaOffset + 4;
1907 }
1908
memmgrFree_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,Heap * pHeap,NvHandle hClient,NvHandle hDevice,NvHandle hVASpace,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc)1909 NV_STATUS memmgrFree_IMPL
1910 (
1911 OBJGPU *pGpu,
1912 MemoryManager *pMemoryManager,
1913 Heap *pHeap,
1914 NvHandle hClient,
1915 NvHandle hDevice,
1916 NvHandle hVASpace,
1917 NvU32 owner,
1918 MEMORY_DESCRIPTOR *pMemDesc
1919 )
1920 {
1921 NvU64 offsetAlign;
1922 NV_STATUS status;
1923 NvU32 pmaFreeFlag = 0;
1924
1925 // IRQL TEST: must be running at equivalent of passive-level
1926 IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());
1927
1928 if (pMemDesc == NULL)
1929 return NV_ERR_INVALID_ARGUMENT;
1930
1931 offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
1932
1933 if (owner == NVOS32_BLOCK_TYPE_FREE)
1934 return NV_ERR_INVALID_ARGUMENT;
1935
1936 // Virtual heap allocs are tagged vitual and always own the memdesc
1937 if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL)
1938 {
1939 OBJVASPACE *pVAS = NULL;
1940 RsClient *pClient;
1941
1942 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
1943 if (status != NV_OK)
1944 return status;
1945
1946 status = vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, hVASpace, &pVAS);
1947 if (status != NV_OK)
1948 return status;
1949
1950 status = vaspaceFree(pVAS, offsetAlign);
1951 memdescDestroy(pMemDesc);
1952 return status;
1953 }
1954
1955 // Free up the memory allocated by PMA.
1956 if (pMemDesc->pPmaAllocInfo)
1957 {
1958 FB_ALLOC_INFO *pFbAllocInfo = NULL;
1959 FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
1960 OBJGPU *pMemdescOwnerGpu = NULL;
1961
1962 //
1963 // A memdesc can be duped under a peer device. In that case, before
1964 // freeing FB make sure the GPU which owns the memdesc is available.
1965 // Otherwise, just assert, destroy the memdesc and return NV_OK to
1966 // make sure rest of the clean up happens correctly as we are on
1967 // destroy path.
1968 // Note this is just a WAR till ressrv bring in cleanup of dup objects
1969 // on GPU tear down.
1970 // RS-TODO: Nuke this check once the cleanup is implemented.
1971 //
1972 if (pGpu != pMemDesc->pGpu)
1973 {
1974 if (!gpumgrIsGpuPointerValid(pMemDesc->pGpu))
1975 {
1976 //
1977 // This should never happen. GPU tear down should always clear
1978 // the duped memory list after resource server implements it.
1979 // For now just assert!
1980 //
1981 NV_ASSERT(0);
1982 memdescDestroy(pMemDesc);
1983 goto pma_free_exit;
1984 }
1985 }
1986
1987 pMemdescOwnerGpu = pMemDesc->pGpu;
1988
1989 //
1990 // Similar to the above WAR, if portMem alocations fail for any reason,
1991 // just assert and return NV_OK to ensure that the rest of the clean up
1992 // happens correctly.
1993 //
1994 pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1995 if (pFbAllocInfo == NULL)
1996 {
1997 NV_ASSERT(0);
1998 goto pma_free_exit;
1999 }
2000
2001 pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
2002 if (pFbAllocPageFormat == NULL) {
2003 NV_ASSERT(0);
2004 goto pma_free_exit;
2005 }
2006
2007 portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
2008 portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
2009 pFbAllocInfo->hClient = hClient;
2010 pFbAllocInfo->hDevice = hDevice;
2011 pFbAllocInfo->pageFormat = pFbAllocPageFormat;
2012
2013 //
2014 // Do not release any HW resources associated with this allocation
2015 // until the last reference to the allocation is freed. Passing
2016 // hwresid = 0 and format = pitch to memmgrFreeHwResources will ensure
2017 // that no comptags/zcull/zbc resources are freed.
2018 //
2019 if (pMemDesc->RefCount == 1)
2020 {
2021 pFbAllocInfo->hwResId = memdescGetHwResId(pMemDesc);
2022 pFbAllocInfo->format = memdescGetPteKind(pMemDesc);
2023 }
2024 else
2025 {
2026 pFbAllocInfo->hwResId = 0;
2027 pFbAllocInfo->format = 0;
2028 }
2029 pFbAllocInfo->offset = offsetAlign;
2030 pFbAllocInfo->size = pMemDesc->Size;
2031
2032 // Free any HW resources allocated.
2033 memmgrFreeHwResources(pMemdescOwnerGpu,
2034 GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu), pFbAllocInfo);
2035
2036 if (pMemDesc->pPmaAllocInfo != NULL)
2037 {
2038 // Disabling scrub on free for non compressible surfaces
2039 if (RMCFG_FEATURE_PLATFORM_MODS &&
2040 !memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu),
2041 FB_IS_KIND_COMPRESSIBLE,
2042 memdescGetPteKind(pMemDesc)))
2043 {
2044 pmaFreeFlag = PMA_FREE_SKIP_SCRUB;
2045 }
2046
2047 vidmemPmaFree(pMemdescOwnerGpu, pHeap, pMemDesc->pPmaAllocInfo, pmaFreeFlag);
2048 NV_PRINTF(LEVEL_INFO, "Freeing PMA allocation\n");
2049 }
2050
2051 pma_free_exit:
2052 portMemFree(pFbAllocInfo);
2053 portMemFree(pFbAllocPageFormat);
2054 memdescDestroy(pMemDesc);
2055
2056 return NV_OK;
2057 }
2058
2059 return heapFree(pGpu, pHeap, hClient, hDevice, owner, pMemDesc);
2060 }
2061
2062 NV_STATUS
memmgrSetPartitionableMem_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2063 memmgrSetPartitionableMem_IMPL
2064 (
2065 OBJGPU *pGpu,
2066 MemoryManager *pMemoryManager
2067 )
2068 {
2069 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
2070 NV2080_CTRL_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM_PARAMS params = {0};
2071 Heap *pHeap = GPU_GET_HEAP(pGpu);
2072 NvU64 bottomRsvdSize = 0;
2073 NvU64 topRsvdSize = 0;
2074 NvU32 bottomRegionIdx = 0xFFFF;
2075 NvU32 topRegionIdx = 0xFFFF;
2076 NvU32 i;
2077 NvU64 size;
2078 NvU64 base;
2079 NvU64 offset;
2080 NvU64 freeMem;
2081
2082 //
2083 // Find out the first and the last region for which internal heap or
2084 // bRsvdRegion is true. In Ampere we should never have more than two
2085 // discontigous RM reserved region
2086 // To-Do - Bug 2301972 - Make sure that reserved memory is aligned to VMMU
2087 // segments
2088 //
2089 for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
2090 {
2091 if (pMemoryManager->Ram.fbRegion[i].bInternalHeap ||
2092 pMemoryManager->Ram.fbRegion[i].bRsvdRegion)
2093 {
2094 NvU64 rsvdSize = (pMemoryManager->Ram.fbRegion[i].limit -
2095 pMemoryManager->Ram.fbRegion[i].base + 1);
2096
2097 // Check if this is bottom reserved region
2098 if (pMemoryManager->Ram.fbRegion[i].base == 0)
2099 {
2100 bottomRegionIdx = i;
2101 bottomRsvdSize += rsvdSize;
2102 }
2103 else if (i > 0 && (pMemoryManager->Ram.fbRegion[i-1].bInternalHeap ||
2104 pMemoryManager->Ram.fbRegion[i-1].bRsvdRegion) &&
2105 (pMemoryManager->Ram.fbRegion[i].base == pMemoryManager->Ram.fbRegion[i - 1].limit + 1))
2106 {
2107 // See if this is the contigous region with previous discovery
2108 if (bottomRegionIdx == (i - 1))
2109 {
2110 // Contigous bottom region
2111 bottomRsvdSize += rsvdSize;
2112 }
2113 else
2114 {
2115 // Contigous top region
2116 topRsvdSize += rsvdSize;
2117 }
2118 }
2119 else
2120 {
2121 //
2122 // Make sure we don't have discontigous reserved regions as
2123 // they are not supported by HW also and we need to support
2124 // these by using blacklisting mechanism.
2125 //
2126 if (topRegionIdx != 0xFFFF)
2127 {
2128 NV_PRINTF(LEVEL_ERROR,
2129 "More than two discontigous rsvd regions found. "
2130 "Rsvd region base - 0x%llx, Rsvd region Size - 0x%llx\n",
2131 pMemoryManager->Ram.fbRegion[i].base, rsvdSize);
2132 NV_ASSERT(0);
2133 return NV_ERR_INVALID_STATE;
2134 }
2135
2136 topRegionIdx = i;
2137 topRsvdSize += rsvdSize;
2138 }
2139 }
2140 }
2141
2142 //
2143 // Sanity check against the biggest available memory chunk. Pick the smallest
2144 // of biggest available memory chunk or calculated total - reserved memory as
2145 // in vGPU we are still using OBJHEAP and there are some allocations which
2146 // happens at the top of the heap before we program this register
2147 //
2148 if (!memmgrIsPmaInitialized(pMemoryManager))
2149 {
2150 NvU64 bytesTotal;
2151 const NvU64 vgpuHeapWarSize = 256 *1024 * 1024;
2152 NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
2153 &offset, &size));
2154
2155 //
2156 // offset is the starting address of biggest empty block whose size is
2157 // returned and we care about the base of largest empty block
2158 //
2159 base = offset;
2160
2161 //
2162 // WAR - Bug-2383259 - TilL PMA is not enabled in vGPU-Host
2163 // we need to delay reserve some memory at the top to full fill lazy
2164 // allocations like FECS and GPCCS uCode. Leave 256MB at the top for
2165 // such lazy allocations
2166 //
2167 if (size > vgpuHeapWarSize)
2168 {
2169 size -= vgpuHeapWarSize;
2170 }
2171 }
2172 else
2173 {
2174 PMA_REGION_DESCRIPTOR *pFirstPmaRegionDesc = NULL;
2175 NvU32 numPmaRegions;
2176 NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
2177
2178 NV_ASSERT_OK_OR_RETURN(pmaGetRegionInfo(&pHeap->pmaObject,
2179 &numPmaRegions, &pFirstPmaRegionDesc));
2180
2181 base = pFirstPmaRegionDesc->base;
2182 pmaGetFreeMemory(&pHeap->pmaObject, &freeMem);
2183 pmaGetTotalMemory(&pHeap->pmaObject, &size);
2184
2185 NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig));
2186
2187 //
2188 // MIG won't be used alongside APM and hence the check below is of no use
2189 // Even if we enable the check for APM the check will fail given that after
2190 // enabling "scrub on free" using virtual CE writes, memory gets consumed by
2191 // page tables backing the scrubber channel virtual mappings and hence the
2192 // calculation below no longer holds good
2193 // In case of HCC, structures like PB, GPFIFO and USERD for scrubber and golden
2194 // channels are required to be in CPR vidmem. This changes the calculation below
2195 // We can ignore this for the non-MIG case.
2196 //
2197 // When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
2198 // and hence free memory can not be expected to be same as total memory.
2199 //
2200 if ((!gpuIsCCorApmFeatureEnabled(pGpu) || IS_MIG_ENABLED(pGpu)) &&
2201 !(pmaConfig & PMA_QUERY_NUMA_ONLINED))
2202 {
2203 NvU64 maxUsedPmaSize = 2 * RM_PAGE_SIZE_128K;
2204 //
2205 // PMA should be completely free at this point, otherwise we risk
2206 // not setting the right partitionable range (pmaGetLargestFree's
2207 // offset argument is not implemented as of this writing, so we
2208 // only get the base address of the region that contains it). There
2209 // is a known allocation from the top-level scrubber/CeUtils channel that
2210 // is expected to be no larger than 128K. Issue a warning for any
2211 // other uses.
2212 //
2213 if ((size > maxUsedPmaSize) &&
2214 (freeMem < (size - maxUsedPmaSize)))
2215 {
2216 NV_PRINTF(LEVEL_ERROR,
2217 "Assumption that PMA is empty (after accounting for the top-level scrubber and CeUtils) is not met!\n");
2218 NV_PRINTF(LEVEL_ERROR,
2219 " free space = 0x%llx bytes, total space = 0x%llx bytes\n",
2220 freeMem, size);
2221 NV_ASSERT_OR_RETURN(freeMem >= (size - maxUsedPmaSize),
2222 NV_ERR_INVALID_STATE);
2223 }
2224 }
2225 }
2226
2227 if (size == 0)
2228 {
2229 NV_PRINTF(LEVEL_ERROR,
2230 "No partitionable memory. MIG memory partitioning can't be enabled.\n");
2231 return NV_OK;
2232 }
2233
2234 if (base != bottomRsvdSize)
2235 {
2236 NV_PRINTF(LEVEL_ERROR,
2237 "Partitionable memory start - 0x%llx not aligned with RM reserved "
2238 "region base-end - 0x%llx\n", base, bottomRsvdSize);
2239 return NV_ERR_INVALID_STATE;
2240 }
2241
2242 params.partitionableMemSize = size;
2243 params.bottomRsvdSize = bottomRsvdSize;
2244 params.topRsvdSize = topRsvdSize;
2245
2246 // Call physical MemorySystem to align and program the partitionable range
2247 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2248 pRmApi->Control(pRmApi,
2249 pGpu->hInternalClient,
2250 pGpu->hInternalSubdevice,
2251 NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM,
2252 ¶ms,
2253 sizeof(params)));
2254
2255 pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange =
2256 rangeMake(params.partitionableStartAddr, params.partitionableEndAddr);
2257
2258 //
2259 // Make sure the created range is a valid range.
2260 // rangeIsEmpty checks lo > hi, which should be good enough to catch
2261 // inverted range case.
2262 //
2263 NV_ASSERT_OR_RETURN(!rangeIsEmpty(pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange),
2264 NV_ERR_INVALID_STATE);
2265
2266 if (!KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu))
2267 {
2268 NV_ASSERT_OK_OR_RETURN(memmgrSetMIGPartitionableBAR1Range(pGpu, pMemoryManager));
2269 }
2270
2271 if (IS_GSP_CLIENT(pGpu))
2272 {
2273 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2274
2275 //
2276 // The Physical RM initializes its AMAPLIB context via
2277 // memsysSetPartitionableMem_HAL(). The GSP Client RM has a separate
2278 // AMAPLIB context that must also be initialized.
2279 //
2280 kmemsysReadMIGMemoryCfg_HAL(pGpu, pKernelMemorySystem);
2281 }
2282
2283 return NV_OK;
2284 }
2285
2286 NV_STATUS
memmgrFillComprInfo_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 pageSize,NvU32 pageCount,NvU32 kind,NvU64 surfOffset,NvU32 compTagStartOffset,COMPR_INFO * pComprInfo)2287 memmgrFillComprInfo_IMPL
2288 (
2289 OBJGPU *pGpu,
2290 MemoryManager *pMemoryManager,
2291 NvU64 pageSize,
2292 NvU32 pageCount,
2293 NvU32 kind,
2294 NvU64 surfOffset,
2295 NvU32 compTagStartOffset,
2296 COMPR_INFO *pComprInfo
2297 )
2298 {
2299 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
2300 kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
2301
2302 portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2303
2304 pComprInfo->kind = kind;
2305
2306 if (!memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
2307 return NV_OK;
2308
2309 // TODO: We will have to support compression on vGPU HOST for AC
2310 NV_ASSERT(compTagStartOffset != ~(NvU32)0);
2311
2312 pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
2313 pComprInfo->compTagLineMin = compTagStartOffset;
2314 pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
2315 pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
2316 pComprInfo->compTagLineMultiplier = 1;
2317
2318 return NV_OK;
2319 }
2320
2321 NV_STATUS
memmgrGetKindComprForGpu_KERNEL(MemoryManager * pMemoryManager,MEMORY_DESCRIPTOR * pMemDesc,OBJGPU * pMappingGpu,NvU64 offset,NvU32 * pKind,COMPR_INFO * pComprInfo)2322 memmgrGetKindComprForGpu_KERNEL
2323 (
2324 MemoryManager *pMemoryManager,
2325 MEMORY_DESCRIPTOR *pMemDesc,
2326 OBJGPU *pMappingGpu,
2327 NvU64 offset,
2328 NvU32 *pKind,
2329 COMPR_INFO *pComprInfo
2330 )
2331 {
2332 NvU32 ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc));
2333 NvU32 kind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
2334 const MEMORY_SYSTEM_STATIC_CONFIG *pMappingMemSysConfig =
2335 kmemsysGetStaticConfig(pMappingGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu));
2336
2337 // Compression is not supported on memory not backed by a GPU
2338 if (pMemDesc->pGpu != NULL && memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind) &&
2339 (ctagId == 0 || ctagId == FB_HWRESID_CTAGID_VAL_FERMI(-1)))
2340 {
2341 portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2342
2343 pComprInfo->kind = kind;
2344 pComprInfo->compPageShift = pMappingMemSysConfig->comprPageShift;
2345 pComprInfo->bPhysBasedComptags = NV_TRUE;
2346 pComprInfo->compTagLineMin = 1;
2347 }
2348 else
2349 {
2350 if (ctagId == FB_HWRESID_CTAGID_VAL_FERMI(0xcdcdcdcd))
2351 {
2352 portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2353
2354 pComprInfo->kind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, kind, NV_TRUE);
2355 }
2356 else
2357 {
2358 memmgrFillComprInfoUncompressed(pMemoryManager, kind, pComprInfo);
2359 }
2360 }
2361
2362 *pKind = pComprInfo->kind;
2363
2364 return NV_OK;
2365 }
2366
2367 NV_STATUS
memmgrGetKindComprFromMemDesc_IMPL(MemoryManager * pMemoryManager,MEMORY_DESCRIPTOR * pMemDesc,NvU64 offset,NvU32 * kind,COMPR_INFO * pComprInfo)2368 memmgrGetKindComprFromMemDesc_IMPL
2369 (
2370 MemoryManager *pMemoryManager,
2371 MEMORY_DESCRIPTOR *pMemDesc,
2372 NvU64 offset,
2373 NvU32 *kind,
2374 COMPR_INFO *pComprInfo
2375 )
2376 {
2377 return memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMemDesc->pGpu,
2378 offset, kind, pComprInfo);
2379 }
2380
2381 void
memmgrSetMIGPartitionableMemoryRange_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NV_RANGE range)2382 memmgrSetMIGPartitionableMemoryRange_IMPL
2383 (
2384 OBJGPU *pGpu,
2385 MemoryManager *pMemoryManager,
2386 NV_RANGE range
2387 )
2388 {
2389 pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = range;
2390 }
2391
2392 NV_RANGE
memmgrGetMIGPartitionableMemoryRange_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2393 memmgrGetMIGPartitionableMemoryRange_IMPL
2394 (
2395 OBJGPU *pGpu,
2396 MemoryManager *pMemoryManager
2397 )
2398 {
2399 return pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
2400 }
2401
2402 /*
2403 * @brief Sets total partitionable BAR1
2404 */
2405 NV_STATUS
memmgrSetMIGPartitionableBAR1Range_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2406 memmgrSetMIGPartitionableBAR1Range_IMPL
2407 (
2408 OBJGPU *pGpu,
2409 MemoryManager *pMemoryManager
2410 )
2411 {
2412 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2413 OBJVASPACE *pBar1VAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2414 OBJEHEAP *pVASHeap;
2415 NvU64 largestFreeOffset = 0;
2416 NvU64 largestFreeSize = 0;
2417 NvU64 partitionableBar1Start;
2418 NvU64 partitionableBar1End;
2419
2420 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))
2421 return NV_OK;
2422
2423 NV_ASSERT_OR_RETURN(pBar1VAS != NULL, NV_ERR_INVALID_STATE);
2424 pVASHeap = vaspaceGetHeap(pBar1VAS);
2425
2426 // Get partitionable BAR1 range
2427 pVASHeap->eheapInfo(pVASHeap, NULL, NULL, &largestFreeOffset, &largestFreeSize, NULL, NULL);
2428
2429 //
2430 // We are not considering alignment here because VA space is reserved/allocated in chunks of pages
2431 // so largestFreeOffset should be already aligned.
2432 //
2433 partitionableBar1Start = largestFreeOffset;
2434 partitionableBar1End = largestFreeOffset + largestFreeSize - 1;
2435 NV_ASSERT_OR_RETURN(partitionableBar1Start >= vaspaceGetVaStart(pBar1VAS), NV_ERR_INVALID_STATE);
2436 NV_ASSERT_OR_RETURN(partitionableBar1End <= vaspaceGetVaLimit(pBar1VAS), NV_ERR_INVALID_STATE);
2437
2438 pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range = rangeMake(partitionableBar1Start, partitionableBar1End);
2439 return NV_OK;
2440 }
2441
2442 NV_RANGE
memmgrGetMIGPartitionableBAR1Range_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2443 memmgrGetMIGPartitionableBAR1Range_IMPL
2444 (
2445 OBJGPU *pGpu,
2446 MemoryManager *pMemoryManager
2447 )
2448 {
2449 return pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range;
2450 }
2451
2452 NV_STATUS
memmgrAllocMIGGPUInstanceMemory_VF(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU32 swizzId,NvHandle * phMemory,NV_RANGE * pAddrRange,Heap ** ppMemoryPartitionHeap)2453 memmgrAllocMIGGPUInstanceMemory_VF
2454 (
2455 OBJGPU *pGpu,
2456 MemoryManager *pMemoryManager,
2457 NvU32 swizzId,
2458 NvHandle *phMemory,
2459 NV_RANGE *pAddrRange,
2460 Heap **ppMemoryPartitionHeap
2461 )
2462 {
2463 // For vGpu we have a static memory allocation
2464 *phMemory = NV01_NULL_OBJECT;
2465 *pAddrRange = pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
2466 *ppMemoryPartitionHeap = GPU_GET_HEAP(pGpu);
2467
2468 return NV_OK;
2469 }
2470
/*!
 * @brief Allocates (or NUMA-onlines) the FB memory backing a MIG GPU
 *        instance and builds its partition-local heap.
 *
 * For swizzIds that need memory partitioning, either onlines the partition
 * range as a NUMA node (self-hosted/coherent systems) or reserves it via a
 * fixed-address vidmem allocation; then creates the partition heap.
 *
 * @param[in]  pGpu
 * @param[in]  pMemoryManager
 * @param[in]  swizzId                MIG swizz id of the instance
 * @param[out] phMemory               handle of the backing allocation
 *                                    (NV01_NULL_OBJECT on NUMA / swizzId-0 path)
 * @param[out] pAddrRange             physical range owned by the instance
 * @param[out] ppMemoryPartitionHeap  heap managing the instance's memory
 *
 * @returns NV_OK on success; error code on allocation/heap-init failure.
 */
NV_STATUS
memmgrAllocMIGGPUInstanceMemory_PF
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 swizzId,
    NvHandle *phMemory,
    NV_RANGE *pAddrRange,
    Heap **ppMemoryPartitionHeap
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NV_STATUS rmStatus = NV_OK;
    NvHandle hMemory = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);

    NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
    // Query the physical range this swizzId owns.
    NV_ASSERT_OK_OR_RETURN(kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, pAddrRange));

    //
    // Only allocate memory for non swizzID-0 GPU instances as swizzID-0 owns full
    // gpu and there is no need to pre-reserve memory for that and non
    // coherent systems. In coherent NUMA systems, NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE
    // is not supported and the memory comes from the MIG partition memory
    // NUMA node.
    //
    if (kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
    {
        if (bNumaEnabled)
        {
            NvS32 numaNodeId;
            NvU64 partitionBaseAddr = pAddrRange->lo;
            NvU64 partitionSize = rangeLength(*pAddrRange);

            // First partition being created: tear down the whole-GPU NUMA node
            // before onlining per-partition nodes.
            if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
            {
                // Remove swizz Id 0 / baremetal GPU memory NUMA node
                pmaNumaOfflined(&GPU_GET_HEAP(pGpu)->pmaObject);
                kmemsysNumaRemoveMemory_HAL(pGpu, pKernelMemorySystem, 0);
            }

            //
            // The memory gets removed in memmgrFreeMIGGPUInstanceMemory if
            // there is any failure after adding the memory.
            //
            NV_ASSERT_OK_OR_RETURN(kmemsysNumaAddMemory_HAL(pGpu,
                                                            pKernelMemorySystem,
                                                            swizzId,
                                                            partitionBaseAddr,
                                                            partitionSize,
                                                            &numaNodeId));
        }
        else
        {
            //
            // Allocate memory using vidHeapControl
            //
            // vidHeapControl calls should happen outside GPU locks
            // This is a PMA requirement as memory allocation calls may invoke eviction
            // which UVM could get stuck behind GPU lock
            // See Bug 1735851-#24
            //
            rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

            // Fixed-address, contiguous vidmem allocation covering the whole
            // partition range; scrub is skipped (internal allocation).
            NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
            portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
            memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
            memAllocParams.type      = NVOS32_TYPE_IMAGE;
            memAllocParams.size      = rangeLength(*pAddrRange);
            memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
            memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
            memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
            memAllocParams.attr2     = DRF_DEF(OS32, _ATTR2, _PAGE_OFFLINING, _OFF); // free the offlined pages
            memAllocParams.flags    |= NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
            memAllocParams.rangeLo   = 0;
            memAllocParams.rangeHi   = 0;
            memAllocParams.offset    = pAddrRange->lo; // Offset needed if fixed address allocation
            memAllocParams.hVASpace  = 0; // Physical allocation
            memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

            rmStatus = pRmApi->Alloc(pRmApi,
                                     pMemoryManager->MIGMemoryPartitioningInfo.hClient,
                                     pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice,
                                     &hMemory,
                                     NV01_MEMORY_LOCAL_USER,
                                     &memAllocParams,
                                     sizeof(memAllocParams));

            // Reaquire the GPU locks
            if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM) != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
                DBG_BREAKPOINT();
                rmStatus = NV_ERR_GENERIC;
                goto cleanup;
            }

            if (rmStatus != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Unable to allocate physical memory for GPU instance.\n");
                return rmStatus;
            }
        }
    }
    rmStatus = _memmgrInitMIGMemoryPartitionHeap(pGpu, pMemoryManager, swizzId, pAddrRange, ppMemoryPartitionHeap);
    if (rmStatus != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to initialize memory partition heap\n");
        goto cleanup;
    }

    NV_PRINTF(LEVEL_INFO,
              "Allocated memory partition heap for swizzId - %d with StartAddr - 0x%llx, endAddr - 0x%llx.\n",
              swizzId, pAddrRange->lo, pAddrRange->hi);

    *phMemory = hMemory;
    return rmStatus;

cleanup:
    // Releases the backing allocation (no-op when hMemory is 0 / NUMA path).
    pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);

    return rmStatus;
}
2599
2600 // Function to initialize heap for managing MIG partition memory
2601 static NV_STATUS
_memmgrInitMIGMemoryPartitionHeap(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU32 swizzId,NV_RANGE * pAddrRange,Heap ** ppMemoryPartitionHeap)2602 _memmgrInitMIGMemoryPartitionHeap
2603 (
2604 OBJGPU *pGpu,
2605 MemoryManager *pMemoryManager,
2606 NvU32 swizzId,
2607 NV_RANGE *pAddrRange,
2608 Heap **ppMemoryPartitionHeap
2609 )
2610 {
2611 NV_STATUS status = NV_OK;
2612 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2613 Heap *pMemoryPartitionHeap = NULL;
2614 NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2615 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2616 NvU64 partitionBaseAddr = pAddrRange->lo;
2617 NvU64 partitionSize = rangeLength(*pAddrRange);
2618
2619 // Use default heap for swizzID-0 as we don't prereserve memory for swizzID-0
2620 NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
2621 if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
2622 {
2623 *ppMemoryPartitionHeap = pMemoryManager->pHeap;
2624 return NV_OK;
2625 }
2626 else
2627 {
2628 *ppMemoryPartitionHeap = NULL;
2629 }
2630
2631 NV_ASSERT_OK_OR_GOTO(
2632 status,
2633 objCreate(ppMemoryPartitionHeap, pMemoryManager, Heap),
2634 fail);
2635
2636 pMemoryPartitionHeap = *ppMemoryPartitionHeap;
2637
2638 if (memmgrIsPmaEnabled(pMemoryManager) &&
2639 memmgrIsPmaSupportedOnPlatform(pMemoryManager))
2640 {
2641 portMemSet(&pMemoryPartitionHeap->pmaObject, 0, sizeof(pMemoryPartitionHeap->pmaObject));
2642 NV_ASSERT_OK_OR_GOTO(
2643 status,
2644 memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryPartitionHeap->pmaObject),
2645 fail);
2646
2647 if (bNumaEnabled)
2648 {
2649 NV_ASSERT_OR_GOTO(pKernelMemorySystem->memPartitionNumaInfo[swizzId].bInUse, fail);
2650 partitionBaseAddr = pKernelMemorySystem->memPartitionNumaInfo[swizzId].offset;
2651 partitionSize = pKernelMemorySystem->memPartitionNumaInfo[swizzId].size;
2652
2653 //
2654 // The base and size passed here is the FB base and size and
2655 // not the partition's. pmaNumaOnlined requires the FB base and
2656 // size to convert between FB local address and SPA.
2657 // memmgrPmaRegisterRegions is where the partition's base and size
2658 // is reported to PMA.
2659 //
2660 NV_ASSERT_OK_OR_GOTO(
2661 status,
2662 pmaNumaOnlined(&pMemoryPartitionHeap->pmaObject,
2663 pKernelMemorySystem->memPartitionNumaInfo[swizzId].numaNodeId,
2664 pKernelMemorySystem->coherentCpuFbBase,
2665 pKernelMemorySystem->numaOnlineSize),
2666 fail);
2667 }
2668 }
2669
2670 NV_ASSERT_OK_OR_GOTO(
2671 status,
2672 heapInit(pGpu, pMemoryPartitionHeap, partitionBaseAddr,
2673 partitionSize,
2674 HEAP_TYPE_PARTITION_LOCAL,
2675 GPU_GFID_PF,
2676 NULL),
2677 fail);
2678
2679 if (memmgrIsPmaInitialized(pMemoryManager) &&
2680 (pMemoryPartitionHeap->bHasFbRegions))
2681 {
2682 NV_ASSERT_OK_OR_GOTO(
2683 status,
2684 memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryPartitionHeap,
2685 &pMemoryPartitionHeap->pmaObject),
2686 fail);
2687 }
2688
2689 if (!IsSLIEnabled(pGpu))
2690 {
2691 // Do the actual blacklisting of pages from the heap
2692 if (pMemoryPartitionHeap->blackListAddresses.count != 0)
2693 {
2694 status = heapBlackListPages(pGpu, pMemoryPartitionHeap);
2695
2696 if (status != NV_OK)
2697 {
2698 // Warn and continue
2699 NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
2700 status);
2701 }
2702 }
2703 }
2704
2705 return NV_OK;
2706
2707 fail:
2708
2709 if (pMemoryPartitionHeap != NULL)
2710 {
2711 objDelete(pMemoryPartitionHeap);
2712 *ppMemoryPartitionHeap = NULL;
2713 }
2714
2715 return status;
2716 }
2717
/*!
 * @brief Frees the memory backing a MIG GPU instance.
 *
 * Deletes the partition heap, then either removes the partition's NUMA node
 * (re-adding the whole-GPU node if this was the last partition) or frees the
 * vidmem allocation that backed the instance.
 *
 * @param[in]     pGpu
 * @param[in]     pMemoryManager
 * @param[in]     swizzId                MIG swizz id of the instance
 * @param[in]     hMemory                backing allocation handle (may be
 *                                       NV01_NULL_OBJECT on the NUMA path)
 * @param[in,out] ppMemoryPartitionHeap  partition heap; NULLed on return
 *
 * @returns NV_OK on success; error code on NUMA re-online failure.
 */
NV_STATUS
memmgrFreeMIGGPUInstanceMemory_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 swizzId,
    NvHandle hMemory,
    Heap **ppMemoryPartitionHeap
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);

    NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);

    // Nothing to do for swizzId 0 as we neither allocate memory nor allocate new heap object
    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
        return NV_OK;

    // Heap must go before the backing memory is removed/freed.
    objDelete(*ppMemoryPartitionHeap);
    *ppMemoryPartitionHeap = NULL;

    if (bNumaEnabled)
    {
        // Offline this partition's NUMA node.
        kmemsysNumaRemoveMemory_HAL(pGpu, pKernelMemorySystem, swizzId);

        // Last partition gone: restore the whole-GPU (baremetal) NUMA node.
        if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
        {
            NvS32 numaNodeId;

            // Add back the baremetal GPU memory NUMA node.
            NV_ASSERT_OK_OR_RETURN(kmemsysNumaAddMemory_HAL(pGpu,
                                                            pKernelMemorySystem,
                                                            0,
                                                            pKernelMemorySystem->numaOnlineBase,
                                                            pKernelMemorySystem->numaOnlineSize,
                                                            &numaNodeId));
            // Baremetal NUMA node id should be same as pGpu->numaNodeId
            NV_ASSERT_OR_RETURN(numaNodeId == pGpu->numaNodeId, NV_ERR_INVALID_STATE);
            NV_ASSERT_OK_OR_RETURN(pmaNumaOnlined(&GPU_GET_HEAP(pGpu)->pmaObject,
                                                  pGpu->numaNodeId,
                                                  pKernelMemorySystem->coherentCpuFbBase,
                                                  pKernelMemorySystem->numaOnlineSize));
        }
    }

    // Free allocated memory
    if (!bNumaEnabled && (hMemory != NV01_NULL_OBJECT))
    {
        pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);
    }
    return NV_OK;
}
2774
/*!
 * @brief Rewrites a COMPR_INFO in place so it describes the uncompressed
 *        variant of its current kind.
 */
void memmgrComprInfoDisableCompression_IMPL
(
    MemoryManager *pMemoryManager,
    COMPR_INFO *pComprInfo
)
{
    NvU32 currentKind = pComprInfo->kind;

    memmgrFillComprInfoUncompressed(pMemoryManager, currentKind, pComprInfo);
}
2783
/*!
 * @brief Fills a COMPR_INFO describing an uncompressed surface of the given
 *        kind, translating compressible kinds to their uncompressed form.
 */
void memmgrFillComprInfoUncompressed_IMPL
(
    MemoryManager *pMemoryManager,
    NvU32 kind,
    COMPR_INFO *pComprInfo
)
{
    NvU32 finalKind = kind;

    // Compressible kinds are mapped to their uncompressed counterpart first.
    if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, finalKind))
    {
        finalKind = memmgrGetUncompressedKind_HAL(ENG_GET_GPU(pMemoryManager),
                                                  pMemoryManager, finalKind, NV_FALSE);
    }

    portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
    pComprInfo->kind = finalKind;
}
2797
2798 /*!
2799 * @brief Creates the SW state of the page level pools.
2800 *
2801 * @param pGpu
2802 * @param pMemoryManager
2803 *
2804 * @returns On success, returns NV_OK.
2805 * On failure, returns error code.
2806 */
2807 NV_STATUS
memmgrPageLevelPoolsCreate_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2808 memmgrPageLevelPoolsCreate_IMPL
2809 (
2810 OBJGPU *pGpu,
2811 MemoryManager *pMemoryManager
2812 )
2813 {
2814 NV_STATUS status = NV_OK;
2815
2816 if (RMCFG_FEATURE_PMA &&
2817 memmgrIsPmaInitialized(pMemoryManager) &&
2818 memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2819 {
2820 Heap *pHeap = GPU_GET_HEAP(pGpu);
2821 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2822 const GMMU_FMT *pFmt = NULL;
2823
2824 pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
2825 NV_ASSERT_OR_RETURN(NULL != pFmt, NV_ERR_INVALID_ARGUMENT);
2826
2827 status = rmMemPoolSetup((void *)&pHeap->pmaObject, &pMemoryManager->pPageLevelReserve,
2828 (pFmt->version == GMMU_FMT_VERSION_1) ? POOL_CONFIG_GMMU_FMT_1 : POOL_CONFIG_GMMU_FMT_2);
2829
2830 NV_ASSERT(NV_OK == status);
2831
2832 //
2833 // Allocate the pool in CPR in case of Confidential Compute
2834 // When Hopper Confidential Compute is enabled, page tables
2835 // cannot be in non-CPR region
2836 //
2837 if (gpuIsCCFeatureEnabled(pGpu) && (status == NV_OK))
2838 {
2839 rmMemPoolAllocateProtectedMemory(pMemoryManager->pPageLevelReserve, NV_TRUE);
2840 }
2841 }
2842 return status;
2843 }
2844
2845 /*!
2846 * @brief Destroys the SW state of the page level pools.
2847 *
2848 * @param pGpu
2849 * @param pMemoryManager
2850 *
2851 * @returns
2852 */
2853 void
memmgrPageLevelPoolsDestroy_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)2854 memmgrPageLevelPoolsDestroy_IMPL
2855 (
2856 OBJGPU *pGpu,
2857 MemoryManager *pMemoryManager
2858 )
2859 {
2860 if (RMCFG_FEATURE_PMA &&
2861 memmgrIsPmaInitialized(pMemoryManager) &&
2862 memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2863 {
2864 rmMemPoolDestroy(pMemoryManager->pPageLevelReserve);
2865 pMemoryManager->pPageLevelReserve = NULL;
2866 }
2867 }
2868
2869 /*!
2870 * @brief Gets page level pool to use
2871 *
2872 * @param pGpu
2873 * @param pMemoryManager
2874 * @param[in] hClient client handle
2875 * @param[out] ppMemPoolInfo page level pool
2876 *
2877 * @returns On success, returns NV_OK.
2878 * On failure, returns error code.
2879 */
2880 NV_STATUS
memmgrPageLevelPoolsGetInfo_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,Device * pDevice,RM_POOL_ALLOC_MEM_RESERVE_INFO ** ppMemPoolInfo)2881 memmgrPageLevelPoolsGetInfo_IMPL
2882 (
2883 OBJGPU *pGpu,
2884 MemoryManager *pMemoryManager,
2885 Device *pDevice,
2886 RM_POOL_ALLOC_MEM_RESERVE_INFO **ppMemPoolInfo
2887 )
2888 {
2889 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2890 NvBool bMemPartitioningEnabled = (pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager);
2891 RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool = NULL;
2892 NV_ASSERT_OR_RETURN(ppMemPoolInfo != NULL, NV_ERR_INVALID_ARGUMENT);
2893
2894 if (!memmgrIsPmaInitialized(pMemoryManager) ||
2895 !memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2896 {
2897 return NV_ERR_INVALID_STATE;
2898 }
2899
2900 // If memory partitioning is enabled, then use per-partition pool allocator
2901 if (bMemPartitioningEnabled)
2902 {
2903 MIG_INSTANCE_REF ref;
2904 NV_ASSERT_OK_OR_RETURN(
2905 kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
2906 pMemPool = ref.pKernelMIGGpuInstance->pPageTableMemPool;
2907 }
2908 else
2909 {
2910 pMemPool = pMemoryManager->pPageLevelReserve;
2911 }
2912 NV_ASSERT_OR_RETURN(pMemPool != NULL, NV_ERR_INVALID_STATE);
2913
2914 *ppMemPoolInfo = pMemPool;
2915 return NV_OK;
2916 }
2917
2918 static inline void
_memmgrPmaStatsUpdateCb(void * pCtx,NvU64 freeFrames)2919 _memmgrPmaStatsUpdateCb
2920 (
2921 void *pCtx,
2922 NvU64 freeFrames
2923 )
2924 {
2925 OBJGPU *pGpu = (OBJGPU *) pCtx;
2926 NV00DE_SHARED_DATA *pSharedData;
2927
2928 NV_ASSERT_OR_RETURN_VOID(pGpu != NULL);
2929
2930 pSharedData = gpushareddataWriteStart(pGpu);
2931
2932 pSharedData->freePmaMemory = freeFrames << PMA_PAGE_SHIFT;
2933
2934 gpushareddataWriteFinish(pGpu);
2935 }
2936
2937 static void
_memmgrInitRUSDHeapSize(OBJGPU * pGpu,MemoryManager * pMemoryManager)2938 _memmgrInitRUSDHeapSize
2939 (
2940 OBJGPU *pGpu,
2941 MemoryManager *pMemoryManager
2942 )
2943 {
2944 NV00DE_SHARED_DATA *pSharedData;
2945 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2946 NvU64 bytesTotal = 0;
2947 PMA *pPma;
2948
2949 NV_ASSERT_OR_RETURN_VOID(memmgrIsPmaInitialized(pMemoryManager));
2950
2951 pPma = &pMemoryManager->pHeap->pmaObject;
2952 pmaGetTotalMemory(pPma, &bytesTotal);
2953 bytesTotal -= ((NvU64)pKernelMemorySystem->fbOverrideStartKb << 10);
2954
2955 pSharedData = gpushareddataWriteStart(pGpu);
2956 pSharedData->totalPmaMemory = bytesTotal;
2957 gpushareddataWriteFinish(pGpu);
2958 }
2959
2960 /*!
2961 * @brief Initialize the PMA object
2962 *
2963 * @param pGpu
2964 * @param pMemoryManager
2965 * @param[in] pPma Pointer to the PMA object to init
2966 *
2967 * @returns On success, returns NV_OK.
2968 * On failure, returns error code.
2969 */
2970 NV_STATUS
memmgrPmaInitialize_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,PMA * pPma)2971 memmgrPmaInitialize_IMPL
2972 (
2973 OBJGPU *pGpu,
2974 MemoryManager *pMemoryManager,
2975 PMA *pPma
2976 )
2977 {
2978 NvU32 pmaInitFlags = PMA_INIT_NONE;
2979 NV_STATUS status = NV_OK;
2980 NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2981
2982 NV_ASSERT(memmgrIsPmaEnabled(pMemoryManager) &&
2983 memmgrIsPmaSupportedOnPlatform(pMemoryManager));
2984
2985 if (memmgrIsPmaForcePersistence(pMemoryManager))
2986 {
2987 pmaInitFlags |= PMA_INIT_FORCE_PERSISTENCE;
2988 }
2989
2990 if (memmgrIsScrubOnFreeEnabled(pMemoryManager))
2991 {
2992 pmaInitFlags |= PMA_INIT_SCRUB_ON_FREE;
2993 }
2994
2995 // Disable client page table management on SLI.
2996 if (IsSLIEnabled(pGpu))
2997 {
2998 memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
2999 }
3000
3001 if (bNumaEnabled)
3002 {
3003 NV_PRINTF(LEVEL_INFO, "Initializing PMA with NUMA flag.\n");
3004 pmaInitFlags |= PMA_INIT_NUMA;
3005
3006 if (gpuIsSelfHosted(pGpu))
3007 {
3008 NV_PRINTF(LEVEL_INFO, "Initializing PMA with NUMA_AUTO_ONLINE flag.\n");
3009 pmaInitFlags |= PMA_INIT_NUMA_AUTO_ONLINE;
3010 }
3011 }
3012
3013 status = pmaInitialize(pPma, pmaInitFlags);
3014 if (status != NV_OK)
3015 {
3016 NV_PRINTF(LEVEL_ERROR, "Failed to initialize PMA!\n");
3017 return status;
3018 }
3019
3020 pmaRegisterUpdateStatsCb(pPma, _memmgrPmaStatsUpdateCb, pGpu);
3021
3022 if (bNumaEnabled)
3023 {
3024 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
3025
3026 NvU32 numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_DEFAULT;
3027
3028 if (osReadRegistryDword(pGpu, NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE, &numaSkipReclaimVal) == NV_OK)
3029 {
3030 if (numaSkipReclaimVal > NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX)
3031 {
3032 numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX;
3033 }
3034 }
3035 pmaNumaSetReclaimSkipThreshold(pPma, numaSkipReclaimVal);
3036
3037 // Full FB memory is added and onlined already
3038 if (pKernelMemorySystem->memPartitionNumaInfo[0].bInUse)
3039 {
3040 NV_ASSERT_OK_OR_RETURN(pmaNumaOnlined(pPma, pGpu->numaNodeId,
3041 pKernelMemorySystem->coherentCpuFbBase,
3042 pKernelMemorySystem->numaOnlineSize));
3043 }
3044
3045 }
3046
3047 return NV_OK;
3048 }
3049
3050 NV_STATUS
memmgrInitFbRegions_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)3051 memmgrInitFbRegions_IMPL
3052 (
3053 OBJGPU *pGpu,
3054 MemoryManager *pMemoryManager
3055 )
3056 {
3057 NV_ASSERT_OR_RETURN(pMemoryManager->Ram.numFBRegions == 0, NV_ERR_INVALID_STATE);
3058
3059 // Don't setup regions if FB is broken and we aren't using L2 cache as "FB".
3060 if ((pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
3061 !gpuIsCacheOnlyModeEnabled(pGpu)))
3062 {
3063 //
3064 // Bug 594534: Don't read/write in the FBIO/FBPA space when FB is broken.
3065 // Indicate 32MB FB Memory instead, which is a bit of a hack since zero
3066 // would be more accurate, but zero breaks things.
3067 //
3068
3069 // When ZeroFB + L2Cache mode is enabled, we'll set fbAddrSpaceSizeMb
3070 // appropriately in memmgrInitBaseFbRegions_HAL.
3071 if (!gpuIsCacheOnlyModeEnabled(pGpu))
3072 {
3073 pMemoryManager->Ram.mapRamSizeMb = pMemoryManager->Ram.fbAddrSpaceSizeMb = 32;
3074 NV_PRINTF(LEVEL_ERROR,
3075 "Bug 594534: HACK: Report 32MB of framebuffer instead of reading registers.\n");
3076
3077 }
3078
3079 return NV_OK;
3080 }
3081
3082 NV_ASSERT_OK_OR_RETURN(memmgrInitBaseFbRegions_HAL(pGpu, pMemoryManager));
3083
3084 NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegionsHal_HAL(pGpu, pMemoryManager));
3085
3086 //
3087 // Build a list of regions sorted by allocation priority
3088 // (highest to lowest). Used for allocations using ObjHeap.
3089 //
3090 memmgrRegenerateFbRegionPriority(pGpu, pMemoryManager);
3091
3092 if (RMCFG_FEATURE_PLATFORM_WINDOWS)
3093 {
3094 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_EXTERNAL_HEAP_CONTROL))
3095 {
3096 // KMD in WDDM mode
3097 if (pMemoryManager->bMixedDensityFbp)
3098 {
3099 //
3100 // For mixed memory on LDDM platforms, when we are using kernel-managed
3101 // heap (not TCC mode), we want to prefer allocating in slow memory to conserve
3102 // fast memory for applications.
3103 //
3104 pMemoryManager->bPreferSlowRegion = NV_TRUE;
3105 }
3106 }
3107 }
3108
3109 NV_ASSERT_OK_OR_RETURN(memmgrSetPlatformPmaSupport(pGpu, pMemoryManager));
3110
3111 return NV_OK;
3112 }
3113
3114 /*!
3115 * @brief Register regions to the PMA object
3116 *
3117 * @param pGpu
3118 * @param pMemoryManager
3119 * @param[in] pPma Pointer to the PMA object to register with
3120 *
3121 * @returns On success, returns NV_OK.
3122 * On failure, returns error code.
3123 */
3124 NV_STATUS
memmgrPmaRegisterRegions_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,Heap * pHeap,PMA * pPma)3125 memmgrPmaRegisterRegions_IMPL
3126 (
3127 OBJGPU *pGpu,
3128 MemoryManager *pMemoryManager,
3129 Heap *pHeap,
3130 PMA *pPma
3131 )
3132 {
3133 HEAP_TYPE_INTERNAL heapType = pHeap->heapType;
3134 PMA_REGION_DESCRIPTOR pmaRegion;
3135 NvU32 pmaRegionIdx = 0;
3136 NvU32 i;
3137 PMA_BLACKLIST_ADDRESS *pBlacklistPages = NULL;
3138 NvU32 blRegionCount = 0;
3139 NvU32 blPageIndex;
3140 NvU32 blackListCount;
3141 NvU64 base, size;
3142 NV_STATUS status = NV_OK;
3143 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
3144
3145 blackListCount = pHeap->blackListAddresses.count;
3146 base = pHeap->base;
3147 size = pHeap->total;
3148
3149 //
3150 // If there are blacklisted pages, prepare a staging buffer to pass the
3151 // per-region blacklisted pages to PMA
3152 //
3153 if (blackListCount > 0)
3154 {
3155 pBlacklistPages = portMemAllocNonPaged(
3156 sizeof(PMA_BLACKLIST_ADDRESS) * blackListCount);
3157 if (pBlacklistPages == NULL)
3158 {
3159 NV_PRINTF(LEVEL_ERROR,
3160 "Could not allocate memory for blackList!\n");
3161 status = NV_ERR_NO_MEMORY;
3162 goto _pmaInitFailed;
3163 }
3164 }
3165
3166 for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
3167 {
3168 //
3169 // Skip all regions that are completely outside the heap boundry
3170 // OR marked as internal(used for internal RM allocations)
3171 // OR marked as reserved(used for console, display, link training buffer etc.)
3172 //
3173 if ((pMemoryManager->Ram.fbRegion[i].limit < base ||
3174 pMemoryManager->Ram.fbRegion[i].base >= (base + size)) ||
3175 (pMemoryManager->Ram.fbRegion[i].bInternalHeap) ||
3176 (pMemoryManager->Ram.fbRegion[i].bRsvdRegion))
3177 {
3178 continue;
3179 }
3180
3181 NV_PRINTF(LEVEL_INFO,
3182 "PMA: Register FB region[%d] %llx..%llx EXTERNAL\n", i,
3183 pMemoryManager->Ram.fbRegion[i].base, pMemoryManager->Ram.fbRegion[i].limit);
3184
3185 pmaRegion.base = pMemoryManager->Ram.fbRegion[i].base;
3186 pmaRegion.limit = pMemoryManager->Ram.fbRegion[i].limit;
3187
3188 // Check if the base of managed memory is not based at FB region base.
3189 if (pmaRegion.base < base)
3190 {
3191 pmaRegion.base = base;
3192 }
3193
3194 // check if limit of managed memory is less than FB region limit
3195 if (pmaRegion.limit >= (base + size))
3196 {
3197 pmaRegion.limit = base + size - 1;
3198 }
3199
3200 pmaRegion.performance = pMemoryManager->Ram.fbRegion[i].performance;
3201 pmaRegion.bSupportCompressed = pMemoryManager->Ram.fbRegion[i].bSupportCompressed;
3202 pmaRegion.bSupportISO = pMemoryManager->Ram.fbRegion[i].bSupportISO;
3203 pmaRegion.bProtected = pMemoryManager->Ram.fbRegion[i].bProtected;
3204
3205 //
3206 // Now we know the region, find if it has any blacklisted pages
3207 // TODO: Try to coalesce to unique 64K pages
3208 //
3209 blRegionCount = 0;
3210 if (pBlacklistPages != NULL)
3211 {
3212 for (blPageIndex = 0; blPageIndex < blackListCount; blPageIndex++)
3213 {
3214 if ((pHeap->blackListAddresses.data[blPageIndex].address
3215 != NV2080_CTRL_FB_OFFLINED_PAGES_INVALID_ADDRESS) &&
3216 (pHeap->blackListAddresses.data[blPageIndex].address >= pmaRegion.base) &&
3217 (pHeap->blackListAddresses.data[blPageIndex].address <= pmaRegion.limit))
3218 {
3219 // Collect the region's blacklisted pages
3220 pBlacklistPages[blRegionCount].physOffset = pHeap->blackListAddresses.data[blPageIndex].address;
3221
3222 pBlacklistPages[blRegionCount].bIsDynamic =
3223 ((pHeap->blackListAddresses.data[blPageIndex].type ==
3224 NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_MULTIPLE_SBE) ||
3225 (pHeap->blackListAddresses.data[blPageIndex].type ==
3226 NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE));
3227
3228 blRegionCount++;
3229 }
3230 }
3231 }
3232
3233 NV_PRINTF(LEVEL_INFO,
3234 "Register FB region %llx..%llx of size %llx with PMA\n",
3235 pmaRegion.base, pmaRegion.limit,
3236 pmaRegion.limit - pmaRegion.base + 1);
3237 //
3238 // Register the region for PMA management, and note if asynchronous
3239 // scrubbing is enabled. Synchronous scrubbing is done before
3240 // heap/PMA is initialized, but asynchronously scrubbed pages will
3241 // need to be unmarked once they are scrubbed.
3242 //
3243 status = pmaRegisterRegion(pPma, pmaRegionIdx,
3244 memmgrEccScrubInProgress_HAL(pGpu, pMemoryManager),
3245 &pmaRegion, blRegionCount,
3246 ((blRegionCount==0) ? NULL : pBlacklistPages));
3247 if (status != NV_OK)
3248 {
3249 NV_PRINTF(LEVEL_ERROR,
3250 "failed to register FB region %llx..%llx with PMA\n",
3251 pmaRegion.base, pmaRegion.limit);
3252 DBG_BREAKPOINT();
3253 goto _pmaInitFailed;
3254 }
3255 pmaRegionIdx++;
3256 }
3257
3258 //
3259 // bug #200354346, make sure the RM reserved region(s) are
3260 // scrubbed during the region creation itself. Top Down scrubber,
3261 // skips the RM reserved region(s) because the assumption is, they
3262 // are pre-scrubbed.
3263 //
3264 if (heapType != HEAP_TYPE_PARTITION_LOCAL)
3265 memmgrScrubInternalRegions_HAL(pGpu, pMemoryManager);
3266
3267 _pmaInitFailed:
3268 portMemFree(pBlacklistPages);
3269
3270 if ((status == NV_OK) && (pKernelMemorySystem->fbOverrideStartKb != 0))
3271 {
3272 NvU64 allocSize = NV_ALIGN_UP(((NvU64)pKernelMemorySystem->fbOverrideStartKb << 10), PMA_GRANULARITY);
3273 NvU32 numPages = (NvU32)(allocSize >> PMA_PAGE_SHIFT);
3274 PMA_ALLOCATION_OPTIONS allocOptions = {0};
3275
3276 allocOptions.flags = PMA_ALLOCATE_CONTIGUOUS;
3277 allocOptions.flags |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE;
3278 allocOptions.physBegin = 0;
3279 allocOptions.physEnd = allocSize - 1;
3280
3281 // This is intentionally thrown away
3282 NvU64 *pPages = NULL;
3283 pPages = portMemAllocNonPaged(numPages * sizeof(NvU64));
3284 if (pPages != NULL)
3285 {
3286 // Accommodate the regkey override for FB start
3287 status = pmaAllocatePages(pPma, numPages, _PMA_64KB, &allocOptions, pPages);
3288 portMemFree(pPages);
3289 }
3290 }
3291 if (status != NV_OK)
3292 {
3293 if (memmgrIsPmaInitialized(pMemoryManager))
3294 {
3295 if (heapType != HEAP_TYPE_PARTITION_LOCAL)
3296 {
3297 memmgrSetPmaInitialized(pMemoryManager, NV_FALSE);
3298 }
3299 pmaDestroy(pPma);
3300 }
3301 }
3302
3303 return status;
3304 }
3305
3306 /*!
3307 * @brief Allocate internal handles for MIG partition memory allocation
3308 */
3309 NV_STATUS
memmgrAllocMIGMemoryAllocationInternalHandles_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)3310 memmgrAllocMIGMemoryAllocationInternalHandles_IMPL
3311 (
3312 OBJGPU *pGpu,
3313 MemoryManager *pMemoryManager
3314 )
3315 {
3316 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
3317
3318 NV_ASSERT_OR_RETURN(pMemoryManager->MIGMemoryPartitioningInfo.hClient == NV01_NULL_OBJECT, NV_ERR_INVALID_STATE);
3319 NV_ASSERT_OK_OR_RETURN(
3320 rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
3321 &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
3322 &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
3323 &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice));
3324
3325 return NV_OK;
3326 }
3327
3328 /*!
3329 * @brief Free internal handles used to support MIG memory partitioning
3330 */
3331 void
memmgrFreeMIGMemoryAllocationInternalHandles_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)3332 memmgrFreeMIGMemoryAllocationInternalHandles_IMPL
3333 (
3334 OBJGPU *pGpu,
3335 MemoryManager *pMemoryManager
3336 )
3337 {
3338 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
3339
3340 rmapiutilFreeClientAndDeviceHandles(pRmApi,
3341 &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
3342 &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
3343 &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice);
3344 }
3345
3346 /*!
3347 * @brief Gets free memory (client visible) for all valid GPU instances
3348 */
3349 void
memmgrGetFreeMemoryForAllMIGGPUInstances_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 * pBytes)3350 memmgrGetFreeMemoryForAllMIGGPUInstances_IMPL
3351 (
3352 OBJGPU *pGpu,
3353 MemoryManager *pMemoryManager,
3354 NvU64 *pBytes
3355 )
3356 {
3357 NvU64 val = 0;
3358 Heap *pHeap = NULL;
3359 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
3360 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
3361
3362 *pBytes = 0;
3363
3364 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
3365 {
3366 NV_ASSERT(pKernelMIGGPUInstance->pMemoryPartitionHeap != NULL);
3367 pHeap = pKernelMIGGPUInstance->pMemoryPartitionHeap;
3368
3369 if (memmgrIsPmaInitialized(pMemoryManager))
3370 pmaGetFreeMemory(&pHeap->pmaObject, &val);
3371 else
3372 heapGetFree(pHeap, &val);
3373
3374 *pBytes += val;
3375 }
3376 FOR_EACH_VALID_GPU_INSTANCE_END();
3377 }
3378
3379 /*!
3380 * @brief Gets total memory for all valid GPU instances
3381 *
3382 * @param pGpu
3383 * @param pMemoryManager
3384 * @param[out] pBytes pointer to the total memory
3385 *
3386 */
3387 void
memmgrGetTotalMemoryForAllMIGGPUInstances_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 * pBytes)3388 memmgrGetTotalMemoryForAllMIGGPUInstances_IMPL
3389 (
3390 OBJGPU *pGpu,
3391 MemoryManager *pMemoryManager,
3392 NvU64 *pBytes
3393 )
3394 {
3395 NvU64 val = 0;
3396 Heap *pHeap = NULL;
3397 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
3398 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
3399
3400 *pBytes = 0;
3401
3402 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
3403 {
3404 NV_ASSERT(pKernelMIGGPUInstance->pMemoryPartitionHeap != NULL);
3405 pHeap = pKernelMIGGPUInstance->pMemoryPartitionHeap;
3406
3407 if (memmgrIsPmaInitialized(pMemoryManager))
3408 pmaGetTotalMemory(&pHeap->pmaObject, &val);
3409 else
3410 heapGetSize(pHeap, &val);
3411
3412 *pBytes += val;
3413 }
3414 FOR_EACH_VALID_GPU_INSTANCE_END();
3415 }
3416
3417 void
memmgrGetTopLevelScrubberStatus_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvBool * pbTopLevelScrubberEnabled,NvBool * pbTopLevelScrubberConstructed)3418 memmgrGetTopLevelScrubberStatus_IMPL
3419 (
3420 OBJGPU *pGpu,
3421 MemoryManager *pMemoryManager,
3422 NvBool *pbTopLevelScrubberEnabled,
3423 NvBool *pbTopLevelScrubberConstructed
3424 )
3425 {
3426 NvBool bTopLevelScrubberEnabled = NV_FALSE;
3427 NvBool bTopLevelScrubberConstructed = NV_FALSE;
3428 NvU32 pmaConfigs = PMA_QUERY_SCRUB_ENABLED | PMA_QUERY_SCRUB_VALID;
3429
3430 if (memmgrIsPmaInitialized(pMemoryManager))
3431 {
3432 Heap *pHeap = GPU_GET_HEAP(pGpu);
3433 NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfigs));
3434 bTopLevelScrubberEnabled = (pmaConfigs & PMA_QUERY_SCRUB_ENABLED) != 0x0;
3435 bTopLevelScrubberConstructed = (pmaConfigs & PMA_QUERY_SCRUB_VALID) != 0x0;
3436 }
3437
3438 if (pbTopLevelScrubberEnabled != NULL)
3439 *pbTopLevelScrubberEnabled = bTopLevelScrubberEnabled;
3440 if (pbTopLevelScrubberConstructed != NULL)
3441 *pbTopLevelScrubberConstructed = bTopLevelScrubberConstructed;
3442 }
3443
3444 /*!
3445 * @brief Return the full address range for the partition assigend for the vGPU.
3446 *
3447 * @param[in] pGpu
3448 * @param[in] pMemoryManager
3449 * @param[out] base reference to the base address of the partition
3450 * @param[out] size reference to the overall size of the partition
3451 */
3452 static void
_memmgrGetFullMIGAddrRange(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU64 * base,NvU64 * size)3453 _memmgrGetFullMIGAddrRange
3454 (
3455 OBJGPU *pGpu,
3456 MemoryManager *pMemoryManager,
3457 NvU64 *base,
3458 NvU64 *size
3459 )
3460 {
3461 NvU32 i;
3462 NvU64 lo, hi;
3463
3464 *base = 0;
3465 *size = 0;
3466 if (pMemoryManager->Ram.numFBRegions == 0)
3467 {
3468 return;
3469 }
3470
3471 lo = pMemoryManager->Ram.fbRegion[0].base;
3472 hi = pMemoryManager->Ram.fbRegion[0].limit;
3473
3474 for (i = 1; i < pMemoryManager->Ram.numFBRegions; i++)
3475 {
3476 if (pMemoryManager->Ram.fbRegion[i].base < lo)
3477 {
3478 lo = pMemoryManager->Ram.fbRegion[i].base;
3479 }
3480
3481 if (pMemoryManager->Ram.fbRegion[i].limit > hi)
3482 {
3483 hi = pMemoryManager->Ram.fbRegion[i].limit;
3484 }
3485 }
3486
3487 *base = lo;
3488 *size = hi - lo + 1;
3489 }
3490
3491 /*!
3492 * @brief Discover MIG partitionable memory range based on PMA status
3493 */
3494 NV_STATUS
memmgrDiscoverMIGPartitionableMemoryRange_VF(OBJGPU * pGpu,MemoryManager * pMemoryManager,NV_RANGE * pMemoryRange)3495 memmgrDiscoverMIGPartitionableMemoryRange_VF
3496 (
3497 OBJGPU *pGpu,
3498 MemoryManager *pMemoryManager,
3499 NV_RANGE *pMemoryRange
3500 )
3501 {
3502 NvU64 size;
3503 NvU64 base;
3504
3505 // Set memory information
3506 if (!memmgrIsPmaInitialized(pMemoryManager))
3507 {
3508 Heap *pHeap = GPU_GET_HEAP(pGpu);
3509 NvU64 freeMem;
3510 NvU64 bytesTotal;
3511 NvU64 offset;
3512
3513 NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
3514 &offset, &size));
3515
3516 //
3517 // offset is the starting address of biggest empty block whose size is
3518 // returned and we care about the base of largest empty block
3519 //
3520 base = offset;
3521 }
3522 else
3523 {
3524 //
3525 // In the case of vGPU, pmaGetLargestFree only returns the user-visible
3526 // PMA region and not the reserved/internal regions that constitute the
3527 // overall partition size assigned to the vGPU.
3528 // This is misleading as pMemoryManager->partitionableMemoryRange is expected to
3529 // represent the actual partition size.
3530 //
3531 _memmgrGetFullMIGAddrRange(pGpu, pMemoryManager, &base, &size);
3532 }
3533
3534 *pMemoryRange = rangeMake(base, base + size - 1);
3535
3536 return NV_OK;
3537 }
3538
3539 NV_STATUS
memmgrValidateFBEndReservation_PF(OBJGPU * pGpu,MemoryManager * pMemoryManager)3540 memmgrValidateFBEndReservation_PF
3541 (
3542 OBJGPU *pGpu,
3543 MemoryManager *pMemoryManager
3544 )
3545 {
3546 NV_STATUS status;
3547
3548 NV_ASSERT_TRUE_OR_GOTO(status,
3549 (pGpu != NULL) &&
3550 (pMemoryManager != NULL),
3551 NV_ERR_INVALID_ARGUMENT,
3552 memmgrValidateFBEndReservation_PF_exit);
3553
3554 // If we reserved more memory from RM than we previously estimated
3555 if (pMemoryManager->rsvdMemorySize > memmgrGetFBEndReserveSizeEstimate_HAL(pGpu, pMemoryManager))
3556 {
3557 NV_PRINTF(LEVEL_ERROR,
3558 "End of FB reservation was not enough (%u vs %u). Failing to boot.\n",
3559 memmgrGetFBEndReserveSizeEstimate_HAL(pGpu, pMemoryManager),
3560 pMemoryManager->rsvdMemorySize);
3561
3562 NV_ASSERT_OK_OR_GOTO(status,
3563 NV_ERR_INSUFFICIENT_RESOURCES,
3564 memmgrValidateFBEndReservation_PF_exit);
3565 }
3566
3567 memmgrValidateFBEndReservation_PF_exit:
3568 return status;
3569 }
3570
3571 NV_STATUS
memmgrReserveMemoryForPmu_MONOLITHIC(OBJGPU * pGpu,MemoryManager * pMemoryManager)3572 memmgrReserveMemoryForPmu_MONOLITHIC
3573 (
3574 OBJGPU *pGpu,
3575 MemoryManager *pMemoryManager
3576 )
3577 {
3578 NV_STATUS status = NV_OK;
3579
3580 return status;
3581 }
3582
3583
3584 NV_STATUS
memmgrReserveMemoryForFsp_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager)3585 memmgrReserveMemoryForFsp_IMPL
3586 (
3587 OBJGPU *pGpu,
3588 MemoryManager *pMemoryManager
3589 )
3590 {
3591 KernelFsp *pKernelFsp = GPU_GET_KERNEL_FSP(pGpu);
3592
3593 //
3594 // If we sent FSP commands to boot ACR, we need to allocate the surfaces
3595 // used by FSP and ACR as WPR/FRTS here from the reserved heap
3596 //
3597 if (pKernelFsp && (!pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_DISABLE_FRTS_VIDMEM) &&
3598 (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_BOOT_COMMAND_OK))))
3599 {
3600
3601 // For GSP-RM flow, we don't need to allocate WPR since it is handled by CPU
3602 if (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_GSP_MODE_GSPRM))
3603 {
3604 return NV_OK;
3605 }
3606
3607 }
3608 return NV_OK;
3609 }
3610
3611 NvU64
memmgrGetVgpuHostRmReservedFb_KERNEL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU32 vgpuTypeId)3612 memmgrGetVgpuHostRmReservedFb_KERNEL
3613 (
3614 OBJGPU *pGpu,
3615 MemoryManager *pMemoryManager,
3616 NvU32 vgpuTypeId
3617 )
3618 {
3619 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3620 NV2080_CTRL_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB_PARAMS params = {0};
3621
3622 params.vgpuTypeId = vgpuTypeId;
3623 // Send to GSP to get amount of FB reserved for the host
3624 NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
3625 pGpu->hInternalClient,
3626 pGpu->hInternalSubdevice,
3627 NV2080_CTRL_CMD_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB,
3628 ¶ms,
3629 sizeof(params)));
3630 return params.hostReservedFb;
3631 }
3632
3633 /*!
3634 * @brief Memory Manager State post load
3635 *
3636 * @param[in] pGpu GPU pointer
3637 * @param[in/out] pMemoryManager MemoryManager pointer
3638 * @param[in] flags State transition flags
3639 *
3640 * @returns On success, returns NV_OK.
3641 * On failure, returns error code.
3642 */
3643 NV_STATUS
memmgrStatePostLoad_IMPL(OBJGPU * pGpu,MemoryManager * pMemoryManager,NvU32 flags)3644 memmgrStatePostLoad_IMPL
3645 (
3646 OBJGPU *pGpu,
3647 MemoryManager *pMemoryManager,
3648 NvU32 flags
3649 )
3650 {
3651 if (memmgrIsLocalEgmSupported(pMemoryManager))
3652 {
3653 NvU64 egmPhysAddr, egmSize;
3654 NvS32 egmNodeId;
3655 NvU32 data32;
3656 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
3657
3658 pMemoryManager->localEgmNodeId = -1;
3659 if (gpuIsSelfHosted(pGpu) &&
3660 pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP) && // EGM can be enabled only in C2C path.
3661 (osGetEgmInfo(pGpu, &egmPhysAddr, &egmSize, &egmNodeId) == NV_OK) &&
3662 (egmSize != 0))
3663 {
3664 pMemoryManager->localEgmBasePhysAddr = egmPhysAddr;
3665 pMemoryManager->localEgmSize = egmSize;
3666 pMemoryManager->localEgmNodeId = egmNodeId;
3667 //
3668 // Using fixed Peer ID 7 for local EGM so that vGPU
3669 // migration doesn't fail because of peer id conflict in
3670 // the new host system.
3671 //
3672 pMemoryManager->localEgmPeerId = 7;
3673 pMemoryManager->bLocalEgmEnabled = NV_TRUE;
3674 }
3675
3676 //
3677 // regkey can override the production flow values.
3678 // Note that this could cause an issue with vGPU migration
3679 // if one host system uses regkey to override the EGM peer id
3680 // and other host system doesn't.
3681 //
3682 if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_LOCAL_EGM_PEER_ID, &data32) == NV_OK)
3683 {
3684 pMemoryManager->bLocalEgmEnabled = NV_TRUE;
3685 pMemoryManager->localEgmPeerId = data32;
3686 }
3687 }
3688
3689 //
3690 // Reserve the peerID used for local EGM so that the peerID isn't
3691 // resused for other peer Gpus.
3692 //
3693 if (memmgrIsLocalEgmEnabled(pMemoryManager))
3694 {
3695 if (kbusReserveP2PPeerIds_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), NVBIT(pMemoryManager->localEgmPeerId)) == NV_OK)
3696 {
3697 if (!IS_VIRTUAL_WITH_SRIOV(pGpu))
3698 {
3699 NV2080_CTRL_INTERNAL_HSHUB_EGM_CONFIG_PARAMS params = { 0 };
3700 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3701 NV_STATUS status;
3702
3703 params.egmPeerId = pMemoryManager->localEgmPeerId;
3704
3705 // Call physical HSHUB to program the EGM PeerId settings.
3706
3707 status = pRmApi->Control(pRmApi,
3708 pGpu->hInternalClient,
3709 pGpu->hInternalSubdevice,
3710 NV2080_CTRL_CMD_INTERNAL_HSHUB_EGM_CONFIG,
3711 ¶ms,
3712 sizeof(params));
3713 if (status != NV_OK)
3714 {
3715 NV_PRINTF(LEVEL_ERROR, "HSHUB programming failed for EGM Peer ID: %u\n",
3716 pMemoryManager->localEgmPeerId);
3717 pMemoryManager->bLocalEgmEnabled = NV_FALSE;
3718 pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3719 return status;
3720 }
3721 }
3722 }
3723 else
3724 {
3725 NV_PRINTF(LEVEL_ERROR,
3726 "Peer ID specified for local EGM already in use!\n");
3727 pMemoryManager->bLocalEgmEnabled = NV_FALSE;
3728 pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3729 return NV_ERR_INVALID_ARGUMENT;
3730 }
3731 }
3732 else
3733 {
3734 pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3735 }
3736 return NV_OK;
3737 }
3738
3739 NV_STATUS
memmgrInitCeUtils_IMPL(MemoryManager * pMemoryManager,NvBool bFifoLite)3740 memmgrInitCeUtils_IMPL
3741 (
3742 MemoryManager *pMemoryManager,
3743 NvBool bFifoLite
3744 )
3745 {
3746 OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
3747 NV0050_ALLOCATION_PARAMETERS ceUtilsParams = {0};
3748
3749 NV_ASSERT_OR_RETURN(pMemoryManager->pCeUtils == NULL, NV_ERR_INVALID_STATE);
3750
3751 if (!bFifoLite && pMemoryManager->pCeUtilsSuspended != NULL)
3752 {
3753 pMemoryManager->pCeUtils = pMemoryManager->pCeUtilsSuspended;
3754 pMemoryManager->pCeUtilsSuspended = NULL;
3755 return NV_OK;
3756 }
3757
3758 if (bFifoLite)
3759 ceUtilsParams.flags |= DRF_DEF(0050_CEUTILS, _FLAGS, _FIFO_LITE, _TRUE);
3760
3761 if (pMemoryManager->bCePhysicalVidmemAccessNotSupported)
3762 ceUtilsParams.flags |= DRF_DEF(0050_CEUTILS, _FLAGS, _VIRTUAL_MODE, _TRUE);
3763
3764 NV_ASSERT_OK_OR_RETURN(objCreate(&pMemoryManager->pCeUtils, pMemoryManager, CeUtils, ENG_GET_GPU(pMemoryManager), NULL, &ceUtilsParams));
3765
3766 NV_STATUS status = memmgrTestCeUtils(pGpu, pMemoryManager);
3767 NV_ASSERT_OK(status);
3768 if (status != NV_OK)
3769 {
3770 memmgrDestroyCeUtils(pMemoryManager, NV_FALSE);
3771 }
3772
3773 return status;
3774 }
3775
3776 void
memmgrDestroyCeUtils_IMPL(MemoryManager * pMemoryManager,NvBool bSuspendCeUtils)3777 memmgrDestroyCeUtils_IMPL
3778 (
3779 MemoryManager *pMemoryManager,
3780 NvBool bSuspendCeUtils
3781 )
3782 {
3783 if (bSuspendCeUtils)
3784 {
3785 NV_ASSERT_OR_RETURN_VOID(pMemoryManager->pCeUtilsSuspended == NULL);
3786 pMemoryManager->pCeUtilsSuspended = pMemoryManager->pCeUtils;
3787 }
3788 else
3789 {
3790 objDelete(pMemoryManager->pCeUtils);
3791 }
3792 pMemoryManager->pCeUtils = NULL;
3793 }
3794