1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "gpu/mem_mgr/mem_mgr.h"
25 #include "gpu/mem_mgr/heap.h"
26 #include "gpu/mem_sys/kern_mem_sys.h"
27 #include "gpu/mem_mgr/mem_utils.h"
28 #include "gpu/mem_mgr/ce_utils.h"
29 #include "mem_mgr/video_mem.h"
30 #include "gpu/mem_mgr/fbsr.h"
31 #include "gpu/mmu/kern_gmmu.h"
32 #include "gpu/bus/kern_bus.h"
33 #include "gpu/bif/kernel_bif.h"
34 #include "core/locks.h"
35 #include "virtualization/kernel_vgpu_mgr.h"
36 #include "vgpu/rpc.h"
37 #include "core/thread_state.h"
38 #include "nvRmReg.h"
39 #include "gpu/fsp/kern_fsp.h"
40 #include "gpu/pmu/kern_pmu.h"
41 #include "gpu/mem_mgr/phys_mem_allocator/numa.h"
42 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
43 #include "kernel/rmapi/rs_utils.h"
44 #include "rmapi/rmapi_utils.h"
45 #include "mmu/gmmu_fmt.h"
46 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
47 #include "class/cl503c.h"
48 #include "class/cl906f.h" // GF100_CHANNEL_GPFIFO
49 #include "os/os.h"
50 #include "gpu/gsp/kernel_gsp.h"
51 #include "gpu/conf_compute/conf_compute.h"
52 
53 #include "class/cl0050.h"
54 
55 static NV_STATUS _memmgrCreateFBSR(MemoryManager *pMemoryManager, NvU32);
56 static NV_STATUS _memmgrCreateChildObjects(MemoryManager *pMemoryManager);
57 static void _memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager);
58 static NV_STATUS _memmgrInitMIGMemoryPartitionHeap(OBJGPU *pGpu, MemoryManager *pMemoryManager,
59                                                    NvU32 swizzId, NV_RANGE *pAddrRange,
60                                                    Heap **ppMemoryPartitionHeap);
61 static NV_STATUS _memmgrAllocInternalClientObjects(OBJGPU *pGpu,
62                                                    MemoryManager *pMemoryManager);
63 static void _memmgrFreeInternalClientObjects(MemoryManager *pMemoryManager);
64 
65 #define MEMUTILS_CHANNEL_GPFIFO_SIZE                  (NV906F_GP_ENTRY__SIZE * MEMUTILS_NUM_GPFIFIO_ENTRIES)
66 
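/*!
 * @brief Construct the MemoryManager engine: set the default heap/FB size
 *        overrides, create the FBSR child objects and reset the MIG memory
 *        partitioning handles.
 */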
67 NV_STATUS
68 memmgrConstructEngine_IMPL
69 (
70     OBJGPU        *pGpu,
71     MemoryManager *pMemoryManager,
72     ENGDESCRIPTOR  engDesc
73 )
74 {
75     NV_STATUS rmStatus;
76 
77     pMemoryManager->overrideInitHeapMin = 0;
78     pMemoryManager->overrideHeapMax     = ~0ULL;
79     pMemoryManager->Ram.fbOverrideSizeMb = ~0ULL;
80 
81     // Create the children
82     rmStatus = _memmgrCreateChildObjects(pMemoryManager);
83     if (rmStatus != NV_OK)
84         return rmStatus;
85 
86     pMemoryManager->MIGMemoryPartitioningInfo.hClient = NV01_NULL_OBJECT;
87     pMemoryManager->MIGMemoryPartitioningInfo.hDevice = NV01_NULL_OBJECT;
88     pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice = NV01_NULL_OBJECT;
89     pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;
90 
91     return NV_OK;
92 }
93 
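/*!
 * @brief Destruct the MemoryManager: delete the FBSR objects and the heap.
 */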
94 void
95 memmgrDestruct_IMPL
96 (
97     MemoryManager *pMemoryManager
98 )
99 {
100     NvU32 i;
101 
102     for (i = 0; i < NUM_FBSR_TYPES; i++)
103     {
104         objDelete(pMemoryManager->pFbsr[i]);
105         pMemoryManager->pFbsr[i] = NULL;
106     }
107 
108     objDelete(pMemoryManager->pHeap);
109     pMemoryManager->pHeap = NULL;
110 
111     pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;
112 }
113 
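/*!
 * @brief Apply registry overrides (FB size, scrub on free, sysmem page size,
 *        FBSR modes, PMA enablement, global CeUtils, etc.) to the MemoryManager.
 */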
114 static void
115 _memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager)
116 {
117     NvU32 data32;
118 
119     // Check for ram size override.
120     if ((osReadRegistryDword(pGpu, NV_REG_STR_OVERRIDE_FB_SIZE, &data32) == NV_OK) &&
121         (data32 != 0))
122     {
123         NV_PRINTF(LEVEL_WARNING, "Regkey %s = %dM\n",
124                   NV_REG_STR_OVERRIDE_FB_SIZE, data32);
125         // Used to override heap sizing at create
126         pMemoryManager->Ram.fbOverrideSizeMb = data32;
127     }
128     else
129     {
130         pMemoryManager->Ram.fbOverrideSizeMb = ~0ULL;
131     }
132 
133     //
134     // Scrub on Free is enabled by default for GK110+
135     // The reg key will be used to disable the scrub on free
136     //
137     if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_SCRUB_ON_FREE,
138                              &data32) == NV_OK) && data32)
139     {
140         pMemoryManager->bScrubOnFreeEnabled = NV_FALSE;
141     }
142 
143     if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_FAST_SCRUBBER,
144                              &data32) == NV_OK) && data32)
145     {
146         pMemoryManager->bFastScrubberEnabled = NV_FALSE;
147     }
148 
149     if (NV_OK == osReadRegistryDword(pGpu, NV_REG_STR_RM_SYSMEM_PAGE_SIZE, &data32))
150     {
151         switch (data32)
152         {
153             case RM_PAGE_SIZE:
154             case RM_PAGE_SIZE_64K:
155             case RM_PAGE_SIZE_HUGE:
156             case RM_PAGE_SIZE_512M:
157                 break;
158             default:
159                 NV_ASSERT(0);
160                 NV_PRINTF(LEVEL_ERROR,
161                           "Sysmem page size 0x%x not supported! Defaulting to 4KB\n",
162                           data32);
163                 data32 = RM_PAGE_SIZE;
164         }
165         pMemoryManager->sysmemPageSize = data32;
166     }
167     else
168     {
169         pMemoryManager->sysmemPageSize = RM_PAGE_SIZE;
170 
171     }
172 
173     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ALLOW_SYSMEM_LARGE_PAGES, &data32) == NV_OK)
174     {
175         pMemoryManager->bAllowSysmemHugePages = data32 ? NV_TRUE : NV_FALSE;
176     }
177     else
178     {
179         pMemoryManager->bAllowSysmemHugePages = NV_FALSE;
180     }
181 
182     // This key should not be used on physical (GSP) RM.
183     if (!RMCFG_FEATURE_PLATFORM_GSP)
184     {
185         // Allow user to increase size of RM reserved heap via a regkey
186         if (osReadRegistryDword(pGpu, NV_REG_STR_RM_INCREASE_RSVD_MEMORY_SIZE_MB,
187                                 &data32) == NV_OK)
188         {
189             pMemoryManager->rsvdMemorySizeIncrement = (NvU64)data32 << 20;
190             NV_PRINTF(LEVEL_ERROR,
191                       "User specified increase in reserved size = %d MBs\n",
192                       data32);
193         }
194     }
195 
196     if (osReadRegistryDword(pGpu,
197                             NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION,
198                             &data32) == NV_OK)
199     {
200         if (data32 == NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION_TRUE)
201         {
202             pMemoryManager->bAllowNoncontiguousAllocation = NV_FALSE;
203         }
204     }
205 
206     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_PAGED_DMA, &data32) == NV_OK)
207     {
208         pMemoryManager->bEnableFbsrPagedDma = !!data32;
209     }
210 
211     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_FILE_MODE, &data32) == NV_OK)
212     {
213         if (data32 && RMCFG_FEATURE_PLATFORM_UNIX)
214         {
215             pMemoryManager->bEnableFbsrFileMode = NV_TRUE;
216         }
217     }
218 
219     //
220     // Override PMA enable.  PDB_PROP_FB_PMA_ENABLED is reconciled with
221     // PDB_PROP_FB_PLATFORM_PMA_SUPPORT to decide whether to enable PMA.
222     //
223     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA, &data32) == NV_OK)
224     {
225         if (data32 == NV_REG_STR_RM_ENABLE_PMA_YES)
226         {
227             pMemoryManager->bPmaEnabled = NV_TRUE;
228         }
229         else
230         {
231             pMemoryManager->bPmaEnabled = NV_FALSE;
232         }
233     }
234 
235     if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
236     {
237         pMemoryManager->bFbsrWddmModeEnabled = NV_TRUE;
238     }
239 
240     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_WDDM_MODE, &data32) == NV_OK)
241     {
242         pMemoryManager->bFbsrWddmModeEnabled = !!data32;
243     }
244 
245     //
246     // Override PMA managed client page tables.
247     // NOTE: This is WAR for bug #s 1946145 and 1971628.
248     // This should be removed as part of heap removal and PMA refactor.
249     //
250     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES,
251                             &data32) == NV_OK)
252     {
253         if (data32 == NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES_NO)
254         {
255             memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
256         }
257     }
258 
259     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_ADDRTREE, &data32) == NV_OK)
260     {
261         if (data32 == NV_REG_STR_RM_ENABLE_ADDRTREE_YES)
262         {
263             pMemoryManager->bPmaAddrTree = NV_TRUE;
264             NV_PRINTF(LEVEL_ERROR, "Enabled address tree for PMA via regkey.\n");
265         }
266     }
267     else if (RMCFG_FEATURE_PLATFORM_MODS)
268     {
269         pMemoryManager->bPmaAddrTree = NV_TRUE;
270         NV_PRINTF(LEVEL_ERROR, "Enabled address tree for PMA for MODS.\n");
271     }
272 
273     if (osReadRegistryDword(pGpu, NV_REG_STR_DISABLE_GLOBAL_CE_UTILS, &data32) == NV_OK &&
274         data32 == NV_REG_STR_DISABLE_GLOBAL_CE_UTILS_YES)
275     {
276         pMemoryManager->bDisableGlobalCeUtils = NV_TRUE;
277     }
278 }
279 
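/*!
 * @brief Pre-init: set up FB regions (GSP client only, see CORERM-4078) and
 *        determine the size of the RM reserved memory region.
 */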
280 NV_STATUS
281 memmgrStatePreInitLocked_IMPL
282 (
283     OBJGPU        *pGpu,
284     MemoryManager *pMemoryManager
285 )
286 {
287     if (IS_GSP_CLIENT(pGpu))
288     {
289         //
290         // Temporary hack to get OpenRM working without breaking SLI
291         // After fixing CORERM-4078, memmgrInitFbRegions() call should be removed from memsysStateInitLocked()
292         // and only left here
293         //
294         NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegions(pGpu, pMemoryManager));
295     }
296 
297     // Determine the size of reserved memory
298     NV_ASSERT_OK_OR_RETURN(memmgrPreInitReservedMemory_HAL(pGpu, pMemoryManager));
299 
300     return NV_OK;
301 }
302 
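/*!
 * @brief Sanity check the CeUtils instance: write known patterns to vidmem and
 *        sysmem, CE-copy vidmem to sysmem, then read it back and compare.
 */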
303 static NV_STATUS
304 memmgrTestCeUtils
305 (
306     OBJGPU        *pGpu,
307     MemoryManager *pMemoryManager
308 )
309 {
310     MEMORY_DESCRIPTOR *pVidMemDesc   = NULL;
311     MEMORY_DESCRIPTOR *pSysMemDesc   = NULL;
312     TRANSFER_SURFACE   vidSurface    = {0};
313     TRANSFER_SURFACE   sysSurface    = {0};
314     NvU32              vidmemData    = 0xAABBCCDD;
315     NvU32              sysmemData    = 0x11223345;
316     NV_STATUS          status;
317 
318     NV_ASSERT_OR_RETURN(pMemoryManager->pCeUtils != NULL, NV_ERR_INVALID_STATE);
319 
320     if (pMemoryManager->pCeUtils->pLiteKernelChannel != NULL)
321     {
322         //
323         // BUG 4167899: Temporarily skip test in case of lite mode
324         // It sometimes fails when called from acrGatherWprInformation_GM200()
325         // However, ACR is initialized without issues
326         //
327         return NV_OK;
328     }
329 
330     NV_ASSERT_OK_OR_GOTO(status,
331         memdescCreate(&pVidMemDesc, pGpu, sizeof vidmemData, RM_PAGE_SIZE, NV_TRUE, ADDR_FBMEM,
332                       NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE),
333         failed);
334     memdescTagAlloc(status,
335                     NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_19, pVidMemDesc);
336     NV_ASSERT_OK_OR_GOTO(status, status, failed);
337     vidSurface.pMemDesc = pVidMemDesc;
338 
339     NV_ASSERT_OK_OR_GOTO(status,
340         memdescCreate(&pSysMemDesc, pGpu, sizeof sysmemData, 0, NV_TRUE, ADDR_SYSMEM,
341                       NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE),
342         failed);
343     memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_138,
344                     pSysMemDesc);
345     NV_ASSERT_OK_OR_GOTO(status, status, failed);
346     sysSurface.pMemDesc = pSysMemDesc;
347 
348     NV_ASSERT_OK_OR_GOTO(status, memmgrMemWrite(pMemoryManager, &vidSurface, &vidmemData, sizeof vidmemData, TRANSFER_FLAGS_NONE),      failed);
349     NV_ASSERT_OK_OR_GOTO(status, memmgrMemWrite(pMemoryManager, &sysSurface, &sysmemData, sizeof sysmemData, TRANSFER_FLAGS_NONE),      failed);
350     NV_ASSERT_OK_OR_GOTO(status, memmgrMemCopy (pMemoryManager, &sysSurface, &vidSurface, sizeof vidmemData, TRANSFER_FLAGS_PREFER_CE), failed);
351     NV_ASSERT_OK_OR_GOTO(status, memmgrMemRead (pMemoryManager, &sysSurface, &sysmemData, sizeof sysmemData, TRANSFER_FLAGS_NONE),      failed);
352     NV_ASSERT_TRUE_OR_GOTO(status, sysmemData == vidmemData, NV_ERR_INVALID_STATE, failed);
353 
354 failed:
355     memdescFree(pVidMemDesc);
356     memdescDestroy(pVidMemDesc);
357     memdescFree(pSysMemDesc);
358     memdescDestroy(pSysMemDesc);
359 
360     return status;
361 }
362 
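/*!
 * @brief Post-scheduling-enable work: hook up the scrubber and create the
 *        global CeUtils instance on configurations that support it.
 */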
363 NV_STATUS
364 memmgrInitInternalChannels_IMPL
365 (
366     OBJGPU        *pGpu,
367     MemoryManager *pMemoryManager
368 )
369 {
370     NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePostSchedulingEnable_HAL(pGpu, pMemoryManager));
371 
372     if (pMemoryManager->bDisableGlobalCeUtils ||
373         pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
374         pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) ||
375         pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB) ||
376         gpuIsCacheOnlyModeEnabled(pGpu) ||
377         (IS_VIRTUAL(pGpu) && !IS_VIRTUAL_WITH_FULL_SRIOV(pGpu)) ||
378         IS_SIMULATION(pGpu) ||
379         IsDFPGA(pGpu))
380     {
381         NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation (unsupported platform)\n");
382 
383         return NV_OK;
384     }
385 
386     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU) ||
387         !memmgrIsPmaInitialized(pMemoryManager) ||
388         RMCFG_FEATURE_PLATFORM_GSP ||
389         RMCFG_FEATURE_PLATFORM_WINDOWS ||
390         IS_MIG_ENABLED(pGpu) ||
391         gpuIsCCorApmFeatureEnabled(pGpu) ||
392         IsSLIEnabled(pGpu) ||
393         RMCFG_FEATURE_ARCH_PPC64LE ||
394         RMCFG_FEATURE_ARCH_AARCH64)
395     {
396         // BUG 4167899: Temporarily skip CeUtils creation on platforms where it fails
397         NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation\n");
398 
399         return NV_OK;
400     }
401 
402     NV_PRINTF(LEVEL_INFO, "Initializing global CeUtils instance\n");
403 
404     NV_ASSERT_OK_OR_RETURN(memmgrInitCeUtils(pMemoryManager, NV_FALSE));
405 
406     return NV_OK;
407 }
408 
409 NV_STATUS
410 memmgrDestroyInternalChannels_IMPL
411 (
412     OBJGPU        *pGpu,
413     MemoryManager *pMemoryManager
414 )
415 {
416     NV_PRINTF(LEVEL_INFO, "Destroying global CeUtils instance\n");
417 
418     memmgrDestroyCeUtils(pMemoryManager, NV_FALSE);
419 
420     NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePreSchedulingDisable_HAL(pGpu, pMemoryManager));
421 
422     return NV_OK;
423 }
424 
425 static NV_STATUS
426 memmgrPostSchedulingEnableHandler
427 (
428     OBJGPU *pGpu,
429     void   *pUnusedData
430 )
431 {
432     return memmgrInitInternalChannels(pGpu, GPU_GET_MEMORY_MANAGER(pGpu));
433 }
434 
435 static NV_STATUS
436 memmgrPreSchedulingDisableHandler
437 (
438     OBJGPU *pGpu,
439     void   *pUnusedData
440 )
441 {
442     return memmgrDestroyInternalChannels(pGpu, GPU_GET_MEMORY_MANAGER(pGpu));
443 }
444 
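/*!
 * @brief State init: set up reserved memory and registry overrides, create the
 *        FB heap and page level pools, initialize the FBSR engines and allocate
 *        the internal client objects.
 */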
445 NV_STATUS
446 memmgrStateInitLocked_IMPL
447 (
448     OBJGPU        *pGpu,
449     MemoryManager *pMemoryManager
450 )
451 {
452     NV_STATUS status = NV_OK;
453     NvU32     i;
454     NvBool    bDynamicPageOffliningDisable = NV_FALSE;
455 
456     NV_ASSERT_OK_OR_RETURN(memmgrInitReservedMemory_HAL(pGpu, pMemoryManager, pMemoryManager->Ram.fbAddrSpaceSizeMb << 20));
457 
458     _memmgrInitRegistryOverrides(pGpu, pMemoryManager);
459 
460     //
461     // Enable dynamic page blacklisting at this point before we call CreateHeap
462     // since it internally calls heapGetBlacklistPages which depends on this property
463     //
464     if (!bDynamicPageOffliningDisable)
465         memmgrEnableDynamicPageOfflining_HAL(pGpu, pMemoryManager);
466 
467     memmgrScrubRegistryOverrides_HAL(pGpu, pMemoryManager);
468 
469     memmgrScrubInit_HAL(pGpu, pMemoryManager);
470     NV_ASSERT_OK_OR_RETURN(kfifoAddSchedulingHandler(pGpu,
471                 GPU_GET_KERNEL_FIFO(pGpu),
472                 memmgrPostSchedulingEnableHandler, NULL,
473                 memmgrPreSchedulingDisableHandler, NULL));
474 
475     //
476     // Allocate framebuffer heap.  All memory must be allocated from here to keep the world
477     // consistent (N.B. the heap size has been reduced by the amount of instance memory).
478     //
479     status = memmgrCreateHeap(pMemoryManager);
480     if (status != NV_OK)
481     {
482         return status;
483     }
484 
485     //
486     // Just set up the memory pool now (basic init stuff). Actual physical
487     // frames are *NOT* added to the pool at this stage.
488     //
489     status = memmgrPageLevelPoolsCreate(pGpu, pMemoryManager);
490     if (status != NV_OK)
491     {
492         return status;
493     }
494 
495     // RMCONFIG: only if FBSR engine is enabled
496     if (RMCFG_MODULE_FBSR)
497     {
498         //
499         // If a configuration is not supported, do not initialize
500         // the corresponding fbsr engine.
501         //
502         if (pMemoryManager->bFbsrWddmModeEnabled)
503         {
504             pMemoryManager->fbsrStartMode = FBSR_TYPE_WDDM_FAST_DMA_DEFERRED_NONPAGED;
505         }
506         else if (pMemoryManager->bEnableFbsrPagedDma)
507         {
508             pMemoryManager->fbsrStartMode = FBSR_TYPE_PAGED_DMA;
509         }
510         else if (pMemoryManager->bEnableFbsrFileMode)
511         {
512             pMemoryManager->fbsrStartMode = FBSR_TYPE_FILE;
513         }
514         else
515         {
516             pMemoryManager->fbsrStartMode = FBSR_TYPE_PERSISTENT;
517         }
518 
519         for (i = pMemoryManager->fbsrStartMode; i < NUM_FBSR_TYPES; i++)
520         {
521             if (!pMemoryManager->bPersistentStandbyBuffer &&
522                 (i == FBSR_TYPE_PERSISTENT))
523             {
524                 continue;
525             }
526 
527             if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
528                 (i == FBSR_TYPE_PAGED_DMA || i == FBSR_TYPE_DMA))
529             {
530                 continue;
531             }
532 
533             status = fbsrInit_HAL(pGpu, pMemoryManager->pFbsr[i]);
534 
535             //
536             // If one fbsr scheme failed, proceed to initializing the other
537             // fallback options.
538             //
539             if (status != NV_OK)
540             {
541                 NV_PRINTF(LEVEL_WARNING,
542                           "fbsrInit failed for supported type %d suspend-resume scheme\n",
543                           i);
544                 continue;
545             }
546         }
547     }
548 
549     status = _memmgrAllocInternalClientObjects(pGpu, pMemoryManager);
550     if (status != NV_OK)
551     {
552         //
553         // TODO: Bug 3482892: Need a way to roll back StateInit
554         //       steps in case of a failure
555         // WAR for now is to cleanup with memmgrStateDestroy().
556         //
557         memmgrStateDestroy(pGpu, pMemoryManager);
558         return status;
559     }
560 
561     return NV_OK;
562 }
563 
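/*!
 * @brief Verify GSP DMA by writing a test pattern to FB via memmgrMemWrite and
 *        reading it back; only runs when CPU (BAR) access to vidmem is blocked.
 */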
564 NV_STATUS
565 memmgrVerifyGspDmaOps_IMPL
566 (
567     OBJGPU        *pGpu,
568     MemoryManager *pMemoryManager
569 )
570 {
571     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
572     NV_STATUS status = NV_OK;
573     MEMORY_DESCRIPTOR *pMemDesc;
574     NvU8 *pTestBuffer;
575     NvU32 testData = 0xdeadbeef;
576     TRANSFER_SURFACE surf = {0};
577 
578     //
579     // Return early if CPU access to CPR vidmem is allowed as GSP DMA
580     // is not needed in this case
581     //
582     if (!kbusIsBarAccessBlocked(pKernelBus))
583         return NV_OK;
584 
585     pTestBuffer = portMemAllocNonPaged(4096);
586     NV_ASSERT_OR_RETURN(pTestBuffer != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
587 
588     portMemSet(pTestBuffer, 0, 4096);
589 
590     status = memdescCreate(&pMemDesc, pGpu, RM_PAGE_SIZE, RM_PAGE_SIZE,
591                            NV_TRUE, ADDR_FBMEM, NV_MEMORY_UNCACHED, 0);
592     NV_ASSERT_OR_RETURN(status == NV_OK, status);
593 
594     memdescTagAlloc(status,
595                     NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_20, pMemDesc);
596     NV_ASSERT_OR_GOTO(status == NV_OK, failed);
597 
598     surf.pMemDesc = pMemDesc;
599     surf.offset = sizeof(NvU32); // Choosing a random offset
600 
601     // Write test data to FB using GSP
602     status = memmgrMemWrite(pMemoryManager, &surf, &testData, sizeof(NvU32),
603                             TRANSFER_FLAGS_NONE);
604     NV_ASSERT_OR_GOTO(status == NV_OK, failed);
605 
606     // Read the same location using GSP and confirm that GSP read is also working fine
607     status = memmgrMemRead(pMemoryManager, &surf, pTestBuffer, sizeof(NvU32),
608                            TRANSFER_FLAGS_NONE);
609     NV_ASSERT_OR_GOTO(status == NV_OK, failed);
610 
611     if (((NvU32*)pTestBuffer)[0] != testData)
612     {
613         NV_PRINTF(LEVEL_ERROR, "####################################################\n");
614         NV_PRINTF(LEVEL_ERROR, "    Read back of data using GSP shows mismatch\n");
615         NV_PRINTF(LEVEL_ERROR, "    Test data: 0x%x Read Data: 0x%x\n", testData, ((NvU32*)pTestBuffer)[0]);
616         NV_PRINTF(LEVEL_ERROR, "####################################################\n");
617         status = NV_ERR_INVALID_STATE;
618         NV_ASSERT_OR_GOTO(status == NV_OK, failed);
619     }
620     else
621     {
622         NV_PRINTF(LEVEL_INFO, "####################################################\n");
623         NV_PRINTF(LEVEL_INFO, "    Read back of data using GSP confirms write\n");
624         NV_PRINTF(LEVEL_INFO, "####################################################\n");
625     }
626 
627 failed:
628     memdescFree(pMemDesc);
629     memdescDestroy(pMemDesc);
630     portMemFree(pTestBuffer);
631 
632     return status;
633 }
634 
635 NV_STATUS
636 memmgrStateLoad_IMPL
637 (
638     OBJGPU *pGpu,
639     MemoryManager *pMemoryManager,
640     NvU32 flags
641 )
642 {
643     // If fbOverrideSizeMb is set, finish setting up the FB parameters now that state init has finished
644     memmgrFinishHandleSizeOverrides_HAL(pGpu, pMemoryManager);
645 
646     if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
647         !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
648     {
649         //
650         // Only do initialization scrubs (i.e. RM reserved region) on
651         // non-GC6 transitions since GC6 cycles leave FB powered.
652         //
653         memmgrScrubInit_HAL(pGpu, pMemoryManager);
654     }
655 
656     // Dump FB regions
657     memmgrDumpFbRegions(pGpu, pMemoryManager);
658 
659     return NV_OK;
660 }
661 
662 NV_STATUS
663 memmgrStatePreUnload_IMPL
664 (
665     OBJGPU *pGpu,
666     MemoryManager *pMemoryManager,
667     NvU32 flags
668 )
669 {
670     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
671 
672     NV_ASSERT((flags & GPU_STATE_FLAGS_PRESERVING) || pMemoryManager->zbcSurfaces == 0);
673 
674     if (flags & GPU_STATE_FLAGS_PRESERVING)
675     {
676         //
        // fifo won't send a PreSchedulingDisable callback on StateUnload;
        // destroy the channel manually so that a CeUtils lite instance can be created for FBSR
679         //
680         memmgrDestroyCeUtils(pMemoryManager, NV_TRUE);
681     }
682 
683     if (memmgrIsPmaEnabled(pMemoryManager) &&
684         memmgrIsPmaSupportedOnPlatform(pMemoryManager) &&
685         osNumaOnliningEnabled(pGpu->pOsGpuInfo) &&
686         pKernelMemorySystem->memPartitionNumaInfo[0].bInUse)
687     {
688         pmaNumaOfflined(&pMemoryManager->pHeap->pmaObject);
689     }
690 
691     return NV_OK;
692 }
693 
694 NV_STATUS
695 memmgrStateUnload_IMPL
696 (
697     OBJGPU *pGpu,
698     MemoryManager *pMemoryManager,
699     NvU32 flags
700 )
701 {
702     if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
703         !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
704     {
705         //
        // Initialization scrubs only happen during StateLoad on non-GC6
707         // transitions.
708         //
709         memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
710     }
711 
712     return NV_OK;
713 }
714 
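/*!
 * @brief State destroy: free internal client objects and page level pools,
 *        delete the heap, clean up the FBSR reserved ranges and engines,
 *        release the local EGM peer ID and remove the scheduling handlers.
 */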
715 void
716 memmgrStateDestroy_IMPL
717 (
718     OBJGPU        *pGpu,
719     MemoryManager *pMemoryManager
720 )
721 {
722     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
723     Heap               *pHeap               = MEMORY_MANAGER_GET_HEAP(pMemoryManager);
724     NvU32               i;
725 
726     _memmgrFreeInternalClientObjects(pMemoryManager);
727 
728     // Destroys the SW state of the page level pools
729     memmgrPageLevelPoolsDestroy(pGpu, pMemoryManager);
730 
731     // Destroy the heap entirely, and all associated structures
732     if (pHeap)
733     {
734         kmemsysPreHeapDestruct_HAL(pGpu, pKernelMemorySystem);
735 
736         objDelete(pHeap);
737         pMemoryManager->pHeap = NULL;
738     }
739 
740     // RMCONFIG: only if FBSR engine is enabled
741     if (RMCFG_MODULE_FBSR)
742     {
743         // Cleanup fbsrReservedRanges
744         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE] != NULL)
745             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE]);
746 
747         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE] != NULL)
748             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]);
749 
750         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP] != NULL)
751             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]);
752 
753         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR] != NULL)
754             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]);
755 
756         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR] != NULL)
757             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]);
758 
759         if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE] != NULL)
760             memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]);
761 
762         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE] = NULL;
763         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]  = NULL;
764         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]       = NULL;
765         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]    = NULL;
766         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]        = NULL;
767         pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]  = NULL;
768 
769         for (i = 0; i < NUM_FBSR_TYPES; i++)
770         {
771             fbsrDestroy_HAL(pGpu, pMemoryManager->pFbsr[i]);
772         }
773     }
774     if (memmgrIsLocalEgmEnabled(pMemoryManager))
775     {
776         if (!IS_VIRTUAL_WITH_SRIOV(pGpu))
777         {
778             kbusUnreserveP2PPeerIds_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), NVBIT(pMemoryManager->localEgmPeerId));
779         }
780         pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
781         pMemoryManager->bLocalEgmEnabled = NV_FALSE;
782     }
783 
784     kfifoRemoveSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
785         memmgrPostSchedulingEnableHandler, NULL,
786         memmgrPreSchedulingDisableHandler, NULL);
787     memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
788 }
789 
790 static NV_STATUS
791 _memmgrCreateChildObjects
792 (
793     MemoryManager *pMemoryManager
794 )
795 {
796     NV_STATUS status = NV_OK;
797 
798     // RMCONFIG: only if FBSR engine is enabled
799     if (RMCFG_MODULE_FBSR)
800     {
801         NvU32 i;
802 
803         // Create FBSR object for every type RM supports.
804         for (i = 0; i < NUM_FBSR_TYPES; i++)
805         {
806             status = _memmgrCreateFBSR(pMemoryManager, i);
807             if (status != NV_OK)
808             {
809                 return status;
810             }
811         }
812     }
813 
814     return status;
815 }
816 
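/*!
 * @brief Create the FB heap: account for reserved memory, initialize the heap
 *        (and PMA where supported), register PMA regions, reserve memory for
 *        PMU/FSP and blacklist bad pages.
 */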
817 NV_STATUS
818 memmgrCreateHeap_IMPL
819 (
820     MemoryManager *pMemoryManager
821 )
822 {
823     Heap               *newHeap;
824     OBJGPU             *pGpu                = ENG_GET_GPU(pMemoryManager);
825     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
826     NvU64               rsvdSize;
827     NvU64               size;
828     NV_STATUS           status              = NV_OK;
829     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
830         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
831 
832     // If we're using FB regions then rsvd memory is already marked as a reserved region
833     if ((pMemoryManager->Ram.numFBRegions == 0) || (IS_VIRTUAL_WITH_SRIOV(pGpu)))
834     {
835         if (pMemorySystemConfig->bReservedMemAtBottom)
836         {
837             // rsvd memory is already accounted for in heapStart
838             rsvdSize = 0;
839         }
840         else
841         {
842             rsvdSize = pMemoryManager->rsvdMemorySize;
843         }
844     }
845     else
846         rsvdSize = 0;
847 
848     // for vGPU, add extra FB tax incurred by host RM to reserved size
849     rsvdSize += memmgrGetFbTaxSize_HAL(pGpu, pMemoryManager);
850 
851     //
852     // Fix up region descriptions to match with any FB override size
853     //
854     memmgrHandleSizeOverrides_HAL(pGpu, pMemoryManager);
855 
856     //
857     // Calculate the FB heap size as the address space size, then deduct any reserved memory
858     //
859     size = pMemoryManager->Ram.fbAddrSpaceSizeMb << 20;
860     size -= NV_MIN(size, rsvdSize);
861 
    if ((size != 0) || (pMemoryManager->bScanoutSysmem))
863     {
864         status = objCreate(&newHeap, pMemoryManager, Heap);
865         if (status != NV_OK)
866         {
867             return status;
868         }
869 
870         pMemoryManager->pHeap = newHeap;
871 
872         if (memmgrIsPmaEnabled(pMemoryManager) &&
873             memmgrIsPmaSupportedOnPlatform(pMemoryManager))
874         {
875             portMemSet(&pMemoryManager->pHeap->pmaObject, 0, sizeof(pMemoryManager->pHeap->pmaObject));
876             status = memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryManager->pHeap->pmaObject);
877             NV_ASSERT_OR_RETURN(status == NV_OK, status);
878         }
879 
880         status = heapInit(pGpu, newHeap,
881                           pMemoryManager->heapStartOffset,
882                           size - pMemoryManager->heapStartOffset, HEAP_TYPE_RM_GLOBAL, GPU_GFID_PF, NULL);
883         NV_ASSERT_OK_OR_RETURN(status);
884 
885         if ((memmgrIsPmaInitialized(pMemoryManager)) && (pMemoryManager->pHeap->bHasFbRegions))
886         {
887             status = memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryManager->pHeap,
888                                               &pMemoryManager->pHeap->pmaObject);
889             NV_ASSERT_OR_RETURN(status == NV_OK, status);
890         }
891 
892         NV_ASSERT_OK_OR_RETURN(memmgrValidateFBEndReservation_HAL(pGpu, pMemoryManager));
893 
894         NV_ASSERT_OK_OR_RETURN(memmgrReserveMemoryForPmu_HAL(pGpu, pMemoryManager));
895 
896         // Reserve vidmem for FSP usage, including FRTS, WPR2
897         status = memmgrReserveMemoryForFsp(pGpu, pMemoryManager);
898         if (status != NV_OK)
899         {
900             NV_PRINTF(LEVEL_ERROR, "Failed to reserve vidmem for WPR and FRTS.\n");
901             return status;
902         }
903 
904         if (!IsSLIEnabled(pGpu))
905         {
906             // Do the actual blacklisting of pages from the heap
907             if (newHeap->blackListAddresses.count != 0)
908             {
909                 status = heapBlackListPages(pGpu, newHeap);
910 
911                 if (status != NV_OK)
912                 {
913                     // Warn and continue
914                     NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
915                               status);
916                 }
917             }
918         }
919 
920         kmemsysPostHeapCreate_HAL(pGpu, pKernelMemorySystem);
921     }
922 
923     return status;
924 }
925 
/*!
 * @brief Gets per-device suballocator. If it is not available, get shared heap.
 *
 * @param[in] pMemoryManager MemoryManager pointer
 * @param[in] bForceSubheap  If NV_TRUE, require the per-device suballocator and do not fall back to the shared heap
 */
931 Heap *
932 memmgrGetDeviceSuballocator_IMPL
933 (
934     MemoryManager *pMemoryManager,
935     NvBool         bForceSubheap
936 )
937 {
938 
939     if (!bForceSubheap)
940     {
941         // If no suballocator found, use heap
942         return MEMORY_MANAGER_GET_HEAP(pMemoryManager);
943     }
944 
945     return NULL;
946 }
947 
948 static NV_STATUS
949 _memmgrCreateFBSR
950 (
951     MemoryManager *pMemoryManager,
952     NvU32          type
953 )
954 {
955     OBJFBSR *pFbsr;
956     NV_STATUS status;
957 
958     status = objCreate(&pFbsr, pMemoryManager, OBJFBSR);
959     if (status != NV_OK)
960     {
961         return status;
962     }
963 
964     NV_ASSERT(pFbsr);
965     pMemoryManager->pFbsr[type] = pFbsr;
966 
967     fbsrObjectInit(pFbsr, type);
968 
969     return NV_OK;
970 }
971 
972 static void
973 _memmgrFreeInternalClientObjects
974 (
975     MemoryManager *pMemoryManager
976 )
977 {
978     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
979 
980     if (pMemoryManager->hThirdPartyP2P != 0)
981     {
982         pRmApi->Free(pRmApi, pMemoryManager->hClient,
983                      pMemoryManager->hThirdPartyP2P);
984         pMemoryManager->hThirdPartyP2P = 0;
985     }
986 
987     if (pMemoryManager->hClient != 0)
988     {
989         rmapiutilFreeClientAndDeviceHandles(pRmApi,
990                                             &pMemoryManager->hClient,
991                                             &pMemoryManager->hDevice,
992                                             &pMemoryManager->hSubdevice);
993     }
994 }
995 
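/*!
 * @brief Allocate the internal RM client/device/subdevice handles and an
 *        internal ThirdPartyP2P object (NVLINK or BAR1 flavor).
 */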
996 static NV_STATUS
997 _memmgrAllocInternalClientObjects
998 (
999     OBJGPU        *pGpu,
1000     MemoryManager *pMemoryManager
1001 )
1002 {
1003     NV_STATUS status;
1004     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1005 
1006     status = rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
1007                                                   &pMemoryManager->hClient,
1008                                                   &pMemoryManager->hDevice,
1009                                                   &pMemoryManager->hSubdevice);
1010     if (status != NV_OK)
1011     {
1012         goto failed;
1013     }
1014 
1015     {
1016         NV503C_ALLOC_PARAMETERS params;
1017         NvHandle hThirdPartyP2P = 0;
1018 
1019         NV_ASSERT_OK_OR_GOTO(status, serverutilGenResourceHandle(pMemoryManager->hClient,
1020                                                                  &hThirdPartyP2P),
1021                              failed);
1022 
1023         portMemSet(&params, 0, sizeof(params));
1024         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
1025         {
1026             params.flags = NV503C_ALLOC_PARAMETERS_FLAGS_TYPE_NVLINK;
1027         }
1028         else
1029         {
1030             params.flags = NV503C_ALLOC_PARAMETERS_FLAGS_TYPE_BAR1;
1031         }
1032         status = pRmApi->AllocWithHandle(pRmApi,
1033                                          pMemoryManager->hClient,
1034                                          pMemoryManager->hSubdevice,
1035                                          hThirdPartyP2P,
1036                                          NV50_THIRD_PARTY_P2P,
1037                                          &params,
1038                                          sizeof(params));
1039         if (status != NV_OK)
1040         {
1041             NV_PRINTF(LEVEL_WARNING, "Error creating internal ThirdPartyP2P object: %x\n",
1042                       status);
1043             pMemoryManager->hThirdPartyP2P = 0;
1044         }
1045         else
1046         {
1047             pMemoryManager->hThirdPartyP2P = hThirdPartyP2P;
1048         }
1049 
1050     }
1051 
1052     return NV_OK;
1053 
1054 failed:
1055     _memmgrFreeInternalClientObjects(pMemoryManager);
1056 
1057     return status;
1058 }
1059 
1060 /*!
1061  * @brief Determine size of FB RAM which is used for RM internal allocations
1062  *        and PMA.
1063  *
1064  * @param[out] pFbUsedSize  FB used memory size
1065  *
1066  * @returns NV_OK
1067  */
1068 NV_STATUS
1069 memmgrGetUsedRamSize_IMPL
1070 (
1071     OBJGPU        *pGpu,
1072     MemoryManager *pMemoryManager,
1073     NvU64         *pFbUsedSize
1074 )
1075 {
1076     Heap   *pHeap = GPU_GET_HEAP(pGpu);
1077     NvU64   heapFreeSpace, heapTotalSpace, pmaFreeSpace;
1078 
1079     //
    // Determine the free memory in FB and subtract it from the total FB memory.
    // If PMA is initialized, use the free memory reported by both PMA and the
    // heap; otherwise use only the heap's free memory for the calculation.
1083     //
1084     heapGetFree(pHeap, &heapFreeSpace);
1085     heapGetSize(pHeap, &heapTotalSpace);
1086     if (memmgrIsPmaInitialized(pMemoryManager))
1087     {
1088         pmaGetFreeMemory(&pHeap->pmaObject, &pmaFreeSpace);
1089         *pFbUsedSize = heapTotalSpace - heapFreeSpace - pmaFreeSpace;
1090     }
1091     else
1092     {
1093         *pFbUsedSize = heapTotalSpace - heapFreeSpace;
1094     }
1095 
1096     //
1097     // GSP's WPR region has its own save/restore mechanism and does not need
1098     // to be accounted for in total FB size used - which is needed to find out
1099     // how much SYSMEM needs to be allocated to save all FB memory
1100     //
1101     if (IS_GSP_CLIENT(pGpu))
1102     {
1103         KernelGsp *pKernelGsp       = GPU_GET_KERNEL_GSP(pGpu);
1104         NvU64      gspWprRegionSize = pKernelGsp->pWprMeta->gspFwWprEnd - pKernelGsp->pWprMeta->gspFwWprStart;
1105 
1106         *pFbUsedSize = *pFbUsedSize - gspWprRegionSize;
1107     }
1108 
1109     return NV_OK;
1110 }
1111 
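/*!
 * @brief Allocate FB HW resources for an allocation, invoking the HAL on every
 *        GPU in the SLI broadcast loop and returning the first error seen.
 */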
1112 NV_STATUS
1113 memmgrAllocHwResources_IMPL
1114 (
1115     OBJGPU        *pGpu,
1116     MemoryManager *pMemoryManager,
1117     FB_ALLOC_INFO *pFbAllocInfo
1118 )
1119 {
1120     MemoryManager  *pMemoryManagerLoop;
1121     FB_ALLOC_INFO  *pTempInfo = NULL;
1122     NvU32           skipFlag  = (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC);
1123     NV_STATUS       rmStatus  = NV_OK;
1124 
1125     pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1126     if (pTempInfo == NULL)
1127     {
1128         NV_ASSERT(0);
1129         return NV_ERR_NO_MEMORY;
1130     }
1131 
1132     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1133     {
1134         NV_STATUS tempStatus;
1135         *pTempInfo = *pFbAllocInfo;    // struct copy
1136 
1137         pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);
1138 
1139         tempStatus = memmgrAllocHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo);
1140         // be sure to return an intermediate error
1141         if (NV_OK == rmStatus)
1142             rmStatus = tempStatus;
1143     }
1144     SLI_LOOP_END
1145 
1146     *pFbAllocInfo = *pTempInfo;    // struct copy
1147     portMemFree(pTempInfo);
1148 
1149     pFbAllocInfo->pageFormat->flags &= ~NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
1150     pFbAllocInfo->pageFormat->flags |= skipFlag;
1151 
1152     return rmStatus;
1153 }
1154 
1155 NV_STATUS
1156 memmgrFreeHwResources_IMPL
1157 (
1158     OBJGPU        *pGpu,
1159     MemoryManager *pMemoryManager,
1160     FB_ALLOC_INFO *pFbAllocInfo
1161 )
1162 {
1163     MemoryManager  *pMemoryManagerLoop;
1164     NV_STATUS       rmStatus = NV_OK;
1165     RMTIMEOUT       timeout;
1166     FB_ALLOC_INFO  *pTempInfo = NULL;
1167 
1168     pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1169     if (pTempInfo == NULL)
1170     {
1171         NV_ASSERT(0);
1172         return NV_ERR_NO_MEMORY;
1173     }
1174 
1175     gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
1176 
1177     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1178     {
1179         NV_STATUS tempStatus;
1180         pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);
1181 
1182         *pTempInfo = *pFbAllocInfo;
1183 
1184         tempStatus = memmgrFreeHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo, &timeout);
1185         // be sure to return an intermediate error
1186         if (NV_OK == rmStatus)
1187             rmStatus = tempStatus;
1188 
1189     }
1190     SLI_LOOP_END
1191 
1192     *pFbAllocInfo = *pTempInfo;
1193     portMemFree(pTempInfo);
1194 
1195     return rmStatus;
1196 }
1197 
1198 NvBool
1199 memmgrLargePageSupported_IMPL
1200 (
1201     MemoryManager    *pMemoryManager,
1202     NV_ADDRESS_SPACE  addrSpace
1203 )
1204 {
1205     NvBool isSupported = NV_FALSE;
1206 
1207     if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
1208     {
1209         isSupported = NV_TRUE;
1210     }
1211     else if (addrSpace == ADDR_SYSMEM)
1212     {
1213         isSupported = (pMemoryManager->sysmemPageSize != RM_PAGE_SIZE);
1214     }
1215     else
1216     {
1217         NV_ASSERT(0);
1218     }
1219 
1220     return isSupported;
1221 }
1222 
1223 NvBool
1224 memmgrComprSupported_IMPL
1225 (
1226     MemoryManager    *pMemoryManager,
1227     NV_ADDRESS_SPACE  addrSpace
1228 )
1229 {
1230     OBJGPU *pGpu        = ENG_GET_GPU(pMemoryManager);
1231     NvBool  isSupported = NV_FALSE;
1232 
1233     if (GPU_GET_KERNEL_GMMU(pGpu) != NULL)
1234     {
1235         if (memmgrLargePageSupported(pMemoryManager, addrSpace) ||
1236             pMemoryManager->bSmallPageCompression)
1237         {
1238             if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
1239             {
1240                 isSupported = NV_TRUE;
1241             }
1242             else if (addrSpace == ADDR_SYSMEM)
1243             {
1244                 // Compression is allowed on vidmem or unified aperture (vidmem/sysmem is same w.r.t HW)
1245                 isSupported = (gpuIsUnifiedMemorySpaceEnabled(pGpu) &&
1246                                pMemoryManager->bSysmemCompressionSupportDef);
1247                 NV_PRINTF(LEVEL_ERROR, "isSupported=%s\n",
1248                           isSupported ? "NV_TRUE" : "NV_FALSE");
1249             }
1250             else
1251             {
1252                 NV_ASSERT(0);
1253             }
1254         }
1255     }
1256 
1257     return isSupported;
1258 }
1259 
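/*!
 * @brief Map NVOS32 allocation flags/attributes to a target address space:
 *        virtual, vidmem (also used for protected allocations) or sysmem.
 */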
1260 NV_ADDRESS_SPACE
1261 memmgrAllocGetAddrSpace_IMPL
1262 (
1263     MemoryManager *pMemoryManager,
1264     NvU32          flags,
1265     NvU32          attr
1266 )
1267 {
1268    NV_ADDRESS_SPACE addrSpace = ADDR_UNKNOWN;
1269 
1270    if (flags & NVOS32_ALLOC_FLAGS_VIRTUAL)
1271    {
1272        addrSpace = ADDR_VIRTUAL;
1273    }
1274    else if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, attr))
1275    {
1276        addrSpace = ADDR_FBMEM;
1277    }
1278    else
1279    {
1280         // In case location is SYSMEM or ANY, allocate in vidmem if protected flag is set.
1281         if (flags & NVOS32_ALLOC_FLAGS_PROTECTED)
1282         {
1283             addrSpace = ADDR_FBMEM;
1284         }
1285         else
1286         {
1287             addrSpace = ADDR_SYSMEM;
1288         }
1289    }
1290 
1291    return addrSpace;
1292 }
1293 
1294 NvU32
1295 memmgrGetMappableRamSizeMb_IMPL(MemoryManager *pMemoryManager)
1296 {
1297     return NvU64_LO32(pMemoryManager->Ram.mapRamSizeMb);
1298 }
1299 //
1300 // ZBC clear create/destroy routines.
1301 //
1302 
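/*!
 * @brief Fill out the physical attributes (offset, aperture, kind, cache
 *        attributes, zcull id, contiguous segment size) for a surface
 *        described by a memory descriptor.
 */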
1303 NV_STATUS
1304 memmgrFillMemdescForPhysAttr_IMPL
1305 (
1306     OBJGPU *pGpu,
1307     MemoryManager *pMemoryManager,
1308     PMEMORY_DESCRIPTOR pMemDesc,
1309     ADDRESS_TRANSLATION addressTranslation,
1310     NvU64 *pOffset,
1311     NvU32 *pMemAperture,
1312     NvU32 *pMemKind,
1313     NvU32 *pZCullId,
1314     NvU32 *pGpuCacheAttr,
1315     NvU32 *pGpuP2PCacheAttr,
1316     NvU64 *contigSegmentSize
1317 )
1318 {
1319     NvU64 surfOffset = *pOffset, surfBase, surfLimit;
1320     NvU32 zcbitmap;
1321 
1322     surfBase  = memdescGetPhysAddr(pMemDesc, addressTranslation, 0);
1323     surfLimit = surfBase + pMemDesc->Size - 1;
1324     *pMemKind = memdescGetPteKind(pMemDesc);
1325 
1326     *pOffset  = memdescGetPhysAddr(pMemDesc, addressTranslation, surfOffset);
1327 
1328     if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM )
1329         *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_VIDMEM;
1330     else if (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
1331         *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_SYSMEM;
1332     else if (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
1333         *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_SYSMEM;
1334     else if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL )
1335     {
1336         //
1337         // XXX we could theoretically find whatever phys mem object is plugged
1338         // in at surfOffset w/in the virt object... that'd mean scanning
1339         // pMemory->DmaMappingList
1340         //
1341         return NV_ERR_NOT_SUPPORTED;
1342     }
1343     else
1344         return NV_ERR_GENERIC;
1345 
1346     if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
1347     {
1348         *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
1349     }
1350     else if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
1351     {
1352         *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
1353     }
1354     else
1355     {
1356         *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
1357     }
1358 
1359     if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
1360     {
1361         *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
1362     }
1363     else if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
1364     {
1365         *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
1366     }
1367     else
1368     {
1369         *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
1370     }
1371 
1372     zcbitmap = FB_HWRESID_ZCULL_VAL_FERMI(memdescGetHwResId(pMemDesc)); //bitmap form... need a scalar
1373     for ( *pZCullId = 0;  zcbitmap; zcbitmap >>= 1, *pZCullId += 1) {;;;}
    *pZCullId -= 1; // side effect: if there is no zcull id, *pZCullId ends up as ~0
1375 
1376     *contigSegmentSize = surfLimit - (surfBase + surfOffset) + 1;
1377 
1378     if ( !memdescGetContiguity(pMemDesc, addressTranslation))
1379     {
1380         // XXX overly conservative.  we could scan the PTEs to find out if more pages are contig.
1381         NvU64 surfOffsetLimitSame4KBPage = (4*1024)*((surfBase + surfOffset)/(4*1024)) + (4*1024) - 1;
1382         if ( surfLimit >= surfOffsetLimitSame4KBPage )
1383             *contigSegmentSize = surfOffsetLimitSame4KBPage - (surfBase + surfOffset) + 1;
1384     }
1385 
1386     return NV_OK;
1387 }
1388 
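/*!
 * @brief Determine the page size (4KB, big, huge or 512MB) for an allocation
 *        based on address space, size, format and the requested page-size
 *        attributes, updating *pRetAttr/*pRetAttr2 accordingly.
 */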
1389 NvU64
1390 memmgrDeterminePageSize_IMPL
1391 (
1392     MemoryManager *pMemoryManager,
1393     NvHandle       hClient,
1394     NvU64          memSize,
1395     NvU32          memFormat,
1396     NvU32          pageFormatFlags,
1397     NvU32         *pRetAttr,
1398     NvU32         *pRetAttr2
1399 )
1400 {
1401     OBJGPU           *pGpu  = ENG_GET_GPU(pMemoryManager);
1402     KernelGmmu       *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
1403     NV_ADDRESS_SPACE  addrSpace;
1404     NvBool            bIsBigPageSupported;
1405     RM_ATTR_PAGE_SIZE pageSizeAttr;
1406     NvU64             pageSize = 0;
1407 
1408     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_TEGRA_SOC_NVDISPLAY) || (pKernelGmmu == NULL))
1409     {
1410         pageSize = RM_PAGE_SIZE;
1411     }
1412     // Sanity check the arguments.
1413     else if (pRetAttr == NULL || pRetAttr2 == NULL)
1414     {
1415         NV_ASSERT_OR_RETURN(0, 0);
1416     }
1417     else
1418     {
1419         addrSpace = memmgrAllocGetAddrSpace(pMemoryManager, pageFormatFlags, *pRetAttr);
1420 
1421         //
1422         // Bug 4270864: Temp hack until sysmem supports higher order allocations.
1423         // We allow EGM to get allocated at higher page size.
1424         //
1425         if (memmgrIsLocalEgmEnabled(pMemoryManager) &&
1426             addrSpace == ADDR_SYSMEM &&
1427             FLD_TEST_DRF(OS32, _ATTR2, _FIXED_NUMA_NODE_ID, _YES, *pRetAttr2) &&
1428             //
1429             // Bug 4270868: MODS has test cases which pass FIXED_NUMA_NODE_ID,
1430             // but invalid node_id. Will remove once MODS tests get fixed.
1431             //
1432             !RMCFG_FEATURE_MODS_FEATURES)
1433         {
1434             bIsBigPageSupported = NV_TRUE;
1435         }
1436         else
1437         {
1438             bIsBigPageSupported = memmgrLargePageSupported(pMemoryManager, addrSpace);
1439         }
1440         pageSizeAttr = dmaNvos32ToPageSizeAttr(*pRetAttr, *pRetAttr2);
1441 
1442         //
1443         // Precedence in page size selection
1444         // 1. CACHE_ONLY mode                                                   -> SMALL
1445         // 2. !BigPageSupport (Sysmem && GpuSmmuOff )                           -> SMALL
1446         // 3. Client page size override                                         -> Use override
1447         // 4. HugePageSupported && size >= HugePageSize                         -> HUGE
1448         // 5. Block-linear || size >= minSizeForBigPage || hClient || GpuSmmuOn -> BIG
1449         // 6. none of the above                                                 -> SMALL
1450         //
1451         // On Tegra, we don't have a carveout/FB in production. So, we're
1452         // not guaranteed to get BIG page sized or contiguous allocations
1453         // from OS. But we need BIG page sized allocations for efficient Big GPU
        // operation. We use the SMMU unit within the Tegra Memory Controller (MC),
1455         // to construct BIG pages from the 4KB small page allocations from OS.
1456         // SMMU will linearize the discontiguous 4KB allocations into what will
1457         // appear to the GPU as a large contiguous physical allocation.
1458         //
1459         // RM will eventually decide whether a SYSMEM allocation needs BIG page
1460         // via GPU SMMU mapping. Right now, we give an option for RM clients to
1461         // force it, via the SMMU_ON_GPU attribute.
1462         //
1463         if (gpuIsCacheOnlyModeEnabled(pGpu))
1464         {
1465             pageSize = RM_PAGE_SIZE;
1466         }
1467         else if (!bIsBigPageSupported)
1468         {
1469             if (RM_ATTR_PAGE_SIZE_BIG == pageSizeAttr ||
1470                 RM_ATTR_PAGE_SIZE_HUGE == pageSizeAttr ||
1471                 RM_ATTR_PAGE_SIZE_512MB == pageSizeAttr)
1472             {
1473                 NV_PRINTF(LEVEL_ERROR,
1474                           "Big/Huge/512MB page size not supported in sysmem.\n");
1475 
1476                 NV_ASSERT_OR_RETURN(0, 0);
1477             }
1478             else
1479             {
1480                 pageSize = RM_PAGE_SIZE;
1481             }
1482         }
1483         else
1484         {
1485             switch (pageSizeAttr)
1486             {
1487                 case RM_ATTR_PAGE_SIZE_INVALID:
1488                     NV_PRINTF(LEVEL_ERROR, "invalid page size attr\n");
1489                     NV_ASSERT_OR_RETURN(0, 0);
1490 
1491                 case RM_ATTR_PAGE_SIZE_DEFAULT:
1492                 {
1493                     NvBool bUseDefaultHugePagesize = NV_TRUE;
                    // On WDDMv2 Windows the default page size is expected to be 4KB / 64KB / 128KB
1495                     if (bUseDefaultHugePagesize &&
1496                         kgmmuIsHugePageSupported(pKernelGmmu) &&
1497                         (memSize >= RM_PAGE_SIZE_HUGE) && (addrSpace != ADDR_SYSMEM ||
1498                         pMemoryManager->sysmemPageSize == RM_PAGE_SIZE_HUGE))
1499                     {
1500                         pageSize = RM_PAGE_SIZE_HUGE;
1501                         break;
1502                     }
1503                     else if ((memFormat != NVOS32_ATTR_FORMAT_PITCH) ||
1504                              (memSize >= kgmmuGetMinBigPageSize(pKernelGmmu)) || hClient ||
1505                              FLD_TEST_DRF(OS32, _ATTR2, _SMMU_ON_GPU, _ENABLE, *pRetAttr2))
1506                     {
1507                         pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
1508                         break;
1509                     }
1510 
1511                     pageSize = RM_PAGE_SIZE;
1512                     break;
1513                 }
1514 
1515                 case RM_ATTR_PAGE_SIZE_4KB:
1516                     pageSize = RM_PAGE_SIZE;
1517                     break;
1518 
1519                 case RM_ATTR_PAGE_SIZE_BIG:
1520                     pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
1521                     break;
1522 
1523                 case RM_ATTR_PAGE_SIZE_HUGE:
1524                     if (kgmmuIsHugePageSupported(pKernelGmmu))
1525                     {
1526                         pageSize = RM_PAGE_SIZE_HUGE;
1527                     }
1528                     else
1529                     {
1530                         NV_ASSERT_OR_RETURN(0, 0);
1531                     }
1532                     break;
1533 
1534                 case RM_ATTR_PAGE_SIZE_512MB:
1535                     if (kgmmuIsPageSize512mbSupported(pKernelGmmu))
1536                     {
1537                         pageSize = RM_PAGE_SIZE_512M;
1538                     }
1539                     else
1540                     {
1541                         NV_ASSERT_OR_RETURN(0, 0);
1542                     }
1543                     break;
1544 
1545                 default:
1546                     NV_ASSERT(0);
1547             }
1548         }
1549     }
1550 
1551     switch (pageSize)
1552     {
1553         case RM_PAGE_SIZE:
1554             *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, *pRetAttr);
1555             break;
1556 
1557         case RM_PAGE_SIZE_64K:
1558         case RM_PAGE_SIZE_128K:
1559             *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _BIG, *pRetAttr);
1560             break;
1561 
1562         case RM_PAGE_SIZE_HUGE:
1563             *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
1564             *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB, *pRetAttr2);
1565             break;
1566 
1567         case RM_PAGE_SIZE_512M:
1568             *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
1569             *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB,  *pRetAttr2);
1570             break;
1571 
1572         default:
1573             NV_ASSERT(0);
1574     }
1575 
1576     return pageSize;
1577 }
1578 
1579 /*!
1580  * Identify if platform's current configuration supports PMA
1581  */
1582 NV_STATUS
1583 memmgrSetPlatformPmaSupport_IMPL
1584 (
1585     OBJGPU        *pGpu,
1586     MemoryManager *pMemoryManager
1587 )
1588 {
1589     //
1590     // KMD in WDDM mode does not support PMA-managed client page tables,
1591     // since in that configuration the client / OS manages them.
1592     //
1593     if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
1594     {
1595         memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
1596     }
1597 
1598     //
1599     // FB management should use PMA on Unix/Linux/Mods/Windows
1600     //
1601     if (RMCFG_FEATURE_PLATFORM_UNIX
1602         || RMCFG_FEATURE_PLATFORM_MODS
1603         || RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM)
1604     {
1605         pMemoryManager->bPmaSupportedOnPlatform = NV_TRUE;
1606     }
1607 
1608     //
1609     // PMA memory management is not currently supported in non-SRIOV vGPU environments.
1610     // The RPC mechanism needs to be expanded to distinguish allocation types.
1611     // Bug #1735412
1612     //
1613     // TODO : Remove these constraints.
1614     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
1615     {
1616         pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
1617     }
1618 
1619     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU))
1620     {
1621         if (pMemoryManager->bVgpuPmaSupport)
1622         {
1623             memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
1624         }
1625         else
1626         {
1627             pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
1628         }
1629     }
1630     return (NV_OK);
1631 }
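
     //
     // Illustrative summary of the policy above (not an exhaustive matrix):
     //   - Unix / MODS / Windows LDDM     -> bPmaSupportedOnPlatform = NV_TRUE
     //   - Windows LDDM outside TCC mode  -> client page tables are not PMA-managed
     //   - Legacy (non-SRIOV) vGPU guest  -> PMA not supported (Bug 1735412)
     //   - vGPU host                      -> PMA kept only if bVgpuPmaSupport, and then
     //                                       client page tables are not PMA-managed
     //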
1632 
1633 /*!
1634  * Allocate console region in CPU-RM based on region table passed from Physical RM
1635  */
1636 NV_STATUS
1637 memmgrAllocateConsoleRegion_IMPL
1638 (
1639     OBJGPU *pGpu,
1640     MemoryManager *pMemoryManager,
1641     FB_REGION_DESCRIPTOR *pConsoleFbRegion
1642 )
1643 {
1644 
1645     NV_STATUS status     = NV_OK;
1646     NvU32     consoleRegionId = 0x0;
1647     NvU64     regionSize;
1648 
1649     if (pMemoryManager->Ram.ReservedConsoleDispMemSize > 0)
1650     {
1651         pConsoleFbRegion->base = pMemoryManager->Ram.fbRegion[consoleRegionId].base;
1652         pConsoleFbRegion->limit = pMemoryManager->Ram.fbRegion[consoleRegionId].limit;
1653 
1654         regionSize = pConsoleFbRegion->limit - pConsoleFbRegion->base + 1;
1655 
1656         // Once the console is reserved, we don't expect to reserve it again
1657         NV_ASSERT_OR_RETURN(pMemoryManager->Ram.pReservedConsoleMemDesc == NULL,
1658                         NV_ERR_STATE_IN_USE);
1659 
1660         status = memdescCreate(&pMemoryManager->Ram.pReservedConsoleMemDesc, pGpu,
1661                             regionSize, RM_PAGE_SIZE_64K, NV_TRUE, ADDR_FBMEM,
1662                             NV_MEMORY_UNCACHED,
1663                             MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
1664         if (status != NV_OK)
1665         {
1666             pConsoleFbRegion->base = pConsoleFbRegion->limit = 0;
1667             return status;
1668         }
1669 
1670         memdescDescribe(pMemoryManager->Ram.pReservedConsoleMemDesc, ADDR_FBMEM,
1671                         pConsoleFbRegion->base, regionSize);
1672         memdescSetPageSize(pMemoryManager->Ram.pReservedConsoleMemDesc,
1673                     AT_GPU, RM_PAGE_SIZE);
1674 
1675 
1676         NV_PRINTF(LEVEL_INFO, "Allocating console region of size: %llx, at base: %llx\n",
1677                         regionSize, pConsoleFbRegion->base);
1678     }
1679 
1680     return status;
1681 }
1682 
1683 void
1684 memmgrReleaseConsoleRegion_IMPL
1685 (
1686     OBJGPU        *pGpu,
1687     MemoryManager *pMemoryManager
1688 )
1689 {
1690     memdescDestroy(pMemoryManager->Ram.pReservedConsoleMemDesc);
1691     pMemoryManager->Ram.pReservedConsoleMemDesc = NULL;
1692 }
1693 
1694 PMEMORY_DESCRIPTOR
1695 memmgrGetReservedConsoleMemDesc_IMPL
1696 (
1697     OBJGPU        *pGpu,
1698     MemoryManager *pMemoryManager
1699 )
1700 {
1701     return pMemoryManager->Ram.pReservedConsoleMemDesc;
1702 }
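
     //
     // Lifecycle note for the console region handling above: the reserved console
     // memdesc is created at most once (from FB region 0, when Physical RM reports a
     // non-zero ReservedConsoleDispMemSize), can be queried via
     // memmgrGetReservedConsoleMemDesc_IMPL(), and is torn down again with
     // memmgrReleaseConsoleRegion_IMPL().
     //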
1703 
1704 /*!
1705  * Reserve FB for allocating BAR2 Page Dirs and Page Tables
1706  */
1707 void
1708 memmgrReserveBar2BackingStore_IMPL
1709 (
1710     OBJGPU        *pGpu,
1711     MemoryManager *pMemoryManager,
1712     NvU64         *pAddr
1713 )
1714 {
1715     NvU64             tmpAddr = *pAddr;
1716     KernelBus        *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1717 
1718     NvU32 pageDirsSize = kbusGetSizeOfBar2PageDirs_HAL(pGpu, pKernelBus);
1719     NvU32 pageTblsSize = kbusGetSizeOfBar2PageTables_HAL(pGpu, pKernelBus);
1720 
1721     // Reserve space for BAR2 Page Dirs
1722     if (pKernelBus->PDEBAR2Aperture == ADDR_FBMEM)
1723     {
1724         tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1725         pKernelBus->bar2[GPU_GFID_PF].pdeBase  = tmpAddr;
1726         tmpAddr += pageDirsSize;
1727     }
1728 
1729     // Reserve space for BAR2 Page Tables
1730     if (pKernelBus->PTEBAR2Aperture == ADDR_FBMEM)
1731     {
1732         tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1733         pKernelBus->bar2[GPU_GFID_PF].pteBase = tmpAddr;
1734         tmpAddr += pageTblsSize;
1735     }
1736 
1737     NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page dirs offset = 0x%llx size = 0x%x\n",
1738         pKernelBus->bar2[GPU_GFID_PF].pdeBase, pageDirsSize);
1739 
1740     NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page tables offset = 0x%llx size = 0x%x\n",
1741         pKernelBus->bar2[GPU_GFID_PF].pteBase, pageTblsSize);
1742 
1743     *pAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
1744 }
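
     //
     // Resulting FB layout produced by memmgrReserveBar2BackingStore_IMPL above when both
     // BAR2 apertures are ADDR_FBMEM (illustrative; each step is RM_PAGE_SIZE aligned):
     //
     //   *pAddr (in) -> align -> [ BAR2 page dirs,   pageDirsSize bytes ]
     //               -> align -> [ BAR2 page tables, pageTblsSize bytes ]
     //               -> align -> *pAddr (out)
     //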
1745 
1746 /*!
1747  *  Calculate the Vista reserved memory requirement per FB region for mixed type/density
1748  */
1749 void
1750 memmgrCalcReservedFbSpace_IMPL
1751 (
1752     OBJGPU        *pGpu,
1753     MemoryManager *pMemoryManager
1754 )
1755 {
1756     NvU64   rsvdFastSize  = 0;
1757     NvU64   rsvdSlowSize  = 0;
1758     NvU64   rsvdISOSize   = 0;
1759     NvU32   i;
1760     NvU32   idxISORegion  = 0;
1761     NvU32   idxFastRegion = 0;
1762     NvU32   idxSlowRegion = 0;
1763     NvBool  bAllocProtected = NV_FALSE;
1764 
1765     bAllocProtected = gpuIsCCFeatureEnabled(pGpu);
1766 
1767     //
1768     // This is a hack solely for Vista (on Vista the OS controls the majority of heap).
1769     // Linux and Mac don't have reserved memory and don't use this function.
1770     //
1771     // On Vista, Fermi's instance memory is not reserved by RM anymore.
1772     // KMD has to reserve enough instance memory for driver private data.
1773     // This function does the calculation of needed space.  See bug 642233.
1774     // While the result is expressed in MB, the calculation is done in bytes.
1775     //
1776 
1777     // If we have no usable memory then we can't reserve any.
1778     if (!pMemoryManager->Ram.fbUsableMemSize)
1779         return;
1780 
1781     memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);
1782 
1783     // If we have regions defined, fill in the per-segment reserved memory requirement
1784     if (pMemoryManager->Ram.numFBRegions > 0)
1785     {
1786         FB_REGION_DESCRIPTOR *pFbRegion = NULL;
1787         NvU64  regionSize = 0;
1788 
1789         //
1790         // Find the fastest, slowest, and ISO regions. The indices start at region #0, which
1791         // is assumed to be non-reserved, fast, and ISO-capable; the loop corrects them otherwise.
1792         //
1793         for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
1794         {
1795             pFbRegion = &pMemoryManager->Ram.fbRegion[i];
1796             regionSize = (pFbRegion->limit - pFbRegion->base + 1);
1797 
1798             // Check only non-reserved regions (which are typically unpopulated blackholes in address space)
1799             if ((!pFbRegion->bRsvdRegion) &&
1800                 (bAllocProtected || !pFbRegion->bProtected)  &&
1801                 (regionSize >= (rsvdFastSize + rsvdSlowSize + rsvdISOSize)))
1802             {
1803                 // Find the fastest region
1804                 if ((pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxFastRegion].performance)
1805                         || pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion
1806                         || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected))
1807                 {
1808                     idxFastRegion = i;
1809                 }
1810                 // Find the slowest region
1811                 if ((pFbRegion->performance < pMemoryManager->Ram.fbRegion[idxSlowRegion].performance)
1812                         || pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion
1813                         || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected))
1814                 {
1815                     idxSlowRegion = i;
1816                 }
1817                  // Find the fastest ISO region
1818                 if (pFbRegion->bSupportISO)
1819                 {
1820                     if ((!pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO) ||
1821                         (pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxISORegion].performance)
1822                         || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxISORegion].bProtected))
1823                     {
1824                         idxISORegion = i;
1825                     }
1826                 }
1827             }
1828         }
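
             //
             // At this point idxFastRegion / idxSlowRegion / idxISORegion index the best
             // non-reserved candidates found by the scan above (protected regions are
             // acceptable only when CC is enabled); the asserts below sanity-check those
             // choices before the reserved sizes are attributed to them.
             //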
1829 
1830         // There should *ALWAYS* be a region that supports ISO, even if we have no display
1831         NV_ASSERT(pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO);
1832 
1833         // There should *ALWAYS* be a non-reserved region that is faster than reserved and supports ISO
1834         NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bRsvdRegion);
1835         NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion);
1836         NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion);
1837 
1838         //
1839         // Whenever Hopper CC is enabled, it is mandatory to put allocations
1840         // like page tables, CBC and fault buffers in CPR region. Cannot put
1841         // reserved memory in protected region in non CC cases
1842         //
1843         if (!bAllocProtected)
1844         {
1845             NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bProtected);
1846             NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected);
1847             NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected);
1848         }
1849 
1850         //
1851         // Vista expects to be able to VidHeapControl allocate a cursor in ISO
1852         //
1853         // For mixed density reserved memory should be split between "fast" and
1854         // "slow" memory. Fast memory should also support ISO.  The policy to
1855         // prefer "slow" vs "fast" memory is platform dependent.
1856         //
1857         pMemoryManager->Ram.fbRegion[idxISORegion].rsvdSize += rsvdISOSize;
1858         pMemoryManager->Ram.fbRegion[idxSlowRegion].rsvdSize += rsvdSlowSize;
1859         pMemoryManager->Ram.fbRegion[idxFastRegion].rsvdSize += rsvdFastSize;
1860     }
1861 }
1862 
1863 /*!
1864  * Init channel size
1865  *
1866  * @param[in]  pMemoryManager MemoryManager pointer
1867  * @param[in]  pChannel       OBJCHANNEL pointer
1868  * @param[in]  numCopyBlocks  Number of copies that should fit in the push buffer
1869  *
1870  */
1871 void
1872 memmgrMemUtilsSetupChannelBufferSizes_IMPL
1873 (
1874     MemoryManager *pMemoryManager,
1875     OBJCHANNEL    *pChannel,
1876     NvU32          numCopyBlocks
1877 )
1878 {
1879     // set channel specific sizes
1880     pChannel->channelPbSize            = numCopyBlocks * MEMUTILS_SIZE_PER_BLOCK_INBYTES;
1881     pChannel->channelNotifierSize      = MEMUTILS_CHANNEL_NOTIFIER_SIZE;
1882     pChannel->channelNumGpFifioEntries = MEMUTILS_NUM_GPFIFIO_ENTRIES;
1883     pChannel->methodSizePerBlock       = MEMUTILS_SIZE_PER_BLOCK_INBYTES;
1884     pChannel->channelSize              = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE + MEMUTILS_CHANNEL_SEMAPHORE_SIZE;
1885     pChannel->semaOffset               = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE;
1886     pChannel->finishPayloadOffset      = pChannel->semaOffset + 4;
1887 }
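
     //
     // Buffer layout implied by the arithmetic above (illustrative), for numCopyBlocks = N:
     //
     //   offset 0              : pushbuffer  (N * MEMUTILS_SIZE_PER_BLOCK_INBYTES bytes)
     //   offset channelPbSize  : GPFIFO      (MEMUTILS_CHANNEL_GPFIFO_SIZE bytes)
     //   offset semaOffset     : semaphore   (MEMUTILS_CHANNEL_SEMAPHORE_SIZE bytes)
     //   offset semaOffset + 4 : finish payload
     //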
1888 
1889 NV_STATUS memmgrFree_IMPL
1890 (
1891     OBJGPU             *pGpu,
1892     MemoryManager      *pMemoryManager,
1893     Heap               *pHeap,
1894     NvHandle            hClient,
1895     NvHandle            hDevice,
1896     NvHandle            hVASpace,
1897     NvU32               owner,
1898     MEMORY_DESCRIPTOR  *pMemDesc
1899 )
1900 {
1901     NvU64       offsetAlign;
1902     NV_STATUS   status;
1903     NvU32       pmaFreeFlag       = 0;
1904 
1905     // IRQL TEST:  must be running at equivalent of passive-level
1906     IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());
1907 
1908     if (pMemDesc == NULL)
1909         return NV_ERR_INVALID_ARGUMENT;
1910 
1911     offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
1912 
1913     if (owner == NVOS32_BLOCK_TYPE_FREE)
1914         return NV_ERR_INVALID_ARGUMENT;
1915 
1916     // Virtual heap allocs are tagged virtual and always own the memdesc
1917     if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL)
1918     {
1919         OBJVASPACE  *pVAS = NULL;
1920         RsClient    *pClient;
1921 
1922         status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
1923         if (status != NV_OK)
1924             return status;
1925 
1926         status = vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, hVASpace, &pVAS);
1927         if (status != NV_OK)
1928             return status;
1929 
1930         status = vaspaceFree(pVAS, offsetAlign);
1931         memdescDestroy(pMemDesc);
1932         return status;
1933     }
1934 
1935     // Free up the memory allocated by PMA.
1936     if (pMemDesc->pPmaAllocInfo)
1937     {
1938         FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
1939         FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
1940         OBJGPU               *pMemdescOwnerGpu   = NULL;
1941 
1942         //
1943         // A memdesc can be duped under a peer device. In that case, before
1944         // freeing FB make sure the GPU which owns the memdesc is available.
1945         // Otherwise, just assert, destroy the memdesc and return NV_OK to
1946         // make sure rest of the clean up happens correctly as we are on
1947         // destroy path.
1948         // Note this is just a WAR till resserv brings in cleanup of duped objects
1949         // on GPU tear down.
1950         // RS-TODO: Nuke this check once the cleanup is implemented.
1951         //
1952         if (pGpu != pMemDesc->pGpu)
1953         {
1954             if (!gpumgrIsGpuPointerValid(pMemDesc->pGpu))
1955             {
1956                 //
1957                 // This should never happen. GPU tear down should always clear
1958                 // the duped memory list after resource server implements it.
1959                 // For now just assert!
1960                 //
1961                 NV_ASSERT(0);
1962                 memdescDestroy(pMemDesc);
1963                 goto pma_free_exit;
1964             }
1965         }
1966 
1967         pMemdescOwnerGpu = pMemDesc->pGpu;
1968 
1969         //
1970         // Similar to the above WAR, if portMem allocations fail for any reason,
1971         // just assert and return NV_OK to ensure that the rest of the clean up
1972         // happens correctly.
1973         //
1974         pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
1975         if (pFbAllocInfo == NULL)
1976         {
1977             NV_ASSERT(0);
1978             goto pma_free_exit;
1979         }
1980 
1981         pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
1982         if (pFbAllocPageFormat == NULL)
             {
1983             NV_ASSERT(0);
1984             goto pma_free_exit;
1985         }
1986 
1987         portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
1988         portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
1989         pFbAllocInfo->hClient = hClient;
1990         pFbAllocInfo->hDevice = hDevice;
1991         pFbAllocInfo->pageFormat = pFbAllocPageFormat;
1992 
1993         //
1994         // Do not release any HW resources associated with this allocation
1995         // until the last reference to the allocation is freed. Passing
1996         // hwresid = 0 and format = pitch to memmgrFreeHwResources will ensure
1997         // that no comptags/zcull/zbc resources are freed.
1998         //
1999         if (pMemDesc->RefCount == 1)
2000         {
2001             pFbAllocInfo->hwResId = memdescGetHwResId(pMemDesc);
2002             pFbAllocInfo->format  = memdescGetPteKind(pMemDesc);
2003         }
2004         else
2005         {
2006             pFbAllocInfo->hwResId = 0;
2007             pFbAllocInfo->format = 0;
2008         }
2009         pFbAllocInfo->offset  = offsetAlign;
2010         pFbAllocInfo->size    = pMemDesc->Size;
2011 
2012         // Free any HW resources allocated.
2013         memmgrFreeHwResources(pMemdescOwnerGpu,
2014                 GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu), pFbAllocInfo);
2015 
2016         if (pMemDesc->pPmaAllocInfo != NULL)
2017         {
2018             // Disabling scrub on free for non-compressible surfaces
2019             if (RMCFG_FEATURE_PLATFORM_MODS &&
2020                 !memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu),
2021                                   FB_IS_KIND_COMPRESSIBLE,
2022                                   memdescGetPteKind(pMemDesc)))
2023             {
2024                 pmaFreeFlag = PMA_FREE_SKIP_SCRUB;
2025             }
2026 
2027             vidmemPmaFree(pMemdescOwnerGpu, pHeap, pMemDesc->pPmaAllocInfo, pmaFreeFlag);
2028             NV_PRINTF(LEVEL_INFO, "Freeing PMA allocation\n");
2029         }
2030 
2031 pma_free_exit:
2032         portMemFree(pFbAllocInfo);
2033         portMemFree(pFbAllocPageFormat);
2034         memdescDestroy(pMemDesc);
2035 
2036         return NV_OK;
2037     }
2038 
2039     return heapFree(pGpu, pHeap, hClient, hDevice, owner, pMemDesc);
2040 }
2041 
2042 NV_STATUS
2043 memmgrSetPartitionableMem_IMPL
2044 (
2045     OBJGPU *pGpu,
2046     MemoryManager *pMemoryManager
2047 )
2048 {
2049     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
2050     NV2080_CTRL_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM_PARAMS params = {0};
2051     Heap *pHeap = GPU_GET_HEAP(pGpu);
2052     NvU64 bottomRsvdSize = 0;
2053     NvU64 topRsvdSize = 0;
2054     NvU32 bottomRegionIdx = 0xFFFF;
2055     NvU32 topRegionIdx = 0xFFFF;
2056     NvU32 i;
2057     NvU64 size;
2058     NvU64 base;
2059     NvU64 offset;
2060     NvU64 freeMem;
2061 
2062     //
2063     // Find out the first and the last region for which internal heap or
2064     // bRsvdRegion is true. On Ampere we should never have more than two
2065     // discontiguous RM reserved regions.
2066     // To-Do - Bug 2301972 - Make sure that reserved memory is aligned to VMMU
2067     // segments
2068     //
2069     for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
2070     {
2071         if (pMemoryManager->Ram.fbRegion[i].bInternalHeap ||
2072             pMemoryManager->Ram.fbRegion[i].bRsvdRegion)
2073         {
2074             NvU64 rsvdSize = (pMemoryManager->Ram.fbRegion[i].limit -
2075                               pMemoryManager->Ram.fbRegion[i].base + 1);
2076 
2077             // Check if this is bottom reserved region
2078             if (pMemoryManager->Ram.fbRegion[i].base == 0)
2079             {
2080                 bottomRegionIdx = i;
2081                 bottomRsvdSize += rsvdSize;
2082             }
2083             else if (i > 0 && (pMemoryManager->Ram.fbRegion[i-1].bInternalHeap ||
2084                               pMemoryManager->Ram.fbRegion[i-1].bRsvdRegion) &&
2085                     (pMemoryManager->Ram.fbRegion[i].base == pMemoryManager->Ram.fbRegion[i - 1].limit + 1))
2086             {
2087                 // See if this region is contiguous with the previously discovered one
2088                 if (bottomRegionIdx == (i - 1))
2089                 {
2090                     // Contiguous bottom region
2091                     bottomRsvdSize += rsvdSize;
2092                 }
2093                 else
2094                 {
2095                     // Contiguous top region
2096                     topRsvdSize += rsvdSize;
2097                 }
2098             }
2099             else
2100             {
2101                 //
2102                 // Make sure we don't have discontiguous reserved regions, as
2103                 // they are not supported by HW; supporting them would require
2104                 // the blacklisting mechanism.
2105                 //
2106                 if (topRegionIdx != 0xFFFF)
2107                 {
2108                     NV_PRINTF(LEVEL_ERROR,
2109                               "More than two discontiguous rsvd regions found. "
2110                               "Rsvd region base - 0x%llx, Rsvd region Size - 0x%llx\n",
2111                               pMemoryManager->Ram.fbRegion[i].base, rsvdSize);
2112                     NV_ASSERT(0);
2113                     return NV_ERR_INVALID_STATE;
2114                 }
2115 
2116                 topRegionIdx = i;
2117                 topRsvdSize += rsvdSize;
2118             }
2119         }
2120     }
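
     //
     // Expected layout after the scan above (illustrative; the common case is one
     // reserved block at each end of FB):
     //
     //   [ bottomRsvdSize ][ partitionable memory .................. ][ topRsvdSize ]
     //
     // base == bottomRsvdSize is verified further below before the partitionable
     // range is programmed.
     //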
2121 
2122     //
2123     // Sanity check against the biggest available memory chunk. Pick the smaller of
2124     // the biggest available memory chunk and (calculated total - reserved memory),
2125     // since on vGPU we are still using OBJHEAP and some allocations happen at the
2126     // top of the heap before we program this register.
2127     //
2128     if (!memmgrIsPmaInitialized(pMemoryManager))
2129     {
2130         NvU64 bytesTotal;
2131         const NvU64 vgpuHeapWarSize = 256 * 1024 * 1024;
2132         NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
2133                                         &offset, &size));
2134 
2135         //
2136         // heapInfo returns the size of the largest empty block and, in "offset", its
2137         // starting address; here we care about that block's base.
2138         //
2139         base = offset;
2140 
2141         //
2142         // WAR - Bug 2383259 - Till PMA is enabled in the vGPU host,
2143         // we need to hold back some memory at the top to fulfill lazy
2144         // allocations like the FECS and GPCCS uCode. Leave 256MB at the
2145         // top for such lazy allocations.
2146         //
2147         if (size > vgpuHeapWarSize)
2148         {
2149             size -= vgpuHeapWarSize;
2150         }
2151     }
2152     else
2153     {
2154         PMA_REGION_DESCRIPTOR *pFirstPmaRegionDesc = NULL;
2155         NvU32 numPmaRegions;
2156         NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
2157 
2158         NV_ASSERT_OK_OR_RETURN(pmaGetRegionInfo(&pHeap->pmaObject,
2159             &numPmaRegions, &pFirstPmaRegionDesc));
2160 
2161         base = pFirstPmaRegionDesc->base;
2162         pmaGetFreeMemory(&pHeap->pmaObject, &freeMem);
2163         pmaGetTotalMemory(&pHeap->pmaObject, &size);
2164 
2165         NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig));
2166 
2167         //
2168         // MIG won't be used alongside APM, so the check below is of no use there.
2169         // Even if we enabled the check for APM it would fail: after enabling
2170         // "scrub on free" using virtual CE writes, memory gets consumed by the
2171         // page tables backing the scrubber channel's virtual mappings, so the
2172         // calculation below no longer holds.
2173         // In case of HCC, structures like the PB, GPFIFO and USERD for the scrubber
2174         // and golden channels are required to be in CPR vidmem, which also changes
2175         // the calculation below. We can ignore this for the non-MIG case.
2176         //
2177         // When FB memory is onlined as a NUMA node, the kernel can allocate FB memory
2178         // directly, so free memory cannot be expected to equal total memory.
2179         //
2180         if ((!gpuIsCCorApmFeatureEnabled(pGpu) || IS_MIG_ENABLED(pGpu)) &&
2181             !(pmaConfig & PMA_QUERY_NUMA_ONLINED))
2182         {
2183             NvU64 maxUsedPmaSize = 2 * RM_PAGE_SIZE_128K;
2184             //
2185             // PMA should be completely free at this point, otherwise we risk
2186             // not setting the right partitionable range (pmaGetLargestFree's
2187             // offset argument is not implemented as of this writing, so we
2188             // only get the base address of the region that contains it). There
2189             // is a known allocation from the top-level scrubber/CeUtils channel that
2190             // is expected to be no larger than 128K. Issue a warning for any
2191             // other uses.
2192             //
2193             if ((size > maxUsedPmaSize) &&
2194                 (freeMem < (size - maxUsedPmaSize)))
2195             {
2196                 NV_PRINTF(LEVEL_ERROR,
2197                     "Assumption that PMA is empty (after accounting for the top-level scrubber and CeUtils) is not met!\n");
2198                 NV_PRINTF(LEVEL_ERROR,
2199                     "    free space = 0x%llx bytes, total space = 0x%llx bytes\n",
2200                     freeMem, size);
2201                 NV_ASSERT_OR_RETURN(freeMem >= (size - maxUsedPmaSize),
2202                                     NV_ERR_INVALID_STATE);
2203             }
2204         }
2205     }
2206 
2207     if (size == 0)
2208     {
2209         NV_PRINTF(LEVEL_ERROR,
2210                   "No partitionable memory. MIG memory partitioning can't be enabled.\n");
2211         return NV_OK;
2212     }
2213 
2214     if (base != bottomRsvdSize)
2215     {
2216         NV_PRINTF(LEVEL_ERROR,
2217                   "Partitionable memory start - 0x%llx not aligned with RM reserved "
2218                   "region base-end - 0x%llx\n", base, bottomRsvdSize);
2219         return NV_ERR_INVALID_STATE;
2220     }
2221 
2222     params.partitionableMemSize = size;
2223     params.bottomRsvdSize = bottomRsvdSize;
2224     params.topRsvdSize = topRsvdSize;
2225 
2226     // Call physical MemorySystem to align and program the partitionable range
2227     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2228         pRmApi->Control(pRmApi,
2229             pGpu->hInternalClient,
2230             pGpu->hInternalSubdevice,
2231             NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM,
2232             &params,
2233             sizeof(params)));
2234 
2235     pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange =
2236         rangeMake(params.partitionableStartAddr, params.partitionableEndAddr);
2237 
2238     //
2239     // Make sure the created range is a valid range.
2240     // rangeIsEmpty checks lo > hi, which should be good enough to catch
2241     // inverted range case.
2242     //
2243     NV_ASSERT_OR_RETURN(!rangeIsEmpty(pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange),
2244                         NV_ERR_INVALID_STATE);
2245 
2246     if (!KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu))
2247     {
2248         NV_ASSERT_OK_OR_RETURN(memmgrSetMIGPartitionableBAR1Range(pGpu, pMemoryManager));
2249     }
2250 
2251     if (IS_GSP_CLIENT(pGpu))
2252     {
2253         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2254 
2255         //
2256         // The Physical RM initializes its AMAPLIB context via
2257         // memsysSetPartitionableMem_HAL(). The GSP Client RM has a separate
2258         // AMAPLIB context that must also be initialized.
2259         //
2260         kmemsysReadMIGMemoryCfg_HAL(pGpu, pKernelMemorySystem);
2261     }
2262 
2263     return NV_OK;
2264 }
2265 
2266 NV_STATUS
2267 memmgrFillComprInfo_IMPL
2268 (
2269     OBJGPU        *pGpu,
2270     MemoryManager *pMemoryManager,
2271     NvU64          pageSize,
2272     NvU32          pageCount,
2273     NvU32          kind,
2274     NvU64          surfOffset,
2275     NvU32          compTagStartOffset,
2276     COMPR_INFO    *pComprInfo
2277 )
2278 {
2279     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
2280         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
2281 
2282     portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2283 
2284     pComprInfo->kind = kind;
2285 
2286     if (!memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
2287         return NV_OK;
2288 
2289     // TODO: We will have to support compression on vGPU HOST for AC
2290     NV_ASSERT(compTagStartOffset != ~(NvU32)0);
2291 
2292     pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
2293     pComprInfo->compTagLineMin = compTagStartOffset;
2294     pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
2295     pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
2296     pComprInfo->compTagLineMultiplier = 1;
2297 
2298     return NV_OK;
2299 }
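
     //
     // Worked example for the index math above (illustrative values only): assuming
     // comprPageShift = 16 (a 64KB compression page), surfOffset = 0x30000,
     // pageSize = 4KB and pageCount = 32, the surface spans [0x30000, 0x4FFFF], so
     // compPageIndexLo = 0x30000 >> 16 = 3 and compPageIndexHi = 0x4FFFF >> 16 = 4.
     //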
2300 
2301 NV_STATUS
2302 memmgrGetKindComprForGpu_KERNEL
2303 (
2304     MemoryManager      *pMemoryManager,
2305     MEMORY_DESCRIPTOR  *pMemDesc,
2306     OBJGPU             *pMappingGpu,
2307     NvU64               offset,
2308     NvU32              *pKind,
2309     COMPR_INFO         *pComprInfo
2310 )
2311 {
2312     NvU32               ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc));
2313     NvU32               kind   = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
2314     const MEMORY_SYSTEM_STATIC_CONFIG *pMappingMemSysConfig =
2315         kmemsysGetStaticConfig(pMappingGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu));
2316 
2317     // Compression is not supported on memory not backed by a GPU
2318     if (pMemDesc->pGpu != NULL && memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind) &&
2319         (ctagId == 0 || ctagId == FB_HWRESID_CTAGID_VAL_FERMI(-1)))
2320     {
2321         portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2322 
2323         pComprInfo->kind = kind;
2324         pComprInfo->compPageShift = pMappingMemSysConfig->comprPageShift;
2325         pComprInfo->bPhysBasedComptags = NV_TRUE;
2326         pComprInfo->compTagLineMin = 1;
2327     }
2328     else
2329     {
2330         if (ctagId == FB_HWRESID_CTAGID_VAL_FERMI(0xcdcdcdcd))
2331         {
2332             portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2333 
2334             pComprInfo->kind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, kind, NV_TRUE);
2335         }
2336         else
2337         {
2338             memmgrFillComprInfoUncompressed(pMemoryManager, kind, pComprInfo);
2339         }
2340     }
2341 
2342     *pKind = pComprInfo->kind;
2343 
2344     return NV_OK;
2345 }
2346 
2347 NV_STATUS
2348 memmgrGetKindComprFromMemDesc_IMPL
2349 (
2350     MemoryManager     *pMemoryManager,
2351     MEMORY_DESCRIPTOR *pMemDesc,
2352     NvU64              offset,
2353     NvU32             *kind,
2354     COMPR_INFO        *pComprInfo
2355 )
2356 {
2357     return memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMemDesc->pGpu,
2358                                         offset, kind, pComprInfo);
2359 }
2360 
2361 void
2362 memmgrSetMIGPartitionableMemoryRange_IMPL
2363 (
2364     OBJGPU *pGpu,
2365     MemoryManager *pMemoryManager,
2366     NV_RANGE range
2367 )
2368 {
2369     pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = range;
2370 }
2371 
2372 NV_RANGE
2373 memmgrGetMIGPartitionableMemoryRange_IMPL
2374 (
2375     OBJGPU *pGpu,
2376     MemoryManager *pMemoryManager
2377 )
2378 {
2379     return pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
2380 }
2381 
2382 /*
2383  * @brief Sets the total partitionable BAR1 range
2384  */
2385 NV_STATUS
2386 memmgrSetMIGPartitionableBAR1Range_IMPL
2387 (
2388     OBJGPU *pGpu,
2389     MemoryManager *pMemoryManager
2390 )
2391 {
2392     KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2393     OBJVASPACE *pBar1VAS   = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2394     OBJEHEAP   *pVASHeap;
2395     NvU64 largestFreeOffset = 0;
2396     NvU64 largestFreeSize = 0;
2397     NvU64 partitionableBar1Start;
2398     NvU64 partitionableBar1End;
2399 
2400     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))
2401         return NV_OK;
2402 
2403     NV_ASSERT_OR_RETURN(pBar1VAS != NULL, NV_ERR_INVALID_STATE);
2404     pVASHeap = vaspaceGetHeap(pBar1VAS);
2405 
2406     // Get partitionable BAR1 range
2407     pVASHeap->eheapInfo(pVASHeap, NULL, NULL, &largestFreeOffset, &largestFreeSize, NULL, NULL);
2408 
2409     //
2410     // We are not considering alignment here because VA space is reserved/allocated in chunks of pages
2411     // so largestFreeOffset should be already aligned.
2412     //
2413     partitionableBar1Start = largestFreeOffset;
2414     partitionableBar1End = largestFreeOffset + largestFreeSize - 1;
2415     NV_ASSERT_OR_RETURN(partitionableBar1Start >= vaspaceGetVaStart(pBar1VAS), NV_ERR_INVALID_STATE);
2416     NV_ASSERT_OR_RETURN(partitionableBar1End <= vaspaceGetVaLimit(pBar1VAS), NV_ERR_INVALID_STATE);
2417 
2418     pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range = rangeMake(partitionableBar1Start, partitionableBar1End);
2419     return NV_OK;
2420 }
2421 
2422 NV_RANGE
2423 memmgrGetMIGPartitionableBAR1Range_IMPL
2424 (
2425     OBJGPU *pGpu,
2426     MemoryManager *pMemoryManager
2427 )
2428 {
2429     return pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range;
2430 }
2431 
2432 NV_STATUS
2433 memmgrAllocMIGGPUInstanceMemory_VF
2434 (
2435     OBJGPU        *pGpu,
2436     MemoryManager *pMemoryManager,
2437     NvU32          swizzId,
2438     NvHandle      *phMemory,
2439     NV_RANGE      *pAddrRange,
2440     Heap         **ppMemoryPartitionHeap
2441 )
2442 {
2443     // For vGpu we have a static memory allocation
2444     *phMemory = NV01_NULL_OBJECT;
2445     *pAddrRange = pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
2446     *ppMemoryPartitionHeap = GPU_GET_HEAP(pGpu);
2447 
2448     return NV_OK;
2449 }
2450 
2451 // Function to allocate memory for a GPU instance
2452 NV_STATUS
2453 memmgrAllocMIGGPUInstanceMemory_PF
2454 (
2455     OBJGPU        *pGpu,
2456     MemoryManager *pMemoryManager,
2457     NvU32          swizzId,
2458     NvHandle      *phMemory,
2459     NV_RANGE      *pAddrRange,
2460     Heap         **ppMemoryPartitionHeap
2461 )
2462 {
2463     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2464     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2465     NV_STATUS rmStatus = NV_OK;
2466     NvHandle hMemory = 0;
2467     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2468     NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2469 
2470     NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
2471     NV_ASSERT_OK_OR_RETURN(kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, pAddrRange));
2472 
2473     //
2474     // Only allocate memory for non-swizzID-0 GPU instances (swizzID-0 owns the full
2475     // GPU, so there is no need to pre-reserve memory for it) and only on
2476     // non-coherent systems. In coherent NUMA systems, NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE
2477     // is not supported and the memory comes from the MIG partition's memory
2478     // NUMA node.
2479     //
2480     if (kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
2481     {
2482         if (bNumaEnabled)
2483         {
2484             NvS32 numaNodeId;
2485             NvU64 partitionBaseAddr = pAddrRange->lo;
2486             NvU64 partitionSize = rangeLength(*pAddrRange);
2487 
2488             if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
2489             {
2490                 // Remove swizz Id 0 / baremetal GPU memory NUMA node
2491                 pmaNumaOfflined(&GPU_GET_HEAP(pGpu)->pmaObject);
2492                 kmemsysNumaRemoveMemory_HAL(pGpu, pKernelMemorySystem, 0);
2493             }
2494 
2495             //
2496             // The memory gets removed in memmgrFreeMIGGPUInstanceMemory if
2497             // there is any failure after adding the memory.
2498             //
2499             NV_ASSERT_OK_OR_RETURN(kmemsysNumaAddMemory_HAL(pGpu,
2500                                                             pKernelMemorySystem,
2501                                                             swizzId,
2502                                                             partitionBaseAddr,
2503                                                             partitionSize,
2504                                                             &numaNodeId));
2505         }
2506         else
2507         {
2508             //
2509             // Allocate memory using vidHeapControl
2510             //
2511             // vidHeapControl calls should happen outside GPU locks
2512             // This is a PMA requirement, as memory allocation calls may invoke eviction,
2513             // during which UVM could get stuck behind the GPU lock.
2514             // See Bug 1735851-#24
2515             //
2516             rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
2517 
2518             // Allocate physical memory for the GPU instance
2519             NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
2520             portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
2521             memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
2522             memAllocParams.type      = NVOS32_TYPE_IMAGE;
2523             memAllocParams.size      = rangeLength(*pAddrRange);
2524             memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
2525             memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
2526             memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
2527             memAllocParams.attr2     = DRF_DEF(OS32, _ATTR2, _PAGE_OFFLINING, _OFF); // free the offlined pages
2528             memAllocParams.flags    |= NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
2529             memAllocParams.rangeLo   = 0;
2530             memAllocParams.rangeHi   = 0;
2531             memAllocParams.offset    = pAddrRange->lo; // Offset needed if fixed address allocation
2532             memAllocParams.hVASpace  = 0; // Physical allocation
2533             memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;
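
                 //
                 // The parameters above request a physically contiguous, fixed-address
                 // vidmem allocation covering exactly the partition's range
                 // (offset = pAddrRange->lo, size = rangeLength(*pAddrRange)), with
                 // scrubbing skipped via NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB.
                 //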
2534 
2535             rmStatus = pRmApi->Alloc(pRmApi,
2536                                      pMemoryManager->MIGMemoryPartitioningInfo.hClient,
2537                                      pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice,
2538                                      &hMemory,
2539                                      NV01_MEMORY_LOCAL_USER,
2540                                      &memAllocParams,
2541                                      sizeof(memAllocParams));
2542 
2543             // Reacquire the GPU locks
2544             if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM) != NV_OK)
2545             {
2546                 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
2547                 DBG_BREAKPOINT();
2548                 rmStatus = NV_ERR_GENERIC;
2549                 goto cleanup;
2550             }
2551 
2552             if (rmStatus != NV_OK)
2553             {
2554                 NV_PRINTF(LEVEL_ERROR,
2555                           "Unable to allocate physical memory for GPU instance.\n");
2556                 return rmStatus;
2557             }
2558         }
2559     }
2560     rmStatus = _memmgrInitMIGMemoryPartitionHeap(pGpu, pMemoryManager, swizzId, pAddrRange, ppMemoryPartitionHeap);
2561     if (rmStatus != NV_OK)
2562     {
2563         NV_PRINTF(LEVEL_ERROR, "Unable to initialize memory partition heap\n");
2564         goto cleanup;
2565     }
2566 
2567     NV_PRINTF(LEVEL_INFO,
2568               "Allocated memory partition heap for swizzId - %d with StartAddr - 0x%llx, endAddr - 0x%llx.\n",
2569               swizzId, pAddrRange->lo, pAddrRange->hi);
2570 
2571     *phMemory = hMemory;
2572     return rmStatus;
2573 
2574 cleanup:
2575     pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);
2576 
2577     return rmStatus;
2578 }
2579 
2580 // Function to initialize heap for managing MIG partition memory
2581 static NV_STATUS
2582 _memmgrInitMIGMemoryPartitionHeap
2583 (
2584     OBJGPU        *pGpu,
2585     MemoryManager *pMemoryManager,
2586     NvU32          swizzId,
2587     NV_RANGE      *pAddrRange,
2588     Heap         **ppMemoryPartitionHeap
2589 )
2590 {
2591     NV_STATUS status = NV_OK;
2592     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2593     Heap *pMemoryPartitionHeap = NULL;
2594     NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2595     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2596     NvU64 partitionBaseAddr = pAddrRange->lo;
2597     NvU64 partitionSize = rangeLength(*pAddrRange);
2598 
2599     // Use default heap for swizzID-0 as we don't prereserve memory for swizzID-0
2600     NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
2601     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
2602     {
2603         *ppMemoryPartitionHeap = pMemoryManager->pHeap;
2604         return NV_OK;
2605     }
2606     else
2607     {
2608         *ppMemoryPartitionHeap  = NULL;
2609     }
2610 
2611     NV_ASSERT_OK_OR_GOTO(
2612         status,
2613         objCreate(ppMemoryPartitionHeap, pMemoryManager, Heap),
2614         fail);
2615 
2616     pMemoryPartitionHeap = *ppMemoryPartitionHeap;
2617 
2618     if (memmgrIsPmaEnabled(pMemoryManager) &&
2619         memmgrIsPmaSupportedOnPlatform(pMemoryManager))
2620     {
2621         portMemSet(&pMemoryPartitionHeap->pmaObject, 0, sizeof(pMemoryPartitionHeap->pmaObject));
2622         NV_ASSERT_OK_OR_GOTO(
2623             status,
2624             memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryPartitionHeap->pmaObject),
2625             fail);
2626 
2627         if (bNumaEnabled)
2628         {
2629             NV_ASSERT_OR_GOTO(pKernelMemorySystem->memPartitionNumaInfo[swizzId].bInUse, fail);
2630             partitionBaseAddr = pKernelMemorySystem->memPartitionNumaInfo[swizzId].offset;
2631             partitionSize = pKernelMemorySystem->memPartitionNumaInfo[swizzId].size;
2632 
2633             //
2634             // The base and size passed here are the FB base and size and
2635             // not the partition's. pmaNumaOnlined requires the FB base and
2636             // size to convert between FB local address and SPA.
2637             // memmgrPmaRegisterRegions is where the partition's base and size
2638             // is reported to PMA.
2639             //
2640             NV_ASSERT_OK_OR_GOTO(
2641                 status,
2642                 pmaNumaOnlined(&pMemoryPartitionHeap->pmaObject,
2643                                pKernelMemorySystem->memPartitionNumaInfo[swizzId].numaNodeId,
2644                                pKernelMemorySystem->coherentCpuFbBase,
2645                                pKernelMemorySystem->numaOnlineSize),
2646                                fail);
2647         }
2648     }
2649 
2650     NV_ASSERT_OK_OR_GOTO(
2651         status,
2652         heapInit(pGpu, pMemoryPartitionHeap, partitionBaseAddr,
2653                  partitionSize,
2654                  HEAP_TYPE_PARTITION_LOCAL,
2655                  GPU_GFID_PF,
2656                  NULL),
2657         fail);
2658 
2659     if (memmgrIsPmaInitialized(pMemoryManager) &&
2660         (pMemoryPartitionHeap->bHasFbRegions))
2661     {
2662         NV_ASSERT_OK_OR_GOTO(
2663             status,
2664             memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryPartitionHeap,
2665                                      &pMemoryPartitionHeap->pmaObject),
2666             fail);
2667     }
2668 
2669     if (!IsSLIEnabled(pGpu))
2670     {
2671         // Do the actual blacklisting of pages from the heap
2672         if (pMemoryPartitionHeap->blackListAddresses.count != 0)
2673         {
2674             status = heapBlackListPages(pGpu, pMemoryPartitionHeap);
2675 
2676             if (status != NV_OK)
2677             {
2678                 // Warn and continue
2679                 NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
2680                           status);
2681             }
2682         }
2683     }
2684 
2685     return NV_OK;
2686 
2687 fail:
2688 
2689     if (pMemoryPartitionHeap != NULL)
2690     {
2691         objDelete(pMemoryPartitionHeap);
2692         *ppMemoryPartitionHeap = NULL;
2693     }
2694 
2695     return status;
2696 }
2697 
2698 // Function to free GPU instance memory
2699 NV_STATUS
2700 memmgrFreeMIGGPUInstanceMemory_IMPL
2701 (
2702     OBJGPU *pGpu,
2703     MemoryManager *pMemoryManager,
2704     NvU32 swizzId,
2705     NvHandle hMemory,
2706     Heap **ppMemoryPartitionHeap
2707 )
2708 {
2709     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2710     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2711     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2712     NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2713 
2714     NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
2715 
2716     // Nothing to do for swizzId 0 as we neither allocate memory nor allocate new heap object
2717     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
2718         return NV_OK;
2719 
2720     objDelete(*ppMemoryPartitionHeap);
2721     *ppMemoryPartitionHeap = NULL;
2722 
2723     if (bNumaEnabled)
2724     {
2725         kmemsysNumaRemoveMemory_HAL(pGpu, pKernelMemorySystem, swizzId);
2726 
2727         if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
2728         {
2729             NvS32 numaNodeId;
2730 
2731             // Add back the baremetal GPU memory NUMA node.
2732             NV_ASSERT_OK_OR_RETURN(kmemsysNumaAddMemory_HAL(pGpu,
2733                                      pKernelMemorySystem,
2734                                      0,
2735                                      pKernelMemorySystem->numaOnlineBase,
2736                                      pKernelMemorySystem->numaOnlineSize,
2737                                      &numaNodeId));
2738             // Baremetal NUMA node id should be same as pGpu->numaNodeId
2739             NV_ASSERT_OR_RETURN(numaNodeId == pGpu->numaNodeId, NV_ERR_INVALID_STATE);
2740             NV_ASSERT_OK_OR_RETURN(pmaNumaOnlined(&GPU_GET_HEAP(pGpu)->pmaObject,
2741                                                   pGpu->numaNodeId,
2742                                                   pKernelMemorySystem->coherentCpuFbBase,
2743                                                   pKernelMemorySystem->numaOnlineSize));
2744         }
2745     }
2746 
2747     // Free allocated memory
2748     if (!bNumaEnabled && (hMemory != NV01_NULL_OBJECT))
2749     {
2750         pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);
2751     }
2752     return NV_OK;
2753 }
2754 
2755 void memmgrComprInfoDisableCompression_IMPL
2756 (
2757     MemoryManager *pMemoryManager,
2758     COMPR_INFO    *pComprInfo
2759 )
2760 {
2761     memmgrFillComprInfoUncompressed(pMemoryManager, pComprInfo->kind, pComprInfo);
2762 }
2763 
2764 void memmgrFillComprInfoUncompressed_IMPL
2765 (
2766     MemoryManager *pMemoryManager,
2767     NvU32 kind,
2768     COMPR_INFO *pComprInfo
2769 )
2770 {
2771     if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
2772         kind = memmgrGetUncompressedKind_HAL(ENG_GET_GPU(pMemoryManager), pMemoryManager, kind, NV_FALSE);
2773 
2774     portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
2775     pComprInfo->kind = kind;
2776 }
2777 
2778 /*!
2779  * @brief   Creates the SW state of the page level pools.
2780  *
2781  * @param   pGpu
2782  * @param   pMemoryManager
2783  *
2784  * @returns On success, returns NV_OK.
2785  *          On failure, returns error code.
2786  */
2787 NV_STATUS
2788 memmgrPageLevelPoolsCreate_IMPL
2789 (
2790     OBJGPU        *pGpu,
2791     MemoryManager *pMemoryManager
2792 )
2793 {
2794     NV_STATUS status = NV_OK;
2795 
2796     if (RMCFG_FEATURE_PMA &&
2797         memmgrIsPmaInitialized(pMemoryManager) &&
2798         memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2799     {
2800         Heap           *pHeap       = GPU_GET_HEAP(pGpu);
2801         KernelGmmu     *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2802         const GMMU_FMT *pFmt        = NULL;
2803 
2804         pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
2805         NV_ASSERT_OR_RETURN(NULL != pFmt, NV_ERR_INVALID_ARGUMENT);
2806 
2807         status = rmMemPoolSetup((void *)&pHeap->pmaObject, &pMemoryManager->pPageLevelReserve,
2808                                     (pFmt->version == GMMU_FMT_VERSION_1) ? POOL_CONFIG_GMMU_FMT_1 : POOL_CONFIG_GMMU_FMT_2);
2809 
2810         NV_ASSERT(NV_OK == status);
2811 
2812         //
2813         // Allocate the pool in the CPR region when Confidential Compute is enabled.
2814         // When Hopper Confidential Compute is enabled, page tables
2815         // cannot be placed in a non-CPR region.
2816         //
2817         if (gpuIsCCFeatureEnabled(pGpu) && (status == NV_OK))
2818         {
2819             rmMemPoolAllocateProtectedMemory(pMemoryManager->pPageLevelReserve, NV_TRUE);
2820         }
2821     }
2822     return status;
2823 }
2824 
2825 /*!
2826  * @brief   Destroys the SW state of the page level pools.
2827  *
2828  * @param   pGpu
2829  * @param   pMemoryManager
2830  *
2831  * @returns void
2832  */
2833 void
2834 memmgrPageLevelPoolsDestroy_IMPL
2835 (
2836     OBJGPU        *pGpu,
2837     MemoryManager *pMemoryManager
2838 )
2839 {
2840     if (RMCFG_FEATURE_PMA &&
2841         memmgrIsPmaInitialized(pMemoryManager) &&
2842         memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2843     {
2844         rmMemPoolDestroy(pMemoryManager->pPageLevelReserve);
2845         pMemoryManager->pPageLevelReserve = NULL;
2846     }
2847 }
2848 
2849 /*!
2850  * @brief   Gets page level pool to use
2851  *
2852  * @param       pGpu
2853  * @param       pMemoryManager
2854  * @param[in]   hClient         client handle
2855  * @param[out]  ppMemPoolInfo   page level pool
2856  *
2857  * @returns On success, returns NV_OK.
2858  *          On failure, returns error code.
2859  */
2860 NV_STATUS
2861 memmgrPageLevelPoolsGetInfo_IMPL
2862 (
2863     OBJGPU        *pGpu,
2864     MemoryManager *pMemoryManager,
2865     Device        *pDevice,
2866     RM_POOL_ALLOC_MEM_RESERVE_INFO **ppMemPoolInfo
2867 )
2868 {
2869     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2870     NvBool bMemPartitioningEnabled = (pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager);
2871     RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool = NULL;
2872     NV_ASSERT_OR_RETURN(ppMemPoolInfo != NULL, NV_ERR_INVALID_ARGUMENT);
2873 
2874     if (!memmgrIsPmaInitialized(pMemoryManager) ||
2875         !memmgrAreClientPageTablesPmaManaged(pMemoryManager))
2876     {
2877         return NV_ERR_INVALID_STATE;
2878     }
2879 
2880     // If memory partitioning is enabled, then use per-partition pool allocator
2881     if (bMemPartitioningEnabled)
2882     {
2883         MIG_INSTANCE_REF ref;
2884         NV_ASSERT_OK_OR_RETURN(
2885             kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
2886         pMemPool = ref.pKernelMIGGpuInstance->pPageTableMemPool;
2887     }
2888     else
2889     {
2890         pMemPool = pMemoryManager->pPageLevelReserve;
2891     }
2892     NV_ASSERT_OR_RETURN(pMemPool != NULL, NV_ERR_INVALID_STATE);
2893 
2894     *ppMemPoolInfo = pMemPool;
2895     return NV_OK;
2896 }
2897 
2898 /*!
2899  * @brief Initialize the PMA object
2900  *
2901  * @param       pGpu
2902  * @param       pMemoryManager
2903  * @param[in]   pPma         Pointer to the PMA object to init
2904  *
2905  * @returns On success, returns NV_OK.
2906  *          On failure, returns error code.
2907  */
2908 NV_STATUS
2909 memmgrPmaInitialize_IMPL
2910 (
2911     OBJGPU        *pGpu,
2912     MemoryManager *pMemoryManager,
2913     PMA           *pPma
2914 )
2915 {
2916     NvU32 pmaInitFlags = PMA_INIT_NONE;
2917     NV_STATUS status = NV_OK;
2918     NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);
2919 
2920     NV_ASSERT(memmgrIsPmaEnabled(pMemoryManager) &&
2921               memmgrIsPmaSupportedOnPlatform(pMemoryManager));
2922 
2923     if (memmgrIsPmaForcePersistence(pMemoryManager))
2924     {
2925         pmaInitFlags |= PMA_INIT_FORCE_PERSISTENCE;
2926     }
2927 
2928     if (memmgrIsScrubOnFreeEnabled(pMemoryManager))
2929     {
2930         pmaInitFlags |= PMA_INIT_SCRUB_ON_FREE;
2931     }
2932 
2933     // Disable client page table management on SLI.
2934     if (IsSLIEnabled(pGpu))
2935     {
2936         memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
2937     }
2938 
2939     if (bNumaEnabled)
2940     {
2941         NV_PRINTF(LEVEL_INFO, "Initializing PMA with NUMA flag.\n");
2942         pmaInitFlags |= PMA_INIT_NUMA;
2943     }
2944 
2945     if (gpuIsSelfHosted(pGpu))
2946     {
2947         NV_PRINTF(LEVEL_INFO, "Initializing PMA with NUMA_AUTO_ONLINE flag.\n");
2948         pmaInitFlags |= PMA_INIT_NUMA_AUTO_ONLINE;
2949     }
2950 
2951     if (memmgrIsPmaAddrTree(pMemoryManager))
2952     {
2953         pmaInitFlags |= PMA_INIT_ADDRTREE;
2954     }
2955 
2956     status = pmaInitialize(pPma, pmaInitFlags);
2957     if (status != NV_OK)
2958     {
2959         NV_PRINTF(LEVEL_ERROR, "Failed to initialize PMA!\n");
2960         return status;
2961     }
2962 
2963     if (bNumaEnabled)
2964     {
2965         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2966 
2967         NvU32 numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_DEFAULT;
2968 
2969         if (osReadRegistryDword(pGpu, NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE, &numaSkipReclaimVal) == NV_OK)
2970         {
2971             if (numaSkipReclaimVal > NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX)
2972             {
2973                 numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX;
2974             }
2975         }
2976         pmaNumaSetReclaimSkipThreshold(pPma, numaSkipReclaimVal);
2977 
2978         // Full FB memory is added and onlined already
2979         if (pKernelMemorySystem->memPartitionNumaInfo[0].bInUse)
2980         {
2981             NV_ASSERT_OK_OR_RETURN(pmaNumaOnlined(pPma, pGpu->numaNodeId,
2982                                                   pKernelMemorySystem->coherentCpuFbBase,
2983                                                   pKernelMemorySystem->numaOnlineSize));
2984         }
2985 
2986     }
2987 
2988     return NV_OK;
2989 }
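
     //
     // Example of the resulting flag combination (illustrative): on a self-hosted,
     // NUMA-onlined system with scrub-on-free enabled and the address-tree allocator
     // selected, pmaInitialize() is called with
     //   PMA_INIT_SCRUB_ON_FREE | PMA_INIT_NUMA | PMA_INIT_NUMA_AUTO_ONLINE | PMA_INIT_ADDRTREE
     //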
2990 
2991 NV_STATUS
2992 memmgrInitFbRegions_IMPL
2993 (
2994     OBJGPU        *pGpu,
2995     MemoryManager *pMemoryManager
2996 )
2997 {
2998     NV_ASSERT_OR_RETURN(pMemoryManager->Ram.numFBRegions == 0, NV_ERR_INVALID_STATE);
2999 
3000     // Don't set up regions if FB is broken and we aren't using the L2 cache as "FB".
3001     if ((pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
3002          !gpuIsCacheOnlyModeEnabled(pGpu)))
3003         return NV_OK;
3004 
3005     NV_ASSERT_OK_OR_RETURN(memmgrInitBaseFbRegions_HAL(pGpu, pMemoryManager));
3006 
3007     NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegionsHal_HAL(pGpu, pMemoryManager));
3008 
3009     //
3010     // Build a list of regions sorted by allocation priority
3011     // (highest to lowest). Used for allocations using ObjHeap.
3012     //
3013     memmgrRegenerateFbRegionPriority(pGpu, pMemoryManager);
3014 
3015     if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM)
3016     {
3017         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_EXTERNAL_HEAP_CONTROL))
3018         {
3019             // KMD in WDDM mode
3020             if (pMemoryManager->bMixedDensityFbp)
3021             {
3022                 //
3023                 // For mixed memory on LDDM platforms, when we are using kernel-managed
3024                 // heap (not TCC mode), we want to prefer allocating in slow memory to conserve
3025                 // fast memory for applications.
3026                 //
3027                 pMemoryManager->bPreferSlowRegion = NV_TRUE;
3028             }
3029         }
3030     }
3031 
3032     NV_ASSERT_OK_OR_RETURN(memmgrSetPlatformPmaSupport(pGpu, pMemoryManager));
3033 
3034     return NV_OK;
3035 }
3036 
3037 /*!
3038  * @brief Register regions to the PMA object
3039  *
3040  * @param       pGpu
3041  * @param       pMemoryManager
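 * @param[in]   pHeap        Heap supplying the managed range and blacklist addresses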
3042  * @param[in]   pPma         Pointer to the PMA object to register with
3043  *
3044  * @returns On success, returns NV_OK.
3045  *          On failure, returns error code.
3046  */
3047 NV_STATUS
3048 memmgrPmaRegisterRegions_IMPL
3049 (
3050     OBJGPU        *pGpu,
3051     MemoryManager *pMemoryManager,
3052     Heap          *pHeap,
3053     PMA           *pPma
3054 )
3055 {
3056     HEAP_TYPE_INTERNAL heapType = pHeap->heapType;
3057     PMA_REGION_DESCRIPTOR pmaRegion;
3058     NvU32 pmaRegionIdx = 0;
3059     NvU32 i;
3060     PMA_BLACKLIST_ADDRESS *pBlacklistPages = NULL;
3061     NvU32 blRegionCount = 0;
3062     NvU32 blPageIndex;
3063     NvU32 blackListCount;
3064     NvU64 base, size;
3065     NvU64 pmaTotalMemorySize = 0;
3066     NV_STATUS status = NV_OK;
3067     const MEMORY_SYSTEM_STATIC_CONFIG *pMemsysConfig =
3068                kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
3069 
3070     blackListCount = pHeap->blackListAddresses.count;
3071     base = pHeap->base;
3072     size = pHeap->total;
3073 
3074     //
3075     // If there are blacklisted pages, prepare a staging buffer to pass the
3076     // per-region blacklisted pages to PMA
3077     //
3078     if (blackListCount > 0)
3079     {
3080         pBlacklistPages = portMemAllocNonPaged(
3081                             sizeof(PMA_BLACKLIST_ADDRESS) * blackListCount);
3082         if (pBlacklistPages == NULL)
3083         {
3084             NV_PRINTF(LEVEL_ERROR,
3085                       "Could not allocate memory for blackList!\n");
3086             status = NV_ERR_NO_MEMORY;
3087             goto _pmaInitFailed;
3088         }
3089     }
3090 
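    //
    // Walk each FB region: clamp it to the heap-managed range, collect any
    // blacklisted pages that fall inside it, and register it with PMA.
    //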
3091     for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
3092     {
3093         //
3094         // Skip all regions that are completely outside the heap boundary,
3095         // OR marked as internal (used for internal RM allocations),
3096         // OR marked as reserved (used for console, display, link training buffers, etc.)
3097         //
3098         if ((pMemoryManager->Ram.fbRegion[i].limit < base ||
3099              pMemoryManager->Ram.fbRegion[i].base >= (base + size)) ||
3100             (pMemoryManager->Ram.fbRegion[i].bInternalHeap) ||
3101             (pMemoryManager->Ram.fbRegion[i].bRsvdRegion))
3102         {
3103             continue;
3104         }
3105 
3106         NV_PRINTF(LEVEL_INFO,
3107                   "PMA: Register FB region[%d] %llx..%llx EXTERNAL\n", i,
3108                   pMemoryManager->Ram.fbRegion[i].base, pMemoryManager->Ram.fbRegion[i].limit);
3109 
3110         pmaRegion.base              = pMemoryManager->Ram.fbRegion[i].base;
3111         pmaRegion.limit             = pMemoryManager->Ram.fbRegion[i].limit;
3112 
3113         // Clamp the region base up to the start of heap-managed memory if needed.
3114         if (pmaRegion.base < base)
3115         {
3116             pmaRegion.base = base;
3117         }
3118 
3119         // Clamp the region limit down to the end of heap-managed memory if needed.
3120         if (pmaRegion.limit >= (base + size))
3121         {
3122             pmaRegion.limit = base + size - 1;
3123         }
3124 
3125         pmaRegion.performance        = pMemoryManager->Ram.fbRegion[i].performance;
3126         pmaRegion.bSupportCompressed = pMemoryManager->Ram.fbRegion[i].bSupportCompressed;
3127         pmaRegion.bSupportISO        = pMemoryManager->Ram.fbRegion[i].bSupportISO;
3128         pmaRegion.bProtected         = pMemoryManager->Ram.fbRegion[i].bProtected;
3129 
3130         //
3131         // Now that the region is known, check whether it contains any blacklisted pages
3132         // TODO: Try to coalesce to unique 64K pages
3133         //
3134         blRegionCount = 0;
3135         if (pBlacklistPages != NULL)
3136         {
3137             for (blPageIndex = 0; blPageIndex < blackListCount; blPageIndex++)
3138             {
3139                 if ((pHeap->blackListAddresses.data[blPageIndex].address
3140                             != NV2080_CTRL_FB_OFFLINED_PAGES_INVALID_ADDRESS) &&
3141                     (pHeap->blackListAddresses.data[blPageIndex].address >= pmaRegion.base) &&
3142                     (pHeap->blackListAddresses.data[blPageIndex].address <= pmaRegion.limit))
3143                 {
3144                     // Collect the region's blacklisted pages
3145                     pBlacklistPages[blRegionCount].physOffset = pHeap->blackListAddresses.data[blPageIndex].address;
3146 
3147                     pBlacklistPages[blRegionCount].bIsDynamic =
3148                             ((pHeap->blackListAddresses.data[blPageIndex].type ==
3149                                 NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_MULTIPLE_SBE) ||
3150                             (pHeap->blackListAddresses.data[blPageIndex].type ==
3151                                 NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE));
3152 
3153                     blRegionCount++;
3154                 }
3155             }
3156         }
3157 
3158         pmaTotalMemorySize += (pmaRegion.limit - pmaRegion.base + 1);
3159         NV_PRINTF(LEVEL_INFO,
3160                   "Register FB region %llx..%llx of size %llx with PMA\n",
3161                   pmaRegion.base, pmaRegion.limit,
3162                   pmaRegion.limit - pmaRegion.base + 1);
3163         //
3164         // Register the region for PMA management, and note if asynchronous
3165         // scrubbing is enabled.  Synchronous scrubbing is done before
3166         // heap/PMA is initialized, but asynchronously scrubbed pages will
3167         // need to be unmarked once they are scrubbed.
3168         //
3169         status = pmaRegisterRegion(pPma, pmaRegionIdx,
3170                     memmgrEccScrubInProgress_HAL(pGpu, pMemoryManager),
3171                     &pmaRegion, blRegionCount,
3172                     ((blRegionCount==0) ? NULL : pBlacklistPages));
3173         if (status != NV_OK)
3174         {
3175             NV_PRINTF(LEVEL_ERROR,
3176                       "failed to register FB region %llx..%llx with PMA\n",
3177                       pmaRegion.base, pmaRegion.limit);
3178             DBG_BREAKPOINT();
3179             goto _pmaInitFailed;
3180         }
3181         pmaRegionIdx++;
3182     }
3183 
3184     if (gpuIsSelfHosted(pGpu) && osNumaOnliningEnabled(pGpu->pOsGpuInfo))
3185     {
3186         //
3187         // NUMA-onlined memory size should not exceed the memory size assigned to PMA.
3188         // TODO: Currently on self-hosted and P9+GV100 systems the NUMA-onlined size is less
3189         // than the PMA memory size. Ideally both should be identical. Bug 4051320.
3190         //
3191         NvU64 numaTotalSize = 0;
3192         NvU64 numaFreeSize = 0;
3193         osGetNumaMemoryUsage(pPma->numaNodeId, &numaFreeSize, &numaTotalSize);
3194         NV_ASSERT_OR_RETURN(pmaTotalMemorySize >= numaTotalSize, NV_ERR_INVALID_STATE);
3195     }
3196     //
3197     // Bug 200354346: make sure the RM reserved region(s) are
3198     // scrubbed during region creation itself. The top-down scrubber
3199     // skips the RM reserved region(s) on the assumption that they
3200     // are pre-scrubbed.
3201     //
3202     if (heapType != HEAP_TYPE_PARTITION_LOCAL)
3203         memmgrScrubInternalRegions_HAL(pGpu, pMemoryManager);
3204 
3205 _pmaInitFailed:
3206     portMemFree(pBlacklistPages);
3207 
3208     if ((status == NV_OK) && (pMemsysConfig->fbOverrideStartKb != 0))
3209     {
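        //
        // The regkey moved the effective start of FB up; pin down everything below
        // that offset with a contiguous allocation so PMA never hands it out.
        //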
3210         NvU64 allocSize = NV_ALIGN_UP(((NvU64)pMemsysConfig->fbOverrideStartKb << 10), PMA_GRANULARITY);
3211         NvU32 numPages  = (NvU32)(allocSize >> PMA_PAGE_SHIFT);
3212         PMA_ALLOCATION_OPTIONS allocOptions = {0};
3213 
3214         allocOptions.flags     = PMA_ALLOCATE_CONTIGUOUS;
3215         allocOptions.flags    |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE;
3216         allocOptions.physBegin = 0;
3217         allocOptions.physEnd   = allocSize - 1;
3218 
3219         // The returned page list is intentionally thrown away; only the allocation itself matters here
3220         NvU64 *pPages = NULL;
3221         pPages = portMemAllocNonPaged(numPages * sizeof(NvU64));
3222         if (pPages != NULL)
3223         {
3224             // Accommodate the regkey override for FB start
3225             status = pmaAllocatePages(pPma, numPages, _PMA_64KB, &allocOptions, pPages);
3226             portMemFree(pPages);
3227         }
3228     }
3229     if (status != NV_OK)
3230     {
3231         if (memmgrIsPmaInitialized(pMemoryManager))
3232         {
3233             if (heapType != HEAP_TYPE_PARTITION_LOCAL)
3234             {
3235                 memmgrSetPmaInitialized(pMemoryManager, NV_FALSE);
3236             }
3237             pmaDestroy(pPma);
3238         }
3239     }
3240 
3241     return status;
3242 }
3243 
3244 /*!
3245  * @brief Allocate internal handles for MIG partition memory allocation
3246  */
3247 NV_STATUS
3248 memmgrAllocMIGMemoryAllocationInternalHandles_IMPL
3249 (
3250     OBJGPU *pGpu,
3251     MemoryManager *pMemoryManager
3252 )
3253 {
3254     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
3255 
3256     NV_ASSERT_OR_RETURN(pMemoryManager->MIGMemoryPartitioningInfo.hClient == NV01_NULL_OBJECT, NV_ERR_INVALID_STATE);
3257     NV_ASSERT_OK_OR_RETURN(
3258         rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
3259                                              &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
3260                                              &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
3261                                              &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice));
3262 
3263     return NV_OK;
3264 }
3265 
3266 /*!
3267  * @brief Free internal handles used to support MIG memory partitioning
3268  */
3269 void
3270 memmgrFreeMIGMemoryAllocationInternalHandles_IMPL
3271 (
3272     OBJGPU *pGpu,
3273     MemoryManager *pMemoryManager
3274 )
3275 {
3276     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
3277 
3278     rmapiutilFreeClientAndDeviceHandles(pRmApi,
3279                                         &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
3280                                         &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
3281                                         &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice);
3282 }
3283 
3284 /*!
3285  * @brief Gets free memory (client visible) for all valid GPU instances
3286  */
3287 void
3288 memmgrGetFreeMemoryForAllMIGGPUInstances_IMPL
3289 (
3290     OBJGPU *pGpu,
3291     MemoryManager *pMemoryManager,
3292     NvU64 *pBytes
3293 )
3294 {
3295     NvU64 val = 0;
3296     Heap *pHeap = NULL;
3297     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
3298     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
3299 
3300     *pBytes = 0;
3301 
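    // Sum the free memory reported by each GPU instance's partition heap,
    // using PMA when it is initialized and the heap allocator otherwise.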
3302     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
3303     {
3304         NV_ASSERT(pKernelMIGGPUInstance->pMemoryPartitionHeap != NULL);
3305         pHeap = pKernelMIGGPUInstance->pMemoryPartitionHeap;
3306 
3307         if (memmgrIsPmaInitialized(pMemoryManager))
3308             pmaGetFreeMemory(&pHeap->pmaObject, &val);
3309         else
3310             heapGetFree(pHeap, &val);
3311 
3312         *pBytes += val;
3313     }
3314     FOR_EACH_VALID_GPU_INSTANCE_END();
3315 }
3316 
3317 /*!
3318  * @brief Gets total memory for all valid GPU instances
3319  *
3320  * @param       pGpu
3321  * @param       pMemoryManager
3322  * @param[out]  pBytes          pointer to the total memory
3323  *
3324  */
3325 void
3326 memmgrGetTotalMemoryForAllMIGGPUInstances_IMPL
3327 (
3328     OBJGPU *pGpu,
3329     MemoryManager *pMemoryManager,
3330     NvU64 *pBytes
3331 )
3332 {
3333     NvU64 val = 0;
3334     Heap *pHeap = NULL;
3335     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
3336     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
3337 
3338     *pBytes = 0;
3339 
3340     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
3341     {
3342         NV_ASSERT(pKernelMIGGPUInstance->pMemoryPartitionHeap != NULL);
3343         pHeap = pKernelMIGGPUInstance->pMemoryPartitionHeap;
3344 
3345         if (memmgrIsPmaInitialized(pMemoryManager))
3346             pmaGetTotalMemory(&pHeap->pmaObject, &val);
3347         else
3348             heapGetSize(pHeap, &val);
3349 
3350         *pBytes += val;
3351     }
3352     FOR_EACH_VALID_GPU_INSTANCE_END();
3353 }
3354 
3355 void
3356 memmgrGetTopLevelScrubberStatus_IMPL
3357 (
3358     OBJGPU *pGpu,
3359     MemoryManager *pMemoryManager,
3360     NvBool *pbTopLevelScrubberEnabled,
3361     NvBool *pbTopLevelScrubberConstructed
3362 )
3363 {
3364     NvBool bTopLevelScrubberEnabled = NV_FALSE;
3365     NvBool bTopLevelScrubberConstructed = NV_FALSE;
3366     NvU32 pmaConfigs = PMA_QUERY_SCRUB_ENABLED | PMA_QUERY_SCRUB_VALID;
3367 
3368     if (memmgrIsPmaInitialized(pMemoryManager))
3369     {
3370         Heap *pHeap = GPU_GET_HEAP(pGpu);
3371         NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfigs));
3372         bTopLevelScrubberEnabled = (pmaConfigs & PMA_QUERY_SCRUB_ENABLED) != 0x0;
3373         bTopLevelScrubberConstructed = (pmaConfigs & PMA_QUERY_SCRUB_VALID) != 0x0;
3374     }
3375 
3376     if (pbTopLevelScrubberEnabled != NULL)
3377         *pbTopLevelScrubberEnabled = bTopLevelScrubberEnabled;
3378     if (pbTopLevelScrubberConstructed != NULL)
3379         *pbTopLevelScrubberConstructed = bTopLevelScrubberConstructed;
3380 }
3381 
3382 /*!
3383  * @brief       Return the full address range of the partition assigned to the vGPU.
3384  *
3385  * @param[in]   pGpu
3386  * @param[in]   pMemoryManager
3387  * @param[out]  base           pointer receiving the base address of the partition
3388  * @param[out]  size           pointer receiving the overall size of the partition
3389  */
3390 static void
3391 _memmgrGetFullMIGAddrRange
3392 (
3393     OBJGPU *pGpu,
3394     MemoryManager *pMemoryManager,
3395     NvU64 *base,
3396     NvU64 *size
3397 )
3398 {
3399     NvU32 i;
3400     NvU64 lo, hi;
3401 
3402     *base = 0;
3403     *size = 0;
3404     if (pMemoryManager->Ram.numFBRegions == 0)
3405     {
3406         return;
3407     }
3408 
3409     lo = pMemoryManager->Ram.fbRegion[0].base;
3410     hi = pMemoryManager->Ram.fbRegion[0].limit;
3411 
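    // Expand [lo, hi] to the lowest base and highest limit across all FB regions.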
3412     for (i = 1; i < pMemoryManager->Ram.numFBRegions; i++)
3413     {
3414         if (pMemoryManager->Ram.fbRegion[i].base < lo)
3415         {
3416             lo = pMemoryManager->Ram.fbRegion[i].base;
3417         }
3418 
3419         if (pMemoryManager->Ram.fbRegion[i].limit > hi)
3420         {
3421             hi = pMemoryManager->Ram.fbRegion[i].limit;
3422         }
3423     }
3424 
3425     *base = lo;
3426     *size = hi - lo + 1;
3427 }
3428 
3429 /*!
3430  * @brief Discover MIG partitionable memory range based on PMA status
3431  */
3432 NV_STATUS
3433 memmgrDiscoverMIGPartitionableMemoryRange_VF
3434 (
3435     OBJGPU *pGpu,
3436     MemoryManager *pMemoryManager,
3437     NV_RANGE *pMemoryRange
3438 )
3439 {
3440     NvU64 size;
3441     NvU64 base;
3442 
3443     // Determine the base and size of the partitionable memory range
3444     if (!memmgrIsPmaInitialized(pMemoryManager))
3445     {
3446         Heap *pHeap = GPU_GET_HEAP(pGpu);
3447         NvU64 freeMem;
3448         NvU64 bytesTotal;
3449         NvU64 offset;
3450 
3451         NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
3452                                         &offset, &size));
3453 
3454         //
3455         // heapInfo() returns the size of the largest free block in 'size' and its
3456         // starting address in 'offset'; that offset is the base we care about here
3457         //
3458         base = offset;
3459     }
3460     else
3461     {
3462         //
3463         // In the case of vGPU, pmaGetLargestFree only returns the user-visible
3464         // PMA region and not the reserved/internal regions that constitute the
3465         // overall partition size assigned to the vGPU.
3466         // This is misleading as pMemoryManager->partitionableMemoryRange is expected to
3467         // represent the actual partition size.
3468         //
3469         _memmgrGetFullMIGAddrRange(pGpu, pMemoryManager, &base, &size);
3470     }
3471 
3472     *pMemoryRange = rangeMake(base, base + size - 1);
3473 
3474     return NV_OK;
3475 }
3476 
3477 NV_STATUS
3478 memmgrValidateFBEndReservation_PF
3479 (
3480     OBJGPU *pGpu,
3481     MemoryManager *pMemoryManager
3482 )
3483 {
3484     NV_STATUS status;
3485 
3486     NV_ASSERT_TRUE_OR_GOTO(status,
3487         (pGpu != NULL) &&
3488         (pMemoryManager != NULL),
3489         NV_ERR_INVALID_ARGUMENT,
3490         memmgrValidateFBEndReservation_PF_exit);
3491 
3492     // If we reserved more memory from RM than we previously estimated
3493     if (pMemoryManager->rsvdMemorySize > memmgrGetFBEndReserveSizeEstimate_HAL(pGpu, pMemoryManager))
3494     {
3495         NV_PRINTF(LEVEL_ERROR,
3496             "End of FB reservation was not enough (%u vs %u). Failing to boot.\n",
3497             memmgrGetFBEndReserveSizeEstimate_HAL(pGpu, pMemoryManager),
3498             pMemoryManager->rsvdMemorySize);
3499 
3500         NV_ASSERT_OK_OR_GOTO(status,
3501             NV_ERR_INSUFFICIENT_RESOURCES,
3502             memmgrValidateFBEndReservation_PF_exit);
3503     }
3504 
    status = NV_OK;

3505 memmgrValidateFBEndReservation_PF_exit:
3506     return status;
3507 }
3508 
3509 NV_STATUS
3510 memmgrReserveMemoryForPmu_MONOLITHIC
3511 (
3512     OBJGPU *pGpu,
3513     MemoryManager *pMemoryManager
3514 )
3515 {
3516     NV_STATUS status = NV_OK;
3517 
3518     return status;
3519 }
3520 
3521 
3522 NV_STATUS
3523 memmgrReserveMemoryForFsp_IMPL
3524 (
3525     OBJGPU *pGpu,
3526     MemoryManager *pMemoryManager
3527 )
3528 {
3529     KernelFsp *pKernelFsp = GPU_GET_KERNEL_FSP(pGpu);
3530 
3531     //
3532     // If we sent FSP commands to boot ACR, we need to allocate the surfaces
3533     // used by FSP and ACR as WPR/FRTS here from the reserved heap
3534     //
3535     if (pKernelFsp && (!pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_DISABLE_FRTS_VIDMEM) &&
3536         (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_BOOT_COMMAND_OK))))
3537     {
3538 
3539         // For GSP-RM flow, we don't need to allocate WPR since it is handled by CPU
3540         if (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_GSP_MODE_GSPRM))
3541         {
3542             return NV_OK;
3543         }
3544 
3545     }
3546     return NV_OK;
3547 }
3548 
3549 NvU64
3550 memmgrGetVgpuHostRmReservedFb_KERNEL
3551 (
3552     OBJGPU         *pGpu,
3553     MemoryManager  *pMemoryManager,
3554     NvU32           vgpuTypeId
3555 )
3556 {
3557     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3558     NV2080_CTRL_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB_PARAMS params = {0};
3559 
3560     params.vgpuTypeId = vgpuTypeId;
3561     // Send to GSP to get amount of FB reserved for the host
3562     NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
3563                                            pGpu->hInternalClient,
3564                                            pGpu->hInternalSubdevice,
3565                                            NV2080_CTRL_CMD_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB,
3566                                            &params,
3567                                            sizeof(params)));
3568     return params.hostReservedFb;
3569 }
3570 
3571 /*!
3572  * @brief   Memory Manager State post load
3573  *
3574  * @param[in]       pGpu           GPU pointer
3575  * @param[in/out]   pMemoryManager MemoryManager pointer
3576  * @param[in]       flags          State transition flags
3577  *
3578  * @returns On success, returns NV_OK.
3579  *          On failure, returns error code.
3580  */
3581 NV_STATUS
3582 memmgrStatePostLoad_IMPL
3583 (
3584     OBJGPU *pGpu,
3585     MemoryManager *pMemoryManager,
3586     NvU32 flags
3587 )
3588 {
3589     if (memmgrIsLocalEgmSupported(pMemoryManager))
3590     {
3591         NvU64 egmPhysAddr, egmSize;
3592         NvS32 egmNodeId;
3593         NvU32 data32;
3594         KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
3595 
3596         pMemoryManager->localEgmNodeId = -1;
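        //
        // Local EGM requires a self-hosted system with the C2C link up and a
        // non-zero EGM carve-out reported by the OS; cache its base, size, and
        // NUMA node when present.
        //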
3597         if (gpuIsSelfHosted(pGpu) &&
3598             pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP) &&    // EGM can be enabled only in C2C path.
3599             (osGetEgmInfo(pGpu, &egmPhysAddr, &egmSize, &egmNodeId) == NV_OK) &&
3600             (egmSize != 0))
3601         {
3602             pMemoryManager->localEgmBasePhysAddr = egmPhysAddr;
3603             pMemoryManager->localEgmSize = egmSize;
3604             pMemoryManager->localEgmNodeId = egmNodeId;
3605             //
3606             // Using fixed Peer ID 7 for local EGM so that vGPU
3607             // migration doesn't fail because of peer id conflict in
3608             // the new host system.
3609             //
3610             pMemoryManager->localEgmPeerId = 7;
3611             pMemoryManager->bLocalEgmEnabled = NV_TRUE;
3612         }
3613 
3614         //
3615         // The regkey can override the production-flow values.
3616         // Note that this could cause an issue with vGPU migration
3617         // if one host system uses the regkey to override the EGM peer ID
3618         // and the other host system doesn't.
3619         //
3620         if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_LOCAL_EGM_PEER_ID, &data32) == NV_OK)
3621         {
3622             pMemoryManager->bLocalEgmEnabled = NV_TRUE;
3623             pMemoryManager->localEgmPeerId = data32;
3624         }
3625     }
3626 
3627     //
3628     // Reserve the peer ID used for local EGM so that it isn't
3629     // reused for other peer GPUs.
3630     //
3631     if (memmgrIsLocalEgmEnabled(pMemoryManager))
3632     {
3633         if (kbusReserveP2PPeerIds_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), NVBIT(pMemoryManager->localEgmPeerId)) == NV_OK)
3634         {
3635             {
3636                 NV2080_CTRL_INTERNAL_HSHUB_EGM_CONFIG_PARAMS params = { 0 };
3637                 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3638                 NV_STATUS status;
3639 
3640                 params.egmPeerId = pMemoryManager->localEgmPeerId;
3641 
3642                 // Call physical HSHUB to program the EGM PeerId settings.
3643 
3644                 status = pRmApi->Control(pRmApi,
3645                                          pGpu->hInternalClient,
3646                                          pGpu->hInternalSubdevice,
3647                                          NV2080_CTRL_CMD_INTERNAL_HSHUB_EGM_CONFIG,
3648                                          &params,
3649                                          sizeof(params));
3650                 if (status != NV_OK)
3651                 {
3652                     NV_PRINTF(LEVEL_ERROR, "HSHUB programming failed for EGM Peer ID: %u\n",
3653                               pMemoryManager->localEgmPeerId);
3654                     pMemoryManager->bLocalEgmEnabled = NV_FALSE;
3655                     pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3656                     return status;
3657                 }
3658             }
3659         }
3660         else
3661         {
3662             NV_PRINTF(LEVEL_ERROR,
3663                       "Peer ID specified for local EGM already in use!\n");
3664             pMemoryManager->bLocalEgmEnabled = NV_FALSE;
3665             pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3666             return NV_ERR_INVALID_ARGUMENT;
3667         }
3668     }
3669     else
3670     {
3671         pMemoryManager->localEgmPeerId = BUS_INVALID_PEER;
3672     }
3673     return NV_OK;
3674 }
3675 
3676 NV_STATUS
3677 memmgrInitCeUtils_IMPL
3678 (
3679     MemoryManager *pMemoryManager,
3680     NvBool         bFifoLite
3681 )
3682 {
3683     OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
3684     NV0050_ALLOCATION_PARAMETERS ceUtilsParams = {0};
3685 
3686     NV_ASSERT_OR_RETURN(pMemoryManager->pCeUtils == NULL, NV_ERR_INVALID_STATE);
3687 
3688     if (!bFifoLite && pMemoryManager->pCeUtilsSuspended != NULL)
3689     {
3690         pMemoryManager->pCeUtils = pMemoryManager->pCeUtilsSuspended;
3691         pMemoryManager->pCeUtilsSuspended = NULL;
3692         return NV_OK;
3693     }
3694 
3695     ceUtilsParams.flags = bFifoLite ? DRF_DEF(0050_CEUTILS, _FLAGS, _FIFO_LITE, _TRUE) : 0;
3696 
3697     NV_ASSERT_OK_OR_RETURN(objCreate(&pMemoryManager->pCeUtils, pMemoryManager, CeUtils, ENG_GET_GPU(pMemoryManager), NULL, &ceUtilsParams));
3698 
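    // Sanity-check the freshly constructed CeUtils instance; it is torn down below if the test fails.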
3699     NV_STATUS status = memmgrTestCeUtils(pGpu, pMemoryManager);
3700     NV_ASSERT_OK(status);
3701     if (status != NV_OK)
3702     {
3703         memmgrDestroyCeUtils(pMemoryManager, NV_FALSE);
3704     }
3705 
3706     return status;
3707 }
3708 
3709 void
3710 memmgrDestroyCeUtils_IMPL
3711 (
3712     MemoryManager *pMemoryManager,
3713     NvBool         bSuspendCeUtils
3714 )
3715 {
3716     if (bSuspendCeUtils)
3717     {
3718         NV_ASSERT_OR_RETURN_VOID(pMemoryManager->pCeUtilsSuspended == NULL);
3719         pMemoryManager->pCeUtilsSuspended = pMemoryManager->pCeUtils;
3720     }
3721     else
3722     {
3723         objDelete(pMemoryManager->pCeUtils);
3724     }
3725     pMemoryManager->pCeUtils = NULL;
3726 }
3727