/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "os/os.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "virtualization/hypervisor/hypervisor.h"
#include "vgpu/vgpu_events.h"
#include "objrpc.h"
#include "gpu/bif/kernel_bif.h"
#include "gpu/bus/kern_bus.h"
#include "nvRmReg.h"
#include "gpu/gsp/gsp_static_config.h"

static void
kmemsysInitRegistryOverrides
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    NvU32 data32;

    //
    // Bug 1032432: check the regkey for L2 clean FB pull
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_L2_CLEAN_FB_PULL, &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_L2_CLEAN_FB_PULL_DISABLED)
            pKernelMemorySystem->bL2CleanFbPull = NV_FALSE;
    }
}
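
//
// Illustrative sketch (comment only, not compiled): other registry overrides
// follow the same read-then-apply pattern. A hypothetical boolean override
// would look like this, where NV_REG_STR_RM_EXAMPLE_FEATURE and
// bExampleFeature are made-up names:
//
//     NvU32 data32;
//     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_EXAMPLE_FEATURE, &data32) == NV_OK)
//     {
//         // Override the compile-time default only when the key is present.
//         pKernelMemorySystem->bExampleFeature = (data32 != 0);
//     }
//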

NV_STATUS
kmemsysConstructEngine_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    ENGDESCRIPTOR       engDesc
)
{
    pKernelMemorySystem->memPartitionNumaInfo = NULL;

    kmemsysInitRegistryOverrides(pGpu, pKernelMemorySystem);

    if (IS_GSP_CLIENT(pGpu))
    {
        //
        // Setting up the sysmem flush buffer needs to be done very early in
        // some cases, as it's required for the GPU to perform a system flush.
        // One such case is resetting GPU FALCONs, and in particular resetting
        // the PMU as part of VBIOS init.
        //
        NV_ASSERT_OK_OR_RETURN(kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem));
    }

    return NV_OK;
}

/*!
 * Initialize the Kernel Memory System state.
 *
 * @param[in]  pGpu                pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 *
 * @return NV_OK upon success.
 */
NV_STATUS kmemsysStateInitLocked_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    MEMORY_SYSTEM_STATIC_CONFIG *pStaticConfig;
    NV_STATUS status = NV_OK;

    NV_ASSERT_OK_OR_GOTO(status, kmemsysEnsureSysmemFlushBufferInitialized(pGpu, pKernelMemorySystem), fail);

    pStaticConfig = portMemAllocNonPaged(sizeof(*pStaticConfig));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticConfig != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pStaticConfig, 0, sizeof(*pStaticConfig));

    //
    // Publish the config before the HAL call so that the fail path below can
    // free it even if kmemsysInitStaticConfig_HAL() fails.
    //
    pKernelMemorySystem->pStaticConfig = pStaticConfig;

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        kmemsysInitStaticConfig_HAL(pGpu, pKernelMemorySystem, pStaticConfig),
        fail);

    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    pKernelMemorySystem->memPartitionNumaInfo = portMemAllocNonPaged(sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);
    if (pKernelMemorySystem->memPartitionNumaInfo == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to allocate memory for NUMA information.\n");
        status = NV_ERR_NO_MEMORY;
        NV_ASSERT_OR_GOTO(0, fail);
    }
    portMemSet(pKernelMemorySystem->memPartitionNumaInfo, 0, sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);

    if (gpuIsSelfHosted(pGpu) &&
        (pKernelBif != NULL) && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP))
    {
        //
        // kmemsysSetupCoherentCpuLink() should be done only for the
        // self-hosted (SHH) configuration, where the coherent C2C link
        // connects the host CPU (TH500) and the GPU, and not in the
        // externally hosted (EHH) case, where the host CPU (say, x86) is
        // connected to the GPU through PCIe and C2C only connects the TH500
        // (for EGM memory) and the GPU. The gpuIsSelfHosted(pGpu) check
        // distinguishes SHH from EHH, since the C2C link is up in both
        // configurations.
        //

        if (IS_GSP_CLIENT(pGpu))
        {
            GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);

            if (pGSCI->bAtsSupported)
            {
                NV_PRINTF(LEVEL_INFO, "ATS supported\n");

                pGpu->setProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED, NV_TRUE);
            }

            //
            // PDB_PROP_GPU_C2C_SYSMEM is already set in Physical-RM, but not
            // in Kernel-RM, where it is actually consumed. Set it in
            // Kernel-RM when the platform is self-hosted and the C2C links
            // are up, which indicates that C2C is connected to the CPU and
            // that Physical-RM has set up HSHUB to route sysmem through C2C.
            //
            pGpu->setProperty(pGpu, PDB_PROP_GPU_C2C_SYSMEM, NV_TRUE);
        }

        //
        // kmemsysSetupCoherentCpuLink() should not be called from Physical
        // RM; it is intended to run on the kernel side to update
        // KernelMemorySystem for C2C and NUMA functionality.
        //
        NV_ASSERT_OK_OR_GOTO(status, kmemsysSetupCoherentCpuLink(pGpu, pKernelMemorySystem, NV_FALSE), fail);
    }

fail:
    if (status != NV_OK)
    {
        portMemFree((void *)pKernelMemorySystem->pStaticConfig);
        pKernelMemorySystem->pStaticConfig = NULL;
    }

    return status;
}

NV_STATUS
kmemsysStatePreLoad_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    //
    // Program the sysmem flush buffer address and assert that the register contents are valid.
    // The HAL wiring is such that a given RM build will only do one or the other (e.g., RM offloaded
    // to ucode won't program the register itself but will assert that its contents are valid).
    //
    kmemsysProgramSysmemFlushBuffer_HAL(pGpu, pKernelMemorySystem);
    kmemsysAssertSysmemFlushBufferValid_HAL(pGpu, pKernelMemorySystem);

    // Self-hosted GPUs should have their memory onlined by now.
    if (gpuIsSelfHosted(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
        osNumaOnliningEnabled(pGpu->pOsGpuInfo) &&
        !pKernelMemorySystem->bNumaNodesAdded)
    {
        //
        // TODO: Bug 1945658: Error checking is deferred from stateInit so
        // that stateDestroy still gets called. See the
        // kmemsysNumaAddMemory_HAL call site for further details.
        //
        return NV_ERR_INVALID_STATE;
    }

    return NV_OK;
}

NV_STATUS
kmemsysStatePostLoad_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    if (IS_SILICON(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
    {
        NV_STATUS status = kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "ATS peer setup failed.\n");
            return status;
        }
    }

    return NV_OK;
}

NV_STATUS
kmemsysStatePreUnload_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    if (IS_SILICON(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
    {
        kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
    }
    return NV_OK;
}

/*!
 * Release the state accumulated in StateInit.
 * @param[in]  pGpu pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 */
void kmemsysStateDestroy_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // Teardown of the coherent CPU link is not required on Physical RM
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    if (pKernelBif && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
    {
        kmemsysTeardownCoherentCpuLink(pGpu, pKernelMemorySystem, NV_FALSE);
    }

    portMemFree((void *)pKernelMemorySystem->pStaticConfig);
    pKernelMemorySystem->pStaticConfig = NULL;
}

/*!
 * Returns MemorySystem settings that are static after GPU state init/load is
 * finished.
 */
const MEMORY_SYSTEM_STATIC_CONFIG *
kmemsysGetStaticConfig_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // Return NULL if state init has not yet completed.
    NV_ASSERT_OR_ELSE(pKernelMemorySystem != NULL, return NULL);

    return pKernelMemorySystem->pStaticConfig;
}
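
//
// Illustrative usage sketch (comment only, not compiled): callers are
// expected to check the returned pointer for NULL, since the static config
// is only valid once state init has completed, e.g.:
//
//     const MEMORY_SYSTEM_STATIC_CONFIG *pConfig =
//         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
//     if ((pConfig != NULL) && pConfig->bOneToOneComptagLineAllocation)
//     {
//         // ... take the comptag-aware path ...
//     }
//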

void
kmemsysDestruct_IMPL
(
    KernelMemorySystem *pKernelMemorySystem
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pKernelMemorySystem);

    //
    // kmemsysNumaRemoveAllMemory_HAL() is called here in Destruct instead of
    // Destroy to guarantee that NUMA memory is removed. This goes against
    // Init/Destroy symmetry, but it is necessary because kmemsysStateDestroy
    // may not be called for all cases when kmemsysStateInit was called
    // (e.g., when kmemsys or another engine afterwards fails Init).
    //
    // If NUMA memory is not removed, then all subsequent attempts to add NUMA
    // memory will fail, which will cause failures in future RM init attempts.
    //
    if (pKernelMemorySystem->memPartitionNumaInfo != NULL)
    {
        if (pKernelMemorySystem->bNumaNodesAdded == NV_TRUE)
        {
            kmemsysNumaRemoveAllMemory_HAL(pGpu, pKernelMemorySystem);
        }
        portMemFree(pKernelMemorySystem->memPartitionNumaInfo);
    }

    pKernelMemorySystem->sysmemFlushBuffer = 0;
    memdescFree(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
    memdescDestroy(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
    pKernelMemorySystem->pSysmemFlushBufferMemDesc = NULL;

    portMemSet(pKernelMemorySystem->gpuInstanceMemConfig, 0, sizeof(pKernelMemorySystem->gpuInstanceMemConfig));
}
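
//
// Note on the teardown order above: memdescFree() releases the memory that
// the descriptor describes, and memdescDestroy() then releases the
// descriptor itself. A minimal create-side sketch of that lifecycle
// (comment only, not compiled; size and attributes are assumptions):
//
//     MEMORY_DESCRIPTOR *pMemDesc = NULL;
//     NV_ASSERT_OK(memdescCreate(&pMemDesc, pGpu, RM_PAGE_SIZE, 0, NV_TRUE,
//                                ADDR_SYSMEM, NV_MEMORY_UNCACHED,
//                                MEMDESC_FLAGS_NONE));
//     NV_ASSERT_OK(memdescAlloc(pMemDesc));  // paired with memdescFree()
//     // ... use the memory ...
//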

NV_STATUS
kmemsysAllocComprResources_KERNEL
(
    OBJGPU               *pGpu,
    KernelMemorySystem   *pKernelMemorySystem,
    FB_ALLOC_INFO        *pFbAllocInfo,
    NvU64                 origSize,
    NvU32                 kindChosen,
    NvU32                *pRetAttr,
    NvU32                 retAttr2
)
{
    MemoryManager                     *pMemoryManager      = GPU_GET_MEMORY_MANAGER(pGpu);
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);
    NvU32                              gfid;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    NV_ASSERT_OR_RETURN(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation,
        NV_ERR_INVALID_STATE);

    NV_CHECK_OR_RETURN(LEVEL_ERROR,
        !FLD_TEST_DRF(OS32, _ALLOC, _COMPTAG_OFFSET_USAGE, _FIXED, pFbAllocInfo->ctagOffset),
        NV_ERR_INVALID_ARGUMENT);

    // Fail the allocation if scrub on free is disabled
    if (!memmgrIsScrubOnFreeEnabled(pMemoryManager))
    {
        if (!(IS_SIMULATION(pGpu) || IsDFPGA(pGpu) || (IS_EMULATION(pGpu) && RMCFG_FEATURE_PLATFORM_MODS) ||
              (RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE)) ||
              hypervisorIsVgxHyper() ||
              IS_GFID_VF(gfid) ||
              (IsSLIEnabled(pGpu) && !(RMCFG_FEATURE_PLATFORM_WINDOWS &&
                                       !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))))
           )
        {
            NV_PRINTF(LEVEL_ERROR, "Compressible surfaces cannot be allocated on a system "
                    "where scrub on free is disabled\n");
            return NV_ERR_INVALID_STATE;
        }
    }
    else if (pMemorySystemConfig->bOneToOneComptagLineAllocation)
    {
        NV_ASSERT_OR_RETURN(memmgrUseVasForCeMemoryOps(pMemoryManager), NV_ERR_INVALID_STATE);
    }

    FB_SET_HWRESID_CTAGID_FERMI(pFbAllocInfo->hwResId, FB_HWRESID_CTAGID_VAL_FERMI(-1));
    *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _REQUIRED, *pRetAttr);
    return NV_OK;
}

/*!
 * @brief Initializes static config data from the Physical side.
 * @param[in]  pGpu pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[out] pConfig pointer to the static config initialized by the Physical driver.
 *
 * @return NV_OK upon success.
 *         NV_ERR* otherwise.
 */
NV_STATUS
kmemsysInitStaticConfig_KERNEL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    MEMORY_SYSTEM_STATIC_CONFIG *pConfig
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV_STATUS status;

    status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                                NV2080_CTRL_CMD_INTERNAL_MEMSYS_GET_STATIC_CONFIG,
                                pConfig, sizeof(*pConfig));
    return status;
}

/*!
 * @brief   Function to map swizzId to mem size given total mem
 *
 * @param[in]   pGpu
 * @param[in]   pKernelMemorySystem
 * @param[in]   swizzId
 * @param[in]   totalRange          total memory range
 * @param[out]  pPartitionSizeFlag  Flag stating partition memory size
 * @param[out]  pSizeInBytes        Memory size in bytes supported by partition
 */
NV_STATUS
kmemsysSwizzIdToMIGMemSize_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE totalRange,
    NvU32 *pPartitionSizeFlag,
    NvU64 *pSizeInBytes
)
{
    //
    // To handle the straddling issue, we always treat the memory for a given
    // swizzId as a sum of the minimum-sized segments allowed in partitioning.
    //
    NvU64 memSize = rangeLength(totalRange) / KMIGMGR_MAX_GPU_INSTANCES;

    switch (swizzId)
    {
        case 0:
        {
            *pSizeInBytes = memSize * KMIGMGR_MAX_GPU_INSTANCES;
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _FULL);
            break;
        }

        case 1:
        case 2:
        {
            *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 2));
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _HALF);
            break;
        }

        case 3:
        case 4:
        case 5:
        case 6:
        {
            *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 4));
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _QUARTER);
            break;
        }

        case 7:
        case 8:
        case 9:
        case 10:
        case 11:
        case 12:
        case 13:
        case 14:
        {
            *pSizeInBytes = memSize;
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _EIGHTH);
            break;
        }

        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported swizzId %d\n", swizzId);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    if ((*pSizeInBytes == 0) &&
        !pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB) &&
        !pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
    {
        NV_PRINTF(LEVEL_ERROR, "Insufficient memory\n");
        DBG_BREAKPOINT();
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }
    return NV_OK;
}
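
//
// Worked example (assuming KMIGMGR_MAX_GPU_INSTANCES == 8): with a 96 GB
// partitionable range, memSize = 96 GB / 8 = 12 GB, so the mapping above
// yields:
//
//     swizzId 0     -> 12 GB * 8 = 96 GB (_FULL)
//     swizzId 1-2   -> 12 GB * 4 = 48 GB (_HALF)
//     swizzId 3-6   -> 12 GB * 2 = 24 GB (_QUARTER)
//     swizzId 7-14  -> 12 GB     = 12 GB (_EIGHTH)
//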

/*!
 * @brief   Function to map swizzId to mem range given total range
 */
NV_STATUS
kmemsysSwizzIdToMIGMemRange_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE totalRange,
    NV_RANGE *pAddrRange
)
{
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NV_STATUS rmStatus = NV_OK;
    NvU32 memSizeFlag = 0;
    NvU32 minSwizzId = 0;
    NvU64 unalignedStartAddr = 0;
    NvU64 memSize = 0;
    NV_RANGE swizzIdRange = NV_RANGE_EMPTY;

    NV_ASSERT_OR_RETURN(!rangeIsEmpty(totalRange), NV_ERR_INVALID_ARGUMENT);

    // Get the swizzId-to-size mapping
    NV_ASSERT_OK_OR_RETURN(
        kmemsysSwizzIdToMIGMemSize(pGpu, pKernelMemorySystem, swizzId, totalRange, &memSizeFlag, &memSize));

    swizzIdRange = kmigmgrMemSizeFlagToSwizzIdRange_HAL(pGpu, pKernelMIGManager, memSizeFlag);
    NV_ASSERT_OR_RETURN(!rangeIsEmpty(swizzIdRange), NV_ERR_INVALID_ARGUMENT);

    minSwizzId = swizzIdRange.lo;

    unalignedStartAddr = (totalRange.lo + (memSize * (swizzId - minSwizzId)));
    *pAddrRange = rangeMake(unalignedStartAddr, unalignedStartAddr + memSize - 1);

    return rmStatus;
}
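
//
// Worked example: take swizzId 2 (a _HALF partition) and assume the _HALF
// swizzId range is [1, 2], totalRange.lo == 0, and memSize == 48 GB. Then
// minSwizzId == 1 and the instance occupies
//
//     [0 + 48 GB * (2 - 1), 96 GB - 1]
//
// i.e., the upper half of the partitionable range.
//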

/*!
 * @brief   Function to return GPU instance memory address range
 */
NV_STATUS
kmemsysGetMIGGPUInstanceMemInfo_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE *pAddrRange
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64 vmmuSegmentSize;
    NvU64 startAddr;
    NvU64 endAddr;

    NV_ASSERT_OR_RETURN(pAddrRange != NULL, NV_ERR_INVALID_ARGUMENT);
    *pAddrRange = NV_RANGE_EMPTY;
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_STATE);

    // Not supported in vGPU or ZERO_FB configs
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
                       NV_OK);

    //
    // VMMU is not supported in AMODEL. Use the legacy swizzId calculation
    // instead of relying on VMMU segments to calculate the address range.
    //
    if (IsAMODEL(pGpu))
    {
        NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
        return kmemsysSwizzIdToMIGMemRange(pGpu, pKernelMemorySystem, swizzId, partitionableMemoryRange, pAddrRange);
    }

    // Get the VMMU segment size
    vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
    NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);

    startAddr = pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment * vmmuSegmentSize;
    endAddr = startAddr + (pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment * vmmuSegmentSize) - 1;
    *pAddrRange = rangeMake(startAddr, endAddr);

    return NV_OK;
}
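
//
// Worked example of the segment arithmetic above (assuming a 32 MB VMMU
// segment size): an instance recorded with startingVmmuSegment == 4 and
// memSizeInVmmuSegment == 1024 maps to
//
//     startAddr = 4 * 32 MB = 128 MB
//     endAddr   = 128 MB + (1024 * 32 MB) - 1 = 128 MB + 32 GB - 1
//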

/*!
 * @brief   Function to populate static GPU instance memory config which will be
 *          utilized for GPU instance memory query and memory allocation
 */
NV_STATUS
kmemsysPopulateMIGGPUInstanceMemConfig_KERNEL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NvU64 vmmuSegmentSize;
    NvU64 totalVmmuSegments;
    NvU64 alignedStartAddr;
    NvU64 alignedEndAddr;
    NvU32 swizzId;

    // Not needed in vGPU or ZERO_FB configs
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
                       NV_OK);

    // Nothing to do if MIG is not supported
    NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);

    // Get the VMMU segment size
    vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
    NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);

    alignedStartAddr = partitionableMemoryRange.lo;
    alignedEndAddr = partitionableMemoryRange.hi;
    if (alignedStartAddr != 0)
    {
        alignedStartAddr = NV_IS_ALIGNED64(alignedStartAddr, vmmuSegmentSize) ?
                           alignedStartAddr + vmmuSegmentSize :
                           NV_ALIGN_UP64(alignedStartAddr, vmmuSegmentSize);
    }

    if (NV_IS_ALIGNED64(alignedEndAddr + 1, vmmuSegmentSize))
    {
        alignedEndAddr = alignedEndAddr - vmmuSegmentSize;
    }

    totalVmmuSegments = (alignedEndAddr - alignedStartAddr + 1) / vmmuSegmentSize;
    for (swizzId = 0; swizzId < KMIGMGR_MAX_GPU_SWIZZID; swizzId++)
    {
        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            kmemsysSwizzIdToVmmuSegmentsRange_HAL(pGpu, pKernelMemorySystem, swizzId, vmmuSegmentSize, totalVmmuSegments));
    }

    return NV_OK;
}
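
//
// Worked example of the alignment above (assuming a 32 MB VMMU segment size):
// a partitionable range starting at 48 MB is not segment aligned, so it is
// aligned up to 64 MB; a start that is already aligned (and nonzero) instead
// skips one full segment. Likewise, if (end + 1) is segment aligned, the
// last segment is dropped. The division then floors, so totalVmmuSegments
// counts only whole segments inside the partitionable range.
//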

/*!
 * @brief Gets GPU instance memory configuration based on swizzId
 */
NV_STATUS
kmemsysGetMIGGPUInstanceMemConfigFromSwizzId_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    const MIG_GPU_INSTANCE_MEMORY_CONFIG **ppGPUInstanceMemConfig
)
{
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);

    //
    // MODS makes a control call to describe GPU instances before this is
    // populated; return NV_ERR_INVALID_STATE rather than stale data in that
    // case.
    //
    NV_ASSERT_OR_RETURN(pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized, NV_ERR_INVALID_STATE);

    *ppGPUInstanceMemConfig = &pKernelMemorySystem->gpuInstanceMemConfig[swizzId];
    return NV_OK;
}

/*!
 * @brief Set GPU instance memory config information and mark it initialized
 */
NV_STATUS
kmemsysInitMIGGPUInstanceMemConfigForSwizzId_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NvU64 startingVmmuSegment,
    NvU64 memSizeInVmmuSegment
)
{
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);

    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment = startingVmmuSegment;
    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment = memSizeInVmmuSegment;
    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized = NV_TRUE;

    NV_PRINTF(LEVEL_INFO,
        "GPU Instance Mem Config for swizzId = 0x%x : MemStartSegment = 0x%llx, MemSizeInSegments = 0x%llx\n",
        swizzId,
        pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment,
        pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment);

    return NV_OK;
}
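
//
// Illustrative usage sketch (comment only, not compiled): the populate path
// records the per-swizzId segment layout with the setter above, after which
// lookups go through the getter, e.g.:
//
//     const MIG_GPU_INSTANCE_MEMORY_CONFIG *pMemConfig;
//     NV_ASSERT_OK(kmemsysGetMIGGPUInstanceMemConfigFromSwizzId(
//         pGpu, pKernelMemorySystem, swizzId, &pMemConfig));
//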

/*!
 * @brief Ensure that the sysmem flush buffer has been initialized
 *
 * Setting up the sysmem flush buffer needs to be done very early in some cases
 * as it's required for the GPU to perform a system flush. One such case is
 * resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
 * init.
 *
 * @returns NV_OK if the sysmem flush buffer has been initialized.
 */
NV_STATUS
kmemsysEnsureSysmemFlushBufferInitialized_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    if (IS_VIRTUAL(pGpu)    ||
        IS_GSP_CLIENT(pGpu) ||
        RMCFG_FEATURE_PLATFORM_GSP)
    {
        return NV_OK;
    }

    return kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem);
}

/*!
 * @brief Handle sysmem NVLink/C2C, NUMA and ATS functionality
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[in] bFlush              Whether the CPU cache of the GPU mapping
 *                                should be flushed
 *
 * @return  NV_OK on success
 */
NV_STATUS
kmemsysSetupCoherentCpuLink_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvBool              bFlush
)
{
    KernelBus     *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64          numaOnlineSize = 0;
    NvU64          fbSize         = (pMemoryManager->Ram.fbTotalMemSizeMb << 20);
    NvU32          data32;
    NvBool         bCpuMapping    = NV_TRUE; // Default enable
    NvS32          numaNodeId     = NV0000_CTRL_NO_NUMA_NODE;
    NvU64          memblockSize   = 0;
    NvU64          numaOnlineBase = 0;
    NvU64          rsvdFastSize   = 0;
    NvU64          rsvdSlowSize   = 0;
    NvU64          rsvdISOSize    = 0;
    NvU64          totalRsvdBytes = 0;

    NV_ASSERT_OK_OR_RETURN(kmemsysGetFbNumaInfo_HAL(pGpu, pKernelMemorySystem,
                                                    &pKernelMemorySystem->coherentCpuFbBase,
                                                    &numaNodeId));
    if (pKernelMemorySystem->coherentCpuFbBase != 0)
    {
        if (gpuIsSelfHosted(pGpu))
        {
            //
            // For self-hosted, coherentCpuFbEnd extends only to the end of
            // the FB and NOT to the end of the FB AMAP range, since
            // self-hosted doesn't support indirect peers and requires GPU
            // NVLink for peer traffic.
            //
            pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase + fbSize;
        }
        else
        {
            RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
            NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS params = {0};

            NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
                                                   pGpu->hInternalClient,
                                                   pGpu->hInternalSubdevice,
                                                   NV2080_CTRL_CMD_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE,
                                                   &params,
                                                   sizeof(NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS)));
            //
            // An indirect peer (one GV100 reaching another through P9) in a
            // P9+GV100 system requires coherentCpuFbEnd to include the entire
            // FB AMAP range, even when the FB size is less than the FB AMAP
            // size.
            //
            pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase +
                                                    params.coherentFbApertureSize;
        }
    }

    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_OVERRIDE_GPU_NUMA_NODE_ID, &data32)) == NV_OK)
    {
        numaNodeId = (NvS32)data32;
        NV_PRINTF(LEVEL_ERROR, "Override GPU NUMA node ID %d!\n", numaNodeId);
    }

    // Check the regkey override that forces the BAR path instead of NVLink/C2C
    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_RM_FORCE_BAR_PATH, &data32) == NV_OK) &&
        (data32 == 1))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Force disabling NVLINK/C2C mappings through regkey.\n");

        bCpuMapping = NV_FALSE;
    }

    if ((pKernelMemorySystem->coherentCpuFbBase == 0) || !bCpuMapping)
    {
        return NV_OK;
    }

    NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

    memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);

    //
    // Calculate the size of the memory which can be safely onlined to the
    // kernel after accounting for the different reserved memory requirements.
    //
    // Align reserved memory to 64K granularity.
    // TODO: rsvdMemorySize is not final at this point on GH180; it currently
    // does not grow after this point, but that needs to be fixed.
    //
    totalRsvdBytes += NV_ALIGN_UP(pMemoryManager->rsvdMemorySize, 0x10000);
    totalRsvdBytes += (rsvdFastSize + rsvdSlowSize + rsvdISOSize);
    totalRsvdBytes += pMemoryManager->Ram.reservedMemSize;

    //
    // TODO: Make sure the onlineable memory is aligned to memblockSize.
    // Currently any leftover memory is simply wasted because nothing can
    // access it. If the FB size itself is not memblock-size aligned (because
    // of CBC and row-remapper deductions), then the memory wastage is
    // unavoidable.
    //
    numaOnlineSize = NV_ALIGN_DOWN64(fbSize - totalRsvdBytes, memblockSize);

    pKernelMemorySystem->numaOnlineBase   = numaOnlineBase;
    pKernelMemorySystem->numaOnlineSize   = numaOnlineSize;

    NV_PRINTF(LEVEL_INFO, "fbSize: 0x%llx NUMA reserved memory size: 0x%llx online memory size: 0x%llx\n",
                  fbSize, totalRsvdBytes, numaOnlineSize);
    //
    // TODO: Bug 1945658: Soldier through on GPU memory add failure (often
    // possible because the auto-online setting is missing) and instead check
    // for failure on stateLoad. Any failure in StateInit results in
    // gpuStateDestroy not getting called; kgspUnloadRm_IMPL from
    // gpuStateDestroy also doesn't get called, leaving GSP in an unclean
    // state and requiring a GPU reset to recover.
    //
    // kmemsysNumaAddMemory_HAL by itself cannot be called from stateLoad
    // because the memory mapping that follows this call site comes from the
    // linear kernel virtual address range when memory is added to the kernel,
    // vs. the VMALLOC_START region when memory is not added.
    //
    NV_ASSERT_OK(kmemsysNumaAddMemory_HAL(pGpu, pKernelMemorySystem, 0, 0,
                                          numaOnlineSize, &numaNodeId));
    pGpu->numaNodeId = numaNodeId;

    NV_ASSERT_OK_OR_RETURN(kbusCreateCoherentCpuMapping_HAL(pGpu, pKernelBus, numaOnlineSize, bFlush));

    // Set the coherent link mapping property only once the mapping is created
    pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_TRUE);

    NV_ASSERT_OK_OR_RETURN(kbusVerifyCoherentLink_HAL(pGpu, pKernelBus));

    return NV_OK;
}
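
//
// Worked example of the onlineable-size math above (all values assumed): with
// fbSize == 16 GB, total reserved memory of 640 MB after the 64K alignment,
// and a 512 MB kernel memblock size:
//
//     numaOnlineSize = NV_ALIGN_DOWN64(16384 MB - 640 MB, 512 MB)
//                    = NV_ALIGN_DOWN64(15744 MB, 512 MB)
//                    = 15360 MB
//
// The 384 MB remainder is never onlined and is therefore unusable by the
// kernel, which is exactly the wastage the TODO above refers to.
//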

/*!
 * @brief Teardown sysmem NVLink/C2C, NUMA and ATS functionality
 *
 * @param[in] pGpu                 OBJGPU pointer
 * @param[in] pKernelMemorySystem  Kernel Memory System pointer
 * @param[in] bFlush               Whether the CPU cache of the GPU mapping
 *                                 should be flushed
 */
void
kmemsysTeardownCoherentCpuLink_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvBool              bFlush
)
{
    kbusTeardownCoherentCpuMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), bFlush);
    pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_FALSE);
}

NV_STATUS
kmemsysSendL2InvalidateEvict_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32               flags
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_PARAMS params = {0};

    params.flags = flags;

    return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                           NV2080_CTRL_CMD_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT,
                           &params, sizeof(params));
}

NV_STATUS
kmemsysSendFlushL2AllRamsAndCaches_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                           NV2080_CTRL_CMD_INTERNAL_MEMSYS_FLUSH_L2_ALL_RAMS_AND_CACHES,
                           NULL, 0);
}
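
//
// Illustrative usage sketch (comment only, not compiled): both helpers above
// are thin wrappers over internal control calls into Physical RM, so
// kernel-side callers invoke them directly, e.g.:
//
//     NV_ASSERT_OK(kmemsysSendFlushL2AllRamsAndCaches(pGpu, pKernelMemorySystem));
//     NV_ASSERT_OK(kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flags));
//
// where "flags" must be one of the NV2080_CTRL_INTERNAL_MEMSYS L2
// invalidate/evict flag values (the exact value is situational and not shown
// here).
//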

NV_STATUS
kmemsysGetUsableFbSize_KERNEL
(
    OBJGPU               *pGpu,
    KernelMemorySystem   *pKernelMemorySystem,
    NvU64                *pFbSize
)
{
    return kmemsysReadUsableFbSize_HAL(pGpu, pKernelMemorySystem, pFbSize);
}