/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "os/os.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "virtualization/hypervisor/hypervisor.h"
#include "vgpu/vgpu_events.h"
#include "objrpc.h"
#include "gpu/bif/kernel_bif.h"
#include "gpu/bus/kern_bus.h"
#include "nvRmReg.h"
#include "gpu/gsp/gsp_static_config.h"

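/*!
 * @brief Apply registry overrides to the KernelMemorySystem state.
 *
 * Currently the only override honored is NV_REG_STR_RM_L2_CLEAN_FB_PULL,
 * which can disable L2-clean FB pull (Bug 1032432).
 */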
static void
kmemsysInitRegistryOverrides
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    NvU32 data32;

    //
    // Bug 1032432. Check regkey for FB pull
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_L2_CLEAN_FB_PULL, &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_L2_CLEAN_FB_PULL_DISABLED)
            pKernelMemorySystem->bL2CleanFbPull = NV_FALSE;
    }
}

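/*!
 * @brief Constructor for the KernelMemorySystem engine.
 *
 * @param[in] pGpu                pointer to the GPU instance.
 * @param[in] pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[in] engDesc             engine descriptor.
 *
 * @return NV_OK upon success.
 */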
NV_STATUS
kmemsysConstructEngine_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    ENGDESCRIPTOR       engDesc
)
{
    pKernelMemorySystem->memPartitionNumaInfo = NULL;

    kmemsysInitRegistryOverrides(pGpu, pKernelMemorySystem);

    if (IS_GSP_CLIENT(pGpu))
    {
        //
        // Setting up the sysmem flush buffer needs to be done very early in some cases
        // as it's required for the GPU to perform a system flush. One such case is
        // resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
        // init.
        //
        NV_ASSERT_OK_OR_RETURN(kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem));
    }

    return NV_OK;
}

/*!
 * @brief Initialize the Kernel Memory System state.
 *
 * @param[in]  pGpu pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 *
 * @return NV_OK upon success.
 */
NV_STATUS kmemsysStateInitLocked_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    MEMORY_SYSTEM_STATIC_CONFIG *pStaticConfig;
    NV_STATUS status = NV_OK;

    NV_ASSERT_OK_OR_GOTO(status, kmemsysEnsureSysmemFlushBufferInitialized(pGpu, pKernelMemorySystem), fail);

    pStaticConfig = portMemAllocNonPaged(sizeof(*pStaticConfig));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticConfig != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pStaticConfig, 0, sizeof(*pStaticConfig));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        kmemsysInitStaticConfig_HAL(pGpu, pKernelMemorySystem, pStaticConfig),
        fail);

    pKernelMemorySystem->pStaticConfig = pStaticConfig;

    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    pKernelMemorySystem->memPartitionNumaInfo = portMemAllocNonPaged(sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);
    if (pKernelMemorySystem->memPartitionNumaInfo == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to allocate memory for numa information.\n");
        status = NV_ERR_NO_MEMORY;
        NV_ASSERT_OR_GOTO(0, fail);
    }
    portMemSet(pKernelMemorySystem->memPartitionNumaInfo, 0, sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);

    if (gpuIsSelfHosted(pGpu) &&
        (pKernelBif != NULL) && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP))
    {
        //
        // kmemsysSetupCoherentCpuLink should be done only for the self-hosted
        // configuration (SHH), where the coherent C2C link connects the host
        // CPU (TH500) and the GPU, and not in the externally-hosted (EHH) case,
        // where the host CPU (say x86) is connected to the GPU through PCIe and
        // C2C only connects the TH500 (for EGM memory) and the GPU. The
        // gpuIsSelfHosted(pGpu) check here distinguishes between the SHH and
        // EHH configurations, as the C2C link is up in both cases.
        //

        if (IS_GSP_CLIENT(pGpu))
        {
            GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);

            if (pGSCI->bAtsSupported)
            {
                NV_PRINTF(LEVEL_INFO, "ATS supported\n");

                pGpu->setProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED, NV_TRUE);
            }
        }
        if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL_WITH_SRIOV(pGpu))
        {
            //
            // PDB_PROP_GPU_C2C_SYSMEM is already set in Physical-RM but not in
            // Kernel-RM/Guest-RM, where it is actually consumed. Set
            // PDB_PROP_GPU_C2C_SYSMEM in Kernel-RM/Guest-RM when the platform
            // is self-hosted and the C2C links are up, which indicates that C2C
            // is connected to the CPU and Physical-RM would have set up the
            // HSHUB to route sysmem through C2C.
            //
            pGpu->setProperty(pGpu, PDB_PROP_GPU_C2C_SYSMEM, NV_TRUE);
        }

        //
        // kmemsysSetupCoherentCpuLink should not be called from physical RM as
        // it is intended to be called on the kernel side to update
        // KernelMemorySystem for C2C, NUMA functionality.
        //
        NV_ASSERT_OK_OR_GOTO(status, kmemsysSetupCoherentCpuLink(pGpu, pKernelMemorySystem, NV_FALSE), fail);
    }

fail:
    if (status != NV_OK)
    {
        portMemFree((void *)pKernelMemorySystem->pStaticConfig);
    }

    return status;
}

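/*!
 * @brief State pre-load hook: program (or validate) the sysmem flush buffer
 *        and verify that self-hosted NUMA memory has been onlined by now.
 */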
NV_STATUS
kmemsysStatePreLoad_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    //
    // Program the sysmem flush buffer address and assert that the register contents are valid.
    // The HAL wiring is such that a given RM build will only do one or the other (e.g., RM offloaded
    // to ucode won't program the register itself but will assert that its contents are valid).
    //
    kmemsysProgramSysmemFlushBuffer_HAL(pGpu, pKernelMemorySystem);
    kmemsysAssertSysmemFlushBufferValid_HAL(pGpu, pKernelMemorySystem);

    // Self-hosted GPUs should have their memory onlined by now.
    if (gpuIsSelfHosted(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
        osNumaOnliningEnabled(pGpu->pOsGpuInfo) &&
        !pKernelMemorySystem->bNumaNodesAdded)
    {
        //
        // TODO: Bug 1945658: Deferred error checking from stateInit so that stateDestroy
        // gets called. Refer to the kmemsysNumaAddMemory_HAL call site for further
        // details.
        //
        return NV_ERR_INVALID_STATE;
    }

    return NV_OK;
}

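/*!
 * @brief State post-load hook: set up ATS peers on silicon when ATS is
 *        supported.
 */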
NV_STATUS
kmemsysStatePostLoad_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    if (IS_SILICON(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
    {
        NV_STATUS status = kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "ATS peer setup failed.\n");
            return status;
        }
    }

    return NV_OK;
}

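/*!
 * @brief State pre-unload hook: tear down the ATS peers set up in StatePostLoad.
 */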
NV_STATUS
kmemsysStatePreUnload_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 flags
)
{
    if (IS_SILICON(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
    {
        kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
    }
    return NV_OK;
}

/*!
 * @brief Release the state accumulated in StateInit.
 *
 * @param[in]  pGpu pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 */
void kmemsysStateDestroy_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // Teardown of Coherent Cpu Link is not required on Physical RM
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    if (pKernelBif && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
    {
        kmemsysTeardownCoherentCpuLink(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu), NV_FALSE);
    }

    portMemFree((void *)pKernelMemorySystem->pStaticConfig);
}

/*!
 * Returns MemorySystem settings that are static after GPU state init/load is
 * finished.
 */
const MEMORY_SYSTEM_STATIC_CONFIG *
kmemsysGetStaticConfig_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // Bail out early if state init has not completed yet.
    NV_ASSERT_OR_ELSE(pKernelMemorySystem != NULL, return NULL);

    return pKernelMemorySystem->pStaticConfig;
}

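/*!
 * @brief Destructor: removes any NUMA memory still onlined, frees the NUMA
 *        bookkeeping and the sysmem flush buffer, and clears the GPU instance
 *        memory config.
 */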
void
kmemsysDestruct_IMPL
(
    KernelMemorySystem *pKernelMemorySystem
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pKernelMemorySystem);

    //
    // kmemsysNumaRemoveAllMemory_HAL() is called here in Destruct instead of
    // Destroy to guarantee that NUMA memory is removed. This goes against
    // Init/Destroy symmetry, but it is necessary because kmemsysStateDestroy
    // may not be called for all cases when kmemsysStateInit was called
    // (e.g., when kmemsys or another engine afterwards fails Init).
    //
    // If NUMA memory is not removed, then all subsequent attempts to add NUMA
    // memory will fail, which will cause failures in future RM init attempts.
    //
    if (pKernelMemorySystem->memPartitionNumaInfo != NULL)
    {
        if (pKernelMemorySystem->bNumaNodesAdded == NV_TRUE)
        {
            kmemsysNumaRemoveAllMemory_HAL(pGpu, pKernelMemorySystem);
        }
        portMemFree(pKernelMemorySystem->memPartitionNumaInfo);
    }

    pKernelMemorySystem->sysmemFlushBuffer = 0;
    memdescFree(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
    memdescDestroy(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
    pKernelMemorySystem->pSysmemFlushBufferMemDesc = NULL;

    portMemSet(pKernelMemorySystem->gpuInstanceMemConfig, 0, sizeof(pKernelMemorySystem->gpuInstanceMemConfig));
}

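/*!
 * @brief Kernel-side allocation of compression resources.
 *
 * Verifies that the static config supports comptag allocation, that the ctag
 * offset is not fixed, and that scrub on free is enabled (or the platform is
 * exempt), then marks the allocation as compression-required.
 */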
NV_STATUS
kmemsysAllocComprResources_KERNEL
(
    OBJGPU               *pGpu,
    KernelMemorySystem   *pKernelMemorySystem,
    FB_ALLOC_INFO        *pFbAllocInfo,
    NvU64                 origSize,
    NvU32                 kindChosen,
    NvU32                *pRetAttr,
    NvU32                 retAttr2
)
{
    MemoryManager                     *pMemoryManager      = GPU_GET_MEMORY_MANAGER(pGpu);
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);
    NvU32                              gfid;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    NV_ASSERT_OR_RETURN(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation,
        NV_ERR_INVALID_STATE);

    NV_CHECK_OR_RETURN(LEVEL_ERROR,
        !FLD_TEST_DRF(OS32, _ALLOC, _COMPTAG_OFFSET_USAGE, _FIXED, pFbAllocInfo->ctagOffset),
        NV_ERR_INVALID_ARGUMENT);

    // Fail the allocation if scrub on free is disabled
    if (!memmgrIsScrubOnFreeEnabled(pMemoryManager))
    {
        if (!(IS_SIMULATION(pGpu) || IsDFPGA(pGpu) || (IS_EMULATION(pGpu) && RMCFG_FEATURE_PLATFORM_MODS)
            || (RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
            || hypervisorIsVgxHyper()
            || IS_GFID_VF(gfid)
            || (IsSLIEnabled(pGpu) && !(RMCFG_FEATURE_PLATFORM_WINDOWS &&
                                      !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))))
           )
        {
            NV_PRINTF(LEVEL_ERROR, "Compressible surfaces cannot be allocated on a system "
                    "where scrub on free is disabled\n");
            return NV_ERR_INVALID_STATE;
        }
    }
    else if (pMemorySystemConfig->bOneToOneComptagLineAllocation)
    {
        NV_ASSERT_OR_RETURN(memmgrUseVasForCeMemoryOps(pMemoryManager), NV_ERR_INVALID_STATE);
    }

    FB_SET_HWRESID_CTAGID_FERMI(pFbAllocInfo->hwResId, FB_HWRESID_CTAGID_VAL_FERMI(-1));
    *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _REQUIRED, *pRetAttr);
    return NV_OK;
}

/*!
 * @brief Initializes static config data from the Physical side.
 * @param[in]  pGpu pointer to the GPU instance.
 * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[out] pConfig pointer to the static config init on Physical driver.
 *
 * @return NV_OK upon success.
 *         NV_ERR* otherwise.
 */
NV_STATUS
kmemsysInitStaticConfig_KERNEL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    MEMORY_SYSTEM_STATIC_CONFIG *pConfig
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV_STATUS status;

    status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                                NV2080_CTRL_CMD_INTERNAL_MEMSYS_GET_STATIC_CONFIG,
                                pConfig, sizeof(*pConfig));
    return status;
}

/*!
 * @brief   Function to map swizzId to mem size given total mem
 *
 * @param[in]   pGpu
 * @param[in]   pKernelMemorySystem
 * @param[in]   swizzId
 * @param[in]   totalRange          total memory range
 * @param[out]  pPartitionSizeFlag  Flag stating partition memory size
 * @param[out]  pSizeInBytes        Memory size in bytes supported by partition
 */
NV_STATUS
kmemsysSwizzIdToMIGMemSize_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE totalRange,
    NvU32 *pPartitionSizeFlag,
    NvU64 *pSizeInBytes
)
{
    //
    // To handle the straddling issue we always consider memory for different
    // swizzIds as the sum of the minimum-sized segments allowed in partitioning.
    //
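    //
    // Worked example (illustrative numbers, assuming KMIGMGR_MAX_GPU_INSTANCES
    // is 8): for an 80 GB partitionable range, memSize = 80 GB / 8 = 10 GB, so
    // swizzId 0 maps to 10 GB * 8 = 80 GB (_FULL), swizzIds 1-2 map to
    // 10 GB * 4 = 40 GB (_HALF), swizzIds 3-6 map to 10 GB * 2 = 20 GB
    // (_QUARTER), and swizzIds 7-14 map to 10 GB (_EIGHTH).
    //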
    NvU64 memSize = rangeLength(totalRange) / KMIGMGR_MAX_GPU_INSTANCES;

    switch (swizzId)
    {
        case 0:
        {
            *pSizeInBytes = memSize * KMIGMGR_MAX_GPU_INSTANCES;
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _FULL);
            break;
        }

        case 1:
        case 2:
        {
            *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 2));
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _HALF);
            break;
        }

        case 3:
        case 4:
        case 5:
        case 6:
        {
            *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 4));
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _QUARTER);
            break;
        }

        case 7:
        case 8:
        case 9:
        case 10:
        case 11:
        case 12:
        case 13:
        case 14:
        {
            *pSizeInBytes = memSize;
            *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _EIGHTH);
            break;
        }

        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported SwizzId %d\n", swizzId);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    if ((*pSizeInBytes == 0) &&
        !pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB) &&
        !pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
    {
        NV_PRINTF(LEVEL_ERROR, "Insufficient memory\n");
        DBG_BREAKPOINT();
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }
    return NV_OK;
}

/*!
 * @brief   Function to map swizzId to mem range given total range
 */
NV_STATUS
kmemsysSwizzIdToMIGMemRange_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE totalRange,
    NV_RANGE *pAddrRange
)
{
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NV_STATUS rmStatus = NV_OK;
    NvU32 memSizeFlag = 0;
    NvU32 minSwizzId = 0;
    NvU64 unalignedStartAddr = 0;
    NvU64 memSize = 0;
    NV_RANGE swizzIdRange = NV_RANGE_EMPTY;

    NV_ASSERT_OR_RETURN(!rangeIsEmpty(totalRange), NV_ERR_INVALID_ARGUMENT);

    // Get SwizzId to size mapping
    NV_ASSERT_OK_OR_RETURN(
        kmemsysSwizzIdToMIGMemSize(pGpu, pKernelMemorySystem, swizzId, totalRange, &memSizeFlag, &memSize));

    swizzIdRange = kmigmgrMemSizeFlagToSwizzIdRange_HAL(pGpu, pKernelMIGManager, memSizeFlag);
    NV_ASSERT_OR_RETURN(!rangeIsEmpty(swizzIdRange), NV_ERR_INVALID_ARGUMENT);

    minSwizzId = swizzIdRange.lo;

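    //
    // Partitions of a given size are laid out contiguously from the start of
    // the swizzId range. For example (illustrative numbers), quarter-size
    // swizzIds span [3, 6]; with memSize = 20 GB, swizzId 4 starts
    // 20 GB * (4 - 3) past totalRange.lo.
    //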
    unalignedStartAddr = (totalRange.lo + (memSize * (swizzId - minSwizzId)));
    *pAddrRange = rangeMake(unalignedStartAddr, unalignedStartAddr + memSize - 1);

    return rmStatus;
}

/*!
 * @brief   Function to return GPU instance memory address range
 */
NV_STATUS
kmemsysGetMIGGPUInstanceMemInfo_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE *pAddrRange
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64 vmmuSegmentSize;
    NvU64 startAddr;
    NvU64 endAddr;
    NvU64 partitionSize;

    NV_ASSERT_OR_RETURN(pAddrRange != NULL, NV_ERR_INVALID_ARGUMENT);
    *pAddrRange = NV_RANGE_EMPTY;
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_STATE);

    // Not supported in vGPU or ZERO_FB configs
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
                       NV_OK);

    //
    // VMMU is not supported in AMODEL. Use the legacy swizzId calculation
    // instead of relying on VMMU segments to calculate the address range.
    //
    if (IsAMODEL(pGpu))
    {
        NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
        return kmemsysSwizzIdToMIGMemRange(pGpu, pKernelMemorySystem, swizzId, partitionableMemoryRange, pAddrRange);
    }

    // Get the VMMU segment size
    vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
    NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);

    startAddr = pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment * vmmuSegmentSize;
    partitionSize = pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment * vmmuSegmentSize;

    if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
    {
        NvU64 memblockSize;
        NvU64 alignedStartAddr;

        NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

        //
        // Align the partition start address and size to the memblock size.
        // Some FB memory is wasted here if it is not already aligned.
        //
        alignedStartAddr = NV_ALIGN_UP64(startAddr, memblockSize);
        partitionSize -= (alignedStartAddr - startAddr);
        startAddr = alignedStartAddr;
        partitionSize = NV_ALIGN_DOWN64(partitionSize, memblockSize);
    }

    endAddr = startAddr + partitionSize - 1;

    *pAddrRange = rangeMake(startAddr, endAddr);

    return NV_OK;
}

/*!
 * @brief   Function to populate the static GPU instance memory config which
 *          will be utilized for GPU instance memory queries and memory
 *          allocation
 */
NV_STATUS
kmemsysPopulateMIGGPUInstanceMemConfig_KERNEL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NvU64 vmmuSegmentSize;
    NvU64 totalVmmuSegments;
    NvU64 alignedStartAddr;
    NvU64 alignedEndAddr;
    NvU32 swizzId;

    // Not needed in vGPU or ZERO_FB configs
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
                       NV_OK);

    // Nothing to do if MIG is not supported
    NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);

    // Get the VMMU segment size
    vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
    NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);

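    //
    // Trim the partitionable range to whole VMMU segments: an already-aligned
    // start address is advanced by one full segment, an unaligned one is
    // rounded up, and the end is pulled back by one segment when (end + 1)
    // falls on a segment boundary. For example (illustrative numbers), with a
    // 256 MB segment size, the range [0x10000000, 0x10FFFFFFF] becomes
    // [0x20000000, 0xFFFFFFFF].
    //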
    alignedStartAddr = partitionableMemoryRange.lo;
    alignedEndAddr = partitionableMemoryRange.hi;
    if (alignedStartAddr != 0)
    {
        alignedStartAddr = NV_IS_ALIGNED64(alignedStartAddr, vmmuSegmentSize) ?
                           alignedStartAddr + vmmuSegmentSize :
                           NV_ALIGN_UP64(alignedStartAddr, vmmuSegmentSize);
    }

    if (NV_IS_ALIGNED64(alignedEndAddr + 1, vmmuSegmentSize))
    {
        alignedEndAddr = alignedEndAddr - vmmuSegmentSize;
    }

    totalVmmuSegments = (alignedEndAddr - alignedStartAddr + 1) / vmmuSegmentSize;
    for (swizzId = 0; swizzId < KMIGMGR_MAX_GPU_SWIZZID; swizzId++)
    {
        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            kmemsysSwizzIdToVmmuSegmentsRange_HAL(pGpu, pKernelMemorySystem, swizzId, vmmuSegmentSize, totalVmmuSegments));
    }

    return NV_OK;
}

/*!
 * @brief Gets GPU instance memory configuration based on swizzId
 */
NV_STATUS
kmemsysGetMIGGPUInstanceMemConfigFromSwizzId_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    const MIG_GPU_INSTANCE_MEMORY_CONFIG **ppGPUInstanceMemConfig
)
{
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);

    //
    // MODS makes a control call to describe GPU instances before this config
    // is populated; return NV_ERR_INVALID_STATE in that case rather than
    // handing back uninitialized data.
    //
    NV_ASSERT_OR_RETURN(pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized, NV_ERR_INVALID_STATE);

    *ppGPUInstanceMemConfig = &pKernelMemorySystem->gpuInstanceMemConfig[swizzId];
    return NV_OK;
}

/*!
 * @brief Set GPU Instance memory config information and mark initialized
 */
NV_STATUS
kmemsysInitMIGGPUInstanceMemConfigForSwizzId_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NvU64 startingVmmuSegment,
    NvU64 memSizeInVmmuSegment
)
{
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);

    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment = startingVmmuSegment;
    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment = memSizeInVmmuSegment;
    pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized = NV_TRUE;

    NV_PRINTF(LEVEL_INFO,
        "GPU Instance Mem Config for swizzId = 0x%x : MemStartSegment = 0x%llx, MemSizeInSegments = 0x%llx\n",
        swizzId,
        pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment,
        pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment);

    return NV_OK;
}

/*!
 * @brief Ensure that the sysmem flush buffer has been initialized
 *
 * Setting up the sysmem flush buffer needs to be done very early in some cases
 * as it's required for the GPU to perform a system flush. One such case is
 * resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
 * init.
 *
 * @returns NV_OK if the sysmem flush buffer has been initialized.
 */
NV_STATUS
kmemsysEnsureSysmemFlushBufferInitialized_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    if (IS_VIRTUAL(pGpu)    ||
        IS_GSP_CLIENT(pGpu) ||
        RMCFG_FEATURE_PLATFORM_GSP)
    {
        return NV_OK;
    }

    return kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem);
}

/*!
 * @brief Handle sysmem NVLink/C2C, NUMA and ATS functionality
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[in] bFlush              Whether the CPU cache of the GPU mapping
 *                                should be flushed
 *
 * @return  NV_OK on success
 */
NV_STATUS
kmemsysSetupCoherentCpuLink_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvBool              bFlush
)
{
    KernelBus     *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64          numaOnlineSize = 0;
    NvU64          fbSize         = (pMemoryManager->Ram.fbTotalMemSizeMb << 20);
    NvU32          data32;
    NvBool         bCpuMapping    = NV_TRUE; // Default enable
    NvS32          numaNodeId     = NV0000_CTRL_NO_NUMA_NODE;
    NvU64          memblockSize   = 0;
    NvU64          numaOnlineBase = 0;
    NvU64          rsvdFastSize   = 0;
    NvU64          rsvdSlowSize   = 0;
    NvU64          rsvdISOSize    = 0;
    NvU64          totalRsvdBytes = 0;

    {
        NV_ASSERT_OK_OR_RETURN(kmemsysGetFbNumaInfo_HAL(pGpu, pKernelMemorySystem,
                                                        &pKernelMemorySystem->coherentCpuFbBase,
                                                        &numaNodeId));
        if (pKernelMemorySystem->coherentCpuFbBase != 0)
        {
            if (gpuIsSelfHosted(pGpu))
            {
                //
                // For self-hosted, coherentCpuFbEnd extends only to the end of
                // the FB size and NOT to the FB AMAP end, since self-hosted
                // doesn't support indirect peers and requires GPU NVLink for
                // peer traffic.
                //
                pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase + fbSize;
            }
            else
            {
                RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
                NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS params = {0};

                NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
                                                       pGpu->hInternalClient,
                                                       pGpu->hInternalSubdevice,
                                                       NV2080_CTRL_CMD_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE,
                                                       &params,
                                                       sizeof(NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS)));
                //
                // An indirect peer (which uses P9 to reach another GV100) in a
                // P9+GV100 system requires coherentCpuFbEnd to include the
                // entire FB AMAP range, even when the FB size is less than the
                // FB AMAP size.
                //
                pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase +
                                                        params.coherentFbApertureSize;
            }
        }
    }

    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_OVERRIDE_GPU_NUMA_NODE_ID, &data32)) == NV_OK)
    {
        numaNodeId = (NvS32)data32;
        NV_PRINTF(LEVEL_ERROR, "Override GPU NUMA node ID %d!\n", numaNodeId);
    }

    // Check whether NVLink/C2C mappings are force-disabled through the regkey.
    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_RM_FORCE_BAR_PATH, &data32) == NV_OK) &&
        (data32 == 1))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Force disabling NVLINK/C2C mappings through regkey.\n");

        bCpuMapping = NV_FALSE;
    }

    if ((pKernelMemorySystem->coherentCpuFbBase == 0) || !bCpuMapping)
    {
        return NV_OK;
    }

    NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

    memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);

    //
    // Calculate the size of the memory which can be safely onlined to the
    // kernel after accounting for the different reserved memory requirements.
    //
    // Align reserved memory to 64K granularity.
    // TODO: rsvdMemorySize is not finalized at this point in GH180; currently
    // rsvdMemorySize does not increase after this point, but this needs to be
    // fixed.
    //
    totalRsvdBytes += NV_ALIGN_UP(pMemoryManager->rsvdMemorySize, 0x10000);
    totalRsvdBytes += (rsvdFastSize + rsvdSlowSize + rsvdISOSize);
    totalRsvdBytes += pMemoryManager->Ram.reservedMemSize;

    //
    // TODO: make sure the onlineable memory is aligned to memblockSize.
    // Currently, any leftover memory is simply wasted because no one can
    // access it. If the FB size itself is not memblock-size aligned (because
    // of CBC and row-remapper deductions), the wastage is unavoidable.
    //
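    //
    // Worked example (illustrative numbers): with fbSize = 96 GB,
    // totalRsvdBytes = 1.25 GB and memblockSize = 512 MB, numaOnlineSize =
    // ALIGN_DOWN(94.75 GB, 512 MB) = 94.5 GB.
    //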
    numaOnlineSize = NV_ALIGN_DOWN64(fbSize - totalRsvdBytes, memblockSize);

    NV_PRINTF(LEVEL_INFO,
              "fbSize: 0x%llx NUMA reserved memory size: 0x%llx online memory size: 0x%llx\n",
              fbSize, totalRsvdBytes, numaOnlineSize);
    if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
    {
        pKernelMemorySystem->numaOnlineBase   = numaOnlineBase;
        pKernelMemorySystem->numaOnlineSize   = numaOnlineSize;
        //
        // TODO: Bug 1945658: Soldier through on GPU memory add failure (which
        // is often possible because of a missing auto-online setting) and
        // instead check for failure on stateLoad. Any failure in StateInit
        // results in gpuStateDestroy not getting called; kgspUnloadRm_IMPL
        // from gpuStateDestroy also doesn't get called, leaving GSP in an
        // unclean state and requiring a GPU reset to recover.
        //
        // kmemsysNumaAddMemory_HAL by itself cannot be called from stateLoad
        // because the memory mapping that follows this call site comes from the
        // linear kernel virtual address space when memory is added to the
        // kernel, vs. the VMALLOC_START region when memory is not added.
        //
        NV_ASSERT_OK(kmemsysNumaAddMemory_HAL(pGpu, pKernelMemorySystem, 0, 0,
                                              numaOnlineSize, &numaNodeId));
    }
    pGpu->numaNodeId = numaNodeId;

    NV_ASSERT_OK_OR_RETURN(kbusCreateCoherentCpuMapping_HAL(pGpu, pKernelBus, numaOnlineSize, bFlush));

    // Switch the toggle for coherent link mapping only if migration is successful
    pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_TRUE);

    NV_ASSERT_OK_OR_RETURN(kbusVerifyCoherentLink_HAL(pGpu, pKernelBus));

    return NV_OK;
}

/*!
 * @brief Teardown sysmem NVLink/C2C, NUMA and ATS functionality
 *
 * @param[in] pGpu                 OBJGPU pointer
 * @param[in] pKernelMemorySystem  Kernel Memory System pointer
 * @param[in] bFlush               Whether the CPU cache of the GPU mapping
 *                                 should be flushed
 */
void
kmemsysTeardownCoherentCpuLink_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvBool              bFlush
)
{
    kbusTeardownCoherentCpuMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), bFlush);
    pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_FALSE);
}

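/*!
 * @brief Send an L2 invalidate-and-evict request to the Physical RM.
 *
 * @param[in] flags NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT flags
 */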
NV_STATUS
kmemsysSendL2InvalidateEvict_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32               flags
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_PARAMS params = {0};

    params.flags = flags;

    return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                           NV2080_CTRL_CMD_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT,
                           &params, sizeof(params));
}

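/*!
 * @brief Ask the Physical RM to flush L2 and all RAMs and caches.
 */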
NV_STATUS
kmemsysSendFlushL2AllRamsAndCaches_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                           NV2080_CTRL_CMD_INTERNAL_MEMSYS_FLUSH_L2_ALL_RAMS_AND_CACHES,
                           NULL, 0);
}

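/*!
 * @brief Return the usable FB size by deferring to the HAL.
 */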
NV_STATUS
kmemsysGetUsableFbSize_KERNEL
(
    OBJGPU               *pGpu,
    KernelMemorySystem   *pKernelMemorySystem,
    NvU64                *pFbSize
)
{
    return kmemsysReadUsableFbSize_HAL(pGpu, pKernelMemorySystem, pFbSize);
}