11739a20eSAndy Ritger /*
2*3bf16b89SBernhard Stoeckner  * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
31739a20eSAndy Ritger  * SPDX-License-Identifier: MIT
41739a20eSAndy Ritger  *
51739a20eSAndy Ritger  * Permission is hereby granted, free of charge, to any person obtaining a
61739a20eSAndy Ritger  * copy of this software and associated documentation files (the "Software"),
71739a20eSAndy Ritger  * to deal in the Software without restriction, including without limitation
81739a20eSAndy Ritger  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
91739a20eSAndy Ritger  * and/or sell copies of the Software, and to permit persons to whom the
101739a20eSAndy Ritger  * Software is furnished to do so, subject to the following conditions:
111739a20eSAndy Ritger  *
121739a20eSAndy Ritger  * The above copyright notice and this permission notice shall be included in
131739a20eSAndy Ritger  * all copies or substantial portions of the Software.
141739a20eSAndy Ritger  *
151739a20eSAndy Ritger  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
161739a20eSAndy Ritger  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
171739a20eSAndy Ritger  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
181739a20eSAndy Ritger  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
191739a20eSAndy Ritger  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
201739a20eSAndy Ritger  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
211739a20eSAndy Ritger  * DEALINGS IN THE SOFTWARE.
221739a20eSAndy Ritger  */
231739a20eSAndy Ritger 
241739a20eSAndy Ritger #include "os/os.h"
251739a20eSAndy Ritger #include "gpu/mem_sys/kern_mem_sys.h"
261739a20eSAndy Ritger #include "gpu/mem_mgr/mem_mgr.h"
271739a20eSAndy Ritger #include "virtualization/hypervisor/hypervisor.h"
281739a20eSAndy Ritger #include "vgpu/vgpu_events.h"
291739a20eSAndy Ritger #include "objrpc.h"
301739a20eSAndy Ritger #include "gpu/bif/kernel_bif.h"
311739a20eSAndy Ritger #include "gpu/bus/kern_bus.h"
321739a20eSAndy Ritger #include "os/os.h"
3391676d66SBernhard Stoeckner #include "platform/sli/sli.h"
341739a20eSAndy Ritger #include "nvRmReg.h"
3590eb1077SAndy Ritger #include "gpu/gsp/gsp_static_config.h"
361739a20eSAndy Ritger 
371739a20eSAndy Ritger static void
kmemsysInitRegistryOverrides(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)381739a20eSAndy Ritger kmemsysInitRegistryOverrides
391739a20eSAndy Ritger (
401739a20eSAndy Ritger     OBJGPU *pGpu,
411739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
421739a20eSAndy Ritger )
431739a20eSAndy Ritger {
441739a20eSAndy Ritger     NvU32 data32;
451739a20eSAndy Ritger 
461739a20eSAndy Ritger     //
471739a20eSAndy Ritger     // Bug 1032432. Check regkey for FB pull
481739a20eSAndy Ritger     //
491739a20eSAndy Ritger     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_L2_CLEAN_FB_PULL, &data32) == NV_OK)
501739a20eSAndy Ritger     {
511739a20eSAndy Ritger         if (data32 == NV_REG_STR_RM_L2_CLEAN_FB_PULL_DISABLED)
521739a20eSAndy Ritger             pKernelMemorySystem->bL2CleanFbPull = NV_FALSE;
531739a20eSAndy Ritger     }
5491676d66SBernhard Stoeckner 
5591676d66SBernhard Stoeckner     if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_OVERRIDE_TO_GMK, &data32) == NV_OK) &&
5691676d66SBernhard Stoeckner         (data32 != NV_REG_STR_RM_OVERRIDE_TO_GMK_DISABLED))
5791676d66SBernhard Stoeckner     {
5891676d66SBernhard Stoeckner         pKernelMemorySystem->overrideToGMK = data32;
5991676d66SBernhard Stoeckner     }
6091676d66SBernhard Stoeckner 
611739a20eSAndy Ritger }
621739a20eSAndy Ritger 
63758b4ee8SAndy Ritger NV_STATUS
kmemsysConstructEngine_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,ENGDESCRIPTOR engDesc)64758b4ee8SAndy Ritger kmemsysConstructEngine_IMPL
65758b4ee8SAndy Ritger (
66758b4ee8SAndy Ritger     OBJGPU             *pGpu,
67758b4ee8SAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
68758b4ee8SAndy Ritger     ENGDESCRIPTOR       engDesc
69758b4ee8SAndy Ritger )
70758b4ee8SAndy Ritger {
71758b4ee8SAndy Ritger     pKernelMemorySystem->memPartitionNumaInfo = NULL;
72758b4ee8SAndy Ritger 
73758b4ee8SAndy Ritger     kmemsysInitRegistryOverrides(pGpu, pKernelMemorySystem);
74758b4ee8SAndy Ritger 
75758b4ee8SAndy Ritger     if (IS_GSP_CLIENT(pGpu))
76758b4ee8SAndy Ritger     {
77758b4ee8SAndy Ritger         // Setting up the sysmem flush buffer needs to be done very early in some cases
78758b4ee8SAndy Ritger         // as it's required for the GPU to perform a system flush. One such case is
79758b4ee8SAndy Ritger         // resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
80758b4ee8SAndy Ritger         // init.
81758b4ee8SAndy Ritger         NV_ASSERT_OK_OR_RETURN(kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem));
8291676d66SBernhard Stoeckner 
83758b4ee8SAndy Ritger     }
84758b4ee8SAndy Ritger 
85758b4ee8SAndy Ritger     return NV_OK;
86758b4ee8SAndy Ritger }
87758b4ee8SAndy Ritger 
881739a20eSAndy Ritger /*
891739a20eSAndy Ritger  * Initialize the Kernel Memory System state.
901739a20eSAndy Ritger  *
911739a20eSAndy Ritger  * @param[in]  pGpu pointer to the GPU instance.
921739a20eSAndy Ritger  * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
931739a20eSAndy Ritger  *
941739a20eSAndy Ritger  * @return NV_OK upon success.
951739a20eSAndy Ritger  */
kmemsysStateInitLocked_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)961739a20eSAndy Ritger NV_STATUS kmemsysStateInitLocked_IMPL
971739a20eSAndy Ritger (
981739a20eSAndy Ritger     OBJGPU *pGpu,
991739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
1001739a20eSAndy Ritger )
1011739a20eSAndy Ritger {
1021739a20eSAndy Ritger     MEMORY_SYSTEM_STATIC_CONFIG *pStaticConfig;
1031739a20eSAndy Ritger     NV_STATUS status = NV_OK;
1041739a20eSAndy Ritger 
1051739a20eSAndy Ritger     NV_ASSERT_OK_OR_GOTO(status, kmemsysEnsureSysmemFlushBufferInitialized(pGpu, pKernelMemorySystem), fail);
1061739a20eSAndy Ritger 
1071739a20eSAndy Ritger     pStaticConfig = portMemAllocNonPaged(sizeof(*pStaticConfig));
1081739a20eSAndy Ritger     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticConfig != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
109758b4ee8SAndy Ritger     portMemSet(pStaticConfig, 0, sizeof(*pStaticConfig));
1101739a20eSAndy Ritger 
1111739a20eSAndy Ritger     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1121739a20eSAndy Ritger         kmemsysInitStaticConfig_HAL(pGpu, pKernelMemorySystem, pStaticConfig),
1131739a20eSAndy Ritger         fail);
1141739a20eSAndy Ritger 
1151739a20eSAndy Ritger     pKernelMemorySystem->pStaticConfig = pStaticConfig;
1161739a20eSAndy Ritger 
117eb5c7665SAndy Ritger     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
118eb5c7665SAndy Ritger 
119eb5c7665SAndy Ritger     pKernelMemorySystem->memPartitionNumaInfo = portMemAllocNonPaged(sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);
120eb5c7665SAndy Ritger     if (pKernelMemorySystem->memPartitionNumaInfo == NULL)
121eb5c7665SAndy Ritger     {
122eb5c7665SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to allocate memory for numa information.\n");
123eb5c7665SAndy Ritger         status = NV_ERR_NO_MEMORY;
124eb5c7665SAndy Ritger         NV_ASSERT_OR_GOTO(0, fail);
125eb5c7665SAndy Ritger     }
126eb5c7665SAndy Ritger     portMemSet(pKernelMemorySystem->memPartitionNumaInfo, 0, sizeof(MEM_PARTITION_NUMA_INFO) * KMIGMGR_MAX_GPU_SWIZZID);
127eb5c7665SAndy Ritger 
128eb5c7665SAndy Ritger     if (gpuIsSelfHosted(pGpu) &&
129eb5c7665SAndy Ritger         (pKernelBif != NULL) && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP))
130eb5c7665SAndy Ritger     {
131eb5c7665SAndy Ritger         //
132eb5c7665SAndy Ritger         // memsysSetupCoherentCpuLink should be done only for the self hosted
133eb5c7665SAndy Ritger         // configuration(SHH) where the coherent C2C link connects host CPU(TH500) and GPU
134eb5c7665SAndy Ritger         // and not in the externally hosted(EHH) case where host CPU(say x86) is connected
135eb5c7665SAndy Ritger         // to GPU through PCIe and C2C only connects the TH500 (for EGM memory) and GPU.
136eb5c7665SAndy Ritger         // The gpuIsSelfHosted(pGpu) check here is to distinguish between the SHH
137eb5c7665SAndy Ritger         // and EHH configuration as C2C link is up in both of these cases.
138eb5c7665SAndy Ritger         //
139eb5c7665SAndy Ritger 
140eb5c7665SAndy Ritger         if (IS_GSP_CLIENT(pGpu))
141eb5c7665SAndy Ritger         {
142eb5c7665SAndy Ritger             GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);
143eb5c7665SAndy Ritger 
144eb5c7665SAndy Ritger             if (pGSCI->bAtsSupported)
145eb5c7665SAndy Ritger             {
146eb5c7665SAndy Ritger                 NV_PRINTF(LEVEL_INFO, "ATS supported\n");
147eb5c7665SAndy Ritger 
148eb5c7665SAndy Ritger                 pGpu->setProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED, NV_TRUE);
149eb5c7665SAndy Ritger             }
150b5bf85a8SAndy Ritger         }
151b5bf85a8SAndy Ritger         if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL_WITH_SRIOV(pGpu))
152b5bf85a8SAndy Ritger         {
153eb5c7665SAndy Ritger             //
154b5bf85a8SAndy Ritger             // PDB_PROP_GPU_C2C_SYSMEM is already set in physical-RM but not in
155b5bf85a8SAndy Ritger             // in Kernel-RM/Guest-RM where it is actually consumed. setting PDB_PROP_GPU_C2C_SYSMEM
156b5bf85a8SAndy Ritger             // in Kernel-RM/Guest-RM when the platform is self-hosted and the C2C links are up, which
157eb5c7665SAndy Ritger             // indicate the C2C is connected to CPU and Physical-RM would have set up the HSHUB
158eb5c7665SAndy Ritger             // to route sysmem through C2C.
159eb5c7665SAndy Ritger             //
160eb5c7665SAndy Ritger             pGpu->setProperty(pGpu, PDB_PROP_GPU_C2C_SYSMEM, NV_TRUE);
161eb5c7665SAndy Ritger         }
162eb5c7665SAndy Ritger 
163eb5c7665SAndy Ritger         //
164eb5c7665SAndy Ritger         // kmemesysSetupCoherentCpuLink should not be called from physical RM as
165eb5c7665SAndy Ritger         // it is intended to be called on kernel side to update
166eb5c7665SAndy Ritger         // KernelMemorySystem for C2C, NUMA functionality.
167eb5c7665SAndy Ritger         //
168eb5c7665SAndy Ritger         NV_ASSERT_OK_OR_GOTO(status, kmemsysSetupCoherentCpuLink(pGpu, pKernelMemorySystem, NV_FALSE), fail);
169eb5c7665SAndy Ritger     }
170eb5c7665SAndy Ritger 
17191676d66SBernhard Stoeckner     {
17291676d66SBernhard Stoeckner         KernelGmmu   *pKernelGmmu   = GPU_GET_KERNEL_GMMU(pGpu);
17391676d66SBernhard Stoeckner 
17491676d66SBernhard Stoeckner         //
17591676d66SBernhard Stoeckner         // Ask GMMU to set the large page size after we have initialized
17691676d66SBernhard Stoeckner         // memory and before we initialize BAR2.
17791676d66SBernhard Stoeckner         //
17891676d66SBernhard Stoeckner         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
17991676d66SBernhard Stoeckner             kgmmuCheckAndDecideBigPageSize_HAL(pGpu, pKernelGmmu));
18091676d66SBernhard Stoeckner     }
18191676d66SBernhard Stoeckner 
1821739a20eSAndy Ritger fail:
1831739a20eSAndy Ritger     if (status != NV_OK)
1841739a20eSAndy Ritger     {
1851739a20eSAndy Ritger         portMemFree((void *)pKernelMemorySystem->pStaticConfig);
1861739a20eSAndy Ritger     }
1871739a20eSAndy Ritger 
1881739a20eSAndy Ritger     return status;
1891739a20eSAndy Ritger }
1901739a20eSAndy Ritger 
1911739a20eSAndy Ritger NV_STATUS
kmemsysStatePreLoad_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)1921739a20eSAndy Ritger kmemsysStatePreLoad_IMPL
1931739a20eSAndy Ritger (
1941739a20eSAndy Ritger     OBJGPU *pGpu,
1951739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
1961739a20eSAndy Ritger     NvU32 flags
1971739a20eSAndy Ritger )
1981739a20eSAndy Ritger {
1991739a20eSAndy Ritger     //
2001739a20eSAndy Ritger     // Program the sysmem flush buffer address and assert that the register contents are valid.
2011739a20eSAndy Ritger     // The HAL wiring is such that a given RM build will only do one or the other (e.g., RM offloaded
2021739a20eSAndy Ritger     // to ucode won't program the register itself but will assert that its contents are valid).
2031739a20eSAndy Ritger     //
2041739a20eSAndy Ritger     kmemsysProgramSysmemFlushBuffer_HAL(pGpu, pKernelMemorySystem);
2051739a20eSAndy Ritger     kmemsysAssertSysmemFlushBufferValid_HAL(pGpu, pKernelMemorySystem);
2061739a20eSAndy Ritger 
207337e28efSBernhard Stoeckner     // Self Hosted GPUs should have its memory onlined by now.
208337e28efSBernhard Stoeckner     if (gpuIsSelfHosted(pGpu) &&
209337e28efSBernhard Stoeckner         pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
210337e28efSBernhard Stoeckner         osNumaOnliningEnabled(pGpu->pOsGpuInfo) &&
211337e28efSBernhard Stoeckner         !pKernelMemorySystem->bNumaNodesAdded)
212337e28efSBernhard Stoeckner     {
213337e28efSBernhard Stoeckner         //
214337e28efSBernhard Stoeckner         // TODO: Bug 1945658: Deferred error checking from stateInit so that stateDestroy
215337e28efSBernhard Stoeckner         // gets called. Refer kmemsysNumaAddMemory_HAL call site for further
216337e28efSBernhard Stoeckner         // details.
217337e28efSBernhard Stoeckner         //
218337e28efSBernhard Stoeckner         return NV_ERR_INVALID_STATE;
219337e28efSBernhard Stoeckner     }
220337e28efSBernhard Stoeckner 
2211739a20eSAndy Ritger     return NV_OK;
2221739a20eSAndy Ritger }
2231739a20eSAndy Ritger 
224eb5c7665SAndy Ritger NV_STATUS
kmemsysStatePostLoad_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)225eb5c7665SAndy Ritger kmemsysStatePostLoad_IMPL
226eb5c7665SAndy Ritger (
227eb5c7665SAndy Ritger     OBJGPU *pGpu,
228eb5c7665SAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
229eb5c7665SAndy Ritger     NvU32 flags
230eb5c7665SAndy Ritger )
231eb5c7665SAndy Ritger {
232eb5c7665SAndy Ritger     if (IS_SILICON(pGpu) &&
233eb5c7665SAndy Ritger         pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
234eb5c7665SAndy Ritger     {
235eb5c7665SAndy Ritger         NV_STATUS status = kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
236eb5c7665SAndy Ritger         if (status != NV_OK)
237eb5c7665SAndy Ritger         {
238eb5c7665SAndy Ritger             NV_PRINTF(LEVEL_ERROR, "ATS peer setup failed.\n");
239eb5c7665SAndy Ritger             return status;
240eb5c7665SAndy Ritger         }
241eb5c7665SAndy Ritger     }
242eb5c7665SAndy Ritger 
243eb5c7665SAndy Ritger     return NV_OK;
244eb5c7665SAndy Ritger }
245eb5c7665SAndy Ritger 
24612c07393SBernhard Stoeckner NV_STATUS
kmemsysStatePreUnload_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)24712c07393SBernhard Stoeckner kmemsysStatePreUnload_IMPL
24812c07393SBernhard Stoeckner (
24912c07393SBernhard Stoeckner     OBJGPU *pGpu,
25012c07393SBernhard Stoeckner     KernelMemorySystem *pKernelMemorySystem,
25112c07393SBernhard Stoeckner     NvU32 flags
25212c07393SBernhard Stoeckner )
25312c07393SBernhard Stoeckner {
25412c07393SBernhard Stoeckner     if (IS_SILICON(pGpu) &&
25512c07393SBernhard Stoeckner         pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
25612c07393SBernhard Stoeckner     {
25712c07393SBernhard Stoeckner         kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
25812c07393SBernhard Stoeckner     }
25912c07393SBernhard Stoeckner     return NV_OK;
26012c07393SBernhard Stoeckner }
26112c07393SBernhard Stoeckner 
2621739a20eSAndy Ritger /*
2631739a20eSAndy Ritger  * Release the state accumulated in StateInit.
2641739a20eSAndy Ritger  * @param[in]  pGpu pointer to the GPU instance.
2651739a20eSAndy Ritger  * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
2661739a20eSAndy Ritger  */
kmemsysStateDestroy_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)2671739a20eSAndy Ritger void kmemsysStateDestroy_IMPL
2681739a20eSAndy Ritger (
2691739a20eSAndy Ritger     OBJGPU *pGpu,
2701739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
2711739a20eSAndy Ritger )
2721739a20eSAndy Ritger {
2731739a20eSAndy Ritger 
274eb5c7665SAndy Ritger     // Teardown of Coherent Cpu Link is not required on Physical RM
275eb5c7665SAndy Ritger     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
276eb5c7665SAndy Ritger 
277eb5c7665SAndy Ritger     if (pKernelBif && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP) &&
278eb5c7665SAndy Ritger         pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
279eb5c7665SAndy Ritger     {
280eb5c7665SAndy Ritger         kmemsysTeardownCoherentCpuLink(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu), NV_FALSE);
281eb5c7665SAndy Ritger     }
282eb5c7665SAndy Ritger 
2831739a20eSAndy Ritger     portMemFree((void *)pKernelMemorySystem->pStaticConfig);
2841739a20eSAndy Ritger }
2851739a20eSAndy Ritger 
2861739a20eSAndy Ritger /*!
2871739a20eSAndy Ritger  * Returns MemorySystem settings that are static after GPU state init/load is
2881739a20eSAndy Ritger  * finished.
2891739a20eSAndy Ritger  */
2901739a20eSAndy Ritger const MEMORY_SYSTEM_STATIC_CONFIG *
kmemsysGetStaticConfig_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)2911739a20eSAndy Ritger kmemsysGetStaticConfig_IMPL
2921739a20eSAndy Ritger (
2931739a20eSAndy Ritger     OBJGPU *pGpu,
2941739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
2951739a20eSAndy Ritger )
2961739a20eSAndy Ritger {
2971739a20eSAndy Ritger     // check if state Init has not completed.
2981739a20eSAndy Ritger     NV_ASSERT_OR_ELSE(pKernelMemorySystem != NULL, return NULL);
2991739a20eSAndy Ritger 
3001739a20eSAndy Ritger     return pKernelMemorySystem->pStaticConfig;
3011739a20eSAndy Ritger }
3021739a20eSAndy Ritger 
3031739a20eSAndy Ritger void
kmemsysDestruct_IMPL(KernelMemorySystem * pKernelMemorySystem)3041739a20eSAndy Ritger kmemsysDestruct_IMPL
3051739a20eSAndy Ritger (
3061739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
3071739a20eSAndy Ritger )
3081739a20eSAndy Ritger {
309eb5c7665SAndy Ritger     OBJGPU *pGpu = ENG_GET_GPU(pKernelMemorySystem);
310eb5c7665SAndy Ritger 
311eb5c7665SAndy Ritger     //
312eb5c7665SAndy Ritger     // kmemsysNumaRemoveAllMemory_HAL() is called here in Destruct instead of
313eb5c7665SAndy Ritger     // Destroy to guarantee that NUMA memory is removed. This goes against
314eb5c7665SAndy Ritger     // Init/Destroy symmetry, but it is necessary because kmemsysStateDestroy
315eb5c7665SAndy Ritger     // may not be called for all cases when kmemsysStateInit was called
316eb5c7665SAndy Ritger     // (e.g., when kmemsys or another engine afterwards fails Init).
317eb5c7665SAndy Ritger     //
318eb5c7665SAndy Ritger     // If NUMA memory is not removed, then all subsequent attempts to add NUMA
319eb5c7665SAndy Ritger     // memory will fail, which will cause failures in future RM init attempts.
320eb5c7665SAndy Ritger     //
321eb5c7665SAndy Ritger     if (pKernelMemorySystem->memPartitionNumaInfo != NULL)
322eb5c7665SAndy Ritger     {
323eb5c7665SAndy Ritger         if (pKernelMemorySystem->bNumaNodesAdded == NV_TRUE)
324eb5c7665SAndy Ritger         {
325eb5c7665SAndy Ritger             kmemsysNumaRemoveAllMemory_HAL(pGpu, pKernelMemorySystem);
326eb5c7665SAndy Ritger         }
327eb5c7665SAndy Ritger         portMemFree(pKernelMemorySystem->memPartitionNumaInfo);
328eb5c7665SAndy Ritger     }
3294397463eSAndy Ritger 
3301739a20eSAndy Ritger     pKernelMemorySystem->sysmemFlushBuffer = 0;
3311739a20eSAndy Ritger     memdescFree(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
3321739a20eSAndy Ritger     memdescDestroy(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
3331739a20eSAndy Ritger     pKernelMemorySystem->pSysmemFlushBufferMemDesc = NULL;
3341739a20eSAndy Ritger 
3351739a20eSAndy Ritger     portMemSet(pKernelMemorySystem->gpuInstanceMemConfig, 0, sizeof(pKernelMemorySystem->gpuInstanceMemConfig));
336b5bf85a8SAndy Ritger 
3371739a20eSAndy Ritger }
3381739a20eSAndy Ritger 
3391739a20eSAndy Ritger NV_STATUS
kmemsysAllocComprResources_KERNEL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,FB_ALLOC_INFO * pFbAllocInfo,NvU64 origSize,NvU32 kindChosen,NvU32 * pRetAttr,NvU32 retAttr2)3401739a20eSAndy Ritger kmemsysAllocComprResources_KERNEL
3411739a20eSAndy Ritger (
3421739a20eSAndy Ritger     OBJGPU               *pGpu,
3431739a20eSAndy Ritger     KernelMemorySystem   *pKernelMemorySystem,
3441739a20eSAndy Ritger     FB_ALLOC_INFO        *pFbAllocInfo,
3451739a20eSAndy Ritger     NvU64                 origSize,
3461739a20eSAndy Ritger     NvU32                 kindChosen,
3471739a20eSAndy Ritger     NvU32                *pRetAttr,
3481739a20eSAndy Ritger     NvU32                 retAttr2
3491739a20eSAndy Ritger )
3501739a20eSAndy Ritger {
3511739a20eSAndy Ritger     MemoryManager                     *pMemoryManager      = GPU_GET_MEMORY_MANAGER(pGpu);
3521739a20eSAndy Ritger     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);
3531739a20eSAndy Ritger     NvU32                              gfid;
3541739a20eSAndy Ritger 
3551739a20eSAndy Ritger     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
3561739a20eSAndy Ritger 
3571739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation,
3581739a20eSAndy Ritger         NV_ERR_INVALID_STATE);
3591739a20eSAndy Ritger 
3601739a20eSAndy Ritger     NV_CHECK_OR_RETURN(LEVEL_ERROR,
3611739a20eSAndy Ritger         !FLD_TEST_DRF(OS32, _ALLOC, _COMPTAG_OFFSET_USAGE, _FIXED, pFbAllocInfo->ctagOffset),
3621739a20eSAndy Ritger         NV_ERR_INVALID_ARGUMENT);
3631739a20eSAndy Ritger 
3641739a20eSAndy Ritger     // Failing the allocation if scrub on free is disabled
3651739a20eSAndy Ritger     if (!memmgrIsScrubOnFreeEnabled(pMemoryManager))
3661739a20eSAndy Ritger     {
3671739a20eSAndy Ritger         if (!(IS_SIMULATION(pGpu) || IsDFPGA(pGpu) || (IS_EMULATION(pGpu) && RMCFG_FEATURE_PLATFORM_MODS)
3681739a20eSAndy Ritger             ||(RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
369758b4ee8SAndy Ritger             ||hypervisorIsVgxHyper()
370758b4ee8SAndy Ritger             ||IS_GFID_VF(gfid)
3711739a20eSAndy Ritger             ||(IsSLIEnabled(pGpu) && !(RMCFG_FEATURE_PLATFORM_WINDOWS &&
3721739a20eSAndy Ritger                                      !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))))
3731739a20eSAndy Ritger            )
3741739a20eSAndy Ritger         {
3751739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR, "Compressible surfaces cannot be allocated on a system, "
3761739a20eSAndy Ritger                     "where scrub on free is disabled\n");
3771739a20eSAndy Ritger             return NV_ERR_INVALID_STATE;
3781739a20eSAndy Ritger         }
3791739a20eSAndy Ritger     }
3801739a20eSAndy Ritger     else if (pMemorySystemConfig->bOneToOneComptagLineAllocation)
3811739a20eSAndy Ritger     {
3821739a20eSAndy Ritger         NV_ASSERT_OR_RETURN(memmgrUseVasForCeMemoryOps(pMemoryManager), NV_ERR_INVALID_STATE);
3831739a20eSAndy Ritger     }
3841739a20eSAndy Ritger 
3851739a20eSAndy Ritger     FB_SET_HWRESID_CTAGID_FERMI(pFbAllocInfo->hwResId, FB_HWRESID_CTAGID_VAL_FERMI(-1));
3861739a20eSAndy Ritger     *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _REQUIRED, *pRetAttr);
3871739a20eSAndy Ritger     return NV_OK;
3881739a20eSAndy Ritger }
3891739a20eSAndy Ritger 
3901739a20eSAndy Ritger /*!
3911739a20eSAndy Ritger  * @brief Initializes static config data from the Physical side.
3921739a20eSAndy Ritger  * @param[in]  pGpu pointer to the GPU instance.
3931739a20eSAndy Ritger  * @param[in]  pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
3941739a20eSAndy Ritger  * @param[out] pConfig pointer to the static config init on Physical driver.
3951739a20eSAndy Ritger  *
3961739a20eSAndy Ritger  * @return NV_OK upon success.
3971739a20eSAndy Ritger  *         NV_ERR* otherwise.
3981739a20eSAndy Ritger  */
3991739a20eSAndy Ritger NV_STATUS
kmemsysInitStaticConfig_KERNEL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,MEMORY_SYSTEM_STATIC_CONFIG * pConfig)4001739a20eSAndy Ritger kmemsysInitStaticConfig_KERNEL
4011739a20eSAndy Ritger (
4021739a20eSAndy Ritger     OBJGPU *pGpu,
4031739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
4041739a20eSAndy Ritger     MEMORY_SYSTEM_STATIC_CONFIG *pConfig
4051739a20eSAndy Ritger )
4061739a20eSAndy Ritger {
4071739a20eSAndy Ritger     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
4081739a20eSAndy Ritger 
40991676d66SBernhard Stoeckner     return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
4101739a20eSAndy Ritger                            NV2080_CTRL_CMD_INTERNAL_MEMSYS_GET_STATIC_CONFIG,
4111739a20eSAndy Ritger                            pConfig, sizeof(*pConfig));
4121739a20eSAndy Ritger }
4131739a20eSAndy Ritger 
4141739a20eSAndy Ritger /*!
4151739a20eSAndy Ritger  * @brief   Function to map swizzId to mem size given total mem
4161739a20eSAndy Ritger  *
4171739a20eSAndy Ritger  * @param[IN]   pGpu
4181739a20eSAndy Ritger  * @param[IN]   pKernelMemorySystem
4191739a20eSAndy Ritger  * @param[IN]   swizzId
4201739a20eSAndy Ritger  * @param[IN]   totalRange          total memory range
4211739a20eSAndy Ritger  * @param[OUT]  pPartitionSizeFlag  Flag stating partition memory size
4221739a20eSAndy Ritger  * @param[OUT]  pSizeInBytes        Memory size in bytes supported by partition
4231739a20eSAndy Ritger  */
4241739a20eSAndy Ritger NV_STATUS
kmemsysSwizzIdToMIGMemSize_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 swizzId,NV_RANGE totalRange,NvU32 * pPartitionSizeFlag,NvU64 * pSizeInBytes)4251739a20eSAndy Ritger kmemsysSwizzIdToMIGMemSize_IMPL
4261739a20eSAndy Ritger (
4271739a20eSAndy Ritger     OBJGPU *pGpu,
4281739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
4291739a20eSAndy Ritger     NvU32 swizzId,
4301739a20eSAndy Ritger     NV_RANGE totalRange,
4311739a20eSAndy Ritger     NvU32 *pPartitionSizeFlag,
4321739a20eSAndy Ritger     NvU64 *pSizeInBytes
4331739a20eSAndy Ritger )
4341739a20eSAndy Ritger {
4351739a20eSAndy Ritger     //
4361739a20eSAndy Ritger     // To handle the straddling issue we always consider memory for different
4371739a20eSAndy Ritger     // swizzIds as addition of minimum sized segements allowed in partitioning
4381739a20eSAndy Ritger     //
4391739a20eSAndy Ritger     NvU64 memSize = rangeLength(totalRange) / KMIGMGR_MAX_GPU_INSTANCES;
4401739a20eSAndy Ritger 
4411739a20eSAndy Ritger     switch (swizzId)
4421739a20eSAndy Ritger     {
4431739a20eSAndy Ritger         case 0:
4441739a20eSAndy Ritger         {
4451739a20eSAndy Ritger             *pSizeInBytes = memSize * KMIGMGR_MAX_GPU_INSTANCES;
4461739a20eSAndy Ritger             *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _FULL);
4471739a20eSAndy Ritger             break;
4481739a20eSAndy Ritger         }
4491739a20eSAndy Ritger 
4501739a20eSAndy Ritger         case 1:
4511739a20eSAndy Ritger         case 2:
4521739a20eSAndy Ritger         {
4531739a20eSAndy Ritger             *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 2));
4541739a20eSAndy Ritger             *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _HALF);
4551739a20eSAndy Ritger             break;
4561739a20eSAndy Ritger         }
4571739a20eSAndy Ritger 
4581739a20eSAndy Ritger         case 3:
4591739a20eSAndy Ritger         case 4:
4601739a20eSAndy Ritger         case 5:
4611739a20eSAndy Ritger         case 6:
4621739a20eSAndy Ritger         {
4631739a20eSAndy Ritger             *pSizeInBytes = (memSize * (KMIGMGR_MAX_GPU_INSTANCES / 4));
4641739a20eSAndy Ritger             *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _QUARTER);
4651739a20eSAndy Ritger             break;
4661739a20eSAndy Ritger         }
4671739a20eSAndy Ritger 
4681739a20eSAndy Ritger         case 7:
4691739a20eSAndy Ritger         case 8:
4701739a20eSAndy Ritger         case 9:
4711739a20eSAndy Ritger         case 10:
4721739a20eSAndy Ritger         case 11:
4731739a20eSAndy Ritger         case 12:
4741739a20eSAndy Ritger         case 13:
4751739a20eSAndy Ritger         case 14:
4761739a20eSAndy Ritger         {
4771739a20eSAndy Ritger             *pSizeInBytes = memSize;
4781739a20eSAndy Ritger             *pPartitionSizeFlag = DRF_DEF(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, _EIGHTH);
4791739a20eSAndy Ritger             break;
4801739a20eSAndy Ritger         }
4811739a20eSAndy Ritger 
4821739a20eSAndy Ritger         default:
4831739a20eSAndy Ritger         {
4841739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR, "Unsupported SwizzId %d\n", swizzId);
4851739a20eSAndy Ritger             DBG_BREAKPOINT();
4861739a20eSAndy Ritger             return NV_ERR_INVALID_ARGUMENT;
4871739a20eSAndy Ritger         }
4881739a20eSAndy Ritger     }
4891739a20eSAndy Ritger 
4901739a20eSAndy Ritger     if ((*pSizeInBytes == 0) &&
4911739a20eSAndy Ritger         !pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB) &&
4921739a20eSAndy Ritger         !pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
4931739a20eSAndy Ritger     {
4941739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Insufficient memory\n");
4951739a20eSAndy Ritger         DBG_BREAKPOINT();
4961739a20eSAndy Ritger         return NV_ERR_INSUFFICIENT_RESOURCES;
4971739a20eSAndy Ritger     }
4981739a20eSAndy Ritger     return NV_OK;
4991739a20eSAndy Ritger }
5001739a20eSAndy Ritger 
5011739a20eSAndy Ritger /*!
5021739a20eSAndy Ritger  * @brief   Function to map swizzId to mem range given total range
5031739a20eSAndy Ritger  */
5041739a20eSAndy Ritger NV_STATUS
kmemsysSwizzIdToMIGMemRange_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 swizzId,NV_RANGE totalRange,NV_RANGE * pAddrRange)5051739a20eSAndy Ritger kmemsysSwizzIdToMIGMemRange_IMPL
5061739a20eSAndy Ritger (
5071739a20eSAndy Ritger     OBJGPU *pGpu,
5081739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
5091739a20eSAndy Ritger     NvU32 swizzId,
5101739a20eSAndy Ritger     NV_RANGE totalRange,
5111739a20eSAndy Ritger     NV_RANGE *pAddrRange
5121739a20eSAndy Ritger )
5131739a20eSAndy Ritger {
5141739a20eSAndy Ritger     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
5151739a20eSAndy Ritger     NV_STATUS rmStatus = NV_OK;
5161739a20eSAndy Ritger     NvU32 memSizeFlag = 0;
5171739a20eSAndy Ritger     NvU32 minSwizzId = 0;
5181739a20eSAndy Ritger     NvU64 unalignedStartAddr = 0;
5191739a20eSAndy Ritger     NvU64 memSize = 0;
5201739a20eSAndy Ritger     NV_RANGE swizzIdRange = NV_RANGE_EMPTY;
5211739a20eSAndy Ritger 
5221739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(!rangeIsEmpty(totalRange), NV_ERR_INVALID_ARGUMENT);
5231739a20eSAndy Ritger 
5241739a20eSAndy Ritger     // Get SwizzId to size mapping
5251739a20eSAndy Ritger     NV_ASSERT_OK_OR_RETURN(
5261739a20eSAndy Ritger         kmemsysSwizzIdToMIGMemSize(pGpu, pKernelMemorySystem, swizzId, totalRange, &memSizeFlag, &memSize));
5271739a20eSAndy Ritger 
5281739a20eSAndy Ritger     swizzIdRange = kmigmgrMemSizeFlagToSwizzIdRange_HAL(pGpu, pKernelMIGManager, memSizeFlag);
5291739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(!rangeIsEmpty(swizzIdRange), NV_ERR_INVALID_ARGUMENT);
5301739a20eSAndy Ritger 
5311739a20eSAndy Ritger     minSwizzId = swizzIdRange.lo;
5321739a20eSAndy Ritger 
5331739a20eSAndy Ritger     unalignedStartAddr = (totalRange.lo + (memSize * (swizzId - minSwizzId)));
5341739a20eSAndy Ritger     *pAddrRange = rangeMake(unalignedStartAddr, unalignedStartAddr + memSize - 1);
5351739a20eSAndy Ritger 
5361739a20eSAndy Ritger     return rmStatus;
5371739a20eSAndy Ritger }
5381739a20eSAndy Ritger 
/*!
 * @brief   Function to return GPU instance memory address range
 *
 * Derives the FB address range of the GPU instance identified by swizzId
 * from the vMMU segment table populated earlier (or from the legacy
 * swizzId-based math on AMODEL, where vMMU is unsupported).
 *
 * @param[in]  pGpu
 * @param[in]  pKernelMemorySystem
 * @param[in]  swizzId              GPU instance swizzId; must be below
 *                                  KMIGMGR_MAX_GPU_SWIZZID
 * @param[out] pAddrRange           Receives the instance's address range;
 *                                  left NV_RANGE_EMPTY on unsupported configs
 *
 * @return NV_OK on success (also on vGPU/ZERO_FB, where this is a no-op)
 */
NV_STATUS
kmemsysGetMIGGPUInstanceMemInfo_IMPL
(
    OBJGPU *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32 swizzId,
    NV_RANGE *pAddrRange
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64 vmmuSegmentSize;
    NvU64 startAddr;
    NvU64 endAddr;
    NvU64 partitionSize;

    NV_ASSERT_OR_RETURN(pAddrRange != NULL, NV_ERR_INVALID_ARGUMENT);
    *pAddrRange = NV_RANGE_EMPTY;
    NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_STATE);

    // Not supported in vGPU or ZERO_FB configs
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
                       NV_OK);

    //
    // VMMU not supported in AMODEL. Use legacy swizz-ID calculation instead of relying on vMMU segments
    // to calculate address range
    //
    if (IsAMODEL(pGpu))
    {
        NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
        return kmemsysSwizzIdToMIGMemRange(pGpu, pKernelMemorySystem, swizzId, partitionableMemoryRange, pAddrRange);
    }

    // Get the VMMU segment size
    vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
    NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);

    // Convert the per-swizzId segment table entries into byte address and size
    startAddr = pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment * vmmuSegmentSize;
    partitionSize = pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment * vmmuSegmentSize;

    if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
    {
        NvU64 memblockSize;
        NvU64 alignedStartAddr;

        NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

        //
        // Align the partition start address and size to memblock size
        // Some FB memory is wasted here if it is not already aligned.
        //
        alignedStartAddr = NV_ALIGN_UP64(startAddr, memblockSize);

        if(pKernelMemorySystem->bNumaMigPartitionSizeEnumerated)
        {
            // Use the pre-leveled per-profile size computed in
            // kmemsysPopulateMIGGPUInstanceMemConfig (BUG 4284299).
            partitionSize = pKernelMemorySystem->numaMigPartitionSize[swizzId];
        }
        else
        {
            // Shrink the size by whatever the start-address alignment consumed.
            partitionSize -= (alignedStartAddr - startAddr);
        }

        // Size must also be memblock-aligned before the range is built below.
        partitionSize = NV_ALIGN_DOWN64(partitionSize, memblockSize);
        startAddr = alignedStartAddr;
    }

    endAddr = startAddr + partitionSize - 1;

    *pAddrRange = rangeMake(startAddr, endAddr);

    return NV_OK;
}
6151739a20eSAndy Ritger 
61691676d66SBernhard Stoeckner /**
61791676d66SBernhard Stoeckner  * @brief Modifies numaMigPartitionSize array such that memory size of
61891676d66SBernhard Stoeckner           all the mig partitions with swizzId between startSwizzId and
61991676d66SBernhard Stoeckner           endSwizzId is assigned the minimum value among all partition's
62091676d66SBernhard Stoeckner           memory size.
62191676d66SBernhard Stoeckner  *
62291676d66SBernhard Stoeckner  * @param[IN]      pKernelMemorySystem
62391676d66SBernhard Stoeckner  * @param[IN]      startSwizzId
62491676d66SBernhard Stoeckner  * @param[IN]      endSwizzId
62591676d66SBernhard Stoeckner  *
62691676d66SBernhard Stoeckner  */
62791676d66SBernhard Stoeckner static void
_kmemsysSetNumaMigPartitionSizeSubArrayToMinimumValue(KernelMemorySystem * pKernelMemorySystem,NvU64 startSwizzId,NvU64 endSwizzId)62891676d66SBernhard Stoeckner _kmemsysSetNumaMigPartitionSizeSubArrayToMinimumValue
62991676d66SBernhard Stoeckner (
63091676d66SBernhard Stoeckner     KernelMemorySystem *pKernelMemorySystem,
63191676d66SBernhard Stoeckner     NvU64 startSwizzId,
63291676d66SBernhard Stoeckner     NvU64 endSwizzId
63391676d66SBernhard Stoeckner )
63491676d66SBernhard Stoeckner {
63591676d66SBernhard Stoeckner     NvU64 minPartitionSize = pKernelMemorySystem->numaMigPartitionSize[startSwizzId];
63691676d66SBernhard Stoeckner     NvU64 index;
63791676d66SBernhard Stoeckner 
63891676d66SBernhard Stoeckner     for (index = startSwizzId; index <= endSwizzId; index++)
63991676d66SBernhard Stoeckner     {
64091676d66SBernhard Stoeckner         if(pKernelMemorySystem->numaMigPartitionSize[index] < minPartitionSize)
64191676d66SBernhard Stoeckner             minPartitionSize = pKernelMemorySystem->numaMigPartitionSize[index];
64291676d66SBernhard Stoeckner     }
64391676d66SBernhard Stoeckner 
64491676d66SBernhard Stoeckner     for (index = startSwizzId; index <= endSwizzId; index++)
64591676d66SBernhard Stoeckner     {
64691676d66SBernhard Stoeckner         pKernelMemorySystem->numaMigPartitionSize[index] = minPartitionSize;
64791676d66SBernhard Stoeckner     }
64891676d66SBernhard Stoeckner }
64991676d66SBernhard Stoeckner 
6501739a20eSAndy Ritger /*!
6511739a20eSAndy Ritger  * @brief   Function to populate static GPU instance memory config which will be
6521739a20eSAndy Ritger  *          utilized for GPU instance memory query and memory allocation
6531739a20eSAndy Ritger  */
6541739a20eSAndy Ritger NV_STATUS
kmemsysPopulateMIGGPUInstanceMemConfig_KERNEL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)6551739a20eSAndy Ritger kmemsysPopulateMIGGPUInstanceMemConfig_KERNEL
6561739a20eSAndy Ritger (
6571739a20eSAndy Ritger     OBJGPU *pGpu,
6581739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
6591739a20eSAndy Ritger )
6601739a20eSAndy Ritger {
6611739a20eSAndy Ritger     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
6621739a20eSAndy Ritger     NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
6631739a20eSAndy Ritger     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6641739a20eSAndy Ritger     NvU64 vmmuSegmentSize;
6651739a20eSAndy Ritger     NvU64 totalVmmuSegments;
6661739a20eSAndy Ritger     NvU64 alignedStartAddr;
6671739a20eSAndy Ritger     NvU64 alignedEndAddr;
6681739a20eSAndy Ritger     NvU32 swizzId;
6691739a20eSAndy Ritger 
6701739a20eSAndy Ritger     // Not needed in vGPU or zero_fb configs
6711739a20eSAndy Ritger     NV_CHECK_OR_RETURN(LEVEL_SILENT,
6721739a20eSAndy Ritger                        !(IS_VIRTUAL(pGpu) || (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))),
6731739a20eSAndy Ritger                        NV_OK);
6741739a20eSAndy Ritger 
6751739a20eSAndy Ritger     // Nothing to do if MIG is not supported
6761739a20eSAndy Ritger     NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);
6771739a20eSAndy Ritger 
6781739a20eSAndy Ritger     // Get the VMMU segment size
6791739a20eSAndy Ritger     vmmuSegmentSize = gpuGetVmmuSegmentSize(pGpu);
6801739a20eSAndy Ritger     NV_ASSERT_OR_RETURN((vmmuSegmentSize != 0), NV_ERR_INVALID_STATE);
6811739a20eSAndy Ritger 
6821739a20eSAndy Ritger     alignedStartAddr = partitionableMemoryRange.lo;
6831739a20eSAndy Ritger     alignedEndAddr = partitionableMemoryRange.hi;
6841739a20eSAndy Ritger     if (alignedStartAddr != 0)
6851739a20eSAndy Ritger     {
6861739a20eSAndy Ritger         alignedStartAddr = NV_IS_ALIGNED64(alignedStartAddr, vmmuSegmentSize) ?
6871739a20eSAndy Ritger                            alignedStartAddr + vmmuSegmentSize :
6881739a20eSAndy Ritger                            NV_ALIGN_UP64(alignedStartAddr, vmmuSegmentSize);
6891739a20eSAndy Ritger     }
6901739a20eSAndy Ritger 
6911739a20eSAndy Ritger     if (NV_IS_ALIGNED64(alignedEndAddr + 1, vmmuSegmentSize))
6921739a20eSAndy Ritger     {
6931739a20eSAndy Ritger         alignedEndAddr = alignedEndAddr - vmmuSegmentSize;
6941739a20eSAndy Ritger     }
6951739a20eSAndy Ritger 
6961739a20eSAndy Ritger     totalVmmuSegments = (alignedEndAddr - alignedStartAddr + 1) / vmmuSegmentSize;
6971739a20eSAndy Ritger     for (swizzId = 0; swizzId < KMIGMGR_MAX_GPU_SWIZZID; swizzId++)
6981739a20eSAndy Ritger     {
6991739a20eSAndy Ritger         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
7001739a20eSAndy Ritger             kmemsysSwizzIdToVmmuSegmentsRange_HAL(pGpu, pKernelMemorySystem, swizzId, vmmuSegmentSize, totalVmmuSegments));
7011739a20eSAndy Ritger     }
7021739a20eSAndy Ritger 
70391676d66SBernhard Stoeckner     if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
70491676d66SBernhard Stoeckner     {
70591676d66SBernhard Stoeckner         NV_RANGE addrRange = NV_RANGE_EMPTY;
70691676d66SBernhard Stoeckner         NvU32 memSize;
70791676d66SBernhard Stoeckner 
70891676d66SBernhard Stoeckner         for(swizzId = 0; swizzId < KMIGMGR_MAX_GPU_SWIZZID; swizzId++)
70991676d66SBernhard Stoeckner         {
71091676d66SBernhard Stoeckner             kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, &addrRange);
71191676d66SBernhard Stoeckner             pKernelMemorySystem->numaMigPartitionSize[swizzId] = addrRange.hi - addrRange.lo + 1;
71291676d66SBernhard Stoeckner         }
71391676d66SBernhard Stoeckner 
71491676d66SBernhard Stoeckner         //
71591676d66SBernhard Stoeckner         // In GH180 for all the swizzId's for a given memory profile (FULL, HALF, QUARTER
71691676d66SBernhard Stoeckner         // and EIGHTH partitions) might not be same. Modify numaMigPartitionSize array
71791676d66SBernhard Stoeckner         // for the partition size to be constant for a given profile. BUG 4284299.
71891676d66SBernhard Stoeckner         //
71991676d66SBernhard Stoeckner         for (memSize = NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_FULL; memSize < NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE__SIZE; memSize++)
72091676d66SBernhard Stoeckner         {
72191676d66SBernhard Stoeckner             NV_RANGE swizzRange = kmigmgrMemSizeFlagToSwizzIdRange(pGpu, pKernelMIGManager,
72291676d66SBernhard Stoeckner                                       DRF_NUM(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, memSize));
72391676d66SBernhard Stoeckner             _kmemsysSetNumaMigPartitionSizeSubArrayToMinimumValue(pKernelMemorySystem, swizzRange.lo, swizzRange.hi);
72491676d66SBernhard Stoeckner         }
72591676d66SBernhard Stoeckner         pKernelMemorySystem->bNumaMigPartitionSizeEnumerated = NV_TRUE;
72691676d66SBernhard Stoeckner     }
7271739a20eSAndy Ritger     return NV_OK;
7281739a20eSAndy Ritger }
7291739a20eSAndy Ritger 
7301739a20eSAndy Ritger /*!
7311739a20eSAndy Ritger  * @brief Gets GPU instance memory configuration based on swizzId
7321739a20eSAndy Ritger  */
7331739a20eSAndy Ritger NV_STATUS
kmemsysGetMIGGPUInstanceMemConfigFromSwizzId_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 swizzId,const MIG_GPU_INSTANCE_MEMORY_CONFIG ** ppGPUInstanceMemConfig)7341739a20eSAndy Ritger kmemsysGetMIGGPUInstanceMemConfigFromSwizzId_IMPL
7351739a20eSAndy Ritger (
7361739a20eSAndy Ritger     OBJGPU *pGpu,
7371739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
7381739a20eSAndy Ritger     NvU32 swizzId,
7391739a20eSAndy Ritger     const MIG_GPU_INSTANCE_MEMORY_CONFIG **ppGPUInstanceMemConfig
7401739a20eSAndy Ritger )
7411739a20eSAndy Ritger {
7421739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);
74391676d66SBernhard Stoeckner 
74491676d66SBernhard Stoeckner     if (IS_VIRTUAL(pGpu))
74591676d66SBernhard Stoeckner     {
74691676d66SBernhard Stoeckner         // VMMU Segment details are populated on Host and not Guest.
74791676d66SBernhard Stoeckner         return NV_ERR_NOT_SUPPORTED;
74891676d66SBernhard Stoeckner     }
74991676d66SBernhard Stoeckner 
7501739a20eSAndy Ritger     // MODS makes a control call to describe GPU instances before this is populated. Return invalid data anyways
7511739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized, NV_ERR_INVALID_STATE);
7521739a20eSAndy Ritger 
7531739a20eSAndy Ritger     *ppGPUInstanceMemConfig = &pKernelMemorySystem->gpuInstanceMemConfig[swizzId];
7541739a20eSAndy Ritger     return NV_OK;
7551739a20eSAndy Ritger }
7561739a20eSAndy Ritger 
7571739a20eSAndy Ritger /*!
7581739a20eSAndy Ritger  * @brief Set GPU Instance memory config information and mark initialized
7591739a20eSAndy Ritger  */
7601739a20eSAndy Ritger NV_STATUS
kmemsysInitMIGGPUInstanceMemConfigForSwizzId_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 swizzId,NvU64 startingVmmuSegment,NvU64 memSizeInVmmuSegment)7611739a20eSAndy Ritger kmemsysInitMIGGPUInstanceMemConfigForSwizzId_IMPL
7621739a20eSAndy Ritger (
7631739a20eSAndy Ritger     OBJGPU *pGpu,
7641739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
7651739a20eSAndy Ritger     NvU32 swizzId,
7661739a20eSAndy Ritger     NvU64 startingVmmuSegment,
7671739a20eSAndy Ritger     NvU64 memSizeInVmmuSegment
7681739a20eSAndy Ritger )
7691739a20eSAndy Ritger {
7701739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);
7711739a20eSAndy Ritger 
7721739a20eSAndy Ritger     pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment = startingVmmuSegment;
7731739a20eSAndy Ritger     pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment = memSizeInVmmuSegment;
7741739a20eSAndy Ritger     pKernelMemorySystem->gpuInstanceMemConfig[swizzId].bInitialized = NV_TRUE;
7751739a20eSAndy Ritger 
7761739a20eSAndy Ritger     NV_PRINTF(LEVEL_INFO,
7771739a20eSAndy Ritger         "GPU Instance Mem Config for swizzId = 0x%x : MemStartSegment = 0x%llx, MemSizeInSegments = 0x%llx\n",
7781739a20eSAndy Ritger         swizzId,
7791739a20eSAndy Ritger         pKernelMemorySystem->gpuInstanceMemConfig[swizzId].startingVmmuSegment,
7801739a20eSAndy Ritger         pKernelMemorySystem->gpuInstanceMemConfig[swizzId].memSizeInVmmuSegment);
7811739a20eSAndy Ritger 
7821739a20eSAndy Ritger     return NV_OK;
7831739a20eSAndy Ritger }
7841739a20eSAndy Ritger 
7851739a20eSAndy Ritger /*!
7861739a20eSAndy Ritger  * @brief Ensure that the sysmem flush sysmem buffer has been initialized
7871739a20eSAndy Ritger  *
7881739a20eSAndy Ritger  * Setting up the sysmem flush buffer needs to be done very early in some cases
7891739a20eSAndy Ritger  * as it's required for the GPU to perform a system flush. One such case is
7901739a20eSAndy Ritger  * resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
7911739a20eSAndy Ritger  * init.
7921739a20eSAndy Ritger  *
7931739a20eSAndy Ritger  * @returns NV_OK if the sysmem flush buffer has been initialized.
7941739a20eSAndy Ritger  */
7951739a20eSAndy Ritger NV_STATUS
kmemsysEnsureSysmemFlushBufferInitialized_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)7961739a20eSAndy Ritger kmemsysEnsureSysmemFlushBufferInitialized_IMPL
7971739a20eSAndy Ritger (
7981739a20eSAndy Ritger     OBJGPU             *pGpu,
7991739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
8001739a20eSAndy Ritger )
8011739a20eSAndy Ritger {
8021739a20eSAndy Ritger     if (IS_VIRTUAL(pGpu)                                       ||
8031739a20eSAndy Ritger         IS_GSP_CLIENT(pGpu)                                    ||
8041739a20eSAndy Ritger         RMCFG_FEATURE_PLATFORM_GSP)
8051739a20eSAndy Ritger     {
8061739a20eSAndy Ritger         return NV_OK;
8071739a20eSAndy Ritger     }
8081739a20eSAndy Ritger 
8091739a20eSAndy Ritger     return kmemsysInitFlushSysmemBuffer_HAL(pGpu, pKernelMemorySystem);
8101739a20eSAndy Ritger }
8111739a20eSAndy Ritger 
/*!
 * @brief Handle sysmem NVLink/C2C, NUMA and ATS functionality
 *
 * Discovers the coherent CPU-visible FB base, computes how much FB can be
 * onlined to the kernel as a NUMA node after reserved-memory deductions,
 * optionally onlines it, and creates the coherent CPU mapping.
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem pointer to the kernel side KernelMemorySystem instance.
 * @param[in] bFlush              Whether the CPU cache of the GPU mapping
 *                                should be flushed
 *
 * @return  NV_OK on success
 */
NV_STATUS
kmemsysSetupCoherentCpuLink_IMPL
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvBool              bFlush
)
{
    KernelBus     *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64          numaOnlineSize = 0;
    NvU64          fbSize         = (pMemoryManager->Ram.fbTotalMemSizeMb << 20);
    NvU32          data32;
    NvBool         bCpuMapping    = NV_TRUE; // Default enable
    NvS32          numaNodeId     = NV0000_CTRL_NO_NUMA_NODE;
    NvU64          memblockSize   = 0;
    NvU64          numaOnlineBase = 0;
    NvU64          rsvdFastSize   = 0;
    NvU64          rsvdSlowSize   = 0;
    NvU64          rsvdISOSize    = 0;
    NvU64          totalRsvdBytes = 0;

    // Discover the coherent FB base/NUMA info and derive coherentCpuFbEnd.
    {
        NV_ASSERT_OK_OR_RETURN(kmemsysGetFbNumaInfo_HAL(pGpu, pKernelMemorySystem,
                                                        &pKernelMemorySystem->coherentCpuFbBase,
                                                        &pKernelMemorySystem->coherentRsvdFbBase,
                                                        &numaNodeId));
        if (pKernelMemorySystem->coherentCpuFbBase != 0)
        {
            if (gpuIsSelfHosted(pGpu))
            {
                //
                // For self-hosted, coherentCpuFbEnd is only till the FB size
                // end and NOT till the FB AMAP end since self-hosted doesn't
                // support indirect peer and requires GPU nvlink for peer.
                //
                pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase + fbSize;
            }
            else
            {
                // Ask Physical RM for the full coherent FB aperture size.
                RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
                NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS params = {0};

                NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
                                                       pGpu->hInternalClient,
                                                       pGpu->hInternalSubdevice,
                                                       NV2080_CTRL_CMD_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE,
                                                       &params,
                                                       sizeof(NV2080_CTRL_INTERNAL_GET_COHERENT_FB_APERTURE_SIZE_PARAMS)));
                //
                // Indirect peer(uses P9 to reach other GV100) in P9+GV100 requires coherentCpuFbEnd to
                // also include the entire FB AMAP range even when FB size is less than the FB AMAP size.
                //
                pKernelMemorySystem->coherentCpuFbEnd = pKernelMemorySystem->coherentCpuFbBase +
                                                        params.coherentFbApertureSize;
            }
        }
    }

    // Regkey override for the NUMA node ID reported for this GPU.
    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_OVERRIDE_GPU_NUMA_NODE_ID, &data32)) == NV_OK)
    {
        numaNodeId = (NvS32)data32;
        NV_PRINTF(LEVEL_ERROR, "Override GPU NUMA node ID %d!\n", numaNodeId);
    }

    // Parse regkey here
    if ((osReadRegistryDword(pGpu,
                             NV_REG_STR_RM_FORCE_BAR_PATH, &data32) == NV_OK) &&
        (data32 == 1))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Force disabling NVLINK/C2C mappings through regkey.\n");

        bCpuMapping = NV_FALSE;
    }

    // No coherent base discovered, or mapping force-disabled: nothing to set up.
    if ((pKernelMemorySystem->coherentCpuFbBase == 0) || !bCpuMapping)
    {
        return NV_OK;
    }

    NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));

    memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);

    //
    // Calculate the size of the memory which can be safely onlined to the
    // kernel after accounting for different reserved memory requirements.
    //
    // Align rsvd memory to 64K granularity.
    // TODO : rsvdMemorySize is not finalized at this point of time in
    // GH180, currently rsvdMemorySize is not increasing after this
    // point. This needs to be fixed.
    //
    totalRsvdBytes += NV_ALIGN_UP(pMemoryManager->rsvdMemorySize, 0x10000);
    totalRsvdBytes += (rsvdFastSize + rsvdSlowSize + rsvdISOSize);
    totalRsvdBytes += pMemoryManager->Ram.reservedMemSize;

    // For SRIOV guest, take into account FB tax paid on host side for each VF
    // This FB tax is non zero only for SRIOV guest RM environment.
    totalRsvdBytes += memmgrGetFbTaxSize_HAL(pGpu, pMemoryManager);

    //
    // TODO: make sure the onlineable memory is aligned to memblockSize
    // Currently, if we have leftover memory, it'll just be wasted because no
    // one can access it. If FB size itself is memblock size unaligned(because
    // of CBC and row remapper deductions), then the memory wastage is unavoidable.
    //
    numaOnlineSize = NV_ALIGN_DOWN64(fbSize - totalRsvdBytes, memblockSize);

    if (IS_PASSTHRU(pGpu) && pKernelMemorySystem->bBug3656943WAR)
    {
        // For passthrough case, reserved memory size is fixed as 1GB
        NvU64 rsvdSize = 1 * 1024 * 1024 * 1024;

        NV_ASSERT_OR_RETURN(rsvdSize >= totalRsvdBytes, NV_ERR_INVALID_STATE);
        totalRsvdBytes = rsvdSize;
        //
        // Aligning to hardcoded 512MB size as both host and guest need to use
        // the same alignment irrespective of the kernel page size. 512MB size
        // works for both 4K and 64K page size kernels but more memory is
        // wasted being part of non onlined region which can't be avoided
        // per the design.
        //
        numaOnlineSize = NV_ALIGN_DOWN64(fbSize - totalRsvdBytes, 512 * 1024 * 1024);
    }


    NV_PRINTF(LEVEL_INFO,
              "fbSize: 0x%llx NUMA reserved memory size: 0x%llx online memory size: 0x%llx\n",
              fbSize, totalRsvdBytes, numaOnlineSize);
    if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
    {
        // numaOnlineBase is always 0 here: onlining starts at FB offset 0.
        pKernelMemorySystem->numaOnlineBase   = numaOnlineBase;
        pKernelMemorySystem->numaOnlineSize   = numaOnlineSize;
        //
        // TODO: Bug 1945658: Soldier through on GPU memory add
        // failure(which is often possible because of missing auto online
        // setting) and instead check for failure on stateLoad.
        // Any failure in StateInit results in gpuStateDestroy not getting called.
        // kgspUnloadRm_IMPL from gpuStateDestroy also doesn't get called leaving
        // GSP in unclean state and requiring GPU reset to recover from that.
        //
        // kmemsysNumaAddMemory_HAL by itself cannot be called from stateLoad
        // because the memory mapping that follows this call site comes from linear
        // kernel virtual address when memory is added to the kernel vs the
        // VMALLOC_START region when memory is not added.
        //
        NV_ASSERT_OK(kmemsysNumaAddMemory_HAL(pGpu, pKernelMemorySystem, 0, 0,
                                              numaOnlineSize, &numaNodeId));
    }
    // Record the (possibly overridden or HAL-updated) NUMA node ID on the GPU.
    pGpu->numaNodeId = numaNodeId;

    NV_ASSERT_OK_OR_RETURN(kbusCreateCoherentCpuMapping_HAL(pGpu, pKernelBus, numaOnlineSize, bFlush));

    // Switch the toggle for coherent link mapping only if migration is successful
    pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_TRUE);

    NV_ASSERT_OK_OR_RETURN(kbusVerifyCoherentLink_HAL(pGpu, pKernelBus));

    return NV_OK;
}
9851739a20eSAndy Ritger 
9861739a20eSAndy Ritger /*!
9871739a20eSAndy Ritger  * @brief Teardown sysmem NVLink/C2C NUMA and ATS functionality
9881739a20eSAndy Ritger  *
9891739a20eSAndy Ritger  * @param[in] pGpu                 OBJGPU pointer
9901739a20eSAndy Ritger  * @param[in] pKernelMemorySystem  Kernel Memory System pointer
9911739a20eSAndy Ritger  * @param[in] bFlush               Whether the CPU cache of the GPU mapping
9921739a20eSAndy Ritger  *                                 should be flushed
9931739a20eSAndy Ritger  */
9941739a20eSAndy Ritger void
kmemsysTeardownCoherentCpuLink_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvBool bFlush)9951739a20eSAndy Ritger kmemsysTeardownCoherentCpuLink_IMPL
9961739a20eSAndy Ritger (
9971739a20eSAndy Ritger     OBJGPU            *pGpu,
9981739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
9991739a20eSAndy Ritger     NvBool             bFlush
10001739a20eSAndy Ritger )
10011739a20eSAndy Ritger {
10021739a20eSAndy Ritger     kbusTeardownCoherentCpuMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), bFlush);
10031739a20eSAndy Ritger     pGpu->setProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING, NV_FALSE);
10041739a20eSAndy Ritger }
10051739a20eSAndy Ritger 
10061739a20eSAndy Ritger NV_STATUS
kmemsysSendL2InvalidateEvict_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)10071739a20eSAndy Ritger kmemsysSendL2InvalidateEvict_IMPL
10081739a20eSAndy Ritger (
10091739a20eSAndy Ritger     OBJGPU             *pGpu,
10101739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem,
10111739a20eSAndy Ritger     NvU32               flags)
10121739a20eSAndy Ritger {
10131739a20eSAndy Ritger     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
10141739a20eSAndy Ritger     NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_PARAMS params = {0};
10151739a20eSAndy Ritger 
10161739a20eSAndy Ritger     params.flags = flags;
10171739a20eSAndy Ritger 
10181739a20eSAndy Ritger     return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
10191739a20eSAndy Ritger                            NV2080_CTRL_CMD_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT,
10201739a20eSAndy Ritger                            &params, sizeof(params));
10211739a20eSAndy Ritger }
10221739a20eSAndy Ritger 
10231739a20eSAndy Ritger NV_STATUS
kmemsysSendFlushL2AllRamsAndCaches_IMPL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem)10241739a20eSAndy Ritger kmemsysSendFlushL2AllRamsAndCaches_IMPL
10251739a20eSAndy Ritger (
10261739a20eSAndy Ritger     OBJGPU             *pGpu,
10271739a20eSAndy Ritger     KernelMemorySystem *pKernelMemorySystem
10281739a20eSAndy Ritger )
10291739a20eSAndy Ritger {
10301739a20eSAndy Ritger     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
10311739a20eSAndy Ritger 
10321739a20eSAndy Ritger     return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
10331739a20eSAndy Ritger                            NV2080_CTRL_CMD_INTERNAL_MEMSYS_FLUSH_L2_ALL_RAMS_AND_CACHES,
10341739a20eSAndy Ritger                            NULL, 0);
10351739a20eSAndy Ritger }
10361739a20eSAndy Ritger 
10371739a20eSAndy Ritger NV_STATUS
kmemsysGetUsableFbSize_KERNEL(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU64 * pFbSize)10381739a20eSAndy Ritger kmemsysGetUsableFbSize_KERNEL
10391739a20eSAndy Ritger (
10401739a20eSAndy Ritger     OBJGPU               *pGpu,
10411739a20eSAndy Ritger     KernelMemorySystem   *pKernelMemorySystem,
10421739a20eSAndy Ritger     NvU64                *pFbSize
10431739a20eSAndy Ritger )
10441739a20eSAndy Ritger {
10451739a20eSAndy Ritger     return kmemsysReadUsableFbSize_HAL(pGpu, pKernelMemorySystem, pFbSize);
10461739a20eSAndy Ritger }
10474397463eSAndy Ritger 
104891676d66SBernhard Stoeckner NV_STATUS
kmemsysStateLoad_VF(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)104991676d66SBernhard Stoeckner kmemsysStateLoad_VF(OBJGPU *pGpu, KernelMemorySystem *pKernelMemorySystem, NvU32 flags)
105091676d66SBernhard Stoeckner {
105191676d66SBernhard Stoeckner     NV_STATUS status = NV_OK;
105291676d66SBernhard Stoeckner 
105391676d66SBernhard Stoeckner     if (flags & GPU_STATE_FLAGS_PRESERVING)
105491676d66SBernhard Stoeckner     {
105591676d66SBernhard Stoeckner         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
105691676d66SBernhard Stoeckner 
105791676d66SBernhard Stoeckner         NV_ASSERT(!(flags & GPU_STATE_FLAGS_GC6_TRANSITION));
105891676d66SBernhard Stoeckner 
105991676d66SBernhard Stoeckner         status = memmgrRestorePowerMgmtState(pGpu, pMemoryManager);
106091676d66SBernhard Stoeckner         if (status != NV_OK)
106191676d66SBernhard Stoeckner             memmgrFreeFbsrMemory(pGpu, pMemoryManager);
106291676d66SBernhard Stoeckner 
106391676d66SBernhard Stoeckner         NV_ASSERT_OK(status);
106491676d66SBernhard Stoeckner     }
106591676d66SBernhard Stoeckner 
106691676d66SBernhard Stoeckner     return status;
106791676d66SBernhard Stoeckner }
106891676d66SBernhard Stoeckner 
106991676d66SBernhard Stoeckner NV_STATUS
kmemsysStateUnload_VF(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 flags)107091676d66SBernhard Stoeckner kmemsysStateUnload_VF(OBJGPU *pGpu, KernelMemorySystem *pKernelMemorySystem, NvU32 flags)
107191676d66SBernhard Stoeckner {
107291676d66SBernhard Stoeckner     NV_STATUS status = NV_OK;
107391676d66SBernhard Stoeckner 
107491676d66SBernhard Stoeckner     if (flags & GPU_STATE_FLAGS_PRESERVING)
107591676d66SBernhard Stoeckner     {
107691676d66SBernhard Stoeckner         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
107791676d66SBernhard Stoeckner 
107891676d66SBernhard Stoeckner         NV_ASSERT(!(flags & GPU_STATE_FLAGS_GC6_TRANSITION));
107991676d66SBernhard Stoeckner 
108091676d66SBernhard Stoeckner         status = memmgrSavePowerMgmtState(pGpu, pMemoryManager);
108191676d66SBernhard Stoeckner         if (status != NV_OK)
108291676d66SBernhard Stoeckner             memmgrFreeFbsrMemory(pGpu, pMemoryManager);
108391676d66SBernhard Stoeckner 
108491676d66SBernhard Stoeckner         NV_ASSERT_OK(status);
108591676d66SBernhard Stoeckner     }
108691676d66SBernhard Stoeckner 
108791676d66SBernhard Stoeckner     return status;
108891676d66SBernhard Stoeckner }
1089