/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "gpu/gpu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "platform/sli/sli.h"

#include "published/pascal/gp100/dev_mmu.h"
#include "class/cl906f.h"   // GF100_CHANNEL_GPFIFO
#include "class/clc0b5.h"   // PASCAL_DMA_COPY_A

/*!
 * @brief Determine the compressed PTE kind for a given color allocation.
 *
 * @param[in]   pGpu                 OBJGPU pointer
 * @param[in]   pMemoryManager       MemoryManager pointer
 * @param[in]   pFbAllocPageFormat   FB_ALLOC_PAGE_FORMAT pointer
 *
 * @returns     PTE kind.
 */
NvU32
memmgrChooseKindCompressC_GP100
(
    OBJGPU                 *pGpu,
    MemoryManager          *pMemoryManager,
    FB_ALLOC_PAGE_FORMAT   *pFbAllocPageFormat
)
{
    extern NvU32 memmgrChooseKindCompressC_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat);
    NvU32  kind         = NV_MMU_PTE_KIND_PITCH;
    NvU32  attrdepth    = DRF_VAL(OS32, _ATTR, _DEPTH, pFbAllocPageFormat->attr);
    NvU32  aasamples    = DRF_VAL(OS32, _ATTR, _AA_SAMPLES, pFbAllocPageFormat->attr);

    if ((attrdepth == NVOS32_ATTR_DEPTH_32) &&
        ((aasamples == NVOS32_ATTR_AA_SAMPLES_4) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_ROTATED) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16)))
    {
        kind = NV_MMU_PTE_KIND_C32_MS4_4CBRA;
    }
    else if ((attrdepth == NVOS32_ATTR_DEPTH_64) &&
             ((aasamples == NVOS32_ATTR_AA_SAMPLES_4) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_ROTATED) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16)))
    {
        kind = NV_MMU_PTE_KIND_C64_MS4_4CBRA;
    }
    else
    {
        kind = memmgrChooseKindCompressC_GM107(pGpu, pMemoryManager, pFbAllocPageFormat);
    }

    return kind;
}
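
/*
 * Illustrative sketch (not part of the driver): how the selection above maps
 * allocation attributes to a PTE kind. The attr value below is hypothetical,
 * built with the standard DRF_DEF helper over the same NVOS32_ATTR fields
 * that DRF_VAL decodes above.
 *
 *     NvU32 attr = DRF_DEF(OS32, _ATTR, _DEPTH, _32) |
 *                  DRF_DEF(OS32, _ATTR, _AA_SAMPLES, _4);
 *
 *     // attrdepth decodes to NVOS32_ATTR_DEPTH_32 and aasamples to
 *     // NVOS32_ATTR_AA_SAMPLES_4, so the first branch returns
 *     // NV_MMU_PTE_KIND_C32_MS4_4CBRA. Any other depth/AA-sample
 *     // combination falls through to the GM107 implementation.
 */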

void
memmgrHandleSizeOverrides_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    // If fbOverrideSizeMb is set, insert a reserved region to "remove" the memory
    if (pMemoryManager->Ram.fbTotalMemSizeMb > pMemoryManager->Ram.fbOverrideSizeMb)
    {
        FB_REGION_DESCRIPTOR newRegion = {0};
        NvU32 newRegionIndex;
        NvU64 memDiff = (pMemoryManager->Ram.fbTotalMemSizeMb - pMemoryManager->Ram.fbOverrideSizeMb) << 20;
        //
        // Everything from the override heap max up to the scrub end is marked
        // as reserved and unusable.
        //
        NvU64 regionLimit = pMemoryManager->Ram.fbRegion[0].limit;
        NvU64 regionBase;

        // Ensure that the region end (limit + 1) is 64KB aligned - necessary for PMA
        regionLimit = NV_ALIGN_UP(regionLimit, 0x10000) - 1;

        //
        // If there is an overridden heap max already, then reserve everything
        // above that. Otherwise, just go with where it would already land.
        //
        regionBase = NV_MIN(pMemoryManager->overrideHeapMax, regionLimit - memDiff) + 1;

        newRegion.base = regionBase;
        newRegion.limit = regionLimit;
        newRegion.rsvdSize = 0;
        newRegion.bRsvdRegion = NV_TRUE;
        newRegion.performance = 0;
        newRegion.bSupportCompressed = NV_FALSE;
        newRegion.bSupportISO = NV_FALSE;
        newRegion.bProtected = NV_FALSE;
        newRegion.bInternalHeap = NV_FALSE;

        newRegionIndex = memmgrInsertFbRegion(pGpu, pMemoryManager, &newRegion);

        pMemoryManager->overrideHeapMax = pMemoryManager->Ram.fbRegion[newRegionIndex].base - 1;
    }
}
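
/*
 * Worked example with hypothetical numbers: with fbTotalMemSizeMb = 16384,
 * fbOverrideSizeMb = 8192, and fbRegion[0].limit = 0x3FFFFFFFF (16GB - 1),
 * memDiff = 8192MB << 20 = 0x200000000 bytes. The limit is already 64KB
 * end-aligned, so regionLimit stays 0x3FFFFFFFF. Assuming no smaller
 * override is already in effect, regionBase = (0x3FFFFFFFF - 0x200000000)
 * + 1 = 0x200000000, so the reserved region covers the top 8GB of FB and
 * overrideHeapMax is pulled down to 0x1FFFFFFFF.
 */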

NV_STATUS
memmgrFinishHandleSizeOverrides_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS rmStatus = NV_OK;

    if (pMemoryManager->overrideInitHeapMin > 0)
    {
        //
        // We want all the memory above overrideHeapMax to be inaccessible,
        // so mark everything above the max as reserved now.
        //
        NvU32 i;
        for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
        {
            if (pMemoryManager->Ram.fbRegion[i].limit > pMemoryManager->overrideHeapMax)
            {
                if (pMemoryManager->Ram.fbRegion[i].base >= pMemoryManager->overrideHeapMax + 1)
                {
                    // If the region is completely above the max, just mark it reserved
                    pMemoryManager->Ram.fbRegion[i].bRsvdRegion = NV_TRUE;
                }
                else if (!pMemoryManager->Ram.fbRegion[i].bRsvdRegion)
                {
                    //
                    // Otherwise, if the region straddles the max and is not already
                    // reserved, split it into one reserved and one non-reserved region.
                    //
                    FB_REGION_DESCRIPTOR newRegion = {0};
                    newRegion.base = pMemoryManager->overrideHeapMax + 1;
                    newRegion.limit = pMemoryManager->Ram.fbRegion[i].limit;
                    newRegion.rsvdSize = 0;
                    newRegion.bRsvdRegion = NV_TRUE;
                    newRegion.performance = 0;
                    newRegion.bSupportCompressed = NV_FALSE;
                    newRegion.bSupportISO = NV_FALSE;
                    newRegion.bProtected = NV_FALSE;
                    newRegion.bInternalHeap = NV_FALSE;
                    i = memmgrInsertFbRegion(pGpu, pMemoryManager, &newRegion);
                }
            }
        }

        //
        // Scrubbing should be finished before the next allocation, so this can
        // safely be reset.
        //
        pMemoryManager->overrideInitHeapMin = 0;
    }

    return rmStatus;
}
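
/*
 * Illustrative layout (hypothetical addresses) for the straddling case above,
 * assuming memmgrInsertFbRegion() carves the new reserved span out of the
 * existing region, which is what the loop relies on:
 *
 *   before:  fbRegion[i]   [0x100000000 .............. 0x2FFFFFFFF]  usable
 *            overrideHeapMax = 0x1FFFFFFFF
 *
 *   after:   fbRegion[i]   [0x100000000 .. 0x1FFFFFFFF]  usable
 *            new region    [0x200000000 .. 0x2FFFFFFFF]  bRsvdRegion = NV_TRUE
 */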

/*!
 *  Returns the maximum context size
 *
 *  @returns NvU64 - the maximum context size
 */
NvU64
memmgrGetMaxContextSize_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    extern NvU64 memmgrGetMaxContextSize_GM200(OBJGPU *pGpu, MemoryManager *pMemoryManager);

    NvU64 size = memmgrGetMaxContextSize_GM200(pGpu, pMemoryManager);

    //
    // This function's original purpose was to estimate how much heap memory RM
    // needs to keep in reserve from the Windows LDDM driver to pass the WHQL
    // MaxContexts test. This estimation is done after heap init, before KMD
    // allocates a kernel-managed chunk.
    // UVM & PMA similarly require RM to estimate how much heap memory RM needs
    // to reserve for page tables, contexts, etc. This estimation is used during
    // heap init to divide the FB into internal heap and external PMA-managed
    // spaces.
    // Update for Pascal+ chips: on WDDMv2, KMD manages the reserve by locking
    // down the lowest-level PDEs at RM device creation time (= process creation)
    // via the NV90F1_CTRL_CMD_VASPACE_RESERVE_ENTRIES rmControl call. Thus RM
    // has to allocate the low-level page tables for the entire reserve, which is
    // 4GB (range 4GB-8GB). When PD0 is locked down and the RM PD1 entries are
    // valid, KMD can simply copy them at the setRootPageTable DDI call and need
    // not restore them at unsetRootPageTable time.
    // Because of the above reservation, RM has to create quite a few 4K page
    // tables, which results in ~28KB of extra consumption per default DX device
    // (with the default 2 contexts).
    // On Kepler and Maxwell, the up-to-date WDDMv2 driver supports up to ~400
    // processes. On Pascal, with the same amount of reserve, we can only have
    // ~200 processes. Hence we need to increase the RM physical reserve size on
    // MMUv2-enabled chips to keep the supported process count on par with
    // previous chips.
    // Any change to the RM reserve should be tested against multi-process
    // scenarios with the CreateNProcesses kmdtest.
    //

    if (RMCFG_FEATURE_PLATFORM_WINDOWS)
    {
        //
        // The increase is only needed in the single-GPU case; on SLI the
        // 400-process requirement is already met by the additional SLI reserve.
        //
        if (!IsSLIEnabled(pGpu) && pGpu->getProperty(pGpu, PDB_PROP_GPU_EXTERNAL_HEAP_CONTROL))
        {
            // KMD in WDDM mode
        }
    }

    return size;
}