/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "gpu/gpu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "platform/sli/sli.h"

#include "published/pascal/gp100/dev_mmu.h"
#include "class/cl906f.h" // GF100_CHANNEL_GPFIFO
#include "class/clc0b5.h" // PASCAL_DMA_COPY_A

/*!
 * @brief Determine the compressed PTE kind for a given color allocation.
 *
 * @param[in] pGpu                 OBJGPU pointer
 * @param[in] pMemoryManager       MemoryManager pointer
 * @param[in] pFbAllocPageFormat   FB_ALLOC_PAGE_FORMAT pointer
 *
 * @returns PTE kind.
 */
NvU32
memmgrChooseKindCompressC_GP100
(
    OBJGPU               *pGpu,
    MemoryManager        *pMemoryManager,
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat
)
{
    extern NvU32 memmgrChooseKindCompressC_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat);
    NvU32 kind      = NV_MMU_PTE_KIND_PITCH;
    NvU32 attrdepth = DRF_VAL(OS32, _ATTR, _DEPTH, pFbAllocPageFormat->attr);
    NvU32 aasamples = DRF_VAL(OS32, _ATTR, _AA_SAMPLES, pFbAllocPageFormat->attr);

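    //
    // Color allocations at 32/64-bit depth with 4 AA samples (including the
    // rotated and virtual-8/16 variants) get the Pascal MS4 4CBRA compression
    // kinds; everything else falls back to the GM107 kind selection.
    //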
    if ((attrdepth == NVOS32_ATTR_DEPTH_32) &&
        ((aasamples == NVOS32_ATTR_AA_SAMPLES_4) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_ROTATED) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8) ||
         (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16)))
    {
        kind = NV_MMU_PTE_KIND_C32_MS4_4CBRA;
    }
    else if ((attrdepth == NVOS32_ATTR_DEPTH_64) &&
             ((aasamples == NVOS32_ATTR_AA_SAMPLES_4) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_ROTATED) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8) ||
              (aasamples == NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16)))
    {
        kind = NV_MMU_PTE_KIND_C64_MS4_4CBRA;
    }
    else
    {
        kind = memmgrChooseKindCompressC_GM107(pGpu, pMemoryManager, pFbAllocPageFormat);
    }

    return kind;
}

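/*!
 * @brief Handle the FB size override (fbOverrideSizeMb).
 *
 * If the FB size is overridden to a smaller value than the actual FB size,
 * insert a reserved FB region covering the difference so that the excess
 * memory is unusable by the heap.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pMemoryManager MemoryManager pointer
 */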
void
memmgrHandleSizeOverrides_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    // If fbOverrideSizeMb is set, insert a reserved region to "remove" the excess memory
    if (pMemoryManager->Ram.fbTotalMemSizeMb > pMemoryManager->Ram.fbOverrideSizeMb)
    {
        FB_REGION_DESCRIPTOR newRegion = {0};
        NvU32 newRegionIndex;
        NvU64 memDiff = (pMemoryManager->Ram.fbTotalMemSizeMb - pMemoryManager->Ram.fbOverrideSizeMb) << 20;
        //
        // The range from the override heap max up to the scrub end is marked
        // as reserved and unusable.
        //
        NvU64 regionLimit = pMemoryManager->Ram.fbRegion[0].limit;
        NvU64 regionBase;

        // Ensure that regionLimit is 64KB aligned - necessary for PMA
        regionLimit = NV_ALIGN_UP(regionLimit, 0x10000) - 1;

        //
        // If there is an overridden heap max already, then reserve everything
        // above that. Otherwise, just go with where it would already land.
        //
        regionBase = NV_MIN(pMemoryManager->overrideHeapMax, regionLimit - memDiff) + 1;

        newRegion.base = regionBase;
        newRegion.limit = regionLimit;
        newRegion.rsvdSize = 0;
        newRegion.bRsvdRegion = NV_TRUE;
        newRegion.performance = 0;
        newRegion.bSupportCompressed = NV_FALSE;
        newRegion.bSupportISO = NV_FALSE;
        newRegion.bProtected = NV_FALSE;
        newRegion.bInternalHeap = NV_FALSE;

        newRegionIndex = memmgrInsertFbRegion(pGpu, pMemoryManager, &newRegion);

        pMemoryManager->overrideHeapMax = pMemoryManager->Ram.fbRegion[newRegionIndex].base - 1;
    }
}

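/*!
 * @brief Finish handling the FB size override.
 *
 * Marks every FB region above the overridden heap max as reserved, splitting
 * any region that straddles the limit into a reserved and a non-reserved
 * part, and then clears overrideInitHeapMin.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pMemoryManager MemoryManager pointer
 *
 * @returns NV_STATUS
 */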
NV_STATUS
memmgrFinishHandleSizeOverrides_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS rmStatus = NV_OK;

    if (pMemoryManager->overrideInitHeapMin > 0)
    {
        //
        // We want all the memory above the overrideHeapMax to be inaccessible,
        // so mark everything above the max as reserved now.
        //
        NvU32 i;
        for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
        {
            if (pMemoryManager->Ram.fbRegion[i].limit > pMemoryManager->overrideHeapMax)
            {
                if (pMemoryManager->Ram.fbRegion[i].base >= pMemoryManager->overrideHeapMax + 1)
                {
                    // If the region is completely above the max, just mark it reserved
                    pMemoryManager->Ram.fbRegion[i].bRsvdRegion = NV_TRUE;
                }
                else if (!pMemoryManager->Ram.fbRegion[i].bRsvdRegion)
                {
                    //
                    // Otherwise, if the region straddles the max and is not already
                    // reserved, split it into one reserved and one non-reserved region.
                    //
                    FB_REGION_DESCRIPTOR newRegion = {0};
                    newRegion.base = pMemoryManager->overrideHeapMax + 1;
                    newRegion.limit = pMemoryManager->Ram.fbRegion[i].limit;
                    newRegion.rsvdSize = 0;
                    newRegion.bRsvdRegion = NV_TRUE;
                    newRegion.performance = 0;
                    newRegion.bSupportCompressed = NV_FALSE;
                    newRegion.bSupportISO = NV_FALSE;
                    newRegion.bProtected = NV_FALSE;
                    newRegion.bInternalHeap = NV_FALSE;
                    i = memmgrInsertFbRegion(pGpu, pMemoryManager, &newRegion);
                }
            }
        }

        //
        // Scrubbing should be finished before the next allocation, so this can
        // safely be reset.
        //
        pMemoryManager->overrideInitHeapMin = 0;
    }

    return rmStatus;
}

/*!
 * @brief Returns the max context size.
 *
 * @returns NvU64 - the max context size
 */
NvU64
memmgrGetMaxContextSize_GP100
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    extern NvU64 memmgrGetMaxContextSize_GM200(OBJGPU *pGpu, MemoryManager *pMemoryManager);

    NvU64 size = memmgrGetMaxContextSize_GM200(pGpu, pMemoryManager);

    //
    // This function's original purpose was to estimate how much heap memory RM
    // needs to keep in reserve from the Windows LDDM driver to pass the WHQL
    // MaxContexts test. This estimation is done after heap init, before KMD
    // allocates a kernel-managed chunk.
    // UVM & PMA similarly require RM to estimate how much heap memory RM needs
    // to reserve for page tables, contexts, etc. This estimation is used during
    // heap init to divide the FB into internal heap and external PMA-managed
    // spaces.
    // Update for Pascal+ chips: on WDDMv2, KMD manages the reserve by locking down
    // the lowest-level PDEs at RM device creation time (= process creation) via the
    // NV90F1_CTRL_CMD_VASPACE_RESERVE_ENTRIES rmControl call. Thus RM has to allocate
    // the low-level page tables for the entire reserve, which is 4GB (range 4GB-8GB).
    // When PD0 is locked down and the RM PD1 entries are valid, KMD can simply copy
    // them at the setRootPageTable DDI call and does not need to restore them at
    // unsetRootPT time.
    // Because of the above reservation, RM has to create quite a few 4K page tables,
    // which results in ~28KB of extra consumption per default DX device (with the
    // default 2 contexts).
    // On Kepler and Maxwell, the up-to-date WDDMv2 driver supports up to ~400 processes.
    // On Pascal, with the same amount of reserve, we can only have ~200 processes.
    // Hence we need to increase the RM physical reserve size for MMUv2-enabled chips
    // to keep the supported process count on par with previous chips.
    // If any changes to the RM reserve are introduced, they should be tested against
    // multi-process scenarios using the kmdtest (CreateNProcesses).
    //

    if (RMCFG_FEATURE_PLATFORM_WINDOWS)
    {
        //
        // Only needs an increase in the single-GPU case, as the 400-process
        // requirement is satisfied on SLI by the additional SLI reserve.
        //
        if (!IsSLIEnabled(pGpu) && pGpu->getProperty(pGpu, PDB_PROP_GPU_EXTERNAL_HEAP_CONTROL))
        {
            // KMD in WDDM mode
        }
    }

    return size;
}