/*
 * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/core.h"
#include "gpu/gpu.h"
#include "os/os.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/heap.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "gpu/mem_mgr/fermi_dma.h"
#include "gpu/mem_mgr/virt_mem_allocator.h"
#include "kernel/gpu/gr/kernel_graphics.h"
#include "gpu/mmu/kern_gmmu.h"
#include "gpu/bus/kern_bus.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "platform/sli/sli.h"
#include "nvrm_registry.h"

#include "gpu/bif/kernel_bif.h"
#include "gpu/device/device.h"
#include "gpu/subdevice/subdevice.h"
#include "gpu/disp/inst_mem/disp_inst_mem.h"

#include "rmifrif.h"

#include "published/maxwell/gm107/dev_mmu.h"
#include "published/maxwell/gm107/dev_ram.h"

#include "vgpu/rpc.h"
#include "vgpu/vgpu_events.h"

//
// statics
//
static NV_STATUS memmgrComputeAndSetVgaDisplayMemoryBase_GM107(OBJGPU *, NvU64);
NvU32
memmgrChooseKindCompressC_GM107
(
    OBJGPU               *pGpu,
    MemoryManager        *pMemoryManager,
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat)
{
    NvU32  kind       = NV_MMU_PTE_KIND_PITCH;
    NvU32  attrdepth  = DRF_VAL(OS32, _ATTR, _DEPTH, pFbAllocPageFormat->attr);
    NvU32  aasamples  = DRF_VAL(OS32, _ATTR, _AA_SAMPLES, pFbAllocPageFormat->attr);
    NvBool prefer_zbc = !FLD_TEST_DRF(OS32, _ATTR2, _ZBC, _PREFER_NO_ZBC, pFbAllocPageFormat->attr2);

    switch (attrdepth)
    {
        case NVOS32_ATTR_DEPTH_UNKNOWN:
        case NVOS32_ATTR_DEPTH_8:
        case NVOS32_ATTR_DEPTH_16:
            kind = NV_MMU_PTE_KIND_GENERIC_16BX2;
            break;
        case NVOS32_ATTR_DEPTH_32:
            switch (aasamples)
            {
                case NVOS32_ATTR_AA_SAMPLES_1:
                    kind = NV_MMU_PTE_KIND_C32_2CRA;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_2:
                    kind = memmgrChooseKindCompressCForMS2_HAL(pGpu, pMemoryManager, attrdepth);
                    break;
                case NVOS32_ATTR_AA_SAMPLES_4:
                case NVOS32_ATTR_AA_SAMPLES_4_ROTATED:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16:
                    kind = prefer_zbc ? NV_MMU_PTE_KIND_C32_MS4_2CBR : NV_MMU_PTE_KIND_C32_MS4_2BRA;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_8:
                case NVOS32_ATTR_AA_SAMPLES_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_32:
                    kind = NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA;
                    break;
            }
            break;
        case NVOS32_ATTR_DEPTH_64:
            switch (aasamples)
            {
                case NVOS32_ATTR_AA_SAMPLES_1:
                    kind = NV_MMU_PTE_KIND_C64_2CRA;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_2:
                    kind = memmgrChooseKindCompressCForMS2_HAL(pGpu, pMemoryManager, attrdepth);
                    break;
                case NVOS32_ATTR_AA_SAMPLES_4:
                case NVOS32_ATTR_AA_SAMPLES_4_ROTATED:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16:
                    kind = prefer_zbc ? NV_MMU_PTE_KIND_C64_MS4_2CBR : NV_MMU_PTE_KIND_C64_MS4_2BRA;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_8:
                case NVOS32_ATTR_AA_SAMPLES_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_32:
                    kind = NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA;
                    break;
            }
            break;
        case NVOS32_ATTR_DEPTH_128:
            switch (aasamples)
            {
                case NVOS32_ATTR_AA_SAMPLES_1:
                    kind = NV_MMU_PTE_KIND_C128_2CR;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_2:
                    kind = NV_MMU_PTE_KIND_C128_MS2_2CR;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_4:
                case NVOS32_ATTR_AA_SAMPLES_4_ROTATED:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_8:
                case NVOS32_ATTR_AA_SAMPLES_4_VIRTUAL_16:
                    kind = NV_MMU_PTE_KIND_C128_MS4_2CR;
                    break;
                case NVOS32_ATTR_AA_SAMPLES_8:
                case NVOS32_ATTR_AA_SAMPLES_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_16:
                case NVOS32_ATTR_AA_SAMPLES_8_VIRTUAL_32:
                    kind = NV_MMU_PTE_KIND_C128_MS8_MS16_2CR;
                    break;
            }
            break;
    }

    return kind;
}
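
//
// Illustrative mapping (not exhaustive): a 32-bpp surface with 4x AA selects
// NV_MMU_PTE_KIND_C32_MS4_2CBR when ZBC is preferred, and
// NV_MMU_PTE_KIND_C32_MS4_2BRA when _ATTR2_ZBC_PREFER_NO_ZBC is set; 8/16-bpp
// depths always fall back to NV_MMU_PTE_KIND_GENERIC_16BX2 regardless of the
// sample count.
//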

NV_STATUS
memmgrAllocDetermineAlignment_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64         *pMemSize,
    NvU64         *pAlign,
    NvU64          alignPad,
    NvU32          allocFlags,
    NvU32          retAttr,
    NvU32          retAttr2,
    NvU64          hwAlignment
)
{
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

    switch (dmaNvos32ToPageSizeAttr(retAttr, retAttr2))
    {
        case RM_ATTR_PAGE_SIZE_4KB:
            hwAlignment = NV_MAX(hwAlignment, RM_PAGE_SIZE - 1);
            break;
        case RM_ATTR_PAGE_SIZE_BIG:
            // We will always align to the biggest page size.
            hwAlignment = NV_MAX(hwAlignment, kgmmuGetMaxBigPageSize_HAL(pKernelGmmu) - 1);
            break;
        case RM_ATTR_PAGE_SIZE_HUGE:
            NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
                                NV_ERR_INVALID_ARGUMENT);
            hwAlignment = NV_MAX(hwAlignment, RM_PAGE_SIZE_HUGE - 1);
            break;
        case RM_ATTR_PAGE_SIZE_512MB:
            NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
                                NV_ERR_INVALID_ARGUMENT);
            hwAlignment = NV_MAX(hwAlignment, RM_PAGE_SIZE_512M - 1);
            break;
        case RM_ATTR_PAGE_SIZE_DEFAULT:
        case RM_ATTR_PAGE_SIZE_INVALID:
            NV_PRINTF(LEVEL_ERROR, "- invalid page size specified\n");
            return NV_ERR_INVALID_ARGUMENT;
    }

    if (!FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr))
    {
        if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, retAttr) &&
            !(allocFlags & NVOS32_ALLOC_FLAGS_VIRTUAL) &&
            pMemoryManager->bSmallPageCompression)
        {
            //
            // No offset alignment requirement for 4KB compression.
            // The size should be aligned to the compression page size.
            //
            NvU64 comprPageSize = pMemorySystemConfig->comprPageSize;
            *pMemSize = ((*pMemSize + alignPad + comprPageSize - 1) / comprPageSize) * comprPageSize;
        }
        else
        {
            // Both size and offset should be aligned to the compression page size.
            hwAlignment = NV_MAX(hwAlignment, pMemorySystemConfig->comprPageSize - 1);

            if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, retAttr) &&
                !pMemoryManager->bSmallPageCompression)
            {
                NV_PRINTF(LEVEL_INFO,
                          "Compression requested on small page size mappings\n");
            }
        }
    }

    //
    // A non-zero alignment means it's a requested alignment. Ensure the requested
    // alignment is still aligned to the hw requirements.
    //
    if ((*pAlign) &&
        (((*pAlign > hwAlignment) && !(*pAlign % (hwAlignment + 1))) ||     // align is a >=1 multiple of hwAlignment
         ((*pAlign <= hwAlignment + 1) && !((hwAlignment + 1) % *pAlign)))) // hwAlignment is a >=1 multiple of align
    {
        if (*pAlign <= hwAlignment + 1)
        {
            *pAlign = hwAlignment + 1;
        }

        (*pAlign)--; // convert to (alignment - 1) (not really a "mask")
        // Calculate the new size based on hw alignment.
        *pMemSize = ((*pMemSize + alignPad + hwAlignment) / (hwAlignment + 1)) * (hwAlignment + 1);
        hwAlignment = *pAlign; // this aligns the offset to the requested alignment
    }
    else
    {
        // If this alignment was a force-or-fail, fail it here.
        if (*pAlign != 0 && (allocFlags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
        {
            *pAlign = 0;
            return (NV_ERR_INVALID_ARGUMENT);
        }

        //
        // By default round to the hw alignment. It is important to pad to the page size
        // on Fermi for all allocations, as we cannot mix page sizes on the same physical
        // memory due to page swizzle.
        //
        *pAlign = hwAlignment;
        *pMemSize = ((*pMemSize + alignPad + hwAlignment) / (hwAlignment + 1)) * (hwAlignment + 1);
    }

    return NV_OK;
}
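
//
// Worked example (illustrative, assuming a 64KB big page size): for
// RM_ATTR_PAGE_SIZE_BIG, hwAlignment becomes 0xFFFF. A request of
// *pMemSize = 0x18000 with alignPad = 0 and *pAlign = 0 takes the default
// path, so *pAlign is returned as 0xFFFF (the (alignment - 1) form noted
// above) and the size rounds up to
// ((0x18000 + 0xFFFF) / 0x10000) * 0x10000 = 0x20000.
//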

static void
memmgrSetZbcReferenced
(
    OBJGPU   *pGpu,
    NvHandle  hClient,
    NvHandle  hDevice,
    NvBool    bZbcSurfacesExist
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_MEMSYS_SET_ZBC_REFERENCED_PARAMS params = {0};
    RsClient  *pClient;
    Subdevice *pSubdevice;
    NvHandle   hSubdevice;
    NvU32      subDevInst;

    // Allocations are RPCed to host, so they are counted there
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
        return;

    params.bZbcSurfacesExist = bZbcSurfacesExist;

    NV_ASSERT_OR_RETURN_VOID(
        serverGetClientUnderLock(&g_resServ, hClient, &pClient) == NV_OK);

    subDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    NV_ASSERT_OR_RETURN_VOID(subdeviceGetByInstance(pClient, hDevice, subDevInst, &pSubdevice) == NV_OK);

    hSubdevice = RES_GET_HANDLE(pSubdevice);

    NV_ASSERT_OK(
        pRmApi->Control(
            pRmApi,
            hClient,
            hSubdevice,
            NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_ZBC_REFERENCED,
            &params,
            sizeof(params)));
}

//
// Update user alloc request parameters according to memory
// type and (possibly) reserve hw resources.
//
NV_STATUS
memmgrAllocHal_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    FB_ALLOC_INFO *pFbAllocInfo
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NV_STATUS status = NV_OK;
    NvU32 comprAttr, zcullAttr, type;
    NvU32 cacheAttr;
    NvU32 format, kind, bAlignPhase;
    NvU32 retAttr = pFbAllocInfo->retAttr;
    NvU32 retAttr2 = pFbAllocInfo->retAttr2;
    NV_ADDRESS_SPACE addrSpace;
    NvBool bComprWar = NV_FALSE;
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);

    // Get the specified attribute values.
    comprAttr = DRF_VAL(OS32, _ATTR, _COMPR, pFbAllocInfo->pageFormat->attr);
    zcullAttr = DRF_VAL(OS32, _ATTR, _ZCULL, pFbAllocInfo->pageFormat->attr);
    format    = DRF_VAL(OS32, _ATTR, _FORMAT, pFbAllocInfo->pageFormat->attr);
    cacheAttr = DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pFbAllocInfo->pageFormat->attr2);
    type      = pFbAllocInfo->pageFormat->type;
    addrSpace = memmgrAllocGetAddrSpace(pMemoryManager, pFbAllocInfo->pageFormat->flags, retAttr);

    if (NVOS32_ATTR_LOCATION_AGP == DRF_VAL(OS32, _ATTR, _LOCATION, pFbAllocInfo->pageFormat->attr))
        return NV_ERR_NOT_SUPPORTED; // only local vid & pci (sysmem) supported

    bAlignPhase = !!(pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC);

    //
    // Error-check the specified attributes.
    // NOTE: With the new macro, the distinction between 32-bit colour
    // compression and Z compression is in the value of 'type' - DEPTH or IMAGE -
    // so the caller is urged to verify integrity.
    //
    if (
        // check the value of the compression attribute;
        // attribute verification for compressed surfaces
        !(memmgrVerifyComprAttrs_HAL(pMemoryManager, type, format, comprAttr)) ||
        // depth buffer attribute verification
        !(memmgrVerifyDepthSurfaceAttrs_HAL(pMemoryManager, type, format))
        || (zcullAttr == NVOS32_ATTR_ZCULL_REQUIRED) || (zcullAttr == NVOS32_ATTR_ZCULL_SHARED)
        )
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (cacheAttr == NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT)
    {
        //
        // The GPU cache is not sysmem coherent. Caching sysmem in the GPU cache requires
        // clients to issue GPU cache invalidates to maintain coherency.
        //
        if (addrSpace == ADDR_SYSMEM)
        {
            retAttr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _NO, retAttr2);
        }
        else
        {
            retAttr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _YES, retAttr2);
        }
    }

    if (!FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr))
    {
        if (pMemorySystemConfig->bDisableCompbitBacking)
        {
            NV_PRINTF(LEVEL_INFO, "compression disabled due to regkey\n");
            retAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr);
        }
        else if (!memmgrComprSupported(pMemoryManager, addrSpace))
        {
            if (FLD_TEST_DRF(OS32, _ATTR, _COMPR, _REQUIRED, retAttr))
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Compression not supported for this configuration.\n");
                return NV_ERR_NOT_SUPPORTED;
            }
            else
            {
                retAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr);
            }
        }
    }

    {
        status = memmgrChooseKind_HAL(pGpu, pMemoryManager, pFbAllocInfo->pageFormat,
                                      DRF_VAL(OS32, _ATTR, _COMPR, retAttr), &kind);
        if (status != NV_OK)
        {
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    pFbAllocInfo->pageFormat->kind = kind;

    //
    // See Bug 351429: It should not be an error to specify an uncompressible kind;
    // -pte_kind[CZ] should be sufficient even if -compress[CZ] is not specified.
    //
    if (!memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
    {
        retAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr);
    }

    // Ideally compression should only be enabled on big/huge page mappings.
    if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, retAttr) &&
        !FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr) &&
        !pMemoryManager->bSmallPageCompression)
    {
        if (FLD_TEST_DRF(OS32, _ATTR, _COMPR, _REQUIRED, retAttr))
        {
            // We allow this in MODS due to test requirements.
            if (!RMCFG_FEATURE_PLATFORM_MODS)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "ERROR: Compression requested for small page allocation.\n");
                return NV_ERR_NOT_SUPPORTED;
            }
        }
        else
        {
            if (RMCFG_FEATURE_PLATFORM_MODS)
            {
                retAttr = FLD_SET_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr);
                if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
                {
                    kind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, NV_FALSE);
                    pFbAllocInfo->pageFormat->kind = kind;
                }
            }
            else
            {
                bComprWar = NV_TRUE;
            }
        }
    }

    // Allocate zcull before we save the pitch and size.
    pFbAllocInfo->hwResId = 0;

    //
    // Attempt to allocate ctags to go with this allocation.
    // Note: The way things work, we're actually allocating ctags for a region
    // which hasn't been allocated yet. We only know the size the region will be.
    // Later we'll get a call to bind (fbsetallocparams). But this fbsetallocparams
    // call isn't late enough! We need a contextdma to actually bind with!
    // So we have to keep track of the allocation by creating a marker for it and using
    // the hwresid to invoke it later :(
    //
    if (!FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, retAttr))
    {
        if (!bAlignPhase && !bComprWar &&
            !(pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_VIRTUAL))
        {
            status = kmemsysAllocComprResources_HAL(pGpu, pKernelMemorySystem, pFbAllocInfo,
                                                    pFbAllocInfo->origSize, 1, &retAttr, retAttr2);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "memsysAllocComprResources failed\n");

                return status;
            }
        }
    }
    //
    // !!WARNING!!!
    //
    // This flag is introduced as a temporary WAR to enable color compression
    // without ZBC. RM will skip refcounting the ZBC table when this flag is set.
    // The PTE kind could still support ZBC (there is sometimes no non-ZBC equivalent),
    // hence the UMD has to disable ZBC for the app by masking all the ZBC slots.
    // It's a temporary WAR until we implement per-process ZBC slot management.
    //
    if (FLD_TEST_DRF(OS32, _ATTR2, _ZBC_SKIP_ZBCREFCOUNT, _NO, pFbAllocInfo->pageFormat->attr2))
    {
        if (
            !IS_MIG_ENABLED(pGpu) &&
            memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_ZBC, pFbAllocInfo->pageFormat->kind) &&
            !(pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_VIRTUAL))
        {
            retAttr2 = FLD_SET_DRF(OS32, _ATTR2, _ZBC, _PREFER_ZBC, retAttr2);
            if (!bAlignPhase)
            {
                pMemoryManager->zbcSurfaces++;
                NV_PRINTF(LEVEL_INFO,
                          "zbcSurfaces = 0x%x, hwResId = 0x%x\n",
                          pMemoryManager->zbcSurfaces, pFbAllocInfo->hwResId);

                if (pMemoryManager->zbcSurfaces == 1)
                    memmgrSetZbcReferenced(pGpu, pFbAllocInfo->hClient, pFbAllocInfo->hDevice, NV_TRUE);
            }
        }
        else
        {
            retAttr2 = FLD_SET_DRF(OS32, _ATTR2, _ZBC, _PREFER_NO_ZBC, retAttr2);
        }
    }
    else
    {
        NV_ASSERT(FLD_TEST_DRF(OS32, _ATTR2, _ZBC, _PREFER_NO_ZBC, retAttr2));
    }

    pFbAllocInfo->format = kind;
    pFbAllocInfo->retAttr = retAttr;
    pFbAllocInfo->retAttr2 = retAttr2;

    return (NV_OK);
}
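
//
// Usage note: callers that only need sizing/attribute results set
// NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC, which turns the call above into an
// alignment-only phase (bAlignPhase): the PTE kind and attributes are still
// computed, but ctag allocation and ZBC refcounting are deferred to the real
// allocation pass. Minimal sketch, assuming the generated _HAL dispatch
// wrapper for this routine:
//
#if 0
    pFbAllocInfo->pageFormat->flags |= NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
    NV_ASSERT_OK(memmgrAllocHal_HAL(pGpu, pMemoryManager, pFbAllocInfo));
    // pFbAllocInfo->retAttr/retAttr2 and pageFormat->kind are now valid; no HW
    // resources (ctags, ZBC refcount) have been reserved yet.
#endif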

NV_STATUS
memmgrSetAllocParameters_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    FB_ALLOC_INFO *pFbAllocInfo
)
{

    return NV_OK;
}

//
// Release tile back to the free pool.
//
NV_STATUS
memmgrFreeHal_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    FB_ALLOC_INFO *pFbAllocInfo,
    PRMTIMEOUT     pTimeout
)
{
    NvU32 commitResId = pFbAllocInfo->hwResId;

    if (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC)
    {
        // for vGPU, we set this flag in memmgrAllocHwResources
        return NV_OK;
    }

    // We might want to move this check to higher-level
    if (IS_MIG_ENABLED(pGpu))
    {
        // In SMC mode, we do not program ZCULL or ZBC
        return NV_OK;
    }

    kmemsysFreeComprResources_HAL(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu), commitResId);

    if (FLD_TEST_DRF(OS32, _ATTR2, _ZBC_SKIP_ZBCREFCOUNT, _NO, pFbAllocInfo->pageFormat->attr2) &&
        memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_ZBC, pFbAllocInfo->format))
    {
        NV_ASSERT(pMemoryManager->zbcSurfaces != 0);
        if (pMemoryManager->zbcSurfaces != 0)
        {
            pMemoryManager->zbcSurfaces--;

            if (pMemoryManager->zbcSurfaces == 0)
                memmgrSetZbcReferenced(pGpu, pFbAllocInfo->hClient, pFbAllocInfo->hDevice, NV_FALSE);
        }

        NV_PRINTF(LEVEL_INFO,
                  "[1] hwResId = 0x%x, offset = 0x%llx, size = 0x%llx\n",
                  pFbAllocInfo->hwResId, pFbAllocInfo->offset,
                  pFbAllocInfo->size);

        NV_PRINTF(LEVEL_INFO, "[2] zbcSurfaces = 0x%x\n",
                  pMemoryManager->zbcSurfaces);
    }

    return NV_OK;
}
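
//
// ZBC refcount lifecycle (as implemented here and in memmgrAllocHal_GM107):
// allocating the first ZBC-kind surface moves zbcSurfaces 0 -> 1 and reports
// bZbcSurfacesExist = NV_TRUE to physical RM via
// NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_ZBC_REFERENCED; freeing the last one
// moves it 1 -> 0 and reports NV_FALSE.
//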

NV_STATUS
memmgrGetBAR1InfoForDevice_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    Device        *pDevice,
    PGETBAR1INFO   bar1Info
)
{
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU64       largestFreeSize;
    NvU64       freeSize;
    OBJVASPACE *pBar1VAS;
    OBJEHEAP   *pVASHeap;
    NV_RANGE    bar1VARange = NV_RANGE_EMPTY;
    RsClient   *pClient = RES_GET_CLIENT(pDevice);

    /*
     * For legacy vGPU and SRIOV heavy, get BAR1 information from vGPU plugin.
     */
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
    {
        NV_STATUS status = NV_OK;
        NV2080_CTRL_FB_GET_INFO_V2_PARAMS fbInfoParams = {0};
        Subdevice *pSubdevice;

        NV_ASSERT_OK_OR_RETURN(
            subdeviceGetByInstance(pClient,
                                   RES_GET_HANDLE(pDevice),
                                   gpumgrGetSubDeviceInstanceFromGpu(pGpu),
                                   &pSubdevice));

        fbInfoParams.fbInfoList[0].index = NV2080_CTRL_FB_INFO_INDEX_BAR1_SIZE;
        fbInfoParams.fbInfoList[1].index = NV2080_CTRL_FB_INFO_INDEX_BAR1_AVAIL_SIZE;
        fbInfoParams.fbInfoList[2].index = NV2080_CTRL_FB_INFO_INDEX_BAR1_MAX_CONTIGUOUS_AVAIL_SIZE;
        fbInfoParams.fbInfoList[3].index = NV2080_CTRL_FB_INFO_INDEX_BANK_SWIZZLE_ALIGNMENT;

        fbInfoParams.fbInfoListSize = 4;

        NV_RM_RPC_CONTROL(pGpu, pClient->hClient, RES_GET_HANDLE(pSubdevice),
                          NV2080_CTRL_CMD_FB_GET_INFO_V2,
                          &fbInfoParams, sizeof(fbInfoParams),
                          status);
        if (status == NV_OK)
        {
            bar1Info->bar1Size               = fbInfoParams.fbInfoList[0].data;
            bar1Info->bar1AvailSize          = fbInfoParams.fbInfoList[1].data;
            bar1Info->bar1MaxContigAvailSize = fbInfoParams.fbInfoList[2].data;
            bar1Info->bankSwizzleAlignment   = fbInfoParams.fbInfoList[3].data;
        }
        return status;
    }

    if (!KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu))
    {
        pBar1VAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
        NV_ASSERT_OR_RETURN(pBar1VAS != NULL, NV_ERR_INVALID_STATE);
        pVASHeap = vaspaceGetHeap(pBar1VAS);

        NV_ASSERT_OK_OR_RETURN(kbusGetBar1VARangeForDevice(pGpu, pKernelBus, pDevice, &bar1VARange));
        bar1Info->bar1Size = (NvU32)(rangeLength(bar1VARange) / 1024);
        bar1Info->bankSwizzleAlignment = vaspaceGetBigPageSize(pBar1VAS);

        bar1Info->bar1AvailSize = 0;

        if (pVASHeap != NULL)
        {
            pVASHeap->eheapInfoForRange(pVASHeap, bar1VARange, NULL, &largestFreeSize, NULL, &freeSize);
            bar1Info->bar1AvailSize = (NvU32)(freeSize / 1024);
            bar1Info->bar1MaxContigAvailSize = (NvU32)(largestFreeSize / 1024);
        }
    }
    else
    {
        // When the coherent C2C path is enabled, BAR1 is disabled
        bar1Info->bar1Size = 0;
        bar1Info->bar1AvailSize = 0;
        bar1Info->bar1MaxContigAvailSize = 0;
        bar1Info->bankSwizzleAlignment = 0;
    }
    return NV_OK;
}
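
//
// Unit note: bar1Size, bar1AvailSize and bar1MaxContigAvailSize above are
// reported in KB (byte counts divided by 1024), while bankSwizzleAlignment is
// the BAR1 VA space big page size in bytes; the RPC path is assumed to return
// its fbInfoList data in the same units.
//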

NvU32
memmgrGetReservedHeapSizeMb_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NvU32 i;
    NvU64 rsvdSize = 0;

    // Display and tally the results to make sure the numbers add up.
    for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
    {
        rsvdSize += pMemoryManager->Ram.fbRegion[i].rsvdSize;

        if (pMemoryManager->Ram.fbRegion[i].rsvdSize > 0)
        {
            NV_PRINTF(LEVEL_INFO, "FB region #%d:rsvdSize=%d\n", i,
                      NvU64_LO32(pMemoryManager->Ram.fbRegion[i].rsvdSize));
        }
    }

    rsvdSize = rsvdSize / (1024 * 1024);   // convert bytes to MB

    return (NvU64_LO32(rsvdSize));
}

/*!
 * @brief Set up additional RM reserved memory space for physical carveout.
 */
static void
memmgrStateInitReservedMemory
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
        return;

}

/*!
 * @brief Correct RM reserved memory addresses by adding the region base to them.
 *        Before this point, all reserved memory addresses started at 0.
 */
static NV_STATUS
memmgrStateInitAdjustReservedMemory
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{

    if (!IS_GSP_CLIENT(pGpu))
    {
        NV_STATUS           status;
        KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
        KernelBus          *pKernelBus          = GPU_GET_KERNEL_BUS(pGpu);
        NvU32               allocFlags          = MEMDESC_FLAGS_NONE;
        const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
            kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);

        // Check for allocating VPR memory.
        if (pKernelBus->bInstProtectedMem)
            allocFlags |= MEMDESC_ALLOC_FLAGS_PROTECTED;

        if ((status = memdescCreate(&pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc, pGpu,
                                    GF100_BUS_INSTANCEBLOCK_SIZE,
                                    GF100_BUS_INSTANCEBLOCK_SIZE,
                                    NV_TRUE,
                                    pKernelBus->InstBlkAperture,
                                    pKernelBus->InstBlkAttr,
                                    allocFlags)) != NV_OK)
        {
            return status;
        }

        if ((memdescGetAddressSpace(pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc) == ADDR_SYSMEM) &&
            (gpuIsInstanceMemoryAlwaysCached(pGpu)))
        {
            memdescSetGpuCacheAttrib(pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc, NV_MEMORY_CACHED);
        }

        if ((status = memdescCreate(&pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc, pGpu,
                                    GF100_BUS_INSTANCEBLOCK_SIZE,
                                    GF100_BUS_INSTANCEBLOCK_SIZE,
                                    NV_TRUE,
                                    pKernelBus->InstBlkAperture,
                                    pKernelBus->InstBlkAttr,
                                    allocFlags)) != NV_OK)
        {
            return status;
        }

        if ((memdescGetAddressSpace(pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc) == ADDR_SYSMEM) &&
            (gpuIsInstanceMemoryAlwaysCached(pGpu)))
        {
            memdescSetGpuCacheAttrib(pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc, NV_MEMORY_CACHED);
        }

        switch (pKernelBus->InstBlkAperture)
        {
            default:
            case ADDR_FBMEM:
                pKernelBus->bar1[GPU_GFID_PF].instBlockBase += pMemoryManager->rsvdMemoryBase;
                memdescDescribe(pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc,
                                pKernelBus->InstBlkAperture,
                                pKernelBus->bar1[GPU_GFID_PF].instBlockBase,
                                GF100_BUS_INSTANCEBLOCK_SIZE);
                pKernelBus->bar2[GPU_GFID_PF].instBlockBase += pMemoryManager->rsvdMemoryBase;
                memdescDescribe(pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc,
                                pKernelBus->InstBlkAperture,
                                pKernelBus->bar2[GPU_GFID_PF].instBlockBase,
                                GF100_BUS_INSTANCEBLOCK_SIZE);

                // Pre-fill the cache to prevent FB read accesses if in cache-only mode and not doing a one-time pre-fill.
                if (gpuIsCacheOnlyModeEnabled(pGpu) &&
                    !pMemorySystemConfig->bL2PreFill)
                {
                    kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem, pKernelBus->bar1[GPU_GFID_PF].instBlockBase, GF100_BUS_INSTANCEBLOCK_SIZE);
                    kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem, pKernelBus->bar2[GPU_GFID_PF].instBlockBase, GF100_BUS_INSTANCEBLOCK_SIZE);
                }
                break;

            case ADDR_SYSMEM:
                memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_135,
                                (pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc));
                if (status != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "couldn't allocate BAR1 instblk in sysmem\n");
                    return status;
                }
                pKernelBus->bar1[GPU_GFID_PF].instBlockBase = memdescGetPhysAddr(pKernelBus->bar1[GPU_GFID_PF].pInstBlkMemDesc, AT_GPU, 0);
                memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_136,
                                (pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc));
                if (status != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "couldn't allocate BAR2 instblk in sysmem\n");
                    return status;
                }
                pKernelBus->bar2[GPU_GFID_PF].instBlockBase = memdescGetPhysAddr(pKernelBus->bar2[GPU_GFID_PF].pInstBlkMemDesc, AT_GPU, 0);
                break;
        }
    }

    return NV_OK;
}

/*!
 * @brief Checks that the reserved memory size fits in the BAR0 window
 *
 * pMemoryManager->rsvdMemorySize is checked against the BAR0 window size.
 */
NV_STATUS
memmgrCheckReservedMemorySize_GK104
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_ASSERT_OR_RETURN(pMemoryManager->rsvdMemorySize < DRF_SIZE(NV_PRAMIN), NV_ERR_INSUFFICIENT_RESOURCES);

    return NV_OK;
}

/*!
 * @brief - This routine initializes the reserved video memory
 *          regions specific to GPUs using this HAL entry point; size
 *          arguments are in units of bytes.
 *
 * @param[in]  fbSize  The size of video memory
 */
NV_STATUS
memmgrInitReservedMemory_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64          fbSize
)
{
    NvU64  tmpAddr = 0;
    NvU32  i;
    NvBool bRsvdRegionIsValid = NV_FALSE;
    NvU32  rsvdRegion = 0;
    NvU64  rsvdTopOfMem = 0;
    NvU64  rsvdAlignment = 0;
    NvBool bMemoryProtectionEnabled = NV_FALSE;

    if (!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu))
    {
        if (memmgrComputeAndSetVgaDisplayMemoryBase_GM107(pGpu, fbSize) != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "failed to compute/set VGA display memory base!\n");
            DBG_BREAKPOINT();
        }
    }

    bMemoryProtectionEnabled = gpuIsCCFeatureEnabled(pGpu);

    memmgrStateInitReservedMemory(pGpu, pMemoryManager);

    // Align reserved memory to 64KB granularity.
    pMemoryManager->rsvdMemorySize = NV_ALIGN_UP(pMemoryManager->rsvdMemorySize, 0x10000);

    NV_PRINTF(LEVEL_INFO, "Final reserved memory size = 0x%x\n", pMemoryManager->rsvdMemorySize);

    if (!IS_VIRTUAL(pGpu))
    {
        //
        // Reserved memory must fit in the BAR0 window - compression backing comes
        // after this. It does not matter for GSP itself, as BAR0 is not used.
        //
        NV_ASSERT_OR_RETURN(RMCFG_FEATURE_PLATFORM_GSP ||
            memmgrCheckReservedMemorySize_HAL(pGpu, pMemoryManager) == NV_OK, NV_ERR_INSUFFICIENT_RESOURCES);
    }

    NV_PRINTF(LEVEL_INFO, "RESERVED Memory size: 0x%x\n", pMemoryManager->rsvdMemorySize);

    // ***************************************************************
    // Done sizing reserved memory
    // ***************************************************************

    if (pMemoryManager->Ram.numFBRegions > 0)
    {
        //
        // Find the last region in memory which is not already reserved or
        // protected. RM's reserved memory will then be carved out of it below
        // (once the final size and address are determined).
        // RM internal data like BAR2 page tables, BAR1/2 instance blocks, etc. should
        // always be in protected memory whenever memory protection is enabled using Hopper
        // Confidential Compute. For uses outside Hopper Confidential Compute, RM internal
        // data should always be in unprotected video memory.
        //
        for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
        {
            if (pMemoryManager->Ram.fbRegion[i].bRsvdRegion ||
                (bMemoryProtectionEnabled && !pMemoryManager->Ram.fbRegion[i].bProtected) ||
                (!bMemoryProtectionEnabled && pMemoryManager->Ram.fbRegion[i].bProtected))
            {
                continue;
            }

            bRsvdRegionIsValid = NV_TRUE;
            rsvdRegion = i;
        }
    }

    //
    // No need to create a reserved region for vGPU.
    // For vGPU, memory required for the host will be reserved separately.
    //
    if (IS_VIRTUAL(pGpu))
    {
        bRsvdRegionIsValid = NV_FALSE;
    }

    rsvdAlignment = RM_PAGE_SIZE;

    //
    // Generate the FB physical offset of reserved mem.
    //
    // In L2 cache-only mode, base this off the size of the L2 cache.
    // If reserved memory is at the top of FB, base this off the size of FB.
    //
    if (gpuIsCacheOnlyModeEnabled(pGpu) || !pMemoryManager->bReservedMemAtBottom)
    {
        const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
            kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

        if (gpuIsCacheOnlyModeEnabled(pGpu))
        {
            rsvdTopOfMem = pMemorySystemConfig->l2CacheSize;
        }
        else
        {
            rsvdTopOfMem = pMemoryManager->Ram.fbAddrSpaceSizeMb << 20;

            //
            // We are assuming that the subheap is at the end of guest FB. We place
            // the guest RM reserved region at the end of the guest client owned
            // portion of the guest FB (total guest FB minus the subheap). The
            // guest FB is partitioned in the following way (addresses increasing
            // from left to right).
            //
            //    Region 0                  Region 1                Region 2
            // [Guest client owned FB] [Guest RM reserved region] [Guest subheap]
            //
            // The guest heap is created only for Region 0.
            //
            if (IS_VIRTUAL_WITH_SRIOV(pGpu))
                rsvdTopOfMem -= memmgrGetFbTaxSize_HAL(pGpu, pMemoryManager);

            if (bRsvdRegionIsValid)
            {
                rsvdTopOfMem = NV_MIN(pMemoryManager->Ram.fbRegion[rsvdRegion].limit + 1, rsvdTopOfMem);
            }
        }
        tmpAddr = rsvdTopOfMem - pMemoryManager->rsvdMemorySize;
        pMemoryManager->rsvdMemoryBase = RM_ALIGN_DOWN(tmpAddr, rsvdAlignment);
        pMemoryManager->rsvdMemorySize = NvU64_LO32(rsvdTopOfMem - pMemoryManager->rsvdMemoryBase);

        // Make sure we didn't just blindly truncate that...
        NV_ASSERT(0 == NvU64_HI32(rsvdTopOfMem - pMemoryManager->rsvdMemoryBase));
    }
    // Reserved memory located at the bottom of FB, base this at the start of FB.
    else
    {
        tmpAddr = pMemoryManager->heapStartOffset;
        if (bRsvdRegionIsValid)
        {
            tmpAddr = NV_MAX(pMemoryManager->Ram.fbRegion[rsvdRegion].base, tmpAddr);
        }
        pMemoryManager->rsvdMemoryBase = RM_ALIGN_UP(tmpAddr, rsvdAlignment);
        pMemoryManager->rsvdMemorySize = RM_PAGE_ALIGN_UP(pMemoryManager->rsvdMemorySize);
    }

    if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
    {
        NV_ASSERT(pMemoryManager->Ram.fbUsableMemSize >= pMemoryManager->rsvdMemorySize);
        pMemoryManager->Ram.fbUsableMemSize -= RM_PAGE_ALIGN_UP(pMemoryManager->rsvdMemorySize);
    }

    // Now update the region table to remove rsvd memory.
    if (bRsvdRegionIsValid && pMemoryManager->rsvdMemorySize)
    {
        FB_REGION_DESCRIPTOR rsvdFbRegion;
        portMemSet(&rsvdFbRegion, 0, sizeof(rsvdFbRegion));

        // Add a new region that is a hole for reserved memory.
        rsvdFbRegion.bRsvdRegion = NV_TRUE;
        rsvdFbRegion.base = pMemoryManager->rsvdMemoryBase;
        rsvdFbRegion.limit =
            pMemoryManager->rsvdMemoryBase + pMemoryManager->rsvdMemorySize - 1;
        rsvdFbRegion.performance = 0;
        rsvdFbRegion.bSupportCompressed = NV_FALSE;
        rsvdFbRegion.bSupportISO = NV_FALSE;
        rsvdFbRegion.rsvdSize = pMemoryManager->rsvdMemorySize;
        rsvdFbRegion.bProtected = bMemoryProtectionEnabled;
        rsvdFbRegion.bInternalHeap = NV_TRUE;

        memmgrInsertFbRegion(pGpu, pMemoryManager, &rsvdFbRegion);
    }

    // Add the reserved FB region base (above) to reserved memory.
    NV_ASSERT_OK_OR_RETURN(memmgrStateInitAdjustReservedMemory(pGpu, pMemoryManager));

    return NV_OK;
}
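
//
// Worked example (illustrative values): with reserved memory at the top of an
// 8GB FB, rsvdTopOfMem = 8192MB << 20 = 0x200000000. For a 64KB-aligned
// rsvdMemorySize of 0x410000, rsvdMemoryBase = RM_ALIGN_DOWN(0x200000000 -
// 0x410000, 4KB) = 0x1FFBF0000; the base is already 4KB aligned, so the size
// is unchanged. Had tmpAddr not been aligned, rounding the base down would
// grow rsvdMemorySize to still reach rsvdTopOfMem.
//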

/**
 * @brief Compares two addresses and apertures and returns whether they are equivalent.
 *
 * @param[in] target0
 * @param[in] address0
 * @param[in] target1
 * @param[in] address1
 *
 * @return NV_TRUE if the addresses refer to the same memory location, NV_FALSE otherwise
 */
NvBool
memmgrComparePhysicalAddresses_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          target0,
    NvU64          address0,
    NvU32          target1,
    NvU64          address1
)
{
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);

    //
    // Sysmem inst blocks can be flipped:
    //  * PDB_PROP_FIFO_BUG_442481_NCOH_INST_BLOCK_DEF: ncoh -> coh
    // For system memory there is no harm in matching both sysmem
    // apertures; it is really only vital that we check between vid
    // and system memory. Force both to SYS NCOH if system coherent.
    //
    if (target0 == NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY)
        target0 = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;
    if (target1 == NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY)
        target1 = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;

    if (target0 == NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY)
    {
        //
        // One of the addresses may not account for the DMA window while the
        // other does. Given the nature of the DMA window (its offset must be
        // outside the addressable range of the GPU or 0), there's no danger
        // in trying to account for it here; it can't cause any false
        // positives.
        //
        if (address0 < address1)
            address0 += pKernelBif->dmaWindowStartAddress;
        else if (address1 < address0)
            address1 += pKernelBif->dmaWindowStartAddress;
    }

    return (target0 == target1) && (address0 == address1);
}
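
//
// Example (illustrative): a PTE targeting SYSTEM_COHERENT_MEMORY and one
// targeting SYSTEM_NON_COHERENT_MEMORY at the same physical address compare
// equal here, since both apertures are first folded to SYS NCOH; vidmem vs
// sysmem at the same offset still compares unequal.
//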

/*!
 * @brief - This routine computes the location in memory to
 *          relocate VGA display memory to; size arguments are
 *          expected in units of bytes.
 *
 * @param[in]  pGpu     GPU object pointer
 * @param[in]  fbSize   The size of video memory
 */
static NV_STATUS
memmgrComputeAndSetVgaDisplayMemoryBase_GM107
(
    OBJGPU *pGpu,
    NvU64   fbSize
)
{

    return NV_OK;
}

/*!
 * @brief: Returns the PTE kind of block linear surfaces
 */
NvU32
memmgrGetPteKindBl_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    return NV_MMU_PTE_KIND_GENERIC_16BX2;
}

/*!
 * @brief: Returns the PTE kind of pitch linear surfaces
 */
NvU32
memmgrGetPteKindPitch_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    return NV_MMU_PTE_KIND_PITCH;
}

#define PAGE_ALIGN_MATCH(value, pageSize) ((value & (pageSize - 1)) == 0)

//
// Try to determine the optimal page size. See if both the alignment of the
// physical address and the alignment of the allocation size fit one of the
// larger page sizes.
//
static NvU64
_memmgrGetOptimalSysmemPageSize
(
    RmPhysAddr         physAddr,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64              bigPageSize,
    NvU64              sysmemPageSize
)
{
    NvBool bIsContiguous = memdescGetContiguity(pMemDesc, AT_GPU);

    //
    // The optimization currently only applies to contiguous memory.
    //
    if (bIsContiguous)
    {
        if (PAGE_ALIGN_MATCH(physAddr, RM_PAGE_SIZE_HUGE) &&
            PAGE_ALIGN_MATCH(pMemDesc->Size, RM_PAGE_SIZE_HUGE))
        {
            return RM_PAGE_SIZE_HUGE;
        }

        if (PAGE_ALIGN_MATCH(physAddr, bigPageSize) &&
            PAGE_ALIGN_MATCH(pMemDesc->Size, bigPageSize))
        {
            return bigPageSize;
        }
    }

    return sysmemPageSize;
}
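
//
// Example (illustrative, assuming RM_PAGE_SIZE_HUGE == 2MB and a 64KB big
// page): a contiguous sysmem descriptor whose physAddr and Size are both
// 2MB aligned returns the 2MB huge page size; if both are only 64KB aligned
// it returns the big page size; otherwise the sysmemPageSize default
// (pMemoryManager->sysmemPageSize at the call site below) is used.
//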

#undef PAGE_ALIGN_MATCH

//
// Set the page size for the memory descriptor. The page size for a piece of memory
// may be set when it is mapped or when it is bound to a display channel. The current
// algorithm is simple. Default is 4KB in system memory (only choice) or large
// pages in video memory if the allocation is larger than the small page size.
//
// Some APIs allow the page size to be specified. Allow this if the page size is unset,
// otherwise error-check it against the existing page size.
//
// We depend on fbgf100.c rounding up allocations to 4KB or bigPageSize to have coherent
// mapping sizes. This does not show up in pMemDesc->Size at this point, so we have
// to trust that nothing is overlapping and cannot do full error checking.
//
// Big and huge pages are supported only in vidmem by default. In order to support
// big/huge pages in sysmem as is required by ATS (on Volta) and a few arch tests on Pascal
// (better TLB hit), we need to set the regkey RMSysmemPageSize equal to the page size.
// See bugs 1700272 and 1622233.
//
// TODO: Due to the page size swizzling, allocations should not physically overlap
//       within their swizzle range. I am not sure the heap enforces this.
//
// NOTE: Scattered vidmem with big pages is not checked right now, as it is not supported yet.
//
NV_STATUS
memmgrSetMemDescPageSize_GM107
(
    OBJGPU             *pGpu,
    MemoryManager      *pMemoryManager,
    MEMORY_DESCRIPTOR  *pMemDesc,
    ADDRESS_TRANSLATION addressTranslation,
    RM_ATTR_PAGE_SIZE   pageSizeAttr
)
{
    NvU64 newPageSize = RM_PAGE_SIZE;
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_ADDRESS_SPACE addrSpace = memdescGetAddressSpace(pMemDesc);
    NvU64 oldPageSize;

    // This policy is meaningless for virtual memdescs, so abort early.
    if (ADDR_VIRTUAL == addrSpace)
    {
        return NV_OK;
    }

    if (ADDR_SYSMEM == addrSpace)
    {
        RmPhysAddr physAddr = memdescGetPte(pMemDesc, addressTranslation, 0);
        switch (pageSizeAttr)
        {
            case RM_ATTR_PAGE_SIZE_INVALID:
                NV_PRINTF(LEVEL_ERROR, "invalid page size attr\n");
                return NV_ERR_INVALID_ARGUMENT;
            case RM_ATTR_PAGE_SIZE_DEFAULT:
                newPageSize = _memmgrGetOptimalSysmemPageSize(physAddr,
                        pMemDesc, kgmmuGetBigPageSize_HAL(pKernelGmmu),
                        pMemoryManager->sysmemPageSize);
                break;
            case RM_ATTR_PAGE_SIZE_4KB:
                newPageSize = RM_PAGE_SIZE;
                break;
            case RM_ATTR_PAGE_SIZE_BIG:
                newPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu);
                break;
            case RM_ATTR_PAGE_SIZE_HUGE:
                NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
                                    NV_ERR_NOT_SUPPORTED);
                // If forcing the huge page size, the underlying memory must be aligned.
                NV_ASSERT_OR_RETURN(0 == (physAddr & (RM_PAGE_SIZE_HUGE - 1)), NV_ERR_INVALID_OFFSET);
                newPageSize = RM_PAGE_SIZE_HUGE;
                break;
            case RM_ATTR_PAGE_SIZE_512MB:
                NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
                                    NV_ERR_NOT_SUPPORTED);
                // If forcing the 512MB page size, the underlying memory must be aligned.
                NV_ASSERT_OR_RETURN(0 == (physAddr & (RM_PAGE_SIZE_512M - 1)), NV_ERR_INVALID_OFFSET);
                newPageSize = RM_PAGE_SIZE_512M;
                break;
        }
    }
    else if (ADDR_FBMEM == addrSpace)
    {
        RmPhysAddr physAddr = memdescGetPte(pMemDesc, addressTranslation, 0);
        switch (pageSizeAttr)
        {
            case RM_ATTR_PAGE_SIZE_INVALID:
                NV_PRINTF(LEVEL_ERROR, "invalid page size attr\n");
                return NV_ERR_INVALID_ARGUMENT;
            case RM_ATTR_PAGE_SIZE_DEFAULT:
            {
                NvBool bUseDefaultHugePagesize = NV_TRUE;
                // On WDDMv2, Windows expects the default page size to be 4KB/64KB/128KB.
                // Big enough and aligned for huge pages?
                if (bUseDefaultHugePagesize &&
                    kgmmuIsHugePageSupported(pKernelGmmu) &&
                    (pMemDesc->Size >= RM_PAGE_SIZE_HUGE) &&
                    (0 == (physAddr & (RM_PAGE_SIZE_HUGE - 1))))
                {
                    newPageSize = RM_PAGE_SIZE_HUGE;
                }
                // Big enough and aligned for big pages?
                else if (((pMemDesc->Size >= kgmmuGetMinBigPageSize(pKernelGmmu)) ||
                          (memmgrIsKindCompressible_HAL(pMemoryManager, memdescGetPteKind(pMemDesc)))) &&
                         ((physAddr & (kgmmuGetMaxBigPageSize_HAL(pKernelGmmu) - 1)) == 0))
                {
                    newPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu);
                }
                break;
            }
            case RM_ATTR_PAGE_SIZE_4KB:
                newPageSize = RM_PAGE_SIZE;
                break;
            case RM_ATTR_PAGE_SIZE_BIG:
                newPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu);
                // If forcing the big page size, the underlying memory must be aligned.
                NV_ASSERT_OR_RETURN(0 == (physAddr & (newPageSize - 1)), NV_ERR_INVALID_OFFSET);
                break;
            case RM_ATTR_PAGE_SIZE_HUGE:
                NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
                                    NV_ERR_NOT_SUPPORTED);
                // If forcing the huge page size, the underlying memory must be aligned.
                NV_ASSERT_OR_RETURN(0 == (physAddr & (RM_PAGE_SIZE_HUGE - 1)), NV_ERR_INVALID_OFFSET);
                newPageSize = RM_PAGE_SIZE_HUGE;
                break;
            case RM_ATTR_PAGE_SIZE_512MB:
                NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
                                    NV_ERR_NOT_SUPPORTED);
                // If forcing the 512MB page size, the underlying memory must be aligned.
                NV_ASSERT_OR_RETURN(0 == (physAddr & (RM_PAGE_SIZE_512M - 1)), NV_ERR_INVALID_OFFSET);
                newPageSize = RM_PAGE_SIZE_512M;
                break;
        }
    }

    // Only update the memory descriptor if it is unset.
    oldPageSize = memdescGetPageSize(pMemDesc, addressTranslation);
    if (0 == oldPageSize)
    {
        memdescSetPageSize(pMemDesc, addressTranslation, newPageSize);
    }
    else if (pageSizeAttr != RM_ATTR_PAGE_SIZE_DEFAULT)
    {
        // If this memdesc already has a page size, the override must match.
        NV_ASSERT_OR_RETURN(oldPageSize == newPageSize, NV_ERR_INVALID_ARGUMENT);
    }

    return NV_OK;
}
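
//
// Usage note: the page size is sticky. The first call that sets it wins; a
// later call with an explicit (non-DEFAULT) pageSizeAttr that disagrees with
// the recorded page size fails with NV_ERR_INVALID_ARGUMENT, while
// RM_ATTR_PAGE_SIZE_DEFAULT silently defers to the existing value.
//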

/*!
 * @brief Calculate the Vista reserved memory requirement
 *        per FB region for UVM to create sysmem mappings for UVM objects.
 *
 * @param[out] rsvdSlowSize   generic reserved RM memory needed in the slow region
 */
void
memmgrCalcReservedFbSpaceForUVM_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64         *rsvdSlowSize
)
{
    //
    // For WDDM-UVM, reserve space to create an identity mapping (deviceVA = devicePA). (Kepler only)
    //
    *rsvdSlowSize +=
        1 * 1024 * 1024; // 1MB space to map 4K pages of ~512MB sysmem = Pushbuffers(480MB) + SemaphoreVA(8KB) + PDE(512 entries * 8)
}
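
//
// Arithmetic behind the 1MB figure above (illustrative): ~512MB of sysmem in
// 4K pages is 512MB / 4KB = 128K PTEs; at 8 bytes per PTE that is 1MB of
// reserved space, matching the pushbuffer + semaphore + PDE breakdown noted
// in the comment.
//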

/*!
 * @brief Calculate the reserved memory requirement for pre-allocated UserD.
 *
 * @return Size of UserD reserved memory in bytes.
 */
NvU32
memmgrGetUserdReservedFbSpace_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    return 2 * 1024 * 1024;
}

/*!
 * @brief - This function returns the size reserved for the WDDM
 * S/R buffer. RM returns a larger size just so that our S/R buffer
 * consumption will never go beyond it. There is no deterministic way to find
 * this value, so a constant is returned here.
 * Note: The OS doesn't really allocate any buffer of this size; we will get the
 * real memory only when we try to map during suspend/resume.
 */
NvU64
memmgrGetRsvdSizeForSr_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (((pMemoryManager->Ram.fbTotalMemSizeMb >> 10) > 32) || IS_GSP_CLIENT(pGpu))
    {
        //
        // We need to reserve more memory for S/R if
        // 1. FB size is > 32GB (bug 2468357)
        // 2. Or GSP is enabled (bug 4312881)
        //
        return 512 * 1024 * 1024;
    }
    else
    {
        return 256 * 1024 * 1024;
    }
}
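
//
// Note: (fbTotalMemSizeMb >> 10) converts MB to GB, so the first branch above
// selects the 512MB S/R reserve for boards with more than 32GB of FB (bug
// 2468357) or whenever GSP is enabled (bug 4312881); all other configurations
// reserve 256MB.
//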

NvU32
memmgrGetRunlistEntriesReservedFbSpace_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    // Kepler runlist: 4096 entries * 8B/entry * 7 engines * 2 runlists/engine = 458K
    return (4096 * 8 * 7 * 2);
}


/*!
 * @brief Override scrubber-related PDB properties based on regkeys and platform configs
 */
void
memmgrScrubRegistryOverrides_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

    //
    // Disable the SCRUB_ON_FREE property on all platforms except Windows TCC mode.
    // Disabling in non-TCC Windows because the OS manages FB.
    // Disabling in RTLSIM and FMODEL because the feature is slower on simulation platforms.
    // Disabling in DFPGA, since they skip the host load.
    // Disabling in MODS, because scrub on free slows down the MODS run time.
    // Disabling for vGPU (host), since the plugin has scrubbing support.
    // Disabling for vGPU (guest), blocked on bug 1929798.
    // Disabling for SLI for now, until bug 1790190 is fixed.
    // Disabling for the GSP-RM ucode, since scrubbing is done from the CPU-side kernel RM.
    // Disabling when the regkey override is used.
    //
    if ((RMCFG_FEATURE_PLATFORM_WINDOWS && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE)) ||
        IS_RTLSIM(pGpu) || IS_FMODEL(pGpu) || IsDFPGA(pGpu) ||
        (RMCFG_FEATURE_PLATFORM_MODS && !pMemorySystemConfig->bOneToOneComptagLineAllocation) ||
        pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU) ||
        IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        RMCFG_FEATURE_PLATFORM_GSP ||
        IsSLIEnabled(pGpu))
    {
        pMemoryManager->bScrubOnFreeEnabled = NV_FALSE;
    }

    if (pMemoryManager->bDisableAsyncScrubforMods)
    {
        // Force-disable scrub on free in case the wrong set of regkeys is set.
        pMemoryManager->bScrubOnFreeEnabled = NV_FALSE;
    }

    if ((IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)) ||
        pMemorySystemConfig->bOneToOneComptagLineAllocation)
    {
        pMemoryManager->bUseVasForCeMemoryOps = NV_TRUE;
    }
}

/*!
 * @brief Get the top of memory in MB
 *
 * Calculate the actual physical address space size of FB, without
 * regard for overrides or caps.
 *
 * @returns the physical address space size of FB, which is greater
 *          than or equal to the populated FB memory size
 */
NvU32
memmgrGetAddrSpaceSizeMB_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_ASSERT(pMemoryManager->Ram.fbAddrSpaceSizeMb != 0);

    return NvU64_LO32(pMemoryManager->Ram.fbAddrSpaceSizeMb);
}

//
// Get FB RAM size (usable and mappable).
//
NvU32
memmgrGetUsableMemSizeMB_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_ASSERT(pMemoryManager->Ram.fbAddrSpaceSizeMb != 0);

    // We shouldn't ever need this, but...
    NV_ASSERT(0 == NvU64_HI32(pMemoryManager->Ram.fbUsableMemSize >> 20));
    return NvU64_LO32(pMemoryManager->Ram.fbUsableMemSize >> 20);
}

#define _MAX_COVG (100*NVOS32_ALLOC_COMPR_COVG_SCALE)

//
// memmgrGetBankPlacementData
//
NV_STATUS
memmgrGetBankPlacementData_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32         *pPlacementStrategy
)
{
    // Set up the bank placement arrays.
    pPlacementStrategy[BANK_PLACEMENT_IMAGE] = ((0)
                                                | BANK_MEM_GROW_UP
                                                | MEM_GROW_UP
                                                | 0xFFFFFF00);
    pPlacementStrategy[BANK_PLACEMENT_DEPTH] = ((0)
                                                | BANK_MEM_GROW_DOWN
                                                | MEM_GROW_DOWN
                                                | 0xFFFFFF00);
    pPlacementStrategy[BANK_PLACEMENT_TEX_OVERLAY_FONT] = ((0)
                                                | BANK_MEM_GROW_DOWN
                                                | MEM_GROW_UP
                                                | 0xFFFFFF00);
    pPlacementStrategy[BANK_PLACEMENT_OTHER] = ((0)
                                                | BANK_MEM_GROW_DOWN
                                                | MEM_GROW_DOWN
                                                | 0xFFFFFF00);
    return (NV_OK);
}

//
// memmgrDirtyForPmTest
// Developed solely for testing the suspend/resume path. The goal here is, before
// resuming the GPU, to dirty the entire FB to verify whether
// RM has saved and restored all the critical data structures and states
// during suspend/resume. Called using the RMCTRL NV208F_CTRL_CMD_SUSPEND_RESUME_QUICK.
// WARNING: This function uses the BAR0 window (which is always physical)
// to dirty the FB contents. Upon exit of this call, FB RAM is dirty and
// cannot be used, unless all the data needed is restored during resume.
//
void
memmgrDirtyForPmTest_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvBool         partialDirty
)
{
}

/*!
 * @brief Return an invalid placeholder FB offset. Should be 128KB aligned for Fermi chips.
 *
 * @returns offset
 */
RmPhysAddr
memmgrGetInvalidOffset_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    return 0xdead000000000000ull;
}

/*!
 * @brief Get blacklist page details.
 *
 * @param[in]      pGpu            OBJGPU
 * @param[in]      pMemoryManager  MemoryManager
 * @param[out]     pBlAddrs        BLACKLIST_ADDRESS array in which the
 *                                 addresses are returned.
 * @param[in/out]  pCount          Takes the size of pBlAddrs as input and
 *                                 returns the number of populated addresses
 *                                 in pBlAddrs.
 *
 * @returns NV_STATUS
 */
NV_STATUS
memmgrGetBlackListPages_GM107
(
    OBJGPU            *pGpu,
    MemoryManager     *pMemoryManager,
    BLACKLIST_ADDRESS *pBlAddrs,
    NvU32             *pCount
)
{
    RM_API    *pRmApi;
    NV_STATUS  status = NV_OK;
    NvU32      idx;
    NvU32      entryIdx = 0;
    NV2080_CTRL_FB_GET_OFFLINED_PAGES_PARAMS *pParams;
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

    if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) ||
        !gpuCheckPageRetirementSupport_HAL(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    //
    // Read the inforom for a list of pages to blacklist.
    // SLI support requires investigation to ensure
    // identical heaps on both devices (bug 756971).
    //
    if (IsSLIEnabled(pGpu) && !gpuIsEccPageRetirementWithSliAllowed(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    pParams = portMemAllocStackOrHeap(sizeof(*pParams));
    if (pParams == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }
    portMemSet(pParams, 0, sizeof(*pParams));

    pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    status = pRmApi->Control(pRmApi,
                             pGpu->hInternalClient,
                             pGpu->hInternalSubdevice,
                             NV2080_CTRL_CMD_FB_GET_OFFLINED_PAGES,
                             pParams,
                             sizeof(*pParams));

    if (status == NV_OK)
    {
        for (idx = 0; idx < pParams->validEntries; idx++)
        {
            if (entryIdx >= *pCount)
            {
                status = NV_ERR_BUFFER_TOO_SMALL;
                goto done;
            }
            if (pMemorySystemConfig->bEnabledEccFBPA)
            {
                pBlAddrs[entryIdx].address =
                    pParams->offlined[idx].pageAddressWithEccOn << RM_PAGE_SHIFT;
            }
            else
            {
                pBlAddrs[entryIdx].address =
                    pParams->offlined[idx].pageAddressWithEccOff << RM_PAGE_SHIFT;
            }
            pBlAddrs[entryIdx].type = pParams->offlined[idx].source;
            entryIdx++;
        }
    }
    else if (NV_ERR_NOT_SUPPORTED == status)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Offlining pages not supported\n");
    }
    else
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to read offlined addresses\n");
    }

done:
    *pCount = entryIdx;

    portMemFreeStackOrHeap(pParams);

    return status;
}
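
//
// Typical usage (sketch, not compiled; mirrors memmgrGetBlackListPagesForHeap
// below): size the array from kmemsysGetMaximumBlacklistPages(), then convert
// the returned byte addresses back to page numbers with RM_PAGE_SHIFT.
//
#if 0
    NvU32 count = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);
    BLACKLIST_ADDRESS *pBlAddrs = portMemAllocNonPaged(sizeof(*pBlAddrs) * count);
    if ((pBlAddrs != NULL) &&
        (memmgrGetBlackListPages_HAL(pGpu, pMemoryManager, pBlAddrs, &count) == NV_OK))
    {
        // pBlAddrs[i].address is a byte address (page number << RM_PAGE_SHIFT)
        // and pBlAddrs[i].type is the offlining source.
    }
    portMemFree(pBlAddrs);
#endif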

//
// Get the blacklisted pages and notify the heap
//
NV_STATUS
memmgrGetBlackListPagesForHeap_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    Heap          *pHeap
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    BLACKLIST_ADDRESS  *pBlAddrs;
    NvU32               idx;
    NV_STATUS           status;
    NvU32               count;

    count = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);
    pBlAddrs = portMemAllocNonPaged(sizeof(BLACKLIST_ADDRESS) * count);
    if (pBlAddrs == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }

    status = memmgrGetBlackListPages_HAL(pGpu, pMemoryManager, pBlAddrs, &count);
    NV_ASSERT(status != NV_ERR_BUFFER_TOO_SMALL);

    if (status == NV_OK)
    {
        for (idx = 0; idx < count; idx++)
        {
            status = heapAddPageToBlackList(pGpu, pHeap,
                                            pBlAddrs[idx].address >> RM_PAGE_SHIFT,
                                            pBlAddrs[idx].type);
            if (NV_OK != status)
            {
                NV_PRINTF(LEVEL_ERROR, "No more space in blacklist, status: %x!\n", status);
                NV_ASSERT(0);
                break;
            }
        }
    }

    portMemFree(pBlAddrs);

    // Failure to read offlined pages from the host is not fatal
    return NV_OK;
}
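
//
// Unit note (illustrative): memmgrGetBlackListPages_HAL() returns byte
// addresses (page number << RM_PAGE_SHIFT), while heapAddPageToBlackList()
// consumes page numbers, hence the >> RM_PAGE_SHIFT above when re-feeding
// the addresses. For example, with 4K RM pages, page 0x5 corresponds to
// byte address 0x5000.
//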

NvU32
memmgrGetFBEndReserveSizeEstimate_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    const NvU32 ESTIMATED_RESERVE_FB = 0x200000;

    return ESTIMATED_RESERVE_FB;
}

/*!
 * @brief Calculate the reserved memory requirement
 *        per FB region for mixed type/density
 *
 * @param[out] rsvdFastSize generic reserved RM memory needed in fast region
 * @param[out] rsvdSlowSize generic reserved RM memory needed in slow region
 * @param[out] rsvdISOSize  ISO-specific reserved RM memory needed
 *
 * @returns void
 *
 */
void
memmgrCalcReservedFbSpaceHal_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64         *rsvdFastSize,
    NvU64         *rsvdSlowSize,
    NvU64         *rsvdISOSize
)
{
    KernelGmmu     *pKernelGmmu     = GPU_GET_KERNEL_GMMU(pGpu);
    KernelFifo     *pKernelFifo     = GPU_GET_KERNEL_FIFO(pGpu);
    KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
    NvU64           smallPagePte = 0;
    NvU64           bigPagePte = 0;
    NvU32           attribBufferSize;
    NvU64           maxContextSize = 0;
    NvU64           userdReservedSize = 0;
    NvU64           runlistEntriesReservedSize = 0;
    NvU64           mmuFaultBufferSize = 0;
    NvU64           faultMethodBufferSize = 0;
    NV_STATUS       status = NV_OK;

    // Initialize reserved block logging data structure
    NV_FB_RSVD_BLOCK_LOG_INIT(pMemoryManager);

    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
    {
        //
        // 4MB of reserved memory for vGPU.
        // Mainly to satisfy KMD memory allocations.
        //
        *rsvdFastSize = 4 * 1024 * 1024;
        *rsvdSlowSize = 0;
        *rsvdISOSize  = 0;
        return;
    }

#if defined(NV_UNIX)
    if (IS_VIRTUAL_WITH_SRIOV(pGpu) && pMemoryManager->Ram.fbTotalMemSizeMb <= 1024)
    {
        //
        // 88MB of reserved memory for a vGPU guest with a low FB size (1GB)
        // in full SRIOV mode. On lower vGPU profiles, available FB memory is
        // already scarce, so the guest reserved FB memory is reduced to
        // compensate.
        //
        *rsvdFastSize = 88 * 1024 * 1024;
        *rsvdSlowSize = 0;
        *rsvdISOSize  = 0;
        return;
    }
#endif

    {
        *rsvdFastSize = 0;

        // Allow reservation up to half of usable FB size
        if (pMemoryManager->rsvdMemorySizeIncrement > (pMemoryManager->Ram.fbUsableMemSize / 2))
        {
            pMemoryManager->rsvdMemorySizeIncrement = pMemoryManager->Ram.fbUsableMemSize / 2;
            NV_PRINTF(LEVEL_ERROR,
                      "RM can only increase reserved heap by 0x%llx bytes\n",
                      pMemoryManager->rsvdMemorySizeIncrement);
        }
        NV_PRINTF(LEVEL_INFO, "RT::: incrementing the reserved size by: %llx\n",
                  pMemoryManager->rsvdMemorySizeIncrement);
        *rsvdSlowSize = pMemoryManager->rsvdMemorySizeIncrement;
        *rsvdISOSize  = 0;
    }

    if (RMCFG_FEATURE_PLATFORM_WINDOWS && pMemoryManager->bBug2301372IncreaseRmReserveMemoryWar)
    {
        *rsvdFastSize += 30 * 1024 * 1024;
    }

    attribBufferSize = memmgrGetGrHeapReservationSize_HAL(pGpu, pMemoryManager);

    // Fast: Attribute buffer
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_ATTR_BUFFER,
                                   attribBufferSize);
    *rsvdFastSize += attribBufferSize;

    // Fast: Circular buffer & fudge
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_CIRCULAR_BUFFER,
                                   1 * 1024 * 1024);
    *rsvdFastSize += 1 * 1024 * 1024;

    if (!RMCFG_FEATURE_PLATFORM_GSP)
    {
        // smallPagePte = FBSize / 4K * 8 (small page PTEs for the whole FB)
        smallPagePte = NV_ROUNDUP((pMemoryManager->Ram.fbUsableMemSize / FERMI_SMALL_PAGESIZE) * 8, RM_PAGE_SIZE);

        // bigPagePte = FBSize / bigPageSize * 8 (big page PTEs for the whole FB)
        bigPagePte = NV_ROUNDUP((pMemoryManager->Ram.fbUsableMemSize / (kgmmuGetMaxBigPageSize_HAL(pKernelGmmu))) * 8,
                                RM_PAGE_SIZE);

        NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_PAGE_PTE,
                                       (smallPagePte + bigPagePte));
    }
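
    //
    // Worked example (illustrative numbers only): with 8 GB of usable FB,
    // 4 KB small pages and an assumed 128 KB max big page size,
    // smallPagePte = 8 GB / 4 KB * 8   = 16 MB and
    // bigPagePte   = 8 GB / 128 KB * 8 = 512 KB,
    // each rounded up to RM_PAGE_SIZE.
    //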

    userdReservedSize = memmgrGetUserdReservedFbSpace_HAL(pGpu, pMemoryManager);
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_USERD_BUFFER,
                                   userdReservedSize);

    runlistEntriesReservedSize = memmgrGetRunlistEntriesReservedFbSpace_HAL(pGpu, pMemoryManager);
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_RUNLIST_ENTRIES,
                                   runlistEntriesReservedSize);

    maxContextSize = memmgrGetMaxContextSize_HAL(pGpu, pMemoryManager);
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_CONTEXT_BUFFER,
                                   maxContextSize);
    *rsvdSlowSize +=
        userdReservedSize +          // Kepler USERD
        runlistEntriesReservedSize + // Kepler runlist entries
        smallPagePte +               // small page PTEs
        bigPagePte +                 // big page PTEs
        maxContextSize;

    // Reserve FB for UVM on WDDM
    memmgrCalcReservedFbSpaceForUVM_HAL(pGpu, pMemoryManager, rsvdSlowSize);

    // Reserve FB for MMU fault buffers
    mmuFaultBufferSize = kgmmuGetFaultBufferReservedFbSpaceSize(pGpu, pKernelGmmu);
    NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_MMU_FAULT_BUFFER,
                                   mmuFaultBufferSize);
    *rsvdSlowSize += mmuFaultBufferSize;

    // Reserve FB for fault method buffers
    if (!RMCFG_FEATURE_PLATFORM_GSP)
    {
        faultMethodBufferSize = kfifoCalcTotalSizeOfFaultMethodBuffers_HAL(pGpu, pKernelFifo, NV_TRUE);
        NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_FAULT_METHOD,
                                       faultMethodBufferSize);
        *rsvdSlowSize += faultMethodBufferSize;
    }

    // The access map is fairly large (512KB), so we account for it specifically
    if (kgraphicsDoesUcodeSupportPrivAccessMap(pGpu, pKernelGraphics))
    {
        *rsvdSlowSize += pGpu->userRegisterAccessMapSize;
        NV_FB_RSVD_BLOCK_LOG_ENTRY_ADD(status, pMemoryManager, NV_FB_ALLOC_RM_INTERNAL_OWNER_ACCESS_MAP,
                                       pGpu->userRegisterAccessMapSize);
    }

    if (*rsvdFastSize + *rsvdSlowSize > pMemoryManager->Ram.fbUsableMemSize / 2)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Before capping: rsvdFastSize = 0x%llx bytes rsvdSlowSize = 0x%llx "
                  "bytes Usable FB = 0x%llx bytes\n", *rsvdFastSize,
                  *rsvdSlowSize, pMemoryManager->Ram.fbUsableMemSize);
        if (pMemoryManager->rsvdMemorySizeIncrement > 0)
        {
            // Fail the reserved memory capping when a user-specified increase is present
            NV_PRINTF(LEVEL_ERROR,
                      "Failing the rsvd memory capping because of a user-specified increase = %llx bytes\n",
                      pMemoryManager->rsvdMemorySizeIncrement);
            *rsvdFastSize = 0;
            *rsvdSlowSize = 0;
            NV_ASSERT(0);
            return;
        }
        // Scale down fast and slow proportionally
        *rsvdFastSize = *rsvdFastSize * pMemoryManager->Ram.fbUsableMemSize / 2
            / (*rsvdFastSize + *rsvdSlowSize);
        *rsvdSlowSize = pMemoryManager->Ram.fbUsableMemSize / 2 - *rsvdFastSize;
        NV_PRINTF(LEVEL_ERROR,
                  "After capping: rsvdFastSize = 0x%llx bytes rsvdSlowSize = 0x%llx bytes\n",
                  *rsvdFastSize, *rsvdSlowSize);
    }
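
    //
    // Capping example (illustrative numbers only): with rsvdFastSize =
    // 0x300000, rsvdSlowSize = 0x500000 and usable FB = 0x800000, the budget
    // is 0x400000, so fast becomes 0x300000 * 0x400000 / 0x800000 = 0x180000
    // and slow becomes 0x400000 - 0x180000 = 0x280000.
    //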

    if (!pMemoryManager->bPreferSlowRegion)
    {
        *rsvdFastSize = *rsvdFastSize + *rsvdSlowSize;
        *rsvdSlowSize = 0;
    }

    //
    // Memory should be blocked off with 64K granularity. This makes PMA and
    // VA space management more efficient.
    //
    *rsvdFastSize = NV_ROUNDUP(*rsvdFastSize, RM_PAGE_SIZE_64K);
    *rsvdSlowSize = NV_ROUNDUP(*rsvdSlowSize, RM_PAGE_SIZE_64K);
    *rsvdISOSize  = NV_ROUNDUP(*rsvdISOSize, RM_PAGE_SIZE_64K);
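
    // E.g. (illustrative) NV_ROUNDUP(0x25000, RM_PAGE_SIZE_64K) == 0x30000.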

    // If any of the reservation logging failed, print an error message
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_WARNING, "Error logging the FB reservation entries\n");
    }
}

/*!
 * Set up RM reserved memory space
 */
NV_STATUS
memmgrPreInitReservedMemory_GM107
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelDisplay *pKernelDisplay = GPU_GET_KERNEL_DISPLAY(pGpu);
    KernelBus     *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    NvU64          tmpAddr = 0;
    NV_STATUS      status = NV_OK;
    NvU32          instBlkBarOverride = 0;

    // ***************************************************************
    // Determine the size of reserved memory & tell the HW where it is.
    // Note that the order of these matters for optimum alignment &
    // FB usage. The order must be from the highest alignment requirement
    // to the lowest alignment requirement, with the last item being the
    // vbios image / workspace area.
    // ***************************************************************

    if (IS_GSP_CLIENT(pGpu) && pKernelDisplay != NULL)
    {
        // TODO: Determine the correct size of display instance memory
        // via instmemGetSize_HAL(), as well as other parameters.
        // I.e. refactor and leverage the code performing these tasks
        // in memmgrPreInitReservedMemory_GM107() today.
        tmpAddr += 0x10000;
    }

    {
        instBlkBarOverride = DRF_VAL(_REG_STR_RM, _INST_LOC, _INSTBLK, pGpu->instLocOverrides);
    }

    pKernelBus->InstBlkAperture = ADDR_FBMEM;
    pKernelBus->InstBlkAttr     = NV_MEMORY_WRITECOMBINED;

    memdescOverrideInstLoc(instBlkBarOverride, "BAR instblk",
                           &pKernelBus->InstBlkAperture,
                           &pKernelBus->InstBlkAttr);

    if (pKernelBus->InstBlkAperture == ADDR_FBMEM)
    {
        // Reserve space for the BAR1 and BAR2 instance blocks
        tmpAddr = NV_ROUNDUP(tmpAddr, GF100_BUS_INSTANCEBLOCK_SIZE);
        pKernelBus->bar1[GPU_GFID_PF].instBlockBase = tmpAddr;
        tmpAddr += GF100_BUS_INSTANCEBLOCK_SIZE;

        tmpAddr = NV_ROUNDUP(tmpAddr, GF100_BUS_INSTANCEBLOCK_SIZE);
        pKernelBus->bar2[GPU_GFID_PF].instBlockBase = tmpAddr;
        tmpAddr += GF100_BUS_INSTANCEBLOCK_SIZE;

        NV_PRINTF(LEVEL_INFO, "Reserve space for Bar1 inst block offset = 0x%llx size = 0x%x\n",
                  pKernelBus->bar1[GPU_GFID_PF].instBlockBase, GF100_BUS_INSTANCEBLOCK_SIZE);

        NV_PRINTF(LEVEL_INFO, "Reserve space for Bar2 inst block offset = 0x%llx size = 0x%x\n",
                  pKernelBus->bar2[GPU_GFID_PF].instBlockBase, GF100_BUS_INSTANCEBLOCK_SIZE);
    }
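
    //
    // Layout sketch (illustrative, assuming the GSP-client display carve-out
    // above and a 4 KB GF100_BUS_INSTANCEBLOCK_SIZE): the BAR1 inst block
    // lands at tmpAddr = 0x10000 and the BAR2 inst block at 0x11000, leaving
    // tmpAddr = 0x12000 for the next reservation.
    //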

    if (gpuIsSelfHosted(pGpu) && !RMCFG_FEATURE_PLATFORM_GSP)
    {
        //
        // Reserve space for the test buffer used in the coherent link test
        // that is run early, when memory allocation is not ready yet.
        //
        pKernelBus->coherentLinkTestBufferBase = tmpAddr;
        tmpAddr += BUS_COHERENT_LINK_TEST_BUFFER_SIZE;
    }

    //
    // This has to be the very *last* thing in reserved memory, as it
    // may grow past the 1MB reserved memory window. We cannot
    // size it until memsysStateInitLockedHal_GM107.
    //
    memmgrReserveBar2BackingStore(pGpu, pMemoryManager, &tmpAddr);

    //
    // Store the size of rsvd memory excluding VBIOS space. Size finalized in memmgrStateInitReservedMemory.
    //
    NV_ASSERT(NvU64_LO32(tmpAddr) == tmpAddr);
    pMemoryManager->rsvdMemorySize = NvU64_LO32(tmpAddr);

    NV_PRINTF(LEVEL_INFO, "Calculated size of reserved memory = 0x%x. Size finalized in StateInit.\n", pMemoryManager->rsvdMemorySize);

    return status;
}
