1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 
25 /***************************** HW State Routines ***************************\
26 *                                                                           *
27 *         GPU Virtual Address Space Function Definitions.                   *
28 *                                                                           *
29 \***************************************************************************/
30 
31 #include "gpu/mmu/kern_gmmu.h"
32 #include "mem_mgr/gpu_vaspace.h"
33 #include "mem_mgr/fabric_vaspace.h"
34 #include "gpu/mem_mgr/virt_mem_allocator_common.h"
35 #include "gpu/mem_mgr/virt_mem_allocator.h"
36 #include "os/os.h"
37 #include "containers/eheap_old.h"
38 #include "gpu/mem_mgr/mem_desc.h"
39 #include "gpu/bus/kern_bus.h"
40 #include "mmu/mmu_walk.h"
41 #include "lib/base_utils.h"
42 #include "class/cl90f1.h"  // FERMI_VASPACE_A
#include "ctrl/ctrl90f1.h"  // FERMI_VASPACE_A control commands
44 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
45 #include "vgpu/rpc.h"
46 #include "gpu/mem_mgr/mem_mgr.h"
47 #include "gpu/mem_sys/kern_mem_sys.h"
48 #include "gpu/device/device.h"
49 #include "kernel/gpu/fifo/kernel_channel_group.h"
50 #include "kernel/gpu/nvlink/kernel_nvlink.h"
51 #include "gpu/subdevice/subdevice.h"
52 #include "core/locks.h"
53 #include "mem_mgr/pool_alloc.h"
54 #include "deprecated/rmapi_deprecated.h"
55 #include "rmapi/rs_utils.h"
56 #include "gpu/mem_mgr/vaspace_api.h"
57 
58 
59 
60 #define GMMU_PD1_VADDR_BIT_LO                        29
61 
62 static const NvU64 pageSizes[] = {
63     RM_PAGE_SIZE,
64     RM_PAGE_SIZE_64K,
65     RM_PAGE_SIZE_HUGE,
66     RM_PAGE_SIZE_512M
67 };
68 
69 static const NvU32 pageSizeCount = sizeof (pageSizes) / sizeof (*pageSizes);
70 
71 static NV_STATUS
72 _gvaspaceGpuStateConstruct
73 (
74     OBJGVASPACE    *pGVAS,
75     OBJGPU         *pGpu,
76     GVAS_GPU_STATE *pGpuState,
77     const NvU64     reqBigPageSize,
78     const NvU64     vaStart,
79     const NvU64     vaLimit,
80     const NvU64     vaStartInternal,
81     const NvU64     vaLimitInternal,
82     const NvU32     flags,
83     const NvBool    bFirst,
84     NvU64          *pFullPdeCoverage,
85     NvU32          *pPartialPdeExpMax
86 );
87 
88 static void
89 _gvaspaceGpuStateDestruct
90 (
91     OBJGVASPACE    *pGVAS,
92     OBJGPU         *pGpu,
93     GVAS_GPU_STATE *pGpuState
94 );
95 
96 static NV_STATUS
97 _gvaspaceReserveTopForGrowth
98 (
99     OBJGVASPACE    *pGVAS
100 );
101 
102 static NV_STATUS
103 _gvaspaceReserveRange
104 (
105     OBJGVASPACE *pGVAS,
106     NvU64 rangeLo,
107     NvU64 rangeHi
108 );
109 
110 static NV_STATUS
111 _gvaspacePinLazyPageTables
112 (
113     OBJGVASPACE       *pGVAS,
114     OBJGPU            *pGpu,
115     const NvU64        va
116 );
117 
118 static NV_STATUS
119 _gvaspaceFreeVASBlock
120 (
121     OBJEHEAP  *pHeap,
122     void      *pEnv,
123     PEMEMBLOCK pMemBlock,
124     NvU32     *pContinue,
125     NvU32     *pInvalCursor
126 );
127 
128 static NV_STATUS
129 _gvaspaceMappingInsert
130 (
131     OBJGVASPACE        *pGVAS,
132     OBJGPU             *pGpu,
133     GVAS_BLOCK         *pVASBlock,
134     const NvU64         vaLo,
135     const NvU64         vaHi,
136     const VAS_MAP_FLAGS flags
137 );
138 
139 static NV_STATUS
140 _gvaspaceMappingRemove
141 (
142     OBJGVASPACE       *pGVAS,
143     OBJGPU            *pGpu,
144     GVAS_BLOCK        *pVASBlock,
145     const NvU64        vaLo,
146     const NvU64        vaHi
147 );
148 
149 static void
150 _gvaspaceAddPartialPtRange
151 (
152     OBJGVASPACE       *pGVAS,
153     const NvU64        va
154 );
155 
156 static NV_STATUS
157 _gvaspaceSetExternalPageDirBase
158 (
159     OBJGVASPACE       *pGVAS,
160     OBJGPU            *pGpu,
161     MEMORY_DESCRIPTOR *pMemDesc
162 );
163 
164 static NV_STATUS
165 _gvaspaceReservePageTableEntries
166 (
167     OBJGVASPACE *pGVAS,
168     OBJGPU      *pGpu,
169     const NvU64  vaLo,
170     const NvU64  vaHi,
171     const NvU64  pageSizeMask
172 );
173 
174 static NV_STATUS
175 _gvaspaceReleasePageTableEntries
176 (
177     OBJGVASPACE *pGVAS,
178     OBJGPU      *pGpu,
179     const NvU64  vaLo,
180     const NvU64  vaHi,
181     const NvU64  pageSizeMask
182 );
183 
184 static NV_STATUS
185 _gvaspaceReleaseUnreservedPTEs
186 (
187     OBJGVASPACE *pGVAS,
188     OBJGPU      *pGpu,
189     const NvU64  vaLo,
190     const NvU64  vaHi,
191     const MMU_FMT_LEVEL *pLevelFmt
192 );
193 
194 static NV_STATUS
195 _gvaspaceCopyServerRmReservedPdesToServerRm
196 (
197     NvHandle                                              hClient,
198     NvHandle                                              hVASpace,
199     OBJGPU                                               *pGpu,
200     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pPdeCopyParams
201 );
202 
203 static void
204 _gvaspaceForceFreePageLevelInstances
205 (
206     OBJGVASPACE    *pGVAS,
207     OBJGPU         *pGpu,
208     GVAS_GPU_STATE *pGpuState
209 );
210 
211 static NV_STATUS
212 _gvaspacePopulatePDEentries
213 (
214     OBJGVASPACE    *pGVAS,
215     OBJGPU         *pGpu,
216     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pPdeCopyParams
217 );
218 
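/*!
 * GSP-RM-only portion of BAR1 VA space construction: pins the root page
 * directory. No-op outside GSP firmware.
 */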
219 static NV_STATUS
220 _gvaspaceBar1VaSpaceConstructFW
221 (
222     OBJGVASPACE *pGVAS,
223     OBJGPU      *pGpu
224 )
225 {
226     NV_STATUS status = NV_OK;
227 
228     if (!RMCFG_FEATURE_PLATFORM_GSP)
229     {
230         return NV_OK;
231     }
232 
233     status = gvaspacePinRootPageDir(pGVAS, pGpu);
234     NV_ASSERT_OR_RETURN(NV_OK == status, status);
235 
236     return status;
237 }
238 
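/*!
 * CPU-RM (client) portion of BAR1 VA space construction: sparsifies the
 * entire BAR1 VA range. No-op on GSP firmware.
 */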
239 static NV_STATUS
240 _gvaspaceBar1VaSpaceConstructClient
241 (
242     OBJGVASPACE *pGVAS,
243     OBJGPU      *pGpu
244 )
245 {
246     NV_STATUS         status  = NV_OK;
247     OBJVASPACE       *pVAS    = staticCast(pGVAS, OBJVASPACE);
248     MMU_WALK_USER_CTX userCtx = {0};
249 
250     if (!RMCFG_FEATURE_PLATFORM_GSP)
251     {
252         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
253         NV_ASSERT_OR_RETURN(NULL != userCtx.pGpuState, NV_ERR_INVALID_STATE);
254 
255         status = mmuWalkSparsify(userCtx.pGpuState->pWalk, vaspaceGetVaStart(pVAS),
256                                  vaspaceGetVaLimit(pVAS), NV_FALSE);
257 
258         gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
259     }
260 
261     return status;
262 }
263 
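/*!
 * Per-GPU BAR1 VA space construction: applies the firmware and client
 * portions in order.
 */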
264 static NV_STATUS
265 _gvaspaceBar1VaSpaceConstruct
266 (
267     OBJGVASPACE *pGVAS,
268     OBJGPU      *pGpu
269 )
270 {
271     NV_STATUS status = NV_OK;
272 
273     status = _gvaspaceBar1VaSpaceConstructFW(pGVAS, pGpu);
274     NV_ASSERT_OR_RETURN(status == NV_OK, status);
275 
276     status = _gvaspaceBar1VaSpaceConstructClient(pGVAS, pGpu);
277     NV_ASSERT_OR_RETURN(status == NV_OK, status);
278 
279     return status;
280 }
281 
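/*!
 * On server RM, reserve (mark as non-allocatable) all VA outside
 * [vaStartServerRMOwned, vaLimitServerRMOwned] so that server RM only
 * allocates from its own portion of the split VA space.
 */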
282 static NV_STATUS
283 _gvaspaceReserveVaForServerRm
284 (
285     OBJGVASPACE *pGVAS,
286     OBJGPU      *pGpu
287 )
288 {
289     NV_STATUS   status = NV_OK;
290     OBJVASPACE *pVAS   = staticCast(pGVAS, OBJVASPACE);
291 
    // Reserve everything below vaStartServerRMOwned as non-allocatable by server RM.
293     if (pVAS->vasStart < pGVAS->vaStartServerRMOwned)
294     {
295         status = _gvaspaceReserveRange(pGVAS, pVAS->vasStart,
296                                        pGVAS->vaStartServerRMOwned - 1);
297         NV_ASSERT_OR_RETURN(status == NV_OK, status);
298     }
299 
    // Reserve everything above vaLimitServerRMOwned as non-allocatable by server RM.
301     if (pGVAS->vaLimitServerRMOwned < pGVAS->vaLimitInternal)
302     {
303         status = _gvaspaceReserveRange(pGVAS, pGVAS->vaLimitServerRMOwned + 1,
304                                        pGVAS->vaLimitInternal);
305         NV_ASSERT_OR_RETURN(status == NV_OK, status);
306     }
307 
308     return status;
309 }
310 
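/*!
 * On client RM, reserve the server-RM-owned VA range so that client RM never
 * allocates from it, pre-reserve the PDEs covering that range on each GPU,
 * and copy those PDEs to server RM.
 */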
311 static NV_STATUS
312 _gvaspaceReserveVaForClientRm
313 (
314     OBJGVASPACE *pGVAS,
315     OBJGPU      *pGpu
316 )
317 {
318     NV_STATUS   status = NV_OK;
319     OBJVASPACE *pVAS   = staticCast(pGVAS, OBJVASPACE);
320 
321     //
322     // Client RM needs to hold the GPU lock for any GPU it wants to RPC to.
323     // We don't actually know which locks we potentially hold here, so use
324     // SAFE_LOCK_UPGRADE.
325     //
326     GPU_MASK gpuMask = pVAS->gpuMask;
327     status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
328                 GPU_LOCK_FLAGS_SAFE_LOCK_UPGRADE, RM_LOCK_MODULES_MEM, &gpuMask);
329 
330     // If we get NOTHING_TO_DO, we already have the needed locks, so don't free them
331     if (status == NV_WARN_NOTHING_TO_DO)
332         gpuMask = 0;
333     else if (status != NV_OK)
334         return status;
335 
336     //
337     // Reserve everything in the range [vaStartServerRMOwned, vaLimitServerRMOwned]
    // as non-allocatable by client RM. This range is reserved for server RM.
339     //
340     status = _gvaspaceReserveRange(pGVAS, pGVAS->vaStartServerRMOwned,
341                                    pGVAS->vaLimitServerRMOwned);
342     NV_ASSERT_OR_GOTO(status == NV_OK, done);
343 
344     if (pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED)
345     {
346         // Loop over each GPU associated with VAS.
347         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
348         {
349             MemoryManager  *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
350 
351             if (pMemoryManager->pPageLevelReserve == NULL)
352             {
353                 NV_ASSERT(0);
354                 status = NV_ERR_INVALID_STATE;
355                 break;
356             }
357         }
358         FOR_EACH_GPU_IN_MASK_UC_END
359 
360         NV_ASSERT_OR_GOTO(status == NV_OK, done);
361     }
362 
363     // Loop over each GPU associated with VAS.
364     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
365     {
366         MMU_WALK_USER_CTX userCtx  = {0};
367         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
368 
369         if (NULL == userCtx.pGpuState)
370         {
371             status = NV_ERR_INVALID_STATE;
372             break;
373         }
374         else
375         {
376             //
            // We pin only up to PD1 for now to conserve memory; we don't know
            // how much memory will eventually be consumed by the leaf page tables.
379             //
380             const MMU_FMT_LEVEL *pLevelFmt =
381                    mmuFmtFindLevelWithPageShift(userCtx.pGpuState->pFmt->pRoot, GMMU_PD1_VADDR_BIT_LO);
382             status = mmuWalkReserveEntries(userCtx.pGpuState->pWalk,
383                                            pLevelFmt,
384                                            pGVAS->vaStartServerRMOwned,
385                                            pGVAS->vaLimitServerRMOwned,
386                                            NV_TRUE);
387 
388             gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
389 
390             if (status != NV_OK)
391             {
392                 break;
393             }
394         }
395 
396         status = gvaspaceCopyServerRmReservedPdesToServerRm(pGVAS, pGpu);
397         if (status != NV_OK)
398         {
399             break;
400         }
401     }
402     FOR_EACH_GPU_IN_MASK_UC_END
403 
404 done:
405     if (gpuMask != 0)
406     {
407         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
408     }
409     return status;
410 }
411 
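/*!
 * Carve out the server-RM-owned portion of a split VA space and reserve it
 * from the appropriate side (server RM or client RM) based on the calling
 * context.
 */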
412 NV_STATUS
413 gvaspaceReserveSplitVaSpace_IMPL
414 (
415     OBJGVASPACE *pGVAS,
416     OBJGPU      *pGpu
417 )
418 {
419     NV_STATUS status    = NV_OK;
420     NvBool    bClientRm = NV_FALSE;
421     NvBool    bServerRm = NV_FALSE;
422     NvU32     gfid;
423 
424     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
425 
426     if (IS_VIRTUAL_WITH_SRIOV(pGpu) || IS_GSP_CLIENT(pGpu))
427     {
428         bClientRm = NV_TRUE;
429     }
430     else if (IS_GFID_VF(gfid))
431     {
432         bServerRm = NV_TRUE;
433     }
434 
435     if (bServerRm || bClientRm)
436     {
437         OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
438 
439         pGVAS->vaStartServerRMOwned = NV_MIN(pGVAS->vaLimitInternal -
440                                          SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE + 1,
441                                          SPLIT_VAS_SERVER_RM_MANAGED_VA_START);
442 
443         if (pVAS->vasStart > pGVAS->vaStartServerRMOwned)
444         {
445             pGVAS->vaStartServerRMOwned = pVAS->vasStart + SPLIT_VAS_SERVER_RM_MANAGED_VA_START;
446         }
447 
448         pGVAS->vaLimitServerRMOwned = pGVAS->vaStartServerRMOwned +
449                                       SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE - 1;
450 
451         // Base and limit + 1 should be aligned to 512MB.
452         if (!NV_IS_ALIGNED(pGVAS->vaStartServerRMOwned, NVBIT64(GMMU_PD1_VADDR_BIT_LO)))
453         {
454             NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
455         }
456 
457         if (!NV_IS_ALIGNED(pGVAS->vaLimitServerRMOwned + 1, NVBIT64(GMMU_PD1_VADDR_BIT_LO)))
458         {
459             NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
460         }
461 
462         // Validate limit.
463         if (pGVAS->vaLimitServerRMOwned > pGVAS->vaLimitInternal)
464         {
            NV_PRINTF(LEVEL_ERROR, "vaLimitServerRMOwned (0x%llx) "
                      "> vaLimitInternal (0x%llx)\n",
467                        pGVAS->vaLimitServerRMOwned, pGVAS->vaLimitInternal);
468             NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
469         }
470 
471         //
        // If we are running inside server RM on behalf of a client, server RM can
        // assign VA only inside the range [vaStartServerRMOwned, vaLimitServerRMOwned].
474         //
475         if (bServerRm)
476         {
477             status = _gvaspaceReserveVaForServerRm(pGVAS, pGpu);
478         }
479         else if (bClientRm)
480         {
481             status = _gvaspaceReserveVaForClientRm(pGVAS, pGpu);
482         }
483     }
484     return status;
485 }
486 
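/*!
 * FERMI_VASPACE_A constructor: records VAS flags, constructs per-GPU GMMU
 * state, creates the VA heap, reserves split-VAS and partial page table
 * ranges, and sparsifies the entire VAS for BAR1.
 */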
487 NV_STATUS
488 gvaspaceConstruct__IMPL
489 (
490     OBJGVASPACE *pGVAS,
491     NvU32        classId,
492     NvU32        vaspaceId,
493     NvU64        vaStart,
494     NvU64        vaLimit,
495     NvU64        vaStartInternal,
496     NvU64        vaLimitInternal,
497     NvU32        flags
498 )
499 {
500     OBJVASPACE      *pVAS  = staticCast(pGVAS, OBJVASPACE);
501     OBJGPU          *pGpu  = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
502     NvU64            reqBigPageSize;
503     NV_STATUS        status = NV_OK;
504     GVAS_GPU_STATE  *pGpuState;
505     NvU32            highestBitIdx;
506     NvU64            fullPdeCoverage = 0;
507     NvU32            partialPdeExpMax = 0;
508     NvBool           bFirst = NV_TRUE;
509     NvBool           bCallingContextPlugin;
510 
511     NV_ASSERT_OR_RETURN(FERMI_VASPACE_A == classId, NV_ERR_INVALID_ARGUMENT);
512 
513     // Save off flags.
514     pGVAS->flags = flags;
515 
516     // Save off UVM mirroring flag.
517     if (flags & VASPACE_FLAGS_SET_MIRRORED)
518     {
519         NV_ASSERT_OR_RETURN(!(pGVAS->flags & VASPACE_FLAGS_BAR), NV_ERR_ILLEGAL_ACTION);
520         NV_ASSERT_OR_RETURN(!(pGVAS->flags & VASPACE_FLAGS_IS_EXTERNALLY_OWNED), NV_ERR_INVALID_ARGUMENT);
521         pGVAS->bIsMirrored = NV_TRUE;
522     }
523 
524     if (flags & VASPACE_FLAGS_ENABLE_FAULTING)
525     {
526         // All channels in this address space will have faulting enabled.
527        pGVAS->bIsFaultCapable = NV_TRUE;
528     }
529     if (flags & VASPACE_FLAGS_IS_EXTERNALLY_OWNED)
530     {
531         // This address space is managed by the UVM driver.
532        pGVAS->bIsExternallyOwned = NV_TRUE;
533     }
534     if (flags & VASPACE_FLAGS_ENABLE_ATS)
535     {
536         pGVAS->bIsAtsEnabled = NV_TRUE;
537         NV_PRINTF(LEVEL_INFO, "ATS Enabled VaSpace\n");
538         //
        // Initialize with an invalid PASID value for sanity checking later during
        // PASID programming in HW.
        // In the non-MODS case, the PASID is programmed via the
        // NV0080_CTRL_DMA_SET_PAGE_DIRECTORY control call.
543         //
544         pGVAS->processAddrSpaceId = NV_U32_MAX;
545     }
546 
547     if (flags & VASPACE_FLAGS_FLA)
548     {
549         pGVAS->flags |= VASPACE_FLAGS_INVALIDATE_SCOPE_NVLINK_TLB;
550     }
551 
552     // Determine requested big page size based on flags.
553     switch (DRF_VAL(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, flags))
554     {
555         case NV_VASPACE_FLAGS_BIG_PAGE_SIZE_64K:
556             reqBigPageSize = RM_PAGE_SIZE_64K;
557             break;
558         case NV_VASPACE_FLAGS_BIG_PAGE_SIZE_128K:
559             reqBigPageSize = RM_PAGE_SIZE_128K;
560             break;
561         case NV_VASPACE_FLAGS_BIG_PAGE_SIZE_DEFAULT:
562             reqBigPageSize = 0; // Let GMMU pick based on format.
563             break;
564         default:
565             NV_ASSERT_OR_RETURN(0, NV_ERR_NOT_SUPPORTED);
566             break;
567     }
568 
569     // Create per-GPU state array.
570     highestBitIdx = pVAS->gpuMask;
571     HIGHESTBITIDX_32(highestBitIdx);
572     pGVAS->pGpuStates = portMemAllocNonPaged(sizeof(*pGVAS->pGpuStates) * (highestBitIdx + 1));
573     NV_ASSERT_OR_RETURN(NULL != pGVAS->pGpuStates, NV_ERR_NO_MEMORY);
574     portMemSet(pGVAS->pGpuStates, 0, sizeof(*pGVAS->pGpuStates) * (highestBitIdx + 1));
575 
576     // Initialize channel group map
577     mapInit(&pGVAS->chanGrpMap, portMemAllocatorGetGlobalNonPaged());
578 
579     // Loop over each GPU associated with VAS.
580     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
581     {
582         pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
583         status = _gvaspaceGpuStateConstruct(pGVAS, pGpu, pGpuState, reqBigPageSize,
584                                             vaStart, vaLimit,  vaStartInternal,
585                                             vaLimitInternal, flags,
586                                             bFirst,
587                                             &fullPdeCoverage, &partialPdeExpMax);
588         if (NV_OK != status)
589         {
590             DBG_BREAKPOINT();
591             break;
592         }
593         bFirst = NV_FALSE;
594     }
595     FOR_EACH_GPU_IN_MASK_UC_END
596     if (NV_OK != status)
597     {
598         goto catch;
599     }
600 
601     // Validate limit.
602     NV_ASSERT_OR_RETURN(pVAS->vasStart <= pVAS->vasLimit, NV_ERR_INVALID_ARGUMENT);
    // The external limit is applied to the HW, so it must be at least the internal limit.
604     NV_ASSERT_OR_RETURN(pVAS->vasLimit >= pGVAS->vaLimitInternal, NV_ERR_INVALID_ARGUMENT);
605 
606     // Create virtual address heap (BC state).
607     pGVAS->pHeap = portMemAllocNonPaged(sizeof(*pGVAS->pHeap));
608     if (pGVAS->pHeap == NULL)
609     {
610         status = NV_ERR_NO_MEMORY;
611         NV_ASSERT_OR_GOTO(NULL != pGVAS->pHeap, catch);
612     }
613 
614     constructObjEHeap(pGVAS->pHeap, pVAS->vasStart, pGVAS->vaLimitMax + 1,
615                       sizeof(GVAS_BLOCK), 0);
616 
617     if (gpuIsSplitVasManagementServerClientRmEnabled(pGpu) &&
618         !(pGVAS->flags & VASPACE_FLAGS_BAR) &&
619         !(pGVAS->flags & VASPACE_FLAGS_FLA) &&
620         !(pGVAS->flags & VASPACE_FLAGS_PMU) &&
621         !(pGVAS->flags & VASPACE_FLAGS_HDA) &&
622         !(pGVAS->flags & VASPACE_FLAGS_HWPM) &&
623         !(pGVAS->flags & VASPACE_FLAGS_PERFMON) &&
624         !(pGVAS->flags & VASPACE_FLAGS_DISABLE_SPLIT_VAS))
625     {
626         NV_ASSERT_OK_OR_GOTO(status, vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin), catch);
627         if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) || !bCallingContextPlugin)
628         {
629             status = gvaspaceReserveSplitVaSpace(pGVAS, pGpu);
630             NV_ASSERT_OR_GOTO(NV_OK == status, catch);
631         }
632     }
633 
634     // Reserve VA block between current limit and max limit for later growth.
635     if (flags & VASPACE_FLAGS_RESTRICTED_RM_INTERNAL_VALIMITS)
636     {
        // Mac could overcommit VA, so keep the entire VA range, including both the
        // RM-internal and client VA, available. Reserve only the VA range outside the
        // VA space, i.e. vaStart to vaStartInternal - 1 (enforces 32-bit client VA),
        // vaStartInternal to vaLimitInternal (RM-internal VA), and
        // vaLimitInternal + 1 to vasLimit (client VA).
641 
642         // By default allocations will be routed within RM internal va range.
643         pGVAS->bRMInternalRestrictedVaRange = NV_TRUE;
644 
645         if (pVAS->vasLimit != pGVAS->vaLimitMax)
646             status = _gvaspaceReserveRange(pGVAS, pVAS->vasLimit + 1, pGVAS->vaLimitMax);
647     }
648     else
649     {
650         status = _gvaspaceReserveTopForGrowth(pGVAS);
651     }
652     NV_ASSERT_OR_GOTO(NV_OK == status, catch);
653 
654     // Reserve VA holes for partial page tables if requested and supported.
655     if ((flags & VASPACE_FLAGS_MINIMIZE_PTETABLE_SIZE) && (partialPdeExpMax > 0))
656     {
657         const NvU64         partialSize        = fullPdeCoverage >> partialPdeExpMax;
658         const NvU64         pdeAlignedVasStart = NV_ALIGN_DOWN64(pVAS->vasStart, fullPdeCoverage);
659         const NvU64         pdeAlignedVasLimit = NV_ALIGN_UP64(pGVAS->vaLimitInternal + 1, fullPdeCoverage) - 1;
660         const NvU64         pdeAlignedVasSize  = pdeAlignedVasLimit - pdeAlignedVasStart + 1;
661         const NvU64         maxRangeSize       = NV_ALIGN_DOWN64(pdeAlignedVasSize / 4, fullPdeCoverage);
662         NvU32               i;
663 
664         NV_ASSERT_OR_RETURN(!(flags & VASPACE_FLAGS_RESTRICTED_RM_INTERNAL_VALIMITS), NV_ERR_ILLEGAL_ACTION);
665 
666         pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
667 
668         //
669         // Pick a size for partial page table ranges.
670         //
671         // This optimization is required for WHQL MaxContexts on pre-Pascal.
672         // In this scenario each (minimal) context allocates at least one
673         // small page table.
        // Each full small page table requires 256KB apiece
675         // (assuming 128KB big page size).
676         // With 100 contexts, this would require 100 * 256KB = 25MB of
677         // reserved FB memory.
        // While system memory fallback is enabled, it is not reliable:
        // the OS makes no guarantees for such large contiguous sysmem
680         // allocations.
681         //
682         // The optimization uses a heuristic based on two competing constraints:
683         // 1. Need to cover enough small allocations that page table memory is
684         //    not wasted incrementally (especially for small applications).
685         // 2. Need to leave enough contiguous VA to satisfy large requests.
686         //
687         // There are drawbacks in both directions, so we pick a simple policy.
688         // We statically partition the VA space into areas where partial
689         // page tables will be used and areas that will use full page tables.
690         // We pick the partitioning size to be the smaller of 256MB and
691         // 1/4th of the VAS heap size to satisfy the above two constraints.
692         //
693         pGVAS->partialPtVaRangeSize = NV_MIN(NVBIT64(28), maxRangeSize);
694 
695         //
696         // We also need to pick where to place the partial page table VA ranges.
697         // We use a static heuristic: initial VA allocations usually land
698         // at the beginning (normal) and end (grow down) of the VA heap.
699         // Grow down is an important case since KMD reserves push buffers and other
700         // special allocations at the end of the heap.
701         //
        // There is also the complication that virtual addresses within 32 bits
        // are optimal for some UMDs and chips - e.g. the UMD can restrict and detect 32-bit
704         // addresses and compile shaders dynamically with more efficient instructions.
705         // For these configurations we also allocate partial ranges above and
706         // below the 4GB offset to catch allocations with 32-bit restricted ranges.
707         // The range just above 32-bits catches unrestricted allocations
708         // which are moved above 32-bits to stay out of the way and conserve
709         // the 32-bit space.
710         //
711         // If the application uses a large amount of the VA it will eventually use
712         // the middle of the heap, but at that point incremental page table waste
713         // is amortized (low overall overhead).
714         //
715         // An alternative approach is to pick the partial PDEs dynamically,
716         // for example the first N PDEs used.
717         // However this significantly complicates VA heap allocation,
718         // especially for grow down requests (think about it).
719         // The original RM VAS code used this approach, but it was
        // shown to cause stuttering in allocation-heavy apps due to the
721         // complex "reject PDE" loops that were required (see Bug 1551532).
722         //
723         // Another alternative considered was to dynamically grow
724         // partial page tables - e.g. migrate from 1/8th to 1/4th as the upper VA
725         // is allocated. This would remove the need for static heuristics and
726         // place no restriction on VA heap allocation (great!), BUT:
727         //
728         // 1. WDDMv1 allows paging (mapping with CE) to take place concurrently
729         //    with respect to allocation (page table pinning),
730         //    so migration is not possible without the pager being able
731         //    to synchronize dependencies (WDDMv2). Darn.
732         // 2. Even if it were possible, if page tables were migrated through
733         //    BAR2 the read performance during the copy would be dreadful.
734         //    RM would need internal CE support (e.g. leverage ECC scrubber)
735         //    for this to be feasible.
736         //
737         // Hence, we are using these static heuristics.
738         //
739 
740         // Bottom of heap.
741         _gvaspaceAddPartialPtRange(pGVAS, pdeAlignedVasStart);
742 
743         // Handle 1GB offset. See usage of KMD MINIMUM_GPU_VIRTUAL_ADDRESS.
744         if ((pdeAlignedVasLimit + 1) > NVBIT64(30))
745         {
746             _gvaspaceAddPartialPtRange(pGVAS, NVBIT64(30));
747         }
748 
749         VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);
750         // Handle 32-bit restricted pointer ranges.
751         if (((pdeAlignedVasLimit + 1) > NVBIT64(32)) &&
752             (pDma->getProperty(pDma, PDB_PROP_DMA_ENFORCE_32BIT_POINTER)))
753         {
754             // Top of 32-bit range.
755             _gvaspaceAddPartialPtRange(pGVAS,
756                 NVBIT64(32) - pGVAS->partialPtVaRangeSize);
757 
758             // Bottom of range above 32-bits.
759             _gvaspaceAddPartialPtRange(pGVAS, NVBIT64(32));
760         }
761         // Top of heap.
762         _gvaspaceAddPartialPtRange(pGVAS,
763             pdeAlignedVasLimit - pGVAS->partialPtVaRangeSize + 1);
764 
765         // Reserve the VA holes at the end of each partial PDE.
766         for (i = 0; i < pGVAS->numPartialPtRanges; ++i)
767         {
768             NvU64 off;
769             for (off = 0; off < pGVAS->partialPtVaRangeSize; off += fullPdeCoverage)
770             {
771                 EMEMBLOCK  *pBlock;
772                 NvU32       allocFlags = NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
773                 NvU64       allocOffset;
774                 NvU64       allocSize;
775                 const NvU64 currPdeStart = pGVAS->partialPtVaRangeBase[i] + off;
776                 const NvU64 nextPdeStart = currPdeStart + fullPdeCoverage;
777 
778                 // Clamp to VAS start and limit.
779                 allocOffset = NV_MAX(pVAS->vasStart, currPdeStart + partialSize);
780 
781                 // Only reserve the hole if the VA limit extends past the partial size.
782                 if (allocOffset <= pGVAS->vaLimitInternal)
783                 {
784                     allocSize = NV_MIN(pGVAS->vaLimitInternal + 1, nextPdeStart) - allocOffset;
785 
786                     status = pGVAS->pHeap->eheapAlloc(pGVAS->pHeap, VAS_EHEAP_OWNER_RSVD,
787                                                       &allocFlags, &allocOffset, &allocSize,
788                                                       1, 1, &pBlock, NULL, NULL);
789                     NV_ASSERT_OR_GOTO(NV_OK == status, catch);
790                 }
791             }
792         }
793     }
794 
795     // Sparsify entire VAS for BAR1
796     if (pGVAS->flags & VASPACE_FLAGS_BAR_BAR1)
797     {
798         // Loop over each GPU associated with VAS.
799         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
800         {
801             status = _gvaspaceBar1VaSpaceConstruct(pGVAS, pGpu);
802             NV_ASSERT(NV_OK == status);
803         }
804         FOR_EACH_GPU_IN_MASK_UC_END
805     }
806 
807 catch:
808     if (status != NV_OK)
809     {
810         gvaspaceDestruct_IMPL(pGVAS);
811     }
812 
813     return status;
814 }
815 
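/*!
 * GSP-RM-only portion of BAR1 VA space destruction: unpins the root page
 * directory. No-op outside GSP firmware.
 */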
816 static void
817 _gvaspaceBar1VaSpaceDestructFW
818 (
819     OBJGVASPACE *pGVAS,
820     OBJGPU      *pGpu
821 )
822 {
823     if (!RMCFG_FEATURE_PLATFORM_GSP)
824     {
825         return;
826     }
827 
828     gvaspaceUnpinRootPageDir(pGVAS, pGpu);
829 }
830 
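/*!
 * CPU-RM (client) portion of BAR1 VA space destruction: unmaps the entire
 * BAR1 VA range. No-op on GSP firmware.
 */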
831 static NV_STATUS
832 _gvaspaceBar1VaSpaceDestructClient
833 (
834     OBJGVASPACE *pGVAS,
835     OBJGPU      *pGpu
836 )
837 {
838     NV_STATUS         status  = NV_OK;
839     OBJVASPACE       *pVAS    = staticCast(pGVAS, OBJVASPACE);
840     MMU_WALK_USER_CTX userCtx = {0};
841 
842     if (!RMCFG_FEATURE_PLATFORM_GSP)
843     {
844 
845         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
846         NV_ASSERT_OR_RETURN(NULL != userCtx.pGpuState, NV_ERR_INVALID_STATE);
847 
848         status = mmuWalkUnmap(userCtx.pGpuState->pWalk, vaspaceGetVaStart(pVAS), vaspaceGetVaLimit(pVAS));
849 
850         gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
851     }
852 
853     return status;
854 }
855 
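/*!
 * Per-GPU BAR1 VA space destruction: applies the firmware and client
 * portions in order.
 */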
856 static NV_STATUS
857 _gvaspaceBar1VaSpaceDestruct
858 (
859     OBJGVASPACE *pGVAS,
860     OBJGPU      *pGpu
861 )
862 {
863     NV_STATUS status = NV_OK;
864 
865     _gvaspaceBar1VaSpaceDestructFW(pGVAS, pGpu);
866 
867     status = _gvaspaceBar1VaSpaceDestructClient(pGVAS, pGpu);
868     NV_ASSERT_OR_RETURN(status == NV_OK, status);
869 
870     return status;
871 }
872 
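/*!
 * Tear down the legacy FLA VA space on a GPU: unpin the root page directory,
 * unmap the full VA range, and clear the FLA VAS bookkeeping in KernelBus.
 */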
873 static NV_STATUS
874 _gvaspaceFlaVaspaceDestruct
875 (
876     OBJGVASPACE *pGVAS,
877     OBJGPU      *pGpu
878 )
879 {
880     NV_STATUS status = NV_OK;
881     MMU_WALK_USER_CTX userCtx = {0};
882     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
883     OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
884 
885     gvaspaceUnpinRootPageDir(pGVAS, pGpu);
886 
887     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
888     NV_ASSERT_OR_RETURN(NULL != userCtx.pGpuState, NV_OK);
889 
890     status = mmuWalkUnmap(userCtx.pGpuState->pWalk, vaspaceGetVaStart(pVAS), vaspaceGetVaLimit(pVAS));
891     NV_ASSERT_OR_RETURN(NV_OK == status, status);
892 
893     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
894 
895     NV_PRINTF(LEVEL_INFO, "Releasing legacy FLA VASPACE, gpu: %x \n",
896             pGpu->gpuInstance);
897 
898     pKernelBus->flaInfo.hFlaVASpace = NV01_NULL_OBJECT;
899     pKernelBus->flaInfo.pFlaVAS = NULL;
900 
901     return status;
902 }
903 
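/*!
 * Release the page directory entries (down to PD1) that were pre-reserved
 * for the server-RM-owned VA range.
 */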
904 static NV_STATUS
905 _gvaspaceReleaseVaForServerRm
906 (
907     OBJGVASPACE *pGVAS,
908     OBJGPU      *pGpu
909 )
910 {
911     NV_STATUS         status   = NV_OK;
912     MMU_WALK_USER_CTX userCtx  = {0};
913     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
914 
915     if (NULL == userCtx.pGpuState)
916     {
917         status = NV_ERR_INVALID_STATE;
918         NV_ASSERT(0);
919     }
920     else
921     {
922         const MMU_FMT_LEVEL *pLevelFmt =
923                mmuFmtFindLevelWithPageShift(userCtx.pGpuState->pFmt->pRoot, GMMU_PD1_VADDR_BIT_LO);
924         status = mmuWalkReleaseEntries(userCtx.pGpuState->pWalk,
925                                        pLevelFmt,
926                                        pGVAS->vaStartServerRMOwned,
927                                        pGVAS->vaLimitServerRMOwned);
928     }
929     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
930 
931     return status;
932 }
933 
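/*!
 * FERMI_VASPACE_A destructor: frees any remaining VA blocks, destroys the VA
 * heap and channel group map, and tears down the per-GPU walker state, BPT
 * caches, and page table memory pools.
 */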
934 void
935 gvaspaceDestruct_IMPL(OBJGVASPACE *pGVAS)
936 {
937     // Destroy BC state.
938     if (NULL != pGVAS->pHeap)
939     {
940         pGVAS->pHeap->eheapTraverse(pGVAS->pHeap,
941                                     pGVAS,
942                                     _gvaspaceFreeVASBlock,
943                                     1 /*forwards*/);
944         pGVAS->pHeap->eheapDestruct(pGVAS->pHeap);
945         portMemFree(pGVAS->pHeap);
946         pGVAS->pHeap = NULL;
947     }
948 
949     // Destroy channel group map
950     if (mapCount(&pGVAS->chanGrpMap))
951     {
952         NV_ASSERT(0);
953         NV_PRINTF(LEVEL_ERROR,
954                   "GVAS is still used by some channel group(s)\n");
955     }
956     mapDestroy(&pGVAS->chanGrpMap);
957 
958     // Destroy per-GPU state.
959     if (NULL != pGVAS->pGpuStates)
960     {
961         OBJVASPACE      *pVAS = staticCast(pGVAS, OBJVASPACE);
962         OBJGPU          *pGpu = NULL;
963         GVAS_GPU_STATE  *pGpuState;
964         NV_STATUS        status;
965 
966         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
967         {
968             // Unsparsify entire VAS for BAR1.
969             if (pGVAS->flags & VASPACE_FLAGS_BAR_BAR1)
970             {
971                 status = _gvaspaceBar1VaSpaceDestruct(pGVAS, pGpu);
972                 NV_ASSERT(NV_OK == status);
973             }
974 
975             if (pGVAS->flags & VASPACE_FLAGS_FLA)
976             {
977                 status = _gvaspaceFlaVaspaceDestruct(pGVAS, pGpu);
978                 NV_ASSERT(NV_OK == status);
979             }
980         }
981         FOR_EACH_GPU_IN_MASK_UC_END
982 
983         // Release the PDEs for the server owned portion of the VA range
984         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
985         {
986             NvBool bClientRm = (IS_VIRTUAL_WITH_SRIOV(pGpu) || IS_GSP_CLIENT(pGpu));
987 
988             if (bClientRm && (0 != pGVAS->vaStartServerRMOwned))
989             {
990                 NV_ASSERT(NV_OK == _gvaspaceReleaseVaForServerRm(pGVAS, pGpu));
991             }
992         }
993         FOR_EACH_GPU_IN_MASK_UC_END
994 
995         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
996         {
997             pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
998             _gvaspaceGpuStateDestruct(pGVAS, pGpu, pGpuState);
999         }
1000         FOR_EACH_GPU_IN_MASK_UC_END
1001 
1002         //
1003         // Release the Big Page Table (BPT) caches *only* after all page level
1004         // updates have been completed on all the GPUs in SLI. Destroying the
1005         // cache on one GPU with unreleased BPT instances on another GPU can
1006         // cause memory leaks in a SLI scenario. This is because in SLI, a GPU
1007         // can share a BPT instance from another GPU's cache.
1008         //
1009         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1010         {
1011             pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
1012             gmmuMemDescCacheFree(pGpuState);
1013         }
1014         FOR_EACH_GPU_IN_MASK_UC_END
1015 
1016         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1017         {
1018             MemoryManager   *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1019 
1020             if (RMCFG_FEATURE_PMA &&
1021                 pMemoryManager->pPageLevelReserve != NULL)
1022             {
1023                 if (pGVAS->pPageTableMemPool != NULL)
1024                     rmMemPoolRelease(pGVAS->pPageTableMemPool, pGVAS->flags);
1025             }
1026         }
1027         FOR_EACH_GPU_IN_MASK_UC_END
1028 
1029         portMemFree(pGVAS->pGpuStates);
1030         pGVAS->pGpuStates = NULL;
1031     }
1032 }
1033 
1034 /*!
1035  * Add a region of VA reserved for partial page tables.
1036  */
1037 static void
1038 _gvaspaceAddPartialPtRange
1039 (
1040     OBJGVASPACE *pGVAS,
1041     const NvU64  va
1042 )
1043 {
1044     NV_ASSERT_OR_RETURN_VOID(pGVAS->numPartialPtRanges <
1045                           GVAS_MAX_PARTIAL_PAGE_TABLE_RANGES);
1046 
    // Only add the range if it is the first range or lies above the previous range.
1048     if ((0 == pGVAS->numPartialPtRanges) ||
1049         (va >= (pGVAS->partialPtVaRangeBase[pGVAS->numPartialPtRanges - 1] +
1050                 pGVAS->partialPtVaRangeSize)))
1051     {
1052         pGVAS->partialPtVaRangeBase[pGVAS->numPartialPtRanges] = va;
1053         pGVAS->numPartialPtRanges++;
1054     }
1055 }
1056 
1057 /*!
1058  * Construct unicast GPU state associated with a VAS and reconcile
 * differences between GMMU settings (currently must be homogeneous).
1060  */
1061 static NV_STATUS
1062 _gvaspaceGpuStateConstruct
1063 (
1064     OBJGVASPACE    *pGVAS,
1065     OBJGPU         *pGpu,
1066     GVAS_GPU_STATE *pGpuState,
1067     const NvU64     reqBigPageSize,
1068     const NvU64     vaStart,
1069     const NvU64     vaLimit,
1070     const NvU64     vaStartInternal,
1071     const NvU64     vaLimitInternal,
1072     const NvU32     flags,
1073     const NvBool    bFirst,
1074     NvU64          *pFullPdeCoverage,
1075     NvU32          *pPartialPdeExpMax
1076 )
1077 {
1078     OBJVASPACE          *pVAS  = staticCast(pGVAS, OBJVASPACE);
1079     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
1080     NvU64                pageSizeMask;
1081     NvU64                vaStartMin;
1082     NvU64                vaLimitMax;
1083     NvU64                bigPageSize;
1084     NvU64                compPageSize;
1085     NvU64                extManagedAlign;
1086     NvU64                vaLimitExt;
1087     const GMMU_FMT      *pFmt;
1088     const MMU_FMT_LEVEL *pBigPT;
1089     MMU_WALK_FLAGS       walkFlags = {0};
1090     NvU64                fullPdeCoverage;
1091     NvU32                partialPdeExpMax = 0;
1092     NvU64                vaStartInt = 0;
1093     NvU64                vaLimitInt = 0;
1094     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
1095         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
1096 
1097     // Must be in UC.
1098     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
1099 
1100     // Get GMMU format for this GPU.
1101     pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, reqBigPageSize);
1102     NV_ASSERT_OR_RETURN(NULL != pFmt, NV_ERR_NOT_SUPPORTED);
1103     pGpuState->pFmt = pFmt;
1104 
1105     // UVM mirroring works only with pre-Pascal format.
1106     if (flags & VASPACE_FLAGS_SET_MIRRORED)
1107     {
1108         NV_ASSERT_OR_RETURN(GMMU_FMT_VERSION_1 == pFmt->version, NV_ERR_ILLEGAL_ACTION);
1109     }
1110 
1111     // Determine GPU's page size settings.
1112     pageSizeMask = mmuFmtAllPageSizes(pFmt->pRoot);
1113     bigPageSize = pageSizeMask & (RM_PAGE_SIZE_64K | RM_PAGE_SIZE_128K);
1114 
1115     compPageSize = pMemorySystemConfig->comprPageSize;
1116 
1117     // Determine externally managed VA alignment from big page table coverage.
1118     pBigPT = mmuFmtFindLevelWithPageShift(pFmt->pRoot, nvLogBase2(bigPageSize));
1119     NV_ASSERT_OR_RETURN(NULL != pBigPT, NV_ERR_INVALID_ARGUMENT);
1120     extManagedAlign = NVBIT64(pBigPT->virtAddrBitHi + 1);
1121 
1122     // Determine partial page table parameters.
1123     fullPdeCoverage = mmuFmtLevelVirtAddrMask(pBigPT) + 1;
1124     if (nvFieldIsValid32(&pFmt->pPdeMulti->fldSizeRecipExp))
1125     {
1126         partialPdeExpMax = pFmt->pPdeMulti->fldSizeRecipExp.maskPos >>
1127                            pFmt->pPdeMulti->fldSizeRecipExp.shift;
1128     }
1129 
    // Set the VA start address to the non-zero reserved VA space base.
1131     vaStartMin = gvaspaceGetReservedVaspaceBase(pGVAS, pGpu);
1132 
1133     vaLimitMax = NVBIT64(pFmt->pRoot->virtAddrBitHi + 1) - 1;
1134 
1135     // Calculate the desired internal and external VAS limits.
1136     if (0 == vaLimit)
1137     {
1138         // Default: allow maximum VAS limit.
1139         vaLimitExt = vaLimitMax;
1140     }
1141     else
1142     {
        // Otherwise ensure the requested limit does not exceed the max HW limit.
1144         NV_ASSERT_OR_RETURN(vaLimit <= vaLimitMax, NV_ERR_INVALID_ARGUMENT);
1145 
1146         vaLimitExt = vaLimit;
1147     }
1148 
1149     if (flags & VASPACE_FLAGS_RESTRICTED_RM_INTERNAL_VALIMITS)
1150     {
1151         NV_ASSERT_OR_RETURN(vaLimitInternal <= vaLimitMax, NV_ERR_INVALID_ARGUMENT);
1152         NV_ASSERT_OR_RETURN(vaStartInternal <= vaLimitInternal, NV_ERR_INVALID_ARGUMENT);
1153         NV_ASSERT_OR_RETURN(vaStartInternal >= vaStartMin, NV_ERR_INVALID_ARGUMENT);
1154 
1155         vaStartInt = vaStartInternal;
1156         vaLimitInt = vaLimitInternal;
1157     }
1158     else
1159     {
1160         vaStartInt = vaStart;
1161         vaLimitInt = vaLimitExt;
1162     }
1163 
1164 
1165     //
1166     // Shared management external limit is aligned to root PDE coverage.
1167     // This allows KMD/OS to hook external PDEs beneath an RM-allocated root.
1168     //
1169     if (flags & VASPACE_FLAGS_SHARED_MANAGEMENT)
1170     {
1171         vaLimitExt = NV_ALIGN_UP64(vaLimitExt + 1, mmuFmtLevelPageSize(pFmt->pRoot)) - 1;
1172     }
1173 
1174     // First GPU sets the precedent.
1175     if (bFirst)
1176     {
1177         pGVAS->bigPageSize  = bigPageSize;
1178         pGVAS->compPageSize = compPageSize;
1179         pGVAS->extManagedAlign = extManagedAlign;
1180 
1181         //
1182         // Determine VAS start and limit.
1183         // vaStart of 0 is allowed if explicitly requested (e.g. BAR1).
1184         //
1185         if ((0 == vaStart) && !(flags & VASPACE_FLAGS_ALLOW_ZERO_ADDRESS))
1186         {
1187             pVAS->vasStart = vaStartMin;
1188         }
1189         else
1190         {
1191             pVAS->vasStart = vaStart;
1192         }
1193 
1194         if (vaStartInt == 0)
1195         {
1196             vaStartInt = pVAS->vasStart;
1197         }
1198 
1199         pGVAS->vaStartInternal = vaStartInt;
1200         pGVAS->vaLimitInternal = vaLimitInt;
1201 
1202         pVAS->vasLimit         = vaLimitExt;
1204         pGVAS->vaLimitMax      = vaLimitMax;
1205         *pFullPdeCoverage      = fullPdeCoverage;
1206         *pPartialPdeExpMax     = partialPdeExpMax;
1207     }
    // Remaining GPUs must either match or take the best fit.
1209     else
1210     {
1211         NV_ASSERT_OR_RETURN(bigPageSize == pGVAS->bigPageSize, NV_ERR_INVALID_ARGUMENT);
1212         NV_ASSERT_OR_RETURN(compPageSize == pGVAS->compPageSize, NV_ERR_INVALID_ARGUMENT);
1213         NV_ASSERT_OR_RETURN(extManagedAlign == pGVAS->extManagedAlign, NV_ERR_INVALID_ARGUMENT);
1214         if ((0 == vaStart) && !(flags & VASPACE_FLAGS_ALLOW_ZERO_ADDRESS))
1215         {
1216             pVAS->vasStart = NV_MAX(pVAS->vasStart, vaStartMin);
1217         }
1218         pVAS->vasLimit         = NV_MIN(pVAS->vasLimit,         vaLimitExt);
1219         pGVAS->vaStartInternal = NV_MAX(pGVAS->vaStartInternal, vaStartInt);
1220         pGVAS->vaLimitInternal = NV_MIN(pGVAS->vaLimitInternal, vaLimitInt);
1221         pGVAS->vaLimitMax      = NV_MIN(pGVAS->vaLimitMax,      vaLimitMax);
1222         NV_ASSERT_OR_RETURN(*pFullPdeCoverage  == fullPdeCoverage,  NV_ERR_INVALID_ARGUMENT);
1223         NV_ASSERT_OR_RETURN(*pPartialPdeExpMax == partialPdeExpMax, NV_ERR_INVALID_ARGUMENT);
1224     }
1225 
1226     //
1227     // Create MMU walker library state.
    // Set the ATS flag to enable related functionality in the MMU walker,
    // e.g. NV4K state for 64K PTEs.
1230     //
1231     walkFlags.bAtsEnabled = gvaspaceIsAtsEnabled(pGVAS);
1232     walkFlags.bUseIterative = gpuIsIterativeMmuWalkerEnabled(pGpu);
1233     NV_ASSERT_OK_OR_RETURN(
1234         mmuWalkCreate(pFmt->pRoot, NULL,
1235                       &g_gmmuWalkCallbacks,
1236                       walkFlags,
1237                       &pGpuState->pWalk,
1238                       NULL));
1239 
1240     listInit(&pGpuState->reservedPageTableEntries,
1241              portMemAllocatorGetGlobalNonPaged());
1242 
1243     listInitIntrusive(&pGpuState->unpackedMemDescList);
1244 
1245     return NV_OK;
1246 }
1247 
1248 /*!
1249  * Destruct unicast GPU state associated with a VAS.
1250  */
1251 static void
1252 _gvaspaceGpuStateDestruct
1253 (
1254     OBJGVASPACE    *pGVAS,
1255     OBJGPU         *pGpu,
1256     GVAS_GPU_STATE *pGpuState
1257 )
1258 {
1259     NV_ASSERT_OR_RETURN_VOID(!gpumgrGetBcEnabledStatus(pGpu));
1260     if (NULL != pGpuState->pRootInternal)
1261     {
1262         // Cleanup if client didn't call UnsetPageDir.
1263         NV0080_CTRL_DMA_UNSET_PAGE_DIRECTORY_PARAMS params = {0};
1264         NV_STATUS                                   status;
1265         status = gvaspaceExternalRootDirRevoke(pGVAS, pGpu, &params);
1266         NV_ASSERT(NV_OK == status);
1267     }
1268 
1269     //
1270     // Force free all page level instances. This can come in
1271     // handy on systems that support surprise removal.
1272     //
1273     _gvaspaceForceFreePageLevelInstances(pGVAS, pGpu, pGpuState);
1274 
1275     mmuWalkDestroy(pGpuState->pWalk);
1276     pGpuState->pWalk = NULL;
1277     NV_ASSERT(NULL == pGpuState->pMirroredRoot);
1278 
1279     NV_ASSERT(NULL == listHead(&pGpuState->reservedPageTableEntries));
1280     listDestroy(&pGpuState->reservedPageTableEntries);
1281 }
1282 
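/*!
 * Free the FLA dummy page allocation (if any) and clear its cached PTE
 * template.
 */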
1283 static void
1284 _gvaspaceCleanupFlaDummyPagesForFlaRange
1285 (
1286     OBJGVASPACE    *pGVAS,
1287     OBJGPU         *pGpu,
1288     GVAS_GPU_STATE *pGpuState
1289 )
1290 {
1291     RM_API *pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
1292     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1293 
1294     if (pGpuState->flaDummyPage.hMemory != NV01_NULL_OBJECT)
1295     {
1296         portMemSet(&pGpuState->flaDummyPage.pte, 0, sizeof(pGpuState->flaDummyPage.pte));
1297         pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pGpuState->flaDummyPage.hMemory);
1298         pGpuState->flaDummyPage.hMemory =  NV01_NULL_OBJECT;
1299     }
1300 }
1301 
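/*!
 * If the FLA dummy page feature is enabled, allocate a contiguous 64KB
 * vidmem dummy page for the FLA range and prefill a big page PTE template
 * pointing at it.
 */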
1302 static NV_STATUS
1303 _gvaspaceAllocateFlaDummyPagesForFlaRange
1304 (
1305     OBJGVASPACE    *pGVAS,
1306     OBJGPU         *pGpu,
1307     GVAS_GPU_STATE *pGpuState
1308 )
1309 {
1310     NV_STATUS                   status;
1311     KernelGmmu                 *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
1312     KernelBus                  *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
1313     const GMMU_FMT_FAMILY      *pFam = kgmmuFmtGetFamily(pKernelGmmu, pGpuState->pFmt->version);
1314     NvU64                       addr;
1315     NvBool                      bAcquireLock = NV_FALSE;
1316     RM_API                     *pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
1317     NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
1318     RsClient                   *pClient;
1319     Memory                     *pMemory;
1320 
1321     if (!kbusIsFlaDummyPageEnabled(pKernelBus))
1322         return NV_OK;
1323 
1324     portMemSet(&memAllocParams, 0, sizeof(memAllocParams));
1325     memAllocParams.owner     = VAS_EHEAP_OWNER_NVRM;
1326     memAllocParams.size      = RM_PAGE_SIZE_64K;
1327     memAllocParams.type      = NVOS32_TYPE_IMAGE;
1328     memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
1329                                DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG) |
1330                                DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
1331     memAllocParams.flags     = NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM |
1332                                NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE;
1333     memAllocParams.alignment = RM_PAGE_SIZE_64K;
1334 
1335     NV_ASSERT_OK_OR_GOTO(status,
1336        serverutilGenResourceHandle(pKernelBus->flaInfo.hClient, &pGpuState->flaDummyPage.hMemory),
1337        cleanup);
1338 
1339     //
1340     // Allocate memory using vidHeapControl
1341     //
    // vidHeapControl calls should happen outside GPU locks.
    // This is a PMA requirement: memory allocation calls may invoke eviction,
    // and UVM could get stuck waiting behind the GPU lock.
1345     //
1346     if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance) || rmGpuLockIsOwner())
1347     {
1348         rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
1349         bAcquireLock = NV_TRUE;
1350         pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
1351     }
1352 
1353     status = pRmApi->AllocWithHandle(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hSubDevice,
1354                                     pGpuState->flaDummyPage.hMemory, NV01_MEMORY_LOCAL_USER,
1355                                     &memAllocParams, sizeof(memAllocParams));
1356 
1357     if (bAcquireLock)
1358     {
1359         // Reacquire the GPU locks
1360         if (rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM) != NV_OK)
1361         {
1362             NV_ASSERT(0);
1363             status = NV_ERR_GENERIC;
1364             goto cleanup;
1365         }
1366         bAcquireLock = NV_FALSE;
1367     }
1368 
1369     if (status != NV_OK)
1370     {
1371         NV_PRINTF(LEVEL_ERROR, "failed to allocate dummy page for FLA, status: %x\n", status);
1372         goto cleanup;
1373     }
1374 
1375     NV_ASSERT_OK_OR_GOTO(status,
1376                          serverGetClientUnderLock(&g_resServ, pKernelBus->flaInfo.hClient, &pClient),
1377                          cleanup);
1378 
1379     NV_ASSERT_OK_OR_GOTO(status,
1380                          memGetByHandle(pClient, pGpuState->flaDummyPage.hMemory, &pMemory),
1381                          cleanup);
1382 
    // Prefill the big page PTE template for the FLA dummy page.
1384     const GMMU_APERTURE pgAperture = kgmmuGetMemAperture(pKernelGmmu, pMemory->pMemDesc);
1385 
1386     nvFieldSetBool(&pFam->pte.fldValid, NV_TRUE, pGpuState->flaDummyPage.pte.v8);
1387     nvFieldSetBool(&pFam->pte.fldVolatile, memdescGetVolatility(pMemory->pMemDesc),
1388                     pGpuState->flaDummyPage.pte.v8);
1389     gmmuFieldSetAperture(&pFam->pte.fldAperture, pgAperture,
1390                             pGpuState->flaDummyPage.pte.v8);
1391 
1392     addr = kgmmuEncodePhysAddr(pKernelGmmu, pgAperture,
1393                        memdescGetPhysAddr(pMemory->pMemDesc, AT_GPU, 0),
1394                        NVLINK_INVALID_FABRIC_ADDR);
1395 
1396     gmmuFieldSetAddress(gmmuFmtPtePhysAddrFld(&pFam->pte, pgAperture), addr, pGpuState->flaDummyPage.pte.v8);
1397 
1398     return NV_OK;
1399 
1400 cleanup:
1401     _gvaspaceCleanupFlaDummyPagesForFlaRange(pGVAS, pGpu, pGpuState);
1402     return status;
1403 }
1404 
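/*!
 * Reserve a block of VA from this VA space, honoring RM-internal, mirrored
 * (UVM), and fixed/flexible range restrictions, and sparsify the block
 * immediately if requested (or for BAR1).
 */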
1405 NV_STATUS
1406 gvaspaceAlloc_IMPL
1407 (
1408     OBJGVASPACE     *pGVAS,
1409     NvU64            size,
1410     NvU64            align,
1411     NvU64            rangeLo,
1412     NvU64            rangeHi,
1413     NvU64            pageSizeLockMask,
1414     VAS_ALLOC_FLAGS  flags,
1415     NvU64           *pAddr
1416 )
1417 {
1418     OBJVASPACE *pVAS       = staticCast(pGVAS, OBJVASPACE);
1419     OBJGPU     *pGpu       = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
1420     KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1421     NvU32       eheapFlags = 0;
1422     NV_STATUS   status     = NV_OK;
1423     OBJEHEAP   *pHeap      = pGVAS->pHeap;
1424     EMEMBLOCK  *pMemBlock;
1425     GVAS_BLOCK *pVASBlock;
1426     NvU64       origRangeLo = pHeap->rangeLo;
1427     NvU64       origRangeHi = pHeap->rangeHi;
1428 
1429     if (pGVAS->bIsExternallyOwned)
1430     {
1431         NV_PRINTF(LEVEL_ERROR,
1432                   "Cannot reserve VA on an externally owned VASPACE\n");
1433 
1434         return NV_ERR_NOT_SUPPORTED;
1435     }
1436 
1437     //
1438     // TODO: To be removed after pKernelBus->flaInfo.pFlaVAS is removed.
1439     // In case of FLA vaspace, check that fabric vaspace is not in use.
1440     //
1441     if ((pVAS == pKernelBus->flaInfo.pFlaVAS) && (pGpu->pFabricVAS != NULL))
1442     {
1443         FABRIC_VASPACE *pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
1444 
1445         if (gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)))
1446         {
1447             NV_PRINTF(LEVEL_ERROR, "FabricVAS and FlaVAS cannot be used simultaneously! "
1448                       "FlaVAS Alloc failed\n");
1449             return NV_ERR_INVALID_OPERATION;
1450         }
1451     }
1452 
1453     // Clip the input range to the original range.
1454     rangeLo = NV_MAX(rangeLo, origRangeLo);
1455     rangeHi = NV_MIN(rangeHi, origRangeHi);
1456 
    // If the RM-internal VA range is restricted, enforce the internal/client range split.
1458     if (gvaspaceIsInternalVaRestricted(pGVAS))
1459     {
        if (!flags.bClientAllocation) // ignore the 32-bit restriction here.
1461         {
1462             rangeLo = pGVAS->vaStartInternal;
1463             rangeHi = pGVAS->vaLimitInternal;
1464         }
1465         else
1466         {
            // Fixed address range check:
            // ensure it does not interfere with the RM-internal VA range.
1470             if (flags.bFixedAddressRange &&
1471                  ((rangeLo >= pGVAS->vaStartInternal && rangeLo <= pGVAS->vaLimitInternal) || \
1472                  (rangeHi <= pGVAS->vaLimitInternal && rangeHi >= pGVAS->vaStartInternal)))
1473             {
1474                 return NV_ERR_INVALID_PARAMETER;
1475             }
1476 
            // Flexible address range:
            // place above the RM-internal VA range as much as possible.
1480             if (!flags.bFixedAddressRange && !(rangeHi < pGVAS->vaStartInternal || rangeLo > pGVAS->vaLimitInternal))
1481             {
1482                 if ((rangeHi > pGVAS->vaLimitInternal) && (rangeHi - pGVAS->vaLimitInternal) >= size)
1483                 {
1484                     rangeLo = pGVAS->vaLimitInternal + 1;
1485                 }
1486                 else if (rangeLo < pGVAS->vaStartInternal && pGVAS->vaStartInternal - rangeLo  >= size)
1487                 {
1488                     rangeHi = pGVAS->vaStartInternal - 1;
1489                 }
1490                 else
1491                 {
1492                     return NV_ERR_INSUFFICIENT_RESOURCES;
1493                 }
1494                 // else do nothing as the ranges are disjoint
1495             }
1496         }
1497     }
1498 
1499     //
1500     // If this address space is marked as mirrored, then we will
1501     // cap user allocations to be under the top PDE.
1502     // If the allocations are privileged, then we will restrict the
1503     // allocations to the top PDE.
1504     //
1505     if (pGVAS->bIsMirrored)
1506     {
1507         if (flags.bPrivileged)
1508         {
1509             //
1510             // This is a kernel allocation so restrict the Allocations to
1511             // the topmost PDE.
1512             //
1513             rangeLo = NV_MAX(rangeLo, pGVAS->vaLimitInternal -
1514                                       UVM_KERNEL_PRIVILEGED_REGION_LENGTH + 1);
1515             rangeHi = NV_MIN(rangeHi, pGVAS->vaLimitInternal);
1516 
            // Verify the allocation range is within the UVM kernel privileged region.
1518             NV_ASSERT_OR_RETURN(rangeLo >= UVM_KERNEL_PRIVILEGED_REGION_START,
1519                               NV_ERR_OUT_OF_RANGE);
1520             NV_ASSERT_OR_RETURN(rangeHi < UVM_KERNEL_PRIVILEGED_REGION_START +
1521                                         UVM_KERNEL_PRIVILEGED_REGION_LENGTH,
1522                               NV_ERR_OUT_OF_RANGE);
1523         }
1524         else
1525         {
1526             //
1527             // This is a user space allocation. Restrict allocation from the last PDB
1528             // because that is privileged
1529             // vaRangeLo can still be based on the users override. We will return an error
1530             // if the user requested for an address in the last PDE range
1531             //
1532             rangeHi = NV_MIN(rangeHi, pGVAS->vaLimitInternal -
1533                                       UVM_KERNEL_PRIVILEGED_REGION_LENGTH);
1534 
            // Verify range is not in the privileged region.
1536             NV_ASSERT_OR_RETURN(rangeHi < UVM_KERNEL_PRIVILEGED_REGION_START,
1537                               NV_ERR_OUT_OF_RANGE);
1538         }
1539     }
1540 
1541     //
1542     // Sanity check the range before applying to eheap since
1543     // eheapSetAllocRange auto-clips (silencing potential range bugs).
1544     //
1545     NV_ASSERT_OR_RETURN(origRangeLo <= rangeLo,          NV_ERR_INVALID_ARGUMENT);
1546     NV_ASSERT_OR_RETURN(rangeLo <= rangeHi,              NV_ERR_INVALID_ARGUMENT);
1547     NV_ASSERT_OR_RETURN(rangeHi <= origRangeHi,          NV_ERR_INVALID_ARGUMENT);
1548     NV_ASSERT_OR_RETURN(size <= (rangeHi - rangeLo + 1), NV_ERR_INVALID_ARGUMENT);
1549     NV_ASSERT_OK_OR_RETURN(pHeap->eheapSetAllocRange(pHeap, rangeLo, rangeHi));
1550     // !!! All return paths after this point must "goto catch" to restore. !!!
1551 
1552     // Honor reverse flag for non-BAR VA spaces.
1553     if (flags.bReverse || (pGVAS->flags & VASPACE_FLAGS_REVERSE))
1554     {
1555         eheapFlags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;
1556     }
1557 
1558     if (flags.bFixedAddressAllocate)
1559     {
1560         eheapFlags |= NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
1561     }
1562 
1563     // Attempt to allocate VA space of the size and alignment requested.
1564     if (NV_OK != pHeap->eheapAlloc(pHeap, VAS_EHEAP_OWNER_NVRM, &eheapFlags,
1565                                    pAddr, &size, align, 1,
1566                                    &pMemBlock, NULL, NULL))
1567     {
1568         status = NV_ERR_NO_MEMORY;
1569         goto catch;
1570     }
1571     pVASBlock = (PGVAS_BLOCK)pMemBlock->pData;
1572 
1573     // Save flags for VA initialization
1574     pVASBlock->flags = flags;
1575     pVASBlock->pageSizeLockMask = pageSizeLockMask;
1576 
1577     if (flags.bExternallyManaged)
1578     {
1579         pVASBlock->management = VA_MANAGEMENT_PDES_ONLY;
1580     }
1581 
1582     //
1583     // VA reserved as sparse is sparsified immediately, changing its
1584     // unmapped state from "invalid" to "zero."
1585     //
1586     if (flags.bSparse || (pGVAS->flags & VASPACE_FLAGS_BAR_BAR1))
1587     {
1588         // Loop over each GPU associated with VAS.
1589         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1590         {
1591             KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1592             MMU_WALK_USER_CTX userCtx = {0};
1593 
1594             // Sparsify the VA range.
1595             gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
1596 
1597             if (NULL == userCtx.pGpuState)
1598             {
1599                 status = NV_ERR_INVALID_STATE;
1600                 NV_ASSERT(0);
1601             }
1602             else
1603             {
1604                 status = mmuWalkSparsify(userCtx.pGpuState->pWalk, *pAddr,
1605                                          *pAddr + size - 1, NV_FALSE);
1606             }
1607             gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
1608             if (NV_OK != status)
1609             {
1610                 DBG_BREAKPOINT();
1611                 break;
1612             }
1613 
1614             // Invalidate TLB to apply new sparse state.
1615             kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY  |
1616                                             BUS_FLUSH_SYSTEM_MEMORY |
1617                                             BUS_FLUSH_USE_PCIE_READ);
1618             gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_UPGRADE);
1619         }
1620         FOR_EACH_GPU_IN_MASK_UC_END
1621         if (NV_OK != status)
1622         {
1623             FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1624             {
1625                 KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1626                 MMU_WALK_USER_CTX userCtx = {0};
1627 
1628                 // Unsparsify the VA range.
1629                 gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
1630                 if (NULL == userCtx.pGpuState)
1631                 {
1632                     // Intentionally not clobbering status
1633                     NV_ASSERT(0);
1634                 }
1635                 else
1636                 {
                    // Not checking the return status
                    mmuWalkUnmap(userCtx.pGpuState->pWalk,
                                 pMemBlock->begin, pMemBlock->end);
1640                 }
1641                 gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
1642                 // Invalidate TLB to apply new sparse state.
1643                 kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY  |
1644                                          BUS_FLUSH_SYSTEM_MEMORY |
1645                                          BUS_FLUSH_USE_PCIE_READ);
1646                 gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_UPGRADE);
1647             }
1648             FOR_EACH_GPU_IN_MASK_UC_END
1649 
1650             goto catch;
1651         }
1652     }
1653     // Pin page tables upfront for non-lazy, non-external VA reservations.
1654     else if (!(flags.bLazy || flags.bExternallyManaged) &&
1655              (0 != pVASBlock->pageSizeLockMask))
1656     {
1657         // Loop over each GPU associated with VAS.
1658         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1659         {
1660             NvU32             pageShift;
1661             MMU_WALK_USER_CTX userCtx = {0};
1662 
1663             gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
1664 
1665             if (NULL == userCtx.pGpuState)
1666             {
1667                 status = NV_ERR_INVALID_STATE;
1668                 NV_ASSERT(0);
1669             }
1670             else
1671             {
1672                 if (pGVAS->flags & VASPACE_FLAGS_FLA)
1673                 {
                    // Currently the FLA VA space is associated with only one GPU.
1675                     NV_ASSERT(ONEBITSET(pVAS->gpuMask));
1676                     status = _gvaspaceAllocateFlaDummyPagesForFlaRange(pGVAS, pGpu, userCtx.pGpuState);
1677                 }
1678                 // Loop over each page size requested by client.
1679                 FOR_EACH_INDEX_IN_MASK(64, pageShift, pVASBlock->pageSizeLockMask)
1680                 {
1681                     // Pre-reserve page level instances in the VA range.
1682                     const MMU_FMT_LEVEL *pLevelFmt =
1683                         mmuFmtFindLevelWithPageShift(userCtx.pGpuState->pFmt->pRoot, pageShift);
1684                     status = mmuWalkReserveEntries(userCtx.pGpuState->pWalk, pLevelFmt,
1685                                                    *pAddr, *pAddr + size - 1, NV_TRUE);
1686                     if (NV_OK != status)
1687                     {
1688                         DBG_BREAKPOINT();
1689                         break;
1690                     }
1691                 }
1692                 FOR_EACH_INDEX_IN_MASK_END
1693             }
1694 
1695             gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
1696             if (NV_OK != status)
1697             {
1698                 break;
1699             }
1700         }
1701         FOR_EACH_GPU_IN_MASK_UC_END
1702         if (NV_OK != status)
1703         {
1704             // Unpin page tables for each GPU associated with VAS.
1705             FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1706             {
1707                 NvU32             pageShift;
1708                 MMU_WALK_USER_CTX userCtx = {0};
1709 
1710                 gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
1711 
1712                 if (NULL == userCtx.pGpuState)
1713                 {
1714                     // Intentionally not clobbering status
1715                     NV_ASSERT(0);
1716                 }
1717                 else
1718                 {
1719                     if (pGVAS->flags & VASPACE_FLAGS_FLA)
1720                     {
1721                         _gvaspaceCleanupFlaDummyPagesForFlaRange(pGVAS, pGpu, userCtx.pGpuState);
1722                     }
1723                     // Loop over each page size requested by client during VA reservation.
1724                     FOR_EACH_INDEX_IN_MASK(64, pageShift, pVASBlock->pageSizeLockMask)
1725                     {
1726                         // Release page level instances in the VA range.
1727                         const MMU_FMT_LEVEL *pLevelFmt =
1728                             mmuFmtFindLevelWithPageShift(userCtx.pGpuState->pFmt->pRoot, pageShift);
                        // Not checking the return status
                        mmuWalkReleaseEntries(userCtx.pGpuState->pWalk, pLevelFmt,
                                              pMemBlock->begin, pMemBlock->end);
1732                     }
1733                     FOR_EACH_INDEX_IN_MASK_END
1734                 }
1735 
1736                 gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
1737             }
1738             FOR_EACH_GPU_IN_MASK_UC_END
1739             goto catch;
1740         }
1741     }
1742 
1743 catch:
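    // Restore the heap's original allocation range (modified above via eheapSetAllocRange).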
1744     pHeap->eheapSetAllocRange(pHeap, origRangeLo, origRangeHi);
1745     return status;
1746 }
1747 
1748 static NV_STATUS
1749 _gvaspaceInternalFree
1750 (
1751     OBJGVASPACE  *pGVAS,
1752     NvU64         vAddr,
1753     EMEMBLOCK    *pMemBlock
1754 )
1755 {
1756     PGVAS_BLOCK   pVASBlock;
1757     GVAS_MAPPING *pMapNode;
1758     OBJVASPACE   *pVAS   = staticCast(pGVAS, OBJVASPACE);
1759     OBJGPU       *pGpu   = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
1760     NV_STATUS     status = NV_OK;
1761 
1762     if (pGpu == NULL)
1763     {
1764         return NV_ERR_INVALID_STATE;
1765     }
1766 
1767     pVASBlock = (PGVAS_BLOCK)pMemBlock->pData;
1768 
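    // Shared block: drop one reference and keep the VA reservation intact.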
1769     if (pMemBlock->refCount > 1)
1770     {
1771         pMemBlock->refCount--;
1772         return NV_OK;
1773     }
1774 
1775     // Before unmapping any CPU visible surfaces, make sure any CPU writes are flushed to L2.
1776     if (pGVAS->flags & VASPACE_FLAGS_BAR)
1777     {
1778         // Loop over each GPU associated with VAS.
1779         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1780         {
1781             KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1782             kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY  |
1783                                             BUS_FLUSH_SYSTEM_MEMORY |
1784                                             BUS_FLUSH_USE_PCIE_READ);
1785         }
1786         FOR_EACH_GPU_IN_MASK_UC_END
1787     }
1788 
1789     //
1790     // Unmap any leaked mappings.
1791     //
1792     btreeEnumStart(0, (NODE**)&pMapNode, &pVASBlock->pMapTree->node);
1793     while (NULL != pMapNode)
1794     {
1795         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pMapNode->gpuMask)
1796         {
1797             gvaspaceUnmap(pGVAS, pGpu, pMapNode->node.keyStart, pMapNode->node.keyEnd);
1798         }
1799         FOR_EACH_GPU_IN_MASK_UC_END
1800 
1801         btreeEnumStart(0, (NODE**)&pMapNode, &pVASBlock->pMapTree->node);
1802     }
1803 
1804     // Unpin page tables for each GPU associated with VAS.
1805     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1806     {
1807         GVAS_GPU_STATE *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
1808         NvU32           pageShift;
1809 
1810         NV_ASSERT(NULL != pGpuState);
1811 
1812         if (NULL != pGpuState)
1813         {
1814             FOR_EACH_INDEX_IN_MASK(64, pageShift, pVASBlock->pageSizeLockMask)
1815             {
1816                 // Release page level instances in the VA range.
1817                 const MMU_FMT_LEVEL *pLevelFmt =
1818                     mmuFmtFindLevelWithPageShift(pGpuState->pFmt->pRoot,
1819                                                  pageShift);
1820 
1821                 status = _gvaspaceReleaseUnreservedPTEs(pGVAS, pGpu,
1822                                                         pMemBlock->begin,
1823                                                         pMemBlock->end,
1824                                                         pLevelFmt);
1825                 NV_ASSERT(NV_OK == status);
1826             }
1827             FOR_EACH_INDEX_IN_MASK_END
1828         }
1829     }
1830     FOR_EACH_GPU_IN_MASK_UC_END
1831 
1832     if (!pVASBlock->flags.bSkipTlbInvalidateOnFree)
1833     {
1834         // Invalidate TLB on each GPU associated with VAS.
1835         FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
1836         {
1837             KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1838             kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY  |
1839                                             BUS_FLUSH_SYSTEM_MEMORY |
1840                                             BUS_FLUSH_USE_PCIE_READ);
1841             gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
1842         }
1843         FOR_EACH_GPU_IN_MASK_UC_END
1844     }
1845 
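    // Release the VA block back to the VA space heap.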
1846     pGVAS->pHeap->eheapFree(pGVAS->pHeap, pMemBlock->begin);
1847 
1848     return NV_OK;
1849 }
1850 
1851 NV_STATUS
1852 gvaspaceFree_IMPL
1853 (
1854     OBJGVASPACE  *pGVAS,
1855     NvU64         vAddr
1856 )
1857 {
1858     EMEMBLOCK *pMemBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, vAddr, 0);
1859     NV_ASSERT_OR_RETURN(NULL != pMemBlock, NV_ERR_INVALID_ARGUMENT);
1860 
1861     return _gvaspaceInternalFree(pGVAS, vAddr, pMemBlock);
1862 }
1863 
1864 NV_STATUS
1865 gvaspaceApplyDefaultAlignment_IMPL
1866 (
1867     OBJGVASPACE         *pGVAS,
1868     const FB_ALLOC_INFO *pAllocInfo,
1869     NvU64               *pAlign,
1870     NvU64               *pSize,
1871     NvU64               *pPageSizeLockMask
1872 )
1873 {
1874     OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
1875     OBJGPU     *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
1876     NvU64       bigPageSize  = pGVAS->bigPageSize;
1877     NvU64       maxPageSize  = RM_PAGE_SIZE;
1878     NvU64       compPageSize = pGVAS->compPageSize;
1879     NvU64       pageSizeMask = 0;
1880 
1881     //
    // In L2-cache-only mode, force the page size to 4K in order to conserve
    // memory; otherwise we end up wasting a lot of memory aligning allocations
    // to the big page size.
1885     //
1886     if (gpuIsCacheOnlyModeEnabled(pGpu))
1887     {
1888         NV_PRINTF(LEVEL_ERROR,
1889                   "Overriding page size to 4k in Cache only Mode\n");
1890         pageSizeMask |= RM_PAGE_SIZE;
1891     }
1892     else
1893     {
1894         KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
1895 
1896         // Determine the page size to align to based on user hint.
1897         switch (dmaNvos32ToPageSizeAttr(pAllocInfo->pageFormat->attr, pAllocInfo->pageFormat->attr2))
1898         {
1899             case RM_ATTR_PAGE_SIZE_4KB:
1900                 pageSizeMask |= RM_PAGE_SIZE;
1901                 break;
1902             case RM_ATTR_PAGE_SIZE_DEFAULT:
1903                 pageSizeMask |= RM_PAGE_SIZE;
1904                 pageSizeMask |= bigPageSize;
1905                 maxPageSize   = bigPageSize;
1906 
1907                 if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, pAllocInfo->retAttr))
1908                 {
1909                     NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
1910                                         NV_ERR_NOT_SUPPORTED);
1911                     pageSizeMask |= RM_PAGE_SIZE_HUGE;
1912                     maxPageSize   = RM_PAGE_SIZE_HUGE;
1913                 }
1914                 break;
1915             case RM_ATTR_PAGE_SIZE_BIG:
1916                 pageSizeMask |= bigPageSize;
1917                 maxPageSize   = bigPageSize;
1918                 break;
1919             case RM_ATTR_PAGE_SIZE_HUGE:
1920                 NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
1921                                     NV_ERR_NOT_SUPPORTED);
1922                 pageSizeMask |= RM_PAGE_SIZE_HUGE;
1923                 maxPageSize   = RM_PAGE_SIZE_HUGE;
1924                 break;
1925             case RM_ATTR_PAGE_SIZE_512MB:
1926                 NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
1927                                   NV_ERR_NOT_SUPPORTED);
1928                 pageSizeMask |= RM_PAGE_SIZE_512M;
1929                 maxPageSize   = RM_PAGE_SIZE_512M;
1930                 break;
1931             case RM_ATTR_PAGE_SIZE_INVALID:
1932                 NV_PRINTF(LEVEL_ERROR, "Invalid page size attr\n");
1933                 return NV_ERR_INVALID_ARGUMENT;
1934         }
1935     }
1936 
1937     // Save page sizes that will have page tables pinned (either upfront or lazily).
1938     *pPageSizeLockMask |= pageSizeMask;
1939 
1940     // Size must be aligned to maximum potential map page size.
1941     *pSize = RM_ALIGN_UP(*pSize, maxPageSize);
1942 
1943     //
1944     // Offset must be aligned to maximum potential map page size and
1945     // compression page size.
1946     //
    // However, the client may force its own alignment if it is known that the
    // VA range will not be mapped to compressed physical memory.
    // The forced alignment must still be aligned to the mapping page size,
    // but this is not enforced until map time.
1951     //
1952     if (!(pAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
1953     {
1954         *pAlign = NV_MAX(*pAlign, NV_MAX(maxPageSize, compPageSize));
1955     }
1956 
1957     // Offset and size must be aligned to PDE stride for external management.
1958     if (pAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_EXTERNALLY_MANAGED)
1959     {
        *pAlign =      NV_MAX(*pAlign, pGVAS->extManagedAlign);
        *pSize  = RM_ALIGN_UP(*pSize,  pGVAS->extManagedAlign);
1962     }
1963 
1964     return NV_OK;
1965 }
1966 
1967 NV_STATUS
1968 gvaspaceIncAllocRefCnt_IMPL
1969 (
1970     OBJGVASPACE *pGVAS,
1971     NvU64        vAddr
1972 )
1973 {
1974     EMEMBLOCK  *pVASpaceBlock;
1975 
1976     pVASpaceBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, vAddr, 0);
1977     if (NULL == pVASpaceBlock)
1978     {
1979         return NV_ERR_INVALID_ARGUMENT;
1980     }
1981 
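    // _gvaspaceInternalFree drops this reference; the block is torn down only when the last reference is freed.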
1982     pVASpaceBlock->refCount++;
1983 
1984     return NV_OK;
1985 }
1986 
1987 POBJEHEAP
1988 gvaspaceGetHeap_IMPL(OBJGVASPACE *pGVAS)
1989 {
1990     return pGVAS->pHeap;
1991 }
1992 
1993 NvU64
1994 gvaspaceGetMapPageSize_IMPL
1995 (
1996     OBJGVASPACE *pGVAS,
1997     OBJGPU      *pGpu,
1998     EMEMBLOCK   *pMemBlock
1999 )
2000 {
2001     GVAS_GPU_STATE      *pGpuState    = gvaspaceGetGpuState(pGVAS, pGpu);
2002     const MMU_FMT_LEVEL *pRootFmtLvl  = pGpuState->pFmt->pRoot;
2003     const NvU64          pageSizeMask = mmuFmtAllPageSizes(pRootFmtLvl);
2004     NvU32                i;
2005 
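    //
    // Scan candidate page sizes from smallest to largest; the first level with
    // an instantiated page table covering this block determines the map page size.
    //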
2006     for (i = 0; i < 64; ++i)
2007     {
2008         if (pageSizeMask & NVBIT64(i))
2009         {
2010             const MMU_FMT_LEVEL *pTargetFmt = NULL;
2011             MEMORY_DESCRIPTOR   *pMemDesc   = NULL;
2012             NvU32                memSize    = 0;
2013 
2014             pTargetFmt = mmuFmtFindLevelWithPageShift(pRootFmtLvl, i);
2015             mmuWalkGetPageLevelInfo(pGpuState->pWalk, pTargetFmt, pMemBlock->begin,
2016                                     (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize);
2017             if (NULL != pMemDesc)
2018             {
2019                 return NVBIT64(i);
2020             }
2021         }
2022     }
2023 
2024     NV_ASSERT(0);
2025     return 0;
2026 }
2027 
2028 NvU64
2029 gvaspaceGetBigPageSize_IMPL(OBJGVASPACE *pGVAS)
2030 {
2031     return pGVAS->bigPageSize;
2032 }
2033 
2034 NvBool
2035 gvaspaceIsMirrored_IMPL(OBJGVASPACE *pGVAS)
2036 {
2037     return pGVAS->bIsMirrored;
2038 }
2039 
2040 NvBool
2041 gvaspaceIsFaultCapable_IMPL(OBJGVASPACE *pGVAS)
2042 {
2043     return pGVAS->bIsFaultCapable;
2044 }
2045 
2046 NvBool
2047 gvaspaceIsExternallyOwned_IMPL(OBJGVASPACE *pGVAS)
2048 {
2049     return pGVAS->bIsExternallyOwned;
2050 }
2051 
2052 NvBool
2053 gvaspaceIsAtsEnabled_IMPL(OBJGVASPACE *pGVAS)
2054 {
2055     NvBool bAtsEnabled = pGVAS->bIsAtsEnabled;
2056 
    // With MIG memory partitioning enabled, ATS support additionally depends on the per-chip HAL capability.
2058     if (bAtsEnabled)
2059     {
2060         OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
2061         OBJGPU *pGpu     = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
2062         KernelMIGManager *pKernelMIGManager = (pGpu != NULL) ? GPU_GET_KERNEL_MIG_MANAGER(pGpu) : NULL;
2063 
2064         if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager))
2065         {
2066             return gpuIsAtsSupportedWithSmcMemPartitioning_HAL(pGpu);
2067         }
2068     }
2069 
2070     return bAtsEnabled;
2071 }
2072 
2073 NV_STATUS
2074 gvaspaceGetPasid_IMPL(OBJGVASPACE *pGVAS, NvU32 *pPasid)
2075 {
2076     NV_ASSERT_OR_RETURN(pPasid != NULL, NV_ERR_INVALID_ARGUMENT);
2077 
2078     NV_PRINTF(LEVEL_INFO, "ATS enabled: %u PASID: %u\n",
2079               pGVAS->bIsAtsEnabled, pGVAS->processAddrSpaceId);
2080 
2081     NV_ASSERT_OR_RETURN(pGVAS->bIsAtsEnabled, NV_ERR_INVALID_STATE);
2082     NV_ASSERT_OR_RETURN(pGVAS->processAddrSpaceId != NV_U32_MAX, NV_ERR_INVALID_STATE);
2083     *pPasid = pGVAS->processAddrSpaceId;
2084     return NV_OK;
2085 }
2086 
2087 NvU32
2088 gvaspaceGetFlags_IMPL(OBJGVASPACE *pGVAS)
2089 {
2090     return pGVAS->flags;
2091 }
2092 
2093 MEMORY_DESCRIPTOR*
2094 gvaspaceGetPageDirBase_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2095 {
2096     GVAS_GPU_STATE    *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2097     MEMORY_DESCRIPTOR *pRootMem  = NULL;
2098     NvU32              rootSize  = 0;
2099 
2100     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NULL);
2101 
2102 
2103     if (pGVAS->bIsExternallyOwned)
2104     {
2105         return pGVAS->pExternalPDB;
2106     }
2107 
2108     mmuWalkGetPageLevelInfo(pGpuState->pWalk, pGpuState->pFmt->pRoot, 0,
2109                             (const MMU_WALK_MEMDESC**)&pRootMem, &rootSize);
2110     return pRootMem;
2111 }
2112 
2113 MEMORY_DESCRIPTOR*
2114 gvaspaceGetKernelPageDirBase_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2115 {
2116     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NULL);
2117 
2118     GVAS_GPU_STATE *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2119     return (MEMORY_DESCRIPTOR*)pGpuState->pMirroredRoot;
2120 }
2121 
2122 const GMMU_FMT *
2123 gvaspaceGetGmmuFmt_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2124 {
2125     GVAS_GPU_STATE *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2126     NV_ASSERT_OR_RETURN(NULL != pGpuState, NULL);
2127     return pGpuState->pFmt;
2128 }
2129 
2130 GVAS_GPU_STATE *
2131 gvaspaceGetGpuState_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2132 {
2133     OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
2134     NV_ASSERT_OR_RETURN(NULL != pGVAS->pGpuStates, NULL);
2135     NV_ASSERT_OR_RETURN(pVAS->gpuMask & NVBIT32(pGpu->gpuInstance), NULL);
2136     return pGVAS->pGpuStates + nvMaskPos32(pVAS->gpuMask, pGpu->gpuInstance);
2137 }
2138 
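//
// Pin the root page directory by reserving the root-level entries across the
// entire (root-PDE-aligned) VA range, so the root instance persists even when
// no mappings are present.
//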
2139 NV_STATUS
2140 gvaspacePinRootPageDir_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2141 {
2142     MMU_WALK_USER_CTX    userCtx = {0};
2143     const MMU_FMT_LEVEL *pLevelFmt;
2144     NV_STATUS            status;
2145     NvU64                rootPdeCoverage;
2146     NvU64                vaLo;
2147     NvU64                vaHi;
2148 
2149     if (NULL == pGVAS->pGpuStates)
2150     {
2151         // TODO: VMM must be enabled - remove once default.
2152         return NV_ERR_NOT_SUPPORTED;
2153     }
2154 
2155     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
2156 
2157     if (NULL == userCtx.pGpuState)
2158     {
2159         status = NV_ERR_INVALID_STATE;
2160         NV_ASSERT_OR_GOTO(0, done);
2161     }
2162 
2163     // Determine aligned range to pin.
2164     pLevelFmt = userCtx.pGpuState->pFmt->pRoot;
2165     rootPdeCoverage = mmuFmtLevelPageSize(pLevelFmt);
2166     vaLo = NV_ALIGN_DOWN64(gvaspaceGetVaStart(pGVAS),   rootPdeCoverage);
2167     vaHi = NV_ALIGN_UP64(gvaspaceGetVaLimit(pGVAS) + 1, rootPdeCoverage) - 1;
2168 
2169     // Alloc and bind root level instance.
2170     status = mmuWalkReserveEntries(userCtx.pGpuState->pWalk,
2171                                    pLevelFmt, vaLo, vaHi, NV_TRUE);
2172     NV_ASSERT_OR_GOTO(NV_OK == status, done);
2173 
2174 done:
2175     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
2176     return status;
2177 }
2178 
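//
// Counterpart of gvaspacePinRootPageDir: release the root-level reservation.
// The root instance is freed only once no mappings reference it.
//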
2179 void
2180 gvaspaceUnpinRootPageDir_IMPL(OBJGVASPACE *pGVAS, OBJGPU *pGpu)
2181 {
2182     MMU_WALK_USER_CTX    userCtx = {0};
2183     const MMU_FMT_LEVEL *pLevelFmt;
2184     NV_STATUS            status;
2185     NvU64                rootPdeCoverage;
2186     NvU64                vaLo;
2187     NvU64                vaHi;
2188 
2189     if (NULL == pGVAS->pGpuStates)
2190     {
2191         // TODO: VMM must be enabled - remove once default.
2192         return;
2193     }
2194 
2195     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
2196 
2197     if (NULL == userCtx.pGpuState)
2198     {
2199         NV_ASSERT_OR_GOTO(0, done);
2200     }
2201 
2202     // Determine aligned range to unpin.
2203     pLevelFmt = userCtx.pGpuState->pFmt->pRoot;
2204     rootPdeCoverage = mmuFmtLevelPageSize(pLevelFmt);
2205     vaLo = NV_ALIGN_DOWN64(gvaspaceGetVaStart(pGVAS),   rootPdeCoverage);
2206     vaHi = NV_ALIGN_UP64(gvaspaceGetVaLimit(pGVAS) + 1, rootPdeCoverage) - 1;
2207 
2208     // Unreserve root level instance (won't free it if there are still mappings).
2209     status = mmuWalkReleaseEntries(userCtx.pGpuState->pWalk,
2210                                    pLevelFmt, vaLo, vaHi);
2211     NV_ASSERT_OR_GOTO(NV_OK == status, done);
2212 
2213 done:
2214     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
2215 }
2216 
2217 NV_STATUS
2218 gvaspaceMap_IMPL
2219 (
2220     OBJGVASPACE          *pGVAS,
2221     OBJGPU               *pGpu,
2222     const NvU64           vaLo,
2223     const NvU64           vaHi,
2224     const MMU_MAP_TARGET *pTarget,
2225     const VAS_MAP_FLAGS   flags
2226 )
2227 {
2228     NV_STATUS         status    = NV_OK;
2229     EMEMBLOCK        *pMemBlock = NULL;
2230     GVAS_BLOCK       *pVASBlock = NULL;
2231     NvU64             pageSize  = mmuFmtLevelPageSize(pTarget->pLevelFmt);
2232     MMU_WALK_USER_CTX userCtx   = {0};
2233 
2234     // Enforce unicast.
2235     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
2236 
2237     // Check VA alignment.
2238     NV_ASSERT_OR_RETURN(0 == (vaLo       & (pageSize - 1)), NV_ERR_INVALID_ARGUMENT);
2239     NV_ASSERT_OR_RETURN(0 == ((vaHi + 1) & (pageSize - 1)), NV_ERR_INVALID_ARGUMENT);
2240 
2241     //
2242     // Register the mapping unless remapping an existing mapping.
2243     // Remapping an existing mapping is used in two cases:
2244     // 1. [MODS-only]    Release/reacquire compression for verif.
2245     // 2. [Windows-only] BAR1 force clobber for BSOD during bugcheck.
2246     //
2247     if (!flags.bRemap)
2248     {
2249         // Get VA block.
2250         pMemBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, vaLo, 0);
2251         NV_ASSERT_OR_RETURN(NULL != pMemBlock, NV_ERR_INVALID_ARGUMENT);
2252         pVASBlock = (GVAS_BLOCK*)pMemBlock->pData;
2253 
2254         // Check VA containment.
2255         NV_ASSERT_OR_RETURN(vaHi <= pMemBlock->end, NV_ERR_INVALID_ARGUMENT);
2256 
2257         // Insert range into VAS block mapping tree.
2258         status = _gvaspaceMappingInsert(pGVAS, pGpu, pVASBlock, vaLo, vaHi, flags);
2259         NV_ASSERT_OR_RETURN(NV_OK == status, status);
2260     }
2261 
2262     // Call MMU walker to map.
2263     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
2264 
2265     if (NULL == userCtx.pGpuState)
2266     {
2267         status = NV_ERR_INVALID_STATE;
2268         NV_ASSERT_OR_GOTO(0, catch);
2269     }
2270 
2271     status = mmuWalkMap(userCtx.pGpuState->pWalk, vaLo, vaHi, pTarget);
2272     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
2273     NV_ASSERT_OR_GOTO(NV_OK == status, catch);
2274 
2275 catch:
2276     if (NV_OK != status && (!flags.bRemap))
2277     {
2278         _gvaspaceMappingRemove(pGVAS, pGpu, pVASBlock, vaLo, vaHi);
2279     }
2280     return status;
2281 }
2282 
2283 void
2284 gvaspaceUnmap_IMPL
2285 (
2286     OBJGVASPACE *pGVAS,
2287     OBJGPU      *pGpu,
2288     const NvU64  vaLo,
2289     const NvU64  vaHi
2290 )
2291 {
2292     NV_STATUS         status    = NV_OK;
2293     EMEMBLOCK        *pMemBlock = NULL;
2294     GVAS_BLOCK       *pVASBlock = NULL;
2295     MMU_WALK_USER_CTX userCtx   = {0};
2296 
2297     // Enforce unicast.
2298     NV_ASSERT_OR_RETURN_VOID(!gpumgrGetBcEnabledStatus(pGpu));
2299 
2300     // Get VA block.
2301     pMemBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, vaLo, 0);
2302     NV_ASSERT_OR_RETURN_VOID(NULL != pMemBlock);
2303     pVASBlock = (GVAS_BLOCK*)pMemBlock->pData;
2304 
2305     // Unregister the mapping
2306     status = _gvaspaceMappingRemove(pGVAS, pGpu, pVASBlock, vaLo, vaHi);
2307     NV_ASSERT_OR_RETURN_VOID(NV_OK == status);
2308 
2309     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
2310 
2311     if (NULL == userCtx.pGpuState)
2312     {
2313         NV_ASSERT(0);
2314     }
2315     else
2316     {
        if (pVASBlock->flags.bSparse || (pGVAS->flags & VASPACE_FLAGS_BAR_BAR1) ||
            ((pMemBlock->refCount > 1) && (pGVAS->flags & VASPACE_FLAGS_FLA)))
2320         {
            // Return to sparse if that was the original state of this allocation.
2322             status = mmuWalkSparsify(userCtx.pGpuState->pWalk, vaLo, vaHi, NV_FALSE);
2323             NV_ASSERT(NV_OK == status);
2324         }
2325         else
2326         {
2327             // Plain old unmap
2328             status = mmuWalkUnmap(userCtx.pGpuState->pWalk, vaLo, vaHi);
2329             NV_ASSERT(NV_OK == status);
2330         }
2331     }
2332 
2333     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
2334 }
2335 
2336 void
2337 gvaspaceInvalidateTlb_IMPL
2338 (
2339     OBJGVASPACE         *pGVAS,
2340     OBJGPU              *pGpu,
2341     VAS_PTE_UPDATE_TYPE  update_type
2342 )
2343 {
2344     OBJVASPACE *pVAS = staticCast(pGVAS, OBJVASPACE);
2345     NvU32      gfid  = GPU_GFID_PF;
2346 
2347     NV_ASSERT_OR_RETURN_VOID(!gpumgrGetBcEnabledStatus(pGpu));
2348     NV_ASSERT_OR_RETURN_VOID(0 != (NVBIT(pGpu->gpuInstance) & pVAS->gpuMask));
2349 
2350     GVAS_GPU_STATE    *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2351     MEMORY_DESCRIPTOR *pRootMem  = NULL;
2352     NvU32              rootSize  = 0;
2353     NvU32              invalidation_scope = NV_GMMU_INVAL_SCOPE_ALL_TLBS;
2354     NvBool             bCallingContextPlugin;
2355 
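    // Default to the PF GFID; on vGPU, non-plugin callers use their own GFID.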
2356     NV_ASSERT_OR_RETURN_VOID(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK);
2357     if (!bCallingContextPlugin)
2358     {
2359         NV_ASSERT_OR_RETURN_VOID(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK);
2360     }
2361 
2362     if (pGVAS->flags & VASPACE_FLAGS_INVALIDATE_SCOPE_NVLINK_TLB)
2363     {
2364         invalidation_scope = NV_GMMU_INVAL_SCOPE_LINK_TLBS;
2365     }
2366     else
2367     {
2368         invalidation_scope = NV_GMMU_INVAL_SCOPE_NON_LINK_TLBS;
2369     }
2370 
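    // Invalidate only if the root page directory is actually instantiated.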
2371     mmuWalkGetPageLevelInfo(pGpuState->pWalk, pGpuState->pFmt->pRoot, 0,
2372                             (const MMU_WALK_MEMDESC**)&pRootMem, &rootSize);
2373     if (pRootMem != NULL)
2374     {
2375         KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2376         kgmmuInvalidateTlb_HAL(pGpu, pKernelGmmu, pRootMem,
2377                               pGVAS->flags,
2378                               update_type, gfid,
2379                               invalidation_scope);
2380 
2381         if (pGVAS->bIsMirrored)
2382         {
2383             kgmmuInvalidateTlb_HAL(pGpu, pKernelGmmu,
2384                                   (MEMORY_DESCRIPTOR*)pGpuState->pMirroredRoot,
2385                                   pGVAS->flags,
2386                                   update_type, gfid,
2387                                   invalidation_scope);
2388         }
2389     }
2390 }
2391 
2392 NV_STATUS
2393 gvaspaceGetVasInfo_IMPL
2394 (
2395     OBJGVASPACE                                   *pGVAS,
2396     NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS  *pParams
2397 )
2398 {
2399     OBJVASPACE          *pVAS  = staticCast(pGVAS, OBJVASPACE);
2400     OBJGPU              *pGpu  = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
2401     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2402     const MMU_FMT_LEVEL *pBigPageTable   = NULL;
2403     const MMU_FMT_LEVEL *pSmallPageTable = NULL;
2404     const GMMU_FMT      *pFmt            = gvaspaceGetGpuState(pGVAS, pGpu)->pFmt;
2405 
2406     NV_ASSERT_OR_RETURN(NULL != pParams, NV_ERR_INVALID_PARAM_STRUCT);
2407 
    // Retrieve the number of VA bits for this format.
2409     pParams->vaBitCount = pFmt->pRoot->virtAddrBitHi + 1;
2410 
2411     // Check if the page sizes are supported
2412     pSmallPageTable = mmuFmtFindLevelWithPageShift(pFmt->pRoot, RM_PAGE_SHIFT);
2413     NV_ASSERT_OR_RETURN(pSmallPageTable, NV_ERR_INVALID_EVENT);
2414 
2415     pBigPageTable = mmuFmtFindLevelWithPageShift(pFmt->pRoot, nvLogBase2(pGVAS->bigPageSize));
2416     NV_ASSERT_OR_RETURN(pBigPageTable, NV_ERR_INVALID_EVENT);
2417     pParams->bigPageSize = pGVAS->bigPageSize;
2418 
2419     pParams->supportedPageSizeMask = RM_PAGE_SIZE | pParams->bigPageSize;
2420 
2421     if (kgmmuIsHugePageSupported(pKernelGmmu))
2422         pParams->supportedPageSizeMask |= RM_PAGE_SIZE_HUGE;
2423 
2424     if (kgmmuIsPageSize512mbSupported(pKernelGmmu))
2425         pParams->supportedPageSizeMask |= RM_PAGE_SIZE_512M;
2426 
2427     // Dual Page Table is supported for all Fermi-and-later chips
2428     pParams->dualPageTableSupported = (NvU32)NV_TRUE;
2429 
2430     // Big Page Table caps
2431 
2432     // VA bits covered by a PDE (for Big Page Table), in a terminal Page Directory.
2433     pParams->pdeCoverageBitCount = pBigPageTable->virtAddrBitHi + 1;
2434     // Physical size of Page Table in bytes
2435     pParams->pageTableBigFormat.pageTableSize = mmuFmtLevelSize(pBigPageTable);
2436     // VA extent of a Big Page Table
2437     pParams->pageTableBigFormat.pageTableCoverage =
2438         (NvU32)mmuFmtLevelVirtAddrMask(pBigPageTable) + 1;
2439 
2440     // Small Page Table caps, similar to Big Page Table caps
    // TODO: num4KPageTableFormats should change after partial PT support is added.
2442     pParams->num4KPageTableFormats = 1;
2443     pParams->pageTable4KFormat[0].pageTableSize = mmuFmtLevelSize(pSmallPageTable);
2444     pParams->pageTable4KFormat[0].pageTableCoverage =
2445         (NvU32)mmuFmtLevelVirtAddrMask(pSmallPageTable) + 1;
2446 
2447     pParams->idealVRAMPageSize = pParams->bigPageSize;
2448 
2449     pParams->vaRangeLo = vaspaceGetVaStart(pVAS);
2450 
2451     return NV_OK;
2452 }
2453 
2454 NV_STATUS
2455 gvaspaceGetPageTableInfo_IMPL
2456 (
2457     OBJGVASPACE                           *pGVAS,
2458     NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS   *pParams
2459 )
2460 {
2461     OBJVASPACE              *pVAS = staticCast(pGVAS, OBJVASPACE);
2462     OBJGPU                  *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
2463     GVAS_GPU_STATE          *pGpuState;
2464     MMU_WALK                *pWalk;
2465     const MMU_FMT_LEVEL     *pRootFmt;
2466     PMEMORY_DESCRIPTOR       pRootMem = NULL;
2467     NvU32                    rootSize = 0;
2468     NvU32                    pteBlockIdx = 0;
2469     NvU32                    i;
2470     NvBool                   bOrigBcState;
2471     NV_STATUS                rmStatus;
2472 
2473     NV_ASSERT_OR_RETURN(NULL != pParams, NV_ERR_INVALID_PARAM_STRUCT);
2474 
2475     // Pick a specific sub-device if requested.
2476     if (0 != pParams->subDeviceId)
2477     {
2478         pGpu = gpumgrGetGpuFromSubDeviceInst(gpuGetDeviceInstance(pGpu),
2479                                              pParams->subDeviceId - 1);
2480         NV_ASSERT_OR_RETURN(NULL != pGpu, NV_ERR_INVALID_ARGUMENT);
2481     }
2482 
2483     // Page tables are pinned and queried in UC. Force GPU to unicast.
2484     bOrigBcState = gpumgrGetBcEnabledStatus(pGpu);
2485     gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);
2486 
2487     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2488     pWalk     = pGpuState->pWalk;
2489     pRootFmt  = pGpuState->pFmt->pRoot;
2490 
2491     // Pin lazy page tables for WDDMv1 KMD.
2492     rmStatus = _gvaspacePinLazyPageTables(pGVAS, pGpu, pParams->gpuAddr);
2493 
2494     gpumgrSetBcEnabledStatus(pGpu, bOrigBcState);
2495 
2496     NV_ASSERT_OR_RETURN((NV_OK == rmStatus), rmStatus);
2497 
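    // Report one PTE block per page size whose page table is instantiated at gpuAddr.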
2498     for (i = 0; i < pageSizeCount; i++)
2499     {
2500         PMEMORY_DESCRIPTOR                  pMemDesc  = NULL;
2501         NvU32                               memSize   = 0;
2502         NV0080_CTRL_DMA_PDE_INFO_PTE_BLOCK *pPteBlock = NULL;
2503         NvU64                               pageSize;
2504         const MMU_FMT_LEVEL                *pLevelFmt;
2505         const MMU_FMT_LEVEL                *pParentFmt;
2506         NvU32                               subLevel;
2507 
2508 
2509         pageSize = (VAS_PAGESIZE_IDX_BIG == i) ? pGVAS->bigPageSize : pageSizes[i];
2510         pLevelFmt = mmuFmtFindLevelWithPageShift(pRootFmt, BIT_IDX_64(pageSize));
2511         if (NULL == pLevelFmt)
2512         {
2513             continue;
2514         }
2515 
2516         pParentFmt = mmuFmtFindLevelParent(pRootFmt, pLevelFmt, &subLevel);
2517         NV_ASSERT_OR_RETURN(NULL != pParentFmt, NV_ERR_INVALID_ARGUMENT);
2518 
2519         NV_ASSERT_OK_OR_RETURN(
2520             mmuWalkGetPageLevelInfo(pWalk, pLevelFmt, pParams->gpuAddr,
2521                                     (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize));
2522         if (NULL == pMemDesc)
2523             continue;
2524 
2525         // These only need to be calculated once, but we need the parent level format.
2526         if (0 == pteBlockIdx)
2527         {
2528             // The base VA of the PDE
2529             pParams->pdeVirtAddr = mmuFmtLevelVirtAddrLo(pLevelFmt, pParams->gpuAddr);
2530 
2531             // Number of bytes occupied by one PDE
2532             pParams->pdeEntrySize = pParentFmt->entrySize;
2533         }
2534 
2535         NV_ASSERT_OR_RETURN(pteBlockIdx < NV0080_CTRL_DMA_PDE_INFO_PTE_BLOCKS, NV_ERR_INVALID_STATE);
2536         pPteBlock = &pParams->pteBlocks[pteBlockIdx++];
2537 
2538         // Page size supported by this page table
2539         pPteBlock->pageSize       = pageSize;
2540 
2541         // Phys addr of the Page Table
2542         pPteBlock->ptePhysAddr    = memdescGetPhysAddr(pMemDesc, VAS_ADDRESS_TRANSLATION(pVAS), 0);
2543 
2544         // Number of bytes occupied by one PTE
2545         pPteBlock->pteEntrySize   = pLevelFmt->entrySize;
2546 
2547         // VA extent of one PDE, i.e. of one entire Page Table.
2548         pPteBlock->pdeVASpaceSize = (NvU32)mmuFmtLevelVirtAddrMask(pLevelFmt) + 1;
2549 
2550         // Caching attributes
2551         pPteBlock->pteCacheAttrib = memdescGetCpuCacheAttrib(pMemDesc);
2552 
2553         // Addr space of the Page Table
2554         switch (memdescGetAddressSpace(pMemDesc))
2555         {
2556             case ADDR_FBMEM:
2557                 pPteBlock->pteAddrSpace =
2558                     NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PTE_ADDR_SPACE_VIDEO_MEMORY;
2559                 break;
2560             case ADDR_SYSMEM:
2561                 if (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
2562                 {
2563                     pPteBlock->pteAddrSpace =
2564                         NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PTE_ADDR_SPACE_SYSTEM_COHERENT_MEMORY;
2565                 }
2566                 else
2567                 {
2568                     pPteBlock->pteAddrSpace =
2569                         NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PTE_ADDR_SPACE_SYSTEM_NON_COHERENT_MEMORY;
2570                 }
2571                 break;
2572             default:
2573                 NV_ASSERT(0);
2574                 return NV_ERR_INVALID_STATE;
2575         }
2576     }
2577 
2578     // Addr of the root Page Dir
2579     NV_ASSERT_OK_OR_RETURN(
2580         mmuWalkGetPageLevelInfo(pWalk, pRootFmt, 0,
2581                                 (const MMU_WALK_MEMDESC**)&pRootMem, &rootSize));
2582     if (NULL == pRootMem)
2583     {
2584         return NV_ERR_INVALID_STATE;
2585     }
2586     pParams->pdbAddr = memdescGetPhysAddr(pRootMem, VAS_ADDRESS_TRANSLATION(pVAS), 0);
2587 
2588     // Addr Space of the Page Dir.
2589     switch (memdescGetAddressSpace(pRootMem))
2590     {
2591         case ADDR_FBMEM:
2592             pParams->pdeAddrSpace =
2593                 NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PDE_ADDR_SPACE_VIDEO_MEMORY;
2594             break;
2595         case ADDR_SYSMEM:
2596             if (memdescGetCpuCacheAttrib(pRootMem) == NV_MEMORY_CACHED)
2597             {
2598                 pParams->pdeAddrSpace =
2599                     NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PDE_ADDR_SPACE_SYSTEM_COHERENT_MEMORY;
2600             }
2601             else
2602             {
2603                 pParams->pdeAddrSpace =
2604                      NV0080_CTRL_DMA_GET_PDE_INFO_PARAMS_PDE_ADDR_SPACE_SYSTEM_NON_COHERENT_MEMORY;
2605             }
2606             break;
2607         default:
2608             NV_ASSERT(0);
2609             return NV_ERR_INVALID_STATE;
2610     }
2611 
2612     return NV_OK;
2613 }
2614 
2615 NV_STATUS
2616 gvaspaceGetPteInfo_IMPL
2617 (
2618     OBJGVASPACE        *pGVAS,
2619     OBJGPU             *pGpu,
2620     NV0080_CTRL_DMA_GET_PTE_INFO_PARAMS *pParams,
2621     RmPhysAddr         *pPhysAddr
2622 )
2623 {
2624     KernelGmmu     *pKernelGmmu;
2625     MemoryManager  *pMemoryManager;
2626     GVAS_GPU_STATE *pGpuState;
2627     MMU_WALK       *pWalk;
2628     const GMMU_FMT *pFmt;
2629     NV_STATUS      status = NV_OK;
2630     NvU32          i;
2631     NvU32          pteBlockIndex = 0;
2632     const MMU_FMT_LEVEL *pRootFmt;
2633     TRANSFER_SURFACE surf = {0};
2634 
2635     NV_ASSERT_OR_RETURN(NULL != pParams, NV_ERR_INVALID_PARAM_STRUCT);
2636 
2637     // Pick a specific sub-device if requested.
2638     if (0 != pParams->subDeviceId)
2639     {
2640         pGpu = gpumgrGetGpuFromSubDeviceInst(gpuGetDeviceInstance(pGpu),
2641                                              pParams->subDeviceId - 1);
2642         NV_ASSERT_OR_RETURN(NULL != pGpu, NV_ERR_INVALID_ARGUMENT);
2643     }
2644     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2645     pWalk     = pGpuState->pWalk;
2646     pFmt      = pGpuState->pFmt;
2647     pRootFmt  = pFmt->pRoot;
2648     pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2649     pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2650 
2651     //
    // We will try all available page sizes for a valid allocation at the given VA.
    // An error is flagged if multiple valid allocations exist.
2654     //
2655 
2656     for (i = 0; i < pageSizeCount; i++)
2657     {
2658         NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK *pPteBlock = NULL;
2659         const GMMU_FIELD_ADDRESS           *pAddrField;
2660         const MMU_FMT_LEVEL                *pLevelFmt     = NULL;
2661         PMEMORY_DESCRIPTOR                  pMemDesc      = NULL;
2662         NvU32                               memSize       = 0;
2663         NvU32                               pteIndex;
2664         GMMU_ENTRY_VALUE                    pte       = {{0}};
2665         NvU64                               pageSize;
2666 
2667         pageSize = (VAS_PAGESIZE_IDX_BIG == i) ? pGVAS->bigPageSize : pageSizes[i];
2668         pLevelFmt = mmuFmtFindLevelWithPageShift(pRootFmt, BIT_IDX_64(pageSize));
2669         if (NULL == pLevelFmt)
2670         {
2671             continue;
2672         }
2673 
2674         NV_ASSERT_OK_OR_RETURN(
2675             mmuWalkGetPageLevelInfo(pWalk, pLevelFmt, pParams->gpuAddr,
2676                                     (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize));
2677         if (NULL == pMemDesc)
2678         {
2679             // Skip if not allocated.
2680             continue;
2681         }
2682 
2683         pteIndex = mmuFmtVirtAddrToEntryIndex(pLevelFmt, pParams->gpuAddr);
2684 
2685         // Read the PTE
2686         surf.pMemDesc = pMemDesc;
2687         surf.offset = pteIndex * pLevelFmt->entrySize;
2688 
2689         NV_ASSERT_OK_OR_RETURN(memmgrMemRead(pMemoryManager, &surf, pte.v8,
2690                                              pLevelFmt->entrySize,
2691                                              TRANSFER_FLAGS_DEFER_FLUSH));
2692 
2693         NV_ASSERT_OR_RETURN(pteBlockIndex < NV0080_CTRL_DMA_PDE_INFO_PTE_BLOCKS, NV_ERR_INVALID_STATE);
2694         pPteBlock = &pParams->pteBlocks[pteBlockIndex++];
2695 
2696         // Page size supported by this page table
2697         pPteBlock->pageSize = pageSize;
2698 
2699         // Number of bytes occupied by one PTE
2700         pPteBlock->pteEntrySize = pLevelFmt->entrySize;
2701 
2702         kgmmuExtractPteInfo(pKernelGmmu, &pte, pPteBlock, pFmt, pLevelFmt);
2703 
2704         //
2705         // Get phys addr encoded in the PTE, but only
2706         // if requested and the PTE is valid. Only one page size
2707         // PTE should be valid at a time.
2708         //
2709         if ((NULL != pPhysAddr) &&
2710             nvFieldGetBool(&pFmt->pPte->fldValid, pte.v8))
2711         {
2712             pAddrField = gmmuFmtPtePhysAddrFld(pFmt->pPte,
2713                 gmmuFieldGetAperture(&pFmt->pPte->fldAperture, pte.v8));
2714             *pPhysAddr = (RmPhysAddr)gmmuFieldGetAddress(pAddrField, pte.v8);
2715         }
2716     }
2717 
2718     //
    // We don't support pre-initializing page tables on VMM.
    // Page tables are allocated on demand during mapping.
    // So we assert if this control call requests page table initialization
    // (skipVASpaceInit == NV_FALSE) and the page tables are not already allocated.
2723     //
2724     NV_ASSERT_OR_RETURN((pteBlockIndex > 0) || pParams->skipVASpaceInit, NV_ERR_INVALID_REQUEST);
2725 
2726     return status;
2727 }
2728 
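//
// Decide whether a PTE update must be treated as a downgrade (and hence needs
// a full flush): the valid and read-only state of the current PTE is compared
// against the requested flags. GMMU v3 formats carry the read-only state in
// the PTE PCF field; earlier formats use the discrete read-only bit.
//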
2729 static inline
2730 NvBool isPteDowngrade(KernelGmmu *pKernelGmmu, const GMMU_FMT *pFmt, NvU32 pteInputFlags, GMMU_ENTRY_VALUE curPte)
2731 {
2732     NvBool bReadOnly      = NV_FALSE;
2733     NvBool curPteReadOnly = NV_FALSE;
2734 
2735     NvBool bPteInvalid = (!FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2736                             _VALID, _TRUE, pteInputFlags)
2737                             && nvFieldGetBool(&pFmt->pPte->fldValid, curPte.v8));
2738 
2739     if (pFmt->version == GMMU_FMT_VERSION_3)
2740     {
2741         NvU32 ptePcfHw = 0;
2742         NvU32 ptePcfSw = 0;
2743 
2744         ptePcfHw = nvFieldGet32(&pFmt->pPte->fldPtePcf, curPte.v8);
2745         NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromHw_HAL(pKernelGmmu, ptePcfHw, nvFieldGetBool(&pFmt->pPte->fldValid, curPte.v8),
2746                                                            &ptePcfSw) == NV_OK), NV_ERR_INVALID_ARGUMENT);
2747         curPteReadOnly = ptePcfSw & (1 << SW_MMU_PCF_RO_IDX);
2748     }
2749     else
2750     {
2751         curPteReadOnly = nvFieldGetBool(&pFmt->pPte->fldReadOnly, curPte.v8);
2752     }
2753     bReadOnly = (!FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2754                      _READ_ONLY, _TRUE, pteInputFlags)
2755                      && !curPteReadOnly);
2756 
2757     return (bPteInvalid || bReadOnly);
2758 }
2759 
2760 NV_STATUS
2761 gvaspaceSetPteInfo_IMPL
2762 (
2763     OBJGVASPACE                         *pGVAS,
2764     OBJGPU                              *pGpu,
2765     NV0080_CTRL_DMA_SET_PTE_INFO_PARAMS *pParams
2766 )
2767 {
2768     OBJVASPACE              *pVAS = staticCast(pGVAS, OBJVASPACE);
2769     MemoryManager           *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2770     NvU8                     i;
2771     NV_STATUS                status = NV_OK;
2772     NvBool                   bDowngrade = NV_FALSE;
2773     TRANSFER_SURFACE         surf = {0};
2774 
2775     NV_ASSERT_OR_RETURN(NULL != pParams, NV_ERR_INVALID_PARAM_STRUCT);
2776 
2777     // Loop over each GPU associated with the VAS.
2778     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
2779     {
2780         GVAS_GPU_STATE      *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
2781         MMU_WALK            *pWalk     = pGpuState->pWalk;
2782         const GMMU_FMT      *pFmt      = pGpuState->pFmt;
2783         const MMU_FMT_LEVEL *pRootFmt  = pFmt->pRoot;
2784         bDowngrade = NV_FALSE;
2785 
2786         // Skip the GPU if the caller requested a different specific sub-device.
2787         if ((0 != pParams->subDeviceId) &&
2788             (pGpu->subdeviceInstance != (pParams->subDeviceId - 1)))
2789         {
2790             goto catchGpu;
2791         }
2792 
2793         for (i = 0; i < NV0080_CTRL_DMA_SET_PTE_INFO_PTE_BLOCKS; i++)
2794         {
2795             NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK *pPteBlock = NULL;
2796             const MMU_FMT_LEVEL                *pLevelFmt;
2797             PMEMORY_DESCRIPTOR                  pMemDesc  = NULL;
2798             NvU32                               memSize   = 0;
2799             GMMU_ENTRY_VALUE                    pte       = {{0}};
2800             NvBool                              bValid;
2801             NvBool                              bEncrypted;
2802             NvBool                              bReadOnly;
2803             NvBool                              bVolatile = NV_FALSE;
2804             NvU32                               aperture;
2805             NvU32                               pteIndex;
2806             NvU64                               surfOffset;
2807 
2808             //
2809             // Ignore the index if a page size of 0 is specified. This is a valid
            // check as the caller may send down a 0 page size for indexes
            // which are not of interest.
2812             //
2813             if (0 == pParams->pteBlocks[i].pageSize)
2814                 continue;
2815 
2816             //
2817             // Continue the loop if we see an unsupported page size.
2818             // Ideally we should assert, but we're emulating the behavior of the old
2819             // API @ref dmaSetPteInfo_GF100 here.
2820             //
2821             if (!(pGVAS->bigPageSize == pParams->pteBlocks[i].pageSize ||
2822                   RM_PAGE_SIZE_HUGE == pParams->pteBlocks[i].pageSize ||
2823                   RM_PAGE_SIZE_512M == pParams->pteBlocks[i].pageSize ||
2824                   RM_PAGE_SIZE == pParams->pteBlocks[i].pageSize))
2825             {
2826                 continue;
2827             }
2828 
2829             // Query the Page Tables.
2830             pLevelFmt = mmuFmtFindLevelWithPageShift(pRootFmt,
2831                                                      BIT_IDX_64(pParams->pteBlocks[i].pageSize));
2832             if (NULL == pLevelFmt)
2833             {
2834                 status = NV_ERR_INVALID_STATE;
2835                 NV_ASSERT_OR_GOTO(0, catchGpu);
2836             }
2837             status = mmuWalkGetPageLevelInfo(pWalk, pLevelFmt, pParams->gpuAddr,
2838                                              (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize);
2839             NV_ASSERT_OR_GOTO(NV_OK == status, catchGpu);
2840 
2841             // Can't set PTE for an unallocated VA.
2842             if (NULL == pMemDesc)
2843             {
2844                 status = NV_ERR_INVALID_ADDRESS;
2845                 NV_ASSERT_OR_GOTO(0, catchGpu);
2846             }
2847 
2848             pPteBlock  = &pParams->pteBlocks[i];
2849 
2850             // Read the PTE
2851             pteIndex = mmuFmtVirtAddrToEntryIndex(pLevelFmt, pParams->gpuAddr);
2852 
2853             surf.pMemDesc = pMemDesc;
2854             surf.offset = pteIndex * pLevelFmt->entrySize;
2855 
2856             NV_ASSERT_OK_OR_GOTO(status,
2857                 memmgrMemRead(pMemoryManager, &surf, pte.v8,
2858                               pLevelFmt->entrySize,
2859                               TRANSFER_FLAGS_NONE),
2860                 catchGpu);
2861 
2862             //
2863             // If any of these entries are being downgraded, we need to perform
2864             // a full flush.
2865             //
2866             bDowngrade = isPteDowngrade(GPU_GET_KERNEL_GMMU(pGpu), pFmt, pPteBlock->pteFlags, pte);
2867 
2868 
2869             // Insert the PTE fields from pParams
2870 
2871             // Valid
2872             bValid = FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2873                                        _VALID, _TRUE, pPteBlock->pteFlags);
2874 
2875             // Aperture
2876             aperture = DRF_VAL(0080_CTRL, _DMA_PTE_INFO,
2877                                          _PARAMS_FLAGS_APERTURE, pPteBlock->pteFlags);
2878 
2879             // Encryption
2880             bEncrypted = FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2881                                            _ENCRYPTED, _TRUE, pPteBlock->pteFlags);
2882 
2883             // Read Only
2884             bReadOnly = FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2885                                            _READ_ONLY, _TRUE, pPteBlock->pteFlags);
2886 
2887             if (!FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2888                     _GPU_CACHED, _NOT_SUPPORTED, pPteBlock->pteFlags))
2889             {
2890                 bVolatile = FLD_TEST_DRF(0080, _CTRL_DMA_PTE_INFO_PARAMS_FLAGS,
2891                     _GPU_CACHED, _FALSE, pPteBlock->pteFlags);
2892             }
2893 
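            //
            // GMMU v3 formats encode validity, cacheability, and access bits in
            // the PTE PCF field, so translate the SW PCF to its HW encoding.
            // Earlier formats use the discrete fields set directly below.
            //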
2894             if (pFmt->version == GMMU_FMT_VERSION_3)
2895             {
2896                 NvU32 ptePcfHw  = 0;
2897                 NvU32 ptePcfSw  = 0;
2898 
2899                 if (bValid)
2900                 {
2901                     nvFieldSetBool(&pFmt->pPte->fldValid, NV_TRUE, pte.v8);
2902                     nvFieldSet32(&pFmt->pPte->fldAperture._enum.desc, aperture, pte.v8);
2903                     nvFieldSet32(&pFmt->pPte->fldKind, pPteBlock->kind, pte.v8);
2904                     ptePcfSw |= bVolatile ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
2905                     if (bReadOnly)
2906                     {
2907                         ptePcfSw |= 1 << SW_MMU_PCF_RO_IDX;
2908                         ptePcfSw |= 1 << SW_MMU_PCF_NOATOMIC_IDX;
2909                     }
2910                     ptePcfSw |= (1 << SW_MMU_PCF_REGULAR_IDX);
2911                     ptePcfSw |= (1 << SW_MMU_PCF_ACE_IDX);
2912                 }
2913                 else
2914                 {
2915                     ptePcfSw |= (1 << SW_MMU_PCF_INVALID_IDX);
2916                 }
2917                 NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(GPU_GET_KERNEL_GMMU(pGpu), ptePcfSw, &ptePcfHw) == NV_OK),
2918                                      NV_ERR_INVALID_ARGUMENT);
2919                 nvFieldSet32(&pFmt->pPte->fldPtePcf, ptePcfHw, pte.v8);
2920             }
2921             else
2922             {
2923                 nvFieldSetBool(&pFmt->pPte->fldValid, bValid, pte.v8);
2924                 if (bValid)
2925                 {
2926                     nvFieldSet32(&pFmt->pPte->fldAperture._enum.desc, aperture, pte.v8);
2927                     nvFieldSetBool(&pFmt->pPte->fldEncrypted, bEncrypted, pte.v8);
2928                     nvFieldSetBool(&pFmt->pPte->fldReadOnly, bReadOnly, pte.v8);
2929                     nvFieldSetBool(&pFmt->pPte->fldVolatile, bVolatile, pte.v8);
2930 
2931                     //
2932                     // gmmuFmtInitPteCompTags assumes that lower/upper half of CTL is
2933                     // determined by the surfOffset passed in.
2934                     // We calculate the surface offset here based on the pteIndex to
2935                     // match the pre-GM20X behavior of selecting half based on VA alignment.
2936                     //
                    // compPageIndexLo/Hi are also set to always allow compression on
                    // the page index we are overriding. A 2MB page requires 0x20 comptags
                    // to be contiguous, so set the endPage limit properly as well.
2940                     //
2941                     if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, pPteBlock->kind))
2942                     {
2943                         COMPR_INFO                comprInfo;
2944                         const GMMU_FIELD_ADDRESS *pAddrField = gmmuFmtPtePhysAddrFld(pFmt->pPte,
2945                                                                                      gmmuFieldGetAperture(&pFmt->pPte->fldAperture, pte.v8));
2946                         RmPhysAddr                physAddr = (RmPhysAddr)gmmuFieldGetAddress(pAddrField, pte.v8);
2947 
2948                         surfOffset = pteIndex * pPteBlock->pageSize;
2949                         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, memmgrFillComprInfo(pGpu, pMemoryManager, pPteBlock->pageSize, 1, pPteBlock->kind,
2950                                                                                surfOffset, pPteBlock->comptagLine, &comprInfo));
2951                         kgmmuFieldSetKindCompTags(GPU_GET_KERNEL_GMMU(pGpu), pFmt, pLevelFmt, &comprInfo, physAddr, surfOffset, pteIndex, pte.v8);
2952                     }
2953                     else
2954                     {
2955                         nvFieldSet32(&pFmt->pPte->fldKind, pPteBlock->kind, pte.v8);
2956                     }
2957                 }
2958             }
2959 
2960             // Copy back the overwritten values to the actual PTE memory
2961             NV_ASSERT_OK_OR_GOTO(status,
2962                 memmgrMemWrite(pMemoryManager, &surf, pte.v8,
2963                                pLevelFmt->entrySize,
2964                                TRANSFER_FLAGS_NONE),
2965                 catchGpu);
2966 
2967         }
2968 
2969         // Invalidate TLB
2970         gvaspaceInvalidateTlb(pGVAS, pGpu, bDowngrade ? PTE_DOWNGRADE : PTE_UPGRADE);
2971 
2972 catchGpu:
2973         if (NV_OK != status)
2974         {
2975             break;
2976         }
2977     }
2978     FOR_EACH_GPU_IN_MASK_UC_END
2979 
2980     return status;
2981 }
2982 
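//
// MMU walker FillEntries callback override used while migrating the root page
// directory. It clamps the requested entry index range to the RM-internally
// managed VA range and reuses the normal fill callback on that sub-range, then
// reports the full range as complete so externally managed entries are skipped.
//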
2983 static void
2984 _gmmuWalkCBFillEntries_SkipExternal
2985 (
2986     MMU_WALK_USER_CTX         *pUserCtx,
2987     const MMU_FMT_LEVEL       *pLevelFmt,
2988     const MMU_WALK_MEMDESC    *pLevelMem,
2989     const NvU32                entryIndexLo,
2990     const NvU32                entryIndexHi,
2991     const MMU_WALK_FILL_STATE  fillState,
2992     NvU32                     *pProgress
2993 )
2994 {
2995     OBJGVASPACE *pGVAS = pUserCtx->pGVAS;
2996     OBJVASPACE  *pVAS  = staticCast(pGVAS, OBJVASPACE);
2997 
2998     // Clamp index range to RM-internal entries.
2999     const NvU32 entryIndexLoClamp = NV_MAX(entryIndexLo,
3000                     mmuFmtVirtAddrToEntryIndex(pLevelFmt, pVAS->vasStart));
3001     const NvU32 entryIndexHiClamp = NV_MIN(entryIndexHi,
3002                     mmuFmtVirtAddrToEntryIndex(pLevelFmt, pGVAS->vaLimitInternal));
3003 
    // Clamping may produce an empty range.
3005     if (entryIndexHiClamp >= entryIndexLoClamp)
3006     {
3007         // Reuse normal fill callback.
3008         g_gmmuWalkCallbacks.FillEntries(pUserCtx, pLevelFmt, pLevelMem,
3009                                         entryIndexLoClamp, entryIndexHiClamp,
3010                                         fillState, pProgress);
3011         NV_ASSERT_OR_RETURN_VOID(*pProgress == (entryIndexHiClamp - entryIndexLoClamp + 1));
3012     }
3013 
3014     // Report full range complete on success.
3015     *pProgress = entryIndexHi - entryIndexLo + 1;
3016 }
3017 
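//
// MMU walker CopyEntries callback override used while migrating the root page
// directory. Like the fill override above, it clamps the entry index range to
// the RM-internally managed VA range, reuses the normal copy callback, and
// reports the full range as complete so externally managed entries are skipped.
//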
3018 static void
3019 _gmmuWalkCBCopyEntries_SkipExternal
3020 (
3021     MMU_WALK_USER_CTX         *pUserCtx,
3022     const MMU_FMT_LEVEL       *pLevelFmt,
3023     const MMU_WALK_MEMDESC    *pSrcMem,
3024     const MMU_WALK_MEMDESC    *pDstMem,
3025     const NvU32                entryIndexLo,
3026     const NvU32                entryIndexHi,
3027     NvU32                     *pProgress
3028 )
3029 {
3030     OBJGVASPACE *pGVAS = pUserCtx->pGVAS;
3031     OBJVASPACE  *pVAS  = staticCast(pGVAS, OBJVASPACE);
3032 
3033     // Clamp index range to RM-internal entries.
3034     const NvU32 entryIndexLoClamp = NV_MAX(entryIndexLo,
3035                     mmuFmtVirtAddrToEntryIndex(pLevelFmt, pVAS->vasStart));
3036     const NvU32 entryIndexHiClamp = NV_MIN(entryIndexHi,
3037                     mmuFmtVirtAddrToEntryIndex(pLevelFmt, pGVAS->vaLimitInternal));
3038 
    // Clamping may produce an empty range.
3040     if (entryIndexHiClamp >= entryIndexLoClamp)
3041     {
3042         // Reuse normal copy callback.
3043         g_gmmuWalkCallbacks.CopyEntries(pUserCtx, pLevelFmt, pSrcMem, pDstMem,
3044                                         entryIndexLoClamp, entryIndexHiClamp,
3045                                         pProgress);
3046         NV_ASSERT_OR_RETURN_VOID(*pProgress == (entryIndexHiClamp - entryIndexLoClamp + 1));
3047     }
3048 
3049     // Report full range complete.
3050     *pProgress = entryIndexHi - entryIndexLo + 1;
3051 }
3052 
3053 //
3054 // TODO: hClient is added temporarily and will be removed after RS_PRIV_LEVEL is used
3055 // in osCreateMemFromOsDescriptor. See bug #2107861.
3056 //
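//
// Commit an externally provided root page directory (PDB) to this VA space.
// For externally owned VA spaces the new PDB is simply recorded. Otherwise the
// RM-internal root entries are locked down on the first commit and the root
// level is migrated into the client-provided memory, skipping external entries.
//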
3057 NV_STATUS
3058 gvaspaceExternalRootDirCommit_IMPL
3059 (
3060     OBJGVASPACE                               *pGVAS,
3061     NvHandle                                   hClient,
3062     OBJGPU                                    *pGpu,
3063     NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS *pParams
3064 )
3065 {
3066     OBJVASPACE               *pVAS               = staticCast(pGVAS, OBJVASPACE);
3067     MEMORY_DESCRIPTOR        *pRootMemNew        = NULL;
3068     NvU64                     rootSizeNew;
3069     NV_STATUS                 status;
3070     NV_ADDRESS_SPACE          aperture;
3071     NvU32                     cpuCacheAttr;
3072     NvU64                     vaLimitOld;
3073     NvU64                     vaLimitNew;
3074     NvU32                     attr;
3075     NvU32                     attr2;
3076     NvU32                     os02Flags          = 0;
3077     const MMU_WALK_CALLBACKS *pCb                = NULL;
3078     MMU_WALK_CALLBACKS        callbacks;
3079     MMU_WALK_USER_CTX         userCtx            = {0};
3080     GVAS_GPU_STATE           *pGpuState          = gvaspaceGetGpuState(pGVAS, pGpu);
3081     const NvBool              bAllChannels       = FLD_TEST_DRF(0080_CTRL_DMA_SET_PAGE_DIRECTORY,
3082                                                 _FLAGS, _ALL_CHANNELS, _TRUE, pParams->flags);
3083     const NvBool              bFirstCommit       = (NULL == pGpuState->pRootInternal);
3084     const NvBool              bIgnoreChannelBusy = FLD_TEST_DRF(0080_CTRL_DMA_SET_PAGE_DIRECTORY,
3085                                                 _FLAGS, _IGNORE_CHANNEL_BUSY, _TRUE, pParams->flags);
3086     const NvU64               rootPdeCoverage    = mmuFmtLevelPageSize(pGpuState->pFmt->pRoot);
3087     NvU64                     vaInternalLo       = NV_ALIGN_DOWN64(gvaspaceGetVaStart(pGVAS), rootPdeCoverage);
3088     NvU64                     vaInternalHi       = NV_ALIGN_UP64(pGVAS->vaLimitInternal + 1, rootPdeCoverage) - 1;
3089     NvU32                     gfid;
3090 
3091     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
3092 
    //
    // For an external VAS, create the subcontext only after the SetPageDirectory()
    // call is made. This ensures that the new PDB is updated in every channel's
    // subcontext array. See bug 1805222 comment 11 for more details.
    //
3098     if (vaspaceIsExternallyOwned(pVAS))
3099     {
3100         MEMORY_DESCRIPTOR *pPDB = vaspaceGetPageDirBase(pVAS, pGpu);
3101         NV_ASSERT_OR_RETURN(pPDB == NULL, NV_ERR_INVALID_STATE);
3102     }
3103 
3104     switch (DRF_VAL(0080_CTRL_DMA_SET_PAGE_DIRECTORY, _FLAGS, _APERTURE, pParams->flags))
3105     {
3106         case NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_VIDMEM:
3107             aperture     = ADDR_FBMEM;
3108             cpuCacheAttr = NV_MEMORY_UNCACHED;
3109             break;
3110         case NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_SYSMEM_COH:
3111             aperture     = ADDR_SYSMEM;
3112             cpuCacheAttr = NV_MEMORY_CACHED;
3113             break;
3114         case NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_SYSMEM_NONCOH:
3115             aperture     = ADDR_SYSMEM;
3116             cpuCacheAttr = NV_MEMORY_UNCACHED;
3117             break;
3118         default:
3119             NV_ASSERT_OR_RETURN(!"invalid aperture", NV_ERR_INVALID_ARGUMENT);
3120     }
3121 
3122     NV_ASSERT_OR_RETURN(!pGVAS->bIsMirrored, NV_ERR_NOT_SUPPORTED);
    // Ensure the new page directory is not smaller than the RM-managed region of the VA heap.
3124     vaLimitOld = pVAS->vasLimit;
3125     vaLimitNew = mmuFmtEntryIndexVirtAddrHi(pGpuState->pFmt->pRoot, 0, pParams->numEntries - 1);
3126 
3127     NV_ASSERT_OR_RETURN(vaLimitNew >= pGVAS->vaLimitInternal, NV_ERR_INVALID_ARGUMENT);
3128     NV_ASSERT_OR_RETURN(vaLimitNew <= pGVAS->vaLimitMax,      NV_ERR_INVALID_ARGUMENT);
3129 
    // We have to truncate this value later, so check for overflow here.
3131     NV_ASSERT_OR_RETURN((NvU64)pParams->numEntries * (NvU64)pGpuState->pFmt->pRoot->entrySize <= NV_U32_MAX,
3132                       NV_ERR_INVALID_ARGUMENT);
3133 
3134     // Describe the new page directory.
3135     rootSizeNew = (NvU64)pParams->numEntries * (NvU64)pGpuState->pFmt->pRoot->entrySize;
3136 
3137     if (pGVAS->bIsAtsEnabled)
3138     {
3139         NV_PRINTF(LEVEL_INFO, "PASID: %u\n", pParams->pasid);
3140         pGVAS->processAddrSpaceId = pParams->pasid;
3141     }
3142 
3143     NV_ASSERT_OR_RETURN((pGVAS->flags & VASPACE_FLAGS_SHARED_MANAGEMENT) || vaspaceIsExternallyOwned(pVAS),
3144                      NV_ERR_NOT_SUPPORTED);
3145 
    // If we have a coherent CPU mapping, it is functionally required that we use direct BAR2 mappings.
3147     if ((aperture == ADDR_SYSMEM) && pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
3148         !vaspaceIsExternallyOwned(pVAS))
3149     {
3150         NV_CHECK_OR_RETURN(LEVEL_ERROR, IS_GFID_PF(gfid), NV_ERR_INVALID_ARGUMENT);
3151 
        //
        // If the page tables are in sysmem, we need to explicitly state that this
        // memory is allocated outside RM and register it with the OS layer so that
        // RM can map the memory later in the busMapRmAperture code path.
        //
3157         attr  = DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
3158         attr2 = 0;
3159 
3160         status = RmDeprecatedConvertOs32ToOs02Flags(attr, attr2, 0, &os02Flags);
3161         NV_ASSERT_OR_GOTO(NV_OK == status, catch);
3162 
3163         status = osCreateMemFromOsDescriptor(pGpu, (NvP64)pParams->physAddress,
3164                                              hClient, os02Flags,
3165                                              &rootSizeNew, &pRootMemNew,
3166                                              NVOS32_DESCRIPTOR_TYPE_OS_PHYS_ADDR,
3167                                              RS_PRIV_LEVEL_KERNEL);
3168         NV_ASSERT_OR_GOTO(NV_OK == status, catch);
3169     }
3170     else
3171     {
3172         NvU32 flags = MEMDESC_FLAGS_NONE;
3173 
3174         if (IS_GFID_VF(gfid))
3175         {
            // In SRIOV-heavy the PDB address is a GPA.
3177             NV_CHECK_OR_RETURN(LEVEL_ERROR, aperture == ADDR_FBMEM, NV_ERR_INVALID_ARGUMENT);
3178             flags |= MEMDESC_FLAGS_GUEST_ALLOCATED;
3179         }
3180 
3181         // TODO: PDB alignment
3182         status = memdescCreate(&pRootMemNew, pGpu, (NvU32)rootSizeNew, RM_PAGE_SIZE, NV_TRUE, aperture,
3183                            cpuCacheAttr, flags);
3184         NV_ASSERT_OR_GOTO(NV_OK == status, catch);
3185         memdescDescribe(pRootMemNew, aperture, pParams->physAddress, (NvU32)rootSizeNew);
3186         memdescSetPageSize(pRootMemNew, VAS_ADDRESS_TRANSLATION(pVAS), RM_PAGE_SIZE);
3187     }
3188 
3189     if (vaspaceIsExternallyOwned(pVAS))
3190     {
        //
        // For an externally owned vaspace we associate a PDB that is provided
        // by the owner of the vaspace. This is different from the "shared management"
        // scenario because, in the externally owned case, RM does not allocate any
        // page tables or VA for this address space. This is a way to make sure RM
        // programs the correct PDB when clients use this address space to create a channel.
        // TODO: Make externally owned vaspace a separate vaspace class.
        //
3199         status = _gvaspaceSetExternalPageDirBase(pGVAS, pGpu, pRootMemNew);
3200         return status;
3201     }
3202 
3203     // Acquire MMU walker user context (always released below in catch label).
3204     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
3205     if (!bAllChannels)
3206     {
3207         // Specify single channel ID for which to update PDB if required by caller.
3208         userCtx.pChID = &pParams->chId;
3209     }
3210 
3211     //
3212     // If RM-internal page directory has not been saved yet, this is
3213     // the first external page directory committed.
3214     //
3215     if (bFirstCommit)
3216     {
3217         NvU32 rootSizeOld;
3218 
3219         //
3220         // Lock-down the root entries of the RM-internal VA range.
3221         // This forces the internal root page directory to be allocated if it
3222         // is not already.
3223         //
3224         status = mmuWalkReserveEntries(pGpuState->pWalk, pGpuState->pFmt->pRoot,
3225                                        vaInternalLo, vaInternalHi, NV_TRUE);
3226         NV_ASSERT_OR_GOTO(NV_OK == status, catch);
3227 
3228         // Save a reference to the RM-internal root for later revoke.
3229         mmuWalkGetPageLevelInfo(pGpuState->pWalk, pGpuState->pFmt->pRoot, 0,
3230                                 (const MMU_WALK_MEMDESC**)&pGpuState->pRootInternal,
3231                                 &rootSizeOld);
3232         NV_ASSERT(NULL != pGpuState->pRootInternal);
3233 
3234         // TODO: Proper refcount with memdesc cleanup - inverse of memdescFree/memdescDestroy.
3235         ++pGpuState->pRootInternal->RefCount;
3236         if (pGpuState->pRootInternal->Allocated > 0)
3237         {
3238             ++pGpuState->pRootInternal->Allocated;
3239         }
3240     }
3241 
    //
    // Invalidate the MMU to kick out any entries associated with the old PDB.
    // Because we're copying the PTE entries, we need to ensure all accesses
    // referring to the old entries are flushed, so we use PTE_DOWNGRADE here.
    //
3247     gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
3248 
3249     //
3250     // Override callbacks for migration.
3251     // Copy and Fill callbacks are replaced to skip external entries.
3252     //
3253     pCb = mmuWalkGetCallbacks(pGpuState->pWalk);
3254     callbacks = *pCb;
3255     callbacks.CopyEntries = _gmmuWalkCBCopyEntries_SkipExternal;
3256     callbacks.FillEntries = _gmmuWalkCBFillEntries_SkipExternal;
3257     mmuWalkSetCallbacks(pGpuState->pWalk, &callbacks);
3258 
3259     // Track latest limit for PDB commit.
3260     pVAS->vasLimit = vaLimitNew;
3261 
3262     // Migrate root to the new memory.
3263     status = mmuWalkMigrateLevelInstance(pGpuState->pWalk, pGpuState->pFmt->pRoot, 0,
3264                                          (MMU_WALK_MEMDESC *)pRootMemNew, (NvU32)rootSizeNew,
3265                                          bIgnoreChannelBusy);
3266     NV_ASSERT_OR_GOTO(NV_OK == status, catch);
3267 
3268 catch:
3269     // Restore walker callbacks.
3270     if (NULL != pCb)
3271     {
3272         mmuWalkSetCallbacks(pGpuState->pWalk, pCb);
3273     }
3274     // Rollback on failure.
3275     if (NV_OK != status)
3276     {
3277         pVAS->vasLimit = vaLimitOld;
3278         if (bFirstCommit)
3279         {
3280             if (NULL != pGpuState->pRootInternal)
3281             {
3282                 memdescFree(pGpuState->pRootInternal);
3283                 memdescDestroy(pGpuState->pRootInternal);
3284                 pGpuState->pRootInternal = NULL;
3285 
3286                 mmuWalkReleaseEntries(pGpuState->pWalk, pGpuState->pFmt->pRoot,
3287                                       vaInternalLo, vaInternalHi);
3288             }
3289         }
3290         memdescDestroy(pRootMemNew);
3291         pRootMemNew = NULL;
3292     }
3293     // Release MMU walker user context.
3294     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
3295 
3296     return status;
3297 }
3298 
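//
// Revoke a previously committed external root page directory. For externally
// owned VA spaces the recorded PDB is destroyed and cleared. Otherwise the root
// level is migrated back to the saved RM-internal root and the locked-down
// internal root entries are released.
//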
3299 NV_STATUS
3300 gvaspaceExternalRootDirRevoke_IMPL
3301 (
3302     OBJGVASPACE                                 *pGVAS,
3303     OBJGPU                                      *pGpu,
3304     NV0080_CTRL_DMA_UNSET_PAGE_DIRECTORY_PARAMS *pParams
3305 )
3306 {
3307     OBJVASPACE               *pVAS        = staticCast(pGVAS, OBJVASPACE);
3308     NV_STATUS                 status      = NV_OK;
3309     MEMORY_DESCRIPTOR        *pRootMemNew = NULL;
3310     NvU32                     rootSizeNew;
3311     const MMU_WALK_CALLBACKS *pCb = NULL;
3312     MMU_WALK_CALLBACKS        callbacks;
3313     MMU_WALK_USER_CTX         userCtx = {0};
3314     GVAS_GPU_STATE           *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
3315     const NvU64               rootPdeCoverage = mmuFmtLevelPageSize(pGpuState->pFmt->pRoot);
3316     const NvU64               vaInternalLo = NV_ALIGN_DOWN64(pVAS->vasStart,           rootPdeCoverage);
3317     const NvU64               vaInternalHi = NV_ALIGN_UP64(pGVAS->vaLimitInternal + 1, rootPdeCoverage) - 1;
3318 
3319     if (vaspaceIsExternallyOwned(pVAS))
3320     {
3321         MEMORY_DESCRIPTOR *pExternalPDB = NULL;
3322 
        // Get the external PDB, destroy it, and clear the reference in the VAS.
3324         pExternalPDB = vaspaceGetPageDirBase(pVAS, pGpu);
3325         memdescDestroy(pExternalPDB);
3326         pExternalPDB = NULL;
3327         status = _gvaspaceSetExternalPageDirBase(pGVAS, pGpu, pExternalPDB);
3328         return status;
3329     }
3330 
3331     NV_ASSERT_OR_RETURN(!pGVAS->bIsMirrored, NV_ERR_NOT_SUPPORTED);
3332     NV_ASSERT_OR_RETURN(NULL != pGpuState->pRootInternal, NV_ERR_INVALID_STATE);
3333 
3334     pRootMemNew = pGpuState->pRootInternal;
3335     rootSizeNew = (NvU32)pRootMemNew->Size;
3336 
    //
    // Invalidate the MMU to kick out old entries before changing PDBs.
    // Because we're copying the PTE entries, we need to ensure all accesses
    // referring to the old entries are flushed, so we use PTE_DOWNGRADE here.
    //
3342     gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
3343 
3344     // Acquire walker user context.
3345     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
3346 
3347     //
3348     // Override callbacks for migration.
3349     // Copy and Fill callbacks are replaced to skip external entries.
3350     //
3351     pCb = mmuWalkGetCallbacks(pGpuState->pWalk);
3352     callbacks = *pCb;
3353     callbacks.CopyEntries = _gmmuWalkCBCopyEntries_SkipExternal;
3354     callbacks.FillEntries = _gmmuWalkCBFillEntries_SkipExternal;
3355     mmuWalkSetCallbacks(pGpuState->pWalk, &callbacks);
3356 
3357     // Restore limit for PDB commit.
3358     pVAS->vasLimit = pGVAS->vaLimitInternal;
3359 
3360     // Migrate root to the new memory.
3361     status = mmuWalkMigrateLevelInstance(pGpuState->pWalk, pGpuState->pFmt->pRoot, 0,
3362                                          (MMU_WALK_MEMDESC *)pRootMemNew, rootSizeNew, NV_FALSE);
3363     NV_ASSERT(NV_OK == status);
3364 
3365     // RM-internal root ownership has transferred back to walker.
3366     pGpuState->pRootInternal = NULL;
3367 
3368     // Release locked-down internal root entries.
3369     status = mmuWalkReleaseEntries(pGpuState->pWalk, pGpuState->pFmt->pRoot,
3370                                    vaInternalLo, vaInternalHi);
3371     NV_ASSERT(NV_OK == status);
3372 
3373     // Restore callbacks.
3374     mmuWalkSetCallbacks(pGpuState->pWalk, pCb);
3375 
3376     // Release walker user context.
3377     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
3378 
    // No possible response to failure; the asserts above are the best we can do.
3380     return NV_OK;
3381 }
3382 
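//
// Grow the VA space to the requested size (shrinking is not supported).
// The top-of-heap growth reservation is adjusted and, on each GPU that already
// has a root page directory, the root is reallocated or the PDB is re-committed
// so the new limit takes effect in HW.
//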
3383 NV_STATUS
3384 gvaspaceResize_IMPL
3385 (
3386     OBJGVASPACE                              *pGVAS,
3387     NV0080_CTRL_DMA_SET_VA_SPACE_SIZE_PARAMS *pParams
3388 )
3389 {
3390     OBJGPU         *pGpu = NULL;
3391     OBJVASPACE     *pVAS = staticCast(pGVAS, OBJVASPACE);
3392     NvU64           vaLimitNew;
3393     NV_STATUS       status = NV_OK;
3394 
3395     NV_ASSERT_OR_RETURN(!pGVAS->bIsMirrored, NV_ERR_NOT_SUPPORTED);
3396     NV_ASSERT_OR_RETURN(!(pGVAS->flags & VASPACE_FLAGS_SHARED_MANAGEMENT),
3397                      NV_ERR_NOT_SUPPORTED);
3398 
3399     // Calculate and check new VA range size (limit + 1).
3400     if (NV0080_CTRL_DMA_SET_VA_SPACE_SIZE_MAX == pParams->vaSpaceSize)
3401     {
3402         vaLimitNew = pGVAS->vaLimitMax;
3403     }
3404     else
3405     {
3406         vaLimitNew = pParams->vaSpaceSize - 1;
3407     }
3408 
3409     // Abort early if not changing the size.
3410     if (vaLimitNew == pVAS->vasLimit)
3411     {
3412         goto done;
3413     }
3414 
    // Shrinking the VA space is not currently supported.
3416     NV_ASSERT_OR_RETURN(vaLimitNew >= pVAS->vasLimit,    NV_ERR_INVALID_LIMIT);
3417     NV_ASSERT_OR_RETURN(vaLimitNew <= pGVAS->vaLimitMax, NV_ERR_INVALID_LIMIT);
3418 
3419     if (gvaspaceIsInternalVaRestricted(pGVAS))
3420     {
        // This is not supported because clients that support VASPACE_SHARED_MANAGEMENT
        // use Set/Revoke Root Page Dir to expand/shrink their VAs.
        // There is no documented use case for expanding a client VA this way.
3424         NV_PRINTF(LEVEL_ERROR, "doesn't support clientVA expansion\n");
3425         NV_ASSERT(0);
3426     }
3427 
3428 
3429     // Commit new limit.
3430     pVAS->vasLimit         = vaLimitNew;
3431     pGVAS->vaLimitInternal = vaLimitNew;
3432 
3433     // Shrink the top VA region reserved for growth.
3434     pGVAS->pHeap->eheapFree(pGVAS->pHeap, pGVAS->vaLimitMax);
3435     NV_ASSERT_OK_OR_RETURN(_gvaspaceReserveTopForGrowth(pGVAS));
3436 
3437     // Loop over each GPU associated with the VAS.
3438     FOR_EACH_GPU_IN_MASK_UC(32, pSys, pGpu, pVAS->gpuMask)
3439     {
3440         GVAS_GPU_STATE           *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
3441         MMU_WALK_USER_CTX         userCtx  = {0};
3442         const MMU_WALK_CALLBACKS *pCb      = mmuWalkGetCallbacks(pGpuState->pWalk);
3443         const MMU_FMT_LEVEL      *pRootFmt = pGpuState->pFmt->pRoot;
3444         MMU_WALK_MEMDESC         *pRootMem = NULL;
3445         NvU32                     rootSize = 0;
3446         NvBool                    bChanged = NV_FALSE;
3447 
        // If the root has not been allocated yet, it will use the new limit later.
3449         if (NULL == gvaspaceGetPageDirBase(pGVAS, pGpu))
3450         {
3451             goto doneGpu;
3452         }
3453 
3454         // Acquire walker context.
3455         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
3456 
3457         status = mmuWalkGetPageLevelInfo(pGpuState->pWalk, pRootFmt, 0,
3458                                          (const MMU_WALK_MEMDESC**)&pRootMem, &rootSize);
3459         NV_ASSERT_OR_GOTO(NV_OK == status, doneGpu);
3460 
3461         //
3462         // Allocate new root manually. If realloc is not needed, LevelAlloc()
3463         // will retain the existing PDB.
3464         //
3465         status = pCb->LevelAlloc(&userCtx, pRootFmt, 0, vaLimitNew, NV_TRUE,
3466                                  &pRootMem, &rootSize, &bChanged);
3467         NV_ASSERT_OR_GOTO(NV_OK == status, doneGpu);
3468 
3469         // Migrate to new root (if reallocated), updating PDB and VA limit accordingly.
3470         if (bChanged)
3471         {
            //
            // Invalidate the MMU to kick out any entries associated with the old PDB.
            // Because we're copying the PTE entries, we need to ensure all accesses
            // referring to the old entries are flushed, so we use PTE_DOWNGRADE here.
            //
3477             gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
3478 
3479             status = mmuWalkMigrateLevelInstance(pGpuState->pWalk, pRootFmt, 0,
3480                                                  pRootMem, rootSize, NV_FALSE);
3481             NV_ASSERT_OR_GOTO(NV_OK == status, doneGpu);
3482         }
3483         else
3484         {
3485             NvBool bDone;
3486 
3487             // Trigger an UpdatePdb to commit the new vaLimit to HW.
3488             bDone = pCb->UpdatePdb(&userCtx, pRootFmt, (const MMU_WALK_MEMDESC *)pRootMem,
3489                                    NV_FALSE);
3490             if (!bDone)
3491             {
3492                 status = NV_ERR_INVALID_STATE;
3493                 NV_ASSERT_OR_GOTO(bDone, doneGpu);
3494             }
3495         }
3496 
3497 doneGpu:
3498         if ((NV_OK != status) && (NULL != pRootMem))
3499         {
3500             pCb->LevelFree(&userCtx, pRootFmt, 0, pRootMem);
3501         }
3502         // Release walker context.
3503         if (NULL != userCtx.pGpu)
3504         {
3505             gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
3506         }
3507         if (NV_OK != status)
3508         {
3509             break;
3510         }
3511     }
3512     FOR_EACH_GPU_IN_MASK_UC_END
3513 
3514 done:
3515     if (NV_OK == status)
3516     {
3517         // On success, return usable VA space size.
3518         pParams->vaSpaceSize = pGVAS->vaLimitInternal - pVAS->vasStart + 1;
3519     }
3520 
3521     return status;
3522 }
3523 
3524 struct MMU_MAP_ITERATOR
3525 {
3526     GMMU_ENTRY_VALUE entry;
3527 };
3528 
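//
// MMU walker map callback that writes a single pre-constructed entry value
// (built by gvaspaceUpdatePde2_IMPL below) into the target page level.
// Only single-entry ranges are supported.
//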
3529 static void
3530 _gmmuWalkCBMapSingleEntry
3531 (
3532     MMU_WALK_USER_CTX        *pUserCtx,
3533     const MMU_MAP_TARGET     *pTarget,
3534     const MMU_WALK_MEMDESC   *pLevelMem,
3535     const NvU32               entryIndexLo,
3536     const NvU32               entryIndexHi,
3537     NvU32                    *pProgress
3538 )
3539 {
3540     OBJGPU              *pGpu           = pUserCtx->pGpu;
3541     MemoryManager       *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
3542     MMU_MAP_ITERATOR    *pIter          = pTarget->pIter;
3543     MEMORY_DESCRIPTOR   *pMemDesc       = (MEMORY_DESCRIPTOR*)pLevelMem;
3544     TRANSFER_SURFACE     surf           = {0};
3545 
3546     NV_PRINTF(LEVEL_INFO, "[GPU%u]: PA 0x%llX, Entries 0x%X-0x%X\n",
3547               pUserCtx->pGpu->gpuInstance,
3548               memdescGetPhysAddr(pMemDesc, AT_GPU, 0), entryIndexLo,
3549               entryIndexHi);
3550 
3551     NV_ASSERT_OR_RETURN_VOID(entryIndexLo == entryIndexHi);
3552 
3553     surf.pMemDesc = pMemDesc;
3554     surf.offset = entryIndexLo * pTarget->pLevelFmt->entrySize;
3555 
3556     NV_ASSERT_OR_RETURN_VOID(memmgrMemWrite(pMemoryManager, &surf,
3557                                             pIter->entry.v8,
3558                                             pTarget->pLevelFmt->entrySize,
3559                                             TRANSFER_FLAGS_NONE) == NV_OK);
3560 
3561     *pProgress = entryIndexHi - entryIndexLo + 1;
3562 }
3563 
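//
// Build a dual (big/small) page table PDE value from the
// NV0080_CTRL_DMA_UPDATE_PDE_2 parameters. The resulting entry is either copied
// to the caller-supplied buffer or written into the page directory through the
// MMU walker, optionally followed by a TLB invalidate.
//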
3564 NV_STATUS
3565 gvaspaceUpdatePde2_IMPL
3566 (
3567     OBJGVASPACE                         *pGVAS,
3568     OBJGPU                              *pGpu,
3569     NV0080_CTRL_DMA_UPDATE_PDE_2_PARAMS *pParams
3570 )
3571 {
3572     MMU_WALK_USER_CTX    userCtx   = {0};
3573     MMU_MAP_TARGET       mapTarget = {0};
3574     MMU_MAP_ITERATOR     mapIter;
3575     NvU32                numValidPTs = 0;
3576     GVAS_GPU_STATE      *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
3577     const GMMU_FMT      *pFmt      = pGpuState->pFmt;
3578     const MMU_FMT_LEVEL *pPTBig;
3579     const MMU_FMT_LEVEL *pPT4KB;
3580     NvU32                bigIdx;
3581     NvU32                ptIdx;
3582     const NvBool         bSparse = FLD_TEST_DRF(0080_CTRL_DMA_UPDATE_PDE_2, _FLAGS,
3583                                                 _SPARSE, _TRUE, pParams->flags);
3584     NvU8                *pPdeBuffer = KERNEL_POINTER_FROM_NvP64(NvU8*, pParams->pPdeBuffer);
3585 
3586     portMemSet(&mapIter, 0, sizeof(mapIter));
3587 
3588     // Lookup leaf page table formats.
3589     pPTBig = mmuFmtFindLevelWithPageShift(pFmt->pRoot,
3590                                           BIT_IDX_64(gvaspaceGetBigPageSize(pGVAS)));
3591     pPT4KB = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 12);
3592 
3593     NV_ASSERT_OR_RETURN(NULL != pPTBig, NV_ERR_NOT_SUPPORTED);
3594     NV_ASSERT_OR_RETURN(NULL != pPT4KB, NV_ERR_NOT_SUPPORTED);
3595 
3596     // Setup map target.
3597     mapTarget.pLevelFmt      = mmuFmtFindLevelParent(pFmt->pRoot, pPTBig, &bigIdx);
3598     mapTarget.pIter          = &mapIter;
3599     mapTarget.MapNextEntries = _gmmuWalkCBMapSingleEntry;
3600 
3601     NV_ASSERT_OR_RETURN(NULL != mapTarget.pLevelFmt,            NV_ERR_NOT_SUPPORTED);
3602     NV_ASSERT_OR_RETURN(2 == mapTarget.pLevelFmt->numSubLevels, NV_ERR_NOT_SUPPORTED);
3603 
3604     // Setup PDE value.
3605     for (ptIdx = 0; ptIdx < NV0080_CTRL_DMA_UPDATE_PDE_2_PT_IDX__SIZE; ++ptIdx)
3606     {
3607         NV0080_CTRL_DMA_UPDATE_PDE_2_PAGE_TABLE_PARAMS *pPtParams = &pParams->ptParams[ptIdx];
3608         const GMMU_FMT_PDE                             *pPdeFmt;
3609         GMMU_APERTURE                                   aperture;
3610 
3611         // Select PDE format.
3612         switch (ptIdx)
3613         {
3614             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_IDX_SMALL:
3615                 pPdeFmt = gmmuFmtGetPde(pFmt, mapTarget.pLevelFmt, !bigIdx);
3616                 break;
3617             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_IDX_BIG:
3618                 pPdeFmt = gmmuFmtGetPde(pFmt, mapTarget.pLevelFmt, bigIdx);
3619                 break;
3620             default:
3621                 NV_ASSERT_OR_RETURN(0, NV_ERR_NOT_SUPPORTED);
3622         }
3623 
3624         // Translate aperture field.
3625         switch (pPtParams->aperture)
3626         {
3627             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_APERTURE_INVALID:
3628                 aperture = GMMU_APERTURE_INVALID;
3629                 break;
3630             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_APERTURE_VIDEO_MEMORY:
3631                 aperture = GMMU_APERTURE_VIDEO;
3632                 break;
3633             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_APERTURE_SYSTEM_COHERENT_MEMORY:
3634                 aperture = GMMU_APERTURE_SYS_COH;
3635                 break;
3636             case NV0080_CTRL_DMA_UPDATE_PDE_2_PT_APERTURE_SYSTEM_NON_COHERENT_MEMORY:
3637                 aperture = GMMU_APERTURE_SYS_NONCOH;
3638                 break;
3639             default:
3640                 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
3641         }
3642         gmmuFieldSetAperture(&pPdeFmt->fldAperture, aperture,
3643                              mapIter.entry.v8);
3644 
3645         if (GMMU_APERTURE_INVALID != aperture)
3646         {
3647             KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3648             MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
3649 
3650             gmmuFieldSetAddress(gmmuFmtPdePhysAddrFld(pPdeFmt, aperture),
3651                 kgmmuEncodePhysAddr(pKernelGmmu, aperture, pPtParams->physAddr,
3652                     NVLINK_INVALID_FABRIC_ADDR),
3653                 mapIter.entry.v8);
3654 
3655             if (pFmt->version == GMMU_FMT_VERSION_3)
3656             {
3657                 NvU32                 pdePcfHw = 0;
3658                 NvU32                 pdePcfSw = 0;
3659                 PMEMORY_DESCRIPTOR    pMemDesc = NULL;
3660                 NvU32                 memSize  = 0;
3661                 GMMU_ENTRY_VALUE      pde      = {{0}};
3662                 GMMU_APERTURE         currAperture;
3663                 TRANSFER_SURFACE      surf     = {0};
3664 
3665                 NV_ASSERT_OK_OR_RETURN(
3666                     mmuWalkGetPageLevelInfo(pGpuState->pWalk, mapTarget.pLevelFmt,
3667                         (pParams->pdeIndex * mmuFmtLevelPageSize(mapTarget.pLevelFmt)),
3668                             (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize));
3669 
3670                 surf.pMemDesc = pMemDesc;
3671                 surf.offset = pParams->pdeIndex * mapTarget.pLevelFmt->entrySize;
3672 
3673                 NV_ASSERT_OK_OR_RETURN(memmgrMemRead(pMemoryManager, &surf,
3674                                                      pde.v8,
3675                                                      mapTarget.pLevelFmt->entrySize,
3676                                                      TRANSFER_FLAGS_DEFER_FLUSH));
3677 
3678                 pdePcfHw = nvFieldGet32(&pPdeFmt->fldPdePcf, pde.v8);
3679                 currAperture = gmmuFieldGetAperture(&pPdeFmt->fldAperture, pde.v8);
3680 
3681                 if (currAperture != GMMU_APERTURE_INVALID)
3682                 {
3683                     NV_ASSERT_OR_RETURN(
3684                        (kgmmuTranslatePdePcfFromHw_HAL(pKernelGmmu, pdePcfHw, currAperture, &pdePcfSw) == NV_OK), NV_ERR_INVALID_ARGUMENT);
3685                     pdePcfSw |= 1 << SW_MMU_PCF_UNCACHED_IDX;
3686                 }
3687                 else
3688                 {
3689                     pdePcfSw = 1 << SW_MMU_PCF_UNCACHED_IDX;
3690                 }
3691                 NV_ASSERT_OR_RETURN(
3692                     (kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK), NV_ERR_INVALID_ARGUMENT);
3693                 nvFieldSet32(&pPdeFmt->fldPdePcf, pdePcfHw, mapIter.entry.v8);
3694             }
3695             else
3696             {
3697                 nvFieldSetBool(&pPdeFmt->fldVolatile, NV_TRUE, mapIter.entry.v8);
3698             }
3699             numValidPTs++;
3700         }
3701     }
3702 
3703     if (0 == numValidPTs)
3704     {
3705         // Check for sparse flags and invalid PDEs
3706         if (bSparse)
3707         {
3708             KernelGmmu            *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3709             const GMMU_FMT_FAMILY *pFam        = kgmmuFmtGetFamily(pKernelGmmu, pFmt->version);
3710             mapIter.entry = pFam->sparsePdeMulti;
3711         }
3712     }
3713     else
3714     {
3715         const NvU32 sizeFracExp =
3716             DRF_VAL(0080_CTRL_DMA_UPDATE_PDE_2, _FLAGS, _PDE_SIZE, pParams->flags);
3717 
3718         NV_ASSERT_OR_RETURN(!bSparse, NV_ERR_INVALID_ARGUMENT);
3719 
3720         // Translate PDE_SIZE field.
3721         if (sizeFracExp > 0)
3722         {
3723             NV_ASSERT_OR_RETURN(nvFieldIsValid32(&pFmt->pPdeMulti->fldSizeRecipExp),
3724                              NV_ERR_INVALID_ARGUMENT);
3725             nvFieldSet32(&pFmt->pPdeMulti->fldSizeRecipExp, sizeFracExp, mapIter.entry.v8);
3726         }
3727     }
3728 
3729     if (NULL != pPdeBuffer)
3730     {
3731         // Copy value to user supplied buffer if provided.
3732         portMemCopy(pPdeBuffer, mapTarget.pLevelFmt->entrySize,
3733                     mapIter.entry.v8, mapTarget.pLevelFmt->entrySize);
3734     }
3735     else
3736     {
3737         OBJVASPACE  *pVAS = staticCast(pGVAS, OBJVASPACE);
3738         const NvU64  vaLo = pParams->pdeIndex * mmuFmtLevelPageSize(mapTarget.pLevelFmt);
3739         const NvU64  vaHi = vaLo + mmuFmtEntryVirtAddrMask(mapTarget.pLevelFmt);
3740         NV_STATUS    status;
3741 
3742         NV_ASSERT_OR_RETURN(vaLo >= pVAS->vasStart,         NV_ERR_INVALID_ARGUMENT);
3743         NV_ASSERT_OR_RETURN(vaHi <= pGVAS->vaLimitInternal, NV_ERR_INVALID_ARGUMENT);
3744 
3745         // Call walker to map the PDE.
3746         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
3747 
3748         if (NULL == userCtx.pGpuState)
3749         {
3750             status = NV_ERR_INVALID_STATE;
3751             NV_ASSERT(0);
3752         }
3753         else
3754         {
3755             status = mmuWalkMap(userCtx.pGpuState->pWalk, vaLo, vaHi, &mapTarget);
3756         }
3757         gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
3758         NV_ASSERT_OR_RETURN(NV_OK == status, status);
3759 
3760         // Flush TLB if requested.
3761         if (FLD_TEST_DRF(0080_CTRL_DMA_UPDATE_PDE_2, _FLAGS, _FLUSH_PDE_CACHE, _TRUE,
3762                          pParams->flags))
3763         {
3764             gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_UPGRADE);
3765         }
3766     }
3767 
3768     return NV_OK;
3769 }
3770 
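//
// Bind an MMU walker user context to this VA space and GPU. Must be called in
// unicast mode and paired with gvaspaceWalkUserCtxRelease(). Typical usage, as
// seen in gvaspaceExternalRootDirCommit_IMPL above:
//
//     MMU_WALK_USER_CTX userCtx = {0};
//     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
//     // ... walker operations on userCtx.pGpuState->pWalk ...
//     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
//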
3771 void
3772 gvaspaceWalkUserCtxAcquire_IMPL
3773 (
3774     OBJGVASPACE       *pGVAS,
3775     OBJGPU            *pGpu,
3776     const GVAS_BLOCK  *pVASBlock,
3777     MMU_WALK_USER_CTX *pUserCtx
3778 )
3779 {
3780     // Must be UC.
3781     NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));
3782 
3783     pUserCtx->pGVAS     = pGVAS;
3784     pUserCtx->pGpu      = pGpu;
3785     pUserCtx->pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
3786     pUserCtx->pBlock    = pVASBlock;
3787 
3788     // The following two asserts were added for a rare issue hit during eGPU surprise disconnect on Mac
3789     NV_ASSERT_OR_RETURN_VOID(pUserCtx->pGpuState != NULL);
3790     NV_ASSERT_OR_RETURN_VOID(pUserCtx->pGpuState->pWalk != NULL);
3791 
3792     // If current context is non-NULL, a previous release was missed.
3793     NV_ASSERT(NULL == mmuWalkGetUserCtx(pUserCtx->pGpuState->pWalk));
3794     NV_ASSERT_OK(mmuWalkSetUserCtx(pUserCtx->pGpuState->pWalk, pUserCtx));
3795 }
3796 
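//
// Unbind the MMU walker user context previously set by
// gvaspaceWalkUserCtxAcquire().
//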
3797 void
3798 gvaspaceWalkUserCtxRelease_IMPL
3799 (
3800     OBJGVASPACE       *pGVAS,
3801     MMU_WALK_USER_CTX *pUserCtx
3802 )
3803 {
    // If the current context doesn't match, acquires must have been nested (not allowed).
3805     NV_ASSERT_OR_RETURN_VOID(pUserCtx->pGpuState);
3806     NV_ASSERT_OR_RETURN_VOID(pUserCtx->pGpuState->pWalk);
3807     NV_ASSERT(pUserCtx == mmuWalkGetUserCtx(pUserCtx->pGpuState->pWalk));
3808     NV_ASSERT_OK(mmuWalkSetUserCtx(pUserCtx->pGpuState->pWalk, NULL));
3809 }
3810 
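//
// Register a channel group with this VA space, or increment its reference
// count if it is already registered.
//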
3811 NV_STATUS
3812 gvaspaceIncChanGrpRefCnt_IMPL
3813 (
3814     OBJGVASPACE        *pGVAS,
3815     KernelChannelGroup *pKernelChannelGroup
3816 )
3817 {
3818     NvU32 *pChanGrpRefCnt;
3819 
3820     NV_ASSERT_OR_RETURN(pGVAS != NULL, NV_ERR_INVALID_ARGUMENT);
3821     NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_ARGUMENT);
3822 
3823     pChanGrpRefCnt = mapFind(&pGVAS->chanGrpMap,
3824                              (NvU64)NV_PTR_TO_NvP64(pKernelChannelGroup));
3825 
3826     if (pChanGrpRefCnt != NULL)
3827     {
3828         (*pChanGrpRefCnt)++;
3829     }
3830     else
3831     {
3832         pChanGrpRefCnt = mapInsertNew(&pGVAS->chanGrpMap,
3833             (NvU64)NV_PTR_TO_NvP64(pKernelChannelGroup));
3834         NV_ASSERT_OR_RETURN(pChanGrpRefCnt != NULL, NV_ERR_INVALID_STATE);
3835         NV_PRINTF(LEVEL_INFO, "ChanGrp 0x%x on runlist 0x%x registered.\n",
3836                   pKernelChannelGroup->grpID, pKernelChannelGroup->runlistId);
3837         *pChanGrpRefCnt = 1;
3838     }
3839 
3840     NV_PRINTF(LEVEL_INFO,
3841               "ChanGrp 0x%x on runlist 0x%x refCnt increased to 0x%x\n",
3842               pKernelChannelGroup->grpID,
3843               pKernelChannelGroup->runlistId,
3844               *pChanGrpRefCnt);
3845 
3846     return NV_OK;
3847 }
3848 
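//
// Decrement the reference count of a registered channel group and unregister
// it when the count reaches zero.
//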
3849 NV_STATUS
3850 gvaspaceDecChanGrpRefCnt_IMPL
3851 (
3852     OBJGVASPACE        *pGVAS,
3853     KernelChannelGroup *pKernelChannelGroup
3854 )
3855 {
3856     NvU32 *pChanGrpRefCnt;
3857 
3858     NV_ASSERT_OR_RETURN(pGVAS != NULL, NV_ERR_INVALID_ARGUMENT);
3859     NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_ARGUMENT);
3860 
3861     pChanGrpRefCnt = mapFind(&pGVAS->chanGrpMap,
3862                              (NvU64)NV_PTR_TO_NvP64(pKernelChannelGroup));
3863 
3864     NV_ASSERT_OR_RETURN(pChanGrpRefCnt != NULL, NV_ERR_INVALID_STATE);
3865     NV_ASSERT_OR_RETURN(*pChanGrpRefCnt != 0, NV_ERR_INVALID_STATE);
3866 
3867     (*pChanGrpRefCnt)--;
3868 
3869     NV_PRINTF(LEVEL_INFO,
3870               "ChanGrp 0x%x on runlist 0x%x refCnt decreased to 0x%x\n",
3871               pKernelChannelGroup->grpID,
3872               pKernelChannelGroup->runlistId,
3873               *pChanGrpRefCnt);
3874 
3875     if (*pChanGrpRefCnt == 0)
3876     {
3877         mapRemove(&pGVAS->chanGrpMap, pChanGrpRefCnt);
3878         NV_PRINTF(LEVEL_INFO, "ChanGrp 0x%x on runlist 0x%x unregistered.\n",
3879                   pKernelChannelGroup->grpID, pKernelChannelGroup->runlistId);
3880     }
3881 
3882     return NV_OK;
3883 }
3884 
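//
// Return the current reference count of a channel group, or 0 if it is not
// registered with this VA space.
//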
3885 NvU32
3886 gvaspaceGetChanGrpRefCnt_IMPL
3887 (
3888     OBJGVASPACE        *pGVAS,
3889     KernelChannelGroup *pKernelChannelGroup
3890 )
3891 {
3892     NvU32 refCnt = 0;
3893     NvU32 *pChanGrpRefCnt;
3894 
3895     NV_ASSERT_OR_RETURN(pGVAS != NULL, NV_ERR_INVALID_ARGUMENT);
3896     NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_ARGUMENT);
3897 
3898     pChanGrpRefCnt = mapFind(&pGVAS->chanGrpMap,
3899                              (NvU64)NV_PTR_TO_NvP64(pKernelChannelGroup));
3900 
3901     if (pChanGrpRefCnt != NULL)
3902     {
3903         refCnt = *pChanGrpRefCnt;
3904     }
3905     else
3906     {
3907         NV_PRINTF(LEVEL_INFO,
3908                   "grpID 0x%x on runlistId 0x%x not registered to the VAS\n",
3909                   pKernelChannelGroup->grpID,
3910                   pKernelChannelGroup->runlistId);
3911     }
3912 
3913     return refCnt;
3914 }
3915 
3916 NV_STATUS
3917 gvaspaceCheckChanGrpRefCnt_IMPL
3918 (
3919     OBJGVASPACE        *pGVAS,
3920     KernelChannelGroup *pKernelChannelGroup
3921 )
3922 {
3923     // @todo Implement
3924     return NV_OK;
3925 }
3926 
3927 NV_STATUS
3928 gvaspaceUnregisterAllChanGrps_IMPL
3929 (
3930     OBJGVASPACE *pGVAS,
3931     OBJGPU      *pGpu
3932 )
3933 {
3934     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
3935 
    // TODO: This function should be made physical-only.
    if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
3938     {
3939         return NV_OK;
3940     }
3941 
3942     if (!kfifoIsSubcontextSupported(pKernelFifo))
3943     {
3944         return NV_OK;
3945     }
3946 
3947     return NV_ERR_NOT_SUPPORTED;
3948 }
3949 
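//
// Report the allocated page level instances (from the root down toward the
// leaf for the requested page size) backing the given virtual address,
// including each level's format, size, physical address and aperture.
//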
3950 NV_STATUS
3951 gvaspaceGetPageLevelInfo_IMPL
3952 (
3953     OBJGVASPACE                                    *pGVAS,
3954     OBJGPU                                         *pGpu,
3955     NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS *pParams
3956 )
3957 {
3958     OBJVASPACE          *pVAS       = staticCast(pGVAS, OBJVASPACE);
3959     MMU_WALK            *pWalk      = NULL;
3960     const MMU_FMT_LEVEL *pLevelFmt  = NULL;
3961     const MMU_FMT_LEVEL *pTargetFmt = NULL;
3962     NvU32                level      = 0;
3963     NvU32                sublevel   = 0;
3964     GVAS_GPU_STATE      *pGpuState;
3965 
3966     if (NULL == pGVAS->pGpuStates)
3967     {
3968         // TODO: VMM must be enabled - remove once default.
3969         return NV_ERR_NOT_SUPPORTED;
3970     }
3971 
3972     pGpuState  = gvaspaceGetGpuState(pGVAS, pGpu);
3973     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_INVALID_ARGUMENT);
3974 
3975     pWalk      = pGpuState->pWalk;
3976     pLevelFmt  = pGpuState->pFmt->pRoot;
3977 
3978     pTargetFmt = mmuFmtFindLevelWithPageShift(pLevelFmt, BIT_IDX_64(pParams->pageSize));
3979     NV_ASSERT_OR_RETURN(NULL != pTargetFmt, NV_ERR_INVALID_ARGUMENT);
3980 
3981     for (level = 0; NULL != pLevelFmt; ++level)
3982     {
3983         MEMORY_DESCRIPTOR *pMemDesc = NULL;
3984         NvU32              memSize  = 0;
3985 
3986         NV_ASSERT_OR_RETURN(level < GMMU_FMT_MAX_LEVELS, NV_ERR_INVALID_STATE);
3987 
3988         NV_ASSERT_OK_OR_RETURN(
3989             mmuWalkGetPageLevelInfo(pWalk, pLevelFmt, pParams->virtAddress,
3990                                     (const MMU_WALK_MEMDESC**)&pMemDesc, &memSize));
3991         if (NULL == pMemDesc)
3992         {
3993             break;
3994         }
3995 
3996         pParams->levels[level].pFmt = (MMU_FMT_LEVEL *) pLevelFmt;
3997         pParams->levels[level].size = memSize;
3998 
3999         // Copy level formats
4000         portMemCopy((void *)&(pParams->levels[level].levelFmt), sizeof(MMU_FMT_LEVEL), (void *)pLevelFmt, sizeof(MMU_FMT_LEVEL));
4001 
4002         for (sublevel = 0; (sublevel < MMU_FMT_MAX_SUB_LEVELS) && (sublevel < pLevelFmt->numSubLevels); sublevel++)
4003         {
4004             portMemCopy((void *)&(pParams->levels[level].sublevelFmt[sublevel]), sizeof(MMU_FMT_LEVEL), (void *)(pLevelFmt->subLevels + sublevel), sizeof(MMU_FMT_LEVEL));
4005         }
4006 
4007         pParams->levels[level].physAddress =
4008             memdescGetPhysAddr(pMemDesc, VAS_ADDRESS_TRANSLATION(pVAS), 0);
4009 
4010         switch (memdescGetAddressSpace(pMemDesc))
4011         {
4012             case ADDR_FBMEM:
4013                 pParams->levels[level].aperture = GMMU_APERTURE_VIDEO;
4014                 break;
4015             case ADDR_SYSMEM:
4016                 if (NV_MEMORY_CACHED == memdescGetCpuCacheAttrib(pMemDesc))
4017                 {
4018                     pParams->levels[level].aperture = GMMU_APERTURE_SYS_COH;
4019                 }
4020                 else
4021                 {
4022                     pParams->levels[level].aperture = GMMU_APERTURE_SYS_NONCOH;
4023                 }
4024                 break;
4025             default:
4026                 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
4027         }
4028 
4029         pLevelFmt = mmuFmtGetNextLevel(pLevelFmt, pTargetFmt);
4030     }
4031 
4032     pParams->numLevels = level;
4033 
4034     return NV_OK;
4035 }
4036 
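//
// Copy the top-level PDEs reserved for server RM to the server (GSP or vGPU
// host) so it can mirror them in its MMU walker state. This is done through an
// RPC when a resource call context is available, or through the internal GMMU
// control call for the global VA space.
//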
4037 NV_STATUS
4038 gvaspaceCopyServerRmReservedPdesToServerRm_IMPL
4039 (
4040     OBJGVASPACE *pGVAS,
4041     OBJGPU      *pGpu
4042 )
4043 {
4044     CALL_CONTEXT *pContext = resservGetTlsCallContext();
4045     NV_STATUS     status   = NV_OK;
4046 
4047     if ((!IS_VIRTUAL_WITH_SRIOV(pGpu) && !IS_GSP_CLIENT(pGpu)) ||
4048         (0 == pGVAS->vaStartServerRMOwned))
4049     {
4050         return NV_OK;
4051     }
4052 
4053     NvHandle                                             hClient;
4054     NvBool                                               bFreeNeeded  = NV_FALSE;
4055     NvHandle                                             hDevice;
4056     NvHandle                                             hVASpace;
4057     OBJGPUGRP                                           *pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
4058 
4059     if (NULL != pContext)
4060     {
4061         NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS pdeCopyParams;
4062         RsResourceRef                                       *pResourceRef = pContext->pResourceRef;
4063         RsResourceRef                                       *pDeviceRef   = NULL;
4064 
4065         hClient = pContext->pClient->hClient;
4066 
4067         if (pResourceRef->internalClassId == classId(VaSpaceApi))
4068         {
4069             hVASpace = pResourceRef->hResource;
4070         }
4071         else
4072         {
            // There is no known way to reach here with a non-zero handle.
4074             hVASpace = 0;
4075         }
4076 
4077         // Find the device ref.
4078         if (pResourceRef->internalClassId == classId(Device))
4079         {
4080             pDeviceRef = pResourceRef;
4081         }
4082         else
4083         {
4084             status = refFindAncestorOfType(pResourceRef, classId(Device), &pDeviceRef);
4085             NV_ASSERT_OR_RETURN(NV_OK == status, status);
4086         }
4087 
4088         hDevice = pDeviceRef->hResource;
4089 
4090         //
4091         // VAS handle is 0 for the device vaspace. Trigger an allocation on
4092         // server RM so that the plugin has a valid handle to the device VAS
4093         // under this client. This handle will be required by the plugin when
4094         // we make the RPC later.
4095         //
4096         if ((0 == hVASpace) && IS_GSP_CLIENT(pGpu))
4097         {
4098             NV_VASPACE_ALLOCATION_PARAMETERS vaParams = {0};
4099 
4100             status = serverutilGenResourceHandle(hClient, &hVASpace);
4101             NV_ASSERT_OR_RETURN(NV_OK == status, status);
4102 
4103             vaParams.index = NV_VASPACE_ALLOCATION_INDEX_GPU_DEVICE;
4104 
4105             NV_RM_RPC_ALLOC_OBJECT(pGpu,
4106                                    hClient,
4107                                    hDevice,
4108                                    hVASpace,
4109                                    FERMI_VASPACE_A,
4110                                    &vaParams,
4111                                    sizeof(vaParams),
4112                                    status);
4113 
4114             NV_ASSERT_OR_RETURN(NV_OK == status, status);
4115             bFreeNeeded = NV_TRUE;
4116         }
4117 
4118         status = _gvaspacePopulatePDEentries(pGVAS, pGpu, &pdeCopyParams);
4119         NV_ASSERT_OR_GOTO(NV_OK == status, done);
4120 
4121         //
4122         // RPC the details of these reserved PDEs to server RM so that server RM can
4123         // mirror these PDEs in its mmu walker state. Any lower level PDEs/PTEs
4124         // allocated under these top level PDEs will be modified exclusively by
4125         // server RM. Client RM won't touch those.
4126         //
4127         status = _gvaspaceCopyServerRmReservedPdesToServerRm(hClient, hVASpace, pGpu, &pdeCopyParams);
4128         NV_ASSERT_OR_GOTO(NV_OK == status, done);
4129 done:
4130         if (bFreeNeeded)
4131         {
4132             NV_STATUS tmpStatus = NV_OK;
4133 
4134             NV_RM_RPC_FREE(pGpu, hClient, hDevice, hVASpace, tmpStatus);
4135             NV_ASSERT_OR_RETURN(NV_OK == tmpStatus, tmpStatus);
4136         }
4137     }
    // Check to ensure server-reserved PDEs are copied when the global VA space is created.
    else if (!IS_VIRTUAL(pGpu) && pGpuGrp->pGlobalVASpace == dynamicCast(pGVAS, OBJVASPACE))
4140     {
4141         NV2080_CTRL_INTERNAL_GMMU_COPY_RESERVED_SPLIT_GVASPACE_PDES_TO_SERVER_PARAMS globalCopyParams;
4142 
4143         NV_ASSERT_OK_OR_RETURN(_gvaspacePopulatePDEentries(pGVAS, pGpu, &globalCopyParams.PdeCopyParams));
4144 
4145         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
4146 
4147         NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
4148                                 pGpu->hInternalClient,
4149                                 pGpu->hInternalSubdevice,
4150                                 NV2080_CTRL_CMD_INTERNAL_GMMU_COPY_RESERVED_SPLIT_GVASPACE_PDES_TO_SERVER,
4151                                 &globalCopyParams,
4152                                 sizeof(NV2080_CTRL_INTERNAL_GMMU_COPY_RESERVED_SPLIT_GVASPACE_PDES_TO_SERVER_PARAMS)));
4153     }
4154 
4155     return status;
4156 }
4157 
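//
// Common prolog for VASPACE API control calls: look up the OBJGVASPACE behind
// the API object and resolve the target GPU from the optional subdevice handle
// or subDeviceId (defaulting to the primary GPU), forcing unicast operation.
//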
4158 static NV_STATUS
4159 _gvaspaceControl_Prolog
4160 (
4161     VaSpaceApi   *pVaspaceApi,
4162     NvHandle      hSubDevice,
4163     NvU32         subDeviceId,
4164     OBJGVASPACE **ppGVAS,
4165     OBJGPU      **ppGpu
4166 )
4167 {
4168     OBJVASPACE *pVAS = NULL;
4169 
4170     NV_ASSERT_OK_OR_RETURN(
4171         vaspaceGetByHandleOrDeviceDefault(RES_GET_CLIENT(pVaspaceApi),
4172                                           RES_GET_PARENT_HANDLE(pVaspaceApi),
4173                                           RES_GET_HANDLE(pVaspaceApi),
4174                                           &pVAS));
4175     *ppGVAS = dynamicCast(pVAS, OBJGVASPACE);
4176     NV_ASSERT_OR_RETURN(NULL != *ppGVAS, NV_ERR_INVALID_CLASS);
4177 
4178     if (0 != hSubDevice)
4179     {
4180         Subdevice *pSubDevice;
4181 
4182         NV_ASSERT_OK_OR_RETURN(
4183                 subdeviceGetByHandle(RES_GET_CLIENT(pVaspaceApi),
4184                     hSubDevice, &pSubDevice));
4185 
4186         *ppGpu = GPU_RES_GET_GPU(pSubDevice);
4187 
4188         GPU_RES_SET_THREAD_BC_STATE(pSubDevice);
4189     }
4190     else
4191     {
4192         *ppGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
4193         // Pick a specific sub-device if requested.
4194         if (0 != subDeviceId)
4195         {
4196             *ppGpu = gpumgrGetGpuFromSubDeviceInst(gpuGetDeviceInstance(*ppGpu), subDeviceId - 1);
4197             NV_ASSERT_OR_RETURN(NULL != *ppGpu, NV_ERR_INVALID_ARGUMENT);
4198         }
4199         gpumgrSetBcEnabledStatus(*ppGpu, NV_FALSE);
4200     }
4201 
4202     return NV_OK;
4203 }
4204 
4205 
4206 NV_STATUS
4207 vaspaceapiCtrlCmdVaspaceGetGmmuFormat_IMPL
4208 (
4209     VaSpaceApi                                 *pVaspaceApi,
4210     NV90F1_CTRL_VASPACE_GET_GMMU_FORMAT_PARAMS *pGmmuFormatParams
4211 )
4212 {
4213     OBJGVASPACE *pGVAS = NULL;
4214     OBJGPU      *pGpu  = NULL;
4215 
4216     NV_ASSERT_OK_OR_RETURN(
4217         _gvaspaceControl_Prolog(pVaspaceApi, pGmmuFormatParams->hSubDevice,
4218                                 pGmmuFormatParams->subDeviceId, &pGVAS, &pGpu));
4219 
4220     pGmmuFormatParams->pFmt = (GMMU_FMT *) gvaspaceGetGmmuFmt_IMPL(pGVAS, pGpu);
4221     NV_ASSERT_OR_RETURN(NULL != pGmmuFormatParams->pFmt, NV_ERR_INVALID_STATE);
4222 
4223     return NV_OK;
4224 }
4225 
4226 NV_STATUS
4227 vaspaceapiCtrlCmdVaspaceGetPageLevelInfo_IMPL
4228 (
4229     VaSpaceApi                                     *pVaspaceApi,
4230     NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS *pPageLevelInfoParams
4231 )
4232 {
4233     OBJGVASPACE *pGVAS = NULL;
4234     OBJGPU      *pGpu  = NULL;
4235 
4236     NV_ASSERT_OK_OR_RETURN(
4237         _gvaspaceControl_Prolog(pVaspaceApi, pPageLevelInfoParams->hSubDevice,
4238                                 pPageLevelInfoParams->subDeviceId, &pGVAS, &pGpu));
4239 
4240     if (NULL == pGVAS->pGpuStates)
4241     {
4242         // TODO: VMM must be enabled - remove once default.
4243         return NV_ERR_NOT_SUPPORTED;
4244     }
4245 
4246     if (IS_VIRTUAL(pGpu) && !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
4247     {
4248         NV_STATUS status = NV_OK;
4249         if (!(gvaspaceGetFlags(pGVAS) & VASPACE_FLAGS_FLA))
4250         {
4251             CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
4252             RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams->pLegacyParams;
4253 
4254             NV_RM_RPC_CONTROL(pGpu,
4255                               pRmCtrlParams->hClient,
4256                               pRmCtrlParams->hObject,
4257                               pRmCtrlParams->cmd,
4258                               pRmCtrlParams->pParams,
4259                               pRmCtrlParams->paramsSize,
4260                               status);
4261             return status;
4262         }
4263     }
4264 
4265     return gvaspaceGetPageLevelInfo(pGVAS, pGpu, pPageLevelInfoParams);
4266 }
4267 
4268 #if defined(DEBUG) || defined(DEVELOP) || defined(NV_VERIF_FEATURES)
4269 NV_STATUS
4270 vaspaceapiCtrlCmdVaspaceGetPageLevelInfoVerif_IMPL
4271 (
4272     VaSpaceApi                                     *pVaspaceApi,
4273     NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS *pPageLevelInfoParams
4274 )
4275 {
4276     return vaspaceapiCtrlCmdVaspaceGetPageLevelInfo_IMPL(pVaspaceApi, pPageLevelInfoParams);
4277 }
4278 #endif
4279 
4280 NV_STATUS
4281 vaspaceapiCtrlCmdVaspaceReserveEntries_IMPL
4282 (
4283     VaSpaceApi                                 *pVaspaceApi,
4284     NV90F1_CTRL_VASPACE_RESERVE_ENTRIES_PARAMS *pReserveEntriesParams
4285 )
4286 {
4287     OBJGVASPACE    *pGVAS  = NULL;
4288     OBJGPU         *pGpu   = NULL;
4289     NV_STATUS       status = NV_OK;
4290     GVAS_GPU_STATE *pGpuState;
4291 
4292     NV_ASSERT_OK_OR_RETURN(
4293         _gvaspaceControl_Prolog(pVaspaceApi, pReserveEntriesParams->hSubDevice,
4294                                 pReserveEntriesParams->subDeviceId, &pGVAS, &pGpu));
4295 
4296     if (NULL == pGVAS->pGpuStates)
4297     {
4298         // TODO: VMM must be enabled - remove once default.
4299         return NV_ERR_NOT_SUPPORTED;
4300     }
4301 
4302     NV_ASSERT_OR_RETURN(ONEBITSET(pReserveEntriesParams->pageSize), NV_ERR_INVALID_ARGUMENT);
4303 
4304     if (IS_VIRTUAL(pGpu) && !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
4305     {
4306         if (!(gvaspaceGetFlags(pGVAS) & VASPACE_FLAGS_FLA))
4307         {
4308             CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
4309             RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams->pLegacyParams;
4310 
4311             NV_RM_RPC_CONTROL(pGpu,
4312                               pRmCtrlParams->hClient,
4313                               pRmCtrlParams->hObject,
4314                               pRmCtrlParams->cmd,
4315                               pRmCtrlParams->pParams,
4316                               pRmCtrlParams->paramsSize,
4317                               status);
4318             return status;
4319         }
4320     }
4321 
4322     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4323     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_INVALID_STATE);
4324 
4325     // Alloc and bind page level instances.
4326     status = gvaspaceReservePageTableEntries(pGVAS, pGpu, pReserveEntriesParams->virtAddrLo,
4327                                              pReserveEntriesParams->virtAddrHi,
4328                                              pReserveEntriesParams->pageSize);
4329     NV_ASSERT(NV_OK == status);
4330 
4331     return status;
4332 }
4333 
4334 NV_STATUS
4335 vaspaceapiCtrlCmdVaspaceReleaseEntries_IMPL
4336 (
4337     VaSpaceApi                                 *pVaspaceApi,
4338     NV90F1_CTRL_VASPACE_RELEASE_ENTRIES_PARAMS *pReleaseEntriesParams
4339 )
4340 {
4341     OBJGVASPACE *pGVAS  = NULL;
4342     OBJGPU      *pGpu   = NULL;
4343     NV_STATUS    status = NV_OK;
4344 
4345     NV_ASSERT_OK_OR_RETURN(
4346         _gvaspaceControl_Prolog(pVaspaceApi, pReleaseEntriesParams->hSubDevice,
4347                                 pReleaseEntriesParams->subDeviceId, &pGVAS, &pGpu));
4348 
4349     if (NULL == pGVAS->pGpuStates)
4350     {
4351         // TODO: VMM must be enabled - remove once default.
4352         return NV_ERR_NOT_SUPPORTED;
4353     }
4354 
4355     NV_ASSERT_OR_RETURN(ONEBITSET(pReleaseEntriesParams->pageSize), NV_ERR_INVALID_ARGUMENT);
4356 
4357     if (IS_VIRTUAL(pGpu) && !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
4358     {
4359         if (!(gvaspaceGetFlags(pGVAS) & VASPACE_FLAGS_FLA))
4360         {
4361             CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
4362             RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams->pLegacyParams;
4363 
4364             NV_RM_RPC_CONTROL(pGpu,
4365                               pRmCtrlParams->hClient,
4366                               pRmCtrlParams->hObject,
4367                               pRmCtrlParams->cmd,
4368                               pRmCtrlParams->pParams,
4369                               pRmCtrlParams->paramsSize,
4370                               status);
4371             return status;
4372         }
4373     }
4374 
4375     // Unbind and free page level instances.
4376     status = gvaspaceReleasePageTableEntries(pGVAS, pGpu, pReleaseEntriesParams->virtAddrLo,
4377                                              pReleaseEntriesParams->virtAddrHi,
4378                                              pReleaseEntriesParams->pageSize);
4379     NV_ASSERT(NV_OK == status);
4380 
4381     return status;
4382 }
4383 
4384 NV_STATUS
4385 vaspaceapiCtrlCmdVaspaceCopyServerReservedPdes_IMPL
4386 (
4387     VaSpaceApi                                           *pVaspaceApi,
4388     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pCopyServerReservedPdesParams
4389 )
4390 {
4391     OBJGVASPACE      *pGVAS   = NULL;
4392     OBJGPU           *pGpu    = NULL;
4393 
4394     NV_ASSERT_OK_OR_RETURN(
4395         _gvaspaceControl_Prolog(pVaspaceApi, pCopyServerReservedPdesParams->hSubDevice,
4396                                 pCopyServerReservedPdesParams->subDeviceId, &pGVAS, &pGpu));
4397 
4398     return gvaspaceCopyServerReservedPdes(pGVAS, pGpu, pCopyServerReservedPdesParams);
4399 }
4400 
4401 NV_STATUS
4402 gvaspaceCopyServerReservedPdes_IMPL
4403 (
4404     OBJGVASPACE      *pGVAS,
4405     OBJGPU           *pGpu,
4406     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pCopyServerReservedPdesParams
4407 )
4408 {
4409 
4410     OBJVASPACE       *pVAS = NULL;
4411     KernelGmmu       *pKernelGmmu   = NULL;
4412     NV_STATUS         status  = NV_OK;
4413     MMU_WALK_USER_CTX userCtx = {0};
4414     GVAS_GPU_STATE   *pGpuState;
4415     NvS32             i;
4416 
4417     pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
4418     pVAS = staticCast(pGVAS, OBJVASPACE);
4419 
4420     if (NULL == pGVAS->pGpuStates)
4421     {
4422         // TODO: VMM must be enabled - remove once default.
4423         return NV_ERR_NOT_SUPPORTED;
4424     }
4425 
4426     NV_ASSERT_OR_RETURN(pCopyServerReservedPdesParams->numLevelsToCopy <= GMMU_FMT_MAX_LEVELS, NV_ERR_INVALID_ARGUMENT);
4427 
4428     NV_ASSERT_OR_RETURN(ONEBITSET(pCopyServerReservedPdesParams->pageSize), NV_ERR_INVALID_ARGUMENT);
4429 
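    // Running as a guest: forward the request to the host RM via RPC.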
4430     if (IS_VIRTUAL(pGpu))
4431     {
4432         CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
4433         RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams->pLegacyParams;
4434 
4435         NV_RM_RPC_CONTROL(pGpu,
4436                           pRmCtrlParams->hClient,
4437                           pRmCtrlParams->hObject,
4438                           pRmCtrlParams->cmd,
4439                           pRmCtrlParams->pParams,
4440                           pRmCtrlParams->paramsSize,
4441                           status);
4442         return status;
4443     }
4444 
4445     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4446     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_INVALID_STATE);
4447 
4448     // Alloc and bind page level instances.
4449     status = gvaspaceReservePageTableEntries(pGVAS, pGpu, pCopyServerReservedPdesParams->virtAddrLo,
4450                                              pCopyServerReservedPdesParams->virtAddrHi,
4451                                              pCopyServerReservedPdesParams->pageSize);
4452     NV_ASSERT_OR_RETURN(NV_OK == status, status);
4453 
4454     // Kick out any stale TLB entries.
4455     gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
4456 
4457     // Acquire walker context.
4458     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
4459 
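    //
    // For each level supplied by the client, wrap the client's backing memory
    // in a new memdesc and splice it into the server-side walker state for the
    // server RM owned VA range.
    //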
4460     for (i = pCopyServerReservedPdesParams->numLevelsToCopy - 1; i >= 0; i--)
4461     {
4462         MEMORY_DESCRIPTOR *pMemDescNew;
4463         NV_ADDRESS_SPACE  aperture;
4464         NvU64 flags = 0;
4465 
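        // Outside of GSP-RM the backing pages were allocated by the guest,
        // so tag the memdesc accordingly.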
4466         if (!RMCFG_FEATURE_PLATFORM_GSP)
4467         {
4468             flags = MEMDESC_FLAGS_GUEST_ALLOCATED;
4469         }
4470 
4471         switch(pCopyServerReservedPdesParams->levels[i].aperture)
4472         {
4473             case GMMU_APERTURE_VIDEO:
4474                 aperture = ADDR_FBMEM;
4475                 break;
4476             case GMMU_APERTURE_SYS_COH:
4477             case GMMU_APERTURE_SYS_NONCOH:
4478                 aperture = ADDR_SYSMEM;
4479                 break;
4480             default:
4481                 NV_ASSERT_OR_GOTO(0, done);
4482         }
4483 
4484         status = memdescCreate(&pMemDescNew, pGpu,
4485                                pCopyServerReservedPdesParams->levels[i].size,
4486                                RM_PAGE_SIZE,
4487                                NV_TRUE,
4488                                aperture,
4489                                kgmmuGetPTEAttr(pKernelGmmu),
4490                                flags);
4491         NV_ASSERT_OR_GOTO(NV_OK == status, done);
4492 
        // Set up a memdesc in the server to describe the client's PDE backing memory.
4494         memdescDescribe(pMemDescNew, aperture, pCopyServerReservedPdesParams->levels[i].physAddress,
4495                         pCopyServerReservedPdesParams->levels[i].size);
4496         memdescSetPageSize(pMemDescNew, VAS_ADDRESS_TRANSLATION(pVAS), RM_PAGE_SIZE);
4497 
4498         // Modify the server's walker state with the new backing memory.
4499         const MMU_FMT_LEVEL *pLevelFmt =
4500               mmuFmtFindLevelWithPageShift(pGpuState->pFmt->pRoot,
4501                                     pCopyServerReservedPdesParams->levels[i].pageShift);
4502         status = mmuWalkModifyLevelInstance(pGpuState->pWalk,
4503                                             pLevelFmt,
4504                                             pGVAS->vaStartServerRMOwned,
4505                                             (MMU_WALK_MEMDESC*)pMemDescNew,
4506                                             (NvU32)pCopyServerReservedPdesParams->levels[i].size,
4507                                             NV_FALSE,
4508                                             NV_FALSE,
4509                                             NV_FALSE);
4510         NV_ASSERT_OR_GOTO(NV_OK == status, done);
4511     }
4512 
4513 done:
4514     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
4515     return status;
4516 }
4517 
4518 /********************Local routines used in this file alone*******************/
4519 
4520 /*!
4521  * Reserve VA block between current limit and max limit for later growth.
4522  */
4523 static NV_STATUS
4524 _gvaspaceReserveTopForGrowth
4525 (
4526     OBJGVASPACE *pGVAS
4527 )
4528 {
4529     if (pGVAS->vaLimitInternal < pGVAS->vaLimitMax)
4530     {
4531         EMEMBLOCK *pBlock;
4532         NvU32      allocFlags;
4533         NvU64      allocOffset;
4534         NvU64      allocSize;
4535 
4536         allocFlags  = NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
4537         allocOffset = pGVAS->vaLimitInternal + 1;
4538         allocSize   = pGVAS->vaLimitMax - pGVAS->vaLimitInternal;
4539 
4540         return pGVAS->pHeap->eheapAlloc(pGVAS->pHeap, VAS_EHEAP_OWNER_RSVD,
4541                                         &allocFlags, &allocOffset, &allocSize,
4542                                         1, 1, &pBlock, NULL, NULL);
4543     }
4544     return NV_OK;
4545 }
4546 
4547 /*!
4548  * Reserve a range of VA from rangeLo to rangeHi for later use.
4549  */
4550 static NV_STATUS
4551 _gvaspaceReserveRange
4552 (
4553     OBJGVASPACE *pGVAS,
4554     NvU64 rangeLo,
4555     NvU64 rangeHi
4556 )
4557 {
4558     if (rangeLo <= rangeHi)
4559     {
4560         EMEMBLOCK *pBlock;
4561         NvU32      allocFlags;
4562         NvU64      allocOffset;
4563         NvU64      allocSize;
4564 
4565         allocFlags  = NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
4566         allocOffset = rangeLo;
4567         allocSize   = rangeHi - rangeLo + 1;
4568 
4569         return pGVAS->pHeap->eheapAlloc(pGVAS->pHeap, VAS_EHEAP_OWNER_RSVD,
4570                                         &allocFlags, &allocOffset, &allocSize,
4571                                         1, 1, &pBlock, NULL, NULL);
4572     }
4573     return NV_ERR_INVALID_ARGUMENT;
4574 }
4575 
4576 /*!
4577  * Pin the page tables covering external management (leaf PDE) alignment if the
4578  * provided VA belongs to a lazy VA reservation. This is used for lazy WDDMv1 page tables.
4579  */
4580 static NV_STATUS
4581 _gvaspacePinLazyPageTables
4582 (
4583     OBJGVASPACE       *pGVAS,
4584     OBJGPU            *pGpu,
4585     const NvU64        va
4586 )
4587 {
4588     NV_STATUS     status = NV_OK;
4589     PEMEMBLOCK    pMemBlock;
4590     PGVAS_BLOCK   pVASBlock;
4591 
    // Search for the VA block; nothing to do if not found.
4593     pMemBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, va, 0);
4594     if (!pMemBlock)
4595     {
4596         return NV_OK;
4597     }
4598     pVASBlock = (PGVAS_BLOCK)pMemBlock->pData;
4599 
4600     // Only proceed if lazy and we have lock mask info.
4601     if (pVASBlock->flags.bLazy && (0 != pVASBlock->pageSizeLockMask))
4602     {
4603         NvU32             pageShift;
4604         MMU_WALK_USER_CTX userCtx = {0};
4605         NvU64             vaLo = NV_ALIGN_DOWN64(va, pGVAS->extManagedAlign);
4606         NvU64             vaHi = NV_ALIGN_UP64(va + 1, pGVAS->extManagedAlign) - 1;
4607 
4608         // Clamp VA to block limits.
4609         vaLo = NV_MAX(vaLo, pMemBlock->begin);
4610         vaHi = NV_MIN(vaHi, pMemBlock->end);
4611 
4612         gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, pVASBlock, &userCtx);
4613 
4614         if (NULL == userCtx.pGpuState)
4615         {
4616             status = NV_ERR_INVALID_STATE;
4617             NV_ASSERT(0);
4618         }
4619         else
4620         {
4621             // Loop over each page size requested by client.
4622             FOR_EACH_INDEX_IN_MASK(64, pageShift, pVASBlock->pageSizeLockMask)
4623             {
4624                 // Pre-reserve page level instances in the VA range.
4625                 const MMU_FMT_LEVEL *pLevelFmt =
4626                     mmuFmtFindLevelWithPageShift(userCtx.pGpuState->pFmt->pRoot, pageShift);
4627                 status = mmuWalkReserveEntries(userCtx.pGpuState->pWalk, pLevelFmt,
4628                                                vaLo, vaHi, NV_TRUE);
4629                 if (NV_OK != status)
4630                 {
4631                     DBG_BREAKPOINT();
4632                     break;
4633                 }
4634             }
4635             FOR_EACH_INDEX_IN_MASK_END
4636         }
4637 
4638         gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
4639     }
4640 
4641     return status;
4642 }
4643 
4644 /*!
4645  * @brief Callback to free leaked virtual allocations.
4646  *
 * @param[in]     pHeap        Heap being traversed.
4648  * @param[in]     pEnv         Callback environment.
4649  * @param[in]     pMemBlock    Current heap block (may be free or used).
4650  * @param[in,out] pContinue    Initially 1, if set to 0 traversal stops.
4651  * @param[in,out] pInvalCursor Initially 0, must be set to 1 if current block
4652  *                             is deleted during the callback (to prevent it
4653  *                             from being used to find next block).
4654  */
4655 static NV_STATUS
4656 _gvaspaceFreeVASBlock
4657 (
4658     OBJEHEAP  *pHeap,
4659     void      *pEnv,
4660     PEMEMBLOCK pMemBlock,
4661     NvU32     *pContinue,
4662     NvU32     *pInvalCursor
4663 )
4664 {
4665     OBJGVASPACE *pGVAS = pEnv;
4666 
4667     if (NVOS32_BLOCK_TYPE_FREE != pMemBlock->owner && VAS_EHEAP_OWNER_RSVD != pMemBlock->owner)
4668     {
4669         NV_PRINTF(LEVEL_WARNING,
4670                   "Virtual allocation leak in range 0x%llX-0x%llX\n",
4671                   pMemBlock->begin, pMemBlock->end);
4672 
4673         gvaspaceFree(pGVAS, pMemBlock->begin);
4674         *pInvalCursor = 1;
4675     }
4676 
4677     return NV_OK;
4678 }
4679 
4680 /*!
4681  * @brief Register a mapping range for a given GPU.
4682  */
4683 static NV_STATUS
4684 _gvaspaceMappingInsert
4685 (
4686     OBJGVASPACE        *pGVAS,
4687     OBJGPU             *pGpu,
4688     GVAS_BLOCK         *pVASBlock,
4689     const NvU64         vaLo,
4690     const NvU64         vaHi,
4691     const VAS_MAP_FLAGS flags
4692 )
4693 {
4694     NV_STATUS     status    = NV_OK;
4695     OBJVASPACE   *pVAS      = staticCast(pGVAS, OBJVASPACE);
4696     GVAS_MAPPING *pMapNode  = NULL;
4697     const NvU32   gpuMask   = NVBIT(pGpu->gpuInstance);
4698 
4699     // Ensure this is not a remap.
4700     NV_ASSERT_OR_RETURN(!flags.bRemap, NV_ERR_INVALID_ARGUMENT);
4701 
4702     // Check if GPU is valid for this VAS.
4703     NV_ASSERT_OR_RETURN(gpuMask == (pVAS->gpuMask & gpuMask), NV_ERR_INVALID_ARGUMENT);
4704 
4705     // Search for existing mapping.
4706     status = btreeSearch(vaLo, (NODE**)&pMapNode, &pVASBlock->pMapTree->node);
4707     if (NV_OK == status)
4708     {
4709         // If it already exists, check for consistency.
4710         NV_ASSERT_OR_RETURN(0 == (pMapNode->gpuMask & gpuMask),
4711                           NV_ERR_INVALID_ARGUMENT);
4712         NV_ASSERT_OR_RETURN(pMapNode->node.keyStart == vaLo, NV_ERR_INVALID_ARGUMENT);
4713         NV_ASSERT_OR_RETURN(pMapNode->node.keyEnd   == vaHi, NV_ERR_INVALID_ARGUMENT);
4714 
4715         // Commit the new GPU mask to the mapping.
4716         pMapNode->gpuMask |= gpuMask;
4717     }
4718     else
4719     {
4720         // If it doesn't exist, allocate new node.
4721         pMapNode = portMemAllocNonPaged(sizeof(*pMapNode));
4722         NV_ASSERT_OR_RETURN(NULL != pMapNode, NV_ERR_NO_MEMORY);
4723 
4724         portMemSet(pMapNode, 0, sizeof(*pMapNode));
4725         pMapNode->node.keyStart = vaLo;
4726         pMapNode->node.keyEnd   = vaHi;
4727         pMapNode->gpuMask       = gpuMask;
4728 
4729         // Insert mapping node.
4730         status = btreeInsert(&pMapNode->node, (NODE**)&pVASBlock->pMapTree);
4731         NV_ASSERT_OR_GOTO(NV_OK == status, catch);
4732 
4733 catch:
4734         if (NV_OK != status)
4735         {
4736             portMemFree(pMapNode);
4737         }
4738     }
4739 
4740     return status;
4741 }
4742 
4743 /*
4744  *  @brief Override to an externally owned PDB.
4745  *  For externally owned vaspace we will associate a PDB that is provided
4746  *  by the owner of the vaspace. THis is different from the "shared management"
4747  *  scenario because, in the externally owned case RM will not allocate any page tables
4748  *  or VA for this address space. This is a way to make sure RM programs the correct PDB
4749  *  when clients use this address space to create a channel.
4750  *  TODO: Make externally owned vaspace a separate vaspace class.
4751 */
4752 static NV_STATUS
4753 _gvaspaceSetExternalPageDirBase
4754 (
4755     OBJGVASPACE       *pGVAS,
4756     OBJGPU            *pGpu,
4757     MEMORY_DESCRIPTOR *pPDB
4758 )
4759 {
4760     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
4761 
4762     if (pGVAS->bIsExternallyOwned)
4763     {
4764         pGVAS->pExternalPDB = pPDB;
4765     }
4766     return NV_OK;
4767 }
4768 
4769 /*!
4770  * @brief Unregister a mapping range for a given GPU.
4771  */
4772 static NV_STATUS
4773 _gvaspaceMappingRemove
4774 (
4775     OBJGVASPACE       *pGVAS,
4776     OBJGPU            *pGpu,
4777     GVAS_BLOCK        *pVASBlock,
4778     const NvU64        vaLo,
4779     const NvU64        vaHi
4780 )
4781 {
4782     NV_STATUS     status    = NV_OK;
4783     GVAS_MAPPING *pMapNode  = NULL;
4784     const NvU32   gpuMask   = NVBIT(pGpu->gpuInstance);
4785 
4786     // Search for existing mapping.
4787     status = btreeSearch(vaLo, (NODE**)&pMapNode, &pVASBlock->pMapTree->node);
4788     NV_ASSERT_OR_RETURN(NV_OK == status, status);
4789 
4790     // Check for consistency.
4791     NV_ASSERT_OR_RETURN(gpuMask == (pMapNode->gpuMask & gpuMask), NV_ERR_INVALID_ARGUMENT);
4792     NV_ASSERT_OR_RETURN(pMapNode->node.keyStart == vaLo,          NV_ERR_INVALID_ARGUMENT);
4793     NV_ASSERT_OR_RETURN(pMapNode->node.keyEnd   == vaHi,          NV_ERR_INVALID_ARGUMENT);
4794 
4795     // Remove GPU from mapping mask.
4796     pMapNode->gpuMask &= ~gpuMask;
4797 
4798     // Remove mapping if unused.
4799     if (0 == pMapNode->gpuMask)
4800     {
4801         btreeUnlink(&pMapNode->node, (NODE**)&pVASBlock->pMapTree);
4802         portMemFree(pMapNode);
4803     }
4804 
4805     return status;
4806 }
4807 
4808 /*!
 * Returns whether the RM internal VA range is restricted.
 * True only for the MAC GPU VA space.
 *
 * @param[in] pGVAS Pointer to the OBJGVASPACE object
4812  */
4813 NvBool
4814 gvaspaceIsInternalVaRestricted_IMPL(OBJGVASPACE *pGVAS)
4815 {
4816     return pGVAS->bRMInternalRestrictedVaRange;
4817 }
4818 
4819 NV_STATUS
4820 gvaspaceReservePageTableEntries_IMPL
4821 (
4822     OBJGVASPACE *pGVAS,
4823     OBJGPU      *pGpu,
4824     const NvU64  vaLo,
4825     const NvU64  vaHi,
4826     const NvU64  pageSizeMask
4827 )
4828 {
4829     GVAS_GPU_STATE *pGpuState;
4830     VA_RANGE_GPU   *pIter;
4831     VA_RANGE_GPU    newReservedPageTableEntry;
4832 
4833     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4834     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_GENERIC);
4835     pIter = listHead(&pGpuState->reservedPageTableEntries);
4836 
    // Move past any entries that start before this range.
4838     while (pIter && pIter->vaLo < vaLo)
4839         pIter = listNext(&pGpuState->reservedPageTableEntries, pIter);
4840 
    // Insert this range and reserve it.
4842     newReservedPageTableEntry.vaLo = vaLo;
4843     newReservedPageTableEntry.vaHi = vaHi;
4844     NV_ASSERT_OR_RETURN(listInsertValue(&pGpuState->reservedPageTableEntries,
4845                                      pIter, &newReservedPageTableEntry),
4846                      NV_ERR_GENERIC);
4847     return _gvaspaceReservePageTableEntries(pGVAS, pGpu, vaLo, vaHi,
4848                                             pageSizeMask);
4849 }
4850 
4851 NV_STATUS
4852 gvaspaceReleasePageTableEntries_IMPL
4853 (
4854     OBJGVASPACE *pGVAS,
4855     OBJGPU      *pGpu,
4856     const NvU64  vaLo,
4857     const NvU64  vaHi,
4858     const NvU64  pageSizeMask
4859 )
4860 {
4861     GVAS_GPU_STATE *pGpuState;
4862     VA_RANGE_GPU   *pIter;
4863 
4864     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4865     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_GENERIC);
4866     pIter = listHead(&pGpuState->reservedPageTableEntries);
4867 
4868     while (pIter != NULL)
4869     {
4870         if ((pIter->vaLo == vaLo) && (pIter->vaHi == vaHi))
4871         {
4872             listRemove(&pGpuState->reservedPageTableEntries, pIter);
4873             return _gvaspaceReleasePageTableEntries(pGVAS, pGpu, vaLo, vaHi,
4874                                                     pageSizeMask);
4875         }
4876 
4877         pIter = listNext(&pGpuState->reservedPageTableEntries, pIter);
4878     }
4879 
4880     NV_PRINTF(LEVEL_ERROR, "Cannot find the reserved PTE to release.\n");
4881 
4882     return NV_ERR_GENERIC;
4883 }
4884 
4885 static NV_STATUS
4886 _gvaspaceReservePageTableEntries
4887 (
4888     OBJGVASPACE *pGVAS,
4889     OBJGPU      *pGpu,
4890     const NvU64  vaLo,
4891     const NvU64  vaHi,
4892     const NvU64  pageSizeMask
4893 )
4894 {
4895     NV_STATUS         status = NV_OK;
4896     NvU32             pageShift;
4897     MMU_WALK_USER_CTX userCtx = {0};
4898 
4899     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
4900 
4901     // Loop over each page size requested by client.
4902     FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeMask)
4903     {
4904         // Pre-reserve page level instances in the VA range
4905         const MMU_FMT_LEVEL *pLevelFmt =
4906             mmuFmtFindLevelWithPageShift(
4907                     userCtx.pGpuState->pFmt->pRoot, pageShift);
4908         status = mmuWalkReserveEntries(userCtx.pGpuState->pWalk,
4909                                        pLevelFmt, vaLo, vaHi, NV_TRUE);
4910         if (status != NV_OK)
4911         {
4912             NV_PRINTF(LEVEL_ERROR, "Failed to Reserve Entries.\n");
4913             break;
4914         }
4915 
4916         if (pGVAS->flags & VASPACE_FLAGS_BAR_BAR1)
4917         {
4918             status = mmuWalkSparsify(userCtx.pGpuState->pWalk, vaLo, vaHi, NV_FALSE);
4919             if (status != NV_OK)
4920             {
4921                 NV_PRINTF(LEVEL_ERROR, "Failed to sparsify reserved BAR1 page tables.\n");
4922                 break;
4923             }
4924         }
4925     }
4926     FOR_EACH_INDEX_IN_MASK_END
4927 
4928     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
4929 
4930     return status;
4931 }
4932 
4933 static NV_STATUS
4934 _gvaspaceReleasePageTableEntries
4935 (
4936     OBJGVASPACE *pGVAS,
4937     OBJGPU      *pGpu,
4938     const NvU64  vaLo,
4939     const NvU64  vaHi,
4940     const NvU64  pageSizeMask
4941 )
4942 {
4943     GVAS_GPU_STATE *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4944     NvU32 pageShift;
4945 
    NV_ASSERT(NULL != pGpuState);

4949     // Loop over each page size.
4950     FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeMask)
4951     {
4952         // Release page level instances in the VA range.
4953         const MMU_FMT_LEVEL *pLevelFmt =
4954             mmuFmtFindLevelWithPageShift(pGpuState->pFmt->pRoot, pageShift);
4955 
4956         NV_ASSERT_OK_OR_RETURN(
4957                 _gvaspaceReleaseUnreservedPTEs(pGVAS, pGpu, vaLo, vaHi,
4958                                                pLevelFmt));
4959     }
4960     FOR_EACH_INDEX_IN_MASK_END
4961 
4962     return NV_OK;
4963 }
4964 
4965 static NV_STATUS
4966 _gvaspaceReleaseUnreservedPTEs
4967 (
4968     OBJGVASPACE *pGVAS,
4969     OBJGPU      *pGpu,
4970     const NvU64  vaLo,
4971     const NvU64  vaHi,
4972     const MMU_FMT_LEVEL *pLevelFmt
4973 )
4974 {
4975     NV_STATUS   status = NV_OK;
4976 
4977     MMU_WALK_USER_CTX userCtx = {0};
4978     GVAS_GPU_STATE   *pGpuState;
4979     VA_RANGE_GPU     *pIter;
4980     NvU64             piecewiseStart;
4981     NvU64             piecewiseEnd;
4982 
4983     pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
4984     NV_ASSERT_OR_RETURN(NULL != pGpuState, NV_ERR_GENERIC);
4985     pIter = listHead(&pGpuState->reservedPageTableEntries);
4986 
4987     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
4988 
4989     NV_ASSERT(NULL != userCtx.pGpuState);
4990 
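    //
    // Walk [vaLo, vaHi] piecewise: release page level instances only for the
    // sub-ranges not covered by an outstanding reservation, skipping over any
    // reserved ranges.
    //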
4991     piecewiseStart = vaLo;
4992     while (piecewiseStart <= vaHi)
4993     {
4994         piecewiseEnd = 0;
4995 
4996         // If there is no reservation, free the whole range.
4997         if (!pIter)
4998             piecewiseEnd = vaHi;
4999         // If there is a reservation on this GPU, free the memory space
5000         // before its low address.
5001         else if (pIter->vaLo > piecewiseStart)
5002             piecewiseEnd = NV_MIN(vaHi,
5003                                   pIter->vaLo - 1);
5004 
5005         if (piecewiseEnd)
5006         {
5007             if (!(pGVAS->flags & VASPACE_FLAGS_BAR_BAR1))
5008             {
5009                 // Clear out any mappings or sparse state.
5010                 status = mmuWalkUnmap(userCtx.pGpuState->pWalk,
5011                                       piecewiseStart, piecewiseEnd);
5012                 NV_ASSERT(NV_OK == status);
5013             }
5014 
5015             // Release page level instances in the VA range.
5016             status = mmuWalkReleaseEntries(userCtx.pGpuState->pWalk,
5017                                            pLevelFmt,
5018                                            piecewiseStart,
5019                                            piecewiseEnd);
5020             NV_ASSERT(NV_OK == status);
5021 
5022             piecewiseStart = piecewiseEnd + 1;
5023         }
5024 
5025         if (pIter)
5026         {
5027             /* Skip over the memory space of the reservation */
5028             piecewiseStart = NV_MAX(piecewiseStart, pIter->vaHi + 1);
5029             pIter = listNext(&pGpuState->reservedPageTableEntries, pIter);
5030         }
5031     }
5032 
5033 
5034     if (pGVAS->flags & VASPACE_FLAGS_FLA)
5035     {
        // Free the dummy VAS block.
5037         _gvaspaceCleanupFlaDummyPagesForFlaRange(pGVAS, pGpu, userCtx.pGpuState);
5038     }
5039 
5040     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
5041 
5042     return status;
5043 }
5044 
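/*!
 * @brief Issue NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES to server RM
 *        via RPC so it can mirror the client-allocated PDE backing memory.
 */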
5045 static NV_STATUS
5046 _gvaspaceCopyServerRmReservedPdesToServerRm
5047 (
5048     NvHandle                                              hClient,
5049     NvHandle                                              hVASpace,
5050     OBJGPU                                               *pGpu,
5051     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pPdeCopyParams
5052 )
5053 {
5054     NV_STATUS    status = NV_OK;
5055     RmCtrlParams rmCtrlParams;
5056 
5057     portMemSet(&rmCtrlParams, 0, sizeof(RmCtrlParams));
5058 
5059     rmCtrlParams.hClient    = hClient;
5060     rmCtrlParams.hObject    = hVASpace;
5061     rmCtrlParams.cmd        = NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES;
5062     rmCtrlParams.pParams    = pPdeCopyParams;
5063     rmCtrlParams.paramsSize = sizeof(NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS);
5064 
5065     NV_RM_RPC_CONTROL(pGpu,
5066                       rmCtrlParams.hClient,
5067                       rmCtrlParams.hObject,
5068                       rmCtrlParams.cmd,
5069                       rmCtrlParams.pParams,
5070                       rmCtrlParams.paramsSize,
5071                       status);
5072     NV_ASSERT(NV_OK == status);
5073 
5074     return status;
5075 }
5076 
5077 static void
5078 _gvaspaceForceFreePageLevelInstances
5079 (
5080     OBJGVASPACE    *pGVAS,
5081     OBJGPU         *pGpu,
5082     GVAS_GPU_STATE *pGpuState
5083 )
5084 {
5085     VA_RANGE_GPU      *pIter   = NULL;
5086     MMU_WALK_USER_CTX  userCtx = {0};
5087 
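    // Drop all tracked page table reservations; the force-free below tears
    // down every remaining page level instance regardless.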
5088     pIter = listHead(&(pGpuState->reservedPageTableEntries));
5089     while (NULL != pIter)
5090     {
5091         VA_RANGE_GPU *pNext;
5092         pNext = listNext(&(pGpuState->reservedPageTableEntries), pIter);
5093         listRemove(&(pGpuState->reservedPageTableEntries), pIter);
5094         pIter = pNext;
5095     }
5096 
5097     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
5098     mmuWalkLevelInstancesForceFree(pGpuState->pWalk);
5099     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
5100 }
5101 
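/*!
 * @brief Populate the COPY_SERVER_RESERVED_PDES parameters from the PDEs
 *        backing the server RM owned portion of this VA space.
 */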
5102 static NV_STATUS
5103 _gvaspacePopulatePDEentries
5104 (
5105     OBJGVASPACE    *pGVAS,
5106     OBJGPU         *pGpu,
5107     NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *pPdeCopyParams
5108 )
5109 {
5110     NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS pdeInfo;
5111     NvU32                                          i;
5112     NV_STATUS                                      status = NV_OK;
5113 
5114     portMemSet(&pdeInfo, 0, sizeof(NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS));
5115     portMemSet(pPdeCopyParams, 0, sizeof(NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS));
5116 
5117     // Populate the input params.
5118     pdeInfo.pageSize    = NVBIT64(GMMU_PD1_VADDR_BIT_LO);
5119     pdeInfo.virtAddress = pGVAS->vaStartServerRMOwned;
5120 
5121     // Fetch the details of the PDEs backing server RM's VA range.
5122     status = gvaspaceGetPageLevelInfo(pGVAS, pGpu, &pdeInfo);
5123     NV_ASSERT_OR_RETURN(NV_OK == status, status);
5124 
    // Populate the PDE copy params from the fetched page level info.
5126     for (i = 0; i < pdeInfo.numLevels; i++)
5127     {
5128         pPdeCopyParams->levels[i].pageShift   = pdeInfo.levels[i].levelFmt.virtAddrBitLo;
5129         pPdeCopyParams->levels[i].physAddress = pdeInfo.levels[i].physAddress;
5130         pPdeCopyParams->levels[i].aperture    = pdeInfo.levels[i].aperture;
5131         pPdeCopyParams->levels[i].size        = pdeInfo.levels[i].size;
5132     }
5133 
5134     pPdeCopyParams->numLevelsToCopy = pdeInfo.numLevels;
5135     pPdeCopyParams->subDeviceId     = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
5136     pPdeCopyParams->pageSize        = NVBIT64(GMMU_PD1_VADDR_BIT_LO);
5137     pPdeCopyParams->virtAddrLo      = pGVAS->vaStartServerRMOwned;
5138     pPdeCopyParams->virtAddrHi      = pPdeCopyParams->virtAddrLo +
5139                                         SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE - 1;
5140 
5141     return status;
5142 }
5143 
5144 
5145 /*!
5146  * @brief Reserve mempool for page levels.
5147  *
 * @param[in] pGVAS              Virtual address space
 * @param[in] pGpu               OBJGPU pointer
 * @param[in] hClient            Client handle
 * @param[in] size               Size of the mapping (assumes mapping is page aligned)
 * @param[in] pageSizeLockMask   Mask of page sizes locked down at VA reservation
 * @param[in] flags              VASPACE_RESERVE_FLAGS_* flags
5153  */
5154 NV_STATUS
5155 gvaspaceReserveMempool_IMPL
5156 (
5157     OBJGVASPACE *pGVAS,
5158     OBJGPU      *pGpu,
5159     NvHandle     hClient,
5160     NvU64        size,
5161     NvU64        pageSizeLockMask,
5162     NvU32        flags
5163 )
5164 {
5165     NV_STATUS               status           = NV_OK;
5166     RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool = NULL;
5167 
5168     if (RMCFG_FEATURE_PMA &&
5169         pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED)
5170     {
5171         KernelGmmu     *pKernelGmmu    = GPU_GET_KERNEL_GMMU(pGpu);
5172         MemoryManager  *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5173         const GMMU_FMT *pFmt           = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
5174 
5175         //
5176         // Always assume worst case of 4K mapping even if client has
5177         // requested bigger page size. This is to ensure that we have
5178         // sufficient memory in pools. Some MODS tests query for free
5179         // framebuffer and allocate the entire available. In such cases
5180         // we can run into OOM errors during page table allocation when
5181         // the test tries to map a big surface and the pools are short
5182         // of memory.
5183         //
5184         if (ONEBITSET(pageSizeLockMask))
5185         {
5186             //
5187             // There is a requirement of serial ATS enabled vaspaces to have
5188             // both small and big page tables allocated at the same time. This
5189             // is required for the 4K not valid feature. This is irrespective
5190             // of the actual page size requested by the client.
5191             //
5192             if (gvaspaceIsAtsEnabled(pGVAS))
5193             {
5194                 pageSizeLockMask = RM_PAGE_SIZE | pGVAS->bigPageSize;
5195             }
5196             else if (!(flags & VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY))
5197             {
5198                 pageSizeLockMask = RM_PAGE_SIZE;
5199             }
5200         }
5201         else
5202         {
5203             NV_ASSERT_OR_RETURN(((pageSizeLockMask & RM_PAGE_SIZE) != 0),
5204                                 NV_ERR_INVALID_ARGUMENT);
5205         }
5206 
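        //
        // Worst-case pool requirement: page directories plus page tables
        // needed to map a range of size bytes at the locked page sizes.
        //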
5207         NvU64 poolSize = kgmmuGetSizeOfPageDirs(pGpu, pKernelGmmu, pFmt, 0, size - 1,
5208                                                 pageSizeLockMask) +
5209                          kgmmuGetSizeOfPageTables(pGpu, pKernelGmmu, pFmt, 0, size - 1,
5210                                                   pageSizeLockMask);
5211 
5212         NV_ASSERT_OK_OR_RETURN(memmgrPageLevelPoolsGetInfo(pGpu, pMemoryManager, hClient, &pMemPool));
5213         status = rmMemPoolReserve(pMemPool, poolSize, pGVAS->flags);
5214         if ((pGVAS->flags & VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS) &&
5215             (status == NV_ERR_NO_MEMORY))
5216         {
5217             //
5218             // It is okay to change the status to NV_OK here since it is understood that
5219             // we may run out of video memory at some time. The RETRY_PTE_ALLOC_IN_SYS
5220             // flag ensures that RM retries allocating the page tables in sysmem if such
5221             // a situation arises. So, running out of video memory here need not be fatal.
5222             // It may be fatal if allocation in sysmem also fails. In that case RM will
5223             // return an error from elsewhere.
5224             //
5225             status = NV_OK;
5226         }
5227         else
5228         {
5229             NV_ASSERT_OR_RETURN((NV_OK == status), status);
5230 
            // Set up the page table pool in the VA space if the pool reservation succeeded.
5232             if (pGVAS->pPageTableMemPool != NULL)
5233             {
5234                 if (pGVAS->pPageTableMemPool != pMemPool)
5235                 {
5236                     rmMemPoolRelease(pMemPool, pGVAS->flags);
5237                     NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
5238                 }
5239             }
5240             else
5241             {
5242                 pGVAS->pPageTableMemPool = pMemPool;
5243             }
5244         }
5245     }
5246 
5247     return status;
5248 }
5249 
5250 NV_STATUS
5251 gvaspaceGetFreeHeap_IMPL
5252 (
5253     OBJGVASPACE *pGVAS,
5254     NvU64       *pFreeSize
5255 )
5256 {
5257     NV_ASSERT_OR_RETURN(pFreeSize != NULL, NV_ERR_INVALID_ARGUMENT);
5258 
5259     return pGVAS->pHeap->eheapGetFree(pGVAS->pHeap, pFreeSize);
5260 }
5261 
5262 NvBool
5263 gvaspaceIsInUse_IMPL
5264 (
5265     OBJGVASPACE *pGVAS
5266 )
5267 {
5268     NvU64 freeSize  = 0;
5269     NvU64 totalSize = 0;
5270 
5271     // Get the free heap size.
5272     NV_ASSERT(gvaspaceGetFreeHeap(pGVAS, &freeSize) == NV_OK);
5273 
5274     // Get the total heap size for FLA vaspace.
5275     totalSize = vaspaceGetVaLimit(staticCast(pGVAS, OBJVASPACE)) -
5276                 vaspaceGetVaStart(staticCast(pGVAS, OBJVASPACE)) + 1;
5277 
5278     return (totalSize != freeSize);
5279 }
5280 
5281 NV_STATUS
5282 gvaspaceFreeV2_IMPL
5283 (
5284     OBJGVASPACE *pGVAS,
5285     NvU64        vAddr,
5286     NvU64       *pSize
5287 )
5288 {
5289     EMEMBLOCK *pMemBlock = pGVAS->pHeap->eheapGetBlock(pGVAS->pHeap, vAddr, 0);
5290     NV_ASSERT_OR_RETURN(NULL != pMemBlock, NV_ERR_INVALID_ARGUMENT);
5291 
    *pSize = pMemBlock->end - pMemBlock->begin + 1;
5293     return _gvaspaceInternalFree(pGVAS, vAddr, pMemBlock);
5294 }
5295