1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*!
25  * @file
 * @brief Describes the structures and interfaces used to walk N-level page tables
27  */
28 
29 /*--------------------------------Includes------------------------------------*/
30 #if defined(SRT_BUILD)
31 
32 #include "shrdebug.h"
33 #else
34 #include "os/os.h"
35 #endif
36 #include "nvport/nvport.h"
37 #include "nvctassert.h"
38 #include "mmu_walk_private.h"
39 
40 /*--------------------------Static Function Prototypes------------------------*/
41 static NV_STATUS
42 _mmuWalkLevelInit(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pParent,
43                   const MMU_FMT_LEVEL *pLevelFmt, MMU_WALK_LEVEL *pLevel);
44 static void
45 _mmuWalkLevelDestroy(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);
46 static NV_STATUS
47 _mmuWalkLevelInstAcquire(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
48                          const NvU64 vaLo, const NvU64 vaHi, const NvBool bTarget,
49                          const NvBool bRelease, const NvBool bCommit,
50                          NvBool *pBChanged, MMU_WALK_LEVEL_INST **ppLevelInst,
51                          const NvBool bInitNv4k);
52 static void
53 _mmuWalkLevelInstRelease(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
54                          MMU_WALK_LEVEL_INST *pLevelInst);
55 static NV_STATUS NV_NOINLINE
56 _mmuWalkPdeAcquire(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
57                    MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
58                    const NvU32 entryIndex, const NvU32 subLevel,
59                    const NvU64 vaLo, const NvU64 vaHi,
60                    MMU_WALK_LEVEL_INST *pSubLevelInsts[]);
61 static void NV_NOINLINE
62 _mmuWalkPdeRelease(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
63                    MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
64                    const NvU32 entryIndex, const NvU64 entryVaLo);
65 static NV_STATUS NV_NOINLINE
66 _mmuWalkResolveSubLevelConflicts(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
67                                  MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pSubLevelInsts[],
68                                  NvU32 subLevel, NvU64 clippedVaLo, NvU64 clippedVaHi);
69 static void
70 _mmuWalkLevelInstancesForceFree(MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);
71 
72 /* -----------------------------Inline Functions----------------------------- */
73 /*!
 * Returns the @ref MMU_ENTRY_STATE of the entry.
75  */
76 MMU_ENTRY_STATE
77 mmuWalkGetEntryState(MMU_WALK_LEVEL_INST *pLevelInst, NvU32 entryIndex)
78 {
79     return (MMU_ENTRY_STATE)pLevelInst->pStateTracker[entryIndex].state;
80 }
81 
82 /*----------------------------Public Functions--------------------------------*/
83 
84 NV_STATUS
85 mmuWalkCreate
86 (
87     const MMU_FMT_LEVEL      *pRootFmt,
88     MMU_WALK_USER_CTX        *pUserCtx,
89     const MMU_WALK_CALLBACKS *pCb,
90     const MMU_WALK_FLAGS      flags,
91     MMU_WALK                **ppWalk,
92     MMU_WALK_MEMDESC         *pStagingBuffer
93 )
94 {
95     NV_STATUS  status = NV_OK;
96     MMU_WALK  *pWalk  = NULL;
97 
98     NV_ASSERT_OR_RETURN(NULL != pRootFmt, NV_ERR_INVALID_ARGUMENT);
99     NV_ASSERT_OR_RETURN(NULL != pCb,      NV_ERR_INVALID_ARGUMENT);
100     NV_ASSERT_OR_RETURN(NULL != ppWalk,   NV_ERR_INVALID_ARGUMENT);
101 
102     // Alloc and init walker structure.
103     pWalk = portMemAllocNonPaged(sizeof(*pWalk));
104     status = (pWalk == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
105     NV_ASSERT_OR_GOTO(NV_OK == status, done);
106     portMemSet(pWalk, 0, sizeof(*pWalk));
107 
108     pWalk->pUserCtx             = pUserCtx;
109     pWalk->pCb                  = pCb;
110     pWalk->flags                = flags;
111     pWalk->pStagingBuffer       = pStagingBuffer;
112     pWalk->bUseStagingBuffer    = NV_FALSE;
113     pWalk->bInvalidateOnReserve = NV_TRUE;
114 
115     // Create level hierarchy.
116     status = _mmuWalkLevelInit(pWalk, NULL, pRootFmt, &pWalk->root);
117     NV_ASSERT_OR_GOTO(NV_OK == status, done);
118 
119     // Commit.
120     *ppWalk = pWalk;
121 
122 done:
123     if (NV_OK != status)
124     {
125         mmuWalkDestroy(pWalk);
126     }
127     return status;
128 }
129 
130 void
131 mmuWalkDestroy
132 (
133     MMU_WALK *pWalk
134 )
135 {
136     if (NULL != pWalk)
137     {
138         // Destroy level hierarchy.
139         _mmuWalkLevelDestroy(pWalk, &pWalk->root);
140 
141         // Free walker struct.
142         portMemFree(pWalk);
143     }
144 }
145 
146 NV_STATUS
147 mmuWalkContinue
148 (
149     MMU_WALK *pWalk
150 )
151 {
152     return NV_ERR_NOT_SUPPORTED;
153 }
154 
155 void
156 mmuWalkCommit
157 (
158     MMU_WALK *pWalk
159 )
160 {
161     // TODO
162 }
163 
164 MMU_WALK_USER_CTX *
165 mmuWalkGetUserCtx
166 (
167     const MMU_WALK *pWalk
168 )
169 {
170     return pWalk->pUserCtx;
171 }
172 
173 void
174 mmuWalkSetUserCtx
175 (
176     MMU_WALK          *pWalk,
177     MMU_WALK_USER_CTX *pUserCtx
178 )
179 {
180     pWalk->pUserCtx = pUserCtx;
181 }
182 
183 const MMU_WALK_CALLBACKS *
184 mmuWalkGetCallbacks
185 (
186     const MMU_WALK *pWalk
187 )
188 {
189     return pWalk->pCb;
190 }
191 
192 void
193 mmuWalkSetCallbacks
194 (
195     MMU_WALK                 *pWalk,
196     const MMU_WALK_CALLBACKS *pCb
197 )
198 {
199     pWalk->pCb = pCb;
200 }
201 
202 void
203 mmuWalkLevelInstancesForceFree
204 (
205     MMU_WALK *pWalk
206 )
207 {
208     if (pWalk != NULL)
209     {
210         _mmuWalkLevelInstancesForceFree(pWalk, &pWalk->root);
211     }
212 }
213 
214 /*----------------------------Private Functions--------------------------------*/
215 
216 const MMU_WALK_LEVEL *
217 mmuWalkFindLevel
218 (
219     const MMU_WALK      *pWalk,
220     const MMU_FMT_LEVEL *pLevelFmt
221 )
222 {
223     const MMU_WALK_LEVEL *pLevel = &pWalk->root;
224     while (pLevel->pFmt != pLevelFmt)
225     {
226         NvU32 subLevel;
227         // Single sub-level always continues.
228         if (1 == pLevel->pFmt->numSubLevels)
229         {
230             pLevel = pLevel->subLevels;
231             continue;
232         }
        // With multiple sub-levels, pick the branch matching the target format.
234         for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
235         {
236             if ((pLevel->pFmt->subLevels + subLevel) == pLevelFmt)
237             {
238                 return pLevel->subLevels + subLevel;
239             }
240         }
241         // Nothing found.
242         return NULL;
243     }
244     return pLevel;
245 }
246 
247 /*!
248  * @brief This function traverses the topology described by @ref
249  * MMU_FMT_LEVEL and @ref MMU_DESC_PDE. The @ref MmuOpFunc
 * opFunc implements the actions that need to be performed at each
 * sub-level in the recursion.
 *
 * @param[in]  pWalk         The MMU walker state.
 * @param[in]  pOpParams     Parameters of the operation being performed.
 * @param[in]  pLevel        The level being processed.
 * @param[in]  pLevelInst    The level instance being processed.
 * @param[in]  vaLo          The lower end of the Virtual Address range that is
 *                           being processed.
 * @param[in]  vaHi          The upper end of the Virtual Address range that is
 *                           being processed.
257  *
258  * @return NV_OK if processing this level succeeds.
259  *         Other errors, if not.
260  */
261 NV_STATUS mmuWalkProcessPdes
262 (
263     const MMU_WALK           *pWalk,
264     const MMU_WALK_OP_PARAMS *pOpParams,
265     MMU_WALK_LEVEL           *pLevel,
266     MMU_WALK_LEVEL_INST      *pLevelInst,
267     NvU64                     vaLo,
268     NvU64                     vaHi
269 )
270 {
271 
272     if (pWalk->flags.bUseIterative)
273     {
274         // Iterative MMU Walker Implementation
275         NV_STATUS status = NV_OK;
276         NV_ASSERT_OR_RETURN(pOpParams != NULL, NV_ERR_INVALID_ARGUMENT);
277 
        // Call opFunc initially to see if we need to walk
279         status = pOpParams->opFunc(pWalk,
280                                    pOpParams,
281                                    pLevel,
282                                    pLevelInst,
283                                    vaLo,
284                                    vaHi);
285 
286         //
287         // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
288         // the recursive MMU Walker would have started recursing down,
289         // so here we kick off the iteration.
290         // If NV_OK is returned above, the recursive MMU Walker would
291         // not recurse at all, so return immediately.
292         //
293         if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
294         {
295             status = NV_OK;
296 
297             NvU64 vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
298             NvU32 entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
299             NvU32 entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
300             NvU32 entryIndex;
301             NvU32 index;
302             NvU32 entryIndexFillStart = 0;
303             NvU32 entryIndexFillEnd;
304             NvU32 pendingFillCount = 0;
305 
306             // Declarations for mmuWalk recursion conversion
307             MMU_WALK_PROCESS_PDES_ENTRY *pProcessPdeEntry;
308             MMU_WALK_RELEASE_PDES_ENTRY *pReleasePdeEntry;
309             PROCESS_PDES_STACK processPdesStack;
310             RELEASE_PDES_STACK releasePdesStack;
311             listInit(&processPdesStack, portMemAllocatorGetGlobalNonPaged());
312             listInit(&releasePdesStack, portMemAllocatorGetGlobalNonPaged());
313 
314             //
315             // Walk over each relevant entry (PDE) within this Page Level
316             // Do one initial loop to kick off iteration
317             // Add entries in reverse order because processPdesStack is a stack
318             //
319             for (entryIndex = entryIndexHi; entryIndex >= entryIndexLo; entryIndex--)
320             {
321                 pProcessPdeEntry = listPrependNew(&processPdesStack);
322                 if (pProcessPdeEntry == NULL)
323                 {
324                     status = NV_ERR_NO_MEMORY;
325                     NV_ASSERT_OR_GOTO(0, cleanupIter);
326                 }
327 
328                 //
                // The values pushed to the stack must NOT be pointers to variables on this
                // function's stack frame. All of them are plain values or pointers to memory
                // allocated by a caller of the MMU Walker.
332                 //
333                 pProcessPdeEntry->pLevel       = pLevel;
334                 pProcessPdeEntry->pLevelInst   = pLevelInst;
335                 pProcessPdeEntry->vaLo         = vaLo;
336                 pProcessPdeEntry->vaHi         = vaHi;
337                 pProcessPdeEntry->vaLevelBase  = vaLevelBase;
338                 pProcessPdeEntry->entryIndexHi = entryIndexHi;
339                 pProcessPdeEntry->entryIndex   = entryIndex;
340 
341                 // Prevent underflow because of adding entries in reverse order
342                 if (entryIndex == 0) break;
343             }
344 
345             while ((pProcessPdeEntry = listHead(&processPdesStack)) != NULL)
346             {
347                 pLevel       = pProcessPdeEntry->pLevel;
348                 pLevelInst   = pProcessPdeEntry->pLevelInst;
349                 vaLo         = pProcessPdeEntry->vaLo;
350                 vaHi         = pProcessPdeEntry->vaHi;
351                 vaLevelBase  = pProcessPdeEntry->vaLevelBase;
352                 entryIndexHi = pProcessPdeEntry->entryIndexHi;
353                 entryIndex   = pProcessPdeEntry->entryIndex;
354 
355                 listRemove(&processPdesStack, pProcessPdeEntry);
356 
357                 const NvU64           entryVaLo   = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
358                                                                          vaLevelBase, entryIndex);
359                 const NvU64           entryVaHi   = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
360                                                                          vaLevelBase, entryIndex);
361                 const NvU64           clippedVaLo = NV_MAX(vaLo, entryVaLo);
362                 const NvU64           clippedVaHi = NV_MIN(vaHi, entryVaHi);
363                 const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
364                 NvU32                 subLevel       = 0;
365                 MMU_WALK_LEVEL_INST  *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
366 
367                 // Optimizations for release operations.
368                 if (pOpParams->bRelease)
369                 {
370                     // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
371                     if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
372                         !pLevelInst->pStateTracker[entryIndex].bHybrid)
373                         continue;
374                 }
375 
376                 // Optimizations for fill operations.
377                 if (pOpParams->bFill)
378                 {
379                     const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;
380 
381                     if (pendingFillCount == 0)
382                         entryIndexFillStart = entryIndexFillEnd = entryIndex;
383 
384                     //
385                     // Check if the entire entry's coverage is being filled to
386                     // a constant state.
387                     //
388                     // If this entry is not currently a PDE we can
389                     // apply the fill operation directly
390                     // at this level and avoid "splitting" the PDE.
391                     //
392                     // If this entry is currently a PDE we must
393                     // clear the entries of the lower levels to free
394                     // unused level instances.
395                     //
396                     if ((pTarget->entryState != currEntryState) &&
397                         (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
398                         (entryVaLo == clippedVaLo) &&
399                         (entryVaHi == clippedVaHi))
400                     {
401                         entryIndexFillEnd = entryIndex;
402                         pendingFillCount++;
403 
404                         // Not the last iteration, keep batching..
405                         if (entryIndex < entryIndexHi)
406                             continue;
407                     }
408 
409                     if (pendingFillCount != 0)
410                     {
411                         NvU32 progress = 0;
412 
413                         // Flush pending fills
414                         pWalk->pCb->FillEntries(pWalk->pUserCtx,
415                                                 pLevel->pFmt,
416                                                 pLevelInst->pMemDesc,
417                                                 entryIndexFillStart,
418                                                 entryIndexFillEnd,
419                                                 pTarget->fillState,
420                                                 &progress);
421 
422                         if (progress != (entryIndexFillEnd - entryIndexFillStart + 1))
423                         {
424                             status = NV_ERR_INVALID_STATE;
425                             NV_ASSERT_OR_GOTO(0, cleanupIter);
426                         }
427 
428                         for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
429                             mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);
430 
431                         pendingFillCount = 0;
432                     }
433 
434                     // Recheck the state after fill. If nothing to do, continue..
435                     if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
436                         continue;
437 
438                 } // End of fill optimizations.
439 
440                 // Determine the sublevel we need to operate on.
441                 status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
442                                                    pLevel,
443                                                    &subLevel,
444                                                    clippedVaLo,
445                                                    clippedVaHi);
446                 NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
447 
448                 //
449                 // Allocate the sublevel instances for the current PDE and update the current
450                 // Page Dir (i.e. write the PDE into the Page Dir) if needed.
451                 //
452                 status = _mmuWalkPdeAcquire(pWalk,
453                                             pOpParams,
454                                             pLevel,
455                                             pLevelInst,
456                                             entryIndex,
457                                             subLevel,
458                                             clippedVaLo,
459                                             clippedVaHi,
460                                             pSubLevelInsts);
461                 NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
462 
463                 // Release op is done if the target sub-level is absent.
464                 if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
465                 {
466                     continue;
467                 }
468 
469                 //
470                 // Split sparse PDE's range.
471                 // When only a subrange of the original PDE's VA range is being operated
472                 // on we sparsify the remaining range lying outside the operational
473                 // subrange (clippedVaLo to clippedVaHi)
474                 //
475                 if (MMU_ENTRY_STATE_SPARSE == currEntryState)
476                 {
477                     //
                    // Sparsify the lower part of the VA range that lies outside the operational
479                     // subrange.
480                     //
481                     if (clippedVaLo > entryVaLo)
482                     {
483                         status = mmuWalkProcessPdes(pWalk,
484                                                     &g_opParamsSparsify,
485                                                     pLevel->subLevels + subLevel,
486                                                     pSubLevelInsts[subLevel],
487                                                     entryVaLo,
488                                                     clippedVaLo - 1);
489                         NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
490                     }
491 
492                     //
493                     // Sparsify the upper part of the VA range that is outside the operational
494                     // subrange.
495                     //
496                     if (clippedVaHi < entryVaHi)
497                     {
498                         status = mmuWalkProcessPdes(pWalk,
499                                                     &g_opParamsSparsify,
500                                                     pLevel->subLevels + subLevel,
501                                                     pSubLevelInsts[subLevel],
502                                                     clippedVaHi + 1,
503                                                     entryVaHi);
504                         NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
505                     }
506                 } // Sparse PDE split
507 
508                 // Resolve potential conflicts in multiple sized page tables
509                 if (pLevel->pFmt->numSubLevels != 1 &&
510                     !pOpParams->bIgnoreSubLevelConflicts)
511                 {
512                     status = _mmuWalkResolveSubLevelConflicts(pWalk,
513                                                               pOpParams,
514                                                               pLevel,
515                                                               pSubLevelInsts,
516                                                               subLevel,
517                                                               clippedVaLo,
518                                                               clippedVaHi);
519                     NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
520                 }
521 
522                 status = pOpParams->opFunc(pWalk,
523                                            pOpParams,
524                                            pLevel->subLevels + subLevel,
525                                            pSubLevelInsts[subLevel],
526                                            clippedVaLo,
527                                            clippedVaHi);
528 
529                 if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
530                 {
531                     //
532                     // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
533                     // the recursive MMU Walker would have recursed down one
534                     // more level. In this code block, we keep the iteration
535                     // going by doing everything the recursion previously did.
536                     //
537                     status = NV_OK;
538                     pReleasePdeEntry = listPrependNew(&releasePdesStack);
539                     if (pReleasePdeEntry == NULL)
540                     {
541                         status = NV_ERR_NO_MEMORY;
542                         NV_ASSERT_OR_GOTO(0, cleanupIter);
543                     }
544 
545                     //
546                     // Queue the current level for pdeRelease so that pdeRelease
547                     // can be called AFTER exploring the current level's sublevels.
548                     //
549                     pReleasePdeEntry->pLevel       = pLevel;
550                     pReleasePdeEntry->pLevelInst   = pLevelInst;
551                     pReleasePdeEntry->entryVaLo    = entryVaLo;
552                     pReleasePdeEntry->entryIndexHi = entryIndexHi;
553                     pReleasePdeEntry->entryIndex   = entryIndex;
554 
555                     //
                    // Compute the values the next recursion level downwards would have used:
                    // new vaLevelBase, entryIndexLo, entryIndexHi, and entryIndex.
558                     //
559                     vaLevelBase  = mmuFmtLevelVirtAddrLo((pLevel->subLevels + subLevel)->pFmt, clippedVaLo);
560                     entryIndexLo = mmuFmtVirtAddrToEntryIndex((pLevel->subLevels + subLevel)->pFmt, clippedVaLo);
561                     entryIndexHi = mmuFmtVirtAddrToEntryIndex((pLevel->subLevels + subLevel)->pFmt, clippedVaHi);
562 
563                     for (entryIndex = entryIndexHi; entryIndex >= entryIndexLo; entryIndex--)
564                     {
565                         pProcessPdeEntry = listPrependNew(&processPdesStack);
566                         if (pProcessPdeEntry == NULL)
567                         {
568                             status = NV_ERR_NO_MEMORY;
569                             NV_ASSERT_OR_GOTO(0, cleanupIter);
570                         }
571 
572                         pProcessPdeEntry->pLevel       = pLevel->subLevels + subLevel;
573                         pProcessPdeEntry->pLevelInst   = pSubLevelInsts[subLevel];
574                         pProcessPdeEntry->vaLo         = clippedVaLo;
575                         pProcessPdeEntry->vaHi         = clippedVaHi;
576                         pProcessPdeEntry->vaLevelBase  = vaLevelBase;
577                         pProcessPdeEntry->entryIndexHi = entryIndexHi;
578                         pProcessPdeEntry->entryIndex   = entryIndex;
579 
580                         if (entryIndex == 0) break;
581                     }
582                 }
583                 else if (NV_OK == status)
584                 {
585                     //
586                     // If NV_OK is returned above, the recursive MMU Walker would have reached
587                     // the target format level and so reached the base case of its recursion.
                    // It would then return from the recursive function calls and call pdeRelease
589                     // for all levels whose sublevels are done being processed.
590                     //
591 
                    // Call pdeRelease immediately since this level does not recurse further.
593                     _mmuWalkPdeRelease(pWalk,
594                                        pOpParams,
595                                        pLevel,
596                                        pLevelInst,
597                                        entryIndex,
598                                        entryVaLo);
599 
600                     //
601                     // If this is the last processed sublevel of a level, pdeRelease the level.
602                     // Continue doing so for all parent levels.
603                     //
604                     while (entryIndex == entryIndexHi)
605                     {
606                         if ((pReleasePdeEntry = listHead(&releasePdesStack)) != NULL)
607                         {
608                             // Extract variables for the next loop around.
609                             entryIndexHi = pReleasePdeEntry->entryIndexHi;
610                             entryIndex   = pReleasePdeEntry->entryIndex;
611 
612                             _mmuWalkPdeRelease(pWalk,
613                                                pOpParams,
614                                                pReleasePdeEntry->pLevel,
615                                                pReleasePdeEntry->pLevelInst,
616                                                pReleasePdeEntry->entryIndex,
617                                                pReleasePdeEntry->entryVaLo);
618 
619                             listRemove(&releasePdesStack, pReleasePdeEntry);
620                         }
621                         else
622                         {
623                             break;
624                         }
625                     }
626                 }
627                 else
628                 {
629                     // Stop processing PDEs if we are in error state.
630                     goto cleanupIter;
631                 }
632             } // per entry loop
633 
634 
635             if (listHead(&processPdesStack) != NULL)
636             {
637                 //
638                 // If this assertion fails, it is a result of a programming
639                 // error in the iterative MMU Walker implementation.
640                 //
641                 status = NV_ERR_INVALID_STATE;
642                 NV_ASSERT_OR_GOTO(0, cleanupIter);
643             }
644 
645             //
646             // Note that if releasePdesStack is not empty at this point,
647             // we hit an empty sublevel, but we still need to pdeRelease
648             // the parent sublevels in cleanup below.
649             //
650 
// Use a distinct label name here to avoid conflicting with the "cleanup" label used by the recursive implementation below.
652 cleanupIter:
653 
654             //
            // In the recursive MMU Walker, when a sublevel fails, that level would pdeRelease,
            // return to the parent, and the parent would pdeRelease and return to its parent, and so on.
            // Emulate that here and pdeRelease all parents.
658             //
659 
660             while ((pReleasePdeEntry = listHead(&releasePdesStack)) != NULL)
661             {
662                 _mmuWalkPdeRelease(pWalk,
663                                    pOpParams,
664                                    pReleasePdeEntry->pLevel ,
665                                    pReleasePdeEntry->pLevelInst,
666                                    pReleasePdeEntry->entryIndex,
667                                    pReleasePdeEntry->entryVaLo);
668 
669                 listRemove(&releasePdesStack, pReleasePdeEntry);
670             }
671 
672             listDestroy(&processPdesStack);
673             listDestroy(&releasePdesStack);
674 
675 
676         }
677         return status;
678     }
679     else
680     {
681         // Recursive MMU Walker Implementation
682         NV_STATUS    status       = NV_OK;
683         NvU64        vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
684         NvU32        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
685         NvU32        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
686         NvU32        entryIndex;
687         NvU32        index;
688         NvU32        entryIndexFillStart = 0;
689         NvU32        entryIndexFillEnd;
690         NvU32        pendingFillCount = 0;
691 
692         NV_ASSERT_OR_RETURN(NULL != pOpParams, NV_ERR_INVALID_ARGUMENT);
693 
694         // Walk over each relevant entry (PDE) within this Page Level
695         for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
696         {
697             const NvU64           entryVaLo   = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
698                                                                      vaLevelBase, entryIndex);
699             const NvU64           entryVaHi   = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
700                                                                      vaLevelBase, entryIndex);
701             const NvU64           clippedVaLo = NV_MAX(vaLo, entryVaLo);
702             const NvU64           clippedVaHi = NV_MIN(vaHi, entryVaHi);
703             const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
704             NvU32                 subLevel       = 0;
705             MMU_WALK_LEVEL_INST  *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
706 
707             // Optimizations for release operations.
708             if (pOpParams->bRelease)
709             {
710                 // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
711                 if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
712                     !pLevelInst->pStateTracker[entryIndex].bHybrid)
713                     continue;
714             }
715 
716             // Optimizations for fill operations.
717             if (pOpParams->bFill)
718             {
719                 const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;
720 
721                 if (pendingFillCount == 0)
722                     entryIndexFillStart = entryIndexFillEnd = entryIndex;
723 
724                 //
725                 // Check if the entire entry's coverage is being filled to
726                 // a constant state.
727                 //
728                 // If this entry is not currently a PDE we can
729                 // apply the fill operation directly
730                 // at this level and avoid "splitting" the PDE.
731                 //
732                 // If this entry is currently a PDE we must
733                 // clear the entries of the lower levels to free
734                 // unused level instances.
735                 //
736                 if ((pTarget->entryState != currEntryState) &&
737                     (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
738                     (entryVaLo == clippedVaLo) &&
739                     (entryVaHi == clippedVaHi))
740                 {
741                     entryIndexFillEnd = entryIndex;
742                     pendingFillCount++;
743 
744                     // Not the last iteration, keep batching..
745                     if (entryIndex < entryIndexHi)
746                         continue;
747                 }
748 
749                 if (pendingFillCount != 0)
750                 {
751                     NvU32 progress = 0;
752 
753                     // Flush pending fills
754                     pWalk->pCb->FillEntries(pWalk->pUserCtx,
755                                             pLevel->pFmt,
756                                             pLevelInst->pMemDesc,
757                                             entryIndexFillStart,
758                                             entryIndexFillEnd,
759                                             pTarget->fillState,
760                                             &progress);
761 
762                     NV_ASSERT_OR_RETURN(
763                         progress == (entryIndexFillEnd - entryIndexFillStart + 1),
764                         NV_ERR_INVALID_STATE);
765 
766                     for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
767                         mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);
768 
769                     pendingFillCount = 0;
770                 }
771 
772                 // Recheck the state after fill. If nothing to do, continue..
773                 if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
774                     continue;
775 
776             } // End of fill optimizations.
777 
778             // Determine the sublevel we need to operate on.
779             status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
780                                                pLevel,
781                                                &subLevel,
782                                                clippedVaLo,
783                                                clippedVaHi);
784             NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
785 
786             //
787             // Allocate the sublevel instances for the current PDE and update the current
788             // Page Dir (i.e. write the PDE into the Page Dir) if needed.
789             //
790             status = _mmuWalkPdeAcquire(pWalk,
791                                         pOpParams,
792                                         pLevel,
793                                         pLevelInst,
794                                         entryIndex,  //PDE index being processed
795                                         subLevel,    //Sub level processed within the PDE
796                                         clippedVaLo, //Low VA for the PDE
797                                         clippedVaHi, //High VA for the PDE
798                                         pSubLevelInsts);
799             NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
800 
801             // Release op is done if the target sub-level is absent.
802             if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
803             {
804                 continue;
805             }
806 
807             //
808             // Split sparse PDE's range.
809             // When only a subrange of the original PDE's VA range is being operated
810             // on we sparsify the remaining range lying outside the operational
811             // subrange (clippedVaLo to clippedVaHi)
812             //
813             if (MMU_ENTRY_STATE_SPARSE == currEntryState)
814             {
815                 //
                // Sparsify the lower part of the VA range that lies outside the operational
817                 // subrange.
818                 //
819                 if (clippedVaLo > entryVaLo)
820                 {
821                     status = g_opParamsSparsify.opFunc(pWalk,
822                                                        &g_opParamsSparsify,
823                                                        pLevel->subLevels + subLevel,
824                                                        pSubLevelInsts[subLevel],
825                                                        entryVaLo,
826                                                        clippedVaLo - 1);
827                     NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
828                 }
829 
830                 //
831                 // Sparsify the upper part of the VA range that is outside the operational
832                 // subrange.
833                 //
834                 if (clippedVaHi < entryVaHi)
835                 {
836                     status = g_opParamsSparsify.opFunc(pWalk,
837                                                        &g_opParamsSparsify,
838                                                        pLevel->subLevels + subLevel,
839                                                        pSubLevelInsts[subLevel],
840                                                        clippedVaHi + 1,
841                                                        entryVaHi);
842                     NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
843                 }
844             } // Sparse PDE split
845 
846             // Resolve potential conflicts in multiple sized page tables
847             if (pLevel->pFmt->numSubLevels != 1 &&
848                 !pOpParams->bIgnoreSubLevelConflicts)
849             {
850                 status = _mmuWalkResolveSubLevelConflicts(pWalk,
851                                                           pOpParams,
852                                                           pLevel,
853                                                           pSubLevelInsts,
854                                                           subLevel,
855                                                           clippedVaLo,
856                                                           clippedVaHi);
857                 NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
858             }
859 
860             // Recurse to update the next level for this PDE
861             status = pOpParams->opFunc(pWalk,
862                                        pOpParams,
863                                        pLevel->subLevels + subLevel,
864                                        pSubLevelInsts[subLevel],
865                                        clippedVaLo,
866                                        clippedVaHi);
867             NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
868 
869 cleanup:
870             // Free unused sublevel instances. Clear the PDE if all sublevels are deallocated.
871             _mmuWalkPdeRelease(pWalk,
872                                pOpParams,
873                                pLevel,
874                                pLevelInst,
875                                entryIndex,
876                                entryVaLo);
877 
878             // Stop processing PDEs if we are in error state.
879             if (NV_OK != status)
880                 break;
881         } // per entry loop
882         return status;
883     }
884 
885 
886 }
887 
888 /*!
 * @brief This function allocates the root Page Directory and commits it to the
 * related channels.
 *
 * @param[in]  pWalk         The MMU walker state.
 * @param[in]  vaLo          The lower end of the Virtual Address range that is
 *                           being processed.
 * @param[in]  vaHi          The upper end of the Virtual Address range that is
 *                           being processed.
 *
 * @param[in]  bCommit       Force commit the PDB.
 *
 * @return NV_OK if allocating this level succeeds.
900  *         Other errors, if not.
901  */
902 NV_STATUS
903 mmuWalkRootAcquire
904 (
905     MMU_WALK *pWalk,
906     NvU64     vaLo,
907     NvU64     vaHi,
908     NvBool    bCommit
909 )
910 {
911     MMU_WALK_LEVEL_INST *pLevelInst = NULL;
912     NvBool               bChanged   = NV_FALSE;
913 
914     // Acquire root level instance memory.
915     NV_ASSERT_OK_OR_RETURN(
916         _mmuWalkLevelInstAcquire(pWalk, &pWalk->root, vaLo, vaHi,
917                                  NV_TRUE, NV_FALSE, bCommit, &bChanged,
918                                  &pLevelInst, NV_FALSE /*bInitNv4k*/));
919 
    // Check pLevelInst to catch the corner case where Commit() is called before PDB allocation.
921     if (bChanged || (bCommit && pLevelInst))
922     {
923         NvBool bDone;
924 
925         // Bind this Page Dir to the affected channels
926         bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt,
927                                       pLevelInst->pMemDesc, NV_FALSE);
928         NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);
929     }
930 
931     return NV_OK;
932 }
933 
934 /*!
935  * @brief This function releases the root Page Directory
936  */
937 void
938 mmuWalkRootRelease
939 (
940     MMU_WALK *pWalk
941 )
942 {
943     MMU_WALK_LEVEL_INST *pLevelInst = pWalk->root.pInstances;
944     if (NULL != pLevelInst)
945     {
946         // Free the level instance if the entry ref count is 0.
947         if ((0 == pLevelInst->numValid + pLevelInst->numSparse) &&
948             (0 == pLevelInst->numReserved))
949         {
950             NvBool bDone;
951 
952             // Commit NULL root page directory (clear usage).
953             bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt, NULL, NV_FALSE);
954             NV_ASSERT(bDone);
955 
956             // Free unused root memory.
957             _mmuWalkLevelInstRelease(pWalk, &pWalk->root, pLevelInst);
958         }
959     }
960 }
961 
962 /*!
963  * @brief This function updates the @ref MMU_WALK_LEVEL_INST::pStateTracker for an
964  *        entry specified by the entryIndex.
965  *
 * @param[in]     pLevelInst     The level instance containing the entry.
 * @param[in]     entryIndex     Index of the entry whose state needs to be updated.
 * @param[in]     newEntryState  The new state of the entry specified by entryIndex.
968  */
969 void
970 mmuWalkSetEntryState
971 (
972     MMU_WALK_LEVEL_INST *pLevelInst,
973     NvU32                entryIndex,
974     MMU_ENTRY_STATE      newEntryState
975 )
976 {
977     MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
978 
979     // Decrement ref count for current state
980     switch (currEntryState)
981     {
982         case MMU_ENTRY_STATE_IS_PTE:
983         case MMU_ENTRY_STATE_IS_PDE:
984             NV_ASSERT(0 != pLevelInst->numValid);
985             pLevelInst->numValid--;
986             break;
987         case MMU_ENTRY_STATE_SPARSE:
988             NV_ASSERT(0 != pLevelInst->numSparse);
989             pLevelInst->numSparse--;
990             break;
991         case MMU_ENTRY_STATE_NV4K:
992             NV_ASSERT(0 != pLevelInst->numNv4k);
993             pLevelInst->numNv4k--;
994             break;
995         case MMU_ENTRY_STATE_INVALID:
996             break;
997         default:
998             NV_ASSERT(0);
999     }
1000 
1001     // Increment new state ref count
1002     switch (newEntryState)
1003     {
1004         case MMU_ENTRY_STATE_IS_PTE:
1005         case MMU_ENTRY_STATE_IS_PDE:
1006             pLevelInst->numValid++;
1007             break;
1008         case MMU_ENTRY_STATE_SPARSE:
1009             pLevelInst->numSparse++;
1010             break;
1011         case MMU_ENTRY_STATE_NV4K:
1012             pLevelInst->numNv4k++;
1013             break;
1014         case MMU_ENTRY_STATE_INVALID:
1015             break;
1016         default:
1017             NV_ASSERT(0);
1018     }
1019 
1020     // Commit new state.
1021     pLevelInst->pStateTracker[entryIndex].state = newEntryState;
1022 }
1023 
1024 void
1025 mmuWalkSetEntryReserved
1026 (
1027     MMU_WALK_LEVEL_INST *pLevelInst,
1028     NvU32                entryIndex,
1029     NvBool               bReserved
1030 )
1031 {
1032     if (pLevelInst->pStateTracker[entryIndex].bReserved)
1033     {
1034         NV_ASSERT(0 != pLevelInst->numReserved);
1035         pLevelInst->numReserved--;
1036     }
1037     if (bReserved)
1038     {
1039         pLevelInst->numReserved++;
1040     }
1041     pLevelInst->pStateTracker[entryIndex].bReserved = bReserved;
1042 }
1043 
1044 void
1045 mmuWalkSetEntryHybrid
1046 (
1047     MMU_WALK_LEVEL_INST *pLevelInst,
1048     NvU32                entryIndex,
1049     NvBool               bHybrid
1050 )
1051 {
1052     if (pLevelInst->pStateTracker[entryIndex].bHybrid)
1053     {
1054         NV_ASSERT(0 != pLevelInst->numHybrid);
1055         pLevelInst->numHybrid--;
1056     }
1057     if (bHybrid)
1058     {
1059         pLevelInst->numHybrid++;
1060     }
1061     pLevelInst->pStateTracker[entryIndex].bHybrid = bHybrid;
1062 }
1063 
1064 /**
 * @brief      Calculate the target entry indices that cover the VA range of
 *             the source entries
 *
 * @details    For example, entry 1 in a 64K PT is aligned to 4K PT entries 16
 *             to 31, and 4K PTEs 1 to 18 are covered by 64K PTEs 0 to 1.
 *
 *             This is needed by the NV4K encoding: updating the big page table
 *             according to the small page table requires translating entry
 *             indices between the two formats.
 *
 * @param[in]  pPageFmtIn   Source format
 * @param[in]  indexLoIn    The lower input index
 * @param[in]  indexHiIn    The upper input index
1077  * @param[in]  pPageFmtOut  Target format
1078  * @param[out] pIndexLoOut  The lower result index
1079  * @param[out] pIndexHiOut  The higher result index
1080  */
1081 void
1082 mmuFmtCalcAlignedEntryIndices
1083 (
1084     const MMU_FMT_LEVEL *pPageFmtIn,
1085     const NvU32 indexLoIn,
1086     const NvU32 indexHiIn,
1087     const MMU_FMT_LEVEL *pPageFmtOut,
1088     NvU32 *pIndexLoOut,
1089     NvU32 *pIndexHiOut
1090 )
1091 {
1092     NvU64 pageSizeIn, pageSizeOut;
1093     NvU64 pageSizeRatio;
1094     NV_ASSERT(pIndexLoOut != NULL && pIndexHiOut != NULL);
1095     NV_ASSERT(pPageFmtIn != NULL && pPageFmtOut != NULL);
1096 
1097     pageSizeIn = mmuFmtLevelPageSize(pPageFmtIn);
1098     pageSizeOut = mmuFmtLevelPageSize(pPageFmtOut);
1099 
1100     if (pageSizeIn < pageSizeOut)
1101     {
1102         pageSizeRatio = pageSizeOut / pageSizeIn;
1103         NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
1104         *pIndexLoOut = (NvU32)(indexLoIn / pageSizeRatio);
1105         *pIndexHiOut = (NvU32)(indexHiIn / pageSizeRatio);
1106     }
1107     else
1108     {
1109         pageSizeRatio = pageSizeIn / pageSizeOut;
1110         NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
1111         *pIndexLoOut = (NvU32)(indexLoIn * pageSizeRatio);
1112         *pIndexHiOut = (NvU32)((indexHiIn + 1) * pageSizeRatio - 1);
1113     }
1114 }
1115 
1116 /*----------------------------Static Functions--------------------------------*/
1117 
1118 static NV_STATUS
1119 _mmuWalkLevelInit
1120 (
1121     const MMU_WALK       *pWalk,
1122     MMU_WALK_LEVEL       *pParent,
1123     const MMU_FMT_LEVEL  *pLevelFmt,
1124     MMU_WALK_LEVEL       *pLevel
1125 )
1126 {
1127     // Init pointers.
1128     pLevel->pFmt    = pLevelFmt;
1129     pLevel->pParent = pParent;
1130 
1131     if (0 != pLevelFmt->numSubLevels)
1132     {
1133         NvU32       subLevel;
1134         const NvU32 size = pLevelFmt->numSubLevels * (NvU32)sizeof(*pLevel->subLevels);
1135 
1136         // Allocate sub-level array.
1137         pLevel->subLevels = portMemAllocNonPaged(size);
1138         if (pLevel->subLevels == NULL)
1139             return NV_ERR_NO_MEMORY;
1140 
1141         portMemSet(pLevel->subLevels, 0, size);
1142 
1143         // Recursively create each sub-level.
1144         for (subLevel = 0; subLevel < pLevelFmt->numSubLevels; ++subLevel)
1145         {
1146             NV_ASSERT_OK_OR_RETURN(
1147                 _mmuWalkLevelInit(pWalk, pLevel, pLevelFmt->subLevels + subLevel,
1148                                   pLevel->subLevels + subLevel));
1149         }
1150     }
1151 
1152     return NV_OK;
1153 }
1154 
1155 static void
1156 _mmuWalkLevelDestroy
1157 (
1158     const MMU_WALK *pWalk,
1159     MMU_WALK_LEVEL *pLevel
1160 )
1161 {
1162     NvU32 subLevel;
1163 
1164     if (NULL != pLevel->subLevels)
1165     {
1166         // Recursively destroy each sub-level.
1167         for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
1168         {
1169             _mmuWalkLevelDestroy(pWalk, pLevel->subLevels + subLevel);
1170         }
1171         // Free sub-level array.
1172         portMemFree(pLevel->subLevels);
1173     }
1174 
1175     // All level instance memory should be freed already.
1176     NV_ASSERT(NULL == pLevel->pInstances);
1177 }
1178 
1179 /**
1180  * @brief      Resolve upcoming state conflicts before mmu walk operations
1181  *
1182  * @example    Say we are to mmuWalkMap VA range [vaLo, vaHi] on small PT.
1183  * Assume we have 4K PT and 64K PT as our small PT and big PT, and [vaLo, vaHi]
1184  * is a strict subset of VA range covered by BigPTE[1, 3] and SmallPTE[18, 61].
1185  * Let's say BigPTE[1, 3] are sparse right now.
1186  *
1187  * To resolve the conflict, we need to preserve sparse state for part of the
1188  * VA range that is not going to be mapped. We need to move those states from
1189  * BigPT to SmallPT.
1190  *
1191  * Before:
1192  *  BigPTE[1, 3]: sparse,   SmallPTE[16 - 63]: invalid
1193  *  (BigPTE[1, 3] and SmallPTE[16 - 63] are VA aligned)
1194  * After:
1195  *  BigPTE[1, 3]: invalid,  SmallPTE[16 - 17]: sparse
1196  *                          SmallPTE[18 - 61]: invalid, will later be mapped
1197  *                          SmallPTE[62 - 63]: sparse
1198  *
 * @example    If we are to mmuWalkMap on big PT instead of small PT,
1200  * and sparse state was on small PT, we just need to invalidate the small PTEs.
1201  *
1202  * Before:
1203  *  BigPTE[1, 3]:       invalid,
1204  *  SmallPTE[16 - 63]:  sparse
1205  * After:
1206  *  BigPTE[1, 3]:       invalid, will later be mapped
1207  *  SmallPTE[16 - 63]:  invalid
1208  *
1209  * @return     NV_OK on success, no other values for now
1210  */
1211 static NV_STATUS NV_NOINLINE
1212 _mmuWalkResolveSubLevelConflicts
1213 (
1214     const MMU_WALK              *pWalk,
1215     const MMU_WALK_OP_PARAMS    *pOpParams,
1216     MMU_WALK_LEVEL              *pLevel,
1217     MMU_WALK_LEVEL_INST         *pSubLevelInsts[],
1218     NvU32                        subLevelIdx,
1219     NvU64                        clippedVaLo,
1220     NvU64                        clippedVaHi
1221 )
1222 {
1223     NvU32     i                             = 0;
1224     NvU32     progress                      = 0;
1225     NV_STATUS status                        = NV_OK;
1226     NvBool    bConflictLo                   = NV_FALSE;
1227     NvBool    bConflictHi                   = NV_FALSE;
1228     const MMU_FMT_LEVEL *pLevelFmtBig       = pLevel->subLevels[0].pFmt;
1229     const MMU_FMT_LEVEL *pLevelFmtSmall     = pLevel->subLevels[1].pFmt;
1230     MMU_WALK_LEVEL_INST *pLevelBigInst      = pSubLevelInsts[0];
1231     MMU_WALK_LEVEL_INST *pLevelSmallInst    = pSubLevelInsts[1];
    // Entry indices for the target page table
    NvU32 entryIndexLo, entryIndexHi;
    // Entry indices involved in both page tables
1235     NvU32 indexLo_Small, indexHi_Small, indexLo_Big, indexHi_Big;
1236 
1237     if (0 == subLevelIdx)
1238     {
1239         entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaLo);
1240         entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaHi);
1241         indexLo_Big = entryIndexLo;
1242         indexHi_Big = entryIndexHi;
1243         mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
1244             pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
1245     }
1246     else
1247     {
1248         entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaLo);
1249         entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaHi);
1250         mmuFmtCalcAlignedEntryIndices(pLevelFmtSmall, entryIndexLo,
1251             entryIndexHi, pLevelFmtBig, &indexLo_Big, &indexHi_Big);
1252         mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
1253             pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
1254     }
1255 
1256     // check if involved Small PTEs need to be sparsified
1257     if (1 == subLevelIdx && NULL != pLevelSmallInst && NULL != pLevelBigInst)
1258     {
1259         // check lower part
1260         MMU_ENTRY_STATE entryStateBig;
1261         entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexLo_Big);
1262         bConflictLo = (MMU_ENTRY_STATE_SPARSE == entryStateBig);
1263 
1264         // check higher part
1265         entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexHi_Big);
1266         bConflictHi = (MMU_ENTRY_STATE_SPARSE == entryStateBig);
1267     }
1268 
1269     if (bConflictLo && entryIndexLo > indexLo_Small)
1270     {
1271         // sparsify lower range of entries
1272         pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
1273             pLevelSmallInst->pMemDesc, indexLo_Small, entryIndexLo - 1,
1274             MMU_WALK_FILL_SPARSE, &progress);
1275         NV_ASSERT_OR_RETURN(progress == entryIndexLo - indexLo_Small,
1276             NV_ERR_INVALID_STATE);
1277 
1278         for (i = indexLo_Small; i <= entryIndexLo - 1; i++)
1279         {
1280             mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
1281         }
1282     }
1283 
1284     if (bConflictHi && entryIndexHi < indexHi_Small)
1285     {
1286         // sparsify higher range of entries
1287         pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
1288             pLevelSmallInst->pMemDesc, entryIndexHi + 1, indexHi_Small,
1289             MMU_WALK_FILL_SPARSE, &progress);
1290         NV_ASSERT_OR_RETURN(progress == indexHi_Small - entryIndexHi,
1291             NV_ERR_INVALID_STATE);
1292 
1293         for (i = entryIndexHi + 1; i <= indexHi_Small; i++)
1294         {
1295             mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
1296         }
1297     }
1298 
1299     // invalidate the VA range in the other page table
1300     if (NULL != pLevelSmallInst && NULL != pLevelBigInst)
1301     {
1302         NvU32 indexLo_tmp, indexHi_tmp;
1303         const MMU_FMT_LEVEL *pSubLevelFmt;
1304         MMU_WALK_LEVEL_INST *pSubLevelInst;
1305 
1306         if (subLevelIdx == 0)
1307         {
1308             indexLo_tmp = indexLo_Small;
1309             indexHi_tmp = indexHi_Small;
1310             pSubLevelFmt = pLevelFmtSmall;
1311             pSubLevelInst = pLevelSmallInst;
1312         }
1313         else
1314         {
1315             indexLo_tmp = indexLo_Big;
1316             indexHi_tmp = indexHi_Big;
1317             pSubLevelFmt = pLevelFmtBig;
1318             pSubLevelInst = pLevelBigInst;
1319         }
1320 
1321         pWalk->pCb->FillEntries(pWalk->pUserCtx, pSubLevelFmt,
1322             pSubLevelInst->pMemDesc, indexLo_tmp, indexHi_tmp,
1323             MMU_WALK_FILL_INVALID, &progress);
1324         NV_ASSERT_OR_RETURN(progress == indexHi_tmp - indexLo_tmp + 1,
1325             NV_ERR_INVALID_STATE);
1326 
1327         for (i = indexLo_tmp; i <= indexHi_tmp; i++)
1328         {
1329             mmuWalkSetEntryState(pSubLevelInst, i, MMU_ENTRY_STATE_INVALID);
1330         }
1331     }
1332 
1333     return status;
1334 }
1335 
1336 /*!
1337  * Lazily allocates and initializes a level instance.
1338  */
1339 static NV_STATUS
1340 _mmuWalkLevelInstAcquire
1341 (
1342     const MMU_WALK       *pWalk,
1343     MMU_WALK_LEVEL       *pLevel,
1344     const NvU64           vaLo,
1345     const NvU64           vaHi,
1346     const NvBool          bTarget,
1347     const NvBool          bRelease,
1348     const NvBool          bCommit,
1349     NvBool               *pBChanged,
1350     MMU_WALK_LEVEL_INST **ppLevelInst,
1351     const NvBool          bInitNv4k
1352 )
1353 {
1354     NV_STATUS            status;
1355     MMU_WALK_MEMDESC    *pOldMem;
1356     NvU32                oldSize;
1357     MMU_WALK_LEVEL_INST *pLevelInst = NULL;
1358     NvBool               bNew       = NV_FALSE;
1359 
1360     // Lookup level instance.
1361     if (NV_OK != btreeSearch(vaLo, (NODE**)&pLevelInst, (NODE*)pLevel->pInstances))
1362     {
1363         NvU32 numBytes;
1364 
1365         if (!bTarget || bRelease)
1366         {
1367             // Skip missing non-target instances.
1368             *ppLevelInst = NULL;
1369             return NV_OK;
1370         }
1371 
1372         // We only call Commit() on an already allocated page directory.
1373         NV_ASSERT_OR_RETURN(!bCommit, NV_ERR_INVALID_STATE);
1374 
1375         // Mark as newly allocated.
1376         bNew = NV_TRUE;
1377 
1378         // Allocate missing target instances.
1379         pLevelInst = portMemAllocNonPaged(sizeof(*pLevelInst));
1380         status = (pLevelInst == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
1381         NV_ASSERT_OR_GOTO(NV_OK == status, done);
1382         portMemSet(pLevelInst, 0, sizeof(*pLevelInst));
1383 
1384         // Insert the new node into the tree of instances for this page level.
1385         pLevelInst->node.keyStart = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
1386         pLevelInst->node.keyEnd   = mmuFmtLevelVirtAddrHi(pLevel->pFmt, vaHi);
1387 
1388         status = btreeInsert(&pLevelInst->node, (NODE**)&pLevel->pInstances);
1389         NV_ASSERT_OR_GOTO(NV_OK == status, done);
1390 
1391         // Allocate entry tracker.
1392         numBytes = mmuFmtLevelEntryCount(pLevel->pFmt) * sizeof(MMU_ENTRY_INFO);
1393         pLevelInst->pStateTracker = portMemAllocNonPaged(numBytes);
1394         status = (pLevelInst->pStateTracker == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
1395         NV_ASSERT_OR_GOTO(NV_OK == status, done);
1396         portMemSet(pLevelInst->pStateTracker, 0, numBytes);
1397         if (bInitNv4k)
1398         {
1399             NvU32 i;
1400             for (i = 0; i < mmuFmtLevelEntryCount(pLevel->pFmt); ++i)
1401             {
1402                  mmuWalkSetEntryState(pLevelInst, i, MMU_ENTRY_STATE_NV4K);
1403             }
1404         }
1405     }
1406 
1407     // Save original memory info.
1408     pOldMem = pLevelInst->pMemDesc;
1409     oldSize = pLevelInst->memSize;
1410 
1411     // Allocate (possibly reallocating) memory for this level instance.
1412     status = pWalk->pCb->LevelAlloc(pWalk->pUserCtx,
1413                                     pLevel->pFmt,
1414                                     mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo),
1415                                     vaHi,
1416                                     bTarget,
1417                                     &pLevelInst->pMemDesc,
1418                                     &pLevelInst->memSize,
1419                                     pBChanged);
1420     NV_ASSERT_OR_GOTO(NV_OK == status, done);
1421 
1422     if (*pBChanged)
1423     {
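        //
        // The backing memory changed (new allocation or growth): entries in
        // [entryIndexLo, entryIndexHi] are the ones added beyond the old size.
        //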
1424         const NvU32 entryIndexLo = oldSize / pLevel->pFmt->entrySize;
1425         const NvU32 entryIndexHi = (pLevelInst->memSize / pLevel->pFmt->entrySize) - 1;
1426         NvU32       progress     = 0;
1427 
1428         //
1429         // Default state for new entries:
1430         // NV4K for the big page table if ATS is enabled, otherwise invalid.
1431         //
1432         MMU_WALK_FILL_STATE newEntryState = bInitNv4k ? MMU_WALK_FILL_NV4K :
1433                                                         MMU_WALK_FILL_INVALID;
1434 
1435         NV_ASSERT(NULL != pLevelInst->pMemDesc);
1436         NV_ASSERT(entryIndexLo <= entryIndexHi);
1437 
1438         // We only call Commit() on an already allocated page directory.
1439         if (bCommit)
1440         {
1441             status = NV_ERR_INVALID_STATE;
1442             NV_ASSERT_OR_GOTO(NV_OK == status, done);
1443         }
1444 
1445         // Copy old entries from old to new.
1446         if (entryIndexLo > 0)
1447         {
1448             NV_ASSERT(NULL != pWalk->pCb->CopyEntries);
1449             pWalk->pCb->CopyEntries(pWalk->pUserCtx,
1450                                     pLevel->pFmt,
1451                                     pOldMem,
1452                                     pLevelInst->pMemDesc,
1453                                     0,
1454                                     entryIndexLo - 1,
1455                                     &progress);
1456             NV_ASSERT(progress == entryIndexLo);
1457 
1458             // Free old memory.
1459             pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt,
1460                                   pLevelInst->node.keyStart, pOldMem);
1461         }
1462 
1463         if (pWalk->bInvalidateOnReserve)
1464         {
1465             // Clear new entries to invalid.
1466             pWalk->pCb->FillEntries(pWalk->pUserCtx,
1467                                     pLevel->pFmt,
1468                                     pLevelInst->pMemDesc,
1469                                     entryIndexLo,
1470                                     entryIndexHi,
1471                                     newEntryState,
1472                                     &progress);
1473             NV_ASSERT(progress == entryIndexHi - entryIndexLo + 1);
1474         }
1475     }
1476     else
1477     {
1478         // Ensure the memory descriptor and size haven't changed.
1479         NV_ASSERT(pOldMem == pLevelInst->pMemDesc && oldSize == pLevelInst->memSize);
1480     }
1481 
1482     // Commit return.
1483     *ppLevelInst = pLevelInst;
1484 
1485 done:
1486     // Cleanup newly allocated instance on failure.
1487     if (NV_OK != status &&
1488         bNew && NULL != pLevelInst)
1489     {
1490         _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
1491     }
1492     return status;
1493 }
1494 
1495 /*!
1496  * Frees an unused level instance.
1497  */
1498 static void
1499 _mmuWalkLevelInstRelease
1500 (
1501     const MMU_WALK      *pWalk,
1502     MMU_WALK_LEVEL      *pLevel,
1503     MMU_WALK_LEVEL_INST *pLevelInst
1504 )
1505 {
1506     NV_ASSERT(0 == pLevelInst->numValid);
1507     NV_ASSERT(0 == pLevelInst->numReserved);
1508     // Unlink.
1509     btreeUnlink(&pLevelInst->node, (NODE**)&pLevel->pInstances);
1510     // Free.
1511     if (NULL != pLevelInst->pMemDesc)
1512     {
1513         pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->node.keyStart,
1514                               pLevelInst->pMemDesc);
1515     }
1516     portMemFree(pLevelInst->pStateTracker);
1517     portMemFree(pLevelInst);
1518 }
1519 
1520 /*!
1521  * Allocates the sub-level MMU_WALK_LEVEL_INST instances for a given PDE.
1522  * If the sub-level allocation succeeds, the parent level's PDE is
1523  * updated.
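 *
 * All parallel sub-levels are acquired together so that partial page tables
 * stay in sync (they must share the same partial size), and the parent PDE
 * is rewritten through the UpdatePde callback when any sub-level memory
 * changed or a commit was requested.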
1524  */
1525 static NV_STATUS NV_NOINLINE
1526 _mmuWalkPdeAcquire
1527 (
1528     const MMU_WALK           *pWalk,
1529     const MMU_WALK_OP_PARAMS *pOpParams,
1530     MMU_WALK_LEVEL           *pLevel,
1531     MMU_WALK_LEVEL_INST      *pLevelInst,
1532     const NvU32               entryIndex,
1533     const NvU32               subLevel,
1534     const NvU64               vaLo,
1535     const NvU64               vaHi,
1536     MMU_WALK_LEVEL_INST      *pSubLevelInsts[]
1537 )
1538 {
1539     NV_STATUS               status  = NV_OK;
1540     NvBool                  bCommit = NV_FALSE;
1541     NvU32                   i;
1542     const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
1543     NvU64                   vaLimit = vaHi;
1544     const NvU32             numSubLevels = pLevel->pFmt->numSubLevels;
1545     MMU_WALK_LEVEL_INST    *pCurSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
1546 
1547     //
1548     // Determine minimum VA limit of existing sub-levels.
1549     // This is required to keep parallel partial page tables in sync.
1550     // MMU HW that supports partial size tables selects the size in the
1551     // parent PDE so each sub-level *MUST* be the same partial size
1552     // once allocated.
1553     //
1554     if (numSubLevels > 1)
1555     {
1556         for (i = 0; i < numSubLevels; ++i)
1557         {
1558             // Lookup sub-level instance.
1559             if (NV_OK == btreeSearch(vaLo, (NODE**)&pCurSubLevelInsts[i],
1560                                      (NODE*)pLevel->subLevels[i].pInstances))
1561             {
1562                 const MMU_FMT_LEVEL *pSubLevelFmt = pLevel->pFmt->subLevels + i;
1563                 const NvU64          minVaLimit =
1564                     mmuFmtLevelVirtAddrLo(pSubLevelFmt, vaLo) +
1565                         (pCurSubLevelInsts[i]->memSize /
1566                          pSubLevelFmt->entrySize *
1567                          mmuFmtLevelPageSize(pSubLevelFmt)) - 1;
1568 
1569                 vaLimit = NV_MAX(vaLimit, minVaLimit);
1570             }
1571         }
1572     }
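    //
    // Illustrative example (hypothetical numbers): assume a 4K sub-level with
    // 8-byte entries and an existing partial table of memSize = 2KB. That
    // table holds 256 entries covering 1MB of VA, so its minVaLimit is the
    // level base + 1MB - 1. If the requested vaHi ends below that, vaLimit is
    // raised so the sibling sub-level is allocated to the same partial span.
    //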
1573 
1574     //
1575     // The loop is reversed (last sub-level first) for NV4K: when there are
1576     // multiple sub-levels, the small PT is handled before the big PT.
1577     //
1578     for (i = numSubLevels; i > 0; --i)
1579     {
1580         NvBool bChanged = NV_FALSE;
1581         NvU32  subLevelIdx = i - 1;
1582         NvBool bTarget = (subLevelIdx == subLevel);
1583         NvBool bInitNv4k = NV_FALSE;
1584 
1585         //
1586         // If NV4K is required (i.e. ATS is enabled), acquire the 64K PT
1587         // whenever the 4K PT has been acquired and the 64K PT is not
1588         // already there.
1589         //
1590         if (pWalk->flags.bAtsEnabled && subLevelIdx == 0 &&
1591             numSubLevels > 1 && !pOpParams->bRelease)
1592         {
1593             if (pSubLevelInsts[1] != NULL)
1594             {
1595                 bTarget = NV_TRUE;
1596             }
1597             if (pSubLevelInsts[0] == NULL)
1598             {
1599                 bInitNv4k = NV_TRUE;
1600             }
1601         }
1602 
1603         // Acquire sub-level instance.
1604         NV_ASSERT_OK_OR_RETURN(
1605             _mmuWalkLevelInstAcquire(pWalk, pLevel->subLevels + subLevelIdx,
1606                                      vaLo, vaLimit, bTarget,
1607                                      pOpParams->bRelease, pOpParams->bCommit,
1608                                      &bChanged, &pSubLevelInsts[subLevelIdx],
1609                                      bInitNv4k));
1610         if (NULL == pSubLevelInsts[subLevelIdx])
1611         {
1612             // Skip missing non-target instances.
1613             NV_ASSERT(pOpParams->bRelease || !bTarget);
1614             continue;
1615         }
1616 
1617         // Track info for commit.
1618         bCommit        |= bChanged;
1619         pSubMemDescs[subLevelIdx] = pSubLevelInsts[subLevelIdx]->pMemDesc;
1620     }
1621 
1622     // Sanity check: with ATS enabled, the big PT must exist whenever the small PT does.
1623     if (pWalk->flags.bAtsEnabled &&
1624         numSubLevels > 1 &&
1625         pSubLevelInsts[1] != NULL &&
1626         pSubLevelInsts[0] == NULL)
1627     {
1628         NV_ASSERT(0);
1629     }
1630 
1631     if (bCommit || pOpParams->bCommit)
1632     {
1633         NvBool bDone;
1634 
1635         // Update the current PDE.
1636         bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
1637                                       entryIndex, pSubMemDescs);
1638         NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);
1639 
1640         // Track entry as a PDE.
1641         mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_IS_PDE);
1642     }
1643 
1644     return status;
1645 }
1646 
1647 /*!
1648  * Frees the sub-levels of the PDE passed in if their refcount is 0. It
1649  * also clears the PDE if both sub-levels are released.
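 *
 * The new parent entry state (invalid, sparse, or still a PDE) is written
 * out before the sub-level instances are actually freed, so the PDE never
 * points at released sub-level memory.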
1650  */
1651 static void NV_NOINLINE
1652 _mmuWalkPdeRelease
1653 (
1654     const MMU_WALK           *pWalk,
1655     const MMU_WALK_OP_PARAMS *pOpParams,
1656     MMU_WALK_LEVEL           *pLevel,
1657     MMU_WALK_LEVEL_INST      *pLevelInst,
1658     const NvU32               entryIndex,
1659     const NvU64               entryVaLo
1660 )
1661 {
1662     MMU_WALK_LEVEL_INST    *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
1663     const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
1664     NvBool                  bChanged = NV_FALSE;
1665     NvU32                   subLevel, i;
1666     MMU_ENTRY_STATE         state = MMU_ENTRY_STATE_INVALID;
1667 
1668     // Apply target state if this is a fill operation.
1669     if (pOpParams->bFill)
1670     {
1671         const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *)pOpParams->pOpCtx;
1672         state = pTarget->entryState;
1673     }
1674 
1675     //
1676     // Loop through the sublevels and free up those with 0 ref count.
1677     // We operate on a temp copy of the PDE because we want to update the
1678     // PDE memory before releasing the actual sublevel pointers. We need this order
1679     // to prevent any state inconsistency between the parent MMU_DESC_PDE and
1680     // the sublevel MMU_WALK_LEVEL_INST structures.
1681     //
1682     for (i = pLevel->pFmt->numSubLevels; i > 0; --i)
1683     {
1684         subLevel = i - 1;
1685         if (NV_OK == btreeSearch(entryVaLo, (NODE**)&pSubLevelInsts[subLevel],
1686                                  (NODE*)pLevel->subLevels[subLevel].pInstances))
1687         {
1688             MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];
1689 
1690             // For ATS NV4K, check whether the big page table can be freed.
1691             if (pLevel->pFmt->numSubLevels == 2 && subLevel == 0)
1692             {
1693                 if (pWalk->flags.bAtsEnabled)
1694                 {
1695                     if (pSubLevelInsts[0]->numNv4k ==
1696                             mmuFmtLevelEntryCount(pLevel->subLevels[0].pFmt) &&
1697                         (0 == pSubLevelInsts[0]->numReserved) &&
1698                         (pSubMemDescs[1] == NULL || bChanged == NV_TRUE))
1699                     {
1700                         bChanged = NV_TRUE;
1701                         continue;
1702                     }
1703                     else
1704                     {
1705                         state = MMU_ENTRY_STATE_IS_PDE;
1706                         pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
1707                         continue;
1708                     }
1709                 }
1710             }
1711 
1712             if ((0 != (pSubLevelInst->numValid + pSubLevelInst->numSparse)) ||
1713                 (0 != (pSubLevelInst->numReserved + pSubLevelInst->numHybrid)))
1714             {
1715                 // We've got at least one non-empty sublevel, so leave it mapped.
1716                 state = MMU_ENTRY_STATE_IS_PDE;
1717                 pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
1718             }
1719             else if (NULL != pSubLevelInst->pMemDesc)
1720             {
1721                 // We're going to free a sub-level.
1722                 bChanged = NV_TRUE;
1723             }
1724         }
1725     }
1726 
1727     //
1728     // A failure path may have aborted early, before the sub-levels were
1729     // processed, so also check that the current state matches the expected one.
1730     //
1731     bChanged |= (state != mmuWalkGetEntryState(pLevelInst, entryIndex));
1732 
1733     //
1734     // If we've changed any sublevel we need to update the PDE in the parent
1735     // Page Directory
1736     //
1737     if (bChanged)
1738     {
1739         NvBool bDone;
1740         NvU32  progress = 0;
1741 
1742         // Initialize the PDE attributes from the temp copy that has the cleared sub-level(s).
1743         switch (state)
1744         {
1745         case MMU_ENTRY_STATE_SPARSE:
1746         case MMU_ENTRY_STATE_INVALID:
1747             pWalk->pCb->FillEntries(pWalk->pUserCtx,
1748                                     pLevel->pFmt,
1749                                     pLevelInst->pMemDesc,
1750                                     entryIndex,
1751                                     entryIndex,
1752                                     MMU_ENTRY_STATE_SPARSE == state ?
1753                                         MMU_WALK_FILL_SPARSE : MMU_WALK_FILL_INVALID,
1754                                     &progress);
1755             NV_ASSERT_OR_RETURN_VOID(progress == 1);
1756             // Clear the hybrid flag since all sub-levels are now released.
1757             if (pLevelInst->pStateTracker[entryIndex].bHybrid)
1758             {
1759                 mmuWalkSetEntryHybrid(pLevelInst, entryIndex, NV_FALSE);
1760             }
1761             break;
1762         case MMU_ENTRY_STATE_IS_PDE:
1763             bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
1764                                           entryIndex, pSubMemDescs);
1765             NV_ASSERT_OR_RETURN_VOID(bDone);
1766             break;
1767         default:
1768             NV_ASSERT_OR_RETURN_VOID(0);
1769         }
1770 
1771         // Track new state of entry.
1772         mmuWalkSetEntryState(pLevelInst, entryIndex, state);
1773     }
1774 
1775     // Free up the actual sublevels from the PDE
1776     for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
1777     {
1778         MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];
1779         if (NULL != pSubLevelInst &&
1780             NULL == pSubMemDescs[subLevel])
1781         {
1782             _mmuWalkLevelInstRelease(pWalk, pLevel->subLevels + subLevel,
1783                                       pSubLevelInst);
1784         }
1785     }
1786 }
1787 
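/*!
 * Recursively force-frees all instances of this level and its sub-levels,
 * resetting the valid/reserved counts so the asserts in
 * _mmuWalkLevelInstRelease are not triggered.
 */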
1788 static void
1789 _mmuWalkLevelInstancesForceFree
1790 (
1791     MMU_WALK       *pWalk,
1792     MMU_WALK_LEVEL *pLevel
1793 )
1794 {
1795     MMU_WALK_LEVEL_INST *pLevelInst = NULL;
1796     NvU32                subLevel;
1797 
1798     if (NULL == pLevel)
1799         return;
1800 
1801     // Free all instances at this level.
1802     btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
1803     while (NULL != pLevelInst)
1804     {
1805         //
1806         // Since we are force freeing everything, it is okay to reset these fields
1807         // in order to avoid hitting asserts in _mmuWalkLevelInstRelease.
1808         //
1809         pLevelInst->numValid    = 0;
1810         pLevelInst->numReserved = 0;
1811         _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
1812         btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
1813     }
1814     pLevel->pInstances = NULL;
1815 
1816     if (NULL != pLevel->subLevels)
1817     {
1818         for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; subLevel++)
1819         {
1820             _mmuWalkLevelInstancesForceFree(pWalk, pLevel->subLevels + subLevel);
1821         }
1822     }
1823 }
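
//
// Illustrative sketch only (not part of this implementation): the rough
// shape of a LevelAlloc callback as consumed by _mmuWalkLevelInstAcquire
// above. The parameter order mirrors the call site; the function name and
// the exact callback typedef are assumptions for illustration.
//
//   static NV_STATUS
//   exampleLevelAlloc(void *pUserCtx, const MMU_FMT_LEVEL *pFmt,
//                     NvU64 vaLo, NvU64 vaHi, NvBool bTarget,
//                     MMU_WALK_MEMDESC **ppMemDesc, NvU32 *pMemSize,
//                     NvBool *pBChanged)
//   {
//       // If the existing *pMemSize already covers [vaLo, vaHi], leave
//       // *ppMemDesc and *pMemSize untouched and set *pBChanged = NV_FALSE.
//       // Otherwise allocate (or grow) the backing memory, return the new
//       // descriptor and size, and set *pBChanged = NV_TRUE so the walker
//       // copies the old entries and fills the newly added ones.
//       return NV_OK;
//   }
//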
1824 
1825