/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*!
 * @file
 * @brief Describes the structures and interfaces used to walk N-level page tables
 */

/*--------------------------------Includes------------------------------------*/
#if defined(SRT_BUILD)
#include "shrdebug.h"
#else
#include "os/os.h"
#endif
#include "nvport/nvport.h"
#include "nvctassert.h"
#include "mmu_walk_private.h"

/*--------------------------Static Function Prototypes------------------------*/
static NV_STATUS
_mmuWalkLevelInit(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pParent,
                  const MMU_FMT_LEVEL *pLevelFmt, MMU_WALK_LEVEL *pLevel);
static void
_mmuWalkLevelDestroy(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);
static NV_STATUS
_mmuWalkLevelInstAcquire(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         const NvU64 vaLo, const NvU64 vaHi, const NvBool bTarget,
                         const NvBool bRelease, const NvBool bCommit,
                         NvBool *pBChanged, MMU_WALK_LEVEL_INST **ppLevelInst,
                         const NvBool bInitNv4k);
static void
_mmuWalkLevelInstRelease(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         MMU_WALK_LEVEL_INST *pLevelInst);
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU32 subLevel,
                   const NvU64 vaLo, const NvU64 vaHi,
                   MMU_WALK_LEVEL_INST *pSubLevelInsts[]);
static void NV_NOINLINE
_mmuWalkPdeRelease(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU64 entryVaLo);
static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                                 MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pSubLevelInsts[],
                                 NvU32 subLevel, NvU64 clippedVaLo, NvU64 clippedVaHi);
static void
_mmuWalkLevelInstancesForceFree(MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);

/* -----------------------------Inline Functions----------------------------- */
/*!
 * Returns the @ref MMU_ENTRY_STATE of the entry.
 */
MMU_ENTRY_STATE
mmuWalkGetEntryState(MMU_WALK_LEVEL_INST *pLevelInst, NvU32 entryIndex)
{
    return (MMU_ENTRY_STATE)pLevelInst->pStateTracker[entryIndex].state;
}
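
/*
 * Usage sketch (illustrative, not from the original source): callers test
 * the tracked state before acting on an entry, e.g.
 *
 *   if (MMU_ENTRY_STATE_IS_PDE == mmuWalkGetEntryState(pLevelInst, i))
 *   {
 *       // entry i references a live sub-level instance
 *   }
 *
 * The state lives in the software tracker (pStateTracker), so walk
 * decisions never require reading entries back from the page level memory.
 */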

/*----------------------------Public Functions--------------------------------*/

NV_STATUS
mmuWalkCreate
(
    const MMU_FMT_LEVEL      *pRootFmt,
    MMU_WALK_USER_CTX        *pUserCtx,
    const MMU_WALK_CALLBACKS *pCb,
    const MMU_WALK_FLAGS      flags,
    MMU_WALK                **ppWalk,
    MMU_WALK_MEMDESC         *pStagingBuffer
)
{
    NV_STATUS  status = NV_OK;
    MMU_WALK  *pWalk  = NULL;

    NV_ASSERT_OR_RETURN(NULL != pRootFmt, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != pCb,      NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != ppWalk,   NV_ERR_INVALID_ARGUMENT);

    // Alloc and init walker structure.
    pWalk = portMemAllocNonPaged(sizeof(*pWalk));
    status = (pWalk == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
    NV_ASSERT_OR_GOTO(NV_OK == status, done);
    portMemSet(pWalk, 0, sizeof(*pWalk));

    pWalk->pUserCtx             = pUserCtx;
    pWalk->pCb                  = pCb;
    pWalk->flags                = flags;
    pWalk->pStagingBuffer       = pStagingBuffer;
    pWalk->bUseStagingBuffer    = NV_FALSE;
    pWalk->bInvalidateOnReserve = NV_TRUE;

    // Create level hierarchy.
    status = _mmuWalkLevelInit(pWalk, NULL, pRootFmt, &pWalk->root);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    // Commit.
    *ppWalk = pWalk;

done:
    if (NV_OK != status)
    {
        mmuWalkDestroy(pWalk);
    }
    return status;
}
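
/*
 * Hypothetical usage sketch (the callback table, flags, and format pointer
 * are placeholders supplied by the caller):
 *
 *   MMU_WALK *pWalk = NULL;
 *   NV_STATUS status = mmuWalkCreate(pRootFmt, pUserCtx, &myCallbacks,
 *                                    myFlags, &pWalk, NULL);
 *   if (NV_OK == status)
 *   {
 *       // ... mmuWalkRootAcquire()/map/unmap operations ...
 *       mmuWalkDestroy(pWalk);
 *   }
 *
 * Note that mmuWalkDestroy(NULL) is a no-op, which is why the error path of
 * mmuWalkCreate() can call it unconditionally.
 */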

void
mmuWalkDestroy
(
    MMU_WALK *pWalk
)
{
    if (NULL != pWalk)
    {
        // Destroy level hierarchy.
        _mmuWalkLevelDestroy(pWalk, &pWalk->root);

        // Free walker struct.
        portMemFree(pWalk);
    }
}

NV_STATUS
mmuWalkContinue
(
    MMU_WALK *pWalk
)
{
    return NV_ERR_NOT_SUPPORTED;
}

void
mmuWalkCommit
(
    MMU_WALK *pWalk
)
{
    // TODO
}

MMU_WALK_USER_CTX *
mmuWalkGetUserCtx
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pUserCtx;
}

NV_STATUS
mmuWalkSetUserCtx
(
    MMU_WALK          *pWalk,
    MMU_WALK_USER_CTX *pUserCtx
)
{
    NV_ASSERT_OR_RETURN(NULL != pWalk, NV_ERR_INVALID_STATE);

    pWalk->pUserCtx = pUserCtx;
    return NV_OK;
}

const MMU_WALK_CALLBACKS *
mmuWalkGetCallbacks
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pCb;
}

void
mmuWalkSetCallbacks
(
    MMU_WALK                 *pWalk,
    const MMU_WALK_CALLBACKS *pCb
)
{
    pWalk->pCb = pCb;
}

void
mmuWalkLevelInstancesForceFree
(
    MMU_WALK *pWalk
)
{
    if (pWalk != NULL)
    {
        _mmuWalkLevelInstancesForceFree(pWalk, &pWalk->root);
    }
}

/*----------------------------Private Functions--------------------------------*/

const MMU_WALK_LEVEL *
mmuWalkFindLevel
(
    const MMU_WALK      *pWalk,
    const MMU_FMT_LEVEL *pLevelFmt
)
{
    const MMU_WALK_LEVEL *pLevel = &pWalk->root;
    while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
    {
        NvU32 subLevel;

        NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);

        // Single sub-level always continues.
        if (1 == pLevel->pFmt->numSubLevels)
        {
            pLevel = pLevel->subLevels;
            continue;
        }
        // Multi sub-level must pick branch based on target.
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
        {
            if ((pLevel->pFmt->subLevels + subLevel) == pLevelFmt)
            {
                return pLevel->subLevels + subLevel;
            }
        }
        // Nothing found.
        return NULL;
    }
    return pLevel;
}
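
/*
 * Illustrative topology (assuming a dual sub-level format, i.e. a page
 * directory that can point to both a big and a small page table):
 *
 *          root (page directory)
 *          /                  \
 *   subLevels[0]          subLevels[1]
 *   (big PT format)       (small PT format)
 *
 * mmuWalkFindLevel() matches levels by format pointer identity, so the
 * pLevelFmt argument must come from the same MMU_FMT_LEVEL hierarchy that
 * was passed to mmuWalkCreate().
 */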

/*!
 * @brief This function traverses the topology described by @ref
 * MMU_FMT_LEVEL and @ref MMU_DESC_PDE. The @ref MmuOpFunc
 * opFunc implements the actions needed to be performed at each
 * sublevel in the recursion.
 *
 * @param[in]  vaLo          The lower end of the Virtual Address range that is
 *                           being processed.
 * @param[in]  vaHi          The upper end of the Virtual Address range that is
 *                           being processed.
 *
 * @return NV_OK if processing this level succeeds.
 *         Other errors, if not.
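 *
 * Note: two implementations follow, selected by flags.bUseIterative.
 * The iterative variant emulates the recursion by saving per-level
 * iteration state in MMU_WALK_LEVEL::iterInfo on the way down and
 * restoring it on the way back up; both variants are intended to be
 * functionally equivalent.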
 */
NV_STATUS mmuWalkProcessPdes
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    NvU64                     vaLo,
    NvU64                     vaHi
)
{
    if (pWalk->flags.bUseIterative)
    {
        NV_STATUS status = NV_OK;
        const MMU_WALK_LEVEL *pLevelOrig = pLevel;
        NV_ASSERT_OR_RETURN(pOpParams != NULL, NV_ERR_INVALID_ARGUMENT);

        // Call opFunc initially to see if we need to walk.
        status = pOpParams->opFunc(pWalk,
                                   pOpParams,
                                   pLevel,
                                   pLevelInst,
                                   vaLo,
                                   vaHi);

        //
        // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
        // the recursive MMU Walker would have started recursing down,
        // so here we kick off the iteration.
        // If NV_OK is returned above, the recursive MMU Walker would
        // not recurse at all, so return immediately.
        //
        if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
        {
            status = NV_OK;

            NvU64 vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
            NvU32 entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
            NvU32 entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
            NvU32 entryIndex   = entryIndexLo;
            NvU32 entryIndexFillStart = 0;
            NvU32 entryIndexFillEnd   = 0;
            NvU32 pendingFillCount    = 0;
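
            //
            // Fill batching: runs of contiguous entries whose entire VA
            // coverage is filled to the same constant state are accumulated
            // in [entryIndexFillStart, entryIndexFillEnd] and flushed with a
            // single FillEntries() callback instead of one callback per
            // entry.
            //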

            //
            // entryIndex, entryIndexHi are modified in the loop itself
            // as we iterate through levels.
            //
            while (entryIndex <= entryIndexHi)
            {
                const NvU64           entryVaLo   = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                                         vaLevelBase, entryIndex);
                const NvU64           entryVaHi   = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                                         vaLevelBase, entryIndex);
                const NvU64           clippedVaLo = NV_MAX(vaLo, entryVaLo);
                const NvU64           clippedVaHi = NV_MIN(vaHi, entryVaHi);
                const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
                NvU32                 subLevel       = 0;
                MMU_WALK_LEVEL_INST  *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

                // Optimizations for release operations.
                if (pOpParams->bRelease)
                {
                    // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                    if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        !pLevelInst->pStateTracker[entryIndex].bHybrid)
                    {
                        goto check_last_entry;
                    }
                }

                // Optimizations for fill operations.
                if (pOpParams->bFill)
                {
                    const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                    if (pendingFillCount == 0)
                        entryIndexFillStart = entryIndexFillEnd = entryIndex;

                    //
                    // Check if the entire entry's coverage is being filled to
                    // a constant state.
                    //
                    // If this entry is not currently a PDE we can
                    // apply the fill operation directly
                    // at this level and avoid "splitting" the PDE.
                    //
                    // If this entry is currently a PDE we must
                    // clear the entries of the lower levels to free
                    // unused level instances.
                    //
                    if ((pTarget->entryState != currEntryState) &&
                        (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        (entryVaLo == clippedVaLo) &&
                        (entryVaHi == clippedVaHi))
                    {
                        entryIndexFillEnd = entryIndex;
                        pendingFillCount++;

                        // Not the last iteration, keep batching.
                        if (entryIndex < entryIndexHi)
                        {
                            //
                            // This won't be the last entry, but we'll
                            // do the iteration there.
                            //
                            goto check_last_entry;
                        }
                    }

                    if (pendingFillCount != 0)
                    {
                        NvU32 progress = 0;
                        NvU32 index;

                        // Flush pending fills.
                        pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                                pLevel->pFmt,
                                                pLevelInst->pMemDesc,
                                                entryIndexFillStart,
                                                entryIndexFillEnd,
                                                pTarget->fillState,
                                                &progress);

                        if (progress != (entryIndexFillEnd - entryIndexFillStart + 1))
                        {
                            status = NV_ERR_INVALID_STATE;
                            NV_ASSERT_OR_GOTO(0, cleanupIter);
                        }

                        for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                            mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                        pendingFillCount = 0;
                    }

                    // Recheck the state after the fill. If there is nothing to do, continue.
                    if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                    {
                        goto check_last_entry;
                    }

                } // End of fill optimizations.

                // Determine the sublevel we need to operate on.
                status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                                   pLevel,
                                                   &subLevel,
                                                   clippedVaLo,
                                                   clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                //
                // Allocate the sublevel instances for the current PDE and update the current
                // Page Dir (i.e. write the PDE into the Page Dir) if needed.
                //
                status = _mmuWalkPdeAcquire(pWalk,
                                            pOpParams,
                                            pLevel,
                                            pLevelInst,
                                            entryIndex,
                                            subLevel,
                                            clippedVaLo,
                                            clippedVaHi,
                                            pSubLevelInsts);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                // Release op is done if the target sub-level is absent.
                if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
                {
                    goto check_last_entry;
                }

                //
                // Split a sparse PDE's range.
                // When only a subrange of the original PDE's VA range is being operated
                // on, we sparsify the remaining range lying outside the operational
                // subrange (clippedVaLo to clippedVaHi).
                //
                if (MMU_ENTRY_STATE_SPARSE == currEntryState)
                {
                    //
                    // Sparsify the lower part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaLo > entryVaLo)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    entryVaLo,
                                                    clippedVaLo - 1);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }

                    //
                    // Sparsify the upper part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaHi < entryVaHi)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    clippedVaHi + 1,
                                                    entryVaHi);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }
                } // Sparse PDE split

                // Resolve potential conflicts between the differently sized page tables.
                if (pLevel->pFmt->numSubLevels != 1 &&
                    !pOpParams->bIgnoreSubLevelConflicts)
                {
                    status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                              pOpParams,
                                                              pLevel,
                                                              pSubLevelInsts,
                                                              subLevel,
                                                              clippedVaLo,
                                                              clippedVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                }

                status = pOpParams->opFunc(pWalk,
                                           pOpParams,
                                           pLevel->subLevels + subLevel,
                                           pSubLevelInsts[subLevel],
                                           clippedVaLo,
                                           clippedVaHi);

                if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
                {
                    //
                    // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
                    // the recursive MMU Walker would have recursed down one
                    // more level. In this code block, we keep the iteration
                    // going by doing everything the recursion previously did.
                    //
                    status = NV_OK;

                    // Save off the current state of iteration for this level.
                    pLevel->iterInfo.pLevelInst   = pLevelInst;
                    pLevel->iterInfo.vaLo         = vaLo;
                    pLevel->iterInfo.vaHi         = vaHi;
                    pLevel->iterInfo.vaLevelBase  = vaLevelBase;
                    pLevel->iterInfo.entryIndexHi = entryIndexHi;
                    pLevel->iterInfo.entryIndex   = entryIndex;
                    pLevel->iterInfo.entryIndexFillStart = entryIndexFillStart;
                    pLevel->iterInfo.entryIndexFillEnd   = entryIndexFillEnd;
                    pLevel->iterInfo.pendingFillCount    = pendingFillCount;
                    pLevel->iterInfo.entryVaLo    = entryVaLo;

                    //
                    // Descend one level: recompute vaLevelBase, entryIndexLo and
                    // entryIndexHi for the sub-level's format.
                    //
                    pLevel       = pLevel->subLevels + subLevel;
                    vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, clippedVaLo);
                    entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, clippedVaLo);
                    entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, clippedVaHi);

                    //
                    // Now replace the current stack frame with the frame that is
                    // one level down. pLevel, vaLevelBase and entryIndexHi were
                    // already updated above.
                    //
                    pLevelInst   = pSubLevelInsts[subLevel];
                    vaLo         = clippedVaLo;
                    vaHi         = clippedVaHi;
                    entryIndex   = entryIndexLo;
                    entryIndexFillStart = 0;
                    entryIndexFillEnd   = 0;
                    pendingFillCount    = 0;
                }
                else
                {
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    //
                    // If NV_OK is returned above, the recursive MMU Walker would have reached
                    // the target format level and so reached the base case of its recursion.
                    // It would then return from recursive function calls and call pdeRelease
                    // for all levels whose sublevels are done being processed.
                    //

cleanupIter:
                    // PdeRelease itself immediately since this level does not recurse.
#if defined(__GNUC__) && !defined(__clang__)
                    // gcc falsely reports entryVaLo as maybe-uninitialized; it is definitely initialized here.
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
                    _mmuWalkPdeRelease(pWalk,
                                       pOpParams,
                                       pLevel,
                                       pLevelInst,
                                       entryIndex,
                                       entryVaLo);

check_last_entry:
                    //
                    // If the recursive MMU Walker did a continue on the current level,
                    // then it didn't do a pdeRelease of the current level.
                    // Even with the continue, for the current iteration,
                    // if entryIndex == entryIndexHi, then we're done with this level
                    // and need to do a pdeRelease on the next level up since we would
                    // return from the recursion.
                    //

                    //
                    // If we're at the original level and entryIndex == entryIndexHi,
                    // then we're done and need to exit the entire loop.
                    // If this is true, we've already done the _mmuWalkPdeRelease:
                    // either we called _mmuWalkPdeRelease right before this,
                    // or we skipped it via a goto check_last_entry continue.
                    // The MMU Walker is re-entrant and will otherwise pick up on
                    // parent levels when mmuWalkProcessPdes is called on sublevels.
                    //
                    if ((pLevel == pLevelOrig) && (entryIndex == entryIndexHi))
                    {
                        goto done;
                    }

                    //
                    // Now restore and finish previous frame(s).
                    //
                    // If this is the last processed sublevel of a level or an error has
                    // previously occurred, pdeRelease the level.
                    // Continue doing so for all parent levels.
                    // Once we've reached a non-finished level, iterate to the next entry.
                    //
                    while (entryIndex == entryIndexHi || status != NV_OK)
                    {
                        //
                        // Now replace the current stack frame with the frame that was one
                        // level above. This should never be NULL, since we'll already have
                        // exited after processing the root level. If it is NULL, we can't
                        // clean up any more anyway, so return immediately.
                        //
                        NV_ASSERT_OR_RETURN(pLevel->pParent != NULL, NV_ERR_INVALID_STATE);

                        pLevel       = pLevel->pParent;
                        pLevelInst   = pLevel->iterInfo.pLevelInst;
                        vaLo         = pLevel->iterInfo.vaLo;
                        vaHi         = pLevel->iterInfo.vaHi;
                        vaLevelBase  = pLevel->iterInfo.vaLevelBase;
                        entryIndexHi = pLevel->iterInfo.entryIndexHi;
                        entryIndex   = pLevel->iterInfo.entryIndex;
                        entryIndexFillStart = pLevel->iterInfo.entryIndexFillStart;
                        entryIndexFillEnd   = pLevel->iterInfo.entryIndexFillEnd;
                        pendingFillCount    = pLevel->iterInfo.pendingFillCount;

                        _mmuWalkPdeRelease(pWalk,
                                           pOpParams,
                                           pLevel,
                                           pLevelInst,
                                           entryIndex,
                                           pLevel->iterInfo.entryVaLo);

                        //
                        // If we're at the original level and entryIndex == entryIndexHi,
                        // then we're done and need to exit the entire loop.
                        //
                        if ((pLevel == pLevelOrig) && (entryIndex == entryIndexHi))
                        {
                            goto done;
                        }
                    }

                    //
                    // Once the above loop is done and we reach here, we're
                    // ready to process the next entry in the list. Only iterate here,
                    // not in the overall loop, since we may have iterated down in the
                    // above else block and don't want to increment before processing
                    // the first entry on a new lower level.
                    //
                    entryIndex++;
                }
            } // per entry loop

            //
            // If this assertion fails, it is a result of a programming
            // error in the iterative MMU Walker implementation. We should
            // have iterated back upwards through the MMU state to the original
            // level even on failure.
            //
            NV_ASSERT_OR_RETURN(pLevel == pLevelOrig, NV_ERR_INVALID_STATE);
        }
done:
        return status;
    }
    else
    {
        // Recursive MMU Walker implementation.
        NV_STATUS    status       = NV_OK;
        NvU64        vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        NvU32        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
        NvU32        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
        NvU32        entryIndex;
        NvU32        index;
        NvU32        entryIndexFillStart = 0;
        NvU32        entryIndexFillEnd;
        NvU32        pendingFillCount = 0;

        NV_ASSERT_OR_RETURN(NULL != pOpParams, NV_ERR_INVALID_ARGUMENT);

        // Walk over each relevant entry (PDE) within this Page Level.
        for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
        {
            const NvU64           entryVaLo   = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                                     vaLevelBase, entryIndex);
            const NvU64           entryVaHi   = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                                     vaLevelBase, entryIndex);
            const NvU64           clippedVaLo = NV_MAX(vaLo, entryVaLo);
            const NvU64           clippedVaHi = NV_MIN(vaHi, entryVaHi);
            const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
            NvU32                 subLevel       = 0;
            MMU_WALK_LEVEL_INST  *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

            // Optimizations for release operations.
            if (pOpParams->bRelease)
            {
                // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    !pLevelInst->pStateTracker[entryIndex].bHybrid)
                    continue;
            }

            // Optimizations for fill operations.
            if (pOpParams->bFill)
            {
                const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                if (pendingFillCount == 0)
                    entryIndexFillStart = entryIndexFillEnd = entryIndex;

                //
                // Check if the entire entry's coverage is being filled to
                // a constant state.
                //
                // If this entry is not currently a PDE we can
                // apply the fill operation directly
                // at this level and avoid "splitting" the PDE.
                //
                // If this entry is currently a PDE we must
                // clear the entries of the lower levels to free
                // unused level instances.
                //
                if ((pTarget->entryState != currEntryState) &&
                    (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    (entryVaLo == clippedVaLo) &&
                    (entryVaHi == clippedVaHi))
                {
                    entryIndexFillEnd = entryIndex;
                    pendingFillCount++;

                    // Not the last iteration, keep batching.
                    if (entryIndex < entryIndexHi)
                        continue;
                }

                if (pendingFillCount != 0)
                {
                    NvU32 progress = 0;

                    // Flush pending fills.
                    pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                            pLevel->pFmt,
                                            pLevelInst->pMemDesc,
                                            entryIndexFillStart,
                                            entryIndexFillEnd,
                                            pTarget->fillState,
                                            &progress);

                    NV_ASSERT_OR_RETURN(
                        progress == (entryIndexFillEnd - entryIndexFillStart + 1),
                        NV_ERR_INVALID_STATE);

                    for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                        mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                    pendingFillCount = 0;
                }

                // Recheck the state after the fill. If there is nothing to do, continue.
                if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                    continue;

            } // End of fill optimizations.

            // Determine the sublevel we need to operate on.
            status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                               pLevel,
                                               &subLevel,
                                               clippedVaLo,
                                               clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            //
            // Allocate the sublevel instances for the current PDE and update the current
            // Page Dir (i.e. write the PDE into the Page Dir) if needed.
            //
            status = _mmuWalkPdeAcquire(pWalk,
                                        pOpParams,
                                        pLevel,
                                        pLevelInst,
                                        entryIndex,  // PDE index being processed
                                        subLevel,    // Sub-level processed within the PDE
                                        clippedVaLo, // Low VA for the PDE
                                        clippedVaHi, // High VA for the PDE
                                        pSubLevelInsts);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            // Release op is done if the target sub-level is absent.
            if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
            {
                continue;
            }

            //
            // Split a sparse PDE's range.
            // When only a subrange of the original PDE's VA range is being operated
            // on, we sparsify the remaining range lying outside the operational
            // subrange (clippedVaLo to clippedVaHi).
            //
            if (MMU_ENTRY_STATE_SPARSE == currEntryState)
            {
                //
                // Sparsify the lower part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaLo > entryVaLo)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       entryVaLo,
                                                       clippedVaLo - 1);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }

                //
                // Sparsify the upper part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaHi < entryVaHi)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       clippedVaHi + 1,
                                                       entryVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }
            } // Sparse PDE split

            // Resolve potential conflicts between the differently sized page tables.
            if (pLevel->pFmt->numSubLevels != 1 &&
                !pOpParams->bIgnoreSubLevelConflicts)
            {
                status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                          pOpParams,
                                                          pLevel,
                                                          pSubLevelInsts,
                                                          subLevel,
                                                          clippedVaLo,
                                                          clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
            }

            // Recurse to update the next level for this PDE.
            status = pOpParams->opFunc(pWalk,
                                       pOpParams,
                                       pLevel->subLevels + subLevel,
                                       pSubLevelInsts[subLevel],
                                       clippedVaLo,
                                       clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

cleanup:
            // Free unused sublevel instances. Clear the PDE if all sublevels are deallocated.
            _mmuWalkPdeRelease(pWalk,
                               pOpParams,
                               pLevel,
                               pLevelInst,
                               entryIndex,
                               entryVaLo);

            // Stop processing PDEs if we are in an error state.
            if (NV_OK != status)
                break;
        } // per entry loop
        return status;
    }
}

/*!
 * @brief This function allocates the root Page Directory and commits it to the
 * related channels.
 *
 * @param[in]  vaLo          The lower end of the Virtual Address range that is
 *                           being processed.
 * @param[in]  vaHi          The upper end of the Virtual Address range that is
 *                           being processed.
 *
 * @param[in]  bCommit       Force commit the PDB.
 *
 * @return NV_OK if allocating this level succeeds.
 *         Other errors, if not.
 */
NV_STATUS
mmuWalkRootAcquire
(
    MMU_WALK *pWalk,
    NvU64     vaLo,
    NvU64     vaHi,
    NvBool    bCommit
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool               bChanged   = NV_FALSE;

    // Acquire root level instance memory.
    NV_ASSERT_OK_OR_RETURN(
        _mmuWalkLevelInstAcquire(pWalk, &pWalk->root, vaLo, vaHi,
                                 NV_TRUE, NV_FALSE, bCommit, &bChanged,
                                 &pLevelInst, NV_FALSE /*bInitNv4k*/));

    // We check pLevelInst to catch the corner case where Commit() is called before PDB allocation.
    if (bChanged || (bCommit && pLevelInst))
    {
        NvBool bDone;

        // Bind this Page Dir to the affected channels.
        bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt,
                                      pLevelInst->pMemDesc, NV_FALSE);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);
    }

    return NV_OK;
}
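
/*
 * Hypothetical usage sketch: acquire the root before operating on a VA
 * range, and release it when the range no longer needs backing.
 *
 *   NV_ASSERT_OK(mmuWalkRootAcquire(pWalk, vaLo, vaHi, NV_FALSE));
 *   // ... map/unmap/sparsify operations over [vaLo, vaHi] ...
 *   mmuWalkRootRelease(pWalk);
 *
 * mmuWalkRootRelease() only frees the root instance once all tracked
 * entries (valid + sparse) and reservations have dropped to zero, so a
 * release while the root is still in use is a harmless no-op.
 */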

/*!
 * @brief This function releases the root Page Directory.
 */
void
mmuWalkRootRelease
(
    MMU_WALK *pWalk
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = pWalk->root.pInstances;
    if (NULL != pLevelInst)
    {
        // Free the level instance if the entry ref count is 0.
        if ((0 == pLevelInst->numValid + pLevelInst->numSparse) &&
            (0 == pLevelInst->numReserved))
        {
            NvBool bDone;

            // Commit NULL root page directory (clear usage).
            bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt, NULL, NV_FALSE);
            NV_ASSERT(bDone);

            // Free unused root memory.
            _mmuWalkLevelInstRelease(pWalk, &pWalk->root, pLevelInst);
        }
    }
}

/*!
 * @brief This function updates the @ref MMU_WALK_LEVEL_INST::pStateTracker for the
 *        entry specified by entryIndex.
 *
 * @param[in]     entryIndex     Index of the entry whose state needs to be updated.
 * @param[in]     newEntryState  The new state of the entry specified by entryIndex.
 */
void
mmuWalkSetEntryState
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    MMU_ENTRY_STATE      newEntryState
)
{
    MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);

    // Decrement the ref count for the current state.
    switch (currEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            NV_ASSERT(0 != pLevelInst->numValid);
            pLevelInst->numValid--;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            NV_ASSERT(0 != pLevelInst->numSparse);
            pLevelInst->numSparse--;
            break;
        case MMU_ENTRY_STATE_NV4K:
            NV_ASSERT(0 != pLevelInst->numNv4k);
            pLevelInst->numNv4k--;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Increment the new state's ref count.
    switch (newEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            pLevelInst->numValid++;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            pLevelInst->numSparse++;
            break;
        case MMU_ENTRY_STATE_NV4K:
            pLevelInst->numNv4k++;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Commit the new state.
    pLevelInst->pStateTracker[entryIndex].state = newEntryState;
}
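
/*
 * For example (illustrative): transitioning an entry from INVALID to SPARSE
 *
 *   mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_SPARSE);
 *
 * leaves numValid untouched and increments numSparse. These per-state
 * counts are what, e.g., mmuWalkRootRelease() consults to decide when a
 * level instance can be freed.
 */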

void
mmuWalkSetEntryReserved
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bReserved
)
{
    if (pLevelInst->pStateTracker[entryIndex].bReserved)
    {
        NV_ASSERT(0 != pLevelInst->numReserved);
        pLevelInst->numReserved--;
    }
    if (bReserved)
    {
        pLevelInst->numReserved++;
    }
    pLevelInst->pStateTracker[entryIndex].bReserved = bReserved;
}

void
mmuWalkSetEntryHybrid
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bHybrid
)
{
    if (pLevelInst->pStateTracker[entryIndex].bHybrid)
    {
        NV_ASSERT(0 != pLevelInst->numHybrid);
        pLevelInst->numHybrid--;
    }
    if (bHybrid)
    {
        pLevelInst->numHybrid++;
    }
    pLevelInst->pStateTracker[entryIndex].bHybrid = bHybrid;
}

/**
 * @brief      Calculate the target entry indices that cover the VA range of
 *             the source entries.
 *
 * @details    For example, entry 1 in a 64K PT is aligned to 4K PT entries 16 to
 *             31, and 4K PTEs 1 to 18 are covered by 64K PTEs 0 to 1.
 *
 *             This is needed for NV4K encoding: updating the big page table
 *             according to the small page table requires translating indices
 *             between the two formats.
 *
 * @param[in]  pPageFmtIn   Source format
 * @param[in]  indexLoIn    The low source index
 * @param[in]  indexHiIn    The high source index
 * @param[in]  pPageFmtOut  Target format
 * @param[out] pIndexLoOut  The low result index
 * @param[out] pIndexHiOut  The high result index
 */
void
mmuFmtCalcAlignedEntryIndices
(
    const MMU_FMT_LEVEL *pPageFmtIn,
    const NvU32 indexLoIn,
    const NvU32 indexHiIn,
    const MMU_FMT_LEVEL *pPageFmtOut,
    NvU32 *pIndexLoOut,
    NvU32 *pIndexHiOut
)
{
    NvU64 pageSizeIn, pageSizeOut;
    NvU64 pageSizeRatio;
    NV_ASSERT(pIndexLoOut != NULL && pIndexHiOut != NULL);
    NV_ASSERT(pPageFmtIn != NULL && pPageFmtOut != NULL);

    pageSizeIn = mmuFmtLevelPageSize(pPageFmtIn);
    pageSizeOut = mmuFmtLevelPageSize(pPageFmtOut);

    if (pageSizeIn < pageSizeOut)
    {
        pageSizeRatio = pageSizeOut / pageSizeIn;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn / pageSizeRatio);
        *pIndexHiOut = (NvU32)(indexHiIn / pageSizeRatio);
    }
    else
    {
        pageSizeRatio = pageSizeIn / pageSizeOut;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn * pageSizeRatio);
        *pIndexHiOut = (NvU32)((indexHiIn + 1) * pageSizeRatio - 1);
    }
}
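
/*
 * Worked example (assuming 64K big-page and 4K small-page formats, so the
 * entry ratio is 16; pFmt64K/pFmt4K are hypothetical format pointers):
 *
 *   NvU32 lo, hi;
 *
 *   // 64K -> 4K: big entry 1 expands to small entries 16..31.
 *   mmuFmtCalcAlignedEntryIndices(pFmt64K, 1, 1, pFmt4K, &lo, &hi);
 *   // lo == 16, hi == 31
 *
 *   // 4K -> 64K: small entries 1..18 collapse to big entries 0..1.
 *   mmuFmtCalcAlignedEntryIndices(pFmt4K, 1, 18, pFmt64K, &lo, &hi);
 *   // lo == 0, hi == 1
 */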
1094 
1095 /*----------------------------Static Functions--------------------------------*/
1096 
1097 static NV_STATUS
_mmuWalkLevelInit(const MMU_WALK * pWalk,MMU_WALK_LEVEL * pParent,const MMU_FMT_LEVEL * pLevelFmt,MMU_WALK_LEVEL * pLevel)1098 _mmuWalkLevelInit
1099 (
1100     const MMU_WALK       *pWalk,
1101     MMU_WALK_LEVEL       *pParent,
1102     const MMU_FMT_LEVEL  *pLevelFmt,
1103     MMU_WALK_LEVEL       *pLevel
1104 )
1105 {
1106     // Init pointers.
1107     pLevel->pFmt    = pLevelFmt;
1108     pLevel->pParent = pParent;
1109 
1110     if (0 != pLevelFmt->numSubLevels)
1111     {
1112         NvU32       subLevel;
1113         const NvU32 size = pLevelFmt->numSubLevels * (NvU32)sizeof(*pLevel->subLevels);
1114 
1115         // Allocate sub-level array.
1116         pLevel->subLevels = portMemAllocNonPaged(size);
1117         if (pLevel->subLevels == NULL)
1118             return NV_ERR_NO_MEMORY;
1119 
1120         portMemSet(pLevel->subLevels, 0, size);
1121 
1122         // Recursively create each sub-level.
1123         for (subLevel = 0; subLevel < pLevelFmt->numSubLevels; ++subLevel)
1124         {
1125             NV_ASSERT_OK_OR_RETURN(
1126                 _mmuWalkLevelInit(pWalk, pLevel, pLevelFmt->subLevels + subLevel,
1127                                   pLevel->subLevels + subLevel));
1128         }
1129     }
1130 
1131     return NV_OK;
1132 }
1133 
1134 static void
_mmuWalkLevelDestroy(const MMU_WALK * pWalk,MMU_WALK_LEVEL * pLevel)1135 _mmuWalkLevelDestroy
1136 (
1137     const MMU_WALK *pWalk,
1138     MMU_WALK_LEVEL *pLevel
1139 )
1140 {
1141     NvU32 subLevel;
1142 
1143     if (NULL != pLevel->subLevels)
1144     {
1145         // Recursively destroy each sub-level.
1146         for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
1147         {
1148             _mmuWalkLevelDestroy(pWalk, pLevel->subLevels + subLevel);
1149         }
1150         // Free sub-level array.
1151         portMemFree(pLevel->subLevels);
1152     }
1153 
1154     // All level instance memory should be freed already.
1155     NV_ASSERT(NULL == pLevel->pInstances);
1156 }
1157 
1158 /**
1159  * @brief      Resolve upcoming state conflicts before mmu walk operations
1160  *
1161  * @example    Say we are to mmuWalkMap VA range [vaLo, vaHi] on small PT.
1162  * Assume we have 4K PT and 64K PT as our small PT and big PT, and [vaLo, vaHi]
1163  * is a strict subset of VA range covered by BigPTE[1, 3] and SmallPTE[18, 61].
1164  * Let's say BigPTE[1, 3] are sparse right now.
1165  *
1166  * To resolve the conflict, we need to preserve sparse state for part of the
1167  * VA range that is not going to be mapped. We need to move those states from
1168  * BigPT to SmallPT.
1169  *
1170  * Before:
1171  *  BigPTE[1, 3]: sparse,   SmallPTE[16 - 63]: invalid
1172  *  (BigPTE[1, 3] and SmallPTE[16 - 63] are VA aligned)
1173  * After:
1174  *  BigPTE[1, 3]: invalid,  SmallPTE[16 - 17]: sparse
1175  *                          SmallPTE[18 - 61]: invalid, will later be mapped
1176  *                          SmallPTE[62 - 63]: sparse
1177  *
1178  * @example    If we are to mmuWalkMap on big PT instead of samll PT,
1179  * and sparse state was on small PT, we just need to invalidate the small PTEs.
1180  *
1181  * Before:
1182  *  BigPTE[1, 3]:       invalid,
1183  *  SmallPTE[16 - 63]:  sparse
1184  * After:
1185  *  BigPTE[1, 3]:       invalid, will later be mapped
1186  *  SmallPTE[16 - 63]:  invalid
1187  *
1188  * @return     NV_OK on success, no other values for now
1189  */
1190 static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts(const MMU_WALK * pWalk,const MMU_WALK_OP_PARAMS * pOpParams,MMU_WALK_LEVEL * pLevel,MMU_WALK_LEVEL_INST * pSubLevelInsts[],NvU32 subLevelIdx,NvU64 clippedVaLo,NvU64 clippedVaHi)1191 _mmuWalkResolveSubLevelConflicts
1192 (
1193     const MMU_WALK              *pWalk,
1194     const MMU_WALK_OP_PARAMS    *pOpParams,
1195     MMU_WALK_LEVEL              *pLevel,
1196     MMU_WALK_LEVEL_INST         *pSubLevelInsts[],
1197     NvU32                        subLevelIdx,
1198     NvU64                        clippedVaLo,
1199     NvU64                        clippedVaHi
1200 )
1201 {
1202     NvU32     i                             = 0;
1203     NvU32     progress                      = 0;
1204     NV_STATUS status                        = NV_OK;
1205     NvBool    bConflictLo                   = NV_FALSE;
1206     NvBool    bConflictHi                   = NV_FALSE;
1207     const MMU_FMT_LEVEL *pLevelFmtBig       = pLevel->subLevels[0].pFmt;
1208     const MMU_FMT_LEVEL *pLevelFmtSmall     = pLevel->subLevels[1].pFmt;
1209     MMU_WALK_LEVEL_INST *pLevelBigInst      = pSubLevelInsts[0];
1210     MMU_WALK_LEVEL_INST *pLevelSmallInst    = pSubLevelInsts[1];
1211     // Entry indicies for target page table
1212     NvU32 entryIndexLo, entryIndexHi;
1213     // Entry indicies involved in both page tables
1214     NvU32 indexLo_Small, indexHi_Small, indexLo_Big, indexHi_Big;
1215 
1216     if (0 == subLevelIdx)
1217     {
1218         entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaLo);
1219         entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaHi);
1220         indexLo_Big = entryIndexLo;
1221         indexHi_Big = entryIndexHi;
1222         mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
1223             pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
1224     }
1225     else
1226     {
1227         entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaLo);
1228         entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaHi);
1229         mmuFmtCalcAlignedEntryIndices(pLevelFmtSmall, entryIndexLo,
1230             entryIndexHi, pLevelFmtBig, &indexLo_Big, &indexHi_Big);
1231         mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
1232             pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
1233     }

    // Check whether the involved small PTEs need to be sparsified
    if (1 == subLevelIdx && NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        // Check the lower part
        MMU_ENTRY_STATE entryStateBig;
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexLo_Big);
        bConflictLo = (MMU_ENTRY_STATE_SPARSE == entryStateBig);

        // Check the higher part
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexHi_Big);
        bConflictHi = (MMU_ENTRY_STATE_SPARSE == entryStateBig);
    }

    if (bConflictLo && entryIndexLo > indexLo_Small)
    {
        // Sparsify the lower range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, indexLo_Small, entryIndexLo - 1,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == entryIndexLo - indexLo_Small,
            NV_ERR_INVALID_STATE);

        for (i = indexLo_Small; i <= entryIndexLo - 1; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    if (bConflictHi && entryIndexHi < indexHi_Small)
    {
        // Sparsify the higher range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, entryIndexHi + 1, indexHi_Small,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_Small - entryIndexHi,
            NV_ERR_INVALID_STATE);

        for (i = entryIndexHi + 1; i <= indexHi_Small; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    // Invalidate the VA range in the other page table
    if (NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        NvU32 indexLo_tmp, indexHi_tmp;
        const MMU_FMT_LEVEL *pSubLevelFmt;
        MMU_WALK_LEVEL_INST *pSubLevelInst;

        if (subLevelIdx == 0)
        {
            indexLo_tmp = indexLo_Small;
            indexHi_tmp = indexHi_Small;
            pSubLevelFmt = pLevelFmtSmall;
            pSubLevelInst = pLevelSmallInst;
        }
        else
        {
            indexLo_tmp = indexLo_Big;
            indexHi_tmp = indexHi_Big;
            pSubLevelFmt = pLevelFmtBig;
            pSubLevelInst = pLevelBigInst;
        }

        pWalk->pCb->FillEntries(pWalk->pUserCtx, pSubLevelFmt,
            pSubLevelInst->pMemDesc, indexLo_tmp, indexHi_tmp,
            MMU_WALK_FILL_INVALID, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_tmp - indexLo_tmp + 1,
            NV_ERR_INVALID_STATE);

        for (i = indexLo_tmp; i <= indexHi_tmp; i++)
        {
            mmuWalkSetEntryState(pSubLevelInst, i, MMU_ENTRY_STATE_INVALID);
        }
    }

    return status;
}
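
/*
 * Example of the conflict resolution above (illustrative): suppose the big
 * (e.g. 64K) page table holds sparse entries and a new 4K mapping targets
 * only the middle of that sparse range. The uncovered head and tail of the
 * aligned range are re-expressed as sparse 4K entries, the overlapping
 * big-page entries are invalidated, and the 4K table becomes the sole owner
 * of the range.
 */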

/*!
 * Lazily allocates and initializes a level instance.
 */
static NV_STATUS
_mmuWalkLevelInstAcquire
(
    const MMU_WALK       *pWalk,
    MMU_WALK_LEVEL       *pLevel,
    const NvU64           vaLo,
    const NvU64           vaHi,
    const NvBool          bTarget,
    const NvBool          bRelease,
    const NvBool          bCommit,
    NvBool               *pBChanged,
    MMU_WALK_LEVEL_INST **ppLevelInst,
    const NvBool          bInitNv4k
)
{
    NV_STATUS            status;
    MMU_WALK_MEMDESC    *pOldMem;
    NvU32                oldSize;
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool               bNew       = NV_FALSE;

    // Lookup level instance.
    if (NV_OK != btreeSearch(vaLo, (NODE**)&pLevelInst, (NODE*)pLevel->pInstances))
    {
        NvU32 numBytes;

        if (!bTarget || bRelease)
        {
            // Skip missing non-target instances.
            *ppLevelInst = NULL;
            return NV_OK;
        }

        // We only call Commit() on an already-allocated page directory.
        NV_ASSERT_OR_RETURN(!bCommit, NV_ERR_INVALID_STATE);

        // Mark as newly allocated.
        bNew = NV_TRUE;

        // Allocate missing target instances.
        pLevelInst = portMemAllocNonPaged(sizeof(*pLevelInst));
        status = (pLevelInst == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst, 0, sizeof(*pLevelInst));

        // Insert the new node into the tree of instances for this page level.
        pLevelInst->node.keyStart = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        pLevelInst->node.keyEnd   = mmuFmtLevelVirtAddrHi(pLevel->pFmt, vaHi);

        status = btreeInsert(&pLevelInst->node, (NODE**)&pLevel->pInstances);
        NV_ASSERT_OR_GOTO(NV_OK == status, done);

        // Allocate entry tracker.
        numBytes = mmuFmtLevelEntryCount(pLevel->pFmt) * sizeof(MMU_ENTRY_INFO);
        pLevelInst->pStateTracker = portMemAllocNonPaged(numBytes);
        status = (pLevelInst->pStateTracker == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst->pStateTracker, 0, numBytes);
        if (bInitNv4k)
        {
            NvU32 i;
            for (i = 0; i < mmuFmtLevelEntryCount(pLevel->pFmt); ++i)
            {
                mmuWalkSetEntryState(pLevelInst, i, MMU_ENTRY_STATE_NV4K);
            }
        }
    }

    // Save original memory info.
    pOldMem = pLevelInst->pMemDesc;
    oldSize = pLevelInst->memSize;

    // Allocate (possibly reallocating) memory for this level instance.
    status = pWalk->pCb->LevelAlloc(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo),
                                    vaHi,
                                    bTarget,
                                    &pLevelInst->pMemDesc,
                                    &pLevelInst->memSize,
                                    pBChanged);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    if (*pBChanged)
    {
        const NvU32 entryIndexLo = oldSize / pLevel->pFmt->entrySize;
        const NvU32 entryIndexHi = (pLevelInst->memSize / pLevel->pFmt->entrySize) - 1;
        NvU32       progress     = 0;

        //
        // Default state for new entries:
        // NV4K for the big page table if ATS is enabled.
        //
        MMU_WALK_FILL_STATE newEntryState = bInitNv4k ? MMU_WALK_FILL_NV4K :
                                                        MMU_WALK_FILL_INVALID;
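
        //
        // Worked example (illustrative; an 8-byte entrySize is an assumed
        // value): growing an instance from oldSize = 4096 to memSize = 8192
        // gives entryIndexLo = 4096 / 8 = 512 and entryIndexHi =
        // 8192 / 8 - 1 = 1023, so only the newly added entries [512, 1023]
        // are filled below, while entries [0, 511] are preserved by
        // CopyEntries.
        //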

        NV_ASSERT(NULL != pLevelInst->pMemDesc);
        NV_ASSERT(entryIndexLo <= entryIndexHi);

        // We only call Commit() on an already-allocated page directory.
        if (bCommit)
        {
            status = NV_ERR_INVALID_STATE;
            NV_ASSERT_OR_GOTO(NV_OK == status, done);
        }

        // Copy existing entries from the old memory to the new.
        if (entryIndexLo > 0)
        {
            NV_ASSERT(NULL != pWalk->pCb->CopyEntries);
            pWalk->pCb->CopyEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pOldMem,
                                    pLevelInst->pMemDesc,
                                    0,
                                    entryIndexLo - 1,
                                    &progress);
            NV_ASSERT(progress == entryIndexLo);

            // Free old memory.
            pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt,
                                  pLevelInst->node.keyStart, pOldMem);
        }

        if (pWalk->bInvalidateOnReserve)
        {
            // Clear new entries to invalid.
            pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pLevelInst->pMemDesc,
                                    entryIndexLo,
                                    entryIndexHi,
                                    newEntryState,
                                    &progress);
            NV_ASSERT(progress == entryIndexHi - entryIndexLo + 1);
        }
    }
    else
    {
        // Ensure the memory info hasn't changed.
        NV_ASSERT(pOldMem == pLevelInst->pMemDesc && oldSize == pLevelInst->memSize);
    }

    // Commit return.
    *ppLevelInst = pLevelInst;

done:
    // Cleanup newly allocated instance on failure.
    if (NV_OK != status &&
        bNew && NULL != pLevelInst)
    {
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
    }
    return status;
}

/*!
 * Frees an unused level instance.
 */
static void
_mmuWalkLevelInstRelease
(
    const MMU_WALK      *pWalk,
    MMU_WALK_LEVEL      *pLevel,
    MMU_WALK_LEVEL_INST *pLevelInst
)
{
    NV_ASSERT(0 == pLevelInst->numValid);
    NV_ASSERT(0 == pLevelInst->numReserved);
    // Unlink.
    btreeUnlink(&pLevelInst->node, (NODE**)&pLevel->pInstances);
    // Free.
    if (NULL != pLevelInst->pMemDesc)
    {
        pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->node.keyStart,
                              pLevelInst->pMemDesc);
    }
    portMemFree(pLevelInst->pStateTracker);
    portMemFree(pLevelInst);
}

/*!
 * Allocates a sub-level MMU_WALK_LEVEL_INST for a given PDE. If the
 * sub-level allocation succeeds, the parent level is updated.
 */
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU32               subLevel,
    const NvU64               vaLo,
    const NvU64               vaHi,
    MMU_WALK_LEVEL_INST      *pSubLevelInsts[]
)
{
    NV_STATUS               status  = NV_OK;
    NvBool                  bCommit = NV_FALSE;
    NvU32                   i;
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
    NvU64                   vaLimit = vaHi;
    const NvU32             numSubLevels = pLevel->pFmt->numSubLevels;
    MMU_WALK_LEVEL_INST    *pCurSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

    //
    // Determine the minimum VA limit of existing sub-levels.
    // This is required to keep parallel partial page tables in sync.
    // MMU HW that supports partial-size tables selects the size in the
    // parent PDE, so each sub-level *MUST* be the same partial size
    // once allocated.
    //
    if (numSubLevels > 1)
    {
        for (i = 0; i < numSubLevels; ++i)
        {
            // Lookup sub-level instance.
            if (NV_OK == btreeSearch(vaLo, (NODE**)&pCurSubLevelInsts[i],
                                     (NODE*)pLevel->subLevels[i].pInstances))
            {
                const MMU_FMT_LEVEL *pSubLevelFmt = pLevel->pFmt->subLevels + i;
                const NvU64          minVaLimit =
                    mmuFmtLevelVirtAddrLo(pSubLevelFmt, vaLo) +
                        (pCurSubLevelInsts[i]->memSize /
                         pSubLevelFmt->entrySize *
                         mmuFmtLevelPageSize(pSubLevelFmt)) - 1;

                vaLimit = NV_MAX(vaLimit, minVaLimit);
            }
        }
    }
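
    //
    // Worked example (illustrative; the 4K page size and 8-byte entrySize
    // are assumed values): a partial sub-level table of memSize = 1024
    // bytes holds 1024 / 8 = 128 entries and thus covers 128 * 4K = 512K
    // of VA starting at the sub-level base, so minVaLimit = base + 512K - 1.
    // Raising vaLimit to at least this value makes any parallel sub-level
    // get allocated with the same partial size.
    //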

    //
    // The loop is reversed for NV4K: if there are multiple sub-levels,
    // handle the small page table first, then the big one.
    //
    for (i = numSubLevels; i > 0; --i)
    {
        NvBool bChanged = NV_FALSE;
        NvU32  subLevelIdx = i - 1;
        NvBool bTarget = (subLevelIdx == subLevel);
        NvBool bInitNv4k = NV_FALSE;

        //
        // If NV4K is required (when ATS is enabled), acquire the 64K PT
        // whenever the 4K PT has been acquired and the 64K PT was not
        // already there.
        //
        if (pWalk->flags.bAtsEnabled && subLevelIdx == 0 &&
            numSubLevels > 1 && !pOpParams->bRelease)
        {
            if (pSubLevelInsts[1] != NULL)
            {
                bTarget = NV_TRUE;
            }
            if (pSubLevelInsts[0] == NULL)
            {
                bInitNv4k = NV_TRUE;
            }
        }
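
        //
        // NV4K cases handled above (sub-level 0 = big/64K, sub-level 1 =
        // small/4K): if the 4K instance already exists, the 64K table is
        // treated as a target so it is acquired alongside it; if the 64K
        // instance does not exist yet, its new entries are initialized to
        // NV4K instead of INVALID.
        //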

        // Acquire sub-level instance.
        NV_ASSERT_OK_OR_RETURN(
            _mmuWalkLevelInstAcquire(pWalk, pLevel->subLevels + subLevelIdx,
                                     vaLo, vaLimit, bTarget,
                                     pOpParams->bRelease, pOpParams->bCommit,
                                     &bChanged, &pSubLevelInsts[subLevelIdx],
                                     bInitNv4k));
        if (NULL == pSubLevelInsts[subLevelIdx])
        {
            // Skip missing non-target instances.
            NV_ASSERT(pOpParams->bRelease || !bTarget);
            continue;
        }

        // Track info for commit.
        bCommit        |= bChanged;
        pSubMemDescs[subLevelIdx] = pSubLevelInsts[subLevelIdx]->pMemDesc;
    }

    // With ATS enabled, a 4K sub-level must never exist without its 64K sub-level.
    if (pWalk->flags.bAtsEnabled &&
        numSubLevels > 1 &&
        pSubLevelInsts[1] != NULL &&
        pSubLevelInsts[0] == NULL)
    {
        NV_ASSERT(0);
    }

    if (bCommit || pOpParams->bCommit)
    {
        NvBool bDone;

        // Update the current PDE.
        bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                      entryIndex, pSubMemDescs);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);

        // Track entry as a PDE.
        mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_IS_PDE);
    }

    return status;
}

/*!
 * Frees the sub-levels of the PDE passed in if their refcount is 0. It
 * also clears the PDE if both sub-levels are released.
 */
static void NV_NOINLINE
_mmuWalkPdeRelease
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU64               entryVaLo
)
{
    MMU_WALK_LEVEL_INST    *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
    NvBool                  bChanged = NV_FALSE;
    NvU32                   subLevel, i;
    MMU_ENTRY_STATE         state = MMU_ENTRY_STATE_INVALID;

    // Apply target state if this is a fill operation.
    if (pOpParams->bFill)
    {
        const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *)pOpParams->pOpCtx;
        state = pTarget->entryState;
    }

    //
    // Loop through the sub-levels and free up those with 0 ref count.
    // We operate on a temp copy of the PDE because we want to update the
    // PDE memory before releasing the actual sub-level pointers. We need this
    // order to prevent any state inconsistency between the parent MMU_DESC_PDE
    // and the sub-level MMU_WALK_LEVEL_INST structures.
    //
    for (i = pLevel->pFmt->numSubLevels; i > 0; --i)
    {
        subLevel = i - 1;
        if (NV_OK == btreeSearch(entryVaLo, (NODE**)&pSubLevelInsts[subLevel],
                                 (NODE*)pLevel->subLevels[subLevel].pInstances))
        {
            MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];

            // For ATS NV4K, check whether we need to free the big page table.
            if (pLevel->pFmt->numSubLevels == 2 && subLevel == 0)
            {
                if (pWalk->flags.bAtsEnabled)
                {
                    if (pSubLevelInsts[0]->numNv4k ==
                            mmuFmtLevelEntryCount(pLevel->subLevels[0].pFmt) &&
                        (0 == pSubLevelInsts[0]->numReserved) &&
                        (pSubMemDescs[1] == NULL || bChanged == NV_TRUE))
                    {
                        bChanged = NV_TRUE;
                        continue;
                    }
                    else
                    {
                        state = MMU_ENTRY_STATE_IS_PDE;
                        pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
                        continue;
                    }
                }
            }
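
            //
            // In other words (ATS case): the big page table may be freed
            // only when every one of its entries is back to NV4K, none of
            // them is reserved, and the 4K sub-level is already gone or is
            // being freed in this pass; otherwise it must stay referenced
            // from the PDE.
            //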

            if ((0 != (pSubLevelInst->numValid + pSubLevelInst->numSparse)) ||
                (0 != (pSubLevelInst->numReserved + pSubLevelInst->numHybrid)))
            {
                // We've got at least one non-empty sub-level, so leave it mapped.
                state = MMU_ENTRY_STATE_IS_PDE;
                pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
            }
            else if (NULL != pSubLevelInst->pMemDesc)
            {
                // We're going to free a sub-level.
                bChanged = NV_TRUE;
            }
        }
    }

    //
    // The failure path may have aborted early before the sub-levels were
    // processed, so also check that the current state matches the expected one.
    //
    bChanged |= (state != mmuWalkGetEntryState(pLevelInst, entryIndex));

    //
    // If we've changed any sub-level, we need to update the PDE in the parent
    // page directory.
    //
    if (bChanged)
    {
        NvBool bDone;
        NvU32  progress = 0;

        // Write the PDE state implied by the remaining (non-freed) sub-levels.
        switch (state)
        {
        case MMU_ENTRY_STATE_SPARSE:
        case MMU_ENTRY_STATE_INVALID:
            pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pLevelInst->pMemDesc,
                                    entryIndex,
                                    entryIndex,
                                    MMU_ENTRY_STATE_SPARSE == state ?
                                        MMU_WALK_FILL_SPARSE : MMU_WALK_FILL_INVALID,
                                    &progress);
            NV_ASSERT_OR_RETURN_VOID(progress == 1);
            // Clear the hybrid flag since all sub-levels are now released.
            if (pLevelInst->pStateTracker[entryIndex].bHybrid)
            {
                mmuWalkSetEntryHybrid(pLevelInst, entryIndex, NV_FALSE);
            }
            break;
        case MMU_ENTRY_STATE_IS_PDE:
            bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                          entryIndex, pSubMemDescs);
            NV_ASSERT_OR_RETURN_VOID(bDone);
            break;
        default:
            NV_ASSERT_OR_RETURN_VOID(0);
        }

        // Track new state of entry.
        mmuWalkSetEntryState(pLevelInst, entryIndex, state);
    }

    // Free the actual sub-levels of the PDE.
    for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
    {
        MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];
        if (NULL != pSubLevelInst &&
            NULL == pSubMemDescs[subLevel])
        {
            _mmuWalkLevelInstRelease(pWalk, pLevel->subLevels + subLevel,
                                      pSubLevelInst);
        }
    }
}

static void
_mmuWalkLevelInstancesForceFree
(
    MMU_WALK       *pWalk,
    MMU_WALK_LEVEL *pLevel
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvU32                subLevel;

    if (NULL == pLevel)
        return;

    // Free all instances at this level.
    btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    while (NULL != pLevelInst)
    {
        //
        // Since we are force freeing everything, it is okay to reset these
        // fields in order to avoid hitting asserts in _mmuWalkLevelInstRelease.
        //
        pLevelInst->numValid    = 0;
        pLevelInst->numReserved = 0;
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
        btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    }
    pLevel->pInstances = NULL;

    if (NULL != pLevel->subLevels)
    {
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; subLevel++)
        {
            _mmuWalkLevelInstancesForceFree(pWalk, pLevel->subLevels + subLevel);
        }
    }
}