/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*!
 * @file
 * @brief Describes the structures and interfaces used to walk N level page tables
 */

/*--------------------------------Includes------------------------------------*/
#if defined(SRT_BUILD)
#include "shrdebug.h"
#else
#include "os/os.h"
#endif
#include "nvport/nvport.h"
#include "nvctassert.h"
#include "mmu_walk_private.h"

/*--------------------------Static Function Prototypes------------------------*/
static NV_STATUS
_mmuWalkLevelInit(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pParent,
                  const MMU_FMT_LEVEL *pLevelFmt, MMU_WALK_LEVEL *pLevel);
static void
_mmuWalkLevelDestroy(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);
static NV_STATUS
_mmuWalkLevelInstAcquire(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         const NvU64 vaLo, const NvU64 vaHi, const NvBool bTarget,
                         const NvBool bRelease, const NvBool bCommit,
                         NvBool *pBChanged, MMU_WALK_LEVEL_INST **ppLevelInst,
                         const NvBool bInitNv4k);
static void
_mmuWalkLevelInstRelease(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         MMU_WALK_LEVEL_INST *pLevelInst);
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU32 subLevel,
                   const NvU64 vaLo, const NvU64 vaHi,
                   MMU_WALK_LEVEL_INST *pSubLevelInsts[]);
static void NV_NOINLINE
_mmuWalkPdeRelease(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU64 entryVaLo);
static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                                 MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pSubLevelInsts[],
                                 NvU32 subLevel, NvU64 clippedVaLo, NvU64 clippedVaHi);
static void
_mmuWalkLevelInstancesForceFree(MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);

/* -----------------------------Inline Functions----------------------------- */
/*!
 * Returns the @ref MMU_ENTRY_STATE of the entry.
 */
MMU_ENTRY_STATE
mmuWalkGetEntryState(MMU_WALK_LEVEL_INST *pLevelInst, NvU32 entryIndex)
{
    return (MMU_ENTRY_STATE)pLevelInst->pStateTracker[entryIndex].state;
}

/*----------------------------Public Functions--------------------------------*/

NV_STATUS
mmuWalkCreate
(
    const MMU_FMT_LEVEL      *pRootFmt,
    MMU_WALK_USER_CTX        *pUserCtx,
    const MMU_WALK_CALLBACKS *pCb,
    const MMU_WALK_FLAGS      flags,
    MMU_WALK                **ppWalk,
    MMU_WALK_MEMDESC         *pStagingBuffer
)
{
    NV_STATUS status = NV_OK;
    MMU_WALK *pWalk  = NULL;

    NV_ASSERT_OR_RETURN(NULL != pRootFmt, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != pCb,      NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != ppWalk,   NV_ERR_INVALID_ARGUMENT);

    // Alloc and init walker structure.
    pWalk = portMemAllocNonPaged(sizeof(*pWalk));
    status = (pWalk == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
    NV_ASSERT_OR_GOTO(NV_OK == status, done);
    portMemSet(pWalk, 0, sizeof(*pWalk));

    pWalk->pUserCtx             = pUserCtx;
    pWalk->pCb                  = pCb;
    pWalk->flags                = flags;
    pWalk->pStagingBuffer       = pStagingBuffer;
    pWalk->bUseStagingBuffer    = NV_FALSE;
    pWalk->bInvalidateOnReserve = NV_TRUE;

    // Create level hierarchy.
    status = _mmuWalkLevelInit(pWalk, NULL, pRootFmt, &pWalk->root);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    // Commit.
    *ppWalk = pWalk;

done:
    if (NV_OK != status)
    {
        mmuWalkDestroy(pWalk);
    }
    return status;
}

void
mmuWalkDestroy
(
    MMU_WALK *pWalk
)
{
    if (NULL != pWalk)
    {
        // Destroy level hierarchy.
        _mmuWalkLevelDestroy(pWalk, &pWalk->root);

        // Free walker struct.
        portMemFree(pWalk);
    }
}

NV_STATUS
mmuWalkContinue
(
    MMU_WALK *pWalk
)
{
    return NV_ERR_NOT_SUPPORTED;
}

void
mmuWalkCommit
(
    MMU_WALK *pWalk
)
{
    // TODO
}

MMU_WALK_USER_CTX *
mmuWalkGetUserCtx
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pUserCtx;
}

void
mmuWalkSetUserCtx
(
    MMU_WALK          *pWalk,
    MMU_WALK_USER_CTX *pUserCtx
)
{
    pWalk->pUserCtx = pUserCtx;
}

const MMU_WALK_CALLBACKS *
mmuWalkGetCallbacks
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pCb;
}

void
mmuWalkSetCallbacks
(
    MMU_WALK                 *pWalk,
    const MMU_WALK_CALLBACKS *pCb
)
{
    pWalk->pCb = pCb;
}

void
mmuWalkLevelInstancesForceFree
(
    MMU_WALK *pWalk
)
{
    if (pWalk != NULL)
    {
        _mmuWalkLevelInstancesForceFree(pWalk, &pWalk->root);
    }
}

/*----------------------------Private Functions--------------------------------*/

const MMU_WALK_LEVEL *
mmuWalkFindLevel
(
    const MMU_WALK      *pWalk,
    const MMU_FMT_LEVEL *pLevelFmt
)
{
    const MMU_WALK_LEVEL *pLevel = &pWalk->root;
    while (pLevel->pFmt != pLevelFmt)
    {
        NvU32 subLevel;
        // Single sub-level always continues.
        if (1 == pLevel->pFmt->numSubLevels)
        {
            pLevel = pLevel->subLevels;
            continue;
        }
        // Multi sub-level must pick branch based on target.
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
        {
            if ((pLevel->pFmt->subLevels + subLevel) == pLevelFmt)
            {
                return pLevel->subLevels + subLevel;
            }
        }
        // Nothing found.
        return NULL;
    }
    return pLevel;
}
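/*
 * Illustrative usage sketch (not part of this file): a typical client
 * creates one walker per address space and tears it down with the VAS.
 * MY_VAS, myCallbacks, and the myVas* helpers below are hypothetical
 * placeholders for whatever the caller implements against
 * MMU_WALK_CALLBACKS.
 *
 *     static const MMU_WALK_CALLBACKS myCallbacks = { ... };
 *
 *     NV_STATUS myVasInit(MY_VAS *pVas, const MMU_FMT_LEVEL *pRootFmt)
 *     {
 *         MMU_WALK_FLAGS flags = {0};
 *         flags.bUseIterative = NV_TRUE; // avoid deep kernel-stack recursion
 *         return mmuWalkCreate(pRootFmt, (MMU_WALK_USER_CTX *)pVas,
 *                              &myCallbacks, flags, &pVas->pWalk, NULL);
 *     }
 *
 *     void myVasDestroy(MY_VAS *pVas)
 *     {
 *         mmuWalkLevelInstancesForceFree(pVas->pWalk);
 *         mmuWalkDestroy(pVas->pWalk);
 *         pVas->pWalk = NULL;
 *     }
 */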
/*!
 * @brief This function traverses the topology described by @ref
 *        MMU_FMT_LEVEL and @ref MMU_DESC_PDE. The @ref MmuOpFunc
 *        opFunc implements the actions to be performed at each
 *        sublevel in the recursion.
 *
 * @param[in] vaLo  The lower end of the Virtual Address range that is
 *                  being processed.
 * @param[in] vaHi  The upper end of the Virtual Address range that is
 *                  being processed.
 *
 * @return NV_OK if processing this level succeeds.
 *         Other errors, if not.
 */
NV_STATUS mmuWalkProcessPdes
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    NvU64                     vaLo,
    NvU64                     vaHi
)
{
    if (pWalk->flags.bUseIterative)
    {
        // Iterative MMU Walker Implementation
        NV_STATUS status = NV_OK;
        NV_ASSERT_OR_RETURN(pOpParams != NULL, NV_ERR_INVALID_ARGUMENT);

        // Call opFunc initially to see if we need to walk
        status = pOpParams->opFunc(pWalk,
                                   pOpParams,
                                   pLevel,
                                   pLevelInst,
                                   vaLo,
                                   vaHi);

        //
        // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
        // the recursive MMU Walker would have started recursing down,
        // so here we kick off the iteration.
        // If NV_OK is returned above, the recursive MMU Walker would
        // not recurse at all, so return immediately.
        //
        if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
        {
            status = NV_OK;

            NvU64 vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
            NvU32 entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
            NvU32 entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
            NvU32 entryIndex;
            NvU32 index;
            NvU32 entryIndexFillStart = 0;
            NvU32 entryIndexFillEnd;
            NvU32 pendingFillCount = 0;

            // Declarations for mmuWalk recursion conversion
            MMU_WALK_PROCESS_PDES_ENTRY *pProcessPdeEntry;
            MMU_WALK_RELEASE_PDES_ENTRY *pReleasePdeEntry;
            PROCESS_PDES_STACK           processPdesStack;
            RELEASE_PDES_STACK           releasePdesStack;
            listInit(&processPdesStack, portMemAllocatorGetGlobalNonPaged());
            listInit(&releasePdesStack, portMemAllocatorGetGlobalNonPaged());

            //
            // Walk over each relevant entry (PDE) within this Page Level.
            // Do one initial loop to kick off iteration.
            // Add entries in reverse order because processPdesStack is a stack.
            //
            for (entryIndex = entryIndexHi; entryIndex >= entryIndexLo; entryIndex--)
            {
                pProcessPdeEntry = listPrependNew(&processPdesStack);
                if (pProcessPdeEntry == NULL)
                {
                    status = NV_ERR_NO_MEMORY;
                    NV_ASSERT_OR_GOTO(0, cleanupIter);
                }

                //
                // The values pushed to the stack must NOT be pointers to variables on the stack.
                // All of these are simple values or pointers to a variable allocated by a function
                // calling the MMU Walker.
                //
                pProcessPdeEntry->pLevel       = pLevel;
                pProcessPdeEntry->pLevelInst   = pLevelInst;
                pProcessPdeEntry->vaLo         = vaLo;
                pProcessPdeEntry->vaHi         = vaHi;
                pProcessPdeEntry->vaLevelBase  = vaLevelBase;
                pProcessPdeEntry->entryIndexHi = entryIndexHi;
                pProcessPdeEntry->entryIndex   = entryIndex;

                // Prevent underflow because of adding entries in reverse order
                if (entryIndex == 0) break;
            }

            while ((pProcessPdeEntry = listHead(&processPdesStack)) != NULL)
            {
                pLevel       = pProcessPdeEntry->pLevel;
                pLevelInst   = pProcessPdeEntry->pLevelInst;
                vaLo         = pProcessPdeEntry->vaLo;
                vaHi         = pProcessPdeEntry->vaHi;
                vaLevelBase  = pProcessPdeEntry->vaLevelBase;
                entryIndexHi = pProcessPdeEntry->entryIndexHi;
                entryIndex   = pProcessPdeEntry->entryIndex;

                listRemove(&processPdesStack, pProcessPdeEntry);

                const NvU64 entryVaLo = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                                   vaLevelBase, entryIndex);
                const NvU64 entryVaHi = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                                   vaLevelBase, entryIndex);
                const NvU64 clippedVaLo = NV_MAX(vaLo, entryVaLo);
                const NvU64 clippedVaHi = NV_MIN(vaHi, entryVaHi);
                const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
                NvU32 subLevel = 0;
                MMU_WALK_LEVEL_INST *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

                // Optimizations for release operations.
                if (pOpParams->bRelease)
                {
                    // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                    if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        !pLevelInst->pStateTracker[entryIndex].bHybrid)
                        continue;
                }

                // Optimizations for fill operations.
                if (pOpParams->bFill)
                {
                    const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                    if (pendingFillCount == 0)
                        entryIndexFillStart = entryIndexFillEnd = entryIndex;

                    //
                    // Check if the entire entry's coverage is being filled to
                    // a constant state.
                    //
                    // If this entry is not currently a PDE we can
                    // apply the fill operation directly
                    // at this level and avoid "splitting" the PDE.
                    //
                    // If this entry is currently a PDE we must
                    // clear the entries of the lower levels to free
                    // unused level instances.
                    //
                    if ((pTarget->entryState != currEntryState) &&
                        (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        (entryVaLo == clippedVaLo) &&
                        (entryVaHi == clippedVaHi))
                    {
                        entryIndexFillEnd = entryIndex;
                        pendingFillCount++;

                        // Not the last iteration, keep batching.
                        if (entryIndex < entryIndexHi)
                            continue;
                    }

                    if (pendingFillCount != 0)
                    {
                        NvU32 progress = 0;

                        // Flush pending fills
                        pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                                pLevel->pFmt,
                                                pLevelInst->pMemDesc,
                                                entryIndexFillStart,
                                                entryIndexFillEnd,
                                                pTarget->fillState,
                                                &progress);

                        if (progress != (entryIndexFillEnd - entryIndexFillStart + 1))
                        {
                            status = NV_ERR_INVALID_STATE;
                            NV_ASSERT_OR_GOTO(0, cleanupIter);
                        }

                        for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                            mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                        pendingFillCount = 0;
                    }

                    // Recheck the state after fill. If nothing to do, continue.
                    if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                        continue;

                } // End of fill optimizations.
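                /*
                 * Worked example of the fill batching above (illustrative
                 * numbers): filling entries 3..7 of a level to SPARSE when
                 * all five entries are currently INVALID accumulates
                 * pendingFillCount across iterations and flushes a single
                 * FillEntries(3, 7) callback once entryIndex reaches
                 * entryIndexHi, instead of issuing five one-entry callbacks.
                 */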
                // Determine the sublevel we need to operate on.
                status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                                   pLevel,
                                                   &subLevel,
                                                   clippedVaLo,
                                                   clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                //
                // Allocate the sublevel instances for the current PDE and update the current
                // Page Dir (i.e. write the PDE into the Page Dir) if needed.
                //
                status = _mmuWalkPdeAcquire(pWalk,
                                            pOpParams,
                                            pLevel,
                                            pLevelInst,
                                            entryIndex,
                                            subLevel,
                                            clippedVaLo,
                                            clippedVaHi,
                                            pSubLevelInsts);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                // Release op is done if the target sub-level is absent.
                if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
                {
                    continue;
                }

                //
                // Split sparse PDE's range.
                // When only a subrange of the original PDE's VA range is being operated
                // on we sparsify the remaining range lying outside the operational
                // subrange (clippedVaLo to clippedVaHi).
                //
                if (MMU_ENTRY_STATE_SPARSE == currEntryState)
                {
                    //
                    // Sparsify the lower part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaLo > entryVaLo)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    entryVaLo,
                                                    clippedVaLo - 1);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }

                    //
                    // Sparsify the upper part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaHi < entryVaHi)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    clippedVaHi + 1,
                                                    entryVaHi);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }
                } // Sparse PDE split

                // Resolve potential conflicts in multiple sized page tables
                if (pLevel->pFmt->numSubLevels != 1 &&
                    !pOpParams->bIgnoreSubLevelConflicts)
                {
                    status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                              pOpParams,
                                                              pLevel,
                                                              pSubLevelInsts,
                                                              subLevel,
                                                              clippedVaLo,
                                                              clippedVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                }

                status = pOpParams->opFunc(pWalk,
                                           pOpParams,
                                           pLevel->subLevels + subLevel,
                                           pSubLevelInsts[subLevel],
                                           clippedVaLo,
                                           clippedVaHi);

                if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
                {
                    //
                    // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
                    // the recursive MMU Walker would have recursed down one
                    // more level. In this code block, we keep the iteration
                    // going by doing everything the recursion previously did.
                    //
                    status = NV_OK;
                    pReleasePdeEntry = listPrependNew(&releasePdesStack);
                    if (pReleasePdeEntry == NULL)
                    {
                        status = NV_ERR_NO_MEMORY;
                        NV_ASSERT_OR_GOTO(0, cleanupIter);
                    }

                    //
                    // Queue the current level for pdeRelease so that pdeRelease
                    // can be called AFTER exploring the current level's sublevels.
                    //
                    pReleasePdeEntry->pLevel       = pLevel;
                    pReleasePdeEntry->pLevelInst   = pLevelInst;
                    pReleasePdeEntry->entryVaLo    = entryVaLo;
                    pReleasePdeEntry->entryIndexHi = entryIndexHi;
                    pReleasePdeEntry->entryIndex   = entryIndex;

                    //
                    // Here use the variables that would be used in the next recursion downwards.
                    // Calculate the new vaLevelBase, entryIndexLo, entryIndexHi, entryIndex.
                    //
                    vaLevelBase  = mmuFmtLevelVirtAddrLo((pLevel->subLevels + subLevel)->pFmt, clippedVaLo);
                    entryIndexLo = mmuFmtVirtAddrToEntryIndex((pLevel->subLevels + subLevel)->pFmt, clippedVaLo);
                    entryIndexHi = mmuFmtVirtAddrToEntryIndex((pLevel->subLevels + subLevel)->pFmt, clippedVaHi);

                    for (entryIndex = entryIndexHi; entryIndex >= entryIndexLo; entryIndex--)
                    {
                        pProcessPdeEntry = listPrependNew(&processPdesStack);
                        if (pProcessPdeEntry == NULL)
                        {
                            status = NV_ERR_NO_MEMORY;
                            NV_ASSERT_OR_GOTO(0, cleanupIter);
                        }

                        pProcessPdeEntry->pLevel       = pLevel->subLevels + subLevel;
                        pProcessPdeEntry->pLevelInst   = pSubLevelInsts[subLevel];
                        pProcessPdeEntry->vaLo         = clippedVaLo;
                        pProcessPdeEntry->vaHi         = clippedVaHi;
                        pProcessPdeEntry->vaLevelBase  = vaLevelBase;
                        pProcessPdeEntry->entryIndexHi = entryIndexHi;
                        pProcessPdeEntry->entryIndex   = entryIndex;

                        if (entryIndex == 0) break;
                    }
                }
                else if (NV_OK == status)
                {
                    //
                    // If NV_OK is returned above, the recursive MMU Walker would have reached
                    // the target format level and so reached the base case of its recursion.
                    // It would then return from recursive function calls and call pdeRelease
                    // for all levels whose sublevels are done being processed.
                    //

                    // pdeRelease this level immediately since it does not recurse.
                    _mmuWalkPdeRelease(pWalk,
                                       pOpParams,
                                       pLevel,
                                       pLevelInst,
                                       entryIndex,
                                       entryVaLo);

                    //
                    // If this is the last processed sublevel of a level, pdeRelease the level.
                    // Continue doing so for all parent levels.
                    //
                    while (entryIndex == entryIndexHi)
                    {
                        if ((pReleasePdeEntry = listHead(&releasePdesStack)) != NULL)
                        {
                            // Extract variables for the next loop around.
                            entryIndexHi = pReleasePdeEntry->entryIndexHi;
                            entryIndex   = pReleasePdeEntry->entryIndex;

                            _mmuWalkPdeRelease(pWalk,
                                               pOpParams,
                                               pReleasePdeEntry->pLevel,
                                               pReleasePdeEntry->pLevelInst,
                                               pReleasePdeEntry->entryIndex,
                                               pReleasePdeEntry->entryVaLo);

                            listRemove(&releasePdesStack, pReleasePdeEntry);
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                else
                {
                    // Stop processing PDEs if we are in error state.
                    goto cleanupIter;
                }
            } // per entry loop

            if (listHead(&processPdesStack) != NULL)
            {
                //
                // If this assertion fails, it is a result of a programming
                // error in the iterative MMU Walker implementation.
                //
                status = NV_ERR_INVALID_STATE;
                NV_ASSERT_OR_GOTO(0, cleanupIter);
            }

            //
            // Note that if releasePdesStack is not empty at this point,
            // we hit an empty sublevel, but we still need to pdeRelease
            // the parent sublevels in cleanup below.
            //

            // This label is named differently to avoid conflicting with the recursive path's "cleanup".
        cleanupIter:

            //
            // In the recursive MMU Walker, when a sublevel failed, that level would pdeRelease,
            // return to the parent, and the parent would pdeRelease and return to its parent,
            // and so on. Here we emulate that and pdeRelease all parents.
            //

            while ((pReleasePdeEntry = listHead(&releasePdesStack)) != NULL)
            {
                _mmuWalkPdeRelease(pWalk,
                                   pOpParams,
                                   pReleasePdeEntry->pLevel,
                                   pReleasePdeEntry->pLevelInst,
                                   pReleasePdeEntry->entryIndex,
                                   pReleasePdeEntry->entryVaLo);

                listRemove(&releasePdesStack, pReleasePdeEntry);
            }

            listDestroy(&processPdesStack);
            listDestroy(&releasePdesStack);
        }
        return status;
    }
    else
    {
        // Recursive MMU Walker Implementation
        NV_STATUS status = NV_OK;
        NvU64 vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        NvU32 entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
        NvU32 entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
        NvU32 entryIndex;
        NvU32 index;
        NvU32 entryIndexFillStart = 0;
        NvU32 entryIndexFillEnd;
        NvU32 pendingFillCount = 0;

        NV_ASSERT_OR_RETURN(NULL != pOpParams, NV_ERR_INVALID_ARGUMENT);

        // Walk over each relevant entry (PDE) within this Page Level
        for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
        {
            const NvU64 entryVaLo = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                               vaLevelBase, entryIndex);
            const NvU64 entryVaHi = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                               vaLevelBase, entryIndex);
            const NvU64 clippedVaLo = NV_MAX(vaLo, entryVaLo);
            const NvU64 clippedVaHi = NV_MIN(vaHi, entryVaHi);
            const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
            NvU32 subLevel = 0;
            MMU_WALK_LEVEL_INST *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

            // Optimizations for release operations.
            if (pOpParams->bRelease)
            {
                // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    !pLevelInst->pStateTracker[entryIndex].bHybrid)
                    continue;
            }

            // Optimizations for fill operations.
            if (pOpParams->bFill)
            {
                const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                if (pendingFillCount == 0)
                    entryIndexFillStart = entryIndexFillEnd = entryIndex;

                //
                // Check if the entire entry's coverage is being filled to
                // a constant state.
                //
                // If this entry is not currently a PDE we can
                // apply the fill operation directly
                // at this level and avoid "splitting" the PDE.
                //
                // If this entry is currently a PDE we must
                // clear the entries of the lower levels to free
                // unused level instances.
                //
                if ((pTarget->entryState != currEntryState) &&
                    (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    (entryVaLo == clippedVaLo) &&
                    (entryVaHi == clippedVaHi))
                {
                    entryIndexFillEnd = entryIndex;
                    pendingFillCount++;

                    // Not the last iteration, keep batching.
                    if (entryIndex < entryIndexHi)
                        continue;
                }

                if (pendingFillCount != 0)
                {
                    NvU32 progress = 0;

                    // Flush pending fills
                    pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                            pLevel->pFmt,
                                            pLevelInst->pMemDesc,
                                            entryIndexFillStart,
                                            entryIndexFillEnd,
                                            pTarget->fillState,
                                            &progress);

                    NV_ASSERT_OR_RETURN(
                        progress == (entryIndexFillEnd - entryIndexFillStart + 1),
                        NV_ERR_INVALID_STATE);

                    for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                        mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                    pendingFillCount = 0;
                }

                // Recheck the state after fill. If nothing to do, continue.
                if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                    continue;

            } // End of fill optimizations.

            // Determine the sublevel we need to operate on.
            status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                               pLevel,
                                               &subLevel,
                                               clippedVaLo,
                                               clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            //
            // Allocate the sublevel instances for the current PDE and update the current
            // Page Dir (i.e. write the PDE into the Page Dir) if needed.
            //
            status = _mmuWalkPdeAcquire(pWalk,
                                        pOpParams,
                                        pLevel,
                                        pLevelInst,
                                        entryIndex,   // PDE index being processed
                                        subLevel,     // Sub level processed within the PDE
                                        clippedVaLo,  // Low VA for the PDE
                                        clippedVaHi,  // High VA for the PDE
                                        pSubLevelInsts);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            // Release op is done if the target sub-level is absent.
            if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
            {
                continue;
            }

            //
            // Split sparse PDE's range.
            // When only a subrange of the original PDE's VA range is being operated
            // on we sparsify the remaining range lying outside the operational
            // subrange (clippedVaLo to clippedVaHi).
            //
            if (MMU_ENTRY_STATE_SPARSE == currEntryState)
            {
                //
                // Sparsify the lower part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaLo > entryVaLo)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       entryVaLo,
                                                       clippedVaLo - 1);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }

                //
                // Sparsify the upper part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaHi < entryVaHi)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       clippedVaHi + 1,
                                                       entryVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }
            } // Sparse PDE split

            // Resolve potential conflicts in multiple sized page tables
            if (pLevel->pFmt->numSubLevels != 1 &&
                !pOpParams->bIgnoreSubLevelConflicts)
            {
                status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                          pOpParams,
                                                          pLevel,
                                                          pSubLevelInsts,
                                                          subLevel,
                                                          clippedVaLo,
                                                          clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
            }

            // Recurse to update the next level for this PDE
            status = pOpParams->opFunc(pWalk,
                                       pOpParams,
                                       pLevel->subLevels + subLevel,
                                       pSubLevelInsts[subLevel],
                                       clippedVaLo,
                                       clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

        cleanup:
            // Free unused sublevel instances. Clear the PDE if all sublevels are deallocated.
            _mmuWalkPdeRelease(pWalk,
                               pOpParams,
                               pLevel,
                               pLevelInst,
                               entryIndex,
                               entryVaLo);

            // Stop processing PDEs if we are in error state.
            if (NV_OK != status)
                break;
        } // per entry loop
        return status;
    }
}
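/*
 * Worked example of the per-entry clipping math above (illustrative
 * numbers): at a level whose entries each map 2MB, with vaLevelBase = 0,
 * vaLo = 1MB, and vaHi = 5MB - 1, the walker visits entryIndexLo = 0
 * through entryIndexHi = 2. For entryIndex 1, entryVaLo = 2MB and
 * entryVaHi = 4MB - 1, so clippedVaLo = NV_MAX(1MB, 2MB) = 2MB and
 * clippedVaHi = NV_MIN(5MB - 1, 4MB - 1) = 4MB - 1; only the middle
 * entry is processed over its full range, the outer two are clipped.
 */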
/*!
 * @brief This function allocates the root Page Directory and commits it to
 *        the related channels.
 *
 * @param[in] vaLo    The lower end of the Virtual Address range that is
 *                    being processed.
 * @param[in] vaHi    The upper end of the Virtual Address range that is
 *                    being processed.
 * @param[in] bCommit Force commit the PDB.
 *
 * @return NV_OK if allocating this level succeeds.
 *         Other errors, if not.
 */
NV_STATUS
mmuWalkRootAcquire
(
    MMU_WALK *pWalk,
    NvU64     vaLo,
    NvU64     vaHi,
    NvBool    bCommit
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool               bChanged   = NV_FALSE;

    // Acquire root level instance memory.
    NV_ASSERT_OK_OR_RETURN(
        _mmuWalkLevelInstAcquire(pWalk, &pWalk->root, vaLo, vaHi,
                                 NV_TRUE, NV_FALSE, bCommit, &bChanged,
                                 &pLevelInst, NV_FALSE /*bInitNv4k*/));

    // We check pLevelInst to catch the corner case where Commit() is called before PDB allocation.
    if (bChanged || (bCommit && pLevelInst))
    {
        NvBool bDone;

        // Bind this Page Dir to the affected channels
        bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt,
                                      pLevelInst->pMemDesc, NV_FALSE);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);
    }

    return NV_OK;
}

/*!
 * @brief This function releases the root Page Directory.
 */
void
mmuWalkRootRelease
(
    MMU_WALK *pWalk
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = pWalk->root.pInstances;
    if (NULL != pLevelInst)
    {
        // Free the level instance if the entry ref count is 0.
        if ((0 == pLevelInst->numValid + pLevelInst->numSparse) &&
            (0 == pLevelInst->numReserved))
        {
            NvBool bDone;

            // Commit NULL root page directory (clear usage).
            bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt, NULL, NV_FALSE);
            NV_ASSERT(bDone);

            // Free unused root memory.
            _mmuWalkLevelInstRelease(pWalk, &pWalk->root, pLevelInst);
        }
    }
}
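/*
 * Illustrative pairing (not part of this file): a client typically brackets
 * page-table updates for a VA range with a root acquire/release so the PDB
 * stays bound while any level instance is live. The surrounding error
 * handling is a hypothetical sketch.
 *
 *     NV_ASSERT_OK_OR_RETURN(mmuWalkRootAcquire(pWalk, vaLo, vaHi, NV_FALSE));
 *     status = ...; // map/unmap/sparsify operations over [vaLo, vaHi]
 *     mmuWalkRootRelease(pWalk); // frees the PDB only once fully empty
 */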
/*!
 * @brief This function updates the @ref MMU_WALK_LEVEL_INST::pStateTracker for an
 *        entry specified by the entryIndex.
 *
 * @param[in] entryIndex    Index of the entry whose state needs to be updated.
 * @param[in] newEntryState The new state of the entry specified by entryIndex.
 */
void
mmuWalkSetEntryState
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    MMU_ENTRY_STATE      newEntryState
)
{
    MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);

    // Decrement ref count for current state
    switch (currEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            NV_ASSERT(0 != pLevelInst->numValid);
            pLevelInst->numValid--;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            NV_ASSERT(0 != pLevelInst->numSparse);
            pLevelInst->numSparse--;
            break;
        case MMU_ENTRY_STATE_NV4K:
            NV_ASSERT(0 != pLevelInst->numNv4k);
            pLevelInst->numNv4k--;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Increment new state ref count
    switch (newEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            pLevelInst->numValid++;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            pLevelInst->numSparse++;
            break;
        case MMU_ENTRY_STATE_NV4K:
            pLevelInst->numNv4k++;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Commit new state.
    pLevelInst->pStateTracker[entryIndex].state = newEntryState;
}

void
mmuWalkSetEntryReserved
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bReserved
)
{
    if (pLevelInst->pStateTracker[entryIndex].bReserved)
    {
        NV_ASSERT(0 != pLevelInst->numReserved);
        pLevelInst->numReserved--;
    }
    if (bReserved)
    {
        pLevelInst->numReserved++;
    }
    pLevelInst->pStateTracker[entryIndex].bReserved = bReserved;
}

void
mmuWalkSetEntryHybrid
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bHybrid
)
{
    if (pLevelInst->pStateTracker[entryIndex].bHybrid)
    {
        NV_ASSERT(0 != pLevelInst->numHybrid);
        pLevelInst->numHybrid--;
    }
    if (bHybrid)
    {
        pLevelInst->numHybrid++;
    }
    pLevelInst->pStateTracker[entryIndex].bHybrid = bHybrid;
}

/**
 * @brief Calculate the target entry indices that cover the VA range of the
 *        source entries.
 *
 * @details For example, entry 1 in a 64K PT is aligned to 4K PT entries 16
 *          to 31, and 4K PTEs 1 to 18 are covered by 64K PTEs 0 to 1.
 *
 *          This is needed by NV4K encoding: updating the big page table
 *          based on the small page table requires translating entry
 *          indices between the two formats.
 *
 * @param[in]  pPageFmtIn   Source format
 * @param[in]  indexLoIn    The lower input index
 * @param[in]  indexHiIn    The upper input index
 * @param[in]  pPageFmtOut  Target format
 * @param[out] pIndexLoOut  The lower result index
 * @param[out] pIndexHiOut  The upper result index
 */
void
mmuFmtCalcAlignedEntryIndices
(
    const MMU_FMT_LEVEL *pPageFmtIn,
    const NvU32          indexLoIn,
    const NvU32          indexHiIn,
    const MMU_FMT_LEVEL *pPageFmtOut,
    NvU32               *pIndexLoOut,
    NvU32               *pIndexHiOut
)
{
    NvU64 pageSizeIn, pageSizeOut;
    NvU64 pageSizeRatio;
    NV_ASSERT(pIndexLoOut != NULL && pIndexHiOut != NULL);
    NV_ASSERT(pPageFmtIn != NULL && pPageFmtOut != NULL);

    pageSizeIn  = mmuFmtLevelPageSize(pPageFmtIn);
    pageSizeOut = mmuFmtLevelPageSize(pPageFmtOut);

    if (pageSizeIn < pageSizeOut)
    {
        pageSizeRatio = pageSizeOut / pageSizeIn;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn / pageSizeRatio);
        *pIndexHiOut = (NvU32)(indexHiIn / pageSizeRatio);
    }
    else
    {
        pageSizeRatio = pageSizeIn / pageSizeOut;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn * pageSizeRatio);
        *pIndexHiOut = (NvU32)((indexHiIn + 1) * pageSizeRatio - 1);
    }
}
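/*
 * Worked example (illustrative numbers, 64K/4K formats, ratio 16):
 *   small -> big: indexLoIn = 18, indexHiIn = 61  =>  *pIndexLoOut = 1,  *pIndexHiOut = 3
 *   big -> small: indexLoIn = 1,  indexHiIn = 3   =>  *pIndexLoOut = 16, *pIndexHiOut = 63
 * Note that big -> small expands to the full aligned coverage, which is
 * what _mmuWalkResolveSubLevelConflicts below relies on.
 */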
/*----------------------------Static Functions--------------------------------*/

static NV_STATUS
_mmuWalkLevelInit
(
    const MMU_WALK      *pWalk,
    MMU_WALK_LEVEL      *pParent,
    const MMU_FMT_LEVEL *pLevelFmt,
    MMU_WALK_LEVEL      *pLevel
)
{
    // Init pointers.
    pLevel->pFmt    = pLevelFmt;
    pLevel->pParent = pParent;

    if (0 != pLevelFmt->numSubLevels)
    {
        NvU32       subLevel;
        const NvU32 size = pLevelFmt->numSubLevels * (NvU32)sizeof(*pLevel->subLevels);

        // Allocate sub-level array.
        pLevel->subLevels = portMemAllocNonPaged(size);
        if (pLevel->subLevels == NULL)
            return NV_ERR_NO_MEMORY;

        portMemSet(pLevel->subLevels, 0, size);

        // Recursively create each sub-level.
        for (subLevel = 0; subLevel < pLevelFmt->numSubLevels; ++subLevel)
        {
            NV_ASSERT_OK_OR_RETURN(
                _mmuWalkLevelInit(pWalk, pLevel, pLevelFmt->subLevels + subLevel,
                                  pLevel->subLevels + subLevel));
        }
    }

    return NV_OK;
}

static void
_mmuWalkLevelDestroy
(
    const MMU_WALK *pWalk,
    MMU_WALK_LEVEL *pLevel
)
{
    NvU32 subLevel;

    if (NULL != pLevel->subLevels)
    {
        // Recursively destroy each sub-level.
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
        {
            _mmuWalkLevelDestroy(pWalk, pLevel->subLevels + subLevel);
        }
        // Free sub-level array.
        portMemFree(pLevel->subLevels);
    }

    // All level instance memory should be freed already.
    NV_ASSERT(NULL == pLevel->pInstances);
}

/**
 * @brief Resolve upcoming state conflicts before MMU walk operations
 *
 * @example Say we are to mmuWalkMap the VA range [vaLo, vaHi] on the small PT.
 * Assume we have a 4K PT and a 64K PT as our small PT and big PT, and [vaLo, vaHi]
 * is a strict subset of the VA range covered by BigPTE[1, 3] and SmallPTE[18, 61].
 * Let's say BigPTE[1, 3] are sparse right now.
 *
 * To resolve the conflict, we need to preserve the sparse state for the part of
 * the VA range that is not going to be mapped. We need to move those states
 * from the big PT to the small PT.
 *
 * Before:
 *     BigPTE[1, 3]: sparse, SmallPTE[16 - 63]: invalid
 *     (BigPTE[1, 3] and SmallPTE[16 - 63] are VA aligned)
 * After:
 *     BigPTE[1, 3]: invalid, SmallPTE[16 - 17]: sparse
 *     SmallPTE[18 - 61]: invalid, will later be mapped
 *     SmallPTE[62 - 63]: sparse
 *
 * @example If we are to mmuWalkMap on the big PT instead of the small PT,
 * and the sparse state was on the small PT, we just need to invalidate the small PTEs.
 *
 * Before:
 *     BigPTE[1, 3]: invalid,
 *     SmallPTE[16 - 63]: sparse
 * After:
 *     BigPTE[1, 3]: invalid, will later be mapped
 *     SmallPTE[16 - 63]: invalid
 *
 * @return NV_OK on success, no other values for now
 */
static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pSubLevelInsts[],
    NvU32                     subLevelIdx,
    NvU64                     clippedVaLo,
    NvU64                     clippedVaHi
)
{
    NvU32     i           = 0;
    NvU32     progress    = 0;
    NV_STATUS status      = NV_OK;
    NvBool    bConflictLo = NV_FALSE;
    NvBool    bConflictHi = NV_FALSE;
    const MMU_FMT_LEVEL *pLevelFmtBig    = pLevel->subLevels[0].pFmt;
    const MMU_FMT_LEVEL *pLevelFmtSmall  = pLevel->subLevels[1].pFmt;
    MMU_WALK_LEVEL_INST *pLevelBigInst   = pSubLevelInsts[0];
    MMU_WALK_LEVEL_INST *pLevelSmallInst = pSubLevelInsts[1];
    // Entry indices for the target page table
    NvU32 entryIndexLo, entryIndexHi;
    // Entry indices involved in both page tables
    NvU32 indexLo_Small, indexHi_Small, indexLo_Big, indexHi_Big;

    if (0 == subLevelIdx)
    {
        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaLo);
        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaHi);
        indexLo_Big  = entryIndexLo;
        indexHi_Big  = entryIndexHi;
        mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
                                      pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
    }
    else
    {
        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaLo);
        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaHi);
        mmuFmtCalcAlignedEntryIndices(pLevelFmtSmall, entryIndexLo,
            entryIndexHi, pLevelFmtBig, &indexLo_Big, &indexHi_Big);
        mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
            pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
    }

    // Check if the involved small PTEs need to be sparsified
    if (1 == subLevelIdx && NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        // check lower part
        MMU_ENTRY_STATE entryStateBig;
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexLo_Big);
        bConflictLo   = (MMU_ENTRY_STATE_SPARSE == entryStateBig);

        // check higher part
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexHi_Big);
        bConflictHi   = (MMU_ENTRY_STATE_SPARSE == entryStateBig);
    }

    if (bConflictLo && entryIndexLo > indexLo_Small)
    {
        // sparsify the lower range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, indexLo_Small, entryIndexLo - 1,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == entryIndexLo - indexLo_Small,
                            NV_ERR_INVALID_STATE);

        for (i = indexLo_Small; i <= entryIndexLo - 1; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    if (bConflictHi && entryIndexHi < indexHi_Small)
    {
        // sparsify the higher range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, entryIndexHi + 1, indexHi_Small,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_Small - entryIndexHi,
                            NV_ERR_INVALID_STATE);

        for (i = entryIndexHi + 1; i <= indexHi_Small; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    // Invalidate the VA range in the other page table
    if (NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        NvU32 indexLo_tmp, indexHi_tmp;
        const MMU_FMT_LEVEL *pSubLevelFmt;
        MMU_WALK_LEVEL_INST *pSubLevelInst;

        if (subLevelIdx == 0)
        {
            indexLo_tmp   = indexLo_Small;
            indexHi_tmp   = indexHi_Small;
            pSubLevelFmt  = pLevelFmtSmall;
            pSubLevelInst = pLevelSmallInst;
        }
        else
        {
            indexLo_tmp   = indexLo_Big;
            indexHi_tmp   = indexHi_Big;
            pSubLevelFmt  = pLevelFmtBig;
            pSubLevelInst = pLevelBigInst;
        }

        pWalk->pCb->FillEntries(pWalk->pUserCtx, pSubLevelFmt,
            pSubLevelInst->pMemDesc, indexLo_tmp, indexHi_tmp,
            MMU_WALK_FILL_INVALID, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_tmp - indexLo_tmp + 1,
                            NV_ERR_INVALID_STATE);

        for (i = indexLo_tmp; i <= indexHi_tmp; i++)
        {
            mmuWalkSetEntryState(pSubLevelInst, i, MMU_ENTRY_STATE_INVALID);
        }
    }

    return status;
}

/*!
 * Lazily allocates and initializes a level instance.
 */
static NV_STATUS
_mmuWalkLevelInstAcquire
(
    const MMU_WALK       *pWalk,
    MMU_WALK_LEVEL       *pLevel,
    const NvU64           vaLo,
    const NvU64           vaHi,
    const NvBool          bTarget,
    const NvBool          bRelease,
    const NvBool          bCommit,
    NvBool               *pBChanged,
    MMU_WALK_LEVEL_INST **ppLevelInst,
    const NvBool          bInitNv4k
)
{
    NV_STATUS            status;
    MMU_WALK_MEMDESC    *pOldMem;
    NvU32                oldSize;
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool               bNew       = NV_FALSE;

    // Lookup level instance.
    if (NV_OK != btreeSearch(vaLo, (NODE**)&pLevelInst, (NODE*)pLevel->pInstances))
    {
        NvU32 numBytes;

        if (!bTarget || bRelease)
        {
            // Skip missing non-target instances.
            *ppLevelInst = NULL;
            return NV_OK;
        }

        // We only call Commit() on an already allocated page directory.
        NV_ASSERT_OR_RETURN(!bCommit, NV_ERR_INVALID_STATE);

        // Mark as newly allocated.
        bNew = NV_TRUE;

        // Allocate missing target instances.
        pLevelInst = portMemAllocNonPaged(sizeof(*pLevelInst));
        status = (pLevelInst == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst, 0, sizeof(*pLevelInst));

        // Insert the new node into the tree of instances for this page level.
        pLevelInst->node.keyStart = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        pLevelInst->node.keyEnd   = mmuFmtLevelVirtAddrHi(pLevel->pFmt, vaHi);

        status = btreeInsert(&pLevelInst->node, (NODE**)&pLevel->pInstances);
        NV_ASSERT_OR_GOTO(NV_OK == status, done);

        // Allocate entry tracker.
        numBytes = mmuFmtLevelEntryCount(pLevel->pFmt) * sizeof(MMU_ENTRY_INFO);
        pLevelInst->pStateTracker = portMemAllocNonPaged(numBytes);
        status = (pLevelInst->pStateTracker == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst->pStateTracker, 0, numBytes);
        if (bInitNv4k)
        {
            NvU32 i;
            for (i = 0; i < mmuFmtLevelEntryCount(pLevel->pFmt); ++i)
            {
                mmuWalkSetEntryState(pLevelInst, i, MMU_ENTRY_STATE_NV4K);
            }
        }
    }

    // Save original memory info.
    pOldMem = pLevelInst->pMemDesc;
    oldSize = pLevelInst->memSize;

    // Allocate (possibly reallocating) memory for this level instance.
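    //
    // Illustrative growth example (assumed numbers): with an 8-byte entry
    // size, growing a partial page table from 2KB to 4KB gives
    // entryIndexLo = 2048 / 8 = 256 and entryIndexHi = 4096 / 8 - 1 = 511;
    // entries 0..255 are copied from the old memory below and entries
    // 256..511 are filled to the default (invalid or NV4K) state.
    //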
    status = pWalk->pCb->LevelAlloc(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo),
                                    vaHi,
                                    bTarget,
                                    &pLevelInst->pMemDesc,
                                    &pLevelInst->memSize,
                                    pBChanged);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    if (*pBChanged)
    {
        const NvU32 entryIndexLo = oldSize / pLevel->pFmt->entrySize;
        const NvU32 entryIndexHi = (pLevelInst->memSize / pLevel->pFmt->entrySize) - 1;
        NvU32       progress     = 0;

        //
        // Default state for new entries:
        // NV4K for the big page table if ATS is enabled.
        //
        MMU_WALK_FILL_STATE newEntryState = bInitNv4k ? MMU_WALK_FILL_NV4K :
                                                        MMU_WALK_FILL_INVALID;

        NV_ASSERT(NULL != pLevelInst->pMemDesc);
        NV_ASSERT(entryIndexLo <= entryIndexHi);

        // We only call Commit() on an already allocated page directory.
        if (bCommit)
        {
            status = NV_ERR_INVALID_STATE;
            NV_ASSERT_OR_GOTO(NV_OK == status, done);
        }

        // Copy old entries from old to new.
        if (entryIndexLo > 0)
        {
            NV_ASSERT(NULL != pWalk->pCb->CopyEntries);
            pWalk->pCb->CopyEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pOldMem,
                                    pLevelInst->pMemDesc,
                                    0,
                                    entryIndexLo - 1,
                                    &progress);
            NV_ASSERT(progress == entryIndexLo);

            // Free old memory.
            pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt,
                                  pLevelInst->node.keyStart, pOldMem);
        }

        if (pWalk->bInvalidateOnReserve)
        {
            // Clear new entries to invalid.
            pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pLevelInst->pMemDesc,
                                    entryIndexLo,
                                    entryIndexHi,
                                    newEntryState,
                                    &progress);
            NV_ASSERT(progress == entryIndexHi - entryIndexLo + 1);
        }
    }
    else
    {
        // Ensure the memory hasn't changed.
        NV_ASSERT(pOldMem == pLevelInst->pMemDesc && oldSize == pLevelInst->memSize);
    }

    // Commit return.
    *ppLevelInst = pLevelInst;

done:
    // Cleanup newly allocated instance on failure.
    if (NV_OK != status &&
        bNew && NULL != pLevelInst)
    {
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
    }
    return status;
}

/*!
 * Frees an unused level instance.
 */
static void
_mmuWalkLevelInstRelease
(
    const MMU_WALK      *pWalk,
    MMU_WALK_LEVEL      *pLevel,
    MMU_WALK_LEVEL_INST *pLevelInst
)
{
    NV_ASSERT(0 == pLevelInst->numValid);
    NV_ASSERT(0 == pLevelInst->numReserved);
    // Unlink.
    btreeUnlink(&pLevelInst->node, (NODE**)&pLevel->pInstances);
    // Free.
    if (NULL != pLevelInst->pMemDesc)
    {
        pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->node.keyStart,
                              pLevelInst->pMemDesc);
    }
    portMemFree(pLevelInst->pStateTracker);
    portMemFree(pLevelInst);
}

/*!
 * This function is used to allocate a sublevel MMU_WALK_LEVEL_INST
 * for a given PDE. If the sublevel allocation succeeds, the parent level is
 * updated.
 */
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU32               subLevel,
    const NvU64               vaLo,
    const NvU64               vaHi,
    MMU_WALK_LEVEL_INST      *pSubLevelInsts[]
)
{
    NV_STATUS status  = NV_OK;
    NvBool    bCommit = NV_FALSE;
    NvU32     i;
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
    NvU64 vaLimit = vaHi;
    const NvU32 numSubLevels = pLevel->pFmt->numSubLevels;
    MMU_WALK_LEVEL_INST *pCurSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

    //
    // Determine the minimum VA limit of existing sub-levels.
    // This is required to keep parallel partial page tables in sync.
    // MMU HW that supports partial size tables selects the size in the
    // parent PDE so each sub-level *MUST* be the same partial size
    // once allocated.
    //
    if (numSubLevels > 1)
    {
        for (i = 0; i < numSubLevels; ++i)
        {
            // Lookup sub-level instance.
            if (NV_OK == btreeSearch(vaLo, (NODE**)&pCurSubLevelInsts[i],
                                     (NODE*)pLevel->subLevels[i].pInstances))
            {
                const MMU_FMT_LEVEL *pSubLevelFmt = pLevel->pFmt->subLevels + i;
                const NvU64          minVaLimit =
                    mmuFmtLevelVirtAddrLo(pSubLevelFmt, vaLo) +
                        (pCurSubLevelInsts[i]->memSize /
                         pSubLevelFmt->entrySize *
                         mmuFmtLevelPageSize(pSubLevelFmt)) - 1;

                vaLimit = NV_MAX(vaLimit, minVaLimit);
            }
        }
    }

    //
    // The loop is reversed for NV4K: if there are multiple sublevels,
    // handle the small PT first, then the big PT.
    //
    for (i = numSubLevels; i > 0; --i)
    {
        NvBool bChanged    = NV_FALSE;
        NvU32  subLevelIdx = i - 1;
        NvBool bTarget     = (subLevelIdx == subLevel);
        NvBool bInitNv4k   = NV_FALSE;

        //
        // If NV4K is required (when ATS is enabled), acquire the 64K PT
        // whenever the 4K PT has been acquired and the 64K PT was not
        // there.
        //
        if (pWalk->flags.bAtsEnabled && subLevelIdx == 0 &&
            numSubLevels > 1 && !pOpParams->bRelease)
        {
            if (pSubLevelInsts[1] != NULL)
            {
                bTarget = NV_TRUE;
            }
            if (pSubLevelInsts[0] == NULL)
            {
                bInitNv4k = NV_TRUE;
            }
        }

        // Acquire sub-level instance.
        NV_ASSERT_OK_OR_RETURN(
            _mmuWalkLevelInstAcquire(pWalk, pLevel->subLevels + subLevelIdx,
                                     vaLo, vaLimit, bTarget,
                                     pOpParams->bRelease, pOpParams->bCommit,
                                     &bChanged, &pSubLevelInsts[subLevelIdx],
                                     bInitNv4k));
        if (NULL == pSubLevelInsts[subLevelIdx])
        {
            // Skip missing non-target instances.
            NV_ASSERT(pOpParams->bRelease || !bTarget);
            continue;
        }

        // Track info for commit.
        bCommit |= bChanged;
        pSubMemDescs[subLevelIdx] = pSubLevelInsts[subLevelIdx]->pMemDesc;
    }

    // DEBUG assert
    if (pWalk->flags.bAtsEnabled &&
        numSubLevels > 1 &&
        pSubLevelInsts[1] != NULL &&
        pSubLevelInsts[0] == NULL)
    {
        NV_ASSERT(0);
    }

    if (bCommit || pOpParams->bCommit)
    {
        NvBool bDone;

        // Update the current PDE
        bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                      entryIndex, pSubMemDescs);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);

        // Track entry as a PDE.
        mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_IS_PDE);
    }

    return status;
}
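/*
 * Illustrative callback stub (hypothetical, not part of this file): the
 * UpdatePde callback invoked above receives the parent level memory and one
 * memdesc per sub-level, encodes a HW-specific PDE from the sub-level
 * addresses, and writes it at entryIndex. The signature is inferred from
 * the call site.
 *
 *     static NvBool
 *     myUpdatePde(MMU_WALK_USER_CTX *pUserCtx, const MMU_FMT_LEVEL *pLevelFmt,
 *                 const MMU_WALK_MEMDESC *pLevelMem, NvU32 entryIndex,
 *                 const MMU_WALK_MEMDESC **pSubLevels)
 *     {
 *         // Encode each non-NULL sub-level's physical address into the PDE
 *         // value, then write it into pLevelMem at entryIndex.
 *         // Returning NV_FALSE makes the walker fail with NV_ERR_INVALID_STATE.
 *         return NV_TRUE;
 *     }
 */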
/*!
 * Frees the sub-levels of the PDE passed in if their refcount is 0. It
 * also clears the PDE if all sublevels are released.
 */
static void NV_NOINLINE
_mmuWalkPdeRelease
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU64               entryVaLo
)
{
    MMU_WALK_LEVEL_INST    *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS]   = {0};
    NvBool                  bChanged = NV_FALSE;
    NvU32                   subLevel, i;
    MMU_ENTRY_STATE         state = MMU_ENTRY_STATE_INVALID;

    // Apply target state if this is a fill operation.
    if (pOpParams->bFill)
    {
        const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *)pOpParams->pOpCtx;
        state = pTarget->entryState;
    }

    //
    // Loop through the sublevels and free up those with 0 ref count.
    // We operate on a temp copy of the PDE because we want to update the
    // PDE memory before releasing the actual sublevel pointers. We need this order
    // to prevent any state inconsistency between the parent MMU_DESC_PDE and
    // the sublevel MMU_WALK_LEVEL_INST structures.
    //
    for (i = pLevel->pFmt->numSubLevels; i > 0; --i)
    {
        subLevel = i - 1;
        if (NV_OK == btreeSearch(entryVaLo, (NODE**)&pSubLevelInsts[subLevel],
                                 (NODE*)pLevel->subLevels[subLevel].pInstances))
        {
            MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];

            // For ATS NV4K, check if we need to free the big page table.
            if (pLevel->pFmt->numSubLevels == 2 && subLevel == 0)
            {
                if (pWalk->flags.bAtsEnabled)
                {
                    if (pSubLevelInsts[0]->numNv4k ==
                            mmuFmtLevelEntryCount(pLevel->subLevels[0].pFmt) &&
                        (0 == pSubLevelInsts[0]->numReserved) &&
                        (pSubMemDescs[1] == NULL || bChanged == NV_TRUE))
                    {
                        bChanged = NV_TRUE;
                        continue;
                    }
                    else
                    {
                        state = MMU_ENTRY_STATE_IS_PDE;
                        pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
                        continue;
                    }
                }
            }

            if ((0 != (pSubLevelInst->numValid + pSubLevelInst->numSparse)) ||
                (0 != (pSubLevelInst->numReserved + pSubLevelInst->numHybrid)))
            {
                // We've got at least one non-empty sublevel, so leave it mapped.
                state = MMU_ENTRY_STATE_IS_PDE;
                pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
            }
            else if (NULL != pSubLevelInst->pMemDesc)
            {
                // We're going to free a sub-level.
                bChanged = NV_TRUE;
            }
        }
    }

    //
    // The failure path may have aborted early before sub-levels were processed,
    // so also check that the current state matches the expected one.
    //
    bChanged |= (state != mmuWalkGetEntryState(pLevelInst, entryIndex));

    //
    // If we've changed any sublevel we need to update the PDE in the parent
    // Page Directory.
    //
    if (bChanged)
    {
        NvBool bDone;
        NvU32  progress = 0;

        // Init the PDE attribs with the temp PDE which has the cleared sublevel.
        switch (state)
        {
            case MMU_ENTRY_STATE_SPARSE:
            case MMU_ENTRY_STATE_INVALID:
                pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                        pLevel->pFmt,
                                        pLevelInst->pMemDesc,
                                        entryIndex,
                                        entryIndex,
                                        MMU_ENTRY_STATE_SPARSE == state ?
                                            MMU_WALK_FILL_SPARSE : MMU_WALK_FILL_INVALID,
                                        &progress);
                NV_ASSERT_OR_RETURN_VOID(progress == 1);
                // Clear the hybrid flag since all sub-levels are now released.
                if (pLevelInst->pStateTracker[entryIndex].bHybrid)
                {
                    mmuWalkSetEntryHybrid(pLevelInst, entryIndex, NV_FALSE);
                }
                break;
            case MMU_ENTRY_STATE_IS_PDE:
                bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                              entryIndex, pSubMemDescs);
                NV_ASSERT_OR_RETURN_VOID(bDone);
                break;
            default:
                NV_ASSERT_OR_RETURN_VOID(0);
        }

        // Track new state of entry.
        mmuWalkSetEntryState(pLevelInst, entryIndex, state);
    }

    // Free up the actual sublevels from the PDE.
    for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
    {
        MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];
        if (NULL != pSubLevelInst &&
            NULL == pSubMemDescs[subLevel])
        {
            _mmuWalkLevelInstRelease(pWalk, pLevel->subLevels + subLevel,
                                     pSubLevelInst);
        }
    }
}

static void
_mmuWalkLevelInstancesForceFree
(
    MMU_WALK       *pWalk,
    MMU_WALK_LEVEL *pLevel
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvU32                subLevel;

    if (NULL == pLevel)
        return;

    // Free all instances at this level.
    btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    while (NULL != pLevelInst)
    {
        //
        // Since we are force freeing everything, it is okay to reset these fields
        // in order to avoid hitting asserts in _mmuWalkLevelInstRelease.
        //
        pLevelInst->numValid    = 0;
        pLevelInst->numReserved = 0;
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
        btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    }
    pLevel->pInstances = NULL;

    if (NULL != pLevel->subLevels)
    {
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; subLevel++)
        {
            _mmuWalkLevelInstancesForceFree(pWalk, pLevel->subLevels + subLevel);
        }
    }
}