/*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*!
 * @file
 * @brief Describes the structures and interfaces used to walk N level page tables
 */

/*--------------------------------Includes------------------------------------*/
#if defined(SRT_BUILD)

#include "shrdebug.h"
#else
#include "os/os.h"
#endif
#include "nvport/nvport.h"
#include "nvctassert.h"
#include "mmu_walk_private.h"

/*--------------------------Static Function Prototypes------------------------*/
static NV_STATUS
_mmuWalkLevelInit(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pParent,
                  const MMU_FMT_LEVEL *pLevelFmt, MMU_WALK_LEVEL *pLevel);
static void
_mmuWalkLevelDestroy(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);
static NV_STATUS
_mmuWalkLevelInstAcquire(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         const NvU64 vaLo, const NvU64 vaHi, const NvBool bTarget,
                         const NvBool bRelease, const NvBool bCommit,
                         NvBool *pBChanged, MMU_WALK_LEVEL_INST **ppLevelInst,
                         const NvBool bInitNv4k);
static void
_mmuWalkLevelInstRelease(const MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel,
                         MMU_WALK_LEVEL_INST *pLevelInst);
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU32 subLevel,
                   const NvU64 vaLo, const NvU64 vaHi,
                   MMU_WALK_LEVEL_INST *pSubLevelInsts[]);
static void NV_NOINLINE
_mmuWalkPdeRelease(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                   MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pLevelInst,
                   const NvU32 entryIndex, const NvU64 entryVaLo);
static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts(const MMU_WALK *pWalk, const MMU_WALK_OP_PARAMS *pOpParams,
                                 MMU_WALK_LEVEL *pLevel, MMU_WALK_LEVEL_INST *pSubLevelInsts[],
                                 NvU32 subLevel, NvU64 clippedVaLo, NvU64 clippedVaHi);
static void
_mmuWalkLevelInstancesForceFree(MMU_WALK *pWalk, MMU_WALK_LEVEL *pLevel);

/* -----------------------------Inline Functions----------------------------- */
/*!
 * Returns the @ref MMU_ENTRY_STATE of the entry.
 */
MMU_ENTRY_STATE
mmuWalkGetEntryState(MMU_WALK_LEVEL_INST *pLevelInst, NvU32 entryIndex)
{
    return (MMU_ENTRY_STATE)pLevelInst->pStateTracker[entryIndex].state;
}

/*----------------------------Public Functions--------------------------------*/

NV_STATUS
mmuWalkCreate
(
    const MMU_FMT_LEVEL      *pRootFmt,
    MMU_WALK_USER_CTX        *pUserCtx,
    const MMU_WALK_CALLBACKS *pCb,
    const MMU_WALK_FLAGS      flags,
    MMU_WALK                **ppWalk,
    MMU_WALK_MEMDESC         *pStagingBuffer
)
{
    NV_STATUS status = NV_OK;
    MMU_WALK *pWalk  = NULL;

    NV_ASSERT_OR_RETURN(NULL != pRootFmt, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != pCb,      NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(NULL != ppWalk,   NV_ERR_INVALID_ARGUMENT);

    // Alloc and init walker structure.
    pWalk = portMemAllocNonPaged(sizeof(*pWalk));
    status = (pWalk == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
    NV_ASSERT_OR_GOTO(NV_OK == status, done);
    portMemSet(pWalk, 0, sizeof(*pWalk));

    pWalk->pUserCtx             = pUserCtx;
    pWalk->pCb                  = pCb;
    pWalk->flags                = flags;
    pWalk->pStagingBuffer       = pStagingBuffer;
    pWalk->bUseStagingBuffer    = NV_FALSE;
    pWalk->bInvalidateOnReserve = NV_TRUE;

    // Create level hierarchy.
    status = _mmuWalkLevelInit(pWalk, NULL, pRootFmt, &pWalk->root);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    // Commit.
    *ppWalk = pWalk;

done:
    if (NV_OK != status)
    {
        mmuWalkDestroy(pWalk);
    }
    return status;
}

void
mmuWalkDestroy
(
    MMU_WALK *pWalk
)
{
    if (NULL != pWalk)
    {
        // Destroy level hierarchy.
        _mmuWalkLevelDestroy(pWalk, &pWalk->root);

        // Free walker struct.
        portMemFree(pWalk);
    }
}

NV_STATUS
mmuWalkContinue
(
    MMU_WALK *pWalk
)
{
    return NV_ERR_NOT_SUPPORTED;
}

void
mmuWalkCommit
(
    MMU_WALK *pWalk
)
{
    // TODO
}

MMU_WALK_USER_CTX *
mmuWalkGetUserCtx
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pUserCtx;
}

NV_STATUS
mmuWalkSetUserCtx
(
    MMU_WALK          *pWalk,
    MMU_WALK_USER_CTX *pUserCtx
)
{
    pWalk->pUserCtx = pUserCtx;
    return NV_OK;
}

const MMU_WALK_CALLBACKS *
mmuWalkGetCallbacks
(
    const MMU_WALK *pWalk
)
{
    return pWalk->pCb;
}

void
mmuWalkSetCallbacks
(
    MMU_WALK                 *pWalk,
    const MMU_WALK_CALLBACKS *pCb
)
{
    pWalk->pCb = pCb;
}

void
mmuWalkLevelInstancesForceFree
(
    MMU_WALK *pWalk
)
{
    if (pWalk != NULL)
    {
        _mmuWalkLevelInstancesForceFree(pWalk, &pWalk->root);
    }
}

/*----------------------------Private Functions--------------------------------*/

const MMU_WALK_LEVEL *
mmuWalkFindLevel
(
    const MMU_WALK      *pWalk,
    const MMU_FMT_LEVEL *pLevelFmt
)
{
    const MMU_WALK_LEVEL *pLevel = &pWalk->root;
    while (pLevel->pFmt != pLevelFmt)
    {
        NvU32 subLevel;
        // Single sub-level always continues.
        if (1 == pLevel->pFmt->numSubLevels)
        {
            pLevel = pLevel->subLevels;
            continue;
        }
        // Multi sub-level must pick branch based on target.
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
        {
            if ((pLevel->pFmt->subLevels + subLevel) == pLevelFmt)
            {
                return pLevel->subLevels + subLevel;
            }
        }
        // Nothing found.
        return NULL;
    }
    return pLevel;
}
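/*!
 * Illustrative sketch (not part of this file's API surface): how a client
 * might drive the public walker lifecycle above. The root format, user
 * context, and callback table (pRootFmt, pUserCtx, myCallbacks) are
 * hypothetical placeholders for environment-specific objects; no staging
 * buffer is used here.
 *
 *     MMU_WALK       *pWalk = NULL;
 *     MMU_WALK_FLAGS  flags = {0};
 *     NV_STATUS       status;
 *
 *     status = mmuWalkCreate(pRootFmt, pUserCtx, &myCallbacks, flags,
 *                            &pWalk, NULL);
 *     if (NV_OK == status)
 *     {
 *         // ... perform map/unmap/sparsify operations via the walker ...
 *         mmuWalkDestroy(pWalk);
 *     }
 */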
/*!
 * @brief This function traverses the topology described by @ref
 *        MMU_FMT_LEVEL and @ref MMU_DESC_PDE. The @ref MmuOpFunc
 *        opFunc implements the actions that need to be performed at each
 *        sublevel in the recursion.
 *
 * @param[in] vaLo The lower end of the Virtual Address range that is
 *                 being processed.
 * @param[in] vaHi The upper end of the Virtual Address range that is
 *                 being processed.
 *
 * @return NV_OK if processing this level succeeds.
 *         Other errors, if not.
 */
NV_STATUS mmuWalkProcessPdes
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    NvU64                     vaLo,
    NvU64                     vaHi
)
{
    if (pWalk->flags.bUseIterative)
    {
        NV_STATUS status = NV_OK;
        const MMU_WALK_LEVEL *pLevelOrig = pLevel;
        NV_ASSERT_OR_RETURN(pOpParams != NULL, NV_ERR_INVALID_ARGUMENT);

        // Call opFunc initially to see if we need to walk
        status = pOpParams->opFunc(pWalk,
                                   pOpParams,
                                   pLevel,
                                   pLevelInst,
                                   vaLo,
                                   vaHi);

        //
        // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
        // the recursive MMU Walker would have started recursing down,
        // so here we kick off the iteration.
        // If NV_OK is returned above, the recursive MMU Walker would
        // not recurse at all, so return immediately.
        //
        if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
        {
            status = NV_OK;

            NvU64 vaLevelBase         = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
            NvU32 entryIndexLo        = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
            NvU32 entryIndexHi        = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
            NvU32 entryIndex          = entryIndexLo;
            NvU32 entryIndexFillStart = 0;
            NvU32 entryIndexFillEnd   = 0;
            NvU32 pendingFillCount    = 0;

            //
            // entryIndex, entryIndexHi are modified in the loop itself
            // as we iterate through levels.
            //
            while (entryIndex <= entryIndexHi)
            {
                const NvU64 entryVaLo = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                                   vaLevelBase, entryIndex);
                const NvU64 entryVaHi = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                                   vaLevelBase, entryIndex);
                const NvU64 clippedVaLo = NV_MAX(vaLo, entryVaLo);
                const NvU64 clippedVaHi = NV_MIN(vaHi, entryVaHi);
                const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
                NvU32 subLevel = 0;
                MMU_WALK_LEVEL_INST *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

                // Optimizations for release operations.
                if (pOpParams->bRelease)
                {
                    // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                    if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        !pLevelInst->pStateTracker[entryIndex].bHybrid)
                    {
                        goto check_last_entry;
                    }
                }

                // Optimizations for fill operations.
                if (pOpParams->bFill)
                {
                    const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                    if (pendingFillCount == 0)
                        entryIndexFillStart = entryIndexFillEnd = entryIndex;

                    //
                    // Check if the entire entry's coverage is being filled to
                    // a constant state.
                    //
                    // If this entry is not currently a PDE we can
                    // apply the fill operation directly
                    // at this level and avoid "splitting" the PDE.
                    //
                    // If this entry is currently a PDE we must
                    // clear the entries of the lower levels to free
                    // unused level instances.
                    //
                    if ((pTarget->entryState != currEntryState) &&
                        (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                        (entryVaLo == clippedVaLo) &&
                        (entryVaHi == clippedVaHi))
                    {
                        entryIndexFillEnd = entryIndex;
                        pendingFillCount++;

                        // Not the last iteration, keep batching..
                        if (entryIndex < entryIndexHi)
                        {
                            //
                            // This won't be the last entry, but we'll
                            // do the iteration there
                            //
                            goto check_last_entry;
                        }
                    }

                    if (pendingFillCount != 0)
                    {
                        NvU32 progress = 0;
                        NvU32 index;

                        // Flush pending fills
                        pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                                pLevel->pFmt,
                                                pLevelInst->pMemDesc,
                                                entryIndexFillStart,
                                                entryIndexFillEnd,
                                                pTarget->fillState,
                                                &progress);

                        if (progress != (entryIndexFillEnd - entryIndexFillStart + 1))
                        {
                            status = NV_ERR_INVALID_STATE;
                            NV_ASSERT_OR_GOTO(0, cleanupIter);
                        }

                        for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                            mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                        pendingFillCount = 0;
                    }

                    // Recheck the state after fill. If nothing to do, continue..
                    if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                    {
                        goto check_last_entry;
                    }

                } // End of fill optimizations.

                // Determine the sublevel we need to operate on.
                status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                                   pLevel,
                                                   &subLevel,
                                                   clippedVaLo,
                                                   clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                //
                // Allocate the sublevel instances for the current PDE and update the current
                // Page Dir (i.e. write the PDE into the Page Dir) if needed.
                //
                status = _mmuWalkPdeAcquire(pWalk,
                                            pOpParams,
                                            pLevel,
                                            pLevelInst,
                                            entryIndex,
                                            subLevel,
                                            clippedVaLo,
                                            clippedVaHi,
                                            pSubLevelInsts);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);

                // Release op is done if the target sub-level is absent.
                if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
                {
                    goto check_last_entry;
                }

                //
                // Split sparse PDE's range.
                // When only a subrange of the original PDE's VA range is being operated
                // on we sparsify the remaining range lying outside the operational
                // subrange (clippedVaLo to clippedVaHi)
                //
                if (MMU_ENTRY_STATE_SPARSE == currEntryState)
                {
                    //
                    // Sparsify the lower part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaLo > entryVaLo)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    entryVaLo,
                                                    clippedVaLo - 1);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }

                    //
                    // Sparsify the upper part of the VA range that is outside the
                    // operational subrange.
                    //
                    if (clippedVaHi < entryVaHi)
                    {
                        status = mmuWalkProcessPdes(pWalk,
                                                    &g_opParamsSparsify,
                                                    pLevel->subLevels + subLevel,
                                                    pSubLevelInsts[subLevel],
                                                    clippedVaHi + 1,
                                                    entryVaHi);
                        NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    }
                } // Sparse PDE split

                // Resolve potential conflicts in multiple sized page tables
                if (pLevel->pFmt->numSubLevels != 1 &&
                    !pOpParams->bIgnoreSubLevelConflicts)
                {
                    status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                              pOpParams,
                                                              pLevel,
                                                              pSubLevelInsts,
                                                              subLevel,
                                                              clippedVaLo,
                                                              clippedVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                }

                status = pOpParams->opFunc(pWalk,
                                           pOpParams,
                                           pLevel->subLevels + subLevel,
                                           pSubLevelInsts[subLevel],
                                           clippedVaLo,
                                           clippedVaHi);

                if (NV_ERR_MORE_PROCESSING_REQUIRED == status)
                {
                    //
                    // If NV_ERR_MORE_PROCESSING_REQUIRED is returned above,
                    // the recursive MMU Walker would have recursed down one
                    // more level. In this code block, we keep the iteration
                    // going by doing everything the recursion previously did.
                    //
                    status = NV_OK;

                    // Save off the current state of iteration for this level
                    pLevel->iterInfo.pLevelInst          = pLevelInst;
                    pLevel->iterInfo.vaLo                = vaLo;
                    pLevel->iterInfo.vaHi                = vaHi;
                    pLevel->iterInfo.vaLevelBase         = vaLevelBase;
                    pLevel->iterInfo.entryIndexHi        = entryIndexHi;
                    pLevel->iterInfo.entryIndex          = entryIndex;
                    pLevel->iterInfo.entryIndexFillStart = entryIndexFillStart;
                    pLevel->iterInfo.entryIndexFillEnd   = entryIndexFillEnd;
                    pLevel->iterInfo.pendingFillCount    = pendingFillCount;
                    pLevel->iterInfo.entryVaLo           = entryVaLo;

                    //
                    // Here use variables that would be used in the next recursion downwards.
                    // Calculate new vaLevelBase, entryIndexLo, entryIndexHi, entryIndex
                    //
                    pLevel = pLevel->subLevels + subLevel;

                    vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, clippedVaLo);
                    entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, clippedVaLo);
                    entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, clippedVaHi);

                    //
                    // Now replace the current stack frame with the frame that is
                    // one level down. pLevel, vaLevelBase, and entryIndexHi were
                    // already replaced above.
                    //
                    pLevelInst          = pSubLevelInsts[subLevel];
                    vaLo                = clippedVaLo;
                    vaHi                = clippedVaHi;
                    entryIndex          = entryIndexLo;
                    entryIndexFillStart = 0;
                    entryIndexFillEnd   = 0;
                    pendingFillCount    = 0;
                }
                else
                {
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanupIter);
                    //
                    // If NV_OK is returned above, the recursive MMU Walker would have
                    // reached the target format level and so reached the base case of
                    // its recursion. It would then return from recursive function calls
                    // and call pdeRelease for all levels whose sublevels are done being
                    // processed.
                    //

cleanupIter:
                    // PdeRelease itself immediately since this level does not recurse.
#if defined(__GNUC__) && !defined(__clang__)
                    // gcc falsely reports that entryVaLo may be used uninitialized
                    // here; it is definitely initialized.
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
                    _mmuWalkPdeRelease(pWalk,
                                       pOpParams,
                                       pLevel,
                                       pLevelInst,
                                       entryIndex,
                                       entryVaLo);

check_last_entry:
                    //
                    // If the recursive MMU Walker did a continue on the current level,
                    // then it didn't do a pdeRelease of the current level.
                    // Even with the continue, for the current iteration,
                    // if entryIndex == entryIndexHi, then we're done with this level
                    // and need to do a pdeRelease on the next level up since we would
                    // return from the recursion.
                    //

                    //
                    // If we're at the original level and entryIndex == entryIndexHi,
                    // then we're done and need to exit the entire loop.
                    // If this is true, we've already done the _mmuWalkPdeRelease:
                    // Either we already called _mmuWalkPdeRelease right before this
                    // or we skipped it from a goto check_last_entry continue.
                    // The MMU Walker is re-entrant and will otherwise pick up on
                    // parent levels when mmuWalkProcessPdes is called on sublevels
                    //
                    if ((pLevel == pLevelOrig) && (entryIndex == entryIndexHi))
                    {
                        goto done;
                    }

                    //
                    // Now restore and finish previous frame(s)
                    //
                    // If this is the last processed sublevel of a level or an error has
                    // previously occurred, pdeRelease the level.
                    // Continue doing so for all parent levels.
                    // Once we've reached a non-finished level, iterate to the next entry.
                    //
                    while (entryIndex == entryIndexHi || status != NV_OK)
                    {
                        //
                        // Now replace the current stack frame with the frame that was one
                        // level above. This should never be NULL, since we'll already have
                        // exited after processing the root level. If it is NULL, we can't
                        // clean up any more anyway, so return immediately.
                        //
                        NV_ASSERT_OR_RETURN(pLevel->pParent != NULL, NV_ERR_INVALID_STATE);

                        pLevel              = pLevel->pParent;
                        pLevelInst          = pLevel->iterInfo.pLevelInst;
                        vaLo                = pLevel->iterInfo.vaLo;
                        vaHi                = pLevel->iterInfo.vaHi;
                        vaLevelBase         = pLevel->iterInfo.vaLevelBase;
                        entryIndexHi        = pLevel->iterInfo.entryIndexHi;
                        entryIndex          = pLevel->iterInfo.entryIndex;
                        entryIndexFillStart = pLevel->iterInfo.entryIndexFillStart;
                        entryIndexFillEnd   = pLevel->iterInfo.entryIndexFillEnd;
                        pendingFillCount    = pLevel->iterInfo.pendingFillCount;

                        _mmuWalkPdeRelease(pWalk,
                                           pOpParams,
                                           pLevel,
                                           pLevelInst,
                                           entryIndex,
                                           pLevel->iterInfo.entryVaLo);

                        //
                        // If we're at the original level and entryIndex == entryIndexHi,
                        // then we're done and need to exit the entire loop
                        //
                        if ((pLevel == pLevelOrig) && (entryIndex == entryIndexHi))
                        {
                            goto done;
                        }
                    }

                    //
                    // Once the above loop is done and we reach here, then we're
                    // ready to process the next entry in the list. Only iterate here,
                    // not in the overall loop since we may have iterated down in the
                    // above else block and don't want to increment before processing
                    // the first entry on a new lower level.
                    //
                    entryIndex++;
                }
            } // per entry loop

            //
            // If this assertion fails, it is a result of a programming
            // error in the iterative MMU Walker implementation. We should
            // have iterated back upwards through the MMU state to the original
            // level even on failure.
            //
            NV_ASSERT_OR_RETURN(pLevel != pLevelOrig, NV_ERR_INVALID_STATE);
        }
done:
        return status;
    }
    else
    {
        // Recursive MMU Walker Implementation
        NV_STATUS status   = NV_OK;
        NvU64 vaLevelBase  = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        NvU32 entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaLo);
        NvU32 entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevel->pFmt, vaHi);
        NvU32 entryIndex;
        NvU32 index;
        NvU32 entryIndexFillStart = 0;
        NvU32 entryIndexFillEnd;
        NvU32 pendingFillCount = 0;

        NV_ASSERT_OR_RETURN(NULL != pOpParams, NV_ERR_INVALID_ARGUMENT);

        // Walk over each relevant entry (PDE) within this Page Level
        for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
        {
            const NvU64 entryVaLo = mmuFmtEntryIndexVirtAddrLo(pLevel->pFmt,
                                                               vaLevelBase, entryIndex);
            const NvU64 entryVaHi = mmuFmtEntryIndexVirtAddrHi(pLevel->pFmt,
                                                               vaLevelBase, entryIndex);
            const NvU64 clippedVaLo = NV_MAX(vaLo, entryVaLo);
            const NvU64 clippedVaHi = NV_MIN(vaHi, entryVaHi);
            const MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);
            NvU32 subLevel = 0;
            MMU_WALK_LEVEL_INST *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

            // Optimizations for release operations.
            if (pOpParams->bRelease)
            {
                // Skip this entry if it is neither a PDE nor marked as a hybrid entry.
                if ((MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    !pLevelInst->pStateTracker[entryIndex].bHybrid)
                    continue;
            }

            // Optimizations for fill operations.
            if (pOpParams->bFill)
            {
                const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *) pOpParams->pOpCtx;

                if (pendingFillCount == 0)
                    entryIndexFillStart = entryIndexFillEnd = entryIndex;

                //
                // Check if the entire entry's coverage is being filled to
                // a constant state.
                //
                // If this entry is not currently a PDE we can
                // apply the fill operation directly
                // at this level and avoid "splitting" the PDE.
                //
                // If this entry is currently a PDE we must
                // clear the entries of the lower levels to free
                // unused level instances.
                //
                if ((pTarget->entryState != currEntryState) &&
                    (MMU_ENTRY_STATE_IS_PDE != currEntryState) &&
                    (entryVaLo == clippedVaLo) &&
                    (entryVaHi == clippedVaHi))
                {
                    entryIndexFillEnd = entryIndex;
                    pendingFillCount++;

                    // Not the last iteration, keep batching..
                    if (entryIndex < entryIndexHi)
                        continue;
                }

                if (pendingFillCount != 0)
                {
                    NvU32 progress = 0;

                    // Flush pending fills
                    pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                            pLevel->pFmt,
                                            pLevelInst->pMemDesc,
                                            entryIndexFillStart,
                                            entryIndexFillEnd,
                                            pTarget->fillState,
                                            &progress);

                    NV_ASSERT_OR_RETURN(
                        progress == (entryIndexFillEnd - entryIndexFillStart + 1),
                        NV_ERR_INVALID_STATE);

                    for (index = entryIndexFillStart; index <= entryIndexFillEnd; index++)
                        mmuWalkSetEntryState(pLevelInst, index, pTarget->entryState);

                    pendingFillCount = 0;
                }

                // Recheck the state after fill. If nothing to do, continue..
                if (pTarget->entryState == mmuWalkGetEntryState(pLevelInst, entryIndex))
                    continue;

            } // End of fill optimizations.

            // Determine the sublevel we need to operate on.
            status = pOpParams->selectSubLevel(pOpParams->pOpCtx,
                                               pLevel,
                                               &subLevel,
                                               clippedVaLo,
                                               clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            //
            // Allocate the sublevel instances for the current PDE and update the current
            // Page Dir (i.e. write the PDE into the Page Dir) if needed.
            //
            status = _mmuWalkPdeAcquire(pWalk,
                                        pOpParams,
                                        pLevel,
                                        pLevelInst,
                                        entryIndex,  // PDE index being processed
                                        subLevel,    // Sub level processed within the PDE
                                        clippedVaLo, // Low VA for the PDE
                                        clippedVaHi, // High VA for the PDE
                                        pSubLevelInsts);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

            // Release op is done if the target sub-level is absent.
            if (pOpParams->bRelease && (NULL == pSubLevelInsts[subLevel]))
            {
                continue;
            }

            //
            // Split sparse PDE's range.
            // When only a subrange of the original PDE's VA range is being operated
            // on we sparsify the remaining range lying outside the operational
            // subrange (clippedVaLo to clippedVaHi)
            //
            if (MMU_ENTRY_STATE_SPARSE == currEntryState)
            {
                //
                // Sparsify the lower part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaLo > entryVaLo)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       entryVaLo,
                                                       clippedVaLo - 1);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }

                //
                // Sparsify the upper part of the VA range that is outside the
                // operational subrange.
                //
                if (clippedVaHi < entryVaHi)
                {
                    status = g_opParamsSparsify.opFunc(pWalk,
                                                       &g_opParamsSparsify,
                                                       pLevel->subLevels + subLevel,
                                                       pSubLevelInsts[subLevel],
                                                       clippedVaHi + 1,
                                                       entryVaHi);
                    NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
                }
            } // Sparse PDE split

            // Resolve potential conflicts in multiple sized page tables
            if (pLevel->pFmt->numSubLevels != 1 &&
                !pOpParams->bIgnoreSubLevelConflicts)
            {
                status = _mmuWalkResolveSubLevelConflicts(pWalk,
                                                          pOpParams,
                                                          pLevel,
                                                          pSubLevelInsts,
                                                          subLevel,
                                                          clippedVaLo,
                                                          clippedVaHi);
                NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
            }

            // Recurse to update the next level for this PDE
            status = pOpParams->opFunc(pWalk,
                                       pOpParams,
                                       pLevel->subLevels + subLevel,
                                       pSubLevelInsts[subLevel],
                                       clippedVaLo,
                                       clippedVaHi);
            NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);

cleanup:
            // Free unused sublevel instances. Clear the PDE if all sublevels are deallocated.
            _mmuWalkPdeRelease(pWalk,
                               pOpParams,
                               pLevel,
                               pLevelInst,
                               entryIndex,
                               entryVaLo);

            // Stop processing PDEs if we are in error state.
            if (NV_OK != status)
                break;
        } // per entry loop
        return status;
    }
}
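/*!
 * Illustrative sketch of the op callback contract assumed by
 * mmuWalkProcessPdes above (the callback name and the target-format check
 * are hypothetical; real op implementations live elsewhere in this
 * library). Returning NV_ERR_MORE_PROCESSING_REQUIRED asks the walker to
 * descend into the sub-levels covering the range, while NV_OK means the
 * level was fully handled and no descent is needed.
 *
 *     static NV_STATUS exampleOpFunc(const MMU_WALK *pWalk,
 *                                    const MMU_WALK_OP_PARAMS *pOpParams,
 *                                    MMU_WALK_LEVEL *pLevel,
 *                                    MMU_WALK_LEVEL_INST *pLevelInst,
 *                                    NvU64 vaLo, NvU64 vaHi)
 *     {
 *         if (pLevel->pFmt != pTargetFmt)
 *         {
 *             // Not yet at the target level: ask the walker to descend.
 *             return NV_ERR_MORE_PROCESSING_REQUIRED;
 *         }
 *         // ... process the entries covering [vaLo, vaHi] at this level ...
 *         return NV_OK;
 *     }
 */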
/*!
 * @brief This function allocates the root Page Directory and commits it to
 *        the related channels.
 *
 * @param[in] vaLo    The lower end of the Virtual Address range that is
 *                    being processed.
 * @param[in] vaHi    The upper end of the Virtual Address range that is
 *                    being processed.
 * @param[in] bCommit Force commit the PDB.
 *
 * @return NV_OK if allocating this level succeeds.
 *         Other errors, if not.
 */
NV_STATUS
mmuWalkRootAcquire
(
    MMU_WALK *pWalk,
    NvU64     vaLo,
    NvU64     vaHi,
    NvBool    bCommit
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool bChanged = NV_FALSE;

    // Acquire root level instance memory.
    NV_ASSERT_OK_OR_RETURN(
        _mmuWalkLevelInstAcquire(pWalk, &pWalk->root, vaLo, vaHi,
                                 NV_TRUE, NV_FALSE, bCommit, &bChanged,
                                 &pLevelInst, NV_FALSE /*bInitNv4k*/));

    // We check pLevelInst to catch the corner case, where Commit() is called before PDB allocation.
    if (bChanged || (bCommit && pLevelInst))
    {
        NvBool bDone;

        // Bind this Page Dir to the affected channels
        bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt,
                                      pLevelInst->pMemDesc, NV_FALSE);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);
    }

    return NV_OK;
}

/*!
 * @brief This function releases the root Page Directory
 */
void
mmuWalkRootRelease
(
    MMU_WALK *pWalk
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = pWalk->root.pInstances;
    if (NULL != pLevelInst)
    {
        // Free the level instance if the entry ref count is 0.
        if ((0 == pLevelInst->numValid + pLevelInst->numSparse) &&
            (0 == pLevelInst->numReserved))
        {
            NvBool bDone;

            // Commit NULL root page directory (clear usage).
            bDone = pWalk->pCb->UpdatePdb(pWalk->pUserCtx, pWalk->root.pFmt, NULL, NV_FALSE);
            NV_ASSERT(bDone);

            // Free unused root memory.
            _mmuWalkLevelInstRelease(pWalk, &pWalk->root, pLevelInst);
        }
    }
}

/*!
 * @brief This function updates the @ref MMU_WALK_LEVEL_INST::pStateTracker for an
 *        entry specified by the entryIndex.
 *
 * @param[in] entryIndex    Index of the entry whose state needs to be updated.
 * @param[in] newEntryState The new state of the entry specified by entryIndex.
 */
void
mmuWalkSetEntryState
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    MMU_ENTRY_STATE      newEntryState
)
{
    MMU_ENTRY_STATE currEntryState = mmuWalkGetEntryState(pLevelInst, entryIndex);

    // Decrement ref count for current state
    switch (currEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            NV_ASSERT(0 != pLevelInst->numValid);
            pLevelInst->numValid--;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            NV_ASSERT(0 != pLevelInst->numSparse);
            pLevelInst->numSparse--;
            break;
        case MMU_ENTRY_STATE_NV4K:
            NV_ASSERT(0 != pLevelInst->numNv4k);
            pLevelInst->numNv4k--;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Increment new state ref count
    switch (newEntryState)
    {
        case MMU_ENTRY_STATE_IS_PTE:
        case MMU_ENTRY_STATE_IS_PDE:
            pLevelInst->numValid++;
            break;
        case MMU_ENTRY_STATE_SPARSE:
            pLevelInst->numSparse++;
            break;
        case MMU_ENTRY_STATE_NV4K:
            pLevelInst->numNv4k++;
            break;
        case MMU_ENTRY_STATE_INVALID:
            break;
        default:
            NV_ASSERT(0);
    }

    // Commit new state.
    pLevelInst->pStateTracker[entryIndex].state = newEntryState;
}
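/*!
 * Illustrative sketch of the bookkeeping above (hedged example; the entry
 * index and level instance are assumed to come from an active walk): moving
 * an entry from sparse to PTE decrements numSparse and increments numValid,
 * keeping the per-instance counters consistent with the tracked state.
 *
 *     // Entry was MMU_ENTRY_STATE_SPARSE; after this call numSparse has
 *     // decreased by one and numValid has increased by one.
 *     mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_IS_PTE);
 */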
void
mmuWalkSetEntryReserved
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bReserved
)
{
    if (pLevelInst->pStateTracker[entryIndex].bReserved)
    {
        NV_ASSERT(0 != pLevelInst->numReserved);
        pLevelInst->numReserved--;
    }
    if (bReserved)
    {
        pLevelInst->numReserved++;
    }
    pLevelInst->pStateTracker[entryIndex].bReserved = bReserved;
}

void
mmuWalkSetEntryHybrid
(
    MMU_WALK_LEVEL_INST *pLevelInst,
    NvU32                entryIndex,
    NvBool               bHybrid
)
{
    if (pLevelInst->pStateTracker[entryIndex].bHybrid)
    {
        NV_ASSERT(0 != pLevelInst->numHybrid);
        pLevelInst->numHybrid--;
    }
    if (bHybrid)
    {
        pLevelInst->numHybrid++;
    }
    pLevelInst->pStateTracker[entryIndex].bHybrid = bHybrid;
}

/**
 * @brief Calculate the target entry indices that cover the VA range of the
 *        source entries.
 *
 * @details For example, with a 64K big page table and a 4K small page table,
 *          entry 0 in the 64K PT is aligned to entries 0 to 15 in the 4K PT,
 *          and 4K PTEs 1 to 18 are covered by 64K PTEs 0 to 1.
 *
 *          This is needed by NV4K encoding: updating the big page table
 *          according to the small page table requires translating indices
 *          between the two formats.
 *
 * @param[in]  pPageFmtIn  Source format
 * @param[in]  indexLoIn   The lower input index
 * @param[in]  indexHiIn   The higher input index
 * @param[in]  pPageFmtOut Target format
 * @param[out] pIndexLoOut The lower result index
 * @param[out] pIndexHiOut The higher result index
 */
void
mmuFmtCalcAlignedEntryIndices
(
    const MMU_FMT_LEVEL *pPageFmtIn,
    const NvU32          indexLoIn,
    const NvU32          indexHiIn,
    const MMU_FMT_LEVEL *pPageFmtOut,
    NvU32               *pIndexLoOut,
    NvU32               *pIndexHiOut
)
{
    NvU64 pageSizeIn, pageSizeOut;
    NvU64 pageSizeRatio;
    NV_ASSERT(pIndexLoOut != NULL && pIndexHiOut != NULL);
    NV_ASSERT(pPageFmtIn != NULL && pPageFmtOut != NULL);

    pageSizeIn  = mmuFmtLevelPageSize(pPageFmtIn);
    pageSizeOut = mmuFmtLevelPageSize(pPageFmtOut);

    if (pageSizeIn < pageSizeOut)
    {
        pageSizeRatio = pageSizeOut / pageSizeIn;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn / pageSizeRatio);
        *pIndexHiOut = (NvU32)(indexHiIn / pageSizeRatio);
    }
    else
    {
        pageSizeRatio = pageSizeIn / pageSizeOut;
        NV_ASSERT(NvU64_HI32(pageSizeRatio) == 0);
        *pIndexLoOut = (NvU32)(indexLoIn * pageSizeRatio);
        *pIndexHiOut = (NvU32)((indexHiIn + 1) * pageSizeRatio - 1);
    }
}
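/*!
 * Worked example for the index translation above (illustrative; assumes the
 * usual 64K big / 4K small sub-level pair, i.e. a page size ratio of 16, and
 * hypothetical MMU_FMT_LEVEL pointers pFmt4K and pFmt64K for the two
 * parallel sub-levels):
 *
 *     NvU32 lo, hi;
 *
 *     // Small -> big: 4K PTEs [1, 18] are covered by 64K PTEs [0, 1].
 *     mmuFmtCalcAlignedEntryIndices(pFmt4K, 1, 18, pFmt64K, &lo, &hi);
 *
 *     // Big -> small: 64K PTEs [0, 1] expand to the aligned 4K PTEs [0, 31].
 *     mmuFmtCalcAlignedEntryIndices(pFmt64K, 0, 1, pFmt4K, &lo, &hi);
 */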
/*----------------------------Static Functions--------------------------------*/

static NV_STATUS
_mmuWalkLevelInit
(
    const MMU_WALK      *pWalk,
    MMU_WALK_LEVEL      *pParent,
    const MMU_FMT_LEVEL *pLevelFmt,
    MMU_WALK_LEVEL      *pLevel
)
{
    // Init pointers.
    pLevel->pFmt    = pLevelFmt;
    pLevel->pParent = pParent;

    if (0 != pLevelFmt->numSubLevels)
    {
        NvU32       subLevel;
        const NvU32 size = pLevelFmt->numSubLevels * (NvU32)sizeof(*pLevel->subLevels);

        // Allocate sub-level array.
        pLevel->subLevels = portMemAllocNonPaged(size);
        if (pLevel->subLevels == NULL)
            return NV_ERR_NO_MEMORY;

        portMemSet(pLevel->subLevels, 0, size);

        // Recursively create each sub-level.
        for (subLevel = 0; subLevel < pLevelFmt->numSubLevels; ++subLevel)
        {
            NV_ASSERT_OK_OR_RETURN(
                _mmuWalkLevelInit(pWalk, pLevel, pLevelFmt->subLevels + subLevel,
                                  pLevel->subLevels + subLevel));
        }
    }

    return NV_OK;
}

static void
_mmuWalkLevelDestroy
(
    const MMU_WALK *pWalk,
    MMU_WALK_LEVEL *pLevel
)
{
    NvU32 subLevel;

    if (NULL != pLevel->subLevels)
    {
        // Recursively destroy each sub-level.
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
        {
            _mmuWalkLevelDestroy(pWalk, pLevel->subLevels + subLevel);
        }
        // Free sub-level array.
        portMemFree(pLevel->subLevels);
    }

    // All level instance memory should be freed already.
    NV_ASSERT(NULL == pLevel->pInstances);
}

/**
 * @brief Resolve upcoming state conflicts before mmu walk operations
 *
 * @example Say we are to mmuWalkMap a VA range [vaLo, vaHi] on the small PT.
 * Assume we have a 4K PT and a 64K PT as our small PT and big PT, and [vaLo, vaHi]
 * is a strict subset of the VA range covered by BigPTE[1, 3] and SmallPTE[18, 61].
 * Let's say BigPTE[1, 3] are sparse right now.
 *
 * To resolve the conflict, we need to preserve the sparse state for the part of
 * the VA range that is not going to be mapped. We need to move those states from
 * the BigPT to the SmallPT.
 *
 * Before:
 *    BigPTE[1, 3]: sparse, SmallPTE[16 - 63]: invalid
 *    (BigPTE[1, 3] and SmallPTE[16 - 63] are VA aligned)
 * After:
 *    BigPTE[1, 3]: invalid, SmallPTE[16 - 17]: sparse
 *    SmallPTE[18 - 61]: invalid, will later be mapped
 *    SmallPTE[62 - 63]: sparse
 *
 * @example If we are to mmuWalkMap on the big PT instead of the small PT,
 * and the sparse state was on the small PT, we just need to invalidate the
 * small PTEs.
 *
 * Before:
 *    BigPTE[1, 3]: invalid,
 *    SmallPTE[16 - 63]: sparse
 * After:
 *    BigPTE[1, 3]: invalid, will later be mapped
 *    SmallPTE[16 - 63]: invalid
 *
 * @return NV_OK on success, no other values for now
 */
static NV_STATUS NV_NOINLINE
_mmuWalkResolveSubLevelConflicts
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pSubLevelInsts[],
    NvU32                     subLevelIdx,
    NvU64                     clippedVaLo,
    NvU64                     clippedVaHi
)
{
    NvU32     i           = 0;
    NvU32     progress    = 0;
    NV_STATUS status      = NV_OK;
    NvBool    bConflictLo = NV_FALSE;
    NvBool    bConflictHi = NV_FALSE;
    const MMU_FMT_LEVEL *pLevelFmtBig    = pLevel->subLevels[0].pFmt;
    const MMU_FMT_LEVEL *pLevelFmtSmall  = pLevel->subLevels[1].pFmt;
    MMU_WALK_LEVEL_INST *pLevelBigInst   = pSubLevelInsts[0];
    MMU_WALK_LEVEL_INST *pLevelSmallInst = pSubLevelInsts[1];
    // Entry indices for the target page table
    NvU32 entryIndexLo, entryIndexHi;
    // Entry indices involved in both page tables
    NvU32 indexLo_Small, indexHi_Small, indexLo_Big, indexHi_Big;

    if (0 == subLevelIdx)
    {
        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaLo);
        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtBig, clippedVaHi);
        indexLo_Big  = entryIndexLo;
        indexHi_Big  = entryIndexHi;
        mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
            pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
    }
    else
    {
        entryIndexLo = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaLo);
        entryIndexHi = mmuFmtVirtAddrToEntryIndex(pLevelFmtSmall, clippedVaHi);
        mmuFmtCalcAlignedEntryIndices(pLevelFmtSmall, entryIndexLo,
            entryIndexHi, pLevelFmtBig, &indexLo_Big, &indexHi_Big);
        mmuFmtCalcAlignedEntryIndices(pLevelFmtBig, indexLo_Big, indexHi_Big,
            pLevelFmtSmall, &indexLo_Small, &indexHi_Small);
    }

    // Check if the involved small PTEs need to be sparsified
    if (1 == subLevelIdx && NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        // check lower part
        MMU_ENTRY_STATE entryStateBig;
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexLo_Big);
        bConflictLo = (MMU_ENTRY_STATE_SPARSE == entryStateBig);

        // check higher part
        entryStateBig = mmuWalkGetEntryState(pLevelBigInst, indexHi_Big);
        bConflictHi = (MMU_ENTRY_STATE_SPARSE == entryStateBig);
    }

    if (bConflictLo && entryIndexLo > indexLo_Small)
    {
        // sparsify lower range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, indexLo_Small, entryIndexLo - 1,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == entryIndexLo - indexLo_Small,
            NV_ERR_INVALID_STATE);

        for (i = indexLo_Small; i <= entryIndexLo - 1; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    if (bConflictHi && entryIndexHi < indexHi_Small)
    {
        // sparsify higher range of entries
        pWalk->pCb->FillEntries(pWalk->pUserCtx, pLevelFmtSmall,
            pLevelSmallInst->pMemDesc, entryIndexHi + 1, indexHi_Small,
            MMU_WALK_FILL_SPARSE, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_Small - entryIndexHi,
            NV_ERR_INVALID_STATE);

        for (i = entryIndexHi + 1; i <= indexHi_Small; i++)
        {
            mmuWalkSetEntryState(pLevelSmallInst, i, MMU_ENTRY_STATE_SPARSE);
        }
    }

    // invalidate the VA range in the other page table
    if (NULL != pLevelSmallInst && NULL != pLevelBigInst)
    {
        NvU32 indexLo_tmp, indexHi_tmp;
        const MMU_FMT_LEVEL *pSubLevelFmt;
        MMU_WALK_LEVEL_INST *pSubLevelInst;

        if (subLevelIdx == 0)
        {
            indexLo_tmp   = indexLo_Small;
            indexHi_tmp   = indexHi_Small;
            pSubLevelFmt  = pLevelFmtSmall;
            pSubLevelInst = pLevelSmallInst;
        }
        else
        {
            indexLo_tmp   = indexLo_Big;
            indexHi_tmp   = indexHi_Big;
            pSubLevelFmt  = pLevelFmtBig;
            pSubLevelInst = pLevelBigInst;
        }

        pWalk->pCb->FillEntries(pWalk->pUserCtx, pSubLevelFmt,
            pSubLevelInst->pMemDesc, indexLo_tmp, indexHi_tmp,
            MMU_WALK_FILL_INVALID, &progress);
        NV_ASSERT_OR_RETURN(progress == indexHi_tmp - indexLo_tmp + 1,
            NV_ERR_INVALID_STATE);

        for (i = indexLo_tmp; i <= indexHi_tmp; i++)
        {
            mmuWalkSetEntryState(pSubLevelInst, i, MMU_ENTRY_STATE_INVALID);
        }
    }

    return status;
}

/*!
 * Lazily allocates and initializes a level instance.
 */
static NV_STATUS
_mmuWalkLevelInstAcquire
(
    const MMU_WALK       *pWalk,
    MMU_WALK_LEVEL       *pLevel,
    const NvU64           vaLo,
    const NvU64           vaHi,
    const NvBool          bTarget,
    const NvBool          bRelease,
    const NvBool          bCommit,
    NvBool               *pBChanged,
    MMU_WALK_LEVEL_INST **ppLevelInst,
    const NvBool          bInitNv4k
)
{
    NV_STATUS            status;
    MMU_WALK_MEMDESC    *pOldMem;
    NvU32                oldSize;
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvBool               bNew       = NV_FALSE;

    // Lookup level instance.
    if (NV_OK != btreeSearch(vaLo, (NODE**)&pLevelInst, (NODE*)pLevel->pInstances))
    {
        NvU32 numBytes;

        if (!bTarget || bRelease)
        {
            // Skip missing non-target instances.
            *ppLevelInst = NULL;
            return NV_OK;
        }

        // We only call Commit() on an already allocated page directory.
        NV_ASSERT_OR_RETURN(!bCommit, NV_ERR_INVALID_STATE);

        // Mark as newly allocated.
        bNew = NV_TRUE;

        // Allocate missing target instances.
        pLevelInst = portMemAllocNonPaged(sizeof(*pLevelInst));
        status = (pLevelInst == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst, 0, sizeof(*pLevelInst));

        // Insert the new node into the tree of instances for this page level.
        pLevelInst->node.keyStart = mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo);
        pLevelInst->node.keyEnd   = mmuFmtLevelVirtAddrHi(pLevel->pFmt, vaHi);

        status = btreeInsert(&pLevelInst->node, (NODE**)&pLevel->pInstances);
        NV_ASSERT_OR_GOTO(NV_OK == status, done);

        // Allocate entry tracker.
        numBytes = mmuFmtLevelEntryCount(pLevel->pFmt) * sizeof(MMU_ENTRY_INFO);
        pLevelInst->pStateTracker = portMemAllocNonPaged(numBytes);
        status = (pLevelInst->pStateTracker == NULL) ? NV_ERR_NO_MEMORY : NV_OK;
        NV_ASSERT_OR_GOTO(NV_OK == status, done);
        portMemSet(pLevelInst->pStateTracker, 0, numBytes);
        if (bInitNv4k)
        {
            NvU32 i;
            for (i = 0; i < mmuFmtLevelEntryCount(pLevel->pFmt); ++i)
            {
                mmuWalkSetEntryState(pLevelInst, i, MMU_ENTRY_STATE_NV4K);
            }
        }
    }

    // Save original memory info.
    pOldMem = pLevelInst->pMemDesc;
    oldSize = pLevelInst->memSize;

    // Allocate (possibly reallocating) memory for this level instance.
    status = pWalk->pCb->LevelAlloc(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    mmuFmtLevelVirtAddrLo(pLevel->pFmt, vaLo),
                                    vaHi,
                                    bTarget,
                                    &pLevelInst->pMemDesc,
                                    &pLevelInst->memSize,
                                    pBChanged);
    NV_ASSERT_OR_GOTO(NV_OK == status, done);

    if (*pBChanged)
    {
        const NvU32 entryIndexLo = oldSize / pLevel->pFmt->entrySize;
        const NvU32 entryIndexHi = (pLevelInst->memSize / pLevel->pFmt->entrySize) - 1;
        NvU32       progress     = 0;

        //
        // Default state for new entries:
        // NV4K for the big page table if ATS is enabled.
        //
        MMU_WALK_FILL_STATE newEntryState = bInitNv4k ? MMU_WALK_FILL_NV4K :
                                                        MMU_WALK_FILL_INVALID;

        NV_ASSERT(NULL != pLevelInst->pMemDesc);
        NV_ASSERT(entryIndexLo <= entryIndexHi);

        // We only call Commit() on an already allocated page directory.
        if (bCommit)
        {
            status = NV_ERR_INVALID_STATE;
            NV_ASSERT_OR_GOTO(NV_OK == status, done);
        }

        // Copy old entries from old to new.
        if (entryIndexLo > 0)
        {
            NV_ASSERT(NULL != pWalk->pCb->CopyEntries);
            pWalk->pCb->CopyEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pOldMem,
                                    pLevelInst->pMemDesc,
                                    0,
                                    entryIndexLo - 1,
                                    &progress);
            NV_ASSERT(progress == entryIndexLo);

            // Free old memory.
            pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt,
                                  pLevelInst->node.keyStart, pOldMem);
        }

        if (pWalk->bInvalidateOnReserve)
        {
            // Clear new entries to invalid.
            pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                    pLevel->pFmt,
                                    pLevelInst->pMemDesc,
                                    entryIndexLo,
                                    entryIndexHi,
                                    newEntryState,
                                    &progress);
            NV_ASSERT(progress == entryIndexHi - entryIndexLo + 1);
        }
    }
    else
    {
        // Ensure the memory hasn't changed.
        NV_ASSERT(pOldMem == pLevelInst->pMemDesc && oldSize == pLevelInst->memSize);
    }

    // Commit return.
    *ppLevelInst = pLevelInst;

done:
    // Cleanup newly allocated instance on failure.
    if (NV_OK != status &&
        bNew && NULL != pLevelInst)
    {
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
    }
    return status;
}

/*!
 * Frees an unused level instance.
 */
static void
_mmuWalkLevelInstRelease
(
    const MMU_WALK      *pWalk,
    MMU_WALK_LEVEL      *pLevel,
    MMU_WALK_LEVEL_INST *pLevelInst
)
{
    NV_ASSERT(0 == pLevelInst->numValid);
    NV_ASSERT(0 == pLevelInst->numReserved);
    // Unlink.
    btreeUnlink(&pLevelInst->node, (NODE**)&pLevel->pInstances);
    // Free.
    if (NULL != pLevelInst->pMemDesc)
    {
        pWalk->pCb->LevelFree(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->node.keyStart,
                              pLevelInst->pMemDesc);
    }
    portMemFree(pLevelInst->pStateTracker);
    portMemFree(pLevelInst);
}
/*!
 * This function is used to allocate a sublevel MMU_WALK_LEVEL_INST
 * for a given PDE. If the sublevel allocation succeeds, the parent level is
 * updated.
 */
static NV_STATUS NV_NOINLINE
_mmuWalkPdeAcquire
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU32               subLevel,
    const NvU64               vaLo,
    const NvU64               vaHi,
    MMU_WALK_LEVEL_INST      *pSubLevelInsts[]
)
{
    NV_STATUS status  = NV_OK;
    NvBool    bCommit = NV_FALSE;
    NvU32     i;
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS] = {0};
    NvU64       vaLimit      = vaHi;
    const NvU32 numSubLevels = pLevel->pFmt->numSubLevels;
    MMU_WALK_LEVEL_INST *pCurSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};

    //
    // Determine minimum VA limit of existing sub-levels.
    // This is required to keep parallel partial page tables in sync.
    // MMU HW that supports partial size tables selects the size in the
    // parent PDE so each sub-level *MUST* be the same partial size
    // once allocated.
    //
    if (numSubLevels > 1)
    {
        for (i = 0; i < numSubLevels; ++i)
        {
            // Lookup sub-level instance.
            if (NV_OK == btreeSearch(vaLo, (NODE**)&pCurSubLevelInsts[i],
                                     (NODE*)pLevel->subLevels[i].pInstances))
            {
                const MMU_FMT_LEVEL *pSubLevelFmt = pLevel->pFmt->subLevels + i;
                const NvU64          minVaLimit =
                    mmuFmtLevelVirtAddrLo(pSubLevelFmt, vaLo) +
                        (pCurSubLevelInsts[i]->memSize /
                         pSubLevelFmt->entrySize *
                         mmuFmtLevelPageSize(pSubLevelFmt)) - 1;

                vaLimit = NV_MAX(vaLimit, minVaLimit);
            }
        }
    }

    //
    // The loop is reversed for NV4K: if there are multiple sublevels,
    // handle the small PT first, then the big PT.
    //
    for (i = numSubLevels; i > 0; --i)
    {
        NvBool bChanged    = NV_FALSE;
        NvU32  subLevelIdx = i - 1;
        NvBool bTarget     = (subLevelIdx == subLevel);
        NvBool bInitNv4k   = NV_FALSE;

        //
        // If NV4K is required (when ATS is enabled), acquire the 64K PT
        // whenever the 4K PT has been acquired and the 64K PT was not
        // there.
        //
        if (pWalk->flags.bAtsEnabled && subLevelIdx == 0 &&
            numSubLevels > 1 && !pOpParams->bRelease)
        {
            if (pSubLevelInsts[1] != NULL)
            {
                bTarget = NV_TRUE;
            }
            if (pSubLevelInsts[0] == NULL)
            {
                bInitNv4k = NV_TRUE;
            }
        }

        // Acquire sub-level instance.
        NV_ASSERT_OK_OR_RETURN(
            _mmuWalkLevelInstAcquire(pWalk, pLevel->subLevels + subLevelIdx,
                                     vaLo, vaLimit, bTarget,
                                     pOpParams->bRelease, pOpParams->bCommit,
                                     &bChanged, &pSubLevelInsts[subLevelIdx],
                                     bInitNv4k));
        if (NULL == pSubLevelInsts[subLevelIdx])
        {
            // Skip missing non-target instances.
            NV_ASSERT(pOpParams->bRelease || !bTarget);
            continue;
        }

        // Track info for commit.
        bCommit |= bChanged;
        pSubMemDescs[subLevelIdx] = pSubLevelInsts[subLevelIdx]->pMemDesc;
    }

    // DEBUG assert
    if (pWalk->flags.bAtsEnabled &&
        numSubLevels > 1 &&
        pSubLevelInsts[1] != NULL &&
        pSubLevelInsts[0] == NULL)
    {
        NV_ASSERT(0);
    }

    if (bCommit || pOpParams->bCommit)
    {
        NvBool bDone;

        // Update the current PDE.
        bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                      entryIndex, pSubMemDescs);
        NV_ASSERT_OR_RETURN(bDone, NV_ERR_INVALID_STATE);

        // Track entry as a PDE.
        mmuWalkSetEntryState(pLevelInst, entryIndex, MMU_ENTRY_STATE_IS_PDE);
    }

    return status;
}

/*!
 * Frees the sub-levels of the PDE passed in if their refcount is 0. It
 * also clears the PDE if both sublevels are released.
 */
static void NV_NOINLINE
_mmuWalkPdeRelease
(
    const MMU_WALK           *pWalk,
    const MMU_WALK_OP_PARAMS *pOpParams,
    MMU_WALK_LEVEL           *pLevel,
    MMU_WALK_LEVEL_INST      *pLevelInst,
    const NvU32               entryIndex,
    const NvU64               entryVaLo
)
{
    MMU_WALK_LEVEL_INST    *pSubLevelInsts[MMU_FMT_MAX_SUB_LEVELS] = {0};
    const MMU_WALK_MEMDESC *pSubMemDescs[MMU_FMT_MAX_SUB_LEVELS]   = {0};
    NvBool                  bChanged = NV_FALSE;
    NvU32                   subLevel, i;
    MMU_ENTRY_STATE         state = MMU_ENTRY_STATE_INVALID;

    // Apply target state if this is a fill operation.
    if (pOpParams->bFill)
    {
        const MMU_FILL_TARGET *pTarget = (const MMU_FILL_TARGET *)pOpParams->pOpCtx;
        state = pTarget->entryState;
    }

    //
    // Loop through the sublevels and free up those with 0 ref count.
    // We operate on a temp copy of the PDE because we want to update the
    // PDE memory before releasing the actual sublevel pointers. We need this order
    // to prevent any state inconsistency between the parent MMU_DESC_PDE and
    // the sublevel MMU_WALK_LEVEL_INST structures.
    //
    for (i = pLevel->pFmt->numSubLevels; i > 0; --i)
    {
        subLevel = i - 1;
        if (NV_OK == btreeSearch(entryVaLo, (NODE**)&pSubLevelInsts[subLevel],
                                 (NODE*)pLevel->subLevels[subLevel].pInstances))
        {
            MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];

            // For ATS NV4K, check if we need to free the big page table.
            if (pLevel->pFmt->numSubLevels == 2 && subLevel == 0)
            {
                if (pWalk->flags.bAtsEnabled)
                {
                    if (pSubLevelInsts[0]->numNv4k ==
                            mmuFmtLevelEntryCount(pLevel->subLevels[0].pFmt) &&
                        (0 == pSubLevelInsts[0]->numReserved) &&
                        (pSubMemDescs[1] == NULL || bChanged == NV_TRUE))
                    {
                        bChanged = NV_TRUE;
                        continue;
                    }
                    else
                    {
                        state = MMU_ENTRY_STATE_IS_PDE;
                        pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
                        continue;
                    }
                }
            }

            if ((0 != (pSubLevelInst->numValid + pSubLevelInst->numSparse)) ||
                (0 != (pSubLevelInst->numReserved + pSubLevelInst->numHybrid)))
            {
                // We've got at least one non-empty sublevel, so leave it mapped.
                state = MMU_ENTRY_STATE_IS_PDE;
                pSubMemDescs[subLevel] = pSubLevelInst->pMemDesc;
            }
            else if (NULL != pSubLevelInst->pMemDesc)
            {
                // We're going to free a sub-level.
                bChanged = NV_TRUE;
            }
        }
    }

    //
    // Failure path may have aborted early before sub-levels processed,
    // so also check that current state matches expected.
    //
    bChanged |= (state != mmuWalkGetEntryState(pLevelInst, entryIndex));

    //
    // If we've changed any sublevel we need to update the PDE in the parent
    // Page Directory
    //
    if (bChanged)
    {
        NvBool bDone;
        NvU32  progress = 0;

        // Init the PDE attribs with the temp PDE which has the cleared sublevel
        switch (state)
        {
            case MMU_ENTRY_STATE_SPARSE:
            case MMU_ENTRY_STATE_INVALID:
                pWalk->pCb->FillEntries(pWalk->pUserCtx,
                                        pLevel->pFmt,
                                        pLevelInst->pMemDesc,
                                        entryIndex,
                                        entryIndex,
                                        MMU_ENTRY_STATE_SPARSE == state ?
                                            MMU_WALK_FILL_SPARSE :
                                            MMU_WALK_FILL_INVALID,
                                        &progress);
                NV_ASSERT_OR_RETURN_VOID(progress == 1);
                // Clear the hybrid flag since all sub-levels are now released.
                if (pLevelInst->pStateTracker[entryIndex].bHybrid)
                {
                    mmuWalkSetEntryHybrid(pLevelInst, entryIndex, NV_FALSE);
                }
                break;
            case MMU_ENTRY_STATE_IS_PDE:
                bDone = pWalk->pCb->UpdatePde(pWalk->pUserCtx, pLevel->pFmt, pLevelInst->pMemDesc,
                                              entryIndex, pSubMemDescs);
                NV_ASSERT_OR_RETURN_VOID(bDone);
                break;
            default:
                NV_ASSERT_OR_RETURN_VOID(0);
        }

        // Track new state of entry.
        mmuWalkSetEntryState(pLevelInst, entryIndex, state);
    }

    // Free up the actual sublevels from the PDE.
    for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; ++subLevel)
    {
        MMU_WALK_LEVEL_INST *pSubLevelInst = pSubLevelInsts[subLevel];
        if (NULL != pSubLevelInst &&
            NULL == pSubMemDescs[subLevel])
        {
            _mmuWalkLevelInstRelease(pWalk, pLevel->subLevels + subLevel,
                                     pSubLevelInst);
        }
    }
}

static void
_mmuWalkLevelInstancesForceFree
(
    MMU_WALK       *pWalk,
    MMU_WALK_LEVEL *pLevel
)
{
    MMU_WALK_LEVEL_INST *pLevelInst = NULL;
    NvU32 subLevel;

    if (NULL == pLevel)
        return;

    // Free all instances at this level.
    btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    while (NULL != pLevelInst)
    {
        //
        // Since we are force freeing everything, it is okay to reset these fields
        // in order to avoid hitting asserts in _mmuWalkLevelInstRelease.
        //
        pLevelInst->numValid    = 0;
        pLevelInst->numReserved = 0;
        _mmuWalkLevelInstRelease(pWalk, pLevel, pLevelInst);
        btreeEnumStart(0, (NODE **)&pLevelInst, (NODE*)pLevel->pInstances);
    }
    pLevel->pInstances = NULL;

    if (NULL != pLevel->subLevels)
    {
        for (subLevel = 0; subLevel < pLevel->pFmt->numSubLevels; subLevel++)
        {
            _mmuWalkLevelInstancesForceFree(pWalk, pLevel->subLevels + subLevel);
        }
    }
}