1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2018-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED 25 26 #include "gpu/mmu/kern_gmmu.h" 27 #include "gpu/mem_mgr/mem_mgr.h" 28 #include "gpu/mem_mgr/mem_utils.h" 29 #include "gpu/bus/kern_bus.h" 30 #include "gpu/nvlink/kernel_nvlink.h" 31 32 #include "published/ampere/ga100/dev_vm.h" 33 34 /*! 35 * @brief Sets the Invalidation scope field in the register 36 * 37 * @param[in] pGpu 38 * @param[in] pKernelGmmu 39 * @param[in] flags 40 * @param[in/out] TLB_INVALIDATE_PARAMS pointer 41 * 42 * @returns NV_ERR_INVALID_ARGUMENT on input validation 43 * NV_OK on success 44 */ 45 NV_STATUS 46 kgmmuSetTlbInvalidationScope_GA100 47 ( 48 OBJGPU *pGpu, 49 KernelGmmu *pKernelGmmu, 50 NvU32 flags, 51 TLB_INVALIDATE_PARAMS *pParams 52 ) 53 { 54 switch(flags) 55 { 56 case NV_GMMU_INVAL_SCOPE_ALL_TLBS: 57 pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE, 58 _ALL_TLBS, pParams->regVal); 59 break; 60 case NV_GMMU_INVAL_SCOPE_LINK_TLBS: 61 pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE, 62 _LINK_TLBS, pParams->regVal); 63 break; 64 case NV_GMMU_INVAL_SCOPE_NON_LINK_TLBS: 65 pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE, 66 _NON_LINK_TLBS, pParams->regVal); 67 break; 68 default: 69 return NV_ERR_INVALID_ARGUMENT; 70 } 71 72 return NV_OK; 73 } 74 75 /*! 76 * @brief Validates fabric base address. 77 * 78 * @param pKernelGmmu 79 * @param fabricBaseAddr 80 * 81 * @returns On success, NV_OK. 82 * On failure, returns NV_ERR_XXX. 83 */ 84 NV_STATUS 85 kgmmuValidateFabricBaseAddress_GA100 86 ( 87 KernelGmmu *pKernelGmmu, 88 NvU64 fabricBaseAddr 89 ) 90 { 91 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu); 92 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 93 NvU64 fbSizeBytes; 94 NvU64 fbUpperLimit; 95 96 fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20; 97 98 // 99 // Ampere SKUs will be paired with NVSwitches (Limerock) supporting 2K 100 // mapslots that can cover 64GB each. Make sure that the fabric base 101 // address being used is valid to cover whole frame buffer. 102 // 103 104 // Check if fabric address is aligned to mapslot size. 105 if (fabricBaseAddr & (NVBIT64(36) - 1)) 106 { 107 return NV_ERR_INVALID_ARGUMENT; 108 } 109 110 // Align fbSize to mapslot size. 111 fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36)); 112 113 fbUpperLimit = fabricBaseAddr + fbSizeBytes; 114 115 // Make sure the address range doesn't go beyond the limit, (2K * 64GB). 116 if (fbUpperLimit > NVBIT64(47)) 117 { 118 return NV_ERR_INVALID_ARGUMENT; 119 } 120 121 return NV_OK; 122 } 123 124 NV_STATUS 125 kgmmuSetupWarForBug2720120_GA100 126 ( 127 KernelGmmu *pKernelGmmu, 128 GMMU_FMT_FAMILY *pFam 129 ) 130 { 131 NV_STATUS status = NV_OK; 132 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu); 133 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 134 const GMMU_FMT *pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0); 135 const MMU_FMT_LEVEL *pPageDir1 = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 29); 136 const MMU_FMT_LEVEL *pPageDir0 = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 21); 137 const MMU_FMT_LEVEL *pSmallPT = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 12); 138 const GMMU_FMT_PDE *pPde0Fmt = gmmuFmtGetPde(pFmt, pPageDir0, 1); 139 const GMMU_FMT_PDE *pPde1Fmt = gmmuFmtGetPde(pFmt, pPageDir1, 0); 140 NvU8 *pMap = NULL; 141 void *pPriv = NULL; 142 NvU32 sizeOfDWord = sizeof(NvU32); 143 RmPhysAddr physAddr; 144 RmPhysAddr physAddrOrig; 145 NvU64 sizeInDWord; 146 NvU32 bar0Addr; 147 NvU32 entryIndex; 148 NvU32 entryIndexHi; 149 NvU32 entryOffset; 150 151 // 152 // BAR2 is not yet initialized. Thus use either the BAR0 window or 153 // memmap to initialize the given surface. 154 // 155 NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL); 156 157 // Initialize the memdescs to NULL before use 158 pKernelGmmu->pWarSmallPageTable = NULL; 159 pKernelGmmu->pWarPageDirectory0 = NULL; 160 161 // Bug 2720120: Allocate a small page table consisting of all invalid entries 162 NV_ASSERT_OK_OR_RETURN(memdescCreate(&pKernelGmmu->pWarSmallPageTable, pGpu, 163 mmuFmtLevelSize(pSmallPT), 164 RM_PAGE_SIZE, NV_TRUE, 165 kgmmuGetPTEAperture(pKernelGmmu), 166 kgmmuGetPTEAttr(pKernelGmmu), 0)); 167 168 NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(pKernelGmmu->pWarSmallPageTable), failed); 169 170 switch (memdescGetAddressSpace(pKernelGmmu->pWarSmallPageTable)) 171 { 172 case ADDR_FBMEM: 173 memUtilsMemSetNoBAR2(pGpu, pKernelGmmu->pWarSmallPageTable, 0); 174 break; 175 176 case ADDR_SYSMEM: 177 // Plain old memmap. 178 NV_ASSERT_OK_OR_GOTO(status, memdescMapOld(pKernelGmmu->pWarSmallPageTable, 0, 179 pKernelGmmu->pWarSmallPageTable->Size, 180 NV_TRUE, // kernel, 181 NV_PROTECT_READ_WRITE, 182 (void **)&pMap, 183 &pPriv), failed); 184 185 portMemSet(pMap, 0, pKernelGmmu->pWarSmallPageTable->Size); 186 187 memdescUnmapOld(pKernelGmmu->pWarSmallPageTable, 1, 0, pMap, pPriv); 188 break; 189 190 default: 191 // Should not happen. 192 status = NV_ERR_INVALID_ARGUMENT; 193 NV_ASSERT_OR_GOTO(status == NV_OK, failed); 194 break; 195 } 196 197 // The WAR PDE0 points to the small page table allocated above 198 { 199 const GMMU_APERTURE aperture = kgmmuGetMemAperture(pKernelGmmu, pKernelGmmu->pWarSmallPageTable); 200 201 nvFieldSetBool(&pPde0Fmt->fldVolatile, 202 memdescGetVolatility(pKernelGmmu->pWarSmallPageTable), 203 pFam->bug2720120WarPde0.v8); 204 gmmuFieldSetAperture(&pPde0Fmt->fldAperture, aperture, 205 pFam->bug2720120WarPde0.v8); 206 gmmuFieldSetAddress(gmmuFmtPdePhysAddrFld(pPde0Fmt, aperture), 207 kgmmuEncodePhysAddr(pKernelGmmu, aperture, 208 memdescGetPhysAddr(pKernelGmmu->pWarSmallPageTable, 209 AT_GPU, 0), 210 NVLINK_INVALID_FABRIC_ADDR), 211 pFam->bug2720120WarPde0.v8); 212 } 213 214 // 215 // Bug 2720120: Allocate a PD0 instance all of whose entries point to 216 // the small page table allocated above 217 // 218 NV_ASSERT_OK_OR_GOTO(status, memdescCreate(&pKernelGmmu->pWarPageDirectory0, 219 pGpu, mmuFmtLevelSize(pPageDir0), 220 RM_PAGE_SIZE, NV_TRUE, 221 kgmmuGetPTEAperture(pKernelGmmu), 222 kgmmuGetPTEAttr(pKernelGmmu), 0), failed); 223 224 NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(pKernelGmmu->pWarPageDirectory0), failed); 225 226 entryIndexHi = mmuFmtLevelEntryCount(pPageDir0) - 1; 227 switch (memdescGetAddressSpace(pKernelGmmu->pWarPageDirectory0)) 228 { 229 case ADDR_FBMEM: 230 // 231 // Set the BAR0 window to encompass the given surface while 232 // saving off the location to where the BAR0 window was 233 // previously pointing. 234 // 235 physAddr = memdescGetPhysAddr(pKernelGmmu->pWarPageDirectory0, AT_GPU, 0); 236 NV_ASSERT_OR_GOTO(NV_IS_ALIGNED64(physAddr, sizeOfDWord), failed); 237 238 physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus); 239 NV_ASSERT_OK_OR_GOTO(status, 240 kbusSetBAR0WindowVidOffset_HAL(pGpu, 241 pKernelBus, 242 physAddr & ~0xffffULL), 243 failed); 244 245 bar0Addr = NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) + 246 (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus))); 247 248 // 249 // Iterate and initialize the given surface with BAR0 250 // writes. 251 // 252 sizeInDWord = (NvU32)NV_DIV_AND_CEIL(pPageDir0->entrySize, sizeOfDWord); 253 for (entryIndex = 0; entryIndex <= entryIndexHi; entryIndex++) 254 { 255 entryOffset = entryIndex * pPageDir0->entrySize; 256 NvU32 i; 257 for (i = 0; i < sizeInDWord; i++) 258 { 259 GPU_REG_WR32(pGpu, 260 bar0Addr + entryOffset + (sizeOfDWord * i), 261 pFam->bug2720120WarPde0.v32[i]); 262 } 263 } 264 265 // Restore where the BAR0 window was previously pointing to 266 NV_ASSERT_OK_OR_GOTO(status, 267 kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, 268 physAddrOrig), 269 failed); 270 271 break; 272 273 case ADDR_SYSMEM: 274 // Plain old memmap. 275 NV_ASSERT_OK_OR_GOTO(status, memdescMapOld(pKernelGmmu->pWarPageDirectory0, 0, 276 pKernelGmmu->pWarPageDirectory0->Size, 277 NV_TRUE, // kernel, 278 NV_PROTECT_READ_WRITE, 279 (void **)&pMap, 280 &pPriv), failed); 281 282 for (entryIndex = 0; entryIndex <= entryIndexHi; entryIndex++) 283 { 284 entryOffset = entryIndex * pPageDir0->entrySize; 285 286 // Memory-mapped write. 287 portMemCopy(pMap + entryOffset, 288 pPageDir0->entrySize, 289 pFam->bug2720120WarPde0.v8, 290 pPageDir0->entrySize); 291 } 292 293 memdescUnmapOld(pKernelGmmu->pWarPageDirectory0, 1, 0, pMap, pPriv); 294 break; 295 296 default: 297 // Should not happen. 298 status = NV_ERR_INVALID_ARGUMENT; 299 NV_ASSERT_OR_GOTO(status == NV_OK, failed); 300 break; 301 } 302 303 // The WAR PDE1 points to the PD0 instance allocated above 304 { 305 const GMMU_APERTURE aperture = kgmmuGetMemAperture(pKernelGmmu, pKernelGmmu->pWarPageDirectory0); 306 307 nvFieldSetBool(&pPde1Fmt->fldVolatile, 308 memdescGetVolatility(pKernelGmmu->pWarPageDirectory0), 309 pFam->bug2720120WarPde1.v8); 310 gmmuFieldSetAperture(&pPde1Fmt->fldAperture, aperture, 311 pFam->bug2720120WarPde1.v8); 312 gmmuFieldSetAddress(gmmuFmtPdePhysAddrFld(pPde1Fmt, aperture), 313 kgmmuEncodePhysAddr(pKernelGmmu, aperture, 314 memdescGetPhysAddr(pKernelGmmu->pWarPageDirectory0, 315 AT_GPU, 0), 316 NVLINK_INVALID_FABRIC_ADDR), 317 pFam->bug2720120WarPde1.v8); 318 } 319 320 failed: 321 if (status != NV_OK) 322 { 323 if (pKernelGmmu->pWarSmallPageTable != NULL) 324 { 325 memdescFree(pKernelGmmu->pWarSmallPageTable); 326 memdescDestroy(pKernelGmmu->pWarSmallPageTable); 327 pKernelGmmu->pWarSmallPageTable = NULL; 328 } 329 if (pKernelGmmu->pWarPageDirectory0 != NULL) 330 { 331 memdescFree(pKernelGmmu->pWarPageDirectory0); 332 memdescDestroy(pKernelGmmu->pWarPageDirectory0); 333 pKernelGmmu->pWarPageDirectory0 = NULL; 334 } 335 } 336 return status; 337 } 338