1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2018-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED
25 
26 #include "gpu/mmu/kern_gmmu.h"
27 #include "gpu/mem_mgr/mem_mgr.h"
28 #include "gpu/mem_mgr/mem_utils.h"
29 #include "gpu/bus/kern_bus.h"
30 #include "gpu/nvlink/kernel_nvlink.h"
31 
32 #include "published/ampere/ga100/dev_vm.h"
33 
34 /*!
35  * @brief   Sets the Invalidation scope field in the register
36  *
37  * @param[in]       pGpu
38  * @param[in]       pKernelGmmu
39  * @param[in]       flags
40  * @param[in/out]   TLB_INVALIDATE_PARAMS pointer
41  *
42  * @returns NV_ERR_INVALID_ARGUMENT on input validation
43  *          NV_OK on success
44  */
45 NV_STATUS
46 kgmmuSetTlbInvalidationScope_GA100
47 (
48     OBJGPU                *pGpu,
49     KernelGmmu            *pKernelGmmu,
50     NvU32                  flags,
51     TLB_INVALIDATE_PARAMS *pParams
52 )
53 {
54     switch(flags)
55     {
56         case NV_GMMU_INVAL_SCOPE_ALL_TLBS:
57             pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE,
58                                           _ALL_TLBS, pParams->regVal);
59             break;
60         case NV_GMMU_INVAL_SCOPE_LINK_TLBS:
61             pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE,
62                                           _LINK_TLBS, pParams->regVal);
63             break;
64         case NV_GMMU_INVAL_SCOPE_NON_LINK_TLBS:
65             pParams->regVal = FLD_SET_DRF(_VIRTUAL_FUNCTION_PRIV, _MMU_INVALIDATE, _INVAL_SCOPE,
66                                           _NON_LINK_TLBS, pParams->regVal);
67             break;
68         default:
69             return NV_ERR_INVALID_ARGUMENT;
70     }
71 
72     return NV_OK;
73 }
74 
75 /*!
76  * @brief   Validates fabric base address.
77  *
78  * @param   pKernelGmmu
79  * @param   fabricBaseAddr
80  *
81  * @returns On success, NV_OK.
82  *          On failure, returns NV_ERR_XXX.
83  */
84 NV_STATUS
85 kgmmuValidateFabricBaseAddress_GA100
86 (
87     KernelGmmu *pKernelGmmu,
88     NvU64       fabricBaseAddr
89 )
90 {
91     OBJGPU        *pGpu = ENG_GET_GPU(pKernelGmmu);
92     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
93     NvU64 fbSizeBytes;
94     NvU64 fbUpperLimit;
95 
96     fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;
97 
98     //
99     // Ampere SKUs will be paired with NVSwitches (Limerock) supporting 2K
100     // mapslots that can cover 64GB each. Make sure that the fabric base
101     // address being used is valid to cover whole frame buffer.
102     //
103 
104     // Check if fabric address is aligned to mapslot size.
105     if (fabricBaseAddr & (NVBIT64(36) - 1))
106     {
107         return NV_ERR_INVALID_ARGUMENT;
108     }
109 
110     // Align fbSize to mapslot size.
111     fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36));
112 
113     fbUpperLimit = fabricBaseAddr + fbSizeBytes;
114 
115     // Make sure the address range doesn't go beyond the limit, (2K * 64GB).
116     if (fbUpperLimit > NVBIT64(47))
117     {
118         return NV_ERR_INVALID_ARGUMENT;
119     }
120 
121     return NV_OK;
122 }
123 
124 NV_STATUS
125 kgmmuSetupWarForBug2720120_GA100
126 (
127     KernelGmmu      *pKernelGmmu,
128     GMMU_FMT_FAMILY *pFam
129 )
130 {
131     NV_STATUS            status      = NV_OK;
132     OBJGPU              *pGpu        = ENG_GET_GPU(pKernelGmmu);
133     KernelBus           *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
134     const GMMU_FMT      *pFmt        = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
135     const MMU_FMT_LEVEL *pPageDir1   = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 29);
136     const MMU_FMT_LEVEL *pPageDir0   = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 21);
137     const MMU_FMT_LEVEL *pSmallPT    = mmuFmtFindLevelWithPageShift(pFmt->pRoot, 12);
138     const GMMU_FMT_PDE  *pPde0Fmt    = gmmuFmtGetPde(pFmt, pPageDir0, 1);
139     const GMMU_FMT_PDE  *pPde1Fmt    = gmmuFmtGetPde(pFmt, pPageDir1, 0);
140     NvU8                *pMap        = NULL;
141     void                *pPriv       = NULL;
142     NvU32                sizeOfDWord = sizeof(NvU32);
143     RmPhysAddr           physAddr;
144     RmPhysAddr           physAddrOrig;
145     NvU64                sizeInDWord;
146     NvU32                bar0Addr;
147     NvU32                entryIndex;
148     NvU32                entryIndexHi;
149     NvU32                entryOffset;
150 
151     //
152     // BAR2 is not yet initialized. Thus use either the BAR0 window or
153     // memmap to initialize the given surface.
154     //
155     NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL);
156 
157     // Initialize the memdescs to NULL before use
158     pKernelGmmu->pWarSmallPageTable = NULL;
159     pKernelGmmu->pWarPageDirectory0 = NULL;
160 
161     // Bug 2720120: Allocate a small page table consisting of all invalid entries
162     NV_ASSERT_OK_OR_RETURN(memdescCreate(&pKernelGmmu->pWarSmallPageTable, pGpu,
163                                          mmuFmtLevelSize(pSmallPT),
164                                          RM_PAGE_SIZE, NV_TRUE,
165                                          kgmmuGetPTEAperture(pKernelGmmu),
166                                          kgmmuGetPTEAttr(pKernelGmmu), 0));
167 
168     NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(pKernelGmmu->pWarSmallPageTable), failed);
169 
170     switch (memdescGetAddressSpace(pKernelGmmu->pWarSmallPageTable))
171     {
172         case ADDR_FBMEM:
173             memUtilsMemSetNoBAR2(pGpu, pKernelGmmu->pWarSmallPageTable, 0);
174             break;
175 
176         case ADDR_SYSMEM:
177             // Plain old memmap.
178             NV_ASSERT_OK_OR_GOTO(status, memdescMapOld(pKernelGmmu->pWarSmallPageTable, 0,
179                                                        pKernelGmmu->pWarSmallPageTable->Size,
180                                                        NV_TRUE, // kernel,
181                                                        NV_PROTECT_READ_WRITE,
182                                                        (void **)&pMap,
183                                                        &pPriv), failed);
184 
185             portMemSet(pMap, 0, pKernelGmmu->pWarSmallPageTable->Size);
186 
187             memdescUnmapOld(pKernelGmmu->pWarSmallPageTable, 1, 0, pMap, pPriv);
188             break;
189 
190         default:
191             // Should not happen.
192             status = NV_ERR_INVALID_ARGUMENT;
193             NV_ASSERT_OR_GOTO(status == NV_OK, failed);
194             break;
195     }
196 
197     // The WAR PDE0 points to the small page table allocated above
198     {
199         const GMMU_APERTURE aperture = kgmmuGetMemAperture(pKernelGmmu, pKernelGmmu->pWarSmallPageTable);
200 
201         nvFieldSetBool(&pPde0Fmt->fldVolatile,
202                        memdescGetVolatility(pKernelGmmu->pWarSmallPageTable),
203                        pFam->bug2720120WarPde0.v8);
204         gmmuFieldSetAperture(&pPde0Fmt->fldAperture, aperture,
205                              pFam->bug2720120WarPde0.v8);
206         gmmuFieldSetAddress(gmmuFmtPdePhysAddrFld(pPde0Fmt, aperture),
207                              kgmmuEncodePhysAddr(pKernelGmmu, aperture,
208                                      memdescGetPhysAddr(pKernelGmmu->pWarSmallPageTable,
209                                                         AT_GPU, 0),
210                                      NVLINK_INVALID_FABRIC_ADDR),
211                              pFam->bug2720120WarPde0.v8);
212     }
213 
214     //
215     // Bug 2720120: Allocate a PD0 instance all of whose entries point to
216     // the small page table allocated above
217     //
218     NV_ASSERT_OK_OR_GOTO(status, memdescCreate(&pKernelGmmu->pWarPageDirectory0,
219                                                pGpu, mmuFmtLevelSize(pPageDir0),
220                                                RM_PAGE_SIZE, NV_TRUE,
221                                                kgmmuGetPTEAperture(pKernelGmmu),
222                                                kgmmuGetPTEAttr(pKernelGmmu), 0), failed);
223 
224     NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(pKernelGmmu->pWarPageDirectory0), failed);
225 
226     entryIndexHi = mmuFmtLevelEntryCount(pPageDir0) - 1;
227     switch (memdescGetAddressSpace(pKernelGmmu->pWarPageDirectory0))
228     {
229         case ADDR_FBMEM:
230             //
231             // Set the BAR0 window to encompass the given surface while
232             // saving off the location to where the BAR0 window was
233             // previously pointing.
234             //
235             physAddr = memdescGetPhysAddr(pKernelGmmu->pWarPageDirectory0, AT_GPU, 0);
236             NV_ASSERT_OR_GOTO(NV_IS_ALIGNED64(physAddr, sizeOfDWord), failed);
237 
238             physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
239             NV_ASSERT_OK_OR_GOTO(status,
240                                  kbusSetBAR0WindowVidOffset_HAL(pGpu,
241                                                                 pKernelBus,
242                                                                 physAddr & ~0xffffULL),
243                                  failed);
244 
245             bar0Addr = NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) +
246                           (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)));
247 
248             //
249             // Iterate and initialize the given surface with BAR0
250             // writes.
251             //
252             sizeInDWord = (NvU32)NV_DIV_AND_CEIL(pPageDir0->entrySize, sizeOfDWord);
253             for (entryIndex = 0; entryIndex <= entryIndexHi; entryIndex++)
254             {
255                 entryOffset = entryIndex * pPageDir0->entrySize;
256                 NvU32 i;
257                 for (i = 0; i < sizeInDWord; i++)
258                 {
259                     GPU_REG_WR32(pGpu,
260                                  bar0Addr + entryOffset + (sizeOfDWord * i),
261                                  pFam->bug2720120WarPde0.v32[i]);
262                 }
263             }
264 
265             // Restore where the BAR0 window was previously pointing to
266             NV_ASSERT_OK_OR_GOTO(status,
267                                  kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus,
268                                                                 physAddrOrig),
269                                  failed);
270 
271             break;
272 
273         case ADDR_SYSMEM:
274             // Plain old memmap.
275             NV_ASSERT_OK_OR_GOTO(status, memdescMapOld(pKernelGmmu->pWarPageDirectory0, 0,
276                                                        pKernelGmmu->pWarPageDirectory0->Size,
277                                                        NV_TRUE, // kernel,
278                                                        NV_PROTECT_READ_WRITE,
279                                                        (void **)&pMap,
280                                                        &pPriv), failed);
281 
282             for (entryIndex = 0; entryIndex <= entryIndexHi; entryIndex++)
283             {
284                 entryOffset = entryIndex * pPageDir0->entrySize;
285 
286                 // Memory-mapped write.
287                 portMemCopy(pMap + entryOffset,
288                             pPageDir0->entrySize,
289                             pFam->bug2720120WarPde0.v8,
290                             pPageDir0->entrySize);
291             }
292 
293             memdescUnmapOld(pKernelGmmu->pWarPageDirectory0, 1, 0, pMap, pPriv);
294             break;
295 
296         default:
297             // Should not happen.
298             status = NV_ERR_INVALID_ARGUMENT;
299             NV_ASSERT_OR_GOTO(status == NV_OK, failed);
300             break;
301     }
302 
303     // The WAR PDE1 points to the PD0 instance allocated above
304     {
305         const GMMU_APERTURE aperture = kgmmuGetMemAperture(pKernelGmmu, pKernelGmmu->pWarPageDirectory0);
306 
307         nvFieldSetBool(&pPde1Fmt->fldVolatile,
308                        memdescGetVolatility(pKernelGmmu->pWarPageDirectory0),
309                        pFam->bug2720120WarPde1.v8);
310         gmmuFieldSetAperture(&pPde1Fmt->fldAperture, aperture,
311                              pFam->bug2720120WarPde1.v8);
312         gmmuFieldSetAddress(gmmuFmtPdePhysAddrFld(pPde1Fmt, aperture),
313                              kgmmuEncodePhysAddr(pKernelGmmu, aperture,
314                                      memdescGetPhysAddr(pKernelGmmu->pWarPageDirectory0,
315                                                         AT_GPU, 0),
316                                      NVLINK_INVALID_FABRIC_ADDR),
317                              pFam->bug2720120WarPde1.v8);
318     }
319 
320 failed:
321     if (status != NV_OK)
322     {
323         if (pKernelGmmu->pWarSmallPageTable != NULL)
324         {
325             memdescFree(pKernelGmmu->pWarSmallPageTable);
326             memdescDestroy(pKernelGmmu->pWarSmallPageTable);
327             pKernelGmmu->pWarSmallPageTable = NULL;
328         }
329         if (pKernelGmmu->pWarPageDirectory0 != NULL)
330         {
331             memdescFree(pKernelGmmu->pWarPageDirectory0);
332             memdescDestroy(pKernelGmmu->pWarPageDirectory0);
333             pKernelGmmu->pWarPageDirectory0 = NULL;
334         }
335     }
336     return status;
337 }
338