1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "gpu/gpu.h"
25 #include "gpu/mmu/kern_gmmu.h"
26 #include "gpu/mem_sys/kern_mem_sys.h"
27 #include "gpu/mem_mgr/mem_mgr.h"
28 #include "gpu/nvlink/kernel_nvlink.h"
29 #include "gpu/bus/kern_bus.h"
30 #include "mem_mgr/gpu_vaspace.h"
31 #include "mmu/mmu_walk.h"
32 #include "vgpu/vgpu_events.h"
33 
34 /*!
35  * @file
36  * @brief struct MMU_WALK_CALLBACKS g_bar2WalkCallbacks and the callback
37  *        function implementations.
38  */
39 
40 /*!
41  * Implementation of @ref MmuWalkCBUpdatePde for BAR2
42  */
43 static NvBool
_bar2WalkCBUpdatePdb(MMU_WALK_USER_CTX * pUserCtx,const MMU_FMT_LEVEL * pRootFmt,const MMU_WALK_MEMDESC * pRootMem,const NvBool bIgnoreChannelBusy)44 _bar2WalkCBUpdatePdb
45 (
46     MMU_WALK_USER_CTX       *pUserCtx,
47     const MMU_FMT_LEVEL     *pRootFmt,
48     const MMU_WALK_MEMDESC  *pRootMem,
49     const NvBool             bIgnoreChannelBusy
50 )
51 {
52     OBJGPU             *pGpu        = pUserCtx->pGpu;
53     KernelBus          *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
54     KernelGmmu         *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
55     NV_STATUS           status      = NV_OK;
56     NvU32               gfid;
57     NvBool              bUseTempMemDesc;
58 
59     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, NV_FALSE);
60 
61     bUseTempMemDesc = pKernelBus->bar2[gfid].bBootstrap &&
62                       kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
63                       (NULL != pKernelBus->bar2[gfid].pPDEMemDescForBootstrap);
64 
65     if (NULL == pRootMem)
66     {
67         //
68         // Ignoring uncommits for now since kbusInitInstBlk_HAL can't handle
69         // NULL memdesc and it doesn't matter functionally.
70         //
71         goto done;
72     }
73 
74     switch (pKernelBus->InstBlkAperture)
75     {
76         // BAR2 will use the default big page size chosen by the system.
77         default:
78         case ADDR_FBMEM:
79             if (pKernelBus->bar2[gfid].bBootstrap)
80             {
81                 status = kbusInitInstBlk_HAL(pGpu, pKernelBus,
82                                              NULL /* use BAR0 window */,
83                                              (bUseTempMemDesc ?
84                                              pKernelBus->bar2[gfid].pPDEMemDescForBootstrap :
85                                              pKernelBus->bar2[gfid].pPDEMemDesc),
86                                              pKernelBus->bar2[gfid].vaLimit,
87                                              kgmmuGetBigPageSize_HAL(pKernelGmmu), NULL);
88             }
89             else
90             {
91                 status = kbusInitInstBlk_HAL(pGpu, pKernelBus,
92                                              pKernelBus->bar2[gfid].pInstBlkMemDesc,
93                                              pKernelBus->bar2[gfid].pPDEMemDesc,
94                                              pKernelBus->bar2[gfid].vaLimit,
95                                              kgmmuGetBigPageSize_HAL(pKernelGmmu), NULL);
96             }
97             NV_ASSERT_OR_GOTO(NV_OK == status, done);
98             break;
99         case ADDR_SYSMEM:
100             status = kbusInitInstBlk_HAL(pGpu, pKernelBus,
101                                         pKernelBus->bar2[gfid].pInstBlkMemDesc,
102                                         (bUseTempMemDesc ?
103                                         pKernelBus->bar2[gfid].pPDEMemDescForBootstrap :
104                                         pKernelBus->bar2[gfid].pPDEMemDesc),
105                                         pKernelBus->bar2[gfid].vaLimit,
106                                         kgmmuGetBigPageSize_HAL(pKernelGmmu), NULL);
107             NV_ASSERT_OR_GOTO(NV_OK == status, done);
108             break;
109     }
110 
111 done:
112     return NV_OK == status;
113 }
114 
115 /*!
116  * Implementation of @ref MmuWalkCBUpdatePde for BAR2
117  */
118 static void
_bar2WalkCBFillEntries(MMU_WALK_USER_CTX * pUserCtx,const MMU_FMT_LEVEL * pLevelFmt,const MMU_WALK_MEMDESC * pLevelMem,const NvU32 entryIndexLo,const NvU32 entryIndexHi,const MMU_WALK_FILL_STATE fillState,NvU32 * pProgress)119 _bar2WalkCBFillEntries
120 (
121     MMU_WALK_USER_CTX         *pUserCtx,
122     const MMU_FMT_LEVEL       *pLevelFmt,
123     const MMU_WALK_MEMDESC    *pLevelMem,
124     const NvU32                entryIndexLo,
125     const NvU32                entryIndexHi,
126     const MMU_WALK_FILL_STATE  fillState,
127     NvU32                     *pProgress
128 )
129 {
130     OBJGPU                *pGpu         = pUserCtx->pGpu;
131     KernelBus             *pKernelBus   = GPU_GET_KERNEL_BUS(pGpu);
132     KernelGmmu            *pKernelGmmu  = GPU_GET_KERNEL_GMMU(pGpu);
133     NvU32                  gfid         = pUserCtx->gfid;
134     const GMMU_FMT        *pFmt         = NULL;
135     const GMMU_FMT_FAMILY *pFam         = NULL;
136     MEMORY_DESCRIPTOR     *pMemDesc     = (MEMORY_DESCRIPTOR*)pLevelMem;
137     NvU8                  *pMap         = NULL;
138     void                  *pPriv        = NULL;
139     NV_STATUS              status       = NV_OK;
140     GMMU_ENTRY_VALUE       entryValue;
141     ADDRESS_TRANSLATION    addressTranslation = AT_GPU;
142     NvU32                  sizeInDWord  = (NvU32)NV_CEIL(pLevelFmt->entrySize, sizeof(NvU32));
143     NvU32                  entryIndex;
144     NvU32                  entryOffset;
145     NvU64                  entryStart;
146     NvU32                  i;
147 
148     pFmt = pKernelBus->bar2[gfid].pFmt;
149     pFam = kgmmuFmtGetFamily(pKernelGmmu, pFmt->version);
150 
151     // Determine what entry value to write.
152     switch (fillState)
153     {
154         case MMU_WALK_FILL_INVALID:
155             portMemSet(&entryValue, 0, sizeof(entryValue));
156             break;
157         case MMU_WALK_FILL_SPARSE:
158             if (pLevelFmt->numSubLevels > 0)
159             {
160                 // Select sparse entry template based on number of sub-levels.
161                 if (pLevelFmt->numSubLevels > 1)
162                 {
163                     entryValue = pFam->sparsePdeMulti;
164                 }
165                 else
166                 {
167                     NV_ASSERT(pLevelFmt->numSubLevels == 1);
168                     entryValue = pFam->sparsePde;
169                 }
170             }
171             else
172             {
173                 entryValue = pFam->sparsePte;
174             }
175             break;
176         // case MMU_WALK_FILL_NV4K not supported on bar2 gmmu
177         default:
178             NV_ASSERT(0);
179     }
180 
181     // Determine how to write the entry value.
182     if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
183     {
184         if (kbusIsBarAccessBlocked(pKernelBus))
185         {
186             MemoryManager   *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
187             TRANSFER_SURFACE surf           = {0};
188             NvU32            sizeOfEntries;
189 
190             NV_ASSERT_OR_RETURN_VOID(pKernelBus->virtualBar2[gfid].pPageLevels == NULL);
191 
192             surf.pMemDesc = pMemDesc;
193             surf.offset = entryIndexLo * pLevelFmt->entrySize;
194 
195             sizeOfEntries = (entryIndexHi - entryIndexLo + 1) * pLevelFmt->entrySize;
196 
197             pMap = memmgrMemBeginTransfer(pMemoryManager, &surf, sizeOfEntries,
198                                           TRANSFER_FLAGS_SHADOW_ALLOC);
199 
200             for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
201             {
202                 NvU32 index = (entryIndex - entryIndexLo) * pLevelFmt->entrySize;
203                 portMemCopy(&pMap[index], pLevelFmt->entrySize,
204                             entryValue.v8, pLevelFmt->entrySize);
205             }
206 
207             memmgrMemEndTransfer(pMemoryManager, &surf, sizeOfEntries,
208                                  TRANSFER_FLAGS_SHADOW_ALLOC);
209         }
210         else if (pKernelBus->bar2[gfid].bBootstrap)
211         {
212             if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
213             {
214                 pMap = kbusCpuOffsetInBar2WindowGet(pGpu, pKernelBus, pMemDesc);
215 
216                 for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
217                 {
218                     entryOffset = entryIndex * pLevelFmt->entrySize;
219 
220                     // Memory write via BAR2's CPU mapping.
221                     portMemCopy(pMap + entryOffset,
222                                 pLevelFmt->entrySize,
223                                 entryValue.v8,
224                                 pLevelFmt->entrySize);
225                 }
226             }
227             else
228             {
229                 //
230                 // No CPU mapping to the BAR2 VAS page levels is available yet.
231                 // Must use the BAR0 window to directly write to the physical
232                 // addresses where the BAR2 VAS page levels are located in FB.
233                 //
234                 NV_ASSERT_OR_RETURN_VOID(pKernelBus->virtualBar2[gfid].pPageLevels == NULL);
235 
236                 for ( entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
237                 {
238                     entryOffset = entryIndex * pLevelFmt->entrySize;
239                     entryStart = memdescGetPhysAddr(pMemDesc, FORCE_VMMU_TRANSLATION(pMemDesc, addressTranslation), entryOffset);
240                     for (i = 0; i < sizeInDWord; i++)
241                     {
242                         // BAR0 write.
243                         status = kbusMemAccessBar0Window_HAL(pGpu, pKernelBus,
244                                               (entryStart + (sizeof(NvU32) * i)),
245                                               &entryValue.v32[i],
246                                               sizeof(NvU32),
247                                               NV_FALSE,
248                                               ADDR_FBMEM);
249                         NV_ASSERT_OR_RETURN_VOID(NV_OK == status);
250                     }
251                 }
252             }
253         }
254         else
255         {
256             //
257             // Determine the start of the desired page level offsetted from
258             // the CPU mapping to the start of the BAR2 VAS page levels.
259             //
260 
261             NV_ASSERT_OR_RETURN_VOID(pKernelBus->virtualBar2[gfid].pPageLevels != NULL);
262 
263             pMap = memdescGetPhysAddr(pMemDesc, addressTranslation, 0) -
264                                   pKernelBus->bar2[gfid].pdeBase +
265                                   pKernelBus->virtualBar2[gfid].pPageLevels;
266 
267             for ( entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
268             {
269                 entryOffset = entryIndex * pLevelFmt->entrySize;
270 
271                 // Memory write via BAR2.
272                 portMemCopy(pMap + entryOffset,
273                             pLevelFmt->entrySize,
274                             entryValue.v8,
275                             pLevelFmt->entrySize);
276             }
277         }
278     }
279     else
280     {
281         NV_ASSERT(memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM);
282 
283         // Plain old memmap.
284         status = memdescMapOld(pMemDesc, 0,
285                                pMemDesc->Size,
286                                NV_TRUE, // kernel,
287                                NV_PROTECT_READ_WRITE,
288                                (void **)&pMap,
289                                &pPriv);
290         NV_ASSERT_OR_RETURN_VOID(NV_OK == status);
291 
292         for ( entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
293         {
294             entryOffset = entryIndex * pLevelFmt->entrySize;
295 
296             // Memory-mapped write.
297             portMemCopy(pMap + entryOffset,
298                         pLevelFmt->entrySize,
299                         entryValue.v8,
300                         pLevelFmt->entrySize);
301         }
302 
303         memdescUnmapOld(pMemDesc, 1, 0, pMap, pPriv);
304     }
305 
306     *pProgress = entryIndexHi - entryIndexLo + 1;
307 }
308 
309 /*!
310  * Implementation of @ref MmuWalkCBUpdatePde for BAR2
311  */
312 static NvBool
_bar2WalkCBUpdatePde(MMU_WALK_USER_CTX * pUserCtx,const MMU_FMT_LEVEL * pLevelFmt,const MMU_WALK_MEMDESC * pLevelMem,const NvU32 entryIndex,const MMU_WALK_MEMDESC ** pSubLevels)313 _bar2WalkCBUpdatePde
314 (
315     MMU_WALK_USER_CTX       *pUserCtx,
316     const MMU_FMT_LEVEL     *pLevelFmt,
317     const MMU_WALK_MEMDESC  *pLevelMem,
318     const NvU32              entryIndex,
319     const MMU_WALK_MEMDESC **pSubLevels
320 )
321 {
322     OBJGPU             *pGpu        = pUserCtx->pGpu;
323     KernelGmmu         *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
324     KernelBus          *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
325     NvU32               gfid;
326     const GMMU_FMT     *pFmt;
327     MEMORY_DESCRIPTOR  *pMemDesc    = (MEMORY_DESCRIPTOR*)pLevelMem;
328     NvU8               *pMap        = NULL;
329     void               *pPriv       = NULL;
330     NV_STATUS           status      = NV_OK;
331     GMMU_ENTRY_VALUE    entry;
332     NvU32               i;
333     NvU32               sizeInDWord;
334     NvU32               entryOffset;
335     NvU64               entryStart;
336 
337     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, NV_FALSE);
338 
339     pFmt = pKernelBus->bar2[gfid].pFmt;
340 
341     // Clear out the temp copy of the PDE
342     portMemSet(entry.v8, 0, pLevelFmt->entrySize);
343 
344     for (i = 0; i < pLevelFmt->numSubLevels; ++i)
345     {
346         const GMMU_FMT_PDE *pPde        = gmmuFmtGetPde(pFmt, pLevelFmt, i);
347         MEMORY_DESCRIPTOR  *pSubMemDesc = (MEMORY_DESCRIPTOR*)pSubLevels[i];
348 
349         if (NULL != pSubMemDesc)
350         {
351             const GMMU_APERTURE       aperture = kgmmuGetMemAperture(pKernelGmmu, pSubMemDesc);
352             const GMMU_FIELD_ADDRESS *pFldAddr = gmmuFmtPdePhysAddrFld(pPde, aperture);
353             const NvU64               physAddr = memdescGetPhysAddr(pSubMemDesc, AT_GPU, 0);
354 
355             // Set fields within the temp PDE
356             if (pFmt->version == GMMU_FMT_VERSION_3)
357             {
358                 NvU32 pdePcfHw  = 0;
359                 NvU32 pdePcfSw  = 0;
360 
361                 pdePcfSw |= memdescGetVolatility(pSubMemDesc) ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
362                 NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK),
363                                       NV_ERR_INVALID_ARGUMENT);
364                 nvFieldSet32(&pPde->fldPdePcf, pdePcfHw, entry.v8);
365             }
366             else
367             {
368                 nvFieldSetBool(&pPde->fldVolatile, memdescGetVolatility(pSubMemDesc), entry.v8);
369             }
370             gmmuFieldSetAperture(&pPde->fldAperture, aperture, entry.v8);
371             gmmuFieldSetAddress(pFldAddr,
372                 kgmmuEncodePhysAddr(pKernelGmmu, aperture, physAddr,
373                     NVLINK_INVALID_FABRIC_ADDR),
374                 entry.v8);
375 
376             NV_PRINTF(LEVEL_INFO, "    SubLevel %u = PA 0x%llX\n", i,
377                       physAddr);
378         }
379         else
380         {
381             NV_PRINTF(LEVEL_INFO, "    SubLevel %u = INVALID\n", i);
382         }
383     }
384 
385     entryOffset  = entryIndex * pLevelFmt->entrySize;
386 
387     if (pKernelBus->PDEBAR2Aperture == ADDR_FBMEM)
388     {
389         if (kbusIsBarAccessBlocked(pKernelBus))
390         {
391             MemoryManager   *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
392             TRANSFER_SURFACE surf           = {0};
393 
394             NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[gfid].pPageLevels == NULL,
395                                 NV_FALSE);
396 
397             surf.pMemDesc = pMemDesc;
398             surf.offset = entryOffset;
399 
400             NV_ASSERT_OR_RETURN(memmgrMemWrite(pMemoryManager, &surf,
401                                                entry.v8, pLevelFmt->entrySize,
402                                                TRANSFER_FLAGS_NONE) ==  NV_OK,
403                                 NV_FALSE);
404         }
405         // If we are setting up BAR2, we need special handling.
406         else if (pKernelBus->bar2[gfid].bBootstrap)
407         {
408             if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
409             {
410                 pMap = kbusCpuOffsetInBar2WindowGet(pGpu, pKernelBus, pMemDesc);
411                 portMemCopy(pMap + entryOffset, pLevelFmt->entrySize, entry.v8, pLevelFmt->entrySize);
412             }
413             else
414             {
415                 entryStart  = memdescGetPhysAddr(pMemDesc, AT_PA, entryOffset);
416                 sizeInDWord = (NvU32)NV_CEIL(pLevelFmt->entrySize, sizeof(NvU32));
417 
418                 for (i = 0; i < sizeInDWord; i++)
419                 {
420                     status = kbusMemAccessBar0Window_HAL(pGpu, pKernelBus,
421                                           (entryStart + (sizeof(NvU32) * i)),
422                                           &entry.v32[i],
423                                           sizeof(NvU32),
424                                           NV_FALSE,
425                                           ADDR_FBMEM);
426                     NV_ASSERT_OR_RETURN(NV_OK == status, NV_FALSE);
427                 }
428             }
429         }
430         else if (pKernelBus->bar2[gfid].bMigrating || IS_GFID_VF(gfid) ||
431                  KBUS_BAR0_PRAMIN_DISABLED(pGpu))
432         {
433             NV_ASSERT(NULL != pKernelBus->virtualBar2[gfid].pPageLevels);
434 
435             pMap = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) -
436                                   pKernelBus->bar2[gfid].pdeBase +
437                                   pKernelBus->virtualBar2[gfid].pPageLevels;
438             NV_ASSERT(NULL != pMap);
439             portMemCopy(pMap + entryOffset, pLevelFmt->entrySize, entry.v8, pLevelFmt->entrySize);
440         }
441         else
442         {
443             NV_ASSERT_OR_RETURN(0, NV_FALSE); // Not yet supported.
444         }
445     }
446     else if (pKernelBus->PDEBAR2Aperture == ADDR_SYSMEM)
447     {
448         // Plain old memmap.
449         status = memdescMapOld(pMemDesc, 0,
450                                pMemDesc->Size,
451                                NV_TRUE, // kernel,
452                                NV_PROTECT_READ_WRITE,
453                                (void **)&pMap,
454                                &pPriv);
455         NV_ASSERT_OR_RETURN(NV_OK == status, NV_FALSE);
456         portMemCopy(pMap + entryOffset, pLevelFmt->entrySize, entry.v8, pLevelFmt->entrySize);
457         memdescUnmapOld(pMemDesc, 1, 0, pMap, pPriv);
458     }
459     else
460     {
461         NV_ASSERT_OR_RETURN(0, NV_FALSE); // only SYSMEM and FBMEM are supported.
462     }
463 
464     return NV_TRUE;
465 }
466 
467 /*!
468  * Implementation of @ref MmuWalkCBLevelFree for BAR2
469  */
470 static void
_bar2WalkCBLevelFree(MMU_WALK_USER_CTX * pUserCtx,const MMU_FMT_LEVEL * pLevelFmt,const NvU64 vaBase,MMU_WALK_MEMDESC * pOldMem)471 _bar2WalkCBLevelFree
472 (
473     MMU_WALK_USER_CTX   *pUserCtx,
474     const MMU_FMT_LEVEL *pLevelFmt,
475     const NvU64          vaBase,
476     MMU_WALK_MEMDESC    *pOldMem
477 )
478 {
479     MEMORY_DESCRIPTOR *pMemDesc = (MEMORY_DESCRIPTOR*)pOldMem;
480 
481     NV_PRINTF(LEVEL_INFO, "PA 0x%llX for VA 0x%llX-0x%llX\n",
482               memdescGetPhysAddr(pMemDesc, AT_GPU, 0),
483               mmuFmtLevelVirtAddrLo(pLevelFmt, vaBase),
484               mmuFmtLevelVirtAddrHi(pLevelFmt, vaBase));
485 
486     memdescFree(pMemDesc);
487     memdescDestroy(pMemDesc);
488 }
489 
490 /*!
491  * Implementation of @ref MmuWalkCBLevelAlloc for BAR2
492  */
493 static NV_STATUS
_bar2WalkCBLevelAlloc(MMU_WALK_USER_CTX * pUserCtx,const MMU_FMT_LEVEL * pLevelFmt,const NvU64 vaBase,const NvU64 vaLimit,const NvBool bTarget,MMU_WALK_MEMDESC ** ppMemDesc,NvU32 * pMemSize,NvBool * pBChanged)494 _bar2WalkCBLevelAlloc
495 (
496     MMU_WALK_USER_CTX       *pUserCtx,
497     const MMU_FMT_LEVEL     *pLevelFmt,
498     const NvU64              vaBase,
499     const NvU64              vaLimit,
500     const NvBool             bTarget,
501     MMU_WALK_MEMDESC       **ppMemDesc,
502     NvU32                   *pMemSize,
503     NvBool                  *pBChanged
504 )
505 {
506     OBJGPU             *pGpu    = pUserCtx->pGpu;
507     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
508     KernelBus          *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
509     NvU32               gfid;
510     NvU64               pdeBase = 0;
511     NvU64               pteBase = 0;
512     NvU32               allocSize;
513     NvU32               memOffset;
514     MEMORY_DESCRIPTOR   *pMemDesc = NULL;
515 
516     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
517         kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);
518     NvBool bPreFillCache = gpuIsCacheOnlyModeEnabled(pGpu) &&
519                                       !pMemorySystemConfig->bL2PreFill;
520     NV_STATUS         status = NV_OK;
521 
522     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
523 
524     // Abort early if level is not targeted or already allocated.
525     if (!bTarget || (NULL != *ppMemDesc))
526     {
527         return NV_OK;
528     }
529 
530     // Specify which Page Level we are initializing.
531     if (pKernelBus->bar2[gfid].bBootstrap || pKernelBus->bar2[gfid].bMigrating ||
532         IS_GFID_VF(gfid) || KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
533         kbusIsBarAccessBlocked(pKernelBus))
534     {
535         if (pLevelFmt == pKernelBus->bar2[gfid].pFmt->pRoot)
536         {
537             pKernelBus->bar2[gfid].pageDirInit = 0;
538             pKernelBus->bar2[gfid].pageTblInit = 0;
539         }
540 
541         NV_ASSERT_OR_RETURN(pKernelBus->bar2[gfid].pageDirInit + pKernelBus->bar2[gfid].pageTblInit <
542                             pKernelBus->bar2[gfid].numPageDirs + pKernelBus->bar2[gfid].numPageTbls,
543                             NV_ERR_INVALID_STATE);
544     }
545 
546     if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
547         (ADDR_FBMEM == pKernelBus->PDEBAR2Aperture))
548     {
549         if (pKernelBus->bar2[gfid].bBootstrap)
550         {
551             pdeBase = pKernelBus->bar2[gfid].pdeBaseForBootstrap;
552             pteBase = pKernelBus->bar2[gfid].pteBaseForBootstrap;
553         }
554         else if (pKernelBus->bar2[gfid].bMigrating)
555         {
556             pdeBase = pKernelBus->bar2[gfid].pdeBase;
557             pteBase = pKernelBus->bar2[gfid].pteBase;
558         }
559        else
560         {
561             status = NV_ERR_INVALID_OPERATION;
562             NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
563         }
564     }
565     else
566     {
567             NV_ASSERT(pKernelBus->bar2[gfid].bBootstrap || IS_GFID_VF(gfid) ||
568                       KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
569                       KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu) ||
570                       kbusIsBarAccessBlocked(pKernelBus));
571             pdeBase = pKernelBus->bar2[gfid].pdeBase;
572             pteBase = pKernelBus->bar2[gfid].pteBase;
573     }
574 
575     // Process Page Dirs
576     if (0 != pLevelFmt->numSubLevels)
577     {
578         allocSize = pKernelBus->bar2[gfid].pageDirSize;
579         status = memdescCreate(&pMemDesc, pGpu,
580                                allocSize,
581                                RM_PAGE_SIZE,
582                                NV_TRUE,
583                                pKernelBus->PDEBAR2Aperture,
584                                pKernelBus->PDEBAR2Attr,
585                                MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);
586         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
587 
588         switch (pKernelBus->PDEBAR2Aperture)
589         {
590             default:
591             case ADDR_FBMEM:
592                 //
593                 // Reserved FB memory for BAR2 Page Levels is contiiguous, hence
594                 // we simply offset from page dir base.
595                 //
596                 memOffset = pKernelBus->bar2[gfid].pageDirInit * pKernelBus->bar2[gfid].pageDirSize;
597                 memdescDescribe(pMemDesc,
598                                 pKernelBus->PDEBAR2Aperture,
599                                 pdeBase + memOffset,
600                                 allocSize);
601 
602                 //
603                 // Pre-fill cache to prevent FB read accesses if in cache only
604                 // mode and not doing one time pre-fill.
605                 //
606                 if (bPreFillCache)
607                 {
608                     kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem,
609                                                       pdeBase + memOffset,
610                                                       allocSize);
611                 }
612                 break;
613 
614             case ADDR_SYSMEM:
615                 memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_142,
616                                 pMemDesc);
617                 NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
618                 break;
619         }
620 
621         if (pLevelFmt == pKernelBus->bar2[gfid].pFmt->pRoot)
622         {
623             if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
624                 (ADDR_FBMEM == pKernelBus->PDEBAR2Aperture))
625             {
626                 if (pKernelBus->bar2[gfid].bBootstrap)
627                 {
628                     // Cache the temporary root Page Dir setup at bottom of FB.
629                     pKernelBus->bar2[gfid].pdeBaseForBootstrap = memdescGetPhysAddr(pMemDesc,
630                                                               AT_GPU, 0);
631                     pKernelBus->bar2[gfid].pPDEMemDescForBootstrap = pMemDesc;
632                 }
633                 else if (pKernelBus->bar2[gfid].bMigrating)
634                 {
635                     //
636                     // Cache the root Page Dir setup at top of FB.
637                     //
638                     pKernelBus->bar2[gfid].pdeBase = memdescGetPhysAddr(pMemDesc,
639                                                         AT_GPU, 0);
640                     pKernelBus->bar2[gfid].pPDEMemDesc = pMemDesc;
641                 }
642                 else
643                 {
644                     status = NV_ERR_INVALID_OPERATION;
645                     NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
646                 }
647             }
648             else
649             {
650                 NV_ASSERT(pKernelBus->bar2[gfid].bBootstrap || IS_GFID_VF(gfid) ||
651                           KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
652                           KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu) ||
653                           kbusIsBarAccessBlocked(pKernelBus));
654                 pKernelBus->bar2[gfid].pdeBase = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
655                 pKernelBus->bar2[gfid].pPDEMemDesc = pMemDesc;
656             }
657         }
658         if (pKernelBus->bar2[gfid].bBootstrap || pKernelBus->bar2[gfid].bMigrating ||
659             IS_GFID_VF(gfid) || KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
660             kbusIsBarAccessBlocked(pKernelBus))
661         {
662             pKernelBus->bar2[gfid].pageDirInit++;
663         }
664     }
665     else // Alloc Page Table
666     {
667         allocSize = pKernelBus->bar2[gfid].pageTblSize;
668         status = memdescCreate(&pMemDesc, pGpu,
669                                allocSize,
670                                RM_PAGE_SIZE,
671                                NV_TRUE,
672                                pKernelBus->PTEBAR2Aperture,
673                                pKernelBus->PTEBAR2Attr,
674                                MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);
675         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
676 
677         switch (pKernelBus->PTEBAR2Aperture)
678         {
679             default:
680             case ADDR_FBMEM:
681                 //
682                 // Reserved FB memory for BAR2 Page Levels is contiiguous, hence
683                 // we simply offset from the page table base.
684                 // pageTblInit gives us the page table number we are
685                 // initializing.
686                 //
687                 memOffset = pKernelBus->bar2[gfid].pageTblInit * allocSize;
688                 memdescDescribe(pMemDesc,
689                                 pKernelBus->PTEBAR2Aperture,
690                                 pteBase + memOffset,
691                                 allocSize);
692 
693                 //
694                 // Pre-fill cache to prevent FB read accesses if in cache only mode
695                 // and not doing one time pre-fill
696                 //
697                 if (bPreFillCache)
698                 {
699                     kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem,
700                                                       pteBase + memOffset,
701                                                       allocSize);
702                 }
703                 break;
704 
705             case ADDR_SYSMEM:
706                 memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_143,
707                                 pMemDesc);
708                 NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
709                 break;
710         }
711 
712         if (pKernelBus->bar2[gfid].pageTblInit == 0)
713         {
714             if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
715                 (ADDR_FBMEM == pKernelBus->PTEBAR2Aperture))
716             {
717                 // Cache the first Page Table memdesc
718                 if (pKernelBus->bar2[gfid].bBootstrap)
719                 {
720                     pKernelBus->bar2[gfid].pteBaseForBootstrap = memdescGetPhysAddr(pMemDesc,
721                                                                                     AT_GPU, 0);
722                 }
723                 else if (pKernelBus->bar2[gfid].bMigrating)
724                 {
725                     pKernelBus->bar2[gfid].pteBase = memdescGetPhysAddr(pMemDesc,
726                                                                         AT_GPU, 0);
727                 }
728                 else
729                 {
730                     status = NV_ERR_INVALID_OPERATION;
731                     NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
732                 }
733             }
734             else
735             {
736                 NV_ASSERT(pKernelBus->bar2[gfid].bBootstrap || IS_GFID_VF(gfid) ||
737                           KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
738                           KBUS_CPU_VISIBLE_BAR12_DISABLED(pGpu) ||
739                           kbusIsBarAccessBlocked(pKernelBus));
740                 pKernelBus->bar2[gfid].pteBase = memdescGetPhysAddr(pMemDesc,
741                                                                     AT_GPU, 0);
742             }
743             pKernelBus->virtualBar2[gfid].pPTEMemDesc = pMemDesc;
744         }
745         if (pKernelBus->bar2[gfid].bBootstrap || pKernelBus->bar2[gfid].bMigrating ||
746             IS_GFID_VF(gfid) || KBUS_BAR0_PRAMIN_DISABLED(pGpu) ||
747             kbusIsBarAccessBlocked(pKernelBus))
748         {
749             pKernelBus->bar2[gfid].pageTblInit++;
750         }
751     }
752 
753     // Return the allocated memdesc
754     *ppMemDesc = (MMU_WALK_MEMDESC*)pMemDesc;
755     *pMemSize  = allocSize;
756     *pBChanged = NV_TRUE;
757 
758 cleanup:
759     if (NV_OK != status)
760     {
761         memdescFree(pMemDesc);
762         memdescDestroy(pMemDesc);
763     }
764     return status;
765 }
766 
/*!
 * Implementation of @ref MmuWalkCBWriteBuffer for BAR2
 *
 * Copies the entry range [entryIndexLo, entryIndexHi] from a staging buffer
 * into the live page-level instance, selecting a write path based on the
 * current BAR2 bring-up state:
 *   1. BAR access blocked: copy via the MemoryManager transfer API.
 *   2. Bootstrapping with physical-BAR2 page-table init enabled: write
 *      through the CPU pointer into the BAR2 window.
 *   3. Bootstrapping otherwise: write through the BAR0 (PRAMIN) window,
 *      repositioning the window if it does not cover the destination and
 *      restoring the previous window offset before returning.
 *   4. Otherwise (BAR2 in virtual mode): write through the virtual BAR2
 *      CPU mapping of the page levels.
 *
 * @param[in] pUserCtx        Walker context; provides the GPU object.
 * @param[in] pStagingBuffer  Source memdesc holding the staged entries.
 * @param[in] pLevelBuffer    Destination memdesc of the live level instance.
 * @param[in] entryIndexLo    First entry index to write (inclusive).
 * @param[in] entryIndexHi    Last entry index to write (inclusive).
 * @param[in] tableSize       Size in bytes of one full table; used to wrap
 *                            offsets within the staging buffer.
 * @param[in] entrySize       Size in bytes of a single entry.
 */
static void
_bar2WalkCBWriteBuffer
(
     MMU_WALK_USER_CTX    *pUserCtx,
     MMU_WALK_MEMDESC     *pStagingBuffer,
     MMU_WALK_MEMDESC     *pLevelBuffer,
     NvU64                 entryIndexLo,
     NvU64                 entryIndexHi,
     NvU64                 tableSize,
     NvU64                 entrySize
)
{
    OBJGPU            *pGpu               = pUserCtx->pGpu;
    KernelBus         *pKernelBus         = GPU_GET_KERNEL_BUS(pGpu);
    NvU32              gfid;
    MEMORY_DESCRIPTOR *pStagingBufferDesc = (MEMORY_DESCRIPTOR*) pStagingBuffer;
    MEMORY_DESCRIPTOR *pOutputBufferDesc  = (MEMORY_DESCRIPTOR*) pLevelBuffer;
    NvBool             bRestore           = NV_FALSE;  // BAR0 window was moved; restore on exit
    NvU64              firstEntryOffset   = entryIndexLo * entrySize;
    NvU64              entryRangeSize     = (entryIndexHi - entryIndexLo + 1llu) * (entrySize);
    NvU64              oldBar0Mapping     = 0;         // Saved BAR0 window offset (valid iff bRestore)
    NvU8              *pStagingBufferMapping;
    NvU8              *pStagingDescMapping;
    NvU8              *pOutputBufferMapping;
    void              *pPriv;

    NV_ASSERT_OR_RETURN_VOID(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK);

    // TODO: Stash this mapping somewhere permanent to avoid constant remapping
    NV_ASSERT_OR_RETURN_VOID(
        memdescMapOld(pStagingBufferDesc,
                      0,
                      pStagingBufferDesc->Size,
                      NV_TRUE, // kernel,
                      NV_PROTECT_READ_WRITE,
                      (void **)&pStagingDescMapping,
                      &pPriv)
        == NV_OK);

    //
    // Locate the first entry within the staging buffer. The offset is taken
    // modulo tableSize — presumably the staging buffer holds at most one
    // table's worth of entries, so only the table-relative offset applies.
    //
    pStagingBufferMapping = &pStagingDescMapping[firstEntryOffset % tableSize];

    if (kbusIsBarAccessBlocked(pKernelBus))
    {
        MemoryManager   *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
        TRANSFER_SURFACE surf           = {0};

        // With BAR access blocked there must be no virtual BAR2 mapping of
        // the page levels; writes must go through the transfer API instead.
        NV_ASSERT_OR_RETURN_VOID(pKernelBus->virtualBar2[gfid].pPageLevels == NULL);

        surf.pMemDesc = pOutputBufferDesc;
        surf.offset = firstEntryOffset;

        // Begin/end transfer brackets the destination range; SHADOW_ALLOC
        // lets the transfer layer use a shadow buffer for the copy.
        pOutputBufferMapping = memmgrMemBeginTransfer(pMemoryManager, &surf,
                                                      entryRangeSize,
                                                      TRANSFER_FLAGS_SHADOW_ALLOC);

        portMemCopy(pOutputBufferMapping, entryRangeSize,
                    pStagingBufferMapping, entryRangeSize);

        memmgrMemEndTransfer(pMemoryManager, &surf, entryRangeSize,
                             TRANSFER_FLAGS_SHADOW_ALLOC);

        // Copy already performed; skip the common portMemCopy below.
        goto unmap_and_exit;
    }
    else if (pKernelBus->bar2[gfid].bBootstrap)
    {
        if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
        {
            // BAR2 in physical mode, using top of FB
            NvU8 *pOutputDescMapping = kbusCpuOffsetInBar2WindowGet(pGpu, pKernelBus, pOutputBufferDesc);
            pOutputBufferMapping = &pOutputDescMapping[firstEntryOffset];
        }
        else
        {
            // Get the physical address of the memdesc
            NvU64 phys = memdescGetPhysAddr(pOutputBufferDesc,
                                            FORCE_VMMU_TRANSLATION(pOutputBufferDesc, AT_GPU),
                                            firstEntryOffset);
            // Get BAR0 info
            NvU8 *pWindowAddress = pKernelBus->pDefaultBar0Pointer;
            NvU64 windowSize = pKernelBus->physicalBar0WindowSize;

            //
            // Set PRAMIN window offset to the page needed,
            // logic is copied from kbusMemAccessBar0Window_GM107
            //
            NvU64 currentBar0Mapping = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);

            //
            // First check if start of window is in range,
            // then check if end of window is in range
            //
            if (phys < currentBar0Mapping ||
                phys + entryRangeSize >= currentBar0Mapping + windowSize)
            {
                // Destination not covered: align the window down to a
                // windowSize boundary containing phys, and remember the old
                // offset so it can be restored before returning.
                kbusSetBAR0WindowVidOffset_HAL(pGpu,
                                               pKernelBus,
                                               (phys & ~(windowSize - 1llu)));
                oldBar0Mapping = currentBar0Mapping;
                currentBar0Mapping = (phys & ~(windowSize - 1llu));
                bRestore = NV_TRUE;
            }

            // CPU address = BAR0 window base + offset of phys inside the window.
            pOutputBufferMapping = &pWindowAddress[phys - currentBar0Mapping];
        }
    }
    else
    {
        //
        // BAR2 in virtual mode: page levels are contiguous starting at
        // pdeBase, so the level's offset from pdeBase applied to the CPU
        // mapping pPageLevels yields the destination pointer.
        //
        pOutputBufferMapping = memdescGetPhysAddr(pOutputBufferDesc,
                                                 FORCE_VMMU_TRANSLATION(pOutputBufferDesc, AT_GPU),
                                                 firstEntryOffset) -
                               pKernelBus->bar2[gfid].pdeBase +
                               pKernelBus->virtualBar2[gfid].pPageLevels;
    }

    // Common copy for paths 2-4 (path 1 copied via the transfer API above).
    portMemCopy(pOutputBufferMapping,
                entryRangeSize,
                pStagingBufferMapping,
                entryRangeSize);

unmap_and_exit:
    // Release the staging-buffer CPU mapping created at function entry.
    memdescUnmapOld(pStagingBufferDesc, NV_TRUE, 0, pStagingDescMapping, pPriv);

    if (bRestore)
    {
        // Put the BAR0 window back where kbusMemAccessBar0Window-style
        // callers expect it.
        kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, oldBar0Mapping);
    }
}
898 
/*!
 * MMU walker callback table for BAR2 page-level management.
 *
 * Slot order is positional and must match the MMU_WALK_CALLBACKS structure
 * declaration in mmu/mmu_walk.h. Roles below are inferred from the callback
 * names — confirm against the struct definition; the NULL slot is a callback
 * BAR2 does not implement.
 */
const MMU_WALK_CALLBACKS g_bar2WalkCallbacks =
{
    _bar2WalkCBLevelAlloc,   // Allocate backing memory for a page level
    _bar2WalkCBLevelFree,    // Free a page level's backing memory
    _bar2WalkCBUpdatePdb,    // Bind/update the root page directory (PDB)
    _bar2WalkCBUpdatePde,    // Write a single page directory entry
    _bar2WalkCBFillEntries,  // Bulk-fill a range of entries
    NULL,                    // Unimplemented callback slot
    _bar2WalkCBWriteBuffer,  // Copy staged entries into a live level
};
909