/*
 * SPDX-FileCopyrightText: Copyright (c) 2004-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*!
 * @file
 * @brief Common Virtual BAR2 support. Because this code is shared across
 *        chips, it must not include any chip-specific headers.
 */

#include "core/core.h"
#include "gpu/gpu.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "mem_mgr/io_vaspace.h"
#include "vgpu/vgpu_events.h"

// Prototypes for static functions
static NV_STATUS _kbusConstructVirtualBar2Heaps(KernelBus *pKernelBus, NvU32 gfid);
static NV_STATUS _kbusConstructVirtualBar2Lists(KernelBus *pKernelBus, NvU32 gfid);
static void _kbusDestructVirtualBar2Heaps(KernelBus *pKernelBus, NvU32 gfid);
static void _kbusDestructVirtualBar2Lists(KernelBus *pKernelBus, NvU32 gfid);
static void _freeRmApertureMap_VBAR2(OBJGPU *, KernelBus *, VirtualBar2MapEntry *, NvU32 flags);
static MemDescDestroyCallBack _kbusReleaseRmAperture_wrapper;

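//
// Overview of the virtual BAR2 mapping cache (a summary of the code below):
// mapping entries are pre-allocated in pMapListMemory and cycle between three
// intrusive lists per GFID:
//   - freeMapList:   entries with no memory descriptor attached
//   - usedMapList:   active mappings handed out to callers
//   - cachedMapList: unmapped-but-retained mappings, kept so a remap of the
//                    same memory descriptor can be served without rewriting
//                    PTEs; entries are evicted (oldest first) when VA space
//                    or free entries run out
// VA space itself is managed by two eheaps: pVASpaceHeap for the CPU-visible
// BAR2 region and pVASpaceHiddenHeap for the CPU-invisible region.
//
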
/*!
 * Internal function to allocate various bar2 heaps
 * @returns NV_STATUS
 *
 * @note This function handles the heap memory cleanup on failure paths.
 */
static NV_STATUS _kbusConstructVirtualBar2Heaps(KernelBus *pKernelBus, NvU32 gfid)
{
    NV_STATUS status = NV_OK;

    //
    // Setup eheap for RM bar2 space management.
    //
    // The number of maximum eheap region descriptors needed is 2 times the number
    // of cached BAR2 mappings, as the worst case is (alloc,free,alloc,free, etc.)
    // in VA space
    //

    NV_ASSERT_OK_OR_GOTO(status,
        kbusConstructVirtualBar2CpuVisibleHeap_HAL(pKernelBus, gfid), cleanup);

    NV_ASSERT_OK_OR_GOTO(status,
        kbusConstructVirtualBar2CpuInvisibleHeap_HAL(pKernelBus, gfid), cleanup);

cleanup:
    if (NV_OK != status)
        _kbusDestructVirtualBar2Heaps(pKernelBus, gfid);
    return status;
}

/*!
 * Internal function to allocate various bar2 lists
 * @returns NV_STATUS
 *
 * @note This function handles the memory cleanup for failure paths.
 */
static NV_STATUS _kbusConstructVirtualBar2Lists(KernelBus *pKernelBus, NvU32 gfid)
{
    //
    // TODO: This if() will go away when kbusConstructVirtualBar2 is moved back to kbusConstruct
    // from kbusStatePreInit().
    //
    if (pKernelBus->virtualBar2[gfid].pMapListMemory == NULL)
    {
        NvU32 i;

        // Pre-alloc the mapping list used for bar2 allocations
        listInitIntrusive(&pKernelBus->virtualBar2[gfid].freeMapList);
        listInitIntrusive(&pKernelBus->virtualBar2[gfid].cachedMapList);
        listInitIntrusive(&pKernelBus->virtualBar2[gfid].usedMapList);

        pKernelBus->virtualBar2[gfid].pMapListMemory = portMemAllocNonPaged(
            sizeof(VirtualBar2MapEntry) * BUS_BAR2_MAX_MAPPINGS);
        if (pKernelBus->virtualBar2[gfid].pMapListMemory == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "Unable to alloc bar2 mapping list!\n");
            DBG_BREAKPOINT();
            _kbusDestructVirtualBar2Lists(pKernelBus, gfid);
            return NV_ERR_NO_MEMORY;
        }
        portMemSet(pKernelBus->virtualBar2[gfid].pMapListMemory, 0, sizeof(VirtualBar2MapEntry) * BUS_BAR2_MAX_MAPPINGS);

        // Initialize the free mapping list
        for (i = 0; i < BUS_BAR2_MAX_MAPPINGS; i++)
        {
            listAppendExisting(&pKernelBus->virtualBar2[gfid].freeMapList, &(pKernelBus->virtualBar2[gfid].pMapListMemory[i]));
        }
    }
    return NV_OK;
}

/*!
 * Initialize common virtual BAR2 data structures.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] gfid
 *
 * @returns NV_STATUS
 */
NV_STATUS
kbusConstructVirtualBar2_VBAR2(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
{
    NV_STATUS status = NV_OK;

    //
    // TODO: Enable these when cpu invisible heap gets separated from bar2 virtual heap
    // construction and virtual bar2 heap construction moves under kbusConstruct.
    //
    // NV_ASSERT_OR_RETURN(NULL == pKernelBus->virtualBar2.pMapListMemory, NV_ERR_INVALID_STATE);
    // NV_ASSERT_OR_RETURN(NULL == pKernelBus->virtualBar2.pVASpaceHeap, NV_ERR_INVALID_STATE);
    // NV_ASSERT_OR_RETURN(NULL == pKernelBus->virtualBar2.pVASpaceHiddenHeap, NV_ERR_INVALID_STATE);

    //
    // GSP-RM and VF in SRIOV heavy mode don't use the cpuVisible BAR2,
    // so no need to construct the BAR2 lists
    //
    if (!RMCFG_FEATURE_PLATFORM_GSP && IS_GFID_PF(gfid))
    {
        // Construct the various lists needed by BAR2
        status = _kbusConstructVirtualBar2Lists(pKernelBus, gfid);
        NV_ASSERT_OR_RETURN(NV_OK == status, status);
    }

    // Construct various eheaps needed by BAR2
    status = _kbusConstructVirtualBar2Heaps(pKernelBus, gfid);
    NV_ASSERT_OR_RETURN(NV_OK == status, status);

    // Default to 4KB alignment
    pKernelBus->virtualBar2[gfid].vAlignment = RM_PAGE_SIZE;

    // Used for issuing TLB invalidates
    pKernelBus->virtualBar2[gfid].flags = VASPACE_FLAGS_BAR | VASPACE_FLAGS_BAR_BAR2;
    pKernelBus->virtualBar2[gfid].pPDB = NULL;

#if (NV_PRINTF_ENABLED)
    pKernelBus->virtualBar2[gfid].mapCount = 0;
    pKernelBus->virtualBar2[gfid].cacheHit = 0;
    pKernelBus->virtualBar2[gfid].evictions = 0;
#endif

    return status;
}

/*!
 * Allocate and construct the cpu-visible bar2 heap
 *
 * @param pKernelBus
 * @param gfid
 *
 * @return NV_OK or bubble up the returned error code from the callee
 */
NV_STATUS
kbusConstructVirtualBar2CpuVisibleHeap_VBAR2
(
    KernelBus *pKernelBus,
    NvU32      gfid
)
{
    if (IS_GFID_VF(gfid))
    {
        return NV_OK;
    }

    //
    // TODO: This if() will go away when kbusConstructVirtualBar2 is moved back to kbusConstruct
    // from kbusStatePreInit().
    //
    if (pKernelBus->virtualBar2[gfid].pVASpaceHeap == NULL)
    {
        pKernelBus->virtualBar2[gfid].pVASpaceHeap = portMemAllocNonPaged(sizeof(OBJEHEAP));
        if (pKernelBus->virtualBar2[gfid].pVASpaceHeap == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "Unable to alloc bar2 eheap!\n");
            DBG_BREAKPOINT();
            return NV_ERR_NO_MEMORY;
        }
        constructObjEHeap(pKernelBus->virtualBar2[gfid].pVASpaceHeap,
            (pKernelBus->bar2[gfid].cpuVisibleBase),
            (pKernelBus->bar2[gfid].cpuVisibleLimit + 1),
            0,
            BUS_BAR2_MAX_MAPPINGS * 2);
    }
    return NV_OK;
}

/*!
 * Allocate and construct the cpu-invisible bar2 heap
 *
 * @param pKernelBus
 * @param gfid
 *
 * @return NV_OK or bubble up the returned error code from the callee
 */
NV_STATUS
kbusConstructVirtualBar2CpuInvisibleHeap_VBAR2
(
    KernelBus *pKernelBus,
    NvU32      gfid
)
{
    //
    // TODO: Move the cpu invisible Heap construction out of BAR2 construction and into kbusPreInit
    // so that virtual BAR2 can be constructed during kbusConstruct
    //
    // Setup eheap for Hidden bar2 space management only if Invisible region is required
    // Hidden heap doesn't require any pre-allocated memory structs.
    //
    if (pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap == NULL)
    {
        if (pKernelBus->bar2[gfid].cpuInvisibleLimit > pKernelBus->bar2[gfid].cpuInvisibleBase)
        {
            pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap = portMemAllocNonPaged(sizeof(OBJEHEAP));
            if (pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap == NULL)
            {
                NV_PRINTF(LEVEL_ERROR, "Unable to alloc hidden bar2 eheap!\n");
                DBG_BREAKPOINT();
                return NV_ERR_NO_MEMORY;
            }
            constructObjEHeap(pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap,
                pKernelBus->bar2[gfid].cpuInvisibleBase,
                (pKernelBus->bar2[gfid].cpuInvisibleLimit + 1), 0, 0);
        }
    }
    return NV_OK;
}

/*!
 * Internal function to destroy all heap objects under bar2
 * @returns void
 */
static void _kbusDestructVirtualBar2Heaps(KernelBus *pKernelBus, NvU32 gfid)
{
    if (NULL != pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap)
    {
        pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap->eheapDestruct(pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap);
        portMemFree(pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap);
        pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap = NULL;
    }

    if (NULL != pKernelBus->virtualBar2[gfid].pVASpaceHeap)
    {
        pKernelBus->virtualBar2[gfid].pVASpaceHeap->eheapDestruct(pKernelBus->virtualBar2[gfid].pVASpaceHeap);
        portMemFree(pKernelBus->virtualBar2[gfid].pVASpaceHeap);
        pKernelBus->virtualBar2[gfid].pVASpaceHeap = NULL;
    }
}

/*!
 * Internal function to clean up various mapping lists
 * @returns void
 */
static void _kbusDestructVirtualBar2Lists(KernelBus *pKernelBus, NvU32 gfid)
{
    listDestroy(&pKernelBus->virtualBar2[gfid].freeMapList);
    listDestroy(&pKernelBus->virtualBar2[gfid].cachedMapList);
    listDestroy(&pKernelBus->virtualBar2[gfid].usedMapList);

    portMemFree(pKernelBus->virtualBar2[gfid].pMapListMemory);
    pKernelBus->virtualBar2[gfid].pMapListMemory = NULL;
}

/*!
 * Clean-up and free virtual BAR2 SW resources
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] shutdown   True if shutting down
 * @param[in] gfid
 *
 * @returns void
 */
void
kbusDestructVirtualBar2_VBAR2(OBJGPU *pGpu, KernelBus *pKernelBus, NvBool shutdown, NvU32 gfid)
{
    kbusFlushVirtualBar2_HAL(pGpu, pKernelBus, shutdown, gfid);
    _kbusDestructVirtualBar2Lists(pKernelBus, gfid);
    _kbusDestructVirtualBar2Heaps(pKernelBus, gfid);

    if (IS_GFID_PF(gfid))
    {
        NV_PRINTF(LEVEL_INFO,
                  "MapCount: %d Bar2 Hits: %d Evictions: %d\n",
                  pKernelBus->virtualBar2[gfid].mapCount,
                  pKernelBus->virtualBar2[gfid].cacheHit,
                  pKernelBus->virtualBar2[gfid].evictions);
    }
}

/*!
 * Clean-up virtual cache structures.
 *
 * Verify that there are no leaked or unreleased mappings.
 *
 * When shutting down the RM we should not have any outstanding memory descriptors
 * remaining in BAR2, so allow an error check for this.
 *
 * When suspending we only need to release them, as a memory descriptor may live across
 * a resume, but we don't want to save the BAR2 mappings as BAR2 is destroyed and
 * rebuilt on resume. We use this call directly on suspend as we don't need to reclaim
 * data structures, just flush the cached mappings.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] shutdown   True if shutting down
 * @param[in] gfid
 *
 * @returns None
 */
void
kbusFlushVirtualBar2_VBAR2(OBJGPU *pGpu, KernelBus *pKernelBus, NvBool shutdown, NvU32 gfid)
{
    if (IS_GFID_VF(gfid))
    {
        return;
    }

    //
    // There should be no active BAR2 mappings on shutdown. Failure indicates
    // there is a missing BAR2 unmap call somewhere in RM.
    //
    NV_ASSERT(listCount(&pKernelBus->virtualBar2[gfid].usedMapList) == 0);

    // There should be no unreleased mappings at shutdown
    NV_ASSERT(!shutdown || (listCount(&pKernelBus->virtualBar2[gfid].cachedMapList) == 0));

    // Release memory descriptors we still have cached
    while (listCount(&pKernelBus->virtualBar2[gfid].cachedMapList))
    {
        VirtualBar2MapEntry *pMap = listHead(&pKernelBus->virtualBar2[gfid].cachedMapList);

        NV_ASSERT(pMap->pMemDesc != NULL);

        _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
            UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);
    }
}

/*!
 * @brief one-time init of BAR2 Virtual Memory Manager.
 *
 * Sets up CPU pointer to the page tables at the top of FB.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 *
 * @returns NV_OK on success, relevant error code otherwise
 */
NV_STATUS
kbusInitVirtualBar2_VBAR2
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus
)
{
    NvU32              gfid;
    MEMORY_DESCRIPTOR *pMemDesc;
    NV_STATUS          status = NV_OK;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    pMemDesc = pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc;

    if (kbusIsBarAccessBlocked(pKernelBus))
    {
        return NV_OK;
    }

    if ((pMemDesc != NULL) &&
        (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
    {
        // Get a CPU pointer to BAR2 page tables
        pKernelBus->virtualBar2[gfid].pPageLevels =
            memmgrMemDescBeginTransfer(GPU_GET_MEMORY_MANAGER(pGpu),
                                       pMemDesc,
                                       TRANSFER_FLAGS_PERSISTENT_CPU_MAPPING);
        NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[gfid].pPageLevels,
                            NV_ERR_INSUFFICIENT_RESOURCES);
    }
    else
    {
        //
        // In SYSMEM, page level instances are allocated one at a time. It is
        // not guaranteed that they are contiguous. Thus, SYSMEM page level
        // instances are dynamically mapped-in via memmap as needed instead of
        // having one static mapping.
        //
        NV_ASSERT(pMemDesc == NULL);
        pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
    }

    return status;
}

/*!
 * @brief Sets up the CPU pointer to the temporary page tables at the
 *        bottom of FB.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 *
 * @returns NV_OK on success, relevant error code otherwise
 */
NV_STATUS
kbusPreInitVirtualBar2_VBAR2
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus
)
{
    NvU32              gfid;
    MEMORY_DESCRIPTOR *pMemDesc;
    NV_STATUS          status = NV_OK;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    pMemDesc = pKernelBus->virtualBar2[gfid].pPageLevelsMemDescForBootstrap;

    if (kbusIsBarAccessBlocked(pKernelBus))
    {
        return NV_OK;
    }

    if ((pMemDesc != NULL) &&
        (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
    {
        // Get a fast CPU pointer to BAR2 page tables (either direct or BAR2).
        pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap = kbusMapRmAperture_HAL(pGpu,
                                                                                      pMemDesc);
        NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap,
                            NV_ERR_INSUFFICIENT_RESOURCES);
    }

    return status;
}

/*!
 * @brief Helper routine to clean up an unreferenced mapping
 *
 * The mapping is moved from the cached list to the free list.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMap       Mapping to delete
 * @param[in] flags      Flags for kbusUpdateRmAperture_HAL
 *
 * @returns None
 */
static void
_freeRmApertureMap_VBAR2
(
    OBJGPU              *pGpu,
    KernelBus           *pKernelBus,
    VirtualBar2MapEntry *pMap,
    NvU32                flags
)
{
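    //
    // Note: every caller moves pMap onto cachedMapList before calling this
    // helper (see kbusFlushVirtualBar2_VBAR2, kbusUnmapBar2ApertureCached_VBAR2
    // and kbusReleaseRmAperture_VBAR2), so removing it from that list here is
    // always valid. This path is PF-only, hence the GPU_GFID_PF indexing.
    //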
    OBJEHEAP  *pVASpaceHeap = pKernelBus->virtualBar2[GPU_GFID_PF].pVASpaceHeap;
    EMEMBLOCK *pBlockFree;
    NvU64      vAddr, vAddrSize;

    listRemove(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList, pMap);

    if (pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping)
    {
        pBlockFree = pVASpaceHeap->eheapGetBlock(pVASpaceHeap, pMap->vAddr, NV_FALSE);

        if (pBlockFree != NULL)
        {
            vAddr = pBlockFree->begin;
            vAddrSize = pBlockFree->end - vAddr + 1;

            kbusUpdateRmAperture_HAL(pGpu, pKernelBus,
                pMap->pMemDesc, vAddr, vAddrSize, flags);
        }
    }

    pVASpaceHeap->eheapFree(pVASpaceHeap, pMap->vAddr);

    memdescRemoveDestroyCallback(pMap->pMemDesc, &pMap->memDescCallback);
    pMap->pMemDesc = NULL;

    listPrependExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList, pMap);
}

/*!
 * Second level of the RmAperture support for when a mapping is going to be in BAR2.
 *
 * Multiple mappings of a single MEMORY_DESCRIPTOR are now reference counted in
 * the memory descriptor code.
 *
 * If this request needs to update PTEs, call kbusUpdateRmAperture().
 *
 * It operates on a single GPU. SLI is handled above this call.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDesc   The memory descriptor being mapped
 *
 * @returns A CPU pointer to the memory
 */
static NvU8 *
kbusMapBar2ApertureCached_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              flags
)
{
    VirtualBar2MapEntry   *pMapNew;
    OBJEHEAP              *pVASpaceHeap = NULL;
    NvU64                  vAddr = 0;
    NvU32                  allocFlags = 0;
    NvU64                  allocSize = 0;
    NvBool                 bEvictNeeded = NV_FALSE;
    VirtualBar2MapListIter it;

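    //
    // Outline of this routine (a summary of the steps below):
    //  1. Serve the request from cachedMapList if the descriptor was mapped before.
    //  2. Otherwise reserve VA space from the eheap.
    //  3. If that fails, evict one sufficiently large cached mapping, or all
    //     cached mappings, and retry.
    //  4. Take a free map entry (evicting the oldest cached one if needed),
    //     write the PTEs, and move the entry to usedMapList.
    //
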
    NV_ASSERT(pMemDesc->pGpu == pGpu);

#if NV_PRINTF_ENABLED
    pKernelBus->virtualBar2[GPU_GFID_PF].mapCount++;
#endif

    //
    // Reject an illegal memdesc. Mappings that are too big will fail when
    // they can't find space in the eheap.
    //
    NV_ASSERT_OR_RETURN((pMemDesc->Size != 0) && (pMemDesc->PageCount != 0), NULL);

    NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[GPU_GFID_PF].pVASpaceHeap != NULL, NULL);

    //
    // Check the cached list for a recently used mapping
    //
    it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList);
    while (listIterNext(&it))
    {
        VirtualBar2MapEntry *pMap = it.pValue;

        NV_ASSERT(pMap->pMemDesc);

        if (pMap->pMemDesc == pMemDesc)
        {
            // Move the mapping from the cached list to the used list
            listRemove(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList, pMap);
            listPrependExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList, pMap);

#if NV_PRINTF_ENABLED
            pKernelBus->virtualBar2[GPU_GFID_PF].cacheHit++;
#endif
            return pMap->pRtnPtr;
        }
    }

    //
    // We didn't find an existing mapping. If there are no free mapping
    // list entries available, bail here
    //
    if ((listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList) == 0) &&
        (listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList) == 0))
    {
        NV_PRINTF(LEVEL_ERROR, "No free bar2 mapping struct left!\n");
        DBG_BREAKPOINT();
        return NULL;
    }

    //
    // Pack persistent mappings at the end of BAR2 space to avoid
    // fragmentation.
    //
    if (flags & TRANSFER_FLAGS_PERSISTENT_CPU_MAPPING)
    {
        allocFlags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;
    }

    //
    // Allocate VA space
    //
    pVASpaceHeap = pKernelBus->virtualBar2[GPU_GFID_PF].pVASpaceHeap;
    allocSize = pMemDesc->PageCount << RM_PAGE_SHIFT;
    bEvictNeeded =
        (NV_OK != pVASpaceHeap->eheapAlloc(pVASpaceHeap, VAS_EHEAP_OWNER_NVRM,
                                           &allocFlags, &vAddr, &allocSize,
                                           pKernelBus->virtualBar2[GPU_GFID_PF].vAlignment,
                                           pKernelBus->virtualBar2[GPU_GFID_PF].vAlignment,
                                           NULL, NULL, NULL));

    if (bEvictNeeded)
    {
        //
        // Is a single mapping big enough to fit the new request? If so, let's evict it.
        // Search in reverse to find the oldest mapping.
        //
        VirtualBar2MapEntry *pMap;

        for (pMap = listTail(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList);
             pMap != NULL;
             pMap = listPrev(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList, pMap))
        {
            NV_ASSERT(pMap->pMemDesc != NULL);
            if (pMap->pMemDesc->PageCount >= pMemDesc->PageCount)
            {
#if NV_PRINTF_ENABLED
                pKernelBus->virtualBar2[GPU_GFID_PF].evictions++;
#endif
                _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
                    UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);
                bEvictNeeded = NV_FALSE;
                break;
            }
        }

        //
        // If no single allocation has enough room, free all cached mappings and
        // hope we get enough contiguous VASpace.
        //
        if (bEvictNeeded)
        {
            while (listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList))
            {
                VirtualBar2MapEntry *pMap = listHead(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList);

                NV_ASSERT(pMap->pMemDesc != NULL);

#if NV_PRINTF_ENABLED
                pKernelBus->virtualBar2[GPU_GFID_PF].evictions++;
#endif
                _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
                    UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);
            }
        }

        // try to reallocate BAR2|CPU space via the eheap
        if (NV_OK != pVASpaceHeap->eheapAlloc(pVASpaceHeap, VAS_EHEAP_OWNER_NVRM, &allocFlags, &vAddr,
                                              &allocSize,
                                              pKernelBus->virtualBar2[GPU_GFID_PF].vAlignment,
                                              pKernelBus->virtualBar2[GPU_GFID_PF].vAlignment,
                                              NULL, NULL, NULL))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Not enough contiguous BAR2 VA space left allocSize %llx!\n",
                      allocSize);
            DBG_BREAKPOINT();
            return NULL;
        }
    }

    //
    // Allocate pMap - evict oldest (last) cached entry if no free entries
    //
    if (listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList) == 0)
    {
#if NV_PRINTF_ENABLED
        pKernelBus->virtualBar2[GPU_GFID_PF].evictions++;
#endif
        _freeRmApertureMap_VBAR2(pGpu, pKernelBus,
            listTail(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList),
            UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);
    }
    pMapNew = listHead(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList);

    listRemove(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList, pMapNew);

    // Update the page tables
    if (pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL ||
        (NV_OK != kbusUpdateRmAperture_HAL(pGpu, pKernelBus, pMemDesc, vAddr,
                                           pMemDesc->PageCount * pMemDesc->pageArrayGranularity,
                                           UPDATE_RM_APERTURE_FLAGS_INVALIDATE)))
    {
        pVASpaceHeap->eheapFree(pVASpaceHeap, vAddr);
        listPrependExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].freeMapList, pMapNew);
        return NULL;
    }

    // Fill in the new mapping data
    pMapNew->pRtnPtr = pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping + NvU64_LO32(vAddr) + pMemDesc->PteAdjust; // CPU pointer
    pMapNew->pMemDesc = pMemDesc;
    pMapNew->vAddr = vAddr;

    // Request notification when this memDesc is destroyed
    pMapNew->memDescCallback.destroyCallback = &_kbusReleaseRmAperture_wrapper;
    pMapNew->memDescCallback.pObject = (void *)pKernelBus;
    memdescAddDestroyCallback(pMemDesc, &pMapNew->memDescCallback);

    listPrependExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList, pMapNew);

    return pMapNew->pRtnPtr;
}

/*!
 * This is a wrapper function to trigger kbusReleaseRmAperture_HAL().
 * This must be kept compatible with MemDescDestroyCallBack.
 */
static void
_kbusReleaseRmAperture_wrapper
(
    OBJGPU            *pGpu,
    void              *pObject,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    KernelBus *pKernelBus = reinterpretCast(pObject, KernelBus*);
    kbusReleaseRmAperture_HAL(ENG_GET_GPU(pKernelBus), pKernelBus, pMemDesc);
}

/*!
 * Second level of the RmAperture support for when a mapping is going to be in BAR2.
 * We don't update PTEs here unless the SPARSIFY flag is passed; we just leave the
 * mapping cached and move on. This is faster, and we may get to reuse it later.
 *
 * This is common code shared by all chips after NV50
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDesc   Memory descriptor to unmap
 * @param[in] flags      TRANSFER_FLAGS
 * @returns None
 */
static void
kbusUnmapBar2ApertureCached_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              flags
)
{
    VirtualBar2MapListIter it;

    it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList);
    while (listIterNext(&it))
    {
        VirtualBar2MapEntry *pMap = it.pValue;

        if (pMap->pMemDesc == pMemDesc)
        {
            //
            // Remove from the used list and move to the start of the cached list.
            // Remapping of recent buffers is common.
            //
            listRemove(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList, pMap);
            listPrependExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList, pMap);

            if (flags & TRANSFER_FLAGS_DESTROY_MAPPING)
            {
                _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
                    UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_SPARSIFY);
            }

            return;
        }
    }

    // Whoops, we didn't find the mapping region - something's wrong!
    NV_PRINTF(LEVEL_ERROR, "can't find mapping struct!\n");
    DBG_BREAKPOINT();
}

/*!
 * @brief Rubber-stamp scratch mapping as valid
 */
NvU8 *
kbusValidateBar2ApertureMapping_SCRATCH
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8              *pCpu
)
{
    return pCpu;
}

/*!
 * @brief validate existing BAR2 mapping is still valid vs GPU reset
 *
 * @returns Existing or updated scratch buffer pointer
 */
NvU8 *
kbusValidateBar2ApertureMapping_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8              *pCpu
)
{
    if (API_GPU_IN_RESET_SANITY_CHECK(pGpu) &&
        !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GPU_IN_RESET))
    {
        //
        // Release the existing mapping and replace it with a new mapping.
        //
        // The caller is responsible for updating the pointer
        // after it is validated. We cannot handle fixing stale
        // pointers allocated before a GPU reset here.
        //
        kbusUnmapBar2ApertureWithFlags_HAL(pGpu, pKernelBus, pMemDesc, &pCpu,
                                           TRANSFER_FLAGS_NONE);
        return kbusMapBar2Aperture_HAL(pGpu, pKernelBus, pMemDesc,
                                       TRANSFER_FLAGS_NONE);
    }

    return pCpu;
}

/*!
 * @brief validate existing BAR2 mapping is still valid vs GPU reset
 *
 * @returns Existing or updated scratch buffer pointer
 */
NvU8 *
kbusValidateBar2ApertureMapping_VBAR2_SRIOV
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8              *pCpu
)
{
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
        return kbusValidateBar2ApertureMapping_SCRATCH(pGpu, pKernelBus, pMemDesc, pCpu);

    return kbusValidateBar2ApertureMapping_VBAR2(pGpu, pKernelBus, pMemDesc, pCpu);
}

/*!
 * @brief Fake BAR2 map API to a scratch buffer.
 *
 * Used for old VGPU w/o SRIOV guest cases, and when we are recovering from TDR.
 */
NvU8 *
kbusMapBar2Aperture_SCRATCH
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              flags
)
{
    if (pMemDesc->Size >= NV_U32_MAX)
    {
        return NULL;
    }

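    //
    // Note: the scratch buffer allocated here is freed by the matching
    // kbusUnmapBar2ApertureWithFlags_SCRATCH() call; no GPU PTEs are touched
    // on this path.
    //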
    return portMemAllocNonPaged(pMemDesc->Size);
}

/*!
 * Dynamically map memory through virtual BAR2 or with a direct CPU
 * mapping. This is the HAL entry point.
 *
 * This is common code shared by all chips after NV50
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDesc   Map this memory descriptor
 * @param[in] flags      Subset of TRANSFER_FLAGS
 *
 * @returns Master CPU pointer and an SLI set of CPU pointers
 *
 * @todo When using BAR2 this routine could not fail, but now with direct maps it can.
 */
NvU8 *
kbusMapBar2Aperture_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              flags
)
{
    //
    // Fail the mapping when BAR2 access to CPR vidmem is blocked (for HCC)
    // It is however legal to allow non-CPR vidmem to be mapped to BAR2
    // Certain mapping requests which arrive with a specific flag set are allowed
    // to go through only in HCC devtools mode.
    //
    if (kbusIsBarAccessBlocked(pKernelBus) &&
        (!gpuIsCCDevToolsModeEnabled(pGpu) || !(flags & TRANSFER_FLAGS_PREFER_PROCESSOR)) &&
        !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY))
    {
        os_dump_stack();
        NV_PRINTF(LEVEL_ERROR, "Cannot map/unmap CPR vidmem into/from BAR2\n");
        return NULL;
    }

    if (API_GPU_IN_RESET_SANITY_CHECK(pGpu))
    {
        //
        // If the gpu is no longer in a state where any gpu access is allowed,
        // create some dummy system memory and return the pointer to the
        // caller. All of the caller operations should now become nops. Only
        // reads of this data might cause problems.
        //
        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_GPU_IN_RESET, NV_TRUE);
        return kbusMapBar2Aperture_SCRATCH(pGpu, pKernelBus, pMemDesc, flags);
    }

#if 0 // Useful for finding leaks
    NV_PRINTF(LEVEL_ERROR,
              "memDesc %p from function %p\n",
              pMemDesc, __builtin_return_address(0));
#endif

    //
    // Raise a warning on encountering a reflected mapping on setups with sysmem NVLink.
    // On 0 FB systems, reflected mappings may be used, so don't raise the warning there.
    //
    if ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
        (pGpu->getProperty(pGpu, PDB_PROP_GPU_NVLINK_SYSMEM)) &&
        !(pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB)))
    {
        //
        // Reflected mapping is deprecated and may cause the GPU to enter a deadlock
        // on certain systems and result in the GPU falling off the bus (B1829446).
        // If you see any hangs after this print, please fix the allocation
        // code in the client for the memory tracked by this memDesc to avoid
        // reflected mapping.
        //
        NV_PRINTF(LEVEL_ERROR,
                  "GPU %d: Warning: Reflected Mapping Found: MapType = BAR and "
                  "AddressSpace = SYSMEM.\n", pGpu->gpuInstance);
        NV_ASSERT(0);
    }

    // Call the lower-level routine
    return kbusMapBar2ApertureCached_VBAR2(pGpu, pKernelBus, pMemDesc, flags);
}

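//
// Typical usage of the map/unmap HAL entry points (an illustrative sketch
// only; the MEM_WR32 access shown is an assumption about the caller, not
// part of this file's contract):
//
//     NvU8 *pCpu = kbusMapBar2Aperture_HAL(pGpu, pKernelBus, pMemDesc,
//                                          TRANSFER_FLAGS_NONE);
//     if (pCpu != NULL)
//     {
//         MEM_WR32(pCpu, 0xdeadbeef);  // CPU access through the BAR2 window
//         kbusUnmapBar2ApertureWithFlags_HAL(pGpu, pKernelBus, pMemDesc,
//                                            &pCpu, TRANSFER_FLAGS_NONE);
//     }
//
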
/*!
 * @brief SRIOV BAR2 map filter to decide between SRIOV and classic VGPU behavior
 *
 * Turing/GA100 can run in both modes, so we need the dynamic check.
 */
NvU8 *
kbusMapBar2Aperture_VBAR2_SRIOV
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              flags
)
{
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
        return kbusMapBar2Aperture_SCRATCH(pGpu, pKernelBus, pMemDesc, flags);

    return kbusMapBar2Aperture_VBAR2(pGpu, pKernelBus, pMemDesc, flags);
}

/*!
 * @brief Fake BAR2 unmap API to a scratch buffer.
 *
 * Used for old VGPU w/o SRIOV guest cases, and when we are recovering from TDR.
 */
void
kbusUnmapBar2ApertureWithFlags_SCRATCH
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8             **pCpuPtr,
    NvU32              flags
)
{
    portMemFree(*pCpuPtr);
    kbusFlush_HAL(pGpu, pKernelBus, kbusGetFlushAperture(pKernelBus, memdescGetAddressSpace(pMemDesc)));
}

/*!
 * @brief Unmap instance memory, reversing kbusMapRmAperture_VBAR2
 *
 * If a destroy flag is passed, actually clear the PTE mappings and don't
 * leave the entry on the cached list.
 *
 * The value of *pCpuPtr must be the same as the value returned from
 * kbusMapRmAperture_VBAR2 when the original mapping was performed.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDesc   Unmap this memory descriptor
 * @param[in] pCpuPtr    CPU VA previously returned by kbusMapRmAperture_VBAR2
 * @param[in] flags      Bitfield of flags to perform various operations
 *
 * @returns None
 */
void
kbusUnmapBar2ApertureWithFlags_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8             **pCpuPtr,
    NvU32              flags
)
{
    //
    // Fail the unmapping when BAR2 access to CPR vidmem is blocked (for HCC)
    // It is however legal to allow non-CPR vidmem to be mapped to BAR2
    // Certain mapping requests which arrive with a specific flag set are allowed
    // to go through only in HCC devtools mode.
    //
    if (kbusIsBarAccessBlocked(pKernelBus) &&
        (!gpuIsCCDevToolsModeEnabled(pGpu) || !(flags & TRANSFER_FLAGS_PREFER_PROCESSOR)) &&
        !memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY))
    {
        NV_ASSERT(0);
        NV_PRINTF(LEVEL_ERROR, "Cannot map/unmap CPR vidmem into/from BAR2\n");
        return;
    }

    //
    // Free the dummy data we allocated for handling a reset GPU.
    // Let a map created before the reset go through the normal path
    // to clear out the memory.
    //
    if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GPU_IN_RESET))
    {
        kbusUnmapBar2ApertureWithFlags_SCRATCH(pGpu, pKernelBus, pMemDesc, pCpuPtr, flags);
        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_GPU_IN_RESET, NV_FALSE);
        return;
    }

    // Call the lower-level routine
    kbusUnmapBar2ApertureCached_VBAR2(pGpu, pKernelBus, pMemDesc, flags);
}

/*!
 * @brief SRIOV BAR2 unmap filter to decide between SRIOV and classic VGPU behavior
 *
 * Turing/GA100 can run in both modes, so we need the dynamic check.
 */
void
kbusUnmapBar2ApertureWithFlags_VBAR2_SRIOV
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU8             **pCpuPtr,
    NvU32              flags
)
{
    //
    // With SR-IOV enabled, BAR2 mappings are managed by the guest and take the
    // VBAR2 path below. Classic VGPU (and the SRIOV-heavy WAR) uses the
    // scratch-buffer path instead.
    //
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
    {
        kbusUnmapBar2ApertureWithFlags_SCRATCH(pGpu, pKernelBus, pMemDesc, pCpuPtr, flags);
        return;
    }

    kbusUnmapBar2ApertureWithFlags_VBAR2(pGpu, pKernelBus, pMemDesc, pCpuPtr, flags);
}

/*!
 * Release a cached memory descriptor so the memory descriptor can be freed.
 *
 * This is called from the memdescDestroy/memdescRelease path when ending the
 * life of a memory descriptor.
 *
 * We assume the mapping has already been unmapped (i.e., it is on the cached
 * list). If that isn't the case, it will show up as a leaked mapping when
 * shutting down. On debug drivers we also check the used list to help
 * pinpoint the source of a leaked mapping.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] pMemDesc   Memory descriptor being released
 */
void
kbusReleaseRmAperture_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    VirtualBar2MapListIter it;

    it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList);
    while (listIterNext(&it))
    {
        VirtualBar2MapEntry *pMap = it.pValue;

        if (pMap->pMemDesc == pMemDesc)
        {
            _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
                UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);
            return;
        }
    }

#ifdef DEBUG
    it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList);
    while (listIterNext(&it))
    {
        VirtualBar2MapEntry *pMap = it.pValue;

        if (pMap->pMemDesc == pMemDesc)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Leaked mapping detected. Mapping not unmapped before memdescDestroy call.\n");
            DBG_BREAKPOINT();

            // Must be on the cached list to be freed
            listRemove(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList, pMap);
            listAppendExisting(&pKernelBus->virtualBar2[GPU_GFID_PF].cachedMapList, pMap);

            _freeRmApertureMap_VBAR2(pGpu, pKernelBus, pMap,
                UPDATE_RM_APERTURE_FLAGS_INVALIDATE | UPDATE_RM_APERTURE_FLAGS_DISCARD);

            break;
        }
    }
#endif
}

NV_STATUS kbusMapCpuInvisibleBar2Aperture_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU64             *pVaddr,
    NvU64              allocSize,
    NvU32              allocFlags,
    NvU32              gfid
)
{
    OBJEHEAP *pVASpaceHiddenHeap = pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap;
    NV_STATUS status;

    status = pVASpaceHiddenHeap->eheapAlloc(pVASpaceHiddenHeap, VAS_EHEAP_OWNER_NVRM,
                                            &allocFlags, pVaddr, &allocSize,
                                            pKernelBus->virtualBar2[gfid].vAlignment,
                                            pKernelBus->virtualBar2[gfid].vAlignment,
                                            NULL, NULL, NULL);

    if (status != NV_OK)
    {
        goto done;
    }

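    //
    // For VF, the BAR2 page tables are not kept permanently mapped; map them
    // in temporarily here so the PTE update below can write them, then unmap
    // them again once the update is done (see the matching unmap below).
    //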
    if (IS_GFID_VF(gfid) && (pKernelBus->virtualBar2[gfid].pPageLevels == NULL))
    {
        pKernelBus->virtualBar2[gfid].pPageLevels = kbusMapRmAperture_HAL(pGpu,
                                                        pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc);
        NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[gfid].pPageLevels,
                            NV_ERR_INSUFFICIENT_RESOURCES);
    }

    status = kbusUpdateRmAperture_HAL(pGpu, pKernelBus, pMemDesc, *pVaddr,
                 pMemDesc->PageCount * pMemDesc->pageArrayGranularity, UPDATE_RM_APERTURE_FLAGS_INVALIDATE |
                 UPDATE_RM_APERTURE_FLAGS_CPU_INVISIBLE_RANGE);

    if (IS_GFID_VF(gfid) && (pKernelBus->virtualBar2[gfid].pPageLevels != NULL))
    {
        kbusUnmapRmAperture_HAL(pGpu,
                                pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
                                &pKernelBus->virtualBar2[gfid].pPageLevels, NV_TRUE);
        pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
    }

    if (status != NV_OK)
    {
        pVASpaceHiddenHeap->eheapFree(pVASpaceHiddenHeap, *pVaddr);
        *pVaddr = 0;
    }

done:
    return status;
}

void kbusUnmapCpuInvisibleBar2Aperture_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    PMEMORY_DESCRIPTOR pMemDesc,
    NvU64              vAddr,
    NvU32              gfid
)
{
    OBJEHEAP *pVASpaceHiddenHeap = pKernelBus->virtualBar2[gfid].pVASpaceHiddenHeap;

    if (!pVASpaceHiddenHeap)
    {
        return;
    }

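    //
    // Only the VA reservation is released here; the PTEs for this
    // CPU-invisible range are rewritten when the range is next allocated
    // (note there is no kbusUpdateRmAperture call on this path).
    //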
    pVASpaceHiddenHeap->eheapFree(pVASpaceHiddenHeap, vAddr);
}

/*
 * @brief This function simply rewrites the PTEs for an already
 *        existing mapping cached in the usedMapList.
 *
 * This is currently used for updating the PTEs in the BAR2 page
 * tables at the top of FB after bootstrapping is done. The PTEs
 * for this mapping may already exist in the page tables at the
 * bottom of FB, but those PTEs will be discarded once migration
 * to the page tables at the top of FB is done. So, before switching
 * to the new page tables, we must rewrite the PTEs so that the
 * cached mapping does not become invalid. The *only* use case currently
 * is the CPU pointer to the new page tables at the top of FB.
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelBus KernelBus pointer
 * @param[in] pMemDesc   MEMORY_DESCRIPTOR pointer
 *
 * @return NV_OK if the operation succeeds
 *         Error otherwise.
 */
NV_STATUS
kbusRewritePTEsForExistingMapping_VBAR2
(
    OBJGPU            *pGpu,
    KernelBus         *pKernelBus,
    PMEMORY_DESCRIPTOR pMemDesc
)
{
    VirtualBar2MapListIter it;

    it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList);
    while (listIterNext(&it))
    {
        VirtualBar2MapEntry *pMap = it.pValue;

        if (pMap->pMemDesc == pMemDesc)
        {
            return kbusUpdateRmAperture_HAL(pGpu, pKernelBus, pMemDesc, pMap->vAddr,
                                            pMemDesc->Size, 0);
        }
    }
    return NV_ERR_INVALID_OPERATION;
}
1259