1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24
25 /***************************** HW State Routines ***************************\
26 * *
27 * Fabric Virtual Address Space Function Definitions. *
28 * *
29 \***************************************************************************/
30
31 #include "gpu/mmu/kern_gmmu.h"
32 #include "mem_mgr/vaspace.h"
33 #include "mem_mgr/fabric_vaspace.h"
34 #include "gpu/mem_mgr/mem_mgr.h"
35 #include "mem_mgr/gpu_vaspace.h"
36 #include "gpu/mem_mgr/virt_mem_allocator_common.h"
37 #include "os/os.h"
38 #include "gpu/bus/kern_bus.h"
39 #include "kernel/gpu/fifo/kernel_fifo.h"
40 #include "kernel/gpu/nvlink/kernel_nvlink.h"
41 #include "mmu/mmu_walk.h"
42 #include "lib/base_utils.h"
43 #include "class/cl90f1.h" // FERMI_VASPACE_A
44 #include "class/cl00fc.h" // FABRIC_VASPACE_A
45 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
46 #include "class/cl0080.h" // NV01_DEVICE_0
47 #include "deprecated/rmapi_deprecated.h"
48 #include "rmapi/rs_utils.h"
49 #include "vgpu/vgpu_events.h"
50 #include "mem_mgr/virt_mem_mgr.h"
51
52 #include "published/ampere/ga100/dev_mmu.h"
53 #include "vgpu/rpc.h"
54 #include "virtualization/hypervisor/hypervisor.h"
55
56
57
58 //
// TODO: To be removed when legacy FLA VAS (pKernelBus->flaInfo.pFlaVAS) is removed
60 // The instance block is setup during kbusAllocateFlaVaspace_HAL(). However, we
61 // lazily bind it to the new fabric VAS when the very first NV_FABRIC_MEMORY
62 // allocations happens.
63 //
static NV_STATUS
_fabricvaspaceBindInstBlk
(
    FABRIC_VASPACE *pFabricVAS
)
{
    OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_STATUS status = NV_OK;

    INST_BLK_INIT_PARAMS instblkParams;

    // No-op on configurations that do not toggle the FLA bind point.
    if (!pKernelBus->flaInfo.bToggleBindPoint)
    {
        return NV_OK;
    }

    //
    // The legacy FLA VAS and the fabric VAS share a single instance block,
    // so both cannot be active at the same time.
    //
    if (gvaspaceIsInUse(dynamicCast(pKernelBus->flaInfo.pFlaVAS, OBJGVASPACE)))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "FabricVAS and FlaVAS cannot be used simultaneously! "
                  "Instance block setup for fabricVAS failed\n");
        return NV_ERR_INVALID_OPERATION;
    }

    //
    // Check if this is the first fabric vaspace allocation. If this is not the
    // first allocation, instance block is already setup. Return NV_OK.
    //
    if (gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)))
    {
        return NV_OK;
    }

    // Unbind the instance block for FLA vaspace.
    status = kbusSetupUnbindFla_HAL(pGpu, pKernelBus);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to unbind instance block for FlaVAS, status=0x%x\n",
                  status);
        return status;
    }

    // Instantiate the instance block for fabric vaspace.
    portMemSet(&instblkParams, 0, sizeof(instblkParams));
    status = kgmmuInstBlkInit(pKernelGmmu, pKernelBus->flaInfo.pInstblkMemDesc,
                              pFabricVAS->pGVAS, FIFO_PDB_IDX_BASE,
                              &instblkParams);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to setup instance block for fabricVAS, status=0x%x\n",
                  status);
        goto failed;
    }

    // Bind the instance block for fabric vaspace.
    status = kbusSetupBindFla_HAL(pGpu, pKernelBus, pFabricVAS->gfid);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to bind instance block for fabricVAS, status=0x%x\n",
                  status);
        goto failed;
    }

    return NV_OK;

failed:
    //
    // Best-effort rollback: re-instantiate and re-bind the instance block for
    // the legacy FLA vaspace so the pre-call state is restored.
    //
    portMemSet(&instblkParams, 0, sizeof(instblkParams));
    NV_ASSERT(kgmmuInstBlkInit(pKernelGmmu, pKernelBus->flaInfo.pInstblkMemDesc,
                               pKernelBus->flaInfo.pFlaVAS, FIFO_PDB_IDX_BASE,
                               &instblkParams) == NV_OK);

    // Bind the instance block for FLA vaspace.
    NV_ASSERT(kbusSetupBindFla_HAL(pGpu, pKernelBus, pFabricVAS->gfid) == NV_OK);

    return status;
}
147
148 //
// TODO: To be removed when legacy FLA VAS (pKernelBus->flaInfo.pFlaVAS) is removed
// The instance block is unbound during kbusDestroyFla_HAL(). However, we unbind
151 // it here and bind back the instance block for the legacy FLA VAS after the
152 // last NV_FABRIC_MEMORY allocation is freed.
153 //
static void
_fabricvaspaceUnbindInstBlk
(
    FABRIC_VASPACE *pFabricVAS
)
{
    OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    INST_BLK_INIT_PARAMS instblkParams = {0};

    // No-op on configurations that do not toggle the FLA bind point.
    if (!pKernelBus->flaInfo.bToggleBindPoint)
    {
        return;
    }

    //
    // Check if there are any pending allocations for the fabric vaspace.
    // If there are pending allocations, skip restore and return NV_OK.
    //
    if (gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)))
    {
        return;
    }

    // Unbind the instance block for fabric vaspace.
    NV_ASSERT(kbusSetupUnbindFla_HAL(pGpu, pKernelBus) == NV_OK);

    // Restore the legacy FLA VAS binding, if one exists.
    if (pKernelBus->flaInfo.pFlaVAS != NULL)
    {
        // Instantiate the instance block for FLA vaspace.
        NV_ASSERT(kgmmuInstBlkInit(pKernelGmmu,
                                   pKernelBus->flaInfo.pInstblkMemDesc,
                                   pKernelBus->flaInfo.pFlaVAS,
                                   FIFO_PDB_IDX_BASE,
                                   &instblkParams) == NV_OK);

        // Bind the instance block for FLA vaspace.
        NV_ASSERT(kbusSetupBindFla_HAL(pGpu, pKernelBus,
                                       pFabricVAS->gfid) == NV_OK);
    }
}
197
//
// Constructs the fabric VA space: validates the class/range inputs, creates an
// internal RM client + device, and creates the backing GVASPACE object.
// On any failure, the internal client (and the device under it) is freed.
//
NV_STATUS
fabricvaspaceConstruct__IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU32 classId,
    NvU32 vaspaceId,
    NvU64 vaStart,
    NvU64 vaLimit,
    NvU64 vaStartInternal,
    NvU64 vaLimitInternal,
    NvU32 flags
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    OBJSYS *pSys = SYS_GET_INSTANCE();
    OBJVMM *pVmm = SYS_GET_VMM(pSys);
    OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
    OBJGPU *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
    NV_STATUS status = NV_OK;
    NvHandle hClient = 0;
    NvHandle hDevice = 0;
    NV0080_ALLOC_PARAMETERS devAllocParams = { 0 };
    NvU32 gfid = 0;

    // Sanity check input parameters.
    NV_ASSERT_OR_RETURN(FABRIC_VASPACE_A == classId, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(vaStart <= vaLimit, NV_ERR_INVALID_ARGUMENT);
    // A fabric VAS is bound to exactly one GPU; its vaspaceId is that GPU's id.
    NV_ASSERT_OR_RETURN(ONEBITSET(pVAS->gpuMask), NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(vaspaceId == pGpu->gpuId, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));

    // Create an internal RM client to own the device allocated below.
    status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT,
                                     NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                                     NV01_ROOT, &hClient, sizeof(hClient));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed creating client, status=0x%x\n", status);
        return status;
    }

    status = serverutilGenResourceHandle(hClient, &hDevice);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "failed creating device handle, status=0x%x\n", status);
        goto cleanup;
    }

    // Allocate a device handle
    devAllocParams.deviceId = gpuGetDeviceInstance(pGpu);
    status = pRmApi->AllocWithHandle(pRmApi, hClient, hClient, hDevice,
                                     NV01_DEVICE_0,
                                     &devAllocParams, sizeof(devAllocParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed creating device, status=0x%x\n", status);
        goto cleanup;
    }

    // Save off flags.
    pFabricVAS->flags = (flags |
                         VASPACE_FLAGS_ALLOW_ZERO_ADDRESS |
                         VASPACE_FLAGS_INVALIDATE_SCOPE_NVLINK_TLB |
                         VASPACE_FLAGS_DISABLE_SPLIT_VAS);

    // For VF contexts, remember the GFID and allow suballocator pages.
    if (IS_GFID_VF(gfid))
    {
        pFabricVAS->gfid = gfid;
        pFabricVAS->flags |= VASPACE_FLAGS_ALLOW_PAGES_IN_PHYS_MEM_SUBALLOCATOR;
    }

    // SR-IOV heavy WAR: route allocations through RPC on virtual GPUs.
    pFabricVAS->bRpcAlloc = IS_VIRTUAL(pGpu) &&
                            gpuIsWarBug200577889SriovHeavyEnabled(pGpu);

    // Create the GVASPACE object associated with this fabric vaspace.
    status = vmmCreateVaspace(pVmm, FERMI_VASPACE_A, 0, pVAS->gpuMask,
                              vaStart, vaLimit, 0, 0, NULL, pFabricVAS->flags,
                              &pFabricVAS->pGVAS);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed allocating gvaspace associated with the fabric vaspace, "
                  "status=0x%x\n", status);
        goto cleanup;
    }

    pFabricVAS->hClient = hClient;
    pFabricVAS->hDevice = hDevice;

    // Capture the vasStart and vasLimit for the fabric vaspace.
    pVAS->vasStart = pFabricVAS->pGVAS->vasStart;
    pVAS->vasLimit = pFabricVAS->pGVAS->vasLimit;

    return NV_OK;

cleanup:
    // Freeing the client also frees the device handle allocated under it.
    NV_ASSERT(pRmApi->Free(pRmApi, hClient, hClient) == NV_OK);

    return status;
}
298
299 void
fabricvaspaceDestruct_IMPL(FABRIC_VASPACE * pFabricVAS)300 fabricvaspaceDestruct_IMPL
301 (
302 FABRIC_VASPACE *pFabricVAS
303 )
304 {
305 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
306 OBJSYS *pSys = SYS_GET_INSTANCE();
307 OBJVMM *pVmm = SYS_GET_VMM(pSys);
308 OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
309
310 if (pFabricVAS->pGVAS == NULL)
311 return;
312
313 NV_ASSERT(pRmApi->Free(pRmApi, pFabricVAS->hClient,
314 pFabricVAS->hClient) == NV_OK);
315
316 // There should be no vaspace allocations pending at this point.
317 NV_ASSERT(!gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE)));
318
319 // Destroy the GVASPACE object associated with this fabric vaspace.
320 vmmDestroyVaspace(pVmm, pFabricVAS->pGVAS);
321
322 pFabricVAS->pGVAS = NULL;
323 pVAS->vasStart = 0;
324 pVAS->vasLimit = 0;
325 }
326
NV_STATUS
fabricvaspaceAlloc_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64 size,
    NvU64 align,
    NvU64 rangeLo,
    NvU64 rangeHi,
    NvU64 pageSize,
    VAS_ALLOC_FLAGS flags,
    NvU64 *pAddr
)
{
    //
    // Intentionally unimplemented; fabric VA allocations go through
    // fabricvaspaceAllocNonContiguous_IMPL() instead.
    //
    // TODO: If needed, can call into fabricvaspaceAllocNonContiguous_IMPL()
    // by forcing contig flag.
    //
    return NV_ERR_NOT_SUPPORTED;
}
346
347 NV_STATUS
fabricvaspaceAllocNonContiguous_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 size,NvU64 align,NvU64 rangeLo,NvU64 rangeHi,NvU64 pageSize,VAS_ALLOC_FLAGS flags,NvU64 ** ppAddr,NvU32 * pNumAddr)348 fabricvaspaceAllocNonContiguous_IMPL
349 (
350 FABRIC_VASPACE *pFabricVAS,
351 NvU64 size,
352 NvU64 align,
353 NvU64 rangeLo,
354 NvU64 rangeHi,
355 NvU64 pageSize,
356 VAS_ALLOC_FLAGS flags,
357 NvU64 **ppAddr,
358 NvU32 *pNumAddr
359 )
360 {
361 NV_STATUS status = NV_OK;
362 NvU64 freeSize = 0;
363 NvU32 pageCount = (size / pageSize);
364 NvU64 addr;
365 NvU32 idx;
366 NvBool bDefaultAllocMode;
367
368 // Sanity check the input parameters.
369 NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
370 NV_ASSERT_OR_RETURN(ppAddr != NULL, NV_ERR_INVALID_ARGUMENT);
371 NV_ASSERT_OR_RETURN(pNumAddr != NULL, NV_ERR_INVALID_ARGUMENT);
372 NV_ASSERT_OR_RETURN(pageSize >= RM_PAGE_SIZE_HUGE, NV_ERR_INVALID_ARGUMENT);
373 NV_ASSERT_OR_RETURN(align != 0, NV_ERR_INVALID_ARGUMENT);
374 NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
375
376 // Check the alignment and size are pageSize aligned.
377 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(align, pageSize), NV_ERR_INVALID_ARGUMENT);
378 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(size, pageSize), NV_ERR_INVALID_ARGUMENT);
379
380 // Check if heap can satisfy the request.
381 NV_ASSERT_OK_OR_RETURN(fabricvaspaceGetFreeHeap(pFabricVAS, &freeSize));
382 if (freeSize < size)
383 {
384 NV_PRINTF(LEVEL_ERROR,
385 "Not enough memory in eheap, size requested = 0x%llx, "
386 "free memory = 0x%llx\n",
387 size, freeSize);
388 return NV_ERR_NO_MEMORY;
389 }
390
391 if (flags.bForceNonContig && flags.bForceContig)
392 {
393 NV_PRINTF(LEVEL_ERROR,
394 "Forcing both contiguous and noncontiguous is not allowed\n");
395 return NV_ERR_INVALID_ARGUMENT;
396 }
397
398 bDefaultAllocMode = (!flags.bForceNonContig && !flags.bForceContig);
399
400 // Adjust rangeLo and rangeHi.
401 rangeLo = NV_ALIGN_DOWN(rangeLo, pageSize);
402 rangeHi = NV_ALIGN_UP(rangeHi, pageSize);
403
404 *ppAddr = portMemAllocNonPaged(sizeof(NvU64) * pageCount);
405 if (*ppAddr == NULL)
406 {
407 return NV_ERR_NO_MEMORY;
408 }
409 portMemSet(*ppAddr, 0, sizeof(NvU64) * pageCount);
410
411 status = _fabricvaspaceBindInstBlk(pFabricVAS);
412 if (status != NV_OK)
413 {
414 NV_PRINTF(LEVEL_ERROR, "Failed to bind instance block for fabric vaspace."
415 " Alloc failed\n");
416 goto failed;
417 }
418
419 // Initialize number of addresses to 0
420 *pNumAddr = 0;
421
422 //
423 // Attempt to allocate VA space of the size and alignment requested.
424 //
425 // RM_PAGE_SIZE_HUGE is passed since FLA->PA mappings support minimum
426 // 2MB pagesize.
427 //
428 if (flags.bForceContig || bDefaultAllocMode)
429 {
430 status = vaspaceAlloc(pFabricVAS->pGVAS, size, align, rangeLo, rangeHi,
431 RM_PAGE_SIZE_HUGE, flags, &addr);
432 if (status == NV_OK)
433 {
434 (*ppAddr)[0] = addr;
435 *pNumAddr = 1;
436 }
437 else if (flags.bForceContig)
438 {
439 NV_PRINTF(LEVEL_ERROR, "Failed to allocate contig vaspace\n");
440 goto failed;
441 }
442 }
443
444 //
445 // If size could not be allocated in one memblock, break size into
446 // multiple pageSize chunks.
447 //
448 // RM_PAGE_SIZE_HUGE is passed since FLA->PA mappings support minimum
449 // 2MB pagesize.
450 //
451 if (flags.bForceNonContig || (bDefaultAllocMode && (status != NV_OK)))
452 {
453 for (idx = 0; idx < pageCount; idx++)
454 {
455 status = vaspaceAlloc(pFabricVAS->pGVAS, pageSize, align, rangeLo,
456 rangeHi, RM_PAGE_SIZE_HUGE, flags, &addr);
457 if (status == NV_OK)
458 {
459 // Assert that the address returned is pageSize aligned
460 NV_ASSERT(NV_IS_ALIGNED64(addr, pageSize));
461
462 (*ppAddr)[idx] = addr;
463 *pNumAddr = *pNumAddr + 1;
464 }
465 else
466 {
467 NV_PRINTF(LEVEL_ERROR, "Failed to allocate vaspace\n");
468 goto failed;
469 }
470 }
471 }
472
473 pFabricVAS->ucFabricFreeSize -= size;
474 pFabricVAS->ucFabricInUseSize += size;
475
476 return NV_OK;
477
478 failed:
479
480 fabricvaspaceBatchFree(pFabricVAS, *ppAddr, *pNumAddr, 1);
481 portMemFree(*ppAddr);
482 *ppAddr = NULL;
483 *pNumAddr = 0;
484
485 return status;
486 }
487
488 NV_STATUS
fabricvaspaceFree_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 vAddr)489 fabricvaspaceFree_IMPL
490 (
491 FABRIC_VASPACE *pFabricVAS,
492 NvU64 vAddr
493 )
494 {
495 OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
496 OBJGPU *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
497 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
498 NvU64 blockSize;
499 NvBool bUcFla;
500
501 NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
502
503 bUcFla = (vAddr >= fabricvaspaceGetUCFlaStart(pFabricVAS) &&
504 vAddr < fabricvaspaceGetUCFlaLimit(pFabricVAS));
505
506 NV_ASSERT(vaspaceFreeV2(pFabricVAS->pGVAS, vAddr, &blockSize) == NV_OK);
507
508 kbusFlush_HAL(pGpu, pKernelBus, (BUS_FLUSH_VIDEO_MEMORY |
509 BUS_FLUSH_SYSTEM_MEMORY));
510
511 fabricvaspaceInvalidateTlb(pFabricVAS, pGpu, PTE_DOWNGRADE);
512
513 _fabricvaspaceUnbindInstBlk(pFabricVAS);
514
515 if (bUcFla)
516 {
517 pFabricVAS->ucFabricFreeSize += blockSize;
518 pFabricVAS->ucFabricInUseSize -= blockSize;
519 }
520
521 return NV_OK;
522 }
523
NV_STATUS
fabricvaspaceMap_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU *pGpu,
    const NvU64 vaLo,
    const NvU64 vaHi,
    const MMU_MAP_TARGET *pTarget,
    const VAS_MAP_FLAGS flags
)
{
    // Generic VAS map is not supported; see fabricvaspaceMapPhysMemdesc_IMPL.
    return NV_ERR_NOT_SUPPORTED;
}
537
void
fabricvaspaceUnmap_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU *pGpu,
    const NvU64 vaLo,
    const NvU64 vaHi
)
{
    // Intentional no-op; see fabricvaspaceUnmapPhysMemdesc_IMPL instead.
    return;
}
549
NV_STATUS
fabricvaspaceApplyDefaultAlignment_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    const FB_ALLOC_INFO *pAllocInfo,
    NvU64 *pAlign,
    NvU64 *pSize,
    NvU64 *pPageSizeLockMask
)
{
    // Alignment policy is enforced by the fabric alloc paths, not here.
    return NV_ERR_NOT_SUPPORTED;
}
562
NV_STATUS
fabricvaspaceGetVasInfo_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS *pParams
)
{
    // VA caps queries are not supported on the fabric VA space.
    return NV_ERR_NOT_SUPPORTED;
}
572
NV_STATUS
fabricvaspacePinRootPageDir_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU *pGpu
)
{
    // Delegate to the backing GVASPACE; it must exist (set up in construct).
    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);

    return vaspacePinRootPageDir(pFabricVAS->pGVAS, pGpu);
}
584
void
fabricvaspaceUnpinRootPageDir_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU *pGpu
)
{
    // Delegate to the backing GVASPACE; it must exist (set up in construct).
    NV_ASSERT(pFabricVAS->pGVAS != NULL);

    vaspaceUnpinRootPageDir(pFabricVAS->pGVAS, pGpu);
}
596
NV_STATUS
fabricvaspaceGetFreeHeap_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    NvU64 *freeSize
)
{
    NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_ASSERT_OR_RETURN(freeSize != NULL, NV_ERR_INVALID_ARGUMENT);

    // Tracked incrementally by the alloc/free paths; no heap walk needed.
    *freeSize = pFabricVAS->ucFabricFreeSize;
    return NV_OK;
}
610
611 void
fabricvaspaceBatchFree_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 * pAddr,NvU32 numAddr,NvU32 stride)612 fabricvaspaceBatchFree_IMPL
613 (
614 FABRIC_VASPACE *pFabricVAS,
615 NvU64 *pAddr,
616 NvU32 numAddr,
617 NvU32 stride
618 )
619 {
620 OBJVASPACE *pVAS = staticCast(pFabricVAS, OBJVASPACE);
621 OBJGPU *pGpu = gpumgrGetGpu(gpumgrGetDefaultPrimaryGpu(pVAS->gpuMask));
622 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
623 NvU64 totalFreeSize = 0;
624 NvU64 freeSize;
625 NvU32 count = 0;
626 NvU32 idx = 0;
627 NvBool bUcFla;
628
629
630 for (count = 0; count < numAddr; count++)
631 {
632 bUcFla = (pAddr[idx] >= fabricvaspaceGetUCFlaStart(pFabricVAS) &&
633 pAddr[idx] < fabricvaspaceGetUCFlaLimit(pFabricVAS));
634
635 NV_ASSERT(vaspaceFreeV2(pFabricVAS->pGVAS,
636 pAddr[idx], &freeSize) == NV_OK);
637
638 idx += stride;
639
640 if (bUcFla)
641 totalFreeSize += freeSize;
642 }
643
644 kbusFlush_HAL(pGpu, pKernelBus, (BUS_FLUSH_VIDEO_MEMORY |
645 BUS_FLUSH_SYSTEM_MEMORY));
646
647 fabricvaspaceInvalidateTlb(pFabricVAS, pGpu, PTE_DOWNGRADE);
648
649 _fabricvaspaceUnbindInstBlk(pFabricVAS);
650
651 pFabricVAS->ucFabricFreeSize += totalFreeSize;
652 pFabricVAS->ucFabricInUseSize -= totalFreeSize;
653 }
654
void
fabricvaspaceInvalidateTlb_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    OBJGPU *pGpu,
    VAS_PTE_UPDATE_TYPE type
)
{
    // Thin wrapper: forward the invalidate to the backing GVASPACE.
    vaspaceInvalidateTlb(pFabricVAS->pGVAS, pGpu, type);
}
665
//
// Returns a memdesc usable for mapping on pMappingGpu. If the fabric memdesc
// refers to FLA memory that is local to the mapping GPU (and NVLink loopback
// is unavailable), a sub-memdesc into the underlying vidmem is created so the
// mapping can go GVA->PA directly; otherwise the original memdesc is reused.
// Callers release the result with fabricvaspacePutGpaMemdesc().
//
NV_STATUS
fabricvaspaceGetGpaMemdesc_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    MEMORY_DESCRIPTOR *pFabricMemdesc,
    OBJGPU *pMappingGpu,
    MEMORY_DESCRIPTOR **ppAdjustedMemdesc
)
{
    KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMappingGpu);
    MEMORY_DESCRIPTOR *pRootMemDesc = NULL;
    NODE *pNode = NULL;
    NV_STATUS status = NV_OK;
    NvU64 rootOffset = 0;
    NvBool bLoopbackSupported = NV_FALSE;

    NV_ASSERT_OR_RETURN(ppAdjustedMemdesc != NULL, NV_ERR_INVALID_ARGUMENT);

    {
        // Loopback P2P (real or forced-config) lets the FLA address be used as-is.
        bLoopbackSupported = pKernelNvlink != NULL &&
                             (knvlinkIsP2pLoopbackSupported(pMappingGpu, pKernelNvlink) ||
                              knvlinkIsForcedConfig(pMappingGpu, pKernelNvlink));
    }

    // Non-fabric memory, or loopback available: no adjustment needed.
    if (memdescGetAddressSpace(pFabricMemdesc) != ADDR_FABRIC_V2 ||
        bLoopbackSupported)
    {
        *ppAdjustedMemdesc = pFabricMemdesc;
        return NV_OK;
    }

    pRootMemDesc = memdescGetRootMemDesc(pFabricMemdesc, &rootOffset);

    RmPhysAddr *pteArray = memdescGetPteArray(pRootMemDesc, AT_GPU);

    // Check if pteArray[0] is within the VAS range for the mapping GPU.
    if ((pteArray[0] < fabricvaspaceGetUCFlaStart(pFabricVAS)) ||
        (pteArray[0] > fabricvaspaceGetUCFlaLimit(pFabricVAS)))
    {
        *ppAdjustedMemdesc = pFabricMemdesc;
        return NV_OK;
    }

    //
    // If the address space is of type ADDR_FABRIC_V2 then determine if the
    // FLA import is on the mapping GPU. If FLA import is on the mapping GPU
    // and NVLink P2P over loopback is not supported, then map GVA->PA directly.
    // For discontiguous fabric memory allocation, searching for the first entry
    // in the pteArray should be fine to determine if FLA import is on the
    // mapping GPU.
    //
    NV_ASSERT_OK_OR_RETURN(btreeSearch(pteArray[0], &pNode,
                                       pFabricVAS->pFabricVaToGpaMap));

    FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode =
                              (FABRIC_VA_TO_GPA_MAP_NODE *)pNode->Data;

    //
    // Create a sub-memdesc for the offset into the vidMemDesc where the GVA
    // would be mapped. Note this includes two offsets:
    // 1. Offset into the fabric memdesc where the GVA is mapped.
    // 2. Offset into the physical vidmem memdesc where the fabric memory is
    //    mapped.
    //
    status = memdescCreateSubMem(ppAdjustedMemdesc, pFabricNode->pVidMemDesc,
                                 pMappingGpu,
                                 rootOffset + pFabricNode->offset,
                                 memdescGetSize(pFabricMemdesc));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to create submMemdesc for the GVA->PA mapping\n");
        return status;
    }

    return NV_OK;
}
743
void
fabricvaspacePutGpaMemdesc_IMPL
(
    FABRIC_VASPACE *pFabricVAS,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    //
    // Release a memdesc returned by fabricvaspaceGetGpaMemdesc(). When no
    // sub-memdesc was created, memdescDestroy() just drops a refcount.
    //
    memdescDestroy(pMemDesc);
}
753
754 void
fabricvaspaceVaToGpaMapRemove_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 vAddr)755 fabricvaspaceVaToGpaMapRemove_IMPL
756 (
757 FABRIC_VASPACE *pFabricVAS,
758 NvU64 vAddr
759 )
760 {
761 FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode = NULL;
762 NODE *pNode = NULL;
763
764 if (btreeSearch(vAddr, &pNode, pFabricVAS->pFabricVaToGpaMap) == NV_OK)
765 {
766 pFabricNode = (FABRIC_VA_TO_GPA_MAP_NODE *)pNode->Data;
767
768 btreeUnlink(&pFabricNode->Node, &pFabricVAS->pFabricVaToGpaMap);
769
770 portMemFree(pFabricNode);
771 }
772 }
773
774 NV_STATUS
fabricvaspaceVaToGpaMapInsert_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 vAddr,MEMORY_DESCRIPTOR * pVidMemDesc,NvU64 offset)775 fabricvaspaceVaToGpaMapInsert_IMPL
776 (
777 FABRIC_VASPACE *pFabricVAS,
778 NvU64 vAddr,
779 MEMORY_DESCRIPTOR *pVidMemDesc,
780 NvU64 offset
781 )
782 {
783 FABRIC_VA_TO_GPA_MAP_NODE *pFabricNode = NULL;
784 NV_STATUS status = NV_OK;
785
786 pFabricNode = portMemAllocNonPaged(sizeof(FABRIC_VA_TO_GPA_MAP_NODE));
787 if (pFabricNode == NULL)
788 return NV_ERR_NO_MEMORY;
789
790 portMemSet(pFabricNode, 0, sizeof(FABRIC_VA_TO_GPA_MAP_NODE));
791
792 pFabricNode->pVidMemDesc = pVidMemDesc;
793 pFabricNode->offset = offset;
794 pFabricNode->Node.keyStart = vAddr;
795 pFabricNode->Node.keyEnd = vAddr;
796 pFabricNode->Node.Data = pFabricNode;
797
798 // Insert into the btree tracking memory fabric allocations for this GPU.
799 status = btreeInsert(&pFabricNode->Node, &pFabricVAS->pFabricVaToGpaMap);
800 if (status != NV_OK)
801 {
802 NV_PRINTF(LEVEL_ERROR,
803 "Failed to insert addr 0x%llx into the memory fabric tree\n",
804 pFabricNode->Node.keyStart);
805
806 portMemFree(pFabricNode);
807 return status;
808 }
809
810 return NV_OK;
811 }
812
813 NV_STATUS
fabricvaspaceAllocMulticast_IMPL(FABRIC_VASPACE * pFabricVAS,NvU64 pageSize,NvU64 alignment,VAS_ALLOC_FLAGS flags,NvU64 base,NvU64 size)814 fabricvaspaceAllocMulticast_IMPL
815 (
816 FABRIC_VASPACE *pFabricVAS,
817 NvU64 pageSize,
818 NvU64 alignment,
819 VAS_ALLOC_FLAGS flags,
820 NvU64 base,
821 NvU64 size
822 )
823 {
824 NvU64 rangeLo;
825 NvU64 rangeHi;
826 NvU64 addr = 0;
827 NV_STATUS status;
828
829 NV_ASSERT_OR_RETURN(pFabricVAS->pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
830 NV_ASSERT_OR_RETURN(pageSize >= RM_PAGE_SIZE_HUGE, NV_ERR_INVALID_ARGUMENT);
831 NV_ASSERT_OR_RETURN(alignment != 0, NV_ERR_INVALID_ARGUMENT);
832 NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
833 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(alignment, pageSize), NV_ERR_INVALID_ARGUMENT);
834 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(base, pageSize), NV_ERR_INVALID_ARGUMENT);
835 NV_ASSERT_OR_RETURN(NV_IS_ALIGNED64(size, pageSize), NV_ERR_INVALID_ARGUMENT);
836
837 rangeLo = base;
838 rangeHi = base + size - 1;
839
840 //
841 // RM_PAGE_SIZE_HUGE is passed since MCFLA->PA mappings support minimum
842 // 2MB pagesize.
843 //
844 status = vaspaceAlloc(pFabricVAS->pGVAS, size, alignment, rangeLo,
845 rangeHi, RM_PAGE_SIZE_HUGE, flags, &addr);
846
847 NV_ASSERT(addr == base);
848
849 return status;
850 }
851
852 static NV_STATUS
_fabricVaspaceValidateMapAttrs(NvU64 fabricOffset,NvU64 fabricAllocSize,NvU64 fabricPageSize,NvU64 physMapOffset,NvU64 physMapLength,NvU64 physAllocSize,NvU64 physPageSize)853 _fabricVaspaceValidateMapAttrs
854 (
855 NvU64 fabricOffset,
856 NvU64 fabricAllocSize,
857 NvU64 fabricPageSize,
858 NvU64 physMapOffset,
859 NvU64 physMapLength,
860 NvU64 physAllocSize,
861 NvU64 physPageSize
862 )
863 {
864 // Fabric mem offset should be at least phys page size aligned.
865 if (!NV_IS_ALIGNED64(fabricOffset, physPageSize) ||
866 (fabricOffset >= fabricAllocSize))
867 {
868 NV_PRINTF(LEVEL_ERROR,
869 "Invalid offset passed for the fabric handle\n");
870
871 return NV_ERR_INVALID_OFFSET;
872 }
873
874 if (!NV_IS_ALIGNED64(physMapOffset, physPageSize) ||
875 (physMapOffset >= physAllocSize))
876 {
877 NV_PRINTF(LEVEL_ERROR,
878 "Invalid offset passed for the physmem handle\n");
879
880 return NV_ERR_INVALID_OFFSET;
881 }
882
883 if ((physMapLength == 0) ||
884 (!NV_IS_ALIGNED64(physMapLength, physPageSize)) ||
885 (physMapLength > (physAllocSize - physMapOffset)) ||
886 (physMapLength > (fabricAllocSize - fabricOffset)))
887 {
888 NV_PRINTF(LEVEL_ERROR,
889 "Invalid map length passed for the physmem handle\n");
890
891 return NV_ERR_INVALID_ARGUMENT;
892 }
893
894 return NV_OK;
895 }
896
// One contiguous piece of a fabric mapping request.
typedef struct FABRIC_VASPACE_MAPPING_REGION
{
    NvU64 offset;   // byte offset into the fabric allocation
    NvU64 length;   // byte length of this region
} FABRIC_VASPACE_MAPPING_REGION;

//
// In worst case, we can have three regions to map. Two partially filled fabric
// pages and one (or more) fully filled fabric page(s).
//
#define FABRIC_VASPACE_MAPPING_REGIONS_MAX 3

// Fixed-capacity region list filled in by _fabricvaspaceGetMappingRegions().
typedef struct FABRIC_VASPACE_MAPPING_REGIONS
{
    FABRIC_VASPACE_MAPPING_REGION r[FABRIC_VASPACE_MAPPING_REGIONS_MAX];
} FABRIC_VASPACE_MAPPING_REGIONS;
913
//
// Splits [fabricOffset, fabricOffset + physMapLength) into at most three
// regions: an unaligned head (up to the next fabric page boundary), a body of
// whole fabric pages, and an unaligned tail. Regions are emitted in order;
// *pNumRegions is the count of regions produced (possibly 0).
//
static void
_fabricvaspaceGetMappingRegions
(
    NvU64 fabricOffset,
    NvU64 fabricPageSize,
    NvU64 physMapLength,
    FABRIC_VASPACE_MAPPING_REGIONS *pRegions,
    NvU32 *pNumRegions
)
{
    NvU64 fabricOffsetAligned = NV_ALIGN_UP64(fabricOffset, fabricPageSize);
    NvU64 mapLengthAligned = NV_ALIGN_DOWN64(physMapLength, fabricPageSize);

    *pNumRegions = 0;

    // Head: partial fabric page before the first aligned boundary.
    if ((fabricOffset < fabricOffsetAligned) &&
        (physMapLength >= (fabricOffsetAligned - fabricOffset)))
    {
        pRegions->r[*pNumRegions].offset = fabricOffset;
        pRegions->r[*pNumRegions].length = fabricOffsetAligned - fabricOffset;

        // Advance past the head and recompute the aligned remainder.
        fabricOffset += pRegions->r[*pNumRegions].length;
        physMapLength -= pRegions->r[*pNumRegions].length;
        mapLengthAligned = NV_ALIGN_DOWN64(physMapLength, fabricPageSize);

        (*pNumRegions)++;
    }

    if (physMapLength == 0)
        return;

    // Body: whole fabric pages starting at an aligned offset.
    if ((fabricOffset == fabricOffsetAligned) &&
        (mapLengthAligned >= fabricPageSize))
    {
        pRegions->r[*pNumRegions].offset = fabricOffset;
        pRegions->r[*pNumRegions].length = mapLengthAligned;

        fabricOffset += pRegions->r[*pNumRegions].length;
        physMapLength -= pRegions->r[*pNumRegions].length;

        (*pNumRegions)++;
    }

    if (physMapLength == 0)
        return;

    // Tail: whatever partial page remains.
    pRegions->r[*pNumRegions].offset = fabricOffset;
    pRegions->r[*pNumRegions].length = physMapLength;

    (*pNumRegions)++;
}
965
966 void
fabricvaspaceUnmapPhysMemdesc_IMPL(FABRIC_VASPACE * pFabricVAS,MEMORY_DESCRIPTOR * pFabricMemDesc,NvU64 fabricOffset,MEMORY_DESCRIPTOR * pPhysMemDesc,NvU64 physMapLength)967 fabricvaspaceUnmapPhysMemdesc_IMPL
968 (
969 FABRIC_VASPACE *pFabricVAS,
970 MEMORY_DESCRIPTOR *pFabricMemDesc,
971 NvU64 fabricOffset,
972 MEMORY_DESCRIPTOR *pPhysMemDesc,
973 NvU64 physMapLength
974 )
975 {
976 OBJGPU *pGpu = pPhysMemDesc->pGpu;
977 NvU32 fabricPageCount;
978 NvU64 fabricAddr;
979 NvU64 fabricPageSize;
980 NvU32 i, j;
981 NvU64 mapLength;
982 FABRIC_VASPACE_MAPPING_REGIONS regions;
983 NvU32 numRegions;
984 RmPhysAddr *pFabricPteArray;
985
986 fabricPageSize = memdescGetPageSize(pFabricMemDesc, AT_GPU);
987
988 NV_ASSERT_OR_RETURN_VOID(dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE) ==
989 pFabricVAS);
990
991 _fabricvaspaceGetMappingRegions(fabricOffset, fabricPageSize, physMapLength,
992 ®ions, &numRegions);
993 NV_ASSERT_OR_RETURN_VOID(numRegions != 0);
994
995 pFabricPteArray = memdescGetPteArray(pFabricMemDesc, AT_GPU);
996
997 for (i = 0; i < numRegions; i++)
998 {
999 fabricPageCount =
1000 ((memdescGetPteArraySize(pFabricMemDesc, AT_GPU) == 1) ||
1001 (regions.r[i].length < fabricPageSize)) ?
1002 1 : (regions.r[i].length / fabricPageSize);
1003
1004 mapLength = (fabricPageCount == 1) ? regions.r[i].length : fabricPageSize;
1005
1006 fabricOffset = regions.r[i].offset;
1007
1008 for (j = 0; j < fabricPageCount; j++)
1009 {
1010 if (fabricPageCount == 1)
1011 {
1012 fabricAddr = pFabricPteArray[0] + fabricOffset;
1013 }
1014 else
1015 {
1016 fabricAddr = pFabricPteArray[fabricOffset /
1017 pFabricMemDesc->pageArrayGranularity];
1018 }
1019
1020 vaspaceUnmap(pFabricVAS->pGVAS, pPhysMemDesc->pGpu, fabricAddr,
1021 fabricAddr + mapLength - 1);
1022
1023 fabricOffset = fabricOffset + mapLength;
1024 }
1025 }
1026
1027 fabricvaspaceInvalidateTlb(pFabricVAS, pPhysMemDesc->pGpu, PTE_DOWNGRADE);
1028 }
1029
1030 NV_STATUS
fabricvaspaceMapPhysMemdesc_IMPL(FABRIC_VASPACE * pFabricVAS,MEMORY_DESCRIPTOR * pFabricMemDesc,NvU64 fabricOffset,MEMORY_DESCRIPTOR * pPhysMemDesc,NvU64 physOffset,NvU64 physMapLength,NvU32 flags)1031 fabricvaspaceMapPhysMemdesc_IMPL
1032 (
1033 FABRIC_VASPACE *pFabricVAS,
1034 MEMORY_DESCRIPTOR *pFabricMemDesc,
1035 NvU64 fabricOffset,
1036 MEMORY_DESCRIPTOR *pPhysMemDesc,
1037 NvU64 physOffset,
1038 NvU64 physMapLength,
1039 NvU32 flags
1040 )
1041 {
1042 OBJGPU *pGpu = pPhysMemDesc->pGpu;
1043 VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);
1044 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1045 NV_STATUS status;
1046 DMA_PAGE_ARRAY pageArray;
1047 NvU32 kind;
1048 COMPR_INFO comprInfo;
1049 NvU32 mapFlags = DMA_UPDATE_VASPACE_FLAGS_UPDATE_ALL |
1050 DMA_UPDATE_VASPACE_FLAGS_SKIP_4K_PTE_CHECK;
1051 NvU32 fabricPageCount;
1052 NvU64 fabricAddr;
1053 NvU64 physPageSize;
1054 NvU64 fabricPageSize;
1055 NvU64 physAddr;
1056 NvU32 i, j;
1057 NvU64 mapLength;
1058 NvBool bReadOnly = !!(flags & FABRIC_VASPACE_MAP_FLAGS_READ_ONLY);
1059 FABRIC_VASPACE_MAPPING_REGIONS regions;
1060 NvU32 numRegions;
1061 MEMORY_DESCRIPTOR *pTempMemdesc;
1062 NvU32 aperture;
1063 NvU32 peerNumber = BUS_INVALID_PEER;
1064 RmPhysAddr *pFabricPteArray;
1065 RmPhysAddr *pPhysPteArray;
1066
1067 NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
1068 NV_ASSERT_OR_RETURN(pPhysMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
1069
1070 mapFlags |= bReadOnly ? DMA_UPDATE_VASPACE_FLAGS_READ_ONLY : 0;
1071
1072 NV_ASSERT_OR_RETURN(dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE) == pFabricVAS,
1073 NV_ERR_INVALID_ARGUMENT);
1074
1075 physPageSize = memdescGetPageSize(pPhysMemDesc, AT_GPU);
1076 fabricPageSize = memdescGetPageSize(pFabricMemDesc, AT_GPU);
1077
1078 status = _fabricVaspaceValidateMapAttrs(fabricOffset,
1079 memdescGetSize(pFabricMemDesc),
1080 fabricPageSize,
1081 physOffset,
1082 physMapLength,
1083 memdescGetSize(pPhysMemDesc),
1084 physPageSize);
1085 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
1086
1087 if (pFabricVAS->bRpcAlloc)
1088 return NV_OK;
1089
1090 status = memmgrGetKindComprFromMemDesc(pMemoryManager, pPhysMemDesc,
1091 physOffset, &kind, &comprInfo);
1092 NV_ASSERT_OK_OR_RETURN(status);
1093
1094 if (memdescGetAddressSpace(pPhysMemDesc) == ADDR_FBMEM)
1095 {
1096 aperture = NV_MMU_PTE_APERTURE_VIDEO_MEMORY;
1097 }
1098 else if (memdescIsEgm(pPhysMemDesc))
1099 {
1100 aperture = NV_MMU_PTE_APERTURE_PEER_MEMORY;
1101 //
1102 // Make sure that we receive a mapping request for EGM memory
1103 // only if local EGM is enabled.
1104 //
1105 NV_ASSERT_OR_RETURN(pMemoryManager->bLocalEgmEnabled, NV_ERR_INVALID_STATE);
1106 peerNumber = pMemoryManager->localEgmPeerId;
1107 }
1108 else if (memdescGetAddressSpace(pPhysMemDesc) == ADDR_SYSMEM)
1109 {
1110 if (memdescGetCpuCacheAttrib(pPhysMemDesc) == NV_MEMORY_CACHED)
1111 {
1112 aperture = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
1113 }
1114 else
1115 {
1116 aperture = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;
1117 }
1118 }
1119 else
1120 {
1121 NV_PRINTF(LEVEL_ERROR, "Unsupported aperture\n");
1122 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1123 }
1124
1125 _fabricvaspaceGetMappingRegions(fabricOffset, fabricPageSize, physMapLength,
1126 ®ions, &numRegions);
1127 NV_ASSERT_OR_RETURN(numRegions != 0, NV_ERR_INVALID_ARGUMENT);
1128
1129 pFabricPteArray = memdescGetPteArray(pFabricMemDesc, AT_GPU);
1130 pPhysPteArray = memdescGetPteArray(pPhysMemDesc, AT_GPU);
1131
1132 for (i = 0; i < numRegions; i++)
1133 {
1134 fabricPageCount = ((memdescGetPteArraySize(pFabricMemDesc, AT_GPU) == 1) ||
1135 (regions.r[i].length < fabricPageSize)) ? \
1136 1 : (regions.r[i].length / fabricPageSize);
1137 mapLength = (fabricPageCount == 1) ? regions.r[i].length : fabricPageSize;
1138 fabricOffset = regions.r[i].offset;
1139
1140 portMemSet(&pageArray, 0, sizeof(DMA_PAGE_ARRAY));
1141 pageArray.count = (memdescGetPteArraySize(pPhysMemDesc, AT_GPU) == 1) ? \
1142 1 : (mapLength / pPhysMemDesc->pageArrayGranularity);
1143
1144 for (j = 0; j < fabricPageCount; j++)
1145 {
1146 if (fabricPageCount == 1)
1147 {
1148 fabricAddr = pFabricPteArray[0] + fabricOffset;
1149 }
1150 else
1151 {
1152 fabricAddr = pFabricPteArray[fabricOffset /
1153 pFabricMemDesc->pageArrayGranularity];
1154 }
1155
1156 if (pageArray.count == 1)
1157 {
1158 physAddr = pPhysPteArray[0] + physOffset;
1159 pageArray.pData = &physAddr;
1160 }
1161 else
1162 {
1163 pageArray.pData = &pPhysPteArray[physOffset /
1164 pPhysMemDesc->pageArrayGranularity];
1165 }
1166
1167 //
1168 // When physPageSize is greater than fabricPageSize, to avoid fabric
1169 // VAs getting aligned using physPageSize by dmaUpdateVASpace_HAL,
1170 // create a tempMemdesc and override its pageSize.
1171 //
1172 if (fabricPageSize < physPageSize)
1173 {
1174 status = memdescCreateSubMem(&pTempMemdesc, pPhysMemDesc,
1175 pPhysMemDesc->pGpu,
1176 physOffset, mapLength);
1177 if (status != NV_OK)
1178 goto fail;
1179
1180 memdescSetPageSize(pTempMemdesc, AT_GPU, fabricPageSize);
1181 }
1182 else
1183 {
1184 pTempMemdesc = pPhysMemDesc;
1185 }
1186
1187 // Map the memory fabric object at the given physical memory offset.
1188 status = dmaUpdateVASpace_HAL(pGpu, pDma, pFabricVAS->pGVAS, pTempMemdesc,
1189 NULL, fabricAddr, fabricAddr + mapLength - 1,
1190 mapFlags, &pageArray, 0, &comprInfo, 0,
1191 NV_MMU_PTE_VALID_TRUE,
1192 aperture,
1193 peerNumber, NVLINK_INVALID_FABRIC_ADDR,
1194 DMA_DEFER_TLB_INVALIDATE, NV_FALSE,
1195 memdescGetPageSize(pTempMemdesc, AT_GPU));
1196
1197 if (pTempMemdesc != pPhysMemDesc)
1198 memdescDestroy(pTempMemdesc);
1199
1200 if (status != NV_OK)
1201 goto fail;
1202
1203 physOffset = physOffset + mapLength;
1204 fabricOffset = fabricOffset + mapLength;
1205 }
1206 }
1207
1208 fabricvaspaceInvalidateTlb(pFabricVAS, pPhysMemDesc->pGpu, PTE_UPGRADE);
1209
1210 return NV_OK;
1211
1212 fail:
1213 for (j = 0; j < i; j++)
1214 fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc,
1215 regions.r[j].offset, pPhysMemDesc,
1216 regions.r[j].length);
1217
1218 return status;
1219 }
1220
1221 NV_STATUS
fabricvaspaceInitUCRange_IMPL(FABRIC_VASPACE * pFabricVAS,OBJGPU * pGpu,NvU64 fabricBase,NvU64 fabricSize)1222 fabricvaspaceInitUCRange_IMPL
1223 (
1224 FABRIC_VASPACE *pFabricVAS,
1225 OBJGPU *pGpu,
1226 NvU64 fabricBase,
1227 NvU64 fabricSize
1228 )
1229 {
1230 if (fabricvaspaceGetUCFlaLimit(pFabricVAS) != 0)
1231 return NV_ERR_IN_USE;
1232
1233 if (fabricSize != 0)
1234 {
1235 NV_PRINTF(LEVEL_INFO, "Setting UC Base: %llx, size: %llx \n",
1236 fabricBase, fabricSize);
1237 pFabricVAS->ucFabricBase = fabricBase;
1238 pFabricVAS->ucFabricLimit = fabricBase + fabricSize - 1;
1239 pFabricVAS->ucFabricInUseSize = 0;
1240 pFabricVAS->ucFabricFreeSize = fabricSize;
1241
1242 if (IS_VIRTUAL(pGpu))
1243 {
1244 VGPU_STATIC_INFO *pVSI = GPU_GET_STATIC_INFO(pGpu);
1245
1246 pVSI->flaInfo.base = fabricBase;
1247 pVSI->flaInfo.size = fabricSize;
1248 }
1249 }
1250
1251 return NV_OK;
1252 }
1253
1254 void
fabricvaspaceClearUCRange_IMPL(FABRIC_VASPACE * pFabricVAS)1255 fabricvaspaceClearUCRange_IMPL
1256 (
1257 FABRIC_VASPACE *pFabricVAS
1258 )
1259 {
1260 pFabricVAS->ucFabricBase = 0;
1261 pFabricVAS->ucFabricLimit = 0;
1262 pFabricVAS->ucFabricInUseSize = 0;
1263 pFabricVAS->ucFabricFreeSize = 0;
1264 }
1265
1266 NV_STATUS
fabricvaspaceGetPageLevelInfo_IMPL(FABRIC_VASPACE * pFabricVAS,OBJGPU * pGpu,NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS * pParams)1267 fabricvaspaceGetPageLevelInfo_IMPL
1268 (
1269 FABRIC_VASPACE *pFabricVAS,
1270 OBJGPU *pGpu,
1271 NV90F1_CTRL_VASPACE_GET_PAGE_LEVEL_INFO_PARAMS *pParams
1272 )
1273 {
1274 OBJGVASPACE *pGVAS = dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE);
1275 NV_ASSERT_OR_RETURN(pGVAS != NULL, NV_ERR_OBJECT_NOT_FOUND);
1276
1277 return gvaspaceGetPageLevelInfo(pGVAS, pGpu, pParams);
1278 }
1279
1280 NvBool
fabricvaspaceIsInUse_IMPL(FABRIC_VASPACE * pFabricVAS)1281 fabricvaspaceIsInUse_IMPL
1282 (
1283 FABRIC_VASPACE *pFabricVAS
1284 )
1285 {
1286 return gvaspaceIsInUse(dynamicCast(pFabricVAS->pGVAS, OBJGVASPACE));
1287 }
1288