1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /******************************************************************************
25  *
26  *   Description:
27  *       This file contains the functions managing memory export class
28  *
29  *****************************************************************************/
30 
31 #define NVOC_MEM_EXPORT_H_PRIVATE_ACCESS_ALLOWED
32 
33 /*
34  * Lock ordering
35  *
36  * RMAPI Lock (can be removed once RM-core allows it, for now RO-only)
37  * |_External Client Lock (e.g. CUDA client)
38  *   |_Export Module Lock
39  *     |_Export Descriptor Lock
40  *       |_GPU(s) Lock as needed
41  */
42 
43 #include "os/os.h"
44 #include "core/locks.h"
45 #include "os/capability.h"
46 #include "core/system.h"
47 #include "core/thread_state.h"
48 #include "mem_mgr/mem_export.h"
49 #include "resserv/rs_resource.h"
50 #include "rmapi/client.h"
51 #include "compute/fabric.h"
52 #include "kernel/gpu/mem_mgr/mem_mgr.h"
53 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
54 #include "gpu/device/device.h"
55 #include "gpu/subdevice/subdevice.h"
56 #include "containers/list.h"
57 
//
// Structured view of the UUID bytes carried in an export packet.
//
// The structure is copied byte-for-byte to and from the packet's uuid field,
// and its size is compile-time checked against that field and against
// NV_MEM_EXPORT_UUID_LEN by the functions in this file. The (nodeId, expId)
// pair is also the key used to look the descriptor up in
// pSys->sysMemExportCache.
//
typedef struct mem_export_uuid
{
    NvU64 expId;     // Value returned by incrementing pSys->sysExportObjectCounter
    NvU16 nodeId;    // Fabric node ID returned by fabricGetNodeId()
    NvU8  unused[6]; // Pads the structure to the packet UUID length
} MEM_EXPORT_UUID;
64 
//
// Describes the destination parent under which a batch of exported memory
// handles has been duped. One entry is appended to
// MEM_EXPORT_INFO::parentInfoList per export-mem control call and removed
// again once no attached memory references it.
//
typedef struct attached_parent_info
{
    // OS-layer GPU info token; NULL for a device-less parent. Released via
    // osReleaseGpuOsInfo() when the parent is detached.
    void     *pGpuOsInfo;
    // GPU of the parent device/subdevice; NULL for a device-less parent.
    OBJGPU   *pGpu;
    // Number of ATTACHED_MEM_INFO entries currently pointing at this parent.
    NvU64     refCount;
    // Client that owns the duped memory handles.
    NvHandle  hClient;
    // Object under hClient the memory is duped beneath.
    NvHandle  hParent; // hDevice or hSubdevice

    // MIG GPU instance the parent belongs to; NULL when not set by
    // _memoryexportGetParentHandles() (non-MIG or device-less parent).
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance;
} ATTACHED_PARENT_INFO;

// List container type used for MEM_EXPORT_INFO::parentInfoList.
MAKE_LIST(ATTACHED_PARENT_INFO_LIST, ATTACHED_PARENT_INFO);
77 
//
// One attachment slot of an export descriptor: a memory handle duped under
// a parent tracked by ATTACHED_PARENT_INFO.
//
typedef struct attached_mem_info
{
    // Parent the handle was duped under; NULL while the slot is empty.
    ATTACHED_PARENT_INFO *pParentInfo;

    // Handle of the duped memory under pParentInfo->hClient; 0 means the
    // slot is empty.
    NvHandle hDupedMem;
    // NV00E0_ADDR_SPACE_TYPE_* value filled in by _memoryexportVerifyMem().
    NvU8     addressSpace;
} ATTACHED_MEM_INFO;
85 
//
// The mem export descriptor. It is shared by every MemoryExport object that
// was created for, or duped by UUID from, the same export, and is registered
// in pSys->sysMemExportCache under its (nodeId, expId) UUID.
//
typedef struct mem_export_info
{
    // Allocation parameters handed back verbatim to clients that dup the
    // export by UUID; numCurHandles, deviceInstanceMask and giIdMasks are
    // kept up to date as memory is attached and detached.
    NV00E0_ALLOCATION_PARAMETERS cachedParams;

    // Number of MemoryExport objects referencing this descriptor.
    NvU64           refCount;
    // Identity of this export; also the cache lookup key.
    MEM_EXPORT_UUID uuid;

    // IMEX channel of the client that created the export; dup requests must
    // present a matching channel.
    NvS32 imexChannel;

    // Per device instance (and per MIG swizzId) count of attached parents,
    // used to decide when the corresponding mask bit can be cleared.
    struct
    {
        NvU32 gpu;
        NvU32 migGi[KMIGMGR_MAX_GPU_SWIZZID];
    } attachedUsageCount[NV_MAX_DEVICES];

    // Parents that currently have memory attached to this descriptor.
    ATTACHED_PARENT_INFO_LIST parentInfoList;

    // Temp stash before committing the change.
    ATTACHED_MEM_INFO stashMemInfos[NV00E0_MAX_EXPORT_HANDLES];

    //
    // The lock protects MEM_EXPORT_INFO, the mem export descriptor.
    //
    // The lock should be taken only if a mem export descriptor is safe
    // to access i.e., holding the module lock or the accessing thread
    // has the mem export descriptor refcounted.
    //
    PORT_RWLOCK *pLock;

    // Attachment slots; the allocation is sized for
    // cachedParams.numMaxHandles entries.
    // Should be last
    ATTACHED_MEM_INFO memInfos[0];
} MEM_EXPORT_INFO;
118 
119 NvU16
120 memoryExportGetNodeId
121 (
122     NV_EXPORT_MEM_PACKET *pExportPacket
123 )
124 {
125     ct_assert(sizeof(MEM_EXPORT_UUID) == sizeof(pExportPacket->uuid));
126 
127     MEM_EXPORT_UUID *pExportUuid = (MEM_EXPORT_UUID*)pExportPacket->uuid;
128 
129     return pExportUuid->nodeId;
130 }
131 
132 static NV_STATUS
133 _memoryexportDup
134 (
135     MemoryExport                 *pMemoryExport,
136     NV00E0_ALLOCATION_PARAMETERS *pAllocParams
137 )
138 {
139     OBJSYS *pSys = SYS_GET_INSTANCE();
140     SysMemExportCacheEntry *pEntry;
141     MEM_EXPORT_INFO *pExportInfo;
142     MEM_EXPORT_UUID uuid;
143     RmClient *pRmClient = dynamicCast(RES_GET_CLIENT(pMemoryExport), RmClient);
144     NV_STATUS status = NV_OK;
145     NvBool bImexDaemon;
146     NvS32 imexChannel = pAllocParams->imexChannel;
147 
148     ct_assert(sizeof(NV_EXPORT_MEM_PACKET) == NV_MEM_EXPORT_PACKET_LEN);
149     ct_assert(sizeof(MEM_EXPORT_UUID) == NV_MEM_EXPORT_UUID_LEN);
150 
151     portMemCopy(&uuid, NV_MEM_EXPORT_UUID_LEN,
152                 pAllocParams->packet.uuid, NV_MEM_EXPORT_UUID_LEN);
153 
154     //
155     // Keep the module lock throughout so that the destructor can't delete
156     // pExportInfo underneath us until we refcount it.
157     //
158     portSyncRwLockAcquireRead(pSys->pSysMemExportModuleLock);
159 
160     pEntry = multimapFindItem(&pSys->sysMemExportCache, uuid.nodeId,
161                               uuid.expId);
162     if (pEntry == NULL)
163     {
164         portSyncRwLockReleaseRead(pSys->pSysMemExportModuleLock);
165         return NV_ERR_OBJECT_NOT_FOUND;
166     }
167 
168     pExportInfo = pEntry->pData;
169 
170     portSyncRwLockAcquireWrite(pExportInfo->pLock);
171 
172     //
173     // If the client is IMEX daemon, we need to trust the channel
174     // provided by it.
175     //
176     bImexDaemon = rmclientIsCapable(pRmClient, NV_RM_CAP_SYS_FABRIC_IMEX_MGMT);
177 
178     if (bImexDaemon && (pExportInfo->imexChannel != imexChannel))
179     {
180         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
181     }
182     else if (!bImexDaemon && (pExportInfo->imexChannel != pRmClient->imexChannel))
183     {
184         status = NV_ERR_INSUFFICIENT_PERMISSIONS;
185     }
186     else
187     {
188         *pAllocParams = pExportInfo->cachedParams;
189 
190         pExportInfo->refCount++;
191 
192         pMemoryExport->pExportInfo = pExportInfo;
193     }
194 
195     portSyncRwLockReleaseWrite(pExportInfo->pLock);
196 
197     portSyncRwLockReleaseRead(pSys->pSysMemExportModuleLock);
198 
199     return status;
200 }
201 
202 static void
203 _memoryexportGenerateUuid
204 (
205     MEM_EXPORT_UUID *pUuid
206 )
207 {
208     OBJSYS *pSys = SYS_GET_INSTANCE();
209     Fabric *pFabric = SYS_GET_FABRIC(pSys);
210 
211     ct_assert(sizeof(MEM_EXPORT_UUID) == NV_MEM_EXPORT_UUID_LEN);
212 
213     pUuid->nodeId = fabricGetNodeId(pFabric);
214     pUuid->expId = portAtomicExIncrementU64(&pSys->sysExportObjectCounter);
215 }
216 
217 static NV_STATUS
218 _memoryexportConstruct
219 (
220     MemoryExport                 *pMemoryExport,
221     NV00E0_ALLOCATION_PARAMETERS *pAllocParams
222 )
223 {
224     OBJSYS *pSys = SYS_GET_INSTANCE();
225     MEM_EXPORT_INFO *pExportInfo;
226     NvU64 size;
227     NV_STATUS status;
228     SysMemExportCacheEntry *pEntry;
229     SYS_MEM_EXPORT_CACHESubmap *pSubmap = NULL;
230     RmClient *pRmClient = dynamicCast(RES_GET_CLIENT(pMemoryExport), RmClient);
231 
232     if (pRmClient->imexChannel == -1)
233         return NV_ERR_INSUFFICIENT_PERMISSIONS;
234 
235     if (pAllocParams->numMaxHandles == 0)
236         return NV_ERR_INVALID_ARGUMENT;
237 
238     size = sizeof(*pExportInfo) + (pAllocParams->numMaxHandles *
239            sizeof(pExportInfo->memInfos[0]));
240 
241     pExportInfo = portMemAllocNonPaged(size);
242     if (pExportInfo == NULL)
243         return NV_ERR_NO_MEMORY;
244 
245     portMemSet(pExportInfo, 0, size);
246 
247     pExportInfo->imexChannel = pRmClient->imexChannel;
248 
249     _memoryexportGenerateUuid(&pExportInfo->uuid);
250 
251     pExportInfo->pLock = portSyncRwLockCreate(
252                             portMemAllocatorGetGlobalNonPaged());
253     if (pExportInfo->pLock == NULL)
254     {
255         status = NV_ERR_NO_MEMORY;
256         goto free_mem;
257     }
258 
259     portMemCopy(pAllocParams->packet.uuid, NV_MEM_EXPORT_UUID_LEN,
260                 &pExportInfo->uuid, NV_MEM_EXPORT_UUID_LEN);
261 
262     pExportInfo->cachedParams.packet = pAllocParams->packet;
263     pExportInfo->cachedParams.numMaxHandles = pAllocParams->numMaxHandles;
264     pExportInfo->cachedParams.flags = pAllocParams->flags;
265 
266     portMemCopy(pExportInfo->cachedParams.metadata,
267                 NV_MEM_EXPORT_METADATA_LEN,
268                 pAllocParams->metadata,
269                 NV_MEM_EXPORT_METADATA_LEN);
270 
271     listInit(&pExportInfo->parentInfoList, portMemAllocatorGetGlobalNonPaged());
272 
273     pExportInfo->refCount = 1;
274 
275     pMemoryExport->pExportInfo = pExportInfo;
276 
277     // Cache entry now...
278     portSyncRwLockAcquireWrite(pSys->pSysMemExportModuleLock);
279 
280     if (multimapFindSubmap(&pSys->sysMemExportCache,
281                            pExportInfo->uuid.nodeId) == NULL)
282     {
283         if ((pSubmap = multimapInsertSubmap(&pSys->sysMemExportCache,
284                            pExportInfo->uuid.nodeId)) == NULL)
285         {
286             status = NV_ERR_NO_MEMORY;
287             goto fail;
288         }
289     }
290 
291     pEntry = multimapInsertItemNew(&pSys->sysMemExportCache,
292                                    pExportInfo->uuid.nodeId,
293                                    pExportInfo->uuid.expId);
294     if (pEntry == NULL)
295     {
296         status = NV_ERR_INSERT_DUPLICATE_NAME;
297         goto fail;
298     }
299 
300     pEntry->pData = pExportInfo;
301 
302     portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);
303 
304     return NV_OK;
305 
306 fail:
307     listDestroy(&pExportInfo->parentInfoList);
308 
309     if (pSubmap != NULL)
310         multimapRemoveSubmap(&pSys->sysMemExportCache, pSubmap);
311 
312     portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);
313 
314     portSyncRwLockDestroy(pExportInfo->pLock);
315     pExportInfo->pLock = NULL;
316 
317 free_mem:
318     portMemFree(pExportInfo);
319 
320     return status;
321 }
322 
323 NV_STATUS
324 memoryexportConstruct_IMPL
325 (
326     MemoryExport                 *pMemoryExport,
327     CALL_CONTEXT                 *pCallContext,
328     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
329 )
330 {
331     NV_STATUS status;
332     NV00E0_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;
333 
334     if (pAllocParams->flags & NV_MEM_EXPORT_FLAGS_DUP_BY_UUID)
335         status = _memoryexportDup(pMemoryExport, pAllocParams);
336     else
337         status = _memoryexportConstruct(pMemoryExport, pAllocParams);
338 
339     return status;
340 }
341 
342 static void
343 _memoryexportDetachParent
344 (
345     MEM_EXPORT_INFO      *pExportInfo,
346     ATTACHED_PARENT_INFO *pParentInfo
347 )
348 {
349     THREAD_STATE_NODE *pThreadNode;
350     THREAD_STATE_FREE_CALLBACK freeCallback;
351     NvU32 deviceInstance;
352 
353     NV_ASSERT_OR_RETURN_VOID(pParentInfo->refCount == 0);
354 
355     if (pParentInfo->pGpuOsInfo != NULL)
356     {
357         deviceInstance = gpuGetDeviceInstance(pParentInfo->pGpu);
358 
359         if (pParentInfo->pKernelMIGGpuInstance != NULL)
360         {
361             KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pParentInfo->pKernelMIGGpuInstance;
362             NvU32 swizzId = pKernelMIGGpuInstance->swizzId;
363 
364             pExportInfo->attachedUsageCount[deviceInstance].migGi[swizzId]--;
365 
366             // Update giIdMasks if the MIG instance is no more used..
367             if (pExportInfo->attachedUsageCount[deviceInstance].migGi[swizzId] == 0)
368                 pExportInfo->cachedParams.giIdMasks[deviceInstance] &= !NVBIT(swizzId);
369 
370             // Drop refcount on GPU instance..
371             NV_ASSERT_OK(kmigmgrDecRefCount(pKernelMIGGpuInstance->pShare));
372         }
373 
374         // Drop refcount on GPU
375         NV_ASSERT_OR_RETURN_VOID(threadStateGetCurrent(&pThreadNode,
376                                                        NULL) == NV_OK);
377 
378         freeCallback.pCb = osReleaseGpuOsInfo;
379         freeCallback.pCbData = (void *)pParentInfo->pGpuOsInfo;
380 
381         NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode,
382                                                       &freeCallback));
383 
384         pExportInfo->attachedUsageCount[deviceInstance].gpu--;
385 
386         // Update deviceInstanceMask if the GPU is no more used..
387         if (pExportInfo->attachedUsageCount[deviceInstance].gpu == 0)
388             pExportInfo->cachedParams.deviceInstanceMask &= !NVBIT(deviceInstance);
389     }
390 
391     listRemove(&pExportInfo->parentInfoList, pParentInfo);
392 }
393 
394 static void
395 _memoryexportUndupMem
396 (
397     ATTACHED_PARENT_INFO *pParentInfo,
398     ATTACHED_MEM_INFO    *pMemInfo
399 )
400 {
401     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
402 
403     NV_ASSERT_OK(pRmApi->Free(pRmApi, pParentInfo->hClient,
404                               pMemInfo->hDupedMem));
405 
406     pMemInfo->hDupedMem = 0;
407 }
408 
409 static void
410 _memoryexportDetachMemAndParent
411 (
412     MEM_EXPORT_INFO    *pExportInfo,
413     ATTACHED_MEM_INFO  *pMemInfo
414 )
415 {
416     ATTACHED_PARENT_INFO *pParentInfo = pMemInfo->pParentInfo;
417 
418     _memoryexportUndupMem(pParentInfo, pMemInfo);
419 
420     pParentInfo->refCount--;
421     pMemInfo->pParentInfo = NULL;
422 
423     pExportInfo->cachedParams.numCurHandles--;
424 
425     // If parent info is unused, drop it.
426     if (pParentInfo->refCount == 0)
427         _memoryexportDetachParent(pExportInfo, pParentInfo);
428 }
429 
430 void
431 memoryexportDestruct_IMPL
432 (
433     MemoryExport *pMemoryExport
434 )
435 {
436     OBJSYS *pSys = SYS_GET_INSTANCE();
437     MEM_EXPORT_INFO *pExportInfo = pMemoryExport->pExportInfo;
438     SYS_MEM_EXPORT_CACHESubmap *pSubmap;
439     NvU16 i, j;
440     NvU32 gpuMask = GPUS_LOCK_ALL;
441 
442     if (pExportInfo == NULL)
443         return;
444 
445     pMemoryExport->pExportInfo = NULL;
446 
447     //
448     // Take pSysMemExportModuleLock to synchronize with _memoryexportDup().
449     // We don't want to delete pExportInfo under it.
450     //
451     portSyncRwLockAcquireWrite(pSys->pSysMemExportModuleLock);
452     portSyncRwLockAcquireWrite(pExportInfo->pLock);
453 
454     pExportInfo->refCount--;
455 
456     if (pExportInfo->refCount > 0)
457     {
458         portSyncRwLockReleaseWrite(pExportInfo->pLock);
459         portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);
460 
461         return;
462     }
463 
464     //
465     // Continue to hold pSysMemExportModuleLock to update the
466     // sysMemExportCache and may undup/detach memory from
467     // hSysMemExportClient
468     //
469 
470     // Empty caches so new calls to _memoryexportDup() fail.
471     multimapRemoveItemByKey(&pSys->sysMemExportCache,
472                             pExportInfo->uuid.nodeId,
473                             pExportInfo->uuid.expId);
474 
475     pSubmap = multimapFindSubmap(&pSys->sysMemExportCache,
476                                  pExportInfo->uuid.nodeId);
477     if ((pSubmap != NULL) &&
478         (multimapCountSubmapItems(&pSys->sysMemExportCache, pSubmap) == 0))
479         multimapRemoveSubmap(&pSys->sysMemExportCache, pSubmap);
480 
481     // Now I am the only one holding the pExportInfo, drop the lock.
482     portSyncRwLockReleaseWrite(pExportInfo->pLock);
483 
484     NV_ASSERT_OK(rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
485                                        GPUS_LOCK_FLAGS_NONE,
486                                        RM_LOCK_MODULES_MEM,
487                                        &gpuMask));
488 
489     for (i = 0; i < pExportInfo->cachedParams.numMaxHandles; i++)
490     {
491         if (pExportInfo->memInfos[i].hDupedMem != 0)
492         {
493             _memoryexportDetachMemAndParent(pExportInfo,
494                                             &pExportInfo->memInfos[i]);
495         }
496     }
497 
498     // Drop rest of the locks as memory undup/detach is done..
499     rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
500 
501     portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);
502 
503     for (j = 0; j < NV_MAX_DEVICES; j++)
504     {
505         NV_ASSERT(pExportInfo->attachedUsageCount[j].gpu == 0);
506 
507         for (i = 0; i < KMIGMGR_MAX_GPU_SWIZZID; i++)
508             NV_ASSERT(pExportInfo->attachedUsageCount[j].migGi[i] == 0);
509         NV_ASSERT(pExportInfo->cachedParams.giIdMasks[j] == 0);
510     }
511 
512     NV_ASSERT(pExportInfo->cachedParams.numCurHandles == 0);
513     NV_ASSERT(pExportInfo->cachedParams.deviceInstanceMask == 0);
514 
515     NV_ASSERT(listCount(&pExportInfo->parentInfoList) == 0);
516     listDestroy(&pExportInfo->parentInfoList);
517 
518     portSyncRwLockDestroy(pExportInfo->pLock);
519     pExportInfo->pLock = NULL;
520 
521     portMemFree(pExportInfo);
522     pMemoryExport->pExportInfo = NULL;
523 }
524 
525 NvBool
526 memoryexportCanCopy_IMPL
527 (
528     MemoryExport *pMemoryExport
529 )
530 {
531     return NV_FALSE;
532 }
533 
534 static NV_STATUS
535 _memoryexportVerifyMem
536 (
537     OBJGPU                  *pGpu,
538     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
539     RsClient                *pClient,
540     NvHandle                 hSrcHandle,
541     NvU8                    *pAddrSpace
542 )
543 {
544     NV_STATUS status;
545     RsResourceRef *pSrcMemoryRef;
546     Memory *pSrcMemory;
547     MEMORY_DESCRIPTOR *pMemDesc;
548     CALL_CONTEXT callContext;
549     NvU32 addrSpace;
550     NvU32 mapFlags;
551 
552     //
553     // Don't not use memGetByHandle() or access pSrcMemory->pMemDesc here.
554     // There are certain memory types like MCFLA which might not be ready
555     // at the time of attachment, and hence could cause memGetByHandle()
556     // fail with NV_ERR_NOT_READY, which we don't want in this case.
557     //
558     status = clientGetResourceRef(pClient, hSrcHandle, &pSrcMemoryRef);
559     if (status != NV_OK)
560         return status;
561 
562     pSrcMemory = dynamicCast(pSrcMemoryRef->pResource, Memory);
563     if (pSrcMemory == NULL)
564         return NV_ERR_INVALID_OBJECT_HANDLE;
565 
566     if (!memIsExportAllowed(pSrcMemory))
567         return NV_ERR_NOT_SUPPORTED;
568 
569     portMemSet(&callContext, 0, sizeof(callContext));
570     callContext.pClient = pClient;
571     callContext.pResourceRef = pSrcMemoryRef;
572 
573     //
574     // rmApiGetEffectiveAddrSpace expect mapping flags to be set as DIRECT for
575     // GPU cacheable Sysmem for Pre-Ampere chips. We are not doing any mapping
576     // here, so passing this as workaround to get the expected address space.
577     //
578     mapFlags = FLD_SET_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, 0);
579 
580     status = memGetMapAddrSpace(pSrcMemory, &callContext, mapFlags, &addrSpace);
581     if (status != NV_OK)
582     {
583         NV_PRINTF(LEVEL_ERROR, "Failed to query address space: 0x%x\n", status);
584         return status;
585     }
586 
587     switch(addrSpace)
588     {
589         case ADDR_SYSMEM:
590             *pAddrSpace = NV00E0_ADDR_SPACE_TYPE_SYSMEM;
591             break;
592         case ADDR_FBMEM:
593             *pAddrSpace = NV00E0_ADDR_SPACE_TYPE_VIDMEM;
594             break;
595         case ADDR_FABRIC_V2:
596             *pAddrSpace = NV00E0_ADDR_SPACE_TYPE_FABRIC;
597             break;
598         case ADDR_FABRIC_MC:
599             *pAddrSpace = NV00E0_ADDR_SPACE_TYPE_FABRIC_MC;
600             break;
601         default:
602             return NV_ERR_NOT_SUPPORTED;
603     }
604 
605     // No need to inspect parent GPU if a deviceless object
606     if (pGpu == NULL)
607         return NV_OK;
608 
609     if (pKernelMIGGpuInstance != NULL)
610     {
611         if ((pKernelMIGGpuInstance->pMemoryPartitionHeap != pSrcMemory->pHeap))
612             return NV_ERR_INVALID_OBJECT_PARENT;
613     }
614 
615     // Check if hMemory belongs to the same pGpu
616     pMemDesc = pSrcMemory->pMemDesc;
617     if ((pSrcMemory->pGpu != pGpu) && (pMemDesc->pGpu != pGpu))
618         return NV_ERR_INVALID_OBJECT_PARENT;
619 
620     return NV_OK;
621 }
622 
623 static NV_STATUS
624 _memoryexportValidateAndDupMem
625 (
626     RsClient             *pSrcClient,
627     NvHandle              hSrcMem,
628     ATTACHED_PARENT_INFO *pDestParentInfo,
629     ATTACHED_MEM_INFO    *pMemInfo
630 )
631 {
632     NV_STATUS status;
633     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
634 
635     status = _memoryexportVerifyMem(pDestParentInfo->pGpu,
636                                     pDestParentInfo->pKernelMIGGpuInstance,
637                                     pSrcClient, hSrcMem,
638                                     &pMemInfo->addressSpace);
639     if (status != NV_OK)
640         return status;
641 
642     status = pRmApi->DupObject(pRmApi, pDestParentInfo->hClient,
643                                pDestParentInfo->hParent,
644                                &pMemInfo->hDupedMem,
645                                pSrcClient->hClient, hSrcMem, 0);
646     if (status != NV_OK)
647         return status;
648 
649     return NV_OK;
650 }
651 
652 //
653 // This function is called without holding GPU lock, so don't access GPU until
654 // validated.
655 //
656 static NV_STATUS
657 _memoryexportValidateParent
658 (
659     NV00E0_CTRL_EXPORT_MEM_PARAMS  *pExportParams,
660     void                          **ppGpuOsInfo,
661     OBJGPU                        **ppGpu,
662     Device                        **ppDevice,
663     Subdevice                     **ppSubdevice
664 )
665 {
666     NV_STATUS status;
667     CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
668     RS_RES_CONTROL_PARAMS_INTERNAL *pParams = pCallContext->pControlParams;
669     RsClient *pClient = pCallContext->pClient;
670 
671     *ppGpuOsInfo = NULL;
672     *ppGpu = NULL;
673     *ppDevice = NULL;
674     *ppSubdevice = NULL;
675 
676     // Non-device parent, nothing to do..
677     if (pClient->hClient == pExportParams->hParent)
678         return NV_OK;
679 
680     //
681     // Validate the parent subdevice/device is same as that of ref-counted by
682     // the OS layer.
683     //
684     status = deviceGetByHandle(pClient, pExportParams->hParent, ppDevice);
685     if (status != NV_OK)
686     {
687         status = subdeviceGetByHandle(pClient, pExportParams->hParent,
688                                       ppSubdevice);
689         if (status != NV_OK)
690             return status;
691 
692         *ppGpu = GPU_RES_GET_GPU(*ppSubdevice);
693     }
694     else
695     {
696         *ppGpu = GPU_RES_GET_GPU(*ppDevice);
697     }
698 
699     if(!osMatchGpuOsInfo(*ppGpu, pParams->secInfo.gpuOsInfo))
700         return NV_ERR_INVALID_DEVICE;
701 
702     *ppGpuOsInfo = pParams->secInfo.gpuOsInfo;
703 
704     return NV_OK;
705 }
706 
707 static NV_STATUS
708 _memoryexportGetParentHandles
709 (
710     OBJGPU                   *pGpu,
711     Device                   *pDevice,
712     Subdevice                *pSubdevice,
713     KERNEL_MIG_GPU_INSTANCE **ppKernelMIGGpuInstance,
714     NvHandle                 *phParentClient,
715     NvHandle                 *phParentObject
716 )
717 {
718     NV_STATUS status;
719     OBJSYS *pSys = SYS_GET_INSTANCE();
720 
721     *phParentClient = pSys->hSysMemExportClient;
722     *phParentObject = pSys->hSysMemExportClient;
723     *ppKernelMIGGpuInstance = NULL;
724 
725     // Non-device parent, nothing to do..
726     if (pGpu == NULL)
727         return NV_OK;
728 
729     if (IS_MIG_ENABLED(pGpu))
730     {
731         MIG_INSTANCE_REF ref;
732         Device *pTempDevice = (pSubdevice != NULL) ? GPU_RES_GET_DEVICE(pSubdevice) : pDevice;
733 
734         status = kmigmgrGetInstanceRefFromDevice(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu),
735                                                  pTempDevice, &ref);
736         if (status != NV_OK)
737             return status;
738 
739         if (!kmigmgrIsMIGReferenceValid(&ref))
740             return NV_ERR_INVALID_STATE;
741 
742         status = kmigmgrIncRefCount(ref.pKernelMIGGpuInstance->pShare);
743         if (status != NV_OK)
744             return status;
745 
746         *ppKernelMIGGpuInstance = ref.pKernelMIGGpuInstance;
747 
748         if (pSubdevice != NULL)
749         {
750             *phParentClient = ref.pKernelMIGGpuInstance->instanceHandles.hClient;
751             *phParentObject = ref.pKernelMIGGpuInstance->instanceHandles.hSubdevice;
752         }
753         else
754         {
755             *phParentClient = ref.pKernelMIGGpuInstance->instanceHandles.hClient;
756             *phParentObject = ref.pKernelMIGGpuInstance->instanceHandles.hDevice;
757         }
758     }
759     else
760     {
761         if (pSubdevice != NULL)
762         {
763             *phParentClient = GPU_GET_MEMORY_MANAGER(pGpu)->hClient;
764             *phParentObject = GPU_GET_MEMORY_MANAGER(pGpu)->hSubdevice;
765         }
766         else
767         {
768             *phParentClient = GPU_GET_MEMORY_MANAGER(pGpu)->hClient;
769             *phParentObject = GPU_GET_MEMORY_MANAGER(pGpu)->hDevice;
770         }
771     }
772 
773     return NV_OK;
774 }
775 
//
// Control call: attach and/or detach memory handles on an export descriptor.
//
// pParams->handles[0 .. numHandles-1] are applied to the descriptor slots
// [index .. index + numHandles - 1]. A non-zero handle is validated and
// duped under an RM-internal parent and replaces whatever occupied the slot;
// a zero handle just detaches the slot's current content.
//
// The work is done in two phases so that a failure leaves the descriptor
// untouched: first every handle is duped into stashMemInfos[], then, once
// nothing can fail any more, the stash is committed into memInfos[].
//
// Returns NV_ERR_INVALID_ARGUMENT for a zero or too large numHandles,
// NV_ERR_OUT_OF_RANGE when index + numHandles overflows or exceeds
// numMaxHandles, NV_ERR_NO_MEMORY when the parent entry cannot be
// allocated, or the status of the failing validation/lock/dup step.
//
NV_STATUS
memoryexportCtrlExportMem_IMPL
(
    MemoryExport                  *pMemoryExport,
    NV00E0_CTRL_EXPORT_MEM_PARAMS *pParams
)
{
    NV_STATUS status;
    NvU16 result;
    // i is read by the failure path: it counts the stash entries processed.
    NvU16 i = 0, j;
    MEM_EXPORT_INFO *pExportInfo = pMemoryExport->pExportInfo;
    ATTACHED_PARENT_INFO *pParentInfo;
    ATTACHED_MEM_INFO *pMemInfo;
    OBJGPU *pGpu;
    void *pGpuOsInfo;
    Device *pDevice;
    Subdevice *pSubdevice;
    OBJSYS *pSys = SYS_GET_INSTANCE();
    NvU32 gpuMask = GPUS_LOCK_ALL;
    NvBool bModuleLockAcquired = NV_FALSE;
    NvBool bAllGpuLockAcquired = NV_FALSE;

    // The stash holds at most NV00E0_MAX_EXPORT_HANDLES entries per call.
    if ((pParams->numHandles == 0) ||
        (pParams->numHandles > NV00E0_MAX_EXPORT_HANDLES))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    //
    // pExportInfo->cachedParams.numMaxHandles is an immutable attribute,
    // can be queried outside of the locks.
    //
    if ((!portSafeAddU16(pParams->index, pParams->numHandles, &result)) ||
        (result > pExportInfo->cachedParams.numMaxHandles))
    {
        return NV_ERR_OUT_OF_RANGE;
    }

    // Resolve hParent; pGpu stays NULL for a device-less (client) parent.
    status = _memoryexportValidateParent(pParams, &pGpuOsInfo, &pGpu, &pDevice, &pSubdevice);
    if (status != NV_OK)
    {
        return status;
    }

    //
    // During device-less memory dup, take module lock to protect
    // pSys->hSysMemExportClient. RM-core expects client locking to
    // be managed by the caller in this case, rather than relying
    // on dual client locking.
    //
    if (pGpu == NULL)
    {
        portSyncRwLockAcquireWrite(pSys->pSysMemExportModuleLock);
        bModuleLockAcquired = NV_TRUE;
    }

    portSyncRwLockAcquireWrite(pExportInfo->pLock);

    // Every call gets its own parent entry; it is dropped again below if it
    // ends up unreferenced.
    pParentInfo = listAppendNew(&pExportInfo->parentInfoList);
    if (pParentInfo == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    pParentInfo->pGpuOsInfo = pGpuOsInfo;
    pParentInfo->pGpu = pGpu;
    pParentInfo->refCount = 0;
    pParentInfo->hClient = 0;
    pParentInfo->hParent = 0;
    pParentInfo->pKernelMIGGpuInstance = NULL;

    status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
                                   GPUS_LOCK_FLAGS_NONE,
                                   RM_LOCK_MODULES_MEM,
                                   &gpuMask);
    if (status != NV_OK)
    {
        NV_ASSERT(0);
        goto fail;
    }

    bAllGpuLockAcquired = NV_TRUE;

    // Takes a MIG instance reference when applicable; the failure path
    // below gives it back.
    status = _memoryexportGetParentHandles(pGpu, pDevice, pSubdevice,
                                           &pParentInfo->pKernelMIGGpuInstance,
                                           &pParentInfo->hClient,
                                           &pParentInfo->hParent);
    if (status != NV_OK)
        goto fail;

    // Phase 1: dup the memory and store it in the stash
    for (i = 0; i < pParams->numHandles; i++)
    {
        // A zero handle is a detach-only request for this slot.
        if (pParams->handles[i] == 0)
            continue;

        pMemInfo = &pExportInfo->stashMemInfos[i];

        status = _memoryexportValidateAndDupMem(RES_GET_CLIENT(pMemoryExport),
                                                pParams->handles[i],
                                                pParentInfo, pMemInfo);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to duping 0x%x\n", status);
            goto fail;
        }
    }

    // Phase 2: start committing now.

    // !!! Don't expect any failures from this point onward !!!

    for (i = 0; i < pParams->numHandles; i++)
    {
        // Stash entries are call-relative, descriptor slots are offset by
        // pParams->index.
        pMemInfo = &pExportInfo->memInfos[i + pParams->index];

        // If the handle already exists in this position, detach it
        if (pMemInfo->hDupedMem != 0)
        {
            _memoryexportDetachMemAndParent(pExportInfo, pMemInfo);

            NV_ASSERT(pMemInfo->hDupedMem == 0);
        }

        // Nothing to attach, continue..
        if (pExportInfo->stashMemInfos[i].hDupedMem == 0)
            continue;

        // Attach successful
        *pMemInfo = pExportInfo->stashMemInfos[i];

        // Ref-count parent and cache for future use.
        pParentInfo->refCount++;
        pMemInfo->pParentInfo = pParentInfo;

        pExportInfo->cachedParams.numCurHandles++;

        // Clear stash
        pExportInfo->stashMemInfos[i].hDupedMem = 0;
    }

    // Attach GPU if applicable...
    if (pGpu != NULL)
    {
        NvU32 deviceInstance = gpuGetDeviceInstance(pGpu);

        // Counted once per parent entry, mirrored by the decrement in
        // _memoryexportDetachParent().
        pExportInfo->attachedUsageCount[deviceInstance].gpu++;
        pExportInfo->cachedParams.deviceInstanceMask |= NVBIT(deviceInstance);

        if (pParentInfo->pKernelMIGGpuInstance != NULL)
        {
            NvU32 swizzId = pParentInfo->pKernelMIGGpuInstance->swizzId;

            pExportInfo->attachedUsageCount[deviceInstance].migGi[swizzId]++;
            pExportInfo->cachedParams.giIdMasks[deviceInstance] |= NVBIT(swizzId);
        }
    }

    //
    // If this was mem detach only call, detach the new parent info which is
    // unused.
    //
    if (pParentInfo->refCount == 0)
        _memoryexportDetachParent(pExportInfo, pParentInfo);

    goto done;

fail:
    // Roll back phase 1: free whatever was duped into the stash so far.
    for (j = 0; j < i; j++)
    {
        pMemInfo = &pExportInfo->stashMemInfos[j];
        if (pMemInfo->hDupedMem != 0)
            _memoryexportUndupMem(pParentInfo, pMemInfo);
    }

    // Give back the MIG instance reference taken for this parent, if any.
    if (pParentInfo->pKernelMIGGpuInstance != NULL)
        NV_ASSERT_OK(kmigmgrDecRefCount(pParentInfo->pKernelMIGGpuInstance->pShare));

    listRemove(&pExportInfo->parentInfoList, pParentInfo);

done:
    // Release in the reverse order of acquisition.
    if (bAllGpuLockAcquired)
        rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);

    portSyncRwLockReleaseWrite(pExportInfo->pLock);

    if (bModuleLockAcquired)
        portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);

    return status;
}
968 
969 static NV_STATUS
970 _memoryexportFindImporterMIGParent
971 (
972     RsClient             *pImpClient,
973     ATTACHED_PARENT_INFO *pSrcParentInfo,
974     NvHandle             *pImpParentHandle
975 )
976 {
977     NV_STATUS status;
978     OBJGPU *pSrcGpu;
979     KERNEL_MIG_GPU_INSTANCE *pSrcKernelMIGGpuInstance;
980     NvBool bDevice;
981     RS_ITERATOR it;
982 
983     pSrcGpu = pSrcParentInfo->pGpu;
984     NV_ASSERT_OR_RETURN(pSrcGpu != NULL, NV_ERR_INVALID_STATE);
985 
986     pSrcKernelMIGGpuInstance = pSrcParentInfo->pKernelMIGGpuInstance;
987     NV_ASSERT_OR_RETURN(pSrcKernelMIGGpuInstance != NULL, NV_ERR_INVALID_STATE);
988 
989     *pImpParentHandle = 0;
990 
991     bDevice = (pSrcKernelMIGGpuInstance->instanceHandles.hDevice  == pSrcParentInfo->hParent);
992 
993     // In MIG, multiple devices of same the device instance are feasible.
994     it = clientRefIter(pImpClient, NULL, classId(Device), RS_ITERATE_CHILDREN, NV_TRUE);
995 
996     while (clientRefIterNext(pImpClient, &it))
997     {
998         Device *pImpDevice = dynamicCast(it.pResourceRef->pResource, Device);
999         Subdevice *pImpSubdevice;
1000         MIG_INSTANCE_REF impRef;
1001 
1002         if (pImpDevice == NULL)
1003             continue;
1004 
1005         status = kmigmgrGetInstanceRefFromDevice(pSrcGpu, GPU_GET_KERNEL_MIG_MANAGER(pSrcGpu),
1006                                                  pImpDevice, &impRef);
1007         if (status != NV_OK)
1008             continue;
1009 
1010         if (pSrcKernelMIGGpuInstance != impRef.pKernelMIGGpuInstance)
1011             continue;
1012 
1013         if (bDevice)
1014         {
1015             *pImpParentHandle = RES_GET_HANDLE(pImpDevice);
1016         }
1017         else
1018         {
1019             status = subdeviceGetByDeviceAndGpu(pImpClient, pImpDevice, pSrcGpu, &pImpSubdevice);
1020             if (status != NV_OK)
1021                 continue;
1022 
1023             *pImpParentHandle = RES_GET_HANDLE(pImpSubdevice);
1024         }
1025 
1026         break;
1027     }
1028 
1029     return (*pImpParentHandle == 0) ? NV_ERR_OBJECT_NOT_FOUND : NV_OK;
1030 }
1031 
1032 static NV_STATUS
1033 _memoryexportFindImporterParent
1034 (
1035     RsClient             *pImpClient,
1036     ATTACHED_PARENT_INFO *pSrcParentInfo,
1037     NvHandle             *pImpParentHandle
1038 )
1039 {
1040     NV_STATUS status;
1041     OBJGPU *pSrcGpu = pSrcParentInfo->pGpu;
1042     NvBool bDevice;
1043     Device *pImpDevice;
1044     Subdevice *pImpSubdevice;
1045 
1046     // If device-less memory, use client as importer parent
1047     if (pSrcGpu == NULL)
1048     {
1049         *pImpParentHandle = pImpClient->hClient;
1050         return NV_OK;
1051     }
1052 
1053     if (IS_MIG_ENABLED(pSrcGpu))
1054     {
1055         return _memoryexportFindImporterMIGParent(pImpClient, pSrcParentInfo,
1056                                                   pImpParentHandle);
1057     }
1058 
1059     // If source is device, then return device as parent
1060     bDevice = (GPU_GET_MEMORY_MANAGER(pSrcGpu)->hDevice == pSrcParentInfo->hParent);
1061 
1062     status = deviceGetByGpu(pImpClient, pSrcGpu, NV_TRUE, &pImpDevice);
1063     if (status != NV_OK)
1064         return status;
1065 
1066     if (bDevice)
1067     {
1068         *pImpParentHandle = RES_GET_HANDLE(pImpDevice);
1069         return NV_OK;
1070     }
1071 
1072     status = subdeviceGetByDeviceAndGpu(pImpClient, pImpDevice, pSrcGpu, &pImpSubdevice);
1073     if (status != NV_OK)
1074         return status;
1075 
1076     *pImpParentHandle = RES_GET_HANDLE(pImpSubdevice);
1077 
1078     return NV_OK;
1079 }
1080 
1081 NV_STATUS
1082 memoryexportCtrlImportMem_IMPL
1083 (
1084     MemoryExport                  *pMemoryExport,
1085     NV00E0_CTRL_IMPORT_MEM_PARAMS *pParams
1086 )
1087 {
1088     OBJSYS *pSys = SYS_GET_INSTANCE();
1089     NV_STATUS status;
1090     NvU16 result;
1091     NvU16 i = 0, j;
1092     MEM_EXPORT_INFO *pExportInfo = pMemoryExport->pExportInfo;
1093     ATTACHED_MEM_INFO *pMemInfo;
1094     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1095     NvHandle hDstParent;
1096     NvHandle hDstClient = RES_GET_CLIENT(pMemoryExport)->hClient;
1097     NvU32 gpuMask = GPUS_LOCK_ALL;
1098     NvBool bAllGpuLockAcquired = NV_FALSE;
1099 
1100     if ((pParams->numHandles == 0) ||
1101         (pParams->numHandles > NV00E0_MAX_IMPORT_HANDLES))
1102         return NV_ERR_INVALID_ARGUMENT;
1103 
1104     //
1105     // pExportInfo->cachedParams.numMaxHandles is an immutable attribute,
1106     // can be queried outside of the locks.
1107     //
1108     if ((!portSafeAddU16(pParams->index, pParams->numHandles, &result)) ||
1109         (result > pExportInfo->cachedParams.numMaxHandles))
1110         return NV_ERR_OUT_OF_RANGE;
1111 
1112     //
1113     // During import we might read (dup) the hSysMemExportClient's client
1114     // database to dup from it. So, during device-less memory dup, take module
1115     // lock to protect pSys->hSysMemExportClient. RM-core expects client
1116     // locking to be managed by the caller in this case, rather than relying
1117     // on dual client locking (as there could be a lock inversion issue to
1118     // lock another client, if one client is already locked).
1119     //
1120     portSyncRwLockAcquireRead(pSys->pSysMemExportModuleLock);
1121 
1122     portSyncRwLockAcquireWrite(pExportInfo->pLock);
1123 
1124     status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
1125                                    GPUS_LOCK_FLAGS_NONE,
1126                                    RM_LOCK_MODULES_MEM,
1127                                    &gpuMask);
1128     if (status != NV_OK)
1129     {
1130         NV_ASSERT(0);
1131         goto done;
1132     }
1133 
1134     bAllGpuLockAcquired = NV_TRUE;
1135 
1136     for (i = 0; i < pParams->numHandles; i++)
1137     {
1138         pMemInfo = &pExportInfo->memInfos[i + pParams->index];
1139 
1140         // Nothing to import
1141         if (pMemInfo->hDupedMem == 0)
1142         {
1143             portMemSet(&pParams->memInfos[i], 0, sizeof(pParams->memInfos[0]));
1144             continue;
1145         }
1146 
1147         if (pParams->handles[i] == 0)
1148         {
1149             NV_PRINTF(LEVEL_ERROR, "Invalid handle\n");
1150             status = NV_ERR_INVALID_OBJECT_HANDLE;
1151             goto fail;
1152         }
1153 
1154         status = _memoryexportFindImporterParent(RES_GET_CLIENT(pMemoryExport),
1155                                                  pMemInfo->pParentInfo,
1156                                                  &hDstParent);
1157         if (status != NV_OK)
1158         {
1159             NV_PRINTF(LEVEL_ERROR, "Failed to find parent: 0x%x\n", status);
1160             goto fail;
1161         }
1162 
1163         status = pRmApi->DupObject(pRmApi,
1164                                    hDstClient,
1165                                    hDstParent,
1166                                    &pParams->handles[i],
1167                                    pMemInfo->pParentInfo->hClient,
1168                                    pMemInfo->hDupedMem, 0);
1169         if (status != NV_OK)
1170         {
1171             NV_PRINTF(LEVEL_ERROR, "Failed to duping 0x%x\n", status);
1172             goto fail;
1173         }
1174 
1175         pParams->memInfos[i].addrSpace = pMemInfo->addressSpace;
1176         pParams->memInfos[i].hParent = hDstParent;
1177     }
1178 
1179     goto done;
1180 
1181 fail:
1182     for (j = 0; j < i; j++)
1183     {
1184         if (pParams->memInfos[j].hParent != 0)
1185             pRmApi->Free(pRmApi, hDstClient, pParams->handles[j]);
1186     }
1187 
1188 done:
1189     if (bAllGpuLockAcquired)
1190         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
1191 
1192     portSyncRwLockReleaseWrite(pExportInfo->pLock);
1193 
1194     portSyncRwLockReleaseRead(pSys->pSysMemExportModuleLock);
1195 
1196     return status;
1197 }
1198 
1199 NV_STATUS
1200 memoryexportCtrlGetInfo_IMPL
1201 (
1202     MemoryExport                *pMemoryExport,
1203     NV00E0_CTRL_GET_INFO_PARAMS *pParams
1204 )
1205 {
1206     MEM_EXPORT_INFO *pExportInfo = pMemoryExport->pExportInfo;
1207 
1208     portSyncRwLockAcquireRead(pExportInfo->pLock);
1209 
1210     pParams->info = pMemoryExport->pExportInfo->cachedParams;
1211 
1212     portSyncRwLockReleaseRead(pExportInfo->pLock);
1213 
1214     return NV_OK;
1215 }
1216 
1217 NV_STATUS
1218 memoryexportControl_IMPL
1219 (
1220     MemoryExport                   *pMemoryExport,
1221     CALL_CONTEXT                   *pCallContext,
1222     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
1223 )
1224 {
1225     //
1226     // Note: GPU lock(s) is required for some control calls. Thus, it is
1227     // incorrect to take the leaf lock here. resControl_IMPL() attempts to
1228     // acquire the GPU locks before it calls the control call body.
1229     //
1230     return resControl_IMPL(staticCast(pMemoryExport, RsResource),
1231                            pCallContext, pParams);
1232 }
1233 
1234 void
1235 memoryexportClearCache
1236 (
1237     NvU16 nodeId
1238 )
1239 {
1240     OBJSYS *pSys = SYS_GET_INSTANCE();
1241     SYS_MEM_EXPORT_CACHESubmap *pSubmap;
1242 
1243     portSyncRwLockAcquireWrite(pSys->pSysMemExportModuleLock);
1244 
1245     pSubmap = multimapFindSubmap(&pSys->sysMemExportCache, nodeId);
1246 
1247     if (pSubmap != NULL)
1248     {
1249         SYS_MEM_EXPORT_CACHEIter it =
1250                 multimapSubmapIterItems(&pSys->sysMemExportCache, pSubmap);
1251 
1252         while (multimapItemIterNext(&it) != 0)
1253             multimapRemoveItem(&pSys->sysMemExportCache, it.pValue);
1254 
1255         multimapRemoveSubmap(&pSys->sysMemExportCache, pSubmap);
1256     }
1257 
1258     portSyncRwLockReleaseWrite(pSys->pSysMemExportModuleLock);
1259 }
1260