1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /******************************************************************************
25  *
26  *   Description:
27  *       This file contains the functions managing the memory multicast fabric
28  *
29  *****************************************************************************/
30 #include "os/os.h"
31 #include "core/locks.h"
32 #include "nvport/nvport.h"
33 #include "rmapi/rs_utils.h"
34 #include "rmapi/rmapi_utils.h"
35 #include "compute/fabric.h"
36 #include "gpu/gpu.h"
37 #include "gpu/bus/kern_bus.h"
38 #include "gpu/mem_mgr/mem_desc.h"
39 #include "gpu/mem_mgr/mem_mgr.h"
40 #include "gpu/subdevice/subdevice.h"
41 #include "kernel/gpu/nvlink/kernel_nvlink.h"
42 #include "mem_mgr/fabric_vaspace.h"
43 #include "mem_mgr/mem_multicast_fabric.h"
44 #include "published/hopper/gh100/dev_mmu.h"
45 
46 #include "gpu/gpu_fabric_probe.h"
47 
48 static
49 NV_STATUS
50 _memMulticastFabricValidateAllocParams
51 (
52     NV00FD_ALLOCATION_PARAMETERS *pAllocParams
53 )
54 {
55     // Only page size 512MB is supported
56     if (pAllocParams->pageSize != NV_MEMORY_MULTICAST_FABRIC_PAGE_SIZE_512M)
57     {
58         NV_PRINTF(LEVEL_ERROR,
59                   "Unsupported pageSize: 0x%x. Only 512MB pagesize is supported\n",
60                   pAllocParams->pageSize);
61         return NV_ERR_INVALID_ARGUMENT;
62     }
63 
64     // Alignment should be pageSize aligned
65     if (!NV_IS_ALIGNED64(pAllocParams->alignment, pAllocParams->pageSize))
66     {
67         NV_PRINTF(LEVEL_ERROR,
68                   "Alignment should be pageSize aligned\n");
69         return NV_ERR_INVALID_ARGUMENT;
70     }
71 
72     // AllocSize should be page size aligned
73     if (!NV_IS_ALIGNED64(pAllocParams->allocSize, pAllocParams->pageSize))
74     {
75         NV_PRINTF(LEVEL_ERROR,
76                   "AllocSize should be pageSize aligned\n");
77         return NV_ERR_INVALID_ARGUMENT;
78     }
79 
80     if (pAllocParams->numGpus == 0)
81     {
82         NV_PRINTF(LEVEL_ERROR,
83                   "Number of GPUs to attach must be non-zero\n");
84         return NV_ERR_INVALID_ARGUMENT;
85     }
86 
87     // Allocation flags must be zero
88     if (pAllocParams->allocFlags != 0)
89     {
90         NV_PRINTF(LEVEL_ERROR, "allocFlags must be zero\n");
91         return NV_ERR_INVALID_ARGUMENT;
92     }
93 
94     return NV_OK;
95 }
96 
97 static
98 MEM_MULTICAST_FABRIC_DESCRIPTOR*
99 _memMulticastFabricDescriptorAllocUnderLock
100 (
101     MemoryMulticastFabric        *pMemoryMulticastFabric,
102     NV00FD_ALLOCATION_PARAMETERS *pAllocParams
103 )
104 {
105     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
106 
107     pMulticastFabricDesc = portMemAllocNonPaged(sizeof(MEM_MULTICAST_FABRIC_DESCRIPTOR));
108     if (pMulticastFabricDesc == NULL)
109         return NULL;
110 
111     portMemSet(pMulticastFabricDesc, 0, sizeof(MEM_MULTICAST_FABRIC_DESCRIPTOR));
112 
113     listInit(&pMulticastFabricDesc->waitingClientsList,
114              portMemAllocatorGetGlobalNonPaged());
115 
116     listInit(&pMulticastFabricDesc->gpuInfoList,
117              portMemAllocatorGetGlobalNonPaged());
118 
119     pMulticastFabricDesc->refCount = 1;
120     pMulticastFabricDesc->mcTeamStatus = NV_ERR_NOT_READY;
121     pMulticastFabricDesc->attachedGpusMask = 0;
122     pMulticastFabricDesc->alignment  = pAllocParams->alignment;
123     pMulticastFabricDesc->allocSize  = pAllocParams->allocSize;
124     pMulticastFabricDesc->pageSize   = pAllocParams->pageSize;
125     pMulticastFabricDesc->allocFlags = pAllocParams->allocFlags;
126     pMulticastFabricDesc->numMaxGpus = pAllocParams->numGpus;
127 
128     return pMulticastFabricDesc;
129 }
130 
131 static void
132 _memMulticastFabricDescriptorFlushClientsUnderLock
133 (
134     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
135 )
136 {
137     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
138 
139     while ((pNode = listHead(&pMulticastFabricDesc->waitingClientsList)) != NULL)
140     {
141         if (pNode->pOsEvent != NULL)
142         {
143             osSetEvent(NULL, pNode->pOsEvent);
144             NV_ASSERT_OK(osDereferenceObjectCount(pNode->pOsEvent));
145         }
146 
147         listRemove(&pMulticastFabricDesc->waitingClientsList, pNode);
148     }
149 
150     return;
151 }
152 
153 static NV_STATUS
154 _memMulticastFabricDescriptorEnqueueWaitUnderLock
155 (
156     NvHandle                         hClient,
157     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
158     NvP64                            pOsEvent,
159     Memory                          *pMemory
160 )
161 {
162     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
163     NvP64                             pValidatedOsEvent = NULL;
164     NV_STATUS                         status;
165 
166     if (pOsEvent != NULL)
167     {
168         status = osUserHandleToKernelPtr(hClient, pOsEvent, &pValidatedOsEvent);
169         if (status != NV_OK)
170             return status;
171     }
172 
173     pNode = listAppendNew(&pMulticastFabricDesc->waitingClientsList);
174     if (pNode == NULL)
175     {
176         if (pOsEvent != NULL)
177             osDereferenceObjectCount(pValidatedOsEvent);
178 
179         return NV_ERR_NO_MEMORY;
180     }
181 
182     pNode->pOsEvent = pValidatedOsEvent;
183     pNode->pMemory = pMemory;
184 
185     // In case the multicast object's memdesc is ready, unblock clients waiting on it
186     if (pMulticastFabricDesc->bMemdescInstalled)
187         _memMulticastFabricDescriptorFlushClientsUnderLock(pMulticastFabricDesc);
188 
189     return NV_OK;
190 }
191 
192 static void
193 _memMulticastFabricDescriptorDequeueWaitUnderLock
194 (
195     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
196     Memory                          *pMemory
197 )
198 {
199     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
200     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNodeNext;
201 
202     pNode = listHead(&pMulticastFabricDesc->waitingClientsList);
203 
204     // There can be multiple events per memory object, so delete all.
205     while (pNode != NULL)
206     {
207         pNodeNext = listNext(&pMulticastFabricDesc->waitingClientsList, pNode);
208 
209         if (pNode->pMemory == pMemory)
210         {
211             if (pNode->pOsEvent != NULL)
212                 osDereferenceObjectCount(pNode->pOsEvent);
213 
214             listRemove(&pMulticastFabricDesc->waitingClientsList, pNode);
215         }
216 
217         pNode = pNodeNext;
218     }
219 }
220 
221 NV_STATUS
222 _memMulticastFabricGpuInfoAddUnderLock
223 (
224     MemoryMulticastFabric          *pMemoryMulticastFabric,
225     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
226 )
227 {
228     NV00FD_CTRL_ATTACH_GPU_PARAMS *pAttachParams = pParams->pParams;
229     Subdevice *pSubdevice = NULL;
230     MEM_MULTICAST_FABRIC_GPU_INFO *pNode;
231     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
232         pMemoryMulticastFabric->pMulticastFabricDesc;
233     OBJGPU *pGpu;
234     MEM_MULTICAST_FABRIC_GPU_INFO *pNodeItr;
235 
236     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
237         subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
238             pAttachParams->hSubdevice, &pSubdevice));
239 
240     pGpu = GPU_RES_GET_GPU(pSubdevice);
241 
242     if(!osMatchGpuOsInfo(pGpu, pParams->secInfo.gpuOsInfo))
243         return NV_ERR_INVALID_DEVICE;
244 
245     for (pNodeItr = listHead(&pMulticastFabricDesc->gpuInfoList);
246          pNodeItr != NULL;
247          pNodeItr = listNext(&pMulticastFabricDesc->gpuInfoList, pNodeItr))
248     {
249         if (pNodeItr->pGpu == pGpu)
250         {
251            NV_PRINTF(LEVEL_ERROR, "GPU %x has already attached\n",
252                                    pGpu->gpuInstance);
253            return NV_ERR_IN_USE;
254         }
255     }
256 
257     pNode = listAppendNew(&pMulticastFabricDesc->gpuInfoList);
258     if (pNode == NULL)
259         return NV_ERR_NO_MEMORY;
260 
261     pNode->pGpuOsInfo = pParams->secInfo.gpuOsInfo;
262     pNode->pGpu = GPU_RES_GET_GPU(pSubdevice);
263     pNode->pAttachMemInfoTree = NULL;
264     pNode->bMcflaAlloc = NV_FALSE;
265 
266     return NV_OK;
267 }
268 
269 static void
270 _memMulticastFabricGpuInfoRemoveUnderLock
271 (
272     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
273 )
274 {
275     MEM_MULTICAST_FABRIC_GPU_INFO *pNode = NULL;
276     THREAD_STATE_NODE *pThreadNode = NULL;
277     THREAD_STATE_FREE_CALLBACK freeCallback;
278 
279     NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL));
280 
281     while ((pNode = listHead(&pMulticastFabricDesc->gpuInfoList)) != NULL)
282     {
283         freeCallback.pCb = osReleaseGpuOsInfo;
284         freeCallback.pCbData = (void *)pNode->pGpuOsInfo;
285 
286         NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, &freeCallback));
287         listRemove(&pMulticastFabricDesc->gpuInfoList, pNode);
288     }
289 
290     pMulticastFabricDesc->attachedGpusMask = 0;
291     pMulticastFabricDesc->numAttachedGpus = 0;
292 }
293 
294 NV_STATUS
295 _memMulticastFabricSendInbandTeamSetupRequestV1UnderLock
296 (
297     OBJGPU                          *pGpu,
298     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
299 )
300 {
301     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
302     NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *sendDataParams;
303     nvlink_inband_mc_team_setup_req_msg_t *pMcTeamSetupReqMsg = NULL;
304     nvlink_inband_mc_team_setup_req_t *pMcTeamSetupReq = NULL;
305     MEM_MULTICAST_FABRIC_GPU_INFO *pNode;
306     NvU64 requestId;
307     NvU32 idx = 0;
308     NvU32 payloadSize;
309     NvU32 sendDataSize;
310     NV_STATUS status = NV_OK;
311 
312     sendDataParams = \
313         (NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *)
314         portMemAllocNonPaged(sizeof(NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS));
315 
316     if (sendDataParams == NULL)
317         return NV_ERR_NO_MEMORY;
318 
319     pMcTeamSetupReqMsg = \
320         (nvlink_inband_mc_team_setup_req_msg_t *)&sendDataParams->buffer[0];
321 
322     pMcTeamSetupReq = \
323         (nvlink_inband_mc_team_setup_req_t *)&pMcTeamSetupReqMsg->mcTeamSetupReq;
324 
325     payloadSize = (NvU32)(sizeof(nvlink_inband_mc_team_setup_req_t) + \
326                       (sizeof(pMcTeamSetupReq->gpuHandles[0]) * pMulticastFabricDesc->numMaxGpus));
327 
328     sendDataSize = (NvU32)(sizeof(nvlink_inband_msg_header_t) + payloadSize);
329 
330     NV_ASSERT((NvU32)sendDataSize <= sizeof(sendDataParams->buffer));
331 
332     portMemSet(sendDataParams, 0, sendDataSize);
333 
334     pMcTeamSetupReq->mcAllocSize = pMulticastFabricDesc->allocSize;
335     pMcTeamSetupReq->numGpuHandles = pMulticastFabricDesc->numMaxGpus;
336 
337     for (pNode = listHead(&pMulticastFabricDesc->gpuInfoList);
338          pNode != NULL;
339          pNode = listNext(&pMulticastFabricDesc->gpuInfoList, pNode))
340         pMcTeamSetupReq->gpuHandles[idx++] = pNode->gpuProbeHandle;
341 
342     NV_ASSERT(idx == pMcTeamSetupReq->numGpuHandles);
343 
344     sendDataParams->dataSize = sendDataSize;
345 
346     status = fabricInitInbandMsgHdr(&pMcTeamSetupReqMsg->msgHdr,
347                                     NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ,
348                                     payloadSize);
349 
350     if (status != NV_OK)
351         goto done;
352 
353     requestId = pMcTeamSetupReqMsg->msgHdr.requestId;
354 
355     status = fabricMulticastSetupCacheInsertUnderLock_IMPL(pFabric,
356                                                            requestId,
357                                                            pMulticastFabricDesc);
358     if (status != NV_OK)
359         goto done;
360 
361     status = knvlinkSendInbandData(pGpu, GPU_GET_KERNEL_NVLINK(pGpu), sendDataParams);
362     if (status != NV_OK)
363     {
364         fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric, requestId);
365         goto done;
366     }
367 
368     pMulticastFabricDesc->bInbandReqInProgress = NV_TRUE;
369     pMulticastFabricDesc->inbandReqId = requestId;
370 
371 done:
372     portMemFree(sendDataParams);
373 
374     return status;
375 }
376 
377 NV_STATUS
378 _memMulticastFabricSendInbandTeamReleaseRequestV1UnderLock
379 (
380     OBJGPU *pGpu,
381     NvU64   mcTeamHandle
382 )
383 {
384     NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *sendDataParams;
385     nvlink_inband_mc_team_release_req_msg_t *pMcTeamReleaseReqMsg = NULL;
386     nvlink_inband_mc_team_release_req_t *pMcTeamReleaseReq = NULL;
387     NvU32 payloadSize;
388     NvU32 sendDataSize;
389     NV_STATUS status = NV_OK;
390 
391     sendDataParams = \
392         (NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *)portMemAllocNonPaged(sizeof(NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS));
393 
394     if (sendDataParams == NULL)
395         return NV_ERR_NO_MEMORY;
396 
397     pMcTeamReleaseReqMsg = \
398         (nvlink_inband_mc_team_release_req_msg_t *)&sendDataParams->buffer[0];
399 
400     pMcTeamReleaseReq = \
401         (nvlink_inband_mc_team_release_req_t *)&pMcTeamReleaseReqMsg->mcTeamReleaseReq;
402 
403     payloadSize = (NvU32)(sizeof(nvlink_inband_mc_team_release_req_t));
404 
405     sendDataSize = (NvU32)(sizeof(nvlink_inband_msg_header_t) + payloadSize);
406 
407     portMemSet(sendDataParams, 0, sendDataSize);
408 
409     pMcTeamReleaseReq->mcTeamHandle = mcTeamHandle;
410 
411     status = fabricInitInbandMsgHdr(&pMcTeamReleaseReqMsg->msgHdr,
412                                     NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ,
413                                     payloadSize);
414     if (status != NV_OK)
415         goto done;
416 
417     sendDataParams->dataSize = sendDataSize;
418 
419     status = knvlinkSendInbandData(pGpu, GPU_GET_KERNEL_NVLINK(pGpu), sendDataParams);
420 
421 done:
422     portMemFree(sendDataParams);
423 
424     return status;
425 }
426 
427 NV_STATUS
428 _memMulticastFabricSendInbandTeamSetupRequestUnderlock
429 (
430     OBJGPU                          *pGpu,
431     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
432 )
433 {
434     NvU64 fmCaps;
435     NV_STATUS status = NV_OK;
436 
437     status = gpuFabricProbeGetfmCaps(pGpu->pGpuFabricProbeInfoKernel, &fmCaps);
438     if (status != NV_OK)
439         return status;
440 
441     if (!(fmCaps & NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1))
442         return NV_ERR_NOT_SUPPORTED;
443 
444     return _memMulticastFabricSendInbandTeamSetupRequestV1UnderLock(pGpu,
445                                                                     pMulticastFabricDesc);
446 }
447 
448 NV_STATUS
449 _memMulticastFabricSendInbandTeamReleaseRequestUnderLock
450 (
451     OBJGPU *pGpu,
452     NvU64   mcTeamHandle
453 )
454 {
455     NvU64 fmCaps;
456     NV_STATUS status = NV_OK;
457 
458     status = gpuFabricProbeGetfmCaps(pGpu->pGpuFabricProbeInfoKernel, &fmCaps);
459     if (status != NV_OK)
460         return status;
461 
462     if (!(fmCaps & NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1))
463         return NV_ERR_NOT_SUPPORTED;
464 
465     return _memMulticastFabricSendInbandTeamReleaseRequestV1UnderLock(pGpu,
466                                                                       mcTeamHandle);
467 }
468 
469 NV_STATUS
470 _memMulticastFabricSendInbandRequestUnderLock
471 (
472     OBJGPU                            *pGpu,
473     MEM_MULTICAST_FABRIC_DESCRIPTOR   *pMulticastFabricDesc,
474     MEM_MULTICAST_FABRIC_REQUEST_TYPE  requestType
475 )
476 {
477     NV_STATUS status = NV_OK;
478 
479     // If pGpu is NULL, pick the first one attached to the object.
480     if (pGpu == NULL)
481         pGpu = listHead(&pMulticastFabricDesc->gpuInfoList)->pGpu;
482 
483     switch (requestType)
484     {
485         case MEM_MULTICAST_FABRIC_TEAM_SETUP_REQUEST:
486             status = _memMulticastFabricSendInbandTeamSetupRequestUnderlock(pGpu,
487                                                                 pMulticastFabricDesc);
488             break;
489         case MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST:
490             status = _memMulticastFabricSendInbandTeamReleaseRequestUnderLock(pGpu,
491                                                    pMulticastFabricDesc->mcTeamHandle);
492             break;
493         default:
494             status = NV_ERR_NOT_SUPPORTED;
495             break;
496     }
497 
498     return status;
499 }
500 
501 static void
502 _memorymulticastfabricDetachMem
503 (
504     FABRIC_VASPACE     *pFabricVAS,
505     MEMORY_DESCRIPTOR  *pFabricMemDesc,
506     NODE               *pMemNode
507 )
508 {
509     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
510     MEMORY_DESCRIPTOR *pPhysMemDesc;
511     MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *pAttachMemInfoNode;
512 
513     pAttachMemInfoNode = \
514         (MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *)pMemNode->Data;
515     pPhysMemDesc = pAttachMemInfoNode->pPhysMemDesc;
516 
517     fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc,
518                                   pMemNode->keyStart,
519                                   pPhysMemDesc,
520                                   pAttachMemInfoNode->physMapLength);
521 
522     NV_ASSERT_OK(pRmApi->Free(pRmApi, pFabricVAS->hClient,
523                               pAttachMemInfoNode->hDupedPhysMem));
524 }
525 
526 static void
527 _memorymulticastfabricBatchDetachMem
528 (
529     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
530 )
531 {
532     MEMORY_DESCRIPTOR *pFabricMemDesc;
533     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuNode;
534     NODE *pMemNode;
535     FABRIC_VASPACE *pFabricVAS;
536 
537     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
538     NV_ASSERT_OR_RETURN_VOID(pFabricMemDesc != NULL);
539 
540     for (pGpuNode = listHead(&pMulticastFabricDesc->gpuInfoList);
541          pGpuNode != NULL;
542          pGpuNode = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuNode))
543     {
544         pFabricVAS = dynamicCast(pGpuNode->pGpu->pFabricVAS, FABRIC_VASPACE);
545         if (pFabricVAS == NULL)
546         {
547             NV_ASSERT(0);
548             continue;
549         }
550 
551         btreeEnumStart(0, &pMemNode, pGpuNode->pAttachMemInfoTree);
552         while (pMemNode != NULL)
553         {
554             _memorymulticastfabricDetachMem(pFabricVAS, pFabricMemDesc, pMemNode);
555 
556             btreeUnlink(pMemNode, &pGpuNode->pAttachMemInfoTree);
557             portMemFree(pMemNode->Data);
558 
559             btreeEnumStart(0, &pMemNode, pGpuNode->pAttachMemInfoTree);
560         }
561 
562         // Everything is detached during object cleanup, free MCFLA now.
563         if (pGpuNode->bMcflaAlloc)
564         {
565             fabricvaspaceBatchFree(pFabricVAS, &pFabricMemDesc->_pteArray[0],
566                                    1, 1);
567             pGpuNode->bMcflaAlloc = NV_FALSE;
568         }
569     }
570 }
571 
572 static void
573 _memMulticastFabricDescriptorFreeUnderLock
574 (
575     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
576 )
577 {
578     if (pMulticastFabricDesc == NULL)
579         return;
580 
581     pMulticastFabricDesc->refCount--;
582 
583     if (pMulticastFabricDesc->refCount == 0)
584     {
585         if (pMulticastFabricDesc->pMemDesc != NULL)
586         {
587             NV_ASSERT(pMulticastFabricDesc->bMemdescInstalled);
588 
589             _memorymulticastfabricBatchDetachMem(pMulticastFabricDesc);
590 
591             _memMulticastFabricSendInbandRequestUnderLock(NULL, pMulticastFabricDesc,
592                                                           MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST);
593         }
594 
595         if (pMulticastFabricDesc->bInbandReqInProgress)
596         {
597             Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
598             OS_WAIT_QUEUE *pWq;
599             THREAD_STATE_NODE *pThreadNode = NULL;
600             THREAD_STATE_FREE_CALLBACK freeCallback;
601 
602             fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric,
603                                                           pMulticastFabricDesc->inbandReqId);
604 
605             NV_ASSERT_OK(osAllocWaitQueue(&pWq));
606 
607             if (pWq != NULL)
608             {
609                 NV_ASSERT_OK(fabricMulticastCleanupCacheInsertUnderLock_IMPL(pFabric,
610                                                                              pMulticastFabricDesc->inbandReqId,
611                                                                              pWq));
612 
613                 NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL));
614 
615                 freeCallback.pCb = fabricMulticastWaitOnTeamCleanupCallback;
616                 freeCallback.pCbData = (void *)pMulticastFabricDesc->inbandReqId;
617 
618                 NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, &freeCallback));
619             }
620         }
621 
622         _memMulticastFabricGpuInfoRemoveUnderLock(pMulticastFabricDesc);
623 
624         NV_ASSERT(listCount(&pMulticastFabricDesc->gpuInfoList) == 0);
625         listDestroy(&pMulticastFabricDesc->gpuInfoList);
626 
627         NV_ASSERT(pMulticastFabricDesc->numAttachedGpus == 0);
628         NV_ASSERT(pMulticastFabricDesc->attachedGpusMask == 0);
629 
630         NV_ASSERT(listCount(&pMulticastFabricDesc->waitingClientsList) == 0);
631         listDestroy(&pMulticastFabricDesc->waitingClientsList);
632 
633         memdescDestroy(pMulticastFabricDesc->pMemDesc);
634 
635         portMemFree(pMulticastFabricDesc);
636     }
637 }
638 
639 NV_STATUS
640 _memMulticastFabricConstructUnderLock
641 (
642     MemoryMulticastFabric        *pMemoryMulticastFabric,
643     CALL_CONTEXT                 *pCallContext,
644     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
645 )
646 {
647     Memory                           *pMemory       = staticCast(pMemoryMulticastFabric, Memory);
648     NV00FD_ALLOCATION_PARAMETERS     *pAllocParams  = pParams->pAllocParams;
649     MEM_MULTICAST_FABRIC_DESCRIPTOR  *pMulticastFabricDesc;
650     NV_STATUS                         status        = NV_OK;
651 
652     pMulticastFabricDesc = _memMulticastFabricDescriptorAllocUnderLock(pMemoryMulticastFabric,
653                                                                        pAllocParams);
654 
655     if (pMulticastFabricDesc == NULL)
656         return NV_ERR_NO_MEMORY;
657 
658     status = _memMulticastFabricDescriptorEnqueueWaitUnderLock(pParams->hClient,
659                                                                pMulticastFabricDesc,
660                                                                pAllocParams->pOsEvent,
661                                                                pMemory);
662     if (status != NV_OK)
663         goto fail;
664 
665     pMemoryMulticastFabric->pMulticastFabricDesc = pMulticastFabricDesc;
666 
667     return NV_OK;
668 
669 fail:
670     _memMulticastFabricDescriptorFreeUnderLock(pMulticastFabricDesc);
671 
672     return status;
673 }
674 
675 NV_STATUS
676 _memMulticastFabricCreateMemDescUnderLock
677 (
678     MEM_MULTICAST_FABRIC_DESCRIPTOR  *pMulticastFabricDesc,
679     NvU64                             mcAddressBase,
680     MEMORY_DESCRIPTOR               **ppMemDesc
681 )
682 {
683     NV_STATUS status;
684     MEMORY_DESCRIPTOR *pTempMemDesc = NULL;
685 
686     status = memdescCreate(&pTempMemDesc, NULL, pMulticastFabricDesc->allocSize,
687                            0, NV_TRUE, ADDR_FABRIC_MC, NV_MEMORY_UNCACHED,
688                            MEMDESC_FLAGS_NONE);
689     if (status != NV_OK)
690     {
691         NV_PRINTF(LEVEL_ERROR,
692                   "Failed to allocate memory descriptor for multicast object\n");
693         return status;
694     }
695 
696     memdescSetPte(pTempMemDesc, AT_GPU, 0, mcAddressBase);
697 
698     memdescSetPageSize(pTempMemDesc, AT_GPU, pMulticastFabricDesc->pageSize);
699 
700     pTempMemDesc->_pteKind = NV_MMU_PTE_KIND_SMSKED_MESSAGE;
701 
702     memdescSetFlag(pTempMemDesc, MEMDESC_FLAGS_SET_KIND, NV_TRUE);
703 
704     memdescSetGpuCacheAttrib(pTempMemDesc, NV_MEMORY_UNCACHED);
705 
706     *ppMemDesc = pTempMemDesc;
707 
708     return NV_OK;
709 }
710 
711 void
712 _memMulticastFabricInstallMemDescUnderLock
713 (
714     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
715     MEMORY_DESCRIPTOR               *pMemDesc,
716     NvU64                            mcTeamHandle,
717     NV_STATUS                        status
718 )
719 {
720     NV_ASSERT(pMulticastFabricDesc->pMemDesc == NULL);
721 
722     pMulticastFabricDesc->pMemDesc = pMemDesc;
723     pMulticastFabricDesc->bMemdescInstalled = NV_TRUE;
724     pMulticastFabricDesc->mcTeamHandle = mcTeamHandle;
725     pMulticastFabricDesc->mcTeamStatus = status;
726 
727     _memMulticastFabricDescriptorFlushClientsUnderLock(pMulticastFabricDesc);
728 }
729 
730 static NV_STATUS
731 _memorymulticastFabricAllocVasUnderLock
732 (
733     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
734     MEMORY_DESCRIPTOR               *pFabricMemDesc
735 )
736 {
737     NV_STATUS status = NV_OK;
738     FABRIC_VASPACE *pFabricVAS;
739     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
740     VAS_ALLOC_FLAGS flags = { 0 };
741     NvU64 gpuProbeHandle;
742 
743     for (pGpuInfo = listHead(&pMulticastFabricDesc->gpuInfoList);
744          pGpuInfo != NULL;
745          pGpuInfo = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuInfo))
746     {
747         OBJGPU *pGpu = pGpuInfo->pGpu;
748 
749         pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
750         if (pFabricVAS == NULL)
751         {
752             status = NV_ERR_INVALID_STATE;
753             goto cleanup;
754         }
755 
756         //
757         // The fabric handle might not be available or have changed, if fabric
758         // state was ever invalidated while MCFLA allocation was in progress.
759         //
760         status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
761                                                   &gpuProbeHandle);
762         if ((status != NV_OK) || (pGpuInfo->gpuProbeHandle != gpuProbeHandle))
763         {
764             NV_PRINTF(LEVEL_ERROR, "Attached GPU's probe handle is stale\n");
765             status = NV_ERR_INVALID_DEVICE;
766             goto cleanup;
767         }
768 
769         status = fabricvaspaceAllocMulticast(pFabricVAS,
770                                     memdescGetPageSize(pFabricMemDesc, AT_GPU),
771                                     pMulticastFabricDesc->alignment,
772                                     flags, pFabricMemDesc->_pteArray[0],
773                                     pMulticastFabricDesc->allocSize);
774         if (status != NV_OK)
775         {
776             NV_PRINTF(LEVEL_ERROR,
777                       "Fabric VA space alloc failed for GPU %d\n",
778                       pGpuInfo->pGpu->gpuInstance);
779             goto cleanup;
780         }
781 
782         pGpuInfo->bMcflaAlloc = NV_TRUE;
783     }
784 
785     return NV_OK;
786 
787 cleanup:
788     for (pGpuInfo = listHead(&pMulticastFabricDesc->gpuInfoList);
789          pGpuInfo != NULL;
790          pGpuInfo = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuInfo))
791     {
792         if (pGpuInfo->bMcflaAlloc)
793         {
794             pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
795 
796             fabricvaspaceBatchFree(pFabricVAS, &pFabricMemDesc->_pteArray[0],
797                                    1, 1);
798 
799             pGpuInfo->bMcflaAlloc = NV_FALSE;
800         }
801     }
802 
803     return status;
804 }
805 
806 NV_STATUS
807 _memMulticastFabricAttachGpuPostProcessorUnderLock
808 (
809     OBJGPU                          *pGpu,
810     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
811     NV_STATUS                        mcTeamStatus,
812     NvU64                            mcTeamHandle,
813     NvU64                            mcAddressBase,
814     NvU64                            mcAddressSize
815 )
816 {
817     NV_STATUS status = mcTeamStatus;
818     MEMORY_DESCRIPTOR *pMemDesc = NULL;
819 
820     //
821     // FM is never expected to return NV_ERR_NOT_READY
822     // as part of the inband response.
823     //
824     NV_ASSERT(mcTeamStatus != NV_ERR_NOT_READY);
825 
826     if (mcTeamStatus != NV_OK)
827         goto installMemDesc;
828 
829     if (mcAddressSize < pMulticastFabricDesc->allocSize)
830     {
831         NV_PRINTF(LEVEL_ERROR,
832                   "Insufficient mcAddressSize returned from Fabric Manager\n");
833         status = NV_ERR_INSUFFICIENT_RESOURCES;
834         goto installMemDesc;
835     }
836 
837     if (!NV_IS_ALIGNED64(mcAddressBase, NV_MEMORY_MULTICAST_FABRIC_PAGE_SIZE_512M))
838     {
839         NV_PRINTF(LEVEL_ERROR,
840                   "Insufficient mcAddressSize returned from Fabric Manager\n");
841         status = NV_ERR_INVALID_ADDRESS;
842         goto installMemDesc;
843     }
844 
845     status = _memMulticastFabricCreateMemDescUnderLock(pMulticastFabricDesc,
846                                                        mcAddressBase, &pMemDesc);
847     if (status != NV_OK)
848     {
849         NV_PRINTF(LEVEL_ERROR, "Failed to allocate fabric memdesc\n");
850         goto installMemDesc;
851     }
852 
853     status = _memorymulticastFabricAllocVasUnderLock(pMulticastFabricDesc,
854                                                      pMemDesc);
855     if (status != NV_OK)
856     {
857         NV_PRINTF(LEVEL_ERROR, "Failed to allocate fabric VAS\n");
858         memdescDestroy(pMemDesc);
859         pMemDesc = NULL;
860         goto installMemDesc;
861     }
862 
863 installMemDesc:
864     _memMulticastFabricInstallMemDescUnderLock(pMulticastFabricDesc,
865                                                pMemDesc,
866                                                mcTeamHandle,
867                                                status);
868 
869      if ((status != NV_OK) && (mcTeamStatus == NV_OK))
870          _memMulticastFabricSendInbandRequestUnderLock(pGpu, pMulticastFabricDesc,
871                                         MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST);
872 
873     return status;
874 }
875 
876 void
877 _memorymulticastfabricDestructUnderLock
878 (
879     MemoryMulticastFabric *pMemoryMulticastFabric
880 )
881 {
882     Memory *pMemory = staticCast(pMemoryMulticastFabric, Memory);
883 
884     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
885         pMemoryMulticastFabric->pMulticastFabricDesc;
886 
887     memDestructCommon(pMemory);
888 
889     _memMulticastFabricDescriptorDequeueWaitUnderLock(pMulticastFabricDesc, pMemory);
890 
891     _memMulticastFabricDescriptorFreeUnderLock(pMulticastFabricDesc);
892 }
893 
894 NV_STATUS
895 memorymulticastfabricTeamSetupResponseCallback
896 (
897     NvU32                                           gpuInstance,
898     NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS *pInbandRcvParams
899 )
900 {
901     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
902     nvlink_inband_mc_team_setup_rsp_msg_t *pMcTeamSetupRspMsg;
903     nvlink_inband_mc_team_setup_rsp_t *pMcTeamSetupRsp;
904     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
905     NvU64 requestId;
906     NV_STATUS mcTeamStatus;
907     NvU64 mcTeamHandle = 0;
908     NvU64 mcAddressBase = 0;
909     NvU64 mcAddressSize = 0;
910     NvU8 *pRsvd = NULL;
911     OBJGPU *pGpu;
912 
913     NV_ASSERT(pInbandRcvParams != NULL);
914     NV_ASSERT(rmGpuLockIsOwner());
915 
916     if ((pGpu = gpumgrGetGpu(gpuInstance)) == NULL)
917     {
918         NV_ASSERT_FAILED("Invalid GPU instance");
919         return NV_ERR_INVALID_ARGUMENT;
920     }
921 
922     pMcTeamSetupRspMsg = \
923         (nvlink_inband_mc_team_setup_rsp_msg_t *)&pInbandRcvParams->data[0];
924 
925     pMcTeamSetupRsp = \
926         (nvlink_inband_mc_team_setup_rsp_t *)&pMcTeamSetupRspMsg->mcTeamSetupRsp;
927 
928     requestId = pMcTeamSetupRspMsg->msgHdr.requestId;
929 
930     mcTeamStatus = pMcTeamSetupRspMsg->msgHdr.status;
931 
932     if (mcTeamStatus == NV_OK)
933     {
934         mcTeamHandle = pMcTeamSetupRsp->mcTeamHandle;
935         mcAddressBase = pMcTeamSetupRsp->mcAddressBase;
936         mcAddressSize = pMcTeamSetupRsp->mcAddressSize;
937 
938         // Make sure that the reserved fields are initialized to 0
939         pRsvd = &pMcTeamSetupRsp->reserved[0];
940 
941         NV_ASSERT((pRsvd[0] == 0) && portMemCmp(pRsvd, pRsvd + 1,
942                   (sizeof(pMcTeamSetupRsp->reserved) - 1)) == 0);
943     }
944 
945     fabricMulticastFabricOpsMutexAcquire(pFabric);
946 
947     pMulticastFabricDesc = \
948         fabricMulticastSetupCacheGetUnderLock_IMPL(pFabric, requestId);
949 
950     if (pMulticastFabricDesc != NULL)
951     {
952         pMulticastFabricDesc->bInbandReqInProgress = NV_FALSE;
953 
954         pMulticastFabricDesc->inbandReqId = 0;
955 
956         fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric, requestId);
957 
958         (void)_memMulticastFabricAttachGpuPostProcessorUnderLock(pGpu,
959                                                                  pMulticastFabricDesc,
960                                                                  mcTeamStatus,
961                                                                  mcTeamHandle,
962                                                                  mcAddressBase,
963                                                                  mcAddressSize);
964     }
965     else
966     {
967         OS_WAIT_QUEUE *pWq;
968 
969         if (mcTeamStatus == NV_OK)
970             (void)_memMulticastFabricSendInbandTeamReleaseRequestUnderLock(pGpu,
971                                                                     mcTeamHandle);
972 
973         //
974         // Check if there is any thread waiting for team release and
975         // wake it up.
976         //
977         // The multicast fabric descriptor could have undergone the
978         // destruct sequence while an inband team setup request was in
979         // progress with FM.
980         //
981         // In such a scenario the last thread to free the multicast
982         // descriptor is put to sleep until the team setup response
983         // is received and a subsequent team release request is sent.
984         //
985 
986         pWq = (OS_WAIT_QUEUE *)fabricMulticastCleanupCacheGetUnderLock_IMPL(pFabric,
987                                                                             requestId);
988 
989         if (pWq != NULL)
990              osWakeUp(pWq);
991     }
992 
993     fabricMulticastFabricOpsMutexRelease(pFabric);
994 
995     return NV_OK;
996 }
997 
998 NV_STATUS
999 memorymulticastfabricConstruct_IMPL
1000 (
1001     MemoryMulticastFabric        *pMemoryMulticastFabric,
1002     CALL_CONTEXT                 *pCallContext,
1003     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
1004 )
1005 {
1006     Fabric                       *pFabric      = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1007     NV00FD_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;
1008     NV_STATUS                     status       = NV_OK;
1009 
1010     if (RS_IS_COPY_CTOR(pParams))
1011     {
1012         return memorymulticastfabricCopyConstruct_IMPL(pMemoryMulticastFabric,
1013                                                        pCallContext,
1014                                                        pParams);
1015     }
1016 
1017     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, _memMulticastFabricValidateAllocParams(pAllocParams));
1018 
1019     fabricMulticastFabricOpsMutexAcquire(pFabric);
1020 
1021     status = _memMulticastFabricConstructUnderLock(pMemoryMulticastFabric,
1022                                                    pCallContext,
1023                                                    pParams);
1024 
1025     fabricMulticastFabricOpsMutexRelease(pFabric);
1026 
1027     return status;
1028 }
1029 
1030 NV_STATUS
1031 memorymulticastfabricCtrlAttachGpu_IMPL
1032 (
1033     MemoryMulticastFabric         *pMemoryMulticastFabric,
1034     NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
1035 )
1036 {
1037     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1038                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1039     NV_STATUS status = NV_OK;
1040     Subdevice *pSubdevice;
1041     OBJGPU *pGpu;
1042     FABRIC_VASPACE *pFabricVAS;
1043     NvU64 gpuProbeHandle;
1044     MEM_MULTICAST_FABRIC_GPU_INFO *pNode = \
1045                                 listTail(&pMulticastFabricDesc->gpuInfoList);
1046 
1047     if (pParams->flags != 0)
1048     {
1049         NV_PRINTF(LEVEL_ERROR, "flags passed for attach mem must be zero\n");
1050         status = NV_ERR_INVALID_ARGUMENT;
1051         goto fail;
1052     }
1053 
1054     // Check if the Multicast FLA object has any additional slots for GPUs
1055     if (pMulticastFabricDesc->numAttachedGpus == pMulticastFabricDesc->numMaxGpus)
1056     {
1057         NV_PRINTF(LEVEL_ERROR, "Max no. of GPUs have already attached!\n");
1058         return NV_ERR_INVALID_OPERATION;
1059     }
1060 
1061     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
1062         subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
1063             pParams->hSubdevice, &pSubdevice));
1064 
1065     pGpu = GPU_RES_GET_GPU(pSubdevice);
1066 
1067     if (RMCFG_FEATURE_PLATFORM_WINDOWS ||
1068         gpuIsCCFeatureEnabled(pGpu) ||
1069         IS_VIRTUAL(pGpu))
1070     {
1071         NV_PRINTF(LEVEL_ERROR,
1072                   "Multicast attach not supported on Windows/CC/vGPU modes\n");
1073         status = NV_ERR_NOT_SUPPORTED;
1074         goto fail;
1075     }
1076 
1077     status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
1078                                               &gpuProbeHandle);
1079     if (status != NV_OK)
1080     {
1081         NV_PRINTF(LEVEL_ERROR,
1082                   "Attaching GPU does not have a valid probe handle\n");
1083         goto fail;
1084     }
1085 
1086     pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
1087     if (pFabricVAS == NULL)
1088     {
1089         NV_PRINTF(LEVEL_ERROR,
1090                   "Fabric vaspace object not available for GPU %x\n",
1091                   pGpu->gpuInstance);
1092         status = NV_ERR_NOT_SUPPORTED;
1093         goto fail;
1094     }
1095 
1096     pNode->gpuProbeHandle = gpuProbeHandle;
1097 
1098     if ((pMulticastFabricDesc->numAttachedGpus + 1)  == pMulticastFabricDesc->numMaxGpus)
1099     {
1100         status = _memMulticastFabricSendInbandRequestUnderLock(NULL, pMulticastFabricDesc,
1101                                             MEM_MULTICAST_FABRIC_TEAM_SETUP_REQUEST);
1102         if (status != NV_OK)
1103         {
1104             NV_PRINTF(LEVEL_ERROR,
1105                       "Inband request submission to FM for Multicast Team Setup failed!\n");
1106             goto fail;
1107         }
1108     }
1109 
1110     pMulticastFabricDesc->numAttachedGpus++;
1111     pMulticastFabricDesc->attachedGpusMask |= NVBIT32(pGpu->gpuInstance);
1112 
1113     return NV_OK;
1114 
1115 fail:
1116     // Remove GPU OS info added in the prologue.
1117     listRemove(&pMulticastFabricDesc->gpuInfoList, pNode);
1118 
1119     return status;
1120 }
1121 
1122 static MEM_MULTICAST_FABRIC_GPU_INFO*
1123 _memorymulticastfabricGetAttchedGpuInfo
1124 (
1125     MemoryMulticastFabric  *pMemoryMulticastFabric,
1126     NvHandle                hSubdevice
1127 )
1128 {
1129     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1130                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1131     MEM_MULTICAST_FABRIC_GPU_INFO *pNodeItr;
1132     Subdevice *pSubdevice = NULL;
1133     NV_STATUS status;
1134 
1135     status = subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
1136                                   hSubdevice, &pSubdevice);
1137     if (status != NV_OK)
1138         return NULL;
1139 
1140     for (pNodeItr = listHead(&pMulticastFabricDesc->gpuInfoList);
1141          pNodeItr != NULL;
1142          pNodeItr = listNext(&pMulticastFabricDesc->gpuInfoList, pNodeItr))
1143     {
1144         if (pNodeItr->pGpu == GPU_RES_GET_GPU(pSubdevice))
1145             return pNodeItr;
1146     }
1147 
1148     return NULL;
1149 }
1150 
1151 NV_STATUS
1152 memorymulticastfabricCtrlDetachMem_IMPL
1153 (
1154     MemoryMulticastFabric         *pMemoryMulticastFabric,
1155     NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
1156 )
1157 {
1158     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1159                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1160     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
1161     NODE *pNode;
1162     MEMORY_DESCRIPTOR *pFabricMemDesc;
1163     FABRIC_VASPACE *pFabricVAS;
1164     NV_STATUS status;
1165 
1166     if (pParams->flags != 0)
1167         return NV_ERR_INVALID_ARGUMENT;
1168 
1169     pGpuInfo = _memorymulticastfabricGetAttchedGpuInfo(pMemoryMulticastFabric,
1170                                                        pParams->hSubdevice);
1171     if (pGpuInfo == NULL)
1172         return NV_ERR_INVALID_DEVICE;
1173 
1174     status = btreeSearch(pParams->offset, &pNode, pGpuInfo->pAttachMemInfoTree);
1175     if (status != NV_OK)
1176         return status;
1177 
1178     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
1179     NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_STATE);
1180 
1181     pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
1182     NV_ASSERT_OR_RETURN(pFabricVAS != NULL, NV_ERR_INVALID_STATE);
1183 
1184     _memorymulticastfabricDetachMem(pFabricVAS, pFabricMemDesc, pNode);
1185 
1186     btreeUnlink(pNode, &pGpuInfo->pAttachMemInfoTree);
1187     portMemFree(pNode->Data);
1188 
1189     return NV_OK;
1190 }
1191 
1192 static NV_STATUS
1193 _memorymulticastfabricValidatePhysMem
1194 (
1195     MemoryMulticastFabric *pMemoryMulticastFabric,
1196     NvHandle               hPhysMem,
1197     OBJGPU                *pAttachedGpu,
1198     MEMORY_DESCRIPTOR    **ppPhysMemDesc
1199 )
1200 {
1201     RsResourceRef *pPhysmemRef;
1202     MEMORY_DESCRIPTOR *pPhysMemDesc;
1203     NvU64 physPageSize;
1204     NV_STATUS status;
1205 
1206     status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
1207                                       hPhysMem, &pPhysmemRef);
1208     if (status != NV_OK)
1209     {
1210         NV_PRINTF(LEVEL_ERROR,
1211                   "Failed to get resource in resserv for physmem handle\n");
1212 
1213         return status;
1214     }
1215 
1216     pPhysMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;
1217 
1218     if (memdescGetAddressSpace(pPhysMemDesc) != ADDR_FBMEM ||
1219         (pAttachedGpu != pPhysMemDesc->pGpu))
1220     {
1221         NV_PRINTF(LEVEL_ERROR, "Invalid physmem handle passed\n");
1222 
1223         return NV_ERR_INVALID_ARGUMENT;
1224     }
1225 
1226     physPageSize = memdescGetPageSize(pPhysMemDesc, AT_GPU);
1227     if ((physPageSize != RM_PAGE_SIZE_HUGE) &&
1228         (physPageSize != RM_PAGE_SIZE_512M))
1229     {
1230         NV_PRINTF(LEVEL_ERROR, "Physmem page size should be 2MB\n");
1231 
1232         return NV_ERR_INVALID_ARGUMENT;
1233     }
1234 
1235     *ppPhysMemDesc = pPhysMemDesc;
1236 
1237     return NV_OK;
1238 }
1239 
1240 NV_STATUS
1241 memorymulticastfabricCtrlAttachMem_IMPL
1242 (
1243     MemoryMulticastFabric         *pMemoryMulticastFabric,
1244     NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
1245 )
1246 {
1247     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1248                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1249     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
1250     NV_STATUS status;
1251     MEMORY_DESCRIPTOR *pPhysMemDesc;
1252     MEMORY_DESCRIPTOR *pFabricMemDesc;
1253     NvHandle hDupedPhysMem = 0;
1254     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1255     FABRIC_VASPACE *pFabricVAS;
1256     MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *pNode;
1257 
1258     if (pParams->flags != 0)
1259         return NV_ERR_INVALID_ARGUMENT;
1260 
1261     pGpuInfo = _memorymulticastfabricGetAttchedGpuInfo(pMemoryMulticastFabric,
1262                                                        pParams->hSubdevice);
1263     if (pGpuInfo == NULL)
1264         return NV_ERR_INVALID_DEVICE;
1265 
1266     status = _memorymulticastfabricValidatePhysMem(pMemoryMulticastFabric,
1267                                                    pParams->hMemory,
1268                                                    pGpuInfo->pGpu,
1269                                                    &pPhysMemDesc);
1270     if (status != NV_OK)
1271     {
1272         NV_PRINTF(LEVEL_ERROR, "Failed to validate physmem handle\n");
1273         return status;
1274     }
1275 
1276     pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
1277     NV_ASSERT_OR_RETURN(pFabricVAS != NULL, NV_ERR_INVALID_STATE);
1278 
1279     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
1280     NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_STATE);
1281 
1282     NV_ASSERT_OR_RETURN(pGpuInfo->bMcflaAlloc, NV_ERR_INVALID_STATE);
1283 
1284     status = pRmApi->DupObject(pRmApi, pFabricVAS->hClient,
1285                                pFabricVAS->hDevice, &hDupedPhysMem,
1286                                RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
1287                                pParams->hMemory, 0);
1288     if (status != NV_OK)
1289     {
1290         NV_PRINTF(LEVEL_ERROR, "Failed to dup physmem handle\n");
1291         return status;
1292     }
1293 
1294     status = fabricvaspaceMapPhysMemdesc(pFabricVAS,
1295                                          pFabricMemDesc,
1296                                          pParams->offset,
1297                                          pPhysMemDesc,
1298                                          pParams->mapOffset,
1299                                          pParams->mapLength,
1300                                          0);
1301     if (status != NV_OK)
1302     {
1303         NV_PRINTF(LEVEL_ERROR, "Failed to map FLA\n");
1304         goto freeDupedMem;
1305     }
1306 
1307     pNode = portMemAllocNonPaged(sizeof(*pNode));
1308     if (pNode == NULL)
1309     {
1310         status = NV_ERR_NO_MEMORY;
1311         goto unmapVas;
1312     }
1313 
1314     portMemSet(pNode, 0, sizeof(*pNode));
1315 
1316     pNode->node.keyStart = pParams->offset;
1317     pNode->node.keyEnd   = pParams->offset;
1318     pNode->physMapLength = pParams->mapLength;
1319     pNode->pPhysMemDesc  = pPhysMemDesc;
1320     pNode->hDupedPhysMem = hDupedPhysMem;
1321     pNode->node.Data     = pNode;
1322 
1323     status = btreeInsert(&pNode->node, &pGpuInfo->pAttachMemInfoTree);
1324     if (status != NV_OK)
1325     {
1326         NV_PRINTF(LEVEL_ERROR, "Failed to track attach mem info\n");
1327         goto freeNode;
1328     }
1329 
1330     return NV_OK;
1331 
1332 freeNode:
1333     portMemFree(pNode);
1334 
1335 unmapVas:
1336     fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc, pParams->offset,
1337                                   pPhysMemDesc, pParams->mapLength);
1338 
1339 freeDupedMem:
1340     NV_ASSERT_OK(pRmApi->Free(pRmApi, pFabricVAS->hClient, hDupedPhysMem));
1341 
1342     return status;
1343 }
1344 
1345 void
1346 memorymulticastfabricDestruct_IMPL
1347 (
1348     MemoryMulticastFabric *pMemoryMulticastFabric
1349 )
1350 {
1351     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1352 
1353     fabricMulticastFabricOpsMutexAcquire(pFabric);
1354 
1355     _memorymulticastfabricDestructUnderLock(pMemoryMulticastFabric);
1356 
1357     fabricMulticastFabricOpsMutexRelease(pFabric);
1358 }
1359 
1360 NvBool
1361 memorymulticastfabricCanCopy_IMPL
1362 (
1363     MemoryMulticastFabric *pMemoryMulticastFabric
1364 )
1365 {
1366     return NV_TRUE;
1367 }
1368 
1369 NV_STATUS
1370 memorymulticastfabricCopyConstruct_IMPL
1371 (
1372     MemoryMulticastFabric        *pMemoryMulticastFabric,
1373     CALL_CONTEXT                 *pCallContext,
1374     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
1375 )
1376 {
1377     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1378     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1379 
1380     fabricMulticastFabricOpsMutexAcquire(pFabric);
1381 
1382     MemoryMulticastFabric *pSourceMemoryMulticastFabric =
1383         dynamicCast(pParams->pSrcRef->pResource, MemoryMulticastFabric);
1384 
1385     pMulticastFabricDesc = pSourceMemoryMulticastFabric->pMulticastFabricDesc;
1386 
1387     pMemoryMulticastFabric->pMulticastFabricDesc = pMulticastFabricDesc;
1388 
1389     pMulticastFabricDesc->refCount++;
1390 
1391     fabricMulticastFabricOpsMutexRelease(pFabric);
1392 
1393     return NV_OK;
1394 }
1395 
1396 NV_STATUS
1397 memorymulticastfabricCtrlGetInfo_IMPL
1398 (
1399     MemoryMulticastFabric       *pMemoryMulticastFabric,
1400     NV00FD_CTRL_GET_INFO_PARAMS *pParams
1401 )
1402 {
1403     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1404 
1405     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1406 
1407     pParams->alignment       = pMulticastFabricDesc->alignment;
1408     pParams->allocSize       = pMulticastFabricDesc->allocSize;
1409     pParams->pageSize        = pMulticastFabricDesc->pageSize;
1410     pParams->numMaxGpus      = pMulticastFabricDesc->numMaxGpus;
1411     pParams->numAttachedGpus = pMulticastFabricDesc->numAttachedGpus;
1412 
1413     return NV_OK;
1414 }
1415 
1416 NV_STATUS
1417 memorymulticastfabricIsReady_IMPL
1418 (
1419     MemoryMulticastFabric *pMemoryMulticastFabric,
1420     NvBool                 bCopyConstructorContext
1421 )
1422 {
1423     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1424     Memory *pMemory = staticCast(pMemoryMulticastFabric, Memory);
1425     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1426     NV_STATUS mcTeamStatus;
1427 
1428     fabricMulticastFabricOpsMutexAcquire(pFabric);
1429 
1430     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1431     mcTeamStatus = pMulticastFabricDesc->mcTeamStatus;
1432 
1433     if (bCopyConstructorContext && (mcTeamStatus == NV_ERR_NOT_READY))
1434     {
1435         fabricMulticastFabricOpsMutexRelease(pFabric);
1436         return NV_OK;
1437     }
1438 
1439     if (pMemory->pMemDesc != pMulticastFabricDesc->pMemDesc)
1440     {
1441         // This function only initializes pMemory so it should never fail.
1442         NV_ASSERT_OK(memConstructCommon(pMemory,
1443                                         NV_MEMORY_MULTICAST_FABRIC,
1444                                         0, pMulticastFabricDesc->pMemDesc,
1445                                         0, NULL, 0, 0, 0, 0,
1446                                         NVOS32_MEM_TAG_NONE, NULL));
1447     }
1448 
1449     fabricMulticastFabricOpsMutexRelease(pFabric);
1450 
1451     return mcTeamStatus;
1452 }
1453 
1454 NV_STATUS
1455 memorymulticastfabricCtrlRegisterEvent_IMPL
1456 (
1457     MemoryMulticastFabric             *pMemoryMulticastFabric,
1458     NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
1459 )
1460 {
1461     Memory    *pMemory = staticCast(pMemoryMulticastFabric, Memory);
1462     NvHandle   hClient = RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric);
1463 
1464     return _memMulticastFabricDescriptorEnqueueWaitUnderLock(hClient,
1465            pMemoryMulticastFabric->pMulticastFabricDesc,
1466            pParams->pOsEvent, pMemory);
1467 }
1468 
1469 NV_STATUS
1470 memorymulticastfabricControl_Prologue_IMPL
1471 (
1472     MemoryMulticastFabric          *pMemoryMulticastFabric,
1473     CALL_CONTEXT                   *pCallContext,
1474     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
1475 )
1476 {
1477     RmResource *pResource = staticCast(pMemoryMulticastFabric, RmResource);
1478 
1479     // Other control calls, nothing to be validated.
1480     if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
1481         return rmresControl_Prologue_IMPL(pResource, pCallContext, pParams);
1482 
1483     return _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric, pParams);
1484 }
1485 
1486 NV_STATUS
1487 memorymulticastfabricControl_IMPL
1488 (
1489     MemoryMulticastFabric          *pMemoryMulticastFabric,
1490     CALL_CONTEXT                   *pCallContext,
1491     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
1492 )
1493 {
1494     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1495     NV_STATUS status = NV_OK;
1496 
1497     if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
1498         status = memorymulticastfabricIsReady(pMemoryMulticastFabric, NV_FALSE);
1499 
1500     //
1501     // If clients try to register when the multicast object
1502     // is ready, then there is nothing left to do as the memory
1503     // descriptor is already installed.
1504     //
1505     // If the status is NV_ERR_NOT_READY then we are yet to
1506     // receive the inband response and we register the event.
1507     //
1508     if (pParams->cmd == NV00FD_CTRL_CMD_REGISTER_EVENT)
1509     {
1510         if (status == NV_OK)
1511             return NV_WARN_NOTHING_TO_DO;
1512 
1513         if (status != NV_ERR_NOT_READY)
1514             return status;
1515     }
1516     else
1517     {
1518         // Clients may busy-loop on this error status, don't log error.
1519         if (status == NV_ERR_NOT_READY)
1520             return status;
1521 
1522         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
1523     }
1524 
1525     fabricMulticastFabricOpsMutexAcquire(pFabric);
1526 
1527     status = resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
1528                              pCallContext, pParams);
1529 
1530     fabricMulticastFabricOpsMutexRelease(pFabric);
1531 
1532     return status;
1533 }
1534 
1535 NvBool
1536 memorymulticastfabricIsGpuMapAllowed_IMPL
1537 (
1538     MemoryMulticastFabric *pMemoryMulticastFabric,
1539     OBJGPU                *pGpu
1540 )
1541 {
1542     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1543     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1544     NvU32 attachedGpusMask;
1545 
1546     fabricMulticastFabricOpsMutexAcquire(pFabric);
1547 
1548     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1549 
1550     attachedGpusMask = pMulticastFabricDesc->attachedGpusMask;
1551 
1552     fabricMulticastFabricOpsMutexRelease(pFabric);
1553 
1554     return ((attachedGpusMask & NVBIT32(pGpu->gpuInstance)) != 0U);
1555 }
1556 
1557 NV_STATUS
1558 memorymulticastfabricGetMapAddrSpace_IMPL
1559 (
1560     MemoryMulticastFabric *pMemoryMulticastFabric,
1561     CALL_CONTEXT          *pCallContext,
1562     NvU32                  mapFlags,
1563     NV_ADDRESS_SPACE      *pAddrSpace
1564 )
1565 {
1566     *pAddrSpace = ADDR_FABRIC_MC;
1567     return NV_OK;
1568 }
1569