1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /******************************************************************************
25  *
26  *   Description:
27  *       This file contains the functions managing the memory multicast fabric
28  *
29  *****************************************************************************/
30 #include "os/os.h"
31 #include "core/locks.h"
32 #include "nvport/nvport.h"
33 #include "rmapi/rs_utils.h"
34 #include "rmapi/rmapi_utils.h"
35 #include "compute/fabric.h"
36 #include "gpu/gpu.h"
37 #include "gpu/bus/kern_bus.h"
38 #include "gpu/mem_mgr/mem_desc.h"
39 #include "gpu/mem_mgr/mem_mgr.h"
40 #include "gpu/subdevice/subdevice.h"
41 #include "kernel/gpu/nvlink/kernel_nvlink.h"
42 #include "mem_mgr/fabric_vaspace.h"
43 #include "mem_mgr/mem_multicast_fabric.h"
44 #include "published/hopper/gh100/dev_mmu.h"
45 
46 #include "gpu/gpu_fabric_probe.h"
47 
48 static
49 NV_STATUS
50 _memMulticastFabricValidateAllocParams
51 (
52     NV00FD_ALLOCATION_PARAMETERS *pAllocParams
53 )
54 {
55     // Only page size 512MB is supported
56     if (pAllocParams->pageSize != NV_MEMORY_MULTICAST_FABRIC_PAGE_SIZE_512M)
57     {
58         NV_PRINTF(LEVEL_ERROR,
59                   "Unsupported pageSize: 0x%x. Only 512MB pagesize is supported\n",
60                   pAllocParams->pageSize);
61         return NV_ERR_INVALID_ARGUMENT;
62     }
63 
64     // Alignment should be pageSize aligned
65     if (!NV_IS_ALIGNED64(pAllocParams->alignment, pAllocParams->pageSize))
66     {
67         NV_PRINTF(LEVEL_ERROR,
68                   "Alignment should be pageSize aligned\n");
69         return NV_ERR_INVALID_ARGUMENT;
70     }
71 
72     // AllocSize should be page size aligned
73     if (!NV_IS_ALIGNED64(pAllocParams->allocSize, pAllocParams->pageSize))
74     {
75         NV_PRINTF(LEVEL_ERROR,
76                   "AllocSize should be pageSize aligned\n");
77         return NV_ERR_INVALID_ARGUMENT;
78     }
79 
80     if (pAllocParams->numGpus == 0)
81     {
82         NV_PRINTF(LEVEL_ERROR,
83                   "Number of GPUs to attach must be non-zero\n");
84         return NV_ERR_INVALID_ARGUMENT;
85     }
86 
87     // Allocation flags must be zero
88     if (pAllocParams->allocFlags != 0)
89     {
90         NV_PRINTF(LEVEL_ERROR, "allocFlags must be zero\n");
91         return NV_ERR_INVALID_ARGUMENT;
92     }
93 
94     return NV_OK;
95 }
96 
97 static
98 MEM_MULTICAST_FABRIC_DESCRIPTOR*
99 _memMulticastFabricDescriptorAllocUnderLock
100 (
101     MemoryMulticastFabric        *pMemoryMulticastFabric,
102     NV00FD_ALLOCATION_PARAMETERS *pAllocParams
103 )
104 {
105     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
106 
107     pMulticastFabricDesc = portMemAllocNonPaged(sizeof(MEM_MULTICAST_FABRIC_DESCRIPTOR));
108     if (pMulticastFabricDesc == NULL)
109         return NULL;
110 
111     portMemSet(pMulticastFabricDesc, 0, sizeof(MEM_MULTICAST_FABRIC_DESCRIPTOR));
112 
113     listInit(&pMulticastFabricDesc->waitingClientsList,
114              portMemAllocatorGetGlobalNonPaged());
115 
116     listInit(&pMulticastFabricDesc->gpuInfoList,
117              portMemAllocatorGetGlobalNonPaged());
118 
119     pMulticastFabricDesc->refCount = 1;
120     pMulticastFabricDesc->mcTeamStatus = NV_ERR_NOT_READY;
121     pMulticastFabricDesc->attachedGpusMask = 0;
122     pMulticastFabricDesc->alignment  = pAllocParams->alignment;
123     pMulticastFabricDesc->allocSize  = pAllocParams->allocSize;
124     pMulticastFabricDesc->pageSize   = pAllocParams->pageSize;
125     pMulticastFabricDesc->allocFlags = pAllocParams->allocFlags;
126     pMulticastFabricDesc->numMaxGpus = pAllocParams->numGpus;
127 
128     return pMulticastFabricDesc;
129 }
130 
131 static void
132 _memMulticastFabricDescriptorFlushClientsUnderLock
133 (
134     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
135 )
136 {
137     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
138 
139     while ((pNode = listHead(&pMulticastFabricDesc->waitingClientsList)) != NULL)
140     {
141         if (pNode->pOsEvent != NULL)
142         {
143             osSetEvent(NULL, pNode->pOsEvent);
144             NV_ASSERT_OK(osDereferenceObjectCount(pNode->pOsEvent));
145         }
146 
147         listRemove(&pMulticastFabricDesc->waitingClientsList, pNode);
148     }
149 
150     return;
151 }
152 
153 static NV_STATUS
154 _memMulticastFabricDescriptorEnqueueWaitUnderLock
155 (
156     NvHandle                         hClient,
157     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
158     NvP64                            pOsEvent,
159     Memory                          *pMemory
160 )
161 {
162     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
163     NvP64                             pValidatedOsEvent = NULL;
164     NV_STATUS                         status;
165 
166     if (pOsEvent != NULL)
167     {
168         status = osUserHandleToKernelPtr(hClient, pOsEvent, &pValidatedOsEvent);
169         if (status != NV_OK)
170             return status;
171     }
172 
173     pNode = listAppendNew(&pMulticastFabricDesc->waitingClientsList);
174     if (pNode == NULL)
175     {
176         if (pOsEvent != NULL)
177             osDereferenceObjectCount(pValidatedOsEvent);
178 
179         return NV_ERR_NO_MEMORY;
180     }
181 
182     pNode->pOsEvent = pValidatedOsEvent;
183     pNode->pMemory = pMemory;
184 
185     // In case the multicast object's memdesc is ready, unblock clients waiting on it
186     if (pMulticastFabricDesc->bMemdescInstalled)
187         _memMulticastFabricDescriptorFlushClientsUnderLock(pMulticastFabricDesc);
188 
189     return NV_OK;
190 }
191 
192 static void
193 _memMulticastFabricDescriptorDequeueWaitUnderLock
194 (
195     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
196     Memory                          *pMemory
197 )
198 {
199     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNode;
200     MEM_MULTICAST_FABRIC_CLIENT_INFO *pNodeNext;
201 
202     pNode = listHead(&pMulticastFabricDesc->waitingClientsList);
203 
204     // There can be multiple events per memory object, so delete all.
205     while (pNode != NULL)
206     {
207         pNodeNext = listNext(&pMulticastFabricDesc->waitingClientsList, pNode);
208 
209         if (pNode->pMemory == pMemory)
210         {
211             if (pNode->pOsEvent != NULL)
212                 osDereferenceObjectCount(pNode->pOsEvent);
213 
214             listRemove(&pMulticastFabricDesc->waitingClientsList, pNode);
215         }
216 
217         pNode = pNodeNext;
218     }
219 }
220 
221 static NV_STATUS
222 _memMulticastFabricGpuInfoAddUnderLock
223 (
224     MemoryMulticastFabric          *pMemoryMulticastFabric,
225     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
226 )
227 {
228     NV00FD_CTRL_ATTACH_GPU_PARAMS *pAttachParams = pParams->pParams;
229     Subdevice *pSubdevice = NULL;
230     MEM_MULTICAST_FABRIC_GPU_INFO *pNode;
231     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
232         pMemoryMulticastFabric->pMulticastFabricDesc;
233     OBJGPU *pGpu;
234     MEM_MULTICAST_FABRIC_GPU_INFO *pNodeItr;
235 
236     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
237         subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
238             pAttachParams->hSubdevice, &pSubdevice));
239 
240     pGpu = GPU_RES_GET_GPU(pSubdevice);
241 
242     if(!osMatchGpuOsInfo(pGpu, pParams->secInfo.gpuOsInfo))
243         return NV_ERR_INVALID_DEVICE;
244 
245     for (pNodeItr = listHead(&pMulticastFabricDesc->gpuInfoList);
246          pNodeItr != NULL;
247          pNodeItr = listNext(&pMulticastFabricDesc->gpuInfoList, pNodeItr))
248     {
249         if (pNodeItr->pGpu == pGpu)
250         {
251            NV_PRINTF(LEVEL_ERROR, "GPU %x has already attached\n",
252                                    pGpu->gpuInstance);
253            return NV_ERR_IN_USE;
254         }
255     }
256 
257     pNode = listAppendNew(&pMulticastFabricDesc->gpuInfoList);
258     if (pNode == NULL)
259         return NV_ERR_NO_MEMORY;
260 
261     pNode->pGpuOsInfo = pParams->secInfo.gpuOsInfo;
262     pNode->pGpu = GPU_RES_GET_GPU(pSubdevice);
263     pNode->pAttachMemInfoTree = NULL;
264     pNode->bMcflaAlloc = NV_FALSE;
265 
266     return NV_OK;
267 }
268 
269 static void
270 _memMulticastFabricGpuInfoRemoveUnderLock
271 (
272     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
273 )
274 {
275     MEM_MULTICAST_FABRIC_GPU_INFO *pNode = NULL;
276     THREAD_STATE_NODE *pThreadNode = NULL;
277     THREAD_STATE_FREE_CALLBACK freeCallback;
278 
279     NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL));
280 
281     while ((pNode = listHead(&pMulticastFabricDesc->gpuInfoList)) != NULL)
282     {
283         freeCallback.pCb = osReleaseGpuOsInfo;
284         freeCallback.pCbData = (void *)pNode->pGpuOsInfo;
285 
286         NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, &freeCallback));
287         listRemove(&pMulticastFabricDesc->gpuInfoList, pNode);
288     }
289 
290     pMulticastFabricDesc->attachedGpusMask = 0;
291     pMulticastFabricDesc->numAttachedGpus = 0;
292 }
293 
294 NV_STATUS
295 _memMulticastFabricSendInbandTeamSetupRequestV1UnderLock
296 (
297     OBJGPU                          *pGpu,
298     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
299 )
300 {
301     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
302     NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *sendDataParams;
303     nvlink_inband_mc_team_setup_req_msg_t *pMcTeamSetupReqMsg = NULL;
304     nvlink_inband_mc_team_setup_req_t *pMcTeamSetupReq = NULL;
305     MEM_MULTICAST_FABRIC_GPU_INFO *pNode;
306     NvU64 requestId;
307     NvU32 idx = 0;
308     NvU32 payloadSize;
309     NvU32 sendDataSize;
310     NV_STATUS status = NV_OK;
311 
312     sendDataParams = \
313         (NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *)
314         portMemAllocNonPaged(sizeof(NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS));
315 
316     if (sendDataParams == NULL)
317         return NV_ERR_NO_MEMORY;
318 
319     pMcTeamSetupReqMsg = \
320         (nvlink_inband_mc_team_setup_req_msg_t *)&sendDataParams->buffer[0];
321 
322     pMcTeamSetupReq = \
323         (nvlink_inband_mc_team_setup_req_t *)&pMcTeamSetupReqMsg->mcTeamSetupReq;
324 
325     payloadSize = (NvU32)(sizeof(nvlink_inband_mc_team_setup_req_t) + \
326                       (sizeof(pMcTeamSetupReq->gpuHandles[0]) * pMulticastFabricDesc->numMaxGpus));
327 
328     sendDataSize = (NvU32)(sizeof(nvlink_inband_msg_header_t) + payloadSize);
329 
330     NV_ASSERT((NvU32)sendDataSize <= sizeof(sendDataParams->buffer));
331 
332     portMemSet(sendDataParams, 0, sendDataSize);
333 
334     pMcTeamSetupReq->mcAllocSize = pMulticastFabricDesc->allocSize;
335     pMcTeamSetupReq->numGpuHandles = pMulticastFabricDesc->numMaxGpus;
336 
337     for (pNode = listHead(&pMulticastFabricDesc->gpuInfoList);
338          pNode != NULL;
339          pNode = listNext(&pMulticastFabricDesc->gpuInfoList, pNode))
340         pMcTeamSetupReq->gpuHandles[idx++] = pNode->gpuProbeHandle;
341 
342     NV_ASSERT(idx == pMcTeamSetupReq->numGpuHandles);
343 
344     sendDataParams->dataSize = sendDataSize;
345 
346     status = fabricInitInbandMsgHdr(&pMcTeamSetupReqMsg->msgHdr,
347                                     NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ,
348                                     payloadSize);
349 
350     if (status != NV_OK)
351         goto done;
352 
353     requestId = pMcTeamSetupReqMsg->msgHdr.requestId;
354 
355     status = fabricMulticastSetupCacheInsertUnderLock_IMPL(pFabric,
356                                                            requestId,
357                                                            pMulticastFabricDesc);
358     if (status != NV_OK)
359         goto done;
360 
361     status = knvlinkSendInbandData(pGpu, GPU_GET_KERNEL_NVLINK(pGpu), sendDataParams);
362     if (status != NV_OK)
363     {
364         fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric, requestId);
365         goto done;
366     }
367 
368     pMulticastFabricDesc->bInbandReqInProgress = NV_TRUE;
369     pMulticastFabricDesc->inbandReqId = requestId;
370 
371 done:
372     portMemFree(sendDataParams);
373 
374     return status;
375 }
376 
377 NV_STATUS
378 _memMulticastFabricSendInbandTeamReleaseRequestV1UnderLock
379 (
380     OBJGPU *pGpu,
381     NvU64   mcTeamHandle
382 )
383 {
384     NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *sendDataParams;
385     nvlink_inband_mc_team_release_req_msg_t *pMcTeamReleaseReqMsg = NULL;
386     nvlink_inband_mc_team_release_req_t *pMcTeamReleaseReq = NULL;
387     NvU32 payloadSize;
388     NvU32 sendDataSize;
389     NV_STATUS status = NV_OK;
390 
391     sendDataParams = \
392         (NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *)portMemAllocNonPaged(sizeof(NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS));
393 
394     if (sendDataParams == NULL)
395         return NV_ERR_NO_MEMORY;
396 
397     pMcTeamReleaseReqMsg = \
398         (nvlink_inband_mc_team_release_req_msg_t *)&sendDataParams->buffer[0];
399 
400     pMcTeamReleaseReq = \
401         (nvlink_inband_mc_team_release_req_t *)&pMcTeamReleaseReqMsg->mcTeamReleaseReq;
402 
403     payloadSize = (NvU32)(sizeof(nvlink_inband_mc_team_release_req_t));
404 
405     sendDataSize = (NvU32)(sizeof(nvlink_inband_msg_header_t) + payloadSize);
406 
407     portMemSet(sendDataParams, 0, sendDataSize);
408 
409     pMcTeamReleaseReq->mcTeamHandle = mcTeamHandle;
410 
411     status = fabricInitInbandMsgHdr(&pMcTeamReleaseReqMsg->msgHdr,
412                                     NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ,
413                                     payloadSize);
414     if (status != NV_OK)
415         goto done;
416 
417     sendDataParams->dataSize = sendDataSize;
418 
419     status = knvlinkSendInbandData(pGpu, GPU_GET_KERNEL_NVLINK(pGpu), sendDataParams);
420 
421 done:
422     portMemFree(sendDataParams);
423 
424     return status;
425 }
426 
427 NV_STATUS
428 _memMulticastFabricSendInbandTeamSetupRequestUnderlock
429 (
430     OBJGPU                          *pGpu,
431     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
432 )
433 {
434     NvU64 fmCaps;
435     NV_STATUS status = NV_OK;
436 
437     status = gpuFabricProbeGetfmCaps(pGpu->pGpuFabricProbeInfoKernel, &fmCaps);
438     if (status != NV_OK)
439         return status;
440 
441     if (!(fmCaps & NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1))
442         return NV_ERR_NOT_SUPPORTED;
443 
444     return _memMulticastFabricSendInbandTeamSetupRequestV1UnderLock(pGpu,
445                                                                     pMulticastFabricDesc);
446 }
447 
448 NV_STATUS
449 _memMulticastFabricSendInbandTeamReleaseRequestUnderLock
450 (
451     OBJGPU *pGpu,
452     NvU64   mcTeamHandle
453 )
454 {
455     NvU64 fmCaps;
456     NV_STATUS status = NV_OK;
457 
458     status = gpuFabricProbeGetfmCaps(pGpu->pGpuFabricProbeInfoKernel, &fmCaps);
459     if (status != NV_OK)
460         return status;
461 
462     if (!(fmCaps & NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1))
463         return NV_ERR_NOT_SUPPORTED;
464 
465     return _memMulticastFabricSendInbandTeamReleaseRequestV1UnderLock(pGpu,
466                                                                       mcTeamHandle);
467 }
468 
469 NV_STATUS
470 _memMulticastFabricSendInbandRequestUnderLock
471 (
472     OBJGPU                            *pGpu,
473     MEM_MULTICAST_FABRIC_DESCRIPTOR   *pMulticastFabricDesc,
474     MEM_MULTICAST_FABRIC_REQUEST_TYPE  requestType
475 )
476 {
477     NV_STATUS status = NV_OK;
478 
479     // If pGpu is NULL, pick the first one attached to the object.
480     if (pGpu == NULL)
481         pGpu = listHead(&pMulticastFabricDesc->gpuInfoList)->pGpu;
482 
483     switch (requestType)
484     {
485         case MEM_MULTICAST_FABRIC_TEAM_SETUP_REQUEST:
486             status = _memMulticastFabricSendInbandTeamSetupRequestUnderlock(pGpu,
487                                                                 pMulticastFabricDesc);
488             break;
489         case MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST:
490             status = _memMulticastFabricSendInbandTeamReleaseRequestUnderLock(pGpu,
491                                                    pMulticastFabricDesc->mcTeamHandle);
492             break;
493         default:
494             status = NV_ERR_NOT_SUPPORTED;
495             break;
496     }
497 
498     return status;
499 }
500 
501 static void
502 _memorymulticastfabricDetachMem
503 (
504     FABRIC_VASPACE     *pFabricVAS,
505     MEMORY_DESCRIPTOR  *pFabricMemDesc,
506     NODE               *pMemNode
507 )
508 {
509     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
510     MEMORY_DESCRIPTOR *pPhysMemDesc;
511     MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *pAttachMemInfoNode;
512 
513     pAttachMemInfoNode = \
514         (MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *)pMemNode->Data;
515     pPhysMemDesc = pAttachMemInfoNode->pPhysMemDesc;
516 
517     fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc,
518                                   pMemNode->keyStart,
519                                   pPhysMemDesc,
520                                   pAttachMemInfoNode->physMapLength);
521 
522     NV_ASSERT_OK(pRmApi->Free(pRmApi, pFabricVAS->hClient,
523                               pAttachMemInfoNode->hDupedPhysMem));
524 }
525 
526 static void
527 _memorymulticastfabricBatchDetachMem
528 (
529     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
530 )
531 {
532     MEMORY_DESCRIPTOR *pFabricMemDesc;
533     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuNode;
534     NODE *pMemNode;
535     FABRIC_VASPACE *pFabricVAS;
536 
537     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
538     NV_ASSERT_OR_RETURN_VOID(pFabricMemDesc != NULL);
539 
540     for (pGpuNode = listHead(&pMulticastFabricDesc->gpuInfoList);
541          pGpuNode != NULL;
542          pGpuNode = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuNode))
543     {
544         pFabricVAS = dynamicCast(pGpuNode->pGpu->pFabricVAS, FABRIC_VASPACE);
545         if (pFabricVAS == NULL)
546         {
547             NV_ASSERT(0);
548             continue;
549         }
550 
551         btreeEnumStart(0, &pMemNode, pGpuNode->pAttachMemInfoTree);
552         while (pMemNode != NULL)
553         {
554             _memorymulticastfabricDetachMem(pFabricVAS, pFabricMemDesc, pMemNode);
555 
556             btreeUnlink(pMemNode, &pGpuNode->pAttachMemInfoTree);
557             portMemFree(pMemNode->Data);
558 
559             btreeEnumStart(0, &pMemNode, pGpuNode->pAttachMemInfoTree);
560         }
561 
562         // Everything is detached during object cleanup, free MCFLA now.
563         if (pGpuNode->bMcflaAlloc)
564         {
565             fabricvaspaceBatchFree(pFabricVAS, &pFabricMemDesc->_pteArray[0],
566                                    1, 1);
567             pGpuNode->bMcflaAlloc = NV_FALSE;
568         }
569     }
570 }
571 
572 static void
573 _memMulticastFabricDescriptorFreeUnderLock
574 (
575     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc
576 )
577 {
578     if (pMulticastFabricDesc == NULL)
579         return;
580 
581     pMulticastFabricDesc->refCount--;
582 
583     if (pMulticastFabricDesc->refCount == 0)
584     {
585         if (pMulticastFabricDesc->pMemDesc != NULL)
586         {
587             NV_ASSERT(pMulticastFabricDesc->bMemdescInstalled);
588 
589             _memorymulticastfabricBatchDetachMem(pMulticastFabricDesc);
590 
591             _memMulticastFabricSendInbandRequestUnderLock(NULL, pMulticastFabricDesc,
592                                                           MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST);
593         }
594 
595         if (pMulticastFabricDesc->bInbandReqInProgress)
596         {
597             Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
598             OS_WAIT_QUEUE *pWq;
599             THREAD_STATE_NODE *pThreadNode = NULL;
600             THREAD_STATE_FREE_CALLBACK freeCallback;
601 
602             fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric,
603                                                           pMulticastFabricDesc->inbandReqId);
604 
605             NV_ASSERT_OK(osAllocWaitQueue(&pWq));
606 
607             if (pWq != NULL)
608             {
609                 NV_ASSERT_OK(fabricMulticastCleanupCacheInsertUnderLock_IMPL(pFabric,
610                                                                              pMulticastFabricDesc->inbandReqId,
611                                                                              pWq));
612 
613                 NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL));
614 
615                 freeCallback.pCb = fabricMulticastWaitOnTeamCleanupCallback;
616                 freeCallback.pCbData = (void *)pMulticastFabricDesc->inbandReqId;
617 
618                 NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, &freeCallback));
619             }
620         }
621 
622         _memMulticastFabricGpuInfoRemoveUnderLock(pMulticastFabricDesc);
623 
624         NV_ASSERT(listCount(&pMulticastFabricDesc->gpuInfoList) == 0);
625         listDestroy(&pMulticastFabricDesc->gpuInfoList);
626 
627         NV_ASSERT(pMulticastFabricDesc->numAttachedGpus == 0);
628         NV_ASSERT(pMulticastFabricDesc->attachedGpusMask == 0);
629 
630         NV_ASSERT(listCount(&pMulticastFabricDesc->waitingClientsList) == 0);
631         listDestroy(&pMulticastFabricDesc->waitingClientsList);
632 
633         memdescDestroy(pMulticastFabricDesc->pMemDesc);
634 
635         portMemFree(pMulticastFabricDesc);
636     }
637 }
638 
639 NV_STATUS
640 _memMulticastFabricConstructUnderLock
641 (
642     MemoryMulticastFabric        *pMemoryMulticastFabric,
643     CALL_CONTEXT                 *pCallContext,
644     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
645 )
646 {
647     Memory                           *pMemory       = staticCast(pMemoryMulticastFabric, Memory);
648     NV00FD_ALLOCATION_PARAMETERS     *pAllocParams  = pParams->pAllocParams;
649     MEM_MULTICAST_FABRIC_DESCRIPTOR  *pMulticastFabricDesc;
650     NV_STATUS                         status        = NV_OK;
651 
652     pMulticastFabricDesc = _memMulticastFabricDescriptorAllocUnderLock(pMemoryMulticastFabric,
653                                                                        pAllocParams);
654 
655     if (pMulticastFabricDesc == NULL)
656         return NV_ERR_NO_MEMORY;
657 
658     status = _memMulticastFabricDescriptorEnqueueWaitUnderLock(pParams->hClient,
659                                                                pMulticastFabricDesc,
660                                                                pAllocParams->pOsEvent,
661                                                                pMemory);
662     if (status != NV_OK)
663         goto fail;
664 
665     pMemoryMulticastFabric->pMulticastFabricDesc = pMulticastFabricDesc;
666 
667     return NV_OK;
668 
669 fail:
670     _memMulticastFabricDescriptorFreeUnderLock(pMulticastFabricDesc);
671 
672     return status;
673 }
674 
675 NV_STATUS
676 _memMulticastFabricCreateMemDescUnderLock
677 (
678     MEM_MULTICAST_FABRIC_DESCRIPTOR  *pMulticastFabricDesc,
679     NvU64                             mcAddressBase,
680     MEMORY_DESCRIPTOR               **ppMemDesc
681 )
682 {
683     NV_STATUS status;
684     MEMORY_DESCRIPTOR *pTempMemDesc = NULL;
685 
686     status = memdescCreate(&pTempMemDesc, NULL, pMulticastFabricDesc->allocSize,
687                            0, NV_TRUE, ADDR_FABRIC_MC, NV_MEMORY_UNCACHED,
688                            MEMDESC_FLAGS_NONE);
689     if (status != NV_OK)
690     {
691         NV_PRINTF(LEVEL_ERROR,
692                   "Failed to allocate memory descriptor for multicast object\n");
693         return status;
694     }
695 
696     memdescSetPte(pTempMemDesc, AT_GPU, 0, mcAddressBase);
697 
698     memdescSetPageSize(pTempMemDesc, AT_GPU, pMulticastFabricDesc->pageSize);
699 
700     pTempMemDesc->_pteKind = NV_MMU_PTE_KIND_SMSKED_MESSAGE;
701 
702     memdescSetFlag(pTempMemDesc, MEMDESC_FLAGS_SET_KIND, NV_TRUE);
703 
704     memdescSetGpuCacheAttrib(pTempMemDesc, NV_MEMORY_UNCACHED);
705 
706     *ppMemDesc = pTempMemDesc;
707 
708     return NV_OK;
709 }
710 
711 void
712 _memMulticastFabricInstallMemDescUnderLock
713 (
714     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
715     MEMORY_DESCRIPTOR               *pMemDesc,
716     NvU64                            mcTeamHandle,
717     NV_STATUS                        status
718 )
719 {
720     NV_ASSERT(pMulticastFabricDesc->pMemDesc == NULL);
721 
722     pMulticastFabricDesc->pMemDesc = pMemDesc;
723     pMulticastFabricDesc->bMemdescInstalled = NV_TRUE;
724     pMulticastFabricDesc->mcTeamHandle = mcTeamHandle;
725     pMulticastFabricDesc->mcTeamStatus = status;
726 
727     _memMulticastFabricDescriptorFlushClientsUnderLock(pMulticastFabricDesc);
728 }
729 
730 static NV_STATUS
731 _memorymulticastFabricAllocVasUnderLock
732 (
733     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
734     MEMORY_DESCRIPTOR               *pFabricMemDesc
735 )
736 {
737     NV_STATUS status = NV_OK;
738     FABRIC_VASPACE *pFabricVAS;
739     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
740     VAS_ALLOC_FLAGS flags = { 0 };
741     NvU64 gpuProbeHandle;
742 
743     for (pGpuInfo = listHead(&pMulticastFabricDesc->gpuInfoList);
744          pGpuInfo != NULL;
745          pGpuInfo = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuInfo))
746     {
747         OBJGPU *pGpu = pGpuInfo->pGpu;
748 
749         pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
750         if (pFabricVAS == NULL)
751         {
752             status = NV_ERR_INVALID_STATE;
753             goto cleanup;
754         }
755 
756         //
757         // The fabric handle might not be available or have changed, if fabric
758         // state was ever invalidated while MCFLA allocation was in progress.
759         //
760         status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
761                                                   &gpuProbeHandle);
762         if ((status != NV_OK) || (pGpuInfo->gpuProbeHandle != gpuProbeHandle))
763         {
764             NV_PRINTF(LEVEL_ERROR, "Attached GPU's probe handle is stale\n");
765             status = NV_ERR_INVALID_DEVICE;
766             goto cleanup;
767         }
768 
769         status = fabricvaspaceAllocMulticast(pFabricVAS,
770                                     memdescGetPageSize(pFabricMemDesc, AT_GPU),
771                                     pMulticastFabricDesc->alignment,
772                                     flags, pFabricMemDesc->_pteArray[0],
773                                     pMulticastFabricDesc->allocSize);
774         if (status != NV_OK)
775         {
776             NV_PRINTF(LEVEL_ERROR,
777                       "Fabric VA space alloc failed for GPU %d\n",
778                       pGpuInfo->pGpu->gpuInstance);
779             goto cleanup;
780         }
781 
782         pGpuInfo->bMcflaAlloc = NV_TRUE;
783     }
784 
785     return NV_OK;
786 
787 cleanup:
788     for (pGpuInfo = listHead(&pMulticastFabricDesc->gpuInfoList);
789          pGpuInfo != NULL;
790          pGpuInfo = listNext(&pMulticastFabricDesc->gpuInfoList, pGpuInfo))
791     {
792         if (pGpuInfo->bMcflaAlloc)
793         {
794             pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
795 
796             fabricvaspaceBatchFree(pFabricVAS, &pFabricMemDesc->_pteArray[0],
797                                    1, 1);
798 
799             pGpuInfo->bMcflaAlloc = NV_FALSE;
800         }
801     }
802 
803     return status;
804 }
805 
806 NV_STATUS
807 _memMulticastFabricAttachGpuPostProcessorUnderLock
808 (
809     OBJGPU                          *pGpu,
810     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc,
811     NV_STATUS                        mcTeamStatus,
812     NvU64                            mcTeamHandle,
813     NvU64                            mcAddressBase,
814     NvU64                            mcAddressSize
815 )
816 {
817     NV_STATUS status = mcTeamStatus;
818     MEMORY_DESCRIPTOR *pMemDesc = NULL;
819 
820     //
821     // FM is never expected to return NV_ERR_NOT_READY
822     // as part of the inband response.
823     //
824     NV_ASSERT(mcTeamStatus != NV_ERR_NOT_READY);
825 
826     if (mcTeamStatus != NV_OK)
827         goto installMemDesc;
828 
829     if (mcAddressSize < pMulticastFabricDesc->allocSize)
830     {
831         NV_PRINTF(LEVEL_ERROR,
832                   "Insufficient mcAddressSize returned from Fabric Manager\n");
833         status = NV_ERR_INSUFFICIENT_RESOURCES;
834         goto installMemDesc;
835     }
836 
837     if (!NV_IS_ALIGNED64(mcAddressBase, NV_MEMORY_MULTICAST_FABRIC_PAGE_SIZE_512M))
838     {
839         NV_PRINTF(LEVEL_ERROR,
840                   "Insufficient mcAddressSize returned from Fabric Manager\n");
841         status = NV_ERR_INVALID_ADDRESS;
842         goto installMemDesc;
843     }
844 
845     status = _memMulticastFabricCreateMemDescUnderLock(pMulticastFabricDesc,
846                                                        mcAddressBase, &pMemDesc);
847     if (status != NV_OK)
848     {
849         NV_PRINTF(LEVEL_ERROR, "Failed to allocate fabric memdesc\n");
850         goto installMemDesc;
851     }
852 
853     status = _memorymulticastFabricAllocVasUnderLock(pMulticastFabricDesc,
854                                                      pMemDesc);
855     if (status != NV_OK)
856     {
857         NV_PRINTF(LEVEL_ERROR, "Failed to allocate fabric VAS\n");
858         memdescDestroy(pMemDesc);
859         pMemDesc = NULL;
860         goto installMemDesc;
861     }
862 
863 installMemDesc:
864     _memMulticastFabricInstallMemDescUnderLock(pMulticastFabricDesc,
865                                                pMemDesc,
866                                                mcTeamHandle,
867                                                status);
868 
869      if ((status != NV_OK) && (mcTeamStatus == NV_OK))
870          _memMulticastFabricSendInbandRequestUnderLock(pGpu, pMulticastFabricDesc,
871                                         MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST);
872 
873     return status;
874 }
875 
876 void
877 _memorymulticastfabricDestructUnderLock
878 (
879     MemoryMulticastFabric *pMemoryMulticastFabric
880 )
881 {
882     Memory *pMemory = staticCast(pMemoryMulticastFabric, Memory);
883 
884     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
885         pMemoryMulticastFabric->pMulticastFabricDesc;
886 
887     memDestructCommon(pMemory);
888 
889     _memMulticastFabricDescriptorDequeueWaitUnderLock(pMulticastFabricDesc, pMemory);
890 
891     _memMulticastFabricDescriptorFreeUnderLock(pMulticastFabricDesc);
892 }
893 
894 NV_STATUS
895 memorymulticastfabricTeamSetupResponseCallback
896 (
897     NvU32                                           gpuInstance,
898     NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS *pInbandRcvParams
899 )
900 {
901     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
902     nvlink_inband_mc_team_setup_rsp_msg_t *pMcTeamSetupRspMsg;
903     nvlink_inband_mc_team_setup_rsp_t *pMcTeamSetupRsp;
904     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
905     NvU64 requestId;
906     NV_STATUS mcTeamStatus;
907     NvU64 mcTeamHandle = 0;
908     NvU64 mcAddressBase = 0;
909     NvU64 mcAddressSize = 0;
910     NvU8 *pRsvd = NULL;
911     OBJGPU *pGpu;
912 
913     NV_ASSERT(pInbandRcvParams != NULL);
914     NV_ASSERT(rmGpuLockIsOwner());
915 
916     if ((pGpu = gpumgrGetGpu(gpuInstance)) == NULL)
917     {
918         NV_ASSERT_FAILED("Invalid GPU instance");
919         return NV_ERR_INVALID_ARGUMENT;
920     }
921 
922     pMcTeamSetupRspMsg = \
923         (nvlink_inband_mc_team_setup_rsp_msg_t *)&pInbandRcvParams->data[0];
924 
925     pMcTeamSetupRsp = \
926         (nvlink_inband_mc_team_setup_rsp_t *)&pMcTeamSetupRspMsg->mcTeamSetupRsp;
927 
928     requestId = pMcTeamSetupRspMsg->msgHdr.requestId;
929 
930     mcTeamStatus = pMcTeamSetupRspMsg->msgHdr.status;
931 
932     if (mcTeamStatus == NV_OK)
933     {
934         mcTeamHandle = pMcTeamSetupRsp->mcTeamHandle;
935         mcAddressBase = pMcTeamSetupRsp->mcAddressBase;
936         mcAddressSize = pMcTeamSetupRsp->mcAddressSize;
937 
938         // Make sure that the reserved fields are initialized to 0
939         pRsvd = &pMcTeamSetupRsp->reserved[0];
940 
941         NV_ASSERT((pRsvd[0] == 0) && portMemCmp(pRsvd, pRsvd + 1,
942                   (sizeof(pMcTeamSetupRsp->reserved) - 1)) == 0);
943     }
944 
945     fabricMulticastFabricOpsMutexAcquire(pFabric);
946 
947     pMulticastFabricDesc = \
948         fabricMulticastSetupCacheGetUnderLock_IMPL(pFabric, requestId);
949 
950     if (pMulticastFabricDesc != NULL)
951     {
952         pMulticastFabricDesc->bInbandReqInProgress = NV_FALSE;
953 
954         pMulticastFabricDesc->inbandReqId = 0;
955 
956         fabricMulticastSetupCacheDeleteUnderLock_IMPL(pFabric, requestId);
957 
958         (void)_memMulticastFabricAttachGpuPostProcessorUnderLock(pGpu,
959                                                                  pMulticastFabricDesc,
960                                                                  mcTeamStatus,
961                                                                  mcTeamHandle,
962                                                                  mcAddressBase,
963                                                                  mcAddressSize);
964     }
965     else
966     {
967         OS_WAIT_QUEUE *pWq;
968 
969         if (mcTeamStatus == NV_OK)
970             (void)_memMulticastFabricSendInbandTeamReleaseRequestUnderLock(pGpu,
971                                                                     mcTeamHandle);
972 
973         //
974         // Check if there is any thread waiting for team release and
975         // wake it up.
976         //
977         // The multicast fabric descriptor could have undergone the
978         // destruct sequence while an inband team setup request was in
979         // progress with FM.
980         //
981         // In such a scenario the last thread to free the multicast
982         // descriptor is put to sleep until the team setup response
983         // is received and a subsequent team release request is sent.
984         //
985 
986         pWq = (OS_WAIT_QUEUE *)fabricMulticastCleanupCacheGetUnderLock_IMPL(pFabric,
987                                                                             requestId);
988 
989         if (pWq != NULL)
990              osWakeUp(pWq);
991     }
992 
993     fabricMulticastFabricOpsMutexRelease(pFabric);
994 
995     return NV_OK;
996 }
997 
998 NV_STATUS
999 memorymulticastfabricConstruct_IMPL
1000 (
1001     MemoryMulticastFabric        *pMemoryMulticastFabric,
1002     CALL_CONTEXT                 *pCallContext,
1003     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
1004 )
1005 {
1006     Fabric                       *pFabric      = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1007     NV00FD_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;
1008     NV_STATUS                     status       = NV_OK;
1009 
1010     if (RS_IS_COPY_CTOR(pParams))
1011     {
1012         return memorymulticastfabricCopyConstruct_IMPL(pMemoryMulticastFabric,
1013                                                        pCallContext,
1014                                                        pParams);
1015     }
1016 
1017     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, _memMulticastFabricValidateAllocParams(pAllocParams));
1018 
1019     fabricMulticastFabricOpsMutexAcquire(pFabric);
1020 
1021     status = _memMulticastFabricConstructUnderLock(pMemoryMulticastFabric,
1022                                                    pCallContext,
1023                                                    pParams);
1024 
1025     fabricMulticastFabricOpsMutexRelease(pFabric);
1026 
1027     return status;
1028 }
1029 
1030 static NV_STATUS
1031 _memorymulticastfabricCtrlAttachGpu
1032 (
1033     MemoryMulticastFabric         *pMemoryMulticastFabric,
1034     NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
1035 )
1036 {
1037     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1038                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1039     NV_STATUS status = NV_OK;
1040     Subdevice *pSubdevice;
1041     OBJGPU *pGpu;
1042     FABRIC_VASPACE *pFabricVAS;
1043     NvU64 gpuProbeHandle;
1044     MEM_MULTICAST_FABRIC_GPU_INFO *pNode = NULL;
1045     CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
1046 
1047     if (pParams->flags != 0)
1048     {
1049         NV_PRINTF(LEVEL_ERROR, "flags passed for attach mem must be zero\n");
1050         return NV_ERR_INVALID_ARGUMENT;
1051     }
1052 
1053     // Check if the Multicast FLA object has any additional slots for GPUs
1054     if (pMulticastFabricDesc->numAttachedGpus == pMulticastFabricDesc->numMaxGpus)
1055     {
1056         NV_PRINTF(LEVEL_ERROR, "Max no. of GPUs have already attached!\n");
1057         return NV_ERR_INVALID_OPERATION;
1058     }
1059 
1060     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
1061         subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
1062             pParams->hSubdevice, &pSubdevice));
1063 
1064     pGpu = GPU_RES_GET_GPU(pSubdevice);
1065 
1066     if (RMCFG_FEATURE_PLATFORM_WINDOWS ||
1067         gpuIsCCFeatureEnabled(pGpu) ||
1068         IS_VIRTUAL(pGpu))
1069     {
1070         NV_PRINTF(LEVEL_ERROR,
1071                   "Multicast attach not supported on Windows/CC/vGPU modes\n");
1072         return NV_ERR_NOT_SUPPORTED;
1073     }
1074 
1075     status = _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric,
1076                                                     pCallContext->pControlParams);
1077     if (status != NV_OK)
1078     {
1079         NV_PRINTF(LEVEL_ERROR, "Failed to populate GPU info\n");
1080         return status;
1081     }
1082 
1083     pNode = listTail(&pMulticastFabricDesc->gpuInfoList);
1084 
1085     status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
1086                                               &gpuProbeHandle);
1087     if (status != NV_OK)
1088     {
1089         NV_PRINTF(LEVEL_ERROR,
1090                   "Attaching GPU does not have a valid probe handle\n");
1091         goto fail;
1092     }
1093 
1094     pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
1095     if (pFabricVAS == NULL)
1096     {
1097         NV_PRINTF(LEVEL_ERROR,
1098                   "Fabric vaspace object not available for GPU %x\n",
1099                   pGpu->gpuInstance);
1100         status = NV_ERR_NOT_SUPPORTED;
1101         goto fail;
1102     }
1103 
1104     pNode->gpuProbeHandle = gpuProbeHandle;
1105 
1106     if ((pMulticastFabricDesc->numAttachedGpus + 1)  == pMulticastFabricDesc->numMaxGpus)
1107     {
1108         status = _memMulticastFabricSendInbandRequestUnderLock(NULL, pMulticastFabricDesc,
1109                                             MEM_MULTICAST_FABRIC_TEAM_SETUP_REQUEST);
1110         if (status != NV_OK)
1111         {
1112             NV_PRINTF(LEVEL_ERROR,
1113                       "Inband request submission to FM for Multicast Team Setup failed!\n");
1114             goto fail;
1115         }
1116     }
1117 
1118     pMulticastFabricDesc->numAttachedGpus++;
1119     pMulticastFabricDesc->attachedGpusMask |= NVBIT32(pGpu->gpuInstance);
1120 
1121     return NV_OK;
1122 
1123 fail:
1124     // Remove GPU OS info added in the prologue.
1125     listRemove(&pMulticastFabricDesc->gpuInfoList, pNode);
1126 
1127     return status;
1128 }
1129 
1130 NV_STATUS
1131 memorymulticastfabricCtrlAttachGpu_IMPL
1132 (
1133     MemoryMulticastFabric         *pMemoryMulticastFabric,
1134     NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
1135 )
1136 {
1137     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1138     NV_STATUS status = NV_OK;
1139 
1140     fabricMulticastFabricOpsMutexAcquire(pFabric);
1141 
1142     status = _memorymulticastfabricCtrlAttachGpu(pMemoryMulticastFabric,
1143                                                  pParams);
1144 
1145     fabricMulticastFabricOpsMutexRelease(pFabric);
1146 
1147     return status;
1148 }
1149 
1150 static MEM_MULTICAST_FABRIC_GPU_INFO*
1151 _memorymulticastfabricGetAttchedGpuInfo
1152 (
1153     MemoryMulticastFabric  *pMemoryMulticastFabric,
1154     NvHandle                hSubdevice
1155 )
1156 {
1157     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1158                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1159     MEM_MULTICAST_FABRIC_GPU_INFO *pNodeItr;
1160     Subdevice *pSubdevice = NULL;
1161     NV_STATUS status;
1162 
1163     status = subdeviceGetByHandle(RES_GET_CLIENT(pMemoryMulticastFabric),
1164                                   hSubdevice, &pSubdevice);
1165     if (status != NV_OK)
1166         return NULL;
1167 
1168     for (pNodeItr = listHead(&pMulticastFabricDesc->gpuInfoList);
1169          pNodeItr != NULL;
1170          pNodeItr = listNext(&pMulticastFabricDesc->gpuInfoList, pNodeItr))
1171     {
1172         if (pNodeItr->pGpu == GPU_RES_GET_GPU(pSubdevice))
1173             return pNodeItr;
1174     }
1175 
1176     return NULL;
1177 }
1178 
1179 static NV_STATUS
1180 _memorymulticastfabricCtrlDetachMem
1181 (
1182     MemoryMulticastFabric         *pMemoryMulticastFabric,
1183     NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
1184 )
1185 {
1186     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1187                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1188     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
1189     NODE *pNode;
1190     MEMORY_DESCRIPTOR *pFabricMemDesc;
1191     FABRIC_VASPACE *pFabricVAS;
1192     NV_STATUS status;
1193 
1194     if (pParams->flags != 0)
1195         return NV_ERR_INVALID_ARGUMENT;
1196 
1197     pGpuInfo = _memorymulticastfabricGetAttchedGpuInfo(pMemoryMulticastFabric,
1198                                                        pParams->hSubdevice);
1199     if (pGpuInfo == NULL)
1200         return NV_ERR_INVALID_DEVICE;
1201 
1202     status = btreeSearch(pParams->offset, &pNode, pGpuInfo->pAttachMemInfoTree);
1203     if (status != NV_OK)
1204         return status;
1205 
1206     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
1207     NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_STATE);
1208 
1209     pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
1210     NV_ASSERT_OR_RETURN(pFabricVAS != NULL, NV_ERR_INVALID_STATE);
1211 
1212     _memorymulticastfabricDetachMem(pFabricVAS, pFabricMemDesc, pNode);
1213 
1214     btreeUnlink(pNode, &pGpuInfo->pAttachMemInfoTree);
1215     portMemFree(pNode->Data);
1216 
1217     return NV_OK;
1218 }
1219 
1220 NV_STATUS
1221 memorymulticastfabricCtrlDetachMem_IMPL
1222 (
1223     MemoryMulticastFabric         *pMemoryMulticastFabric,
1224     NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
1225 )
1226 {
1227     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1228     NV_STATUS status = NV_OK;
1229 
1230     fabricMulticastFabricOpsMutexAcquire(pFabric);
1231 
1232     status = _memorymulticastfabricCtrlDetachMem(pMemoryMulticastFabric,
1233                                                  pParams);
1234 
1235     fabricMulticastFabricOpsMutexRelease(pFabric);
1236 
1237     return status;
1238 }
1239 
1240 static NV_STATUS
1241 _memorymulticastfabricValidatePhysMem
1242 (
1243     MemoryMulticastFabric *pMemoryMulticastFabric,
1244     NvHandle               hPhysMem,
1245     OBJGPU                *pAttachedGpu,
1246     MEMORY_DESCRIPTOR    **ppPhysMemDesc
1247 )
1248 {
1249     RsResourceRef *pPhysmemRef;
1250     MEMORY_DESCRIPTOR *pPhysMemDesc;
1251     NvU64 physPageSize;
1252     NV_STATUS status;
1253     Memory *pMemory;
1254 
1255     status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
1256                                       hPhysMem, &pPhysmemRef);
1257     if (status != NV_OK)
1258     {
1259         NV_PRINTF(LEVEL_ERROR,
1260                   "Failed to get resource in resserv for physmem handle\n");
1261 
1262         return status;
1263     }
1264 
1265     pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
1266     if (pMemory == NULL)
1267     {
1268         NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
1269         return NV_ERR_INVALID_OBJECT_HANDLE;
1270     }
1271 
1272     pPhysMemDesc = pMemory->pMemDesc;
1273     if (pPhysMemDesc == NULL)
1274     {
1275         NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
1276         return NV_ERR_INVALID_OBJECT_HANDLE;
1277     }
1278 
1279     if (memdescGetAddressSpace(pPhysMemDesc) != ADDR_FBMEM ||
1280         (pAttachedGpu != pPhysMemDesc->pGpu))
1281     {
1282         NV_PRINTF(LEVEL_ERROR, "Invalid physmem handle passed\n");
1283 
1284         return NV_ERR_INVALID_ARGUMENT;
1285     }
1286 
1287     physPageSize = memdescGetPageSize(pPhysMemDesc, AT_GPU);
1288     if ((physPageSize != RM_PAGE_SIZE_HUGE) &&
1289         (physPageSize != RM_PAGE_SIZE_512M))
1290     {
1291         NV_PRINTF(LEVEL_ERROR, "Physmem page size should be 2MB\n");
1292 
1293         return NV_ERR_INVALID_ARGUMENT;
1294     }
1295 
1296     *ppPhysMemDesc = pPhysMemDesc;
1297 
1298     return NV_OK;
1299 }
1300 
1301 static NV_STATUS
1302 _memorymulticastfabricCtrlAttachMem
1303 (
1304     MemoryMulticastFabric         *pMemoryMulticastFabric,
1305     NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
1306 )
1307 {
1308     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc = \
1309                                 pMemoryMulticastFabric->pMulticastFabricDesc;
1310     MEM_MULTICAST_FABRIC_GPU_INFO *pGpuInfo;
1311     NV_STATUS status;
1312     MEMORY_DESCRIPTOR *pPhysMemDesc;
1313     MEMORY_DESCRIPTOR *pFabricMemDesc;
1314     NvHandle hDupedPhysMem = 0;
1315     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1316     FABRIC_VASPACE *pFabricVAS;
1317     MEM_MULTICAST_FABRIC_ATTACH_MEM_INFO_NODE *pNode;
1318 
1319     if (pParams->flags != 0)
1320         return NV_ERR_INVALID_ARGUMENT;
1321 
1322     pGpuInfo = _memorymulticastfabricGetAttchedGpuInfo(pMemoryMulticastFabric,
1323                                                        pParams->hSubdevice);
1324     if (pGpuInfo == NULL)
1325         return NV_ERR_INVALID_DEVICE;
1326 
1327     status = _memorymulticastfabricValidatePhysMem(pMemoryMulticastFabric,
1328                                                    pParams->hMemory,
1329                                                    pGpuInfo->pGpu,
1330                                                    &pPhysMemDesc);
1331     if (status != NV_OK)
1332     {
1333         NV_PRINTF(LEVEL_ERROR, "Failed to validate physmem handle\n");
1334         return status;
1335     }
1336 
1337     pFabricVAS = dynamicCast(pGpuInfo->pGpu->pFabricVAS, FABRIC_VASPACE);
1338     NV_ASSERT_OR_RETURN(pFabricVAS != NULL, NV_ERR_INVALID_STATE);
1339 
1340     pFabricMemDesc = pMulticastFabricDesc->pMemDesc;
1341     NV_ASSERT_OR_RETURN(pFabricMemDesc != NULL, NV_ERR_INVALID_STATE);
1342 
1343     NV_ASSERT_OR_RETURN(pGpuInfo->bMcflaAlloc, NV_ERR_INVALID_STATE);
1344 
1345     status = pRmApi->DupObject(pRmApi, pFabricVAS->hClient,
1346                                pFabricVAS->hDevice, &hDupedPhysMem,
1347                                RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
1348                                pParams->hMemory, 0);
1349     if (status != NV_OK)
1350     {
1351         NV_PRINTF(LEVEL_ERROR, "Failed to dup physmem handle\n");
1352         return status;
1353     }
1354 
1355     status = fabricvaspaceMapPhysMemdesc(pFabricVAS,
1356                                          pFabricMemDesc,
1357                                          pParams->offset,
1358                                          pPhysMemDesc,
1359                                          pParams->mapOffset,
1360                                          pParams->mapLength,
1361                                          0);
1362     if (status != NV_OK)
1363     {
1364         NV_PRINTF(LEVEL_ERROR, "Failed to map FLA\n");
1365         goto freeDupedMem;
1366     }
1367 
1368     pNode = portMemAllocNonPaged(sizeof(*pNode));
1369     if (pNode == NULL)
1370     {
1371         status = NV_ERR_NO_MEMORY;
1372         goto unmapVas;
1373     }
1374 
1375     portMemSet(pNode, 0, sizeof(*pNode));
1376 
1377     pNode->node.keyStart = pParams->offset;
1378     pNode->node.keyEnd   = pParams->offset;
1379     pNode->physMapLength = pParams->mapLength;
1380     pNode->pPhysMemDesc  = pPhysMemDesc;
1381     pNode->hDupedPhysMem = hDupedPhysMem;
1382     pNode->node.Data     = pNode;
1383 
1384     status = btreeInsert(&pNode->node, &pGpuInfo->pAttachMemInfoTree);
1385     if (status != NV_OK)
1386     {
1387         NV_PRINTF(LEVEL_ERROR, "Failed to track attach mem info\n");
1388         goto freeNode;
1389     }
1390 
1391     return NV_OK;
1392 
1393 freeNode:
1394     portMemFree(pNode);
1395 
1396 unmapVas:
1397     fabricvaspaceUnmapPhysMemdesc(pFabricVAS, pFabricMemDesc, pParams->offset,
1398                                   pPhysMemDesc, pParams->mapLength);
1399 
1400 freeDupedMem:
1401     NV_ASSERT_OK(pRmApi->Free(pRmApi, pFabricVAS->hClient, hDupedPhysMem));
1402 
1403     return status;
1404 }
1405 
1406 NV_STATUS
1407 memorymulticastfabricCtrlAttachMem_IMPL
1408 (
1409     MemoryMulticastFabric         *pMemoryMulticastFabric,
1410     NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
1411 )
1412 {
1413     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1414     NV_STATUS status = NV_OK;
1415 
1416     fabricMulticastFabricOpsMutexAcquire(pFabric);
1417 
1418     status = _memorymulticastfabricCtrlAttachMem(pMemoryMulticastFabric,
1419                                                  pParams);
1420 
1421     fabricMulticastFabricOpsMutexRelease(pFabric);
1422 
1423     return status;
1424 }
1425 
1426 void
1427 memorymulticastfabricDestruct_IMPL
1428 (
1429     MemoryMulticastFabric *pMemoryMulticastFabric
1430 )
1431 {
1432     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1433 
1434     fabricMulticastFabricOpsMutexAcquire(pFabric);
1435 
1436     _memorymulticastfabricDestructUnderLock(pMemoryMulticastFabric);
1437 
1438     fabricMulticastFabricOpsMutexRelease(pFabric);
1439 }
1440 
1441 NvBool
1442 memorymulticastfabricCanCopy_IMPL
1443 (
1444     MemoryMulticastFabric *pMemoryMulticastFabric
1445 )
1446 {
1447     return NV_TRUE;
1448 }
1449 
1450 NV_STATUS
1451 memorymulticastfabricCopyConstruct_IMPL
1452 (
1453     MemoryMulticastFabric        *pMemoryMulticastFabric,
1454     CALL_CONTEXT                 *pCallContext,
1455     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
1456 )
1457 {
1458     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1459     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1460 
1461     fabricMulticastFabricOpsMutexAcquire(pFabric);
1462 
1463     MemoryMulticastFabric *pSourceMemoryMulticastFabric =
1464         dynamicCast(pParams->pSrcRef->pResource, MemoryMulticastFabric);
1465 
1466     pMulticastFabricDesc = pSourceMemoryMulticastFabric->pMulticastFabricDesc;
1467 
1468     pMemoryMulticastFabric->pMulticastFabricDesc = pMulticastFabricDesc;
1469 
1470     pMulticastFabricDesc->refCount++;
1471 
1472     fabricMulticastFabricOpsMutexRelease(pFabric);
1473 
1474     return NV_OK;
1475 }
1476 
1477 static NV_STATUS
1478 _memorymulticastfabricCtrlGetInfo
1479 (
1480     MemoryMulticastFabric       *pMemoryMulticastFabric,
1481     NV00FD_CTRL_GET_INFO_PARAMS *pParams
1482 )
1483 {
1484     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1485 
1486     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1487 
1488     pParams->alignment       = pMulticastFabricDesc->alignment;
1489     pParams->allocSize       = pMulticastFabricDesc->allocSize;
1490     pParams->pageSize        = pMulticastFabricDesc->pageSize;
1491     pParams->numMaxGpus      = pMulticastFabricDesc->numMaxGpus;
1492     pParams->numAttachedGpus = pMulticastFabricDesc->numAttachedGpus;
1493 
1494     return NV_OK;
1495 }
1496 
1497 NV_STATUS
1498 memorymulticastfabricCtrlGetInfo_IMPL
1499 (
1500     MemoryMulticastFabric       *pMemoryMulticastFabric,
1501     NV00FD_CTRL_GET_INFO_PARAMS *pParams
1502 )
1503 {
1504     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1505     NV_STATUS status = NV_OK;
1506 
1507     fabricMulticastFabricOpsMutexAcquire(pFabric);
1508 
1509     status = _memorymulticastfabricCtrlGetInfo(pMemoryMulticastFabric,
1510                                                pParams);
1511 
1512     fabricMulticastFabricOpsMutexRelease(pFabric);
1513 
1514     return status;
1515 }
1516 
1517 NV_STATUS
1518 memorymulticastfabricIsReady_IMPL
1519 (
1520     MemoryMulticastFabric *pMemoryMulticastFabric,
1521     NvBool                 bCopyConstructorContext
1522 )
1523 {
1524     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1525     Memory *pMemory = staticCast(pMemoryMulticastFabric, Memory);
1526     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1527     NV_STATUS mcTeamStatus;
1528 
1529     fabricMulticastFabricOpsMutexAcquire(pFabric);
1530 
1531     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1532     mcTeamStatus = pMulticastFabricDesc->mcTeamStatus;
1533 
1534     if (bCopyConstructorContext && (mcTeamStatus == NV_ERR_NOT_READY))
1535     {
1536         fabricMulticastFabricOpsMutexRelease(pFabric);
1537         return NV_OK;
1538     }
1539 
1540     if (pMemory->pMemDesc != pMulticastFabricDesc->pMemDesc)
1541     {
1542         // This function only initializes pMemory so it should never fail.
1543         NV_ASSERT_OK(memConstructCommon(pMemory,
1544                                         NV_MEMORY_MULTICAST_FABRIC,
1545                                         0, pMulticastFabricDesc->pMemDesc,
1546                                         0, NULL, 0, 0, 0, 0,
1547                                         NVOS32_MEM_TAG_NONE, NULL));
1548     }
1549 
1550     fabricMulticastFabricOpsMutexRelease(pFabric);
1551 
1552     return mcTeamStatus;
1553 }
1554 
1555 static NV_STATUS
1556 _memorymulticastfabricCtrlRegisterEvent
1557 (
1558     MemoryMulticastFabric             *pMemoryMulticastFabric,
1559     NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
1560 )
1561 {
1562     Memory    *pMemory = staticCast(pMemoryMulticastFabric, Memory);
1563     NvHandle   hClient = RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric);
1564 
1565     return _memMulticastFabricDescriptorEnqueueWaitUnderLock(hClient,
1566            pMemoryMulticastFabric->pMulticastFabricDesc,
1567            pParams->pOsEvent, pMemory);
1568 }
1569 
1570 NV_STATUS
1571 memorymulticastfabricCtrlRegisterEvent_IMPL
1572 (
1573     MemoryMulticastFabric             *pMemoryMulticastFabric,
1574     NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
1575 )
1576 {
1577     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1578     NV_STATUS status = NV_OK;
1579 
1580     fabricMulticastFabricOpsMutexAcquire(pFabric);
1581 
1582     status = _memorymulticastfabricCtrlRegisterEvent(pMemoryMulticastFabric,
1583                                                      pParams);
1584 
1585     fabricMulticastFabricOpsMutexRelease(pFabric);
1586 
1587     return status;
1588 }
1589 
1590 NV_STATUS
1591 memorymulticastfabricControl_IMPL
1592 (
1593     MemoryMulticastFabric          *pMemoryMulticastFabric,
1594     CALL_CONTEXT                   *pCallContext,
1595     RS_RES_CONTROL_PARAMS_INTERNAL *pParams
1596 )
1597 {
1598     NV_STATUS status = NV_OK;
1599 
1600     if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
1601         status = memorymulticastfabricIsReady(pMemoryMulticastFabric, NV_FALSE);
1602 
1603     //
1604     // If clients try to register when the multicast object
1605     // is ready, then there is nothing left to do as the memory
1606     // descriptor is already installed.
1607     //
1608     // If the status is NV_ERR_NOT_READY then we are yet to
1609     // receive the inband response and we register the event.
1610     //
1611     if (pParams->cmd == NV00FD_CTRL_CMD_REGISTER_EVENT)
1612     {
1613         if (status == NV_OK)
1614             return NV_WARN_NOTHING_TO_DO;
1615 
1616         if (status != NV_ERR_NOT_READY)
1617             return status;
1618     }
1619     else
1620     {
1621         // Clients may busy-loop on this error status, don't log error.
1622         if (status == NV_ERR_NOT_READY)
1623             return status;
1624 
1625         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
1626     }
1627 
1628     //
1629     // Note: GPU lock(s) is required for some control calls. Thus, it is
1630     // incorrect to take the leaf lock here. resControl_IMPL() attempts to
1631     // acquire the GPU locks before it calls the control call body.
1632     //
1633     return resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
1634                            pCallContext, pParams);
1635 }
1636 
1637 NvBool
1638 memorymulticastfabricIsGpuMapAllowed_IMPL
1639 (
1640     MemoryMulticastFabric *pMemoryMulticastFabric,
1641     OBJGPU                *pGpu
1642 )
1643 {
1644     Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
1645     MEM_MULTICAST_FABRIC_DESCRIPTOR *pMulticastFabricDesc;
1646     NvU32 attachedGpusMask;
1647 
1648     fabricMulticastFabricOpsMutexAcquire(pFabric);
1649 
1650     pMulticastFabricDesc = pMemoryMulticastFabric->pMulticastFabricDesc;
1651 
1652     attachedGpusMask = pMulticastFabricDesc->attachedGpusMask;
1653 
1654     fabricMulticastFabricOpsMutexRelease(pFabric);
1655 
1656     return ((attachedGpusMask & NVBIT32(pGpu->gpuInstance)) != 0U);
1657 }
1658 
1659 NV_STATUS
1660 memorymulticastfabricGetMapAddrSpace_IMPL
1661 (
1662     MemoryMulticastFabric *pMemoryMulticastFabric,
1663     CALL_CONTEXT          *pCallContext,
1664     NvU32                  mapFlags,
1665     NV_ADDRESS_SPACE      *pAddrSpace
1666 )
1667 {
1668     *pAddrSpace = ADDR_FABRIC_MC;
1669     return NV_OK;
1670 }
1671