1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #define NVOC_KERNEL_MIG_MANAGER_H_PRIVATE_ACCESS_ALLOWED
25 
26 // FIXME XXX
27 #define NVOC_KERNEL_GRAPHICS_MANAGER_H_PRIVATE_ACCESS_ALLOWED
28 #define NVOC_GPU_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED
29 #define NVOC_COMPUTE_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED
30 #define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED
31 
32 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
33 #include "kernel/gpu/gr/kernel_graphics.h"
34 #include "kernel/gpu/rc/kernel_rc.h"
35 #include "kernel/gpu/subdevice/subdevice.h"
36 #include "kernel/gpu/mig_mgr/compute_instance_subscription.h"
37 #include "kernel/gpu/mig_mgr/gpu_instance_subscription.h"
38 #include "kernel/gpu/mem_mgr/mem_mgr.h"
39 #include "kernel/gpu/mem_sys/kern_mem_sys.h"
40 #include "kernel/gpu/ce/kernel_ce.h"
41 #include "kernel/gpu/mem_mgr/mem_mgr.h"
42 #include "kernel/gpu/mmu/kern_gmmu.h"
43 #include "kernel/gpu/mem_mgr/heap.h"
44 #include "kernel/gpu/nvlink/kernel_nvlink.h"
45 #include "kernel/gpu/gpu_engine_type.h"
46 #include "kernel/gpu/gpu_fabric_probe.h"
47 #include "rmapi/client.h"
48 #include "rmapi/rs_utils.h"
49 #include "rmapi/rmapi_utils.h"
50 #include "gpu/mem_mgr/mem_scrub.h"
51 #include "vgpu/rpc.h"
52 #include "virtualization/kernel_vgpu_mgr.h"
53 #include "kernel/gpu/gr/kernel_graphics_manager.h"
54 #include "kernel/gpu/gr/kernel_graphics.h"
55 #include "kernel/core/locks.h"
56 #include "class/cl503b.h"
57 #include "nv_ref.h"
58 #include "nvRmReg.h"
59 
60 #include "kernel/gpu/ccu/kernel_ccu.h"
61 
62 struct KERNEL_MIG_MANAGER_PRIVATE_DATA
63 {
64     NvBool bInitialized;
65     KERNEL_MIG_MANAGER_STATIC_INFO staticInfo;
66 };
67 
68 /*!
 * @brief   Function to increment GI/CI refcount
70  */
71 NV_STATUS
72 kmigmgrIncRefCount_IMPL
73 (
74     RsShared *pShared
75 )
76 {
77     NvS32 refCount;
78 
79     NV_ASSERT_OR_RETURN(pShared != NULL, NV_ERR_INVALID_ARGUMENT);
80 
81     serverRefShare(&g_resServ, pShared);
82     refCount = serverGetShareRefCount(&g_resServ, pShared);
83 
84     // Make sure refCount didn't overflow
85     NV_ASSERT_OR_RETURN(refCount > 0, NV_ERR_INVALID_STATE);
86     return NV_OK;
87 }
88 
89 /*!
 * @brief   Function to decrement GI/CI refcount
91  */
92 NV_STATUS
93 kmigmgrDecRefCount_IMPL
94 (
95     RsShared *pShared
96 )
97 {
98     NvS32 refCount;
99 
100     NV_ASSERT_OR_RETURN(pShared != NULL, NV_ERR_INVALID_ARGUMENT);
101 
102     refCount = serverGetShareRefCount(&g_resServ, pShared);
103     serverFreeShare(&g_resServ, pShared);
104     --refCount;
105 
106     // Make sure refCount didn't underflow
107     NV_ASSERT_OR_RETURN(refCount > 0, NV_ERR_INVALID_STATE);
108     return NV_OK;
109 }
110 
111 /*! @brief create a reference to a single GPU instance, no compute instance */
112 MIG_INSTANCE_REF
113 kmigmgrMakeGIReference_IMPL
114 (
115     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
116 )
117 {
118     MIG_INSTANCE_REF ref = { pKernelMIGGpuInstance, NULL };
119     return ref;
120 }
121 
122 /*! @brief create a reference to a compute instance */
123 MIG_INSTANCE_REF
124 kmigmgrMakeCIReference_IMPL
125 (
126     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
127     MIG_COMPUTE_INSTANCE *pMIGComputeInstance
128 )
129 {
130     MIG_INSTANCE_REF ref = { pKernelMIGGpuInstance, pMIGComputeInstance };
131     return ref;
132 }
133 
134 /*! @brief create a Ref referencing no GI/CI */
135 MIG_INSTANCE_REF
136 kmigmgrMakeNoMIGReference_IMPL(void)
137 {
138     MIG_INSTANCE_REF ref = { NULL, NULL };
139     return ref;
140 }
141 
142 /*! @brief check if MIG attribution id is valid for max instances */
143 NvBool
144 kmigmgrIsInstanceAttributionIdValid_IMPL
145 (
146     NvU16 id
147 )
148 {
149     return (((id / KMIGMGR_MAX_GPU_SWIZZID) <= KMIGMGR_MAX_GPU_INSTANCES) &&
150             ((id % KMIGMGR_MAX_GPU_SWIZZID) <= KMIGMGR_MAX_COMPUTE_INSTANCES));
151 }
152 
153 /*! @brief check if existing valid instance ref is passed in */
154 NvBool
155 kmigmgrIsMIGReferenceValid_IMPL
156 (
157     MIG_INSTANCE_REF *pRef
158 )
159 {
160     // Invalid argument
161     NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef != NULL, NV_FALSE);
162     // Invalid argument
163     NV_CHECK_OR_RETURN(LEVEL_SILENT, !((pRef->pKernelMIGGpuInstance == NULL) &&
164                        (pRef->pMIGComputeInstance != NULL)), NV_FALSE);
165 
166     NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef->pKernelMIGGpuInstance != NULL, NV_FALSE);
167     NV_ASSERT_OR_RETURN(pRef->pKernelMIGGpuInstance->bValid, NV_FALSE);
168 
169     // If we reached this point, the GPU instance is valid
170     NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef->pMIGComputeInstance != NULL, NV_TRUE);
171     NV_ASSERT_OR_RETURN(pRef->pMIGComputeInstance->bValid, NV_FALSE);
172 
173     return NV_TRUE;
174 }
175 
176 /*! @brief check if the same instance(s) are passed in; only compare GI if lhs has no CI */
177 NvBool
178 kmigmgrAreMIGReferencesSame_IMPL
179 (
180     MIG_INSTANCE_REF *pRefA,
181     MIG_INSTANCE_REF *pRefB
182 )
183 {
184     NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGReferenceValid(pRefA) &&
185                        kmigmgrIsMIGReferenceValid(pRefB), NV_FALSE);
186 
187     if ((pRefA->pKernelMIGGpuInstance != pRefB->pKernelMIGGpuInstance) ||
188         ((pRefA->pMIGComputeInstance != NULL) &&
189          (pRefA->pMIGComputeInstance != pRefB->pMIGComputeInstance)))
190     {
191       return NV_FALSE;
192     }
193 
194     return NV_TRUE;
195 }
196 
197 /*!
198  * @brief Count set bits within range indicated by given base type in bitvector
199  *
200  * @param[in] pEngines     Bitvector to count
 * @param[in] rmEngineType Engine type to count (any index of the type, e.g. RM_ENGINE_TYPE_GR(0)); only partitionable engines are supported
202  */
203 NvU32
204 kmigmgrCountEnginesOfType_IMPL
205 (
206     const ENGTYPE_BIT_VECTOR *pEngines,
207     RM_ENGINE_TYPE rmEngineType
208 )
209 {
210     NV_RANGE range = rangeMake(rmEngineType, rmEngineType);
211     ENGTYPE_BIT_VECTOR mask;
212 
213     if (pEngines == NULL)
214         return 0;
215 
216     if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType))
217         return 0;
218 
219     if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
220         range = RM_ENGINE_RANGE_GR();
221     else if (RM_ENGINE_TYPE_IS_COPY(rmEngineType))
222         range = RM_ENGINE_RANGE_COPY();
223     else if (RM_ENGINE_TYPE_IS_NVDEC(rmEngineType))
224         range = RM_ENGINE_RANGE_NVDEC();
225     else if (RM_ENGINE_TYPE_IS_NVENC(rmEngineType))
226         range = RM_ENGINE_RANGE_NVENC();
227     else if (RM_ENGINE_TYPE_IS_NVJPEG(rmEngineType))
228         range = RM_ENGINE_RANGE_NVJPEG();
229 
230     bitVectorClrAll(&mask);
231     bitVectorSetRange(&mask, range);
232     bitVectorAnd(&mask, &mask, pEngines);
233     return bitVectorCountSetBits(&mask);
234 }
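
//
// Usage sketch (illustrative only, not part of the driver): count how many GR
// engines an instance owns. Any index of the requested engine type may be
// passed; the whole range for that type is counted.
//
//     ENGTYPE_BIT_VECTOR engines;
//     bitVectorClrAll(&engines);
//     bitVectorSet(&engines, RM_ENGINE_TYPE_GR(0));
//     bitVectorSet(&engines, RM_ENGINE_TYPE_GR(1));
//     bitVectorSet(&engines, RM_ENGINE_TYPE_COPY(3));
//
//     // Counts both GR engines (returns 2); the COPY engine is ignored.
//     NvU32 grCount = kmigmgrCountEnginesOfType(&engines, RM_ENGINE_TYPE_GR(0));
//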
235 
236 /*!
237  * @brief Calculate the attribution ID for the given MIG instance reference.
238  *
 * @note The attribution ID is an encoding of the GPU/compute instance IDs that
 *       depends upon the maximum values of these IDs, which the recipient must
 *       query in order to decode. Attribution values for NULL or GPU-instance-only
 *       references still produce non-zero attribution IDs, which decode to
 *       out-of-range values for both IDs.
 *
 * @param[in] ref   Reference to a GI/CI
246  *
247  * @return the encoded attribution ID
248  */
249 NvU16
250 kmigmgrGetAttributionIdFromMIGReference_IMPL
251 (
252     MIG_INSTANCE_REF ref
253 )
254 {
255     NvU16 giID = KMIGMGR_MAX_GPU_SWIZZID;
256     NvU16 ciID = KMIGMGR_MAX_COMPUTE_INSTANCES;
257 
258     //
259     // Inverting this encoding depends upon the compute instance IDs having a
260     // shorter range than the gpu instance IDs, otherwise high compute instance
261     // IDs will cause aliasing
262     //
263     ct_assert(KMIGMGR_MAX_COMPUTE_INSTANCES < KMIGMGR_MAX_GPU_SWIZZID);
264 
265     // We are also depending on this encoding fitting in 16 bits...
266     ct_assert((KMIGMGR_MAX_GPU_SWIZZID * KMIGMGR_MAX_COMPUTE_INSTANCES) <= NV_U16_MAX);
267 
268     if (kmigmgrIsMIGReferenceValid(&ref) &&
269         (ref.pKernelMIGGpuInstance->swizzId < KMIGMGR_MAX_GPU_SWIZZID))
270     {
271         giID = (NvU16)ref.pKernelMIGGpuInstance->swizzId;
272         if ((ref.pMIGComputeInstance != NULL) &&
273             (ref.pMIGComputeInstance->id < KMIGMGR_MAX_COMPUTE_INSTANCES))
274         {
275             ciID = (NvU16)ref.pMIGComputeInstance->id;
276         }
277     }
278 
279     return (giID * KMIGMGR_MAX_GPU_SWIZZID) + ciID;
280 }
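
//
// Worked example (illustrative; the constant value below is an assumption and
// may differ from the actual define): with KMIGMGR_MAX_GPU_SWIZZID == 15, a
// reference to swizzId 2 / compute instance id 1 encodes to (2 * 15) + 1 == 31.
// The recipient decodes it as giID = 31 / 15 == 2 and ciID = 31 % 15 == 1,
// the same arithmetic used by kmigmgrIsInstanceAttributionIdValid(). A GI-only
// or NULL reference leaves one or both fields at their out-of-range defaults,
// so the decoded values fall outside the valid ID ranges, as described above.
//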
281 
282 /*!
283  * @brief   Function to convert an engine type from one bitvector to a
284  *          corresponding engine type in another bitvector. The two bitvectors
285  *          are expected to have the same set bit count.
286  */
287 NV_STATUS
288 kmigmgrEngineTypeXlate_IMPL
289 (
290     ENGTYPE_BIT_VECTOR *pSrc,
291     RM_ENGINE_TYPE srcEngineType,
292     ENGTYPE_BIT_VECTOR *pDst,
293     RM_ENGINE_TYPE *pDstEngineType
294 )
295 {
296     RM_ENGINE_TYPE tempSrcEngineType;
297     RM_ENGINE_TYPE tempDstEngineType;
298     NvBool bFound;
299 
300     NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INVALID_ARGUMENT);
301     NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INVALID_ARGUMENT);
302     NV_ASSERT_OR_RETURN(pDstEngineType != NULL, NV_ERR_INVALID_ARGUMENT);
303 
304     if (!bitVectorTest(pSrc, srcEngineType))
305         return NV_ERR_OBJECT_NOT_FOUND;
306 
307     // Iterate over both masks at the same time
308     bFound = NV_FALSE;
309     FOR_EACH_IN_BITVECTOR_PAIR(pSrc, tempSrcEngineType, pDst, tempDstEngineType)
310     {
311         bFound = (srcEngineType == tempSrcEngineType);
312         if (bFound)
313             break;
314     }
315     FOR_EACH_IN_BITVECTOR_PAIR_END();
316 
    // We already checked above that the engine is present, so this should never fire
318     NV_ASSERT(bFound);
319 
320     *pDstEngineType = tempDstEngineType;
321 
322     return NV_OK;
323 }
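
//
// Usage sketch (illustrative only, not part of the driver): translate a
// physical GR engine owned by an instance into its instance-local engine type
// by pairing the instance's physical mask with its local mask. The Nth set
// bit of pSrc maps to the Nth set bit of pDst.
//
//     // physicalMask = { GR2, GR3 }, localMask = { GR0, GR1 }
//     RM_ENGINE_TYPE localGr;
//     NV_ASSERT_OK(
//         kmigmgrEngineTypeXlate(&physicalMask, RM_ENGINE_TYPE_GR(3),
//                                &localMask, &localGr));
//     // localGr == RM_ENGINE_TYPE_GR(1)
//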
324 
325 //
326 // below algorithm depends on contiguity of all partitionable engine values
327 // in RM_ENGINE_TYPE, so add asserts here.
328 // Note - this only checks the first and last ID, a proper check would account
329 // for all entries, but that's not possible at this time.
330 //
331 ct_assert((RM_ENGINE_TYPE_GR(RM_ENGINE_TYPE_GR_SIZE - 1) -
332            RM_ENGINE_TYPE_GR(0)) == (RM_ENGINE_TYPE_GR_SIZE - 1));
333 ct_assert((RM_ENGINE_TYPE_COPY(RM_ENGINE_TYPE_COPY_SIZE - 1) -
334            RM_ENGINE_TYPE_COPY(0)) == (RM_ENGINE_TYPE_COPY_SIZE - 1));
335 ct_assert((RM_ENGINE_TYPE_NVDEC(RM_ENGINE_TYPE_NVDEC_SIZE - 1) -
336            RM_ENGINE_TYPE_NVDEC(0)) == (RM_ENGINE_TYPE_NVDEC_SIZE - 1));
337 ct_assert((RM_ENGINE_TYPE_NVENC(RM_ENGINE_TYPE_NVENC_SIZE - 1) -
338            RM_ENGINE_TYPE_NVENC(0)) == (RM_ENGINE_TYPE_NVENC_SIZE - 1));
339 
340 /*!
341  * @brief   Chooses the engines of the given type to allocate. Supports
342  *          shared/exclusive ownership arbitration.
343  *
344  * @param[IN]   pSourceEngines       Mask of engines in an instance
 * @param[IN]   bShared              NV_TRUE if engines should be shared
 * @param[IN]   engTypeRange         NV_RANGE of bit indices for this eng type
 * @param[IN]   reqEngCount          Requested number of engines in this CI
348  * @param[I/O]  pOutEngines          Mask of engines already/newly allocated
349  * @param[I/O]  pExclusiveEngines    Mask of already exclusively-allocated engines
350  * @param[I/O]  pSharedEngines       Mask of engines shared by other instances
351  * @param[IN]   pAllocatableEngines  Mask of engines that are allocatable
352  */
353 NV_STATUS
354 kmigmgrAllocateInstanceEngines_IMPL
355 (
356     ENGTYPE_BIT_VECTOR *pSourceEngines,
357     NvBool bShared,
358     NV_RANGE engTypeRange,
359     NvU32 reqEngCount,
360     ENGTYPE_BIT_VECTOR *pOutEngines,
361     ENGTYPE_BIT_VECTOR *pExclusiveEngines,
362     ENGTYPE_BIT_VECTOR *pSharedEngines,
363     ENGTYPE_BIT_VECTOR *pAllocatableEngines
364 )
365 {
366     NvU32 allocated = 0;
367     ENGTYPE_BIT_VECTOR engines;
368     RM_ENGINE_TYPE rmEngineType;
369     NvU32 localIdx;
370 
371     // Ensure allocatableEngines is subset of sourceEngines
372     bitVectorClrAll(&engines);
373     bitVectorAnd(&engines, pAllocatableEngines, pSourceEngines);
374     NV_ASSERT_OR_RETURN(bitVectorTestEqual(&engines, pAllocatableEngines), NV_ERR_INVALID_STATE);
375 
376     // If using shared engines, allocate as many from existing shared engines as possible
377     if (bShared)
378     {
379         bitVectorClrAll(&engines);
380         bitVectorSetRange(&engines, engTypeRange);
381         bitVectorAnd(&engines, &engines, pSourceEngines);
382         localIdx = 0;
383         FOR_EACH_IN_BITVECTOR(&engines, rmEngineType)
384         {
385             if (allocated == reqEngCount)
386                 break;
387 
388             // Skip engines that aren't allocatable or aren't in the shared pool already
389             if (!bitVectorTest(pAllocatableEngines, rmEngineType) ||
390                 !bitVectorTest(pSharedEngines, rmEngineType))
391             {
392                 localIdx++;
393                 continue;
394             }
395 
396             // assign the engine
397             bitVectorSet(pOutEngines, engTypeRange.lo + localIdx);
398 
399             localIdx++;
400             allocated++;
401         }
402         FOR_EACH_IN_BITVECTOR_END();
403     }
404 
405     // Allocate the rest from the free pool
406     bitVectorClrAll(&engines);
407     bitVectorSetRange(&engines, engTypeRange);
408     bitVectorAnd(&engines, &engines, pSourceEngines);
409     localIdx = 0;
410     FOR_EACH_IN_BITVECTOR(&engines, rmEngineType)
411     {
412         if (allocated == reqEngCount)
413             break;
414 
415         // Skip non-allocatable or in-use engines
416         if (!bitVectorTest(pAllocatableEngines, rmEngineType) ||
417             bitVectorTest(pSharedEngines, rmEngineType) ||
418             bitVectorTest(pExclusiveEngines, rmEngineType))
419         {
420             localIdx++;
421             continue;
422         }
423 
424         // Add the engine to the appropriate in-use pool
425         bitVectorSet((bShared ? pSharedEngines : pExclusiveEngines), rmEngineType);
426 
427         // Assign the engine
428         bitVectorSet(pOutEngines, engTypeRange.lo + localIdx);
429 
430         localIdx++;
431         allocated++;
432     }
433     FOR_EACH_IN_BITVECTOR_END();
434 
435     NV_CHECK_OR_RETURN(LEVEL_SILENT, allocated == reqEngCount, NV_ERR_INSUFFICIENT_RESOURCES);
436     return NV_OK;
437 }
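
//
// Worked example (illustrative only): suppose a GPU instance owns copy engines
// CE0..CE3, CE1 is already in the shared pool, and a compute instance requests
// two shared copy engines. The first pass re-uses CE1 from the shared pool and
// assigns local index 1; the second pass pulls CE0 from the free pool, marks it
// shared, and assigns local index 0. pOutEngines ends up with bits
// engTypeRange.lo + 0 and engTypeRange.lo + 1 set.
//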
438 
439 /*!
440  * @brief Convert global/physical engine mask to logical/local (no-hole) mask
441  *
442  * @param[in] pPhysicalEngineMask   Bitvector storing physical mask
 * @param[out] pLocalEngineMask     Bitvector to receive the local (no-hole) mask
444  */
445 void
446 kmigmgrGetLocalEngineMask_IMPL
447 (
448     ENGTYPE_BIT_VECTOR *pPhysicalEngineMask,
449     ENGTYPE_BIT_VECTOR *pLocalEngineMask
450 )
451 {
452     NV_RANGE range;
453     NvU32 count;
454     bitVectorClrAll(pLocalEngineMask);
455 
456     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_GR(0));
457     if (count > 0)
458     {
459         range = rangeMake(RM_ENGINE_TYPE_GR(0), RM_ENGINE_TYPE_GR(count - 1));
460         bitVectorSetRange(pLocalEngineMask, range);
461     }
462 
463     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_COPY(0));
464     if (count > 0)
465     {
466         range = rangeMake(RM_ENGINE_TYPE_COPY(0), RM_ENGINE_TYPE_COPY(count - 1));
467         bitVectorSetRange(pLocalEngineMask, range);
468     }
469 
470     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVDEC(0));
471     if (count > 0)
472     {
473         range = rangeMake(RM_ENGINE_TYPE_NVDEC(0), RM_ENGINE_TYPE_NVDEC(count - 1));
474         bitVectorSetRange(pLocalEngineMask, range);
475     }
476 
477     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVENC(0));
478     if (count > 0)
479     {
480         range = rangeMake(RM_ENGINE_TYPE_NVENC(0), RM_ENGINE_TYPE_NVENC(count - 1));
481         bitVectorSetRange(pLocalEngineMask, range);
482     }
483 
484     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVJPEG(0));
485     if (count > 0)
486     {
487         range = rangeMake(RM_ENGINE_TYPE_NVJPEG(0), RM_ENGINE_TYPE_NVJPEG(count - 1));
488         bitVectorSetRange(pLocalEngineMask, range);
489     }
490 
491     count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_OFA);
492     if (count > 0)
493         bitVectorSet(pLocalEngineMask, RM_ENGINE_TYPE_OFA);
494 }
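
//
// Worked example (illustrative only): a physical mask of { GR2, GR3, CE4, CE5,
// NVDEC1 } contains two GR engines, two copy engines and one NVDEC engine, so
// the resulting local mask is { GR0, GR1, CE0, CE1, NVDEC0 } -- each engine
// type is renumbered contiguously from index 0 with no holes.
//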
495 
496 /*!
497  * @brief   Create client and subdevice handles to make calls into this gpu instance
498  */
499 NV_STATUS
500 kmigmgrAllocGPUInstanceHandles_IMPL
501 (
502     OBJGPU *pGpu,
503     NvU32 swizzId,
504     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
505 )
506 {
507     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
508     NvHandle hSubscription = NV01_NULL_OBJECT;
509     NvHandle hClient;
510     NvHandle hDevice;
511     NvHandle hSubdevice;
512     NVC637_ALLOCATION_PARAMETERS params;
513 
514     NV_ASSERT_OK_OR_RETURN(
515         rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice));
516 
517     portMemSet(&params, 0, sizeof(params));
518     params.swizzId = swizzId;
519     NV_ASSERT_OK_OR_RETURN(
520         pRmApi->Alloc(pRmApi, hClient, hSubdevice, &hSubscription, AMPERE_SMC_PARTITION_REF, &params, sizeof(params)));
521 
522     pKernelMIGGpuInstance->instanceHandles.hClient = hClient;
523     pKernelMIGGpuInstance->instanceHandles.hDevice = hDevice;
524     pKernelMIGGpuInstance->instanceHandles.hSubdevice = hSubdevice;
525     pKernelMIGGpuInstance->instanceHandles.hSubscription = hSubscription;
526 
527     return NV_OK;
528 }
529 
530 /*!
531  * @brief   Delete created gpu instance handles if they exist
532  */
533 void
534 kmigmgrFreeGPUInstanceHandles_IMPL
535 (
536     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
537 )
538 {
539     if (pKernelMIGGpuInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
540     {
541         RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
542 
543         pRmApi->Free(pRmApi, pKernelMIGGpuInstance->instanceHandles.hClient, pKernelMIGGpuInstance->instanceHandles.hClient);
544         pKernelMIGGpuInstance->instanceHandles.hClient = NV01_NULL_OBJECT;
545         pKernelMIGGpuInstance->instanceHandles.hDevice = NV01_NULL_OBJECT;
546         pKernelMIGGpuInstance->instanceHandles.hSubdevice = NV01_NULL_OBJECT;
547         pKernelMIGGpuInstance->instanceHandles.hSubscription = NV01_NULL_OBJECT;
548     }
549 }
550 
551 /*!
552  * @brief   Checks if all references to gpu instance are internal
553  */
554 NvBool
555 kmigmgrIsGPUInstanceReadyToBeDestroyed_IMPL
556 (
557     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
558 )
559 {
560     NvS32 targetRefCount;
561     NvS32 actualRefCount;
562 
563     NV_CHECK_OR_RETURN(LEVEL_SILENT, pKernelMIGGpuInstance->pShare != NULL, NV_TRUE);
564 
565     //
566     // Initial refCount is increased to "1" when gpu instance is created and then
567     // every subscription by a client should increase the refcount
568     //
569     targetRefCount = 1;
570 
571     // A client handle is allocated to support internal GR Routing
572     if (pKernelMIGGpuInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
573         targetRefCount++;
574 
575     //
576     // GPU instance scrubber is initialized during gpu instance creation and deleted
577     // when gpu instance is invalidated, and subscribes to the gpu instance, so must
578     // be accounted for in the target ref count
579     //
580     if (pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized)
581         targetRefCount++;
582 
583     actualRefCount = serverGetShareRefCount(&g_resServ, pKernelMIGGpuInstance->pShare);
584     if (actualRefCount > targetRefCount)
585         return NV_FALSE;
586 
587     // Mismatch here indicates programming error
588     NV_ASSERT(actualRefCount == targetRefCount);
589     return NV_TRUE;
590 }
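
//
// Worked example (illustrative only): a GPU instance whose internal client
// handle is allocated and whose memory scrubber is initialized has a target
// refcount of 1 (creation) + 1 (internal client) + 1 (scrubber) == 3. If the
// share's actual refcount is 4, an external subscription is still outstanding
// and the instance is not yet ready to be destroyed.
//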
591 
592 NV_STATUS
593 kmigmgrConstructEngine_IMPL
594 (
595     OBJGPU           *pGpu,
596     KernelMIGManager *pKernelMIGManager,
597     ENGDESCRIPTOR    engDesc
598 )
599 {
600     NvU32 GIIdx;
601     KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate;
602 
603     pKernelMIGManager->bMIGEnabled = NV_FALSE;
604     pKernelMIGManager->swizzIdInUseMask = 0x0;
605 
606     pPrivate = portMemAllocNonPaged(sizeof(*pPrivate));
607     NV_CHECK_OR_RETURN(LEVEL_ERROR, pPrivate != NULL, NV_ERR_NO_MEMORY);
608     portMemSet(pPrivate, 0, sizeof(*pPrivate));
609     pKernelMIGManager->pPrivate = pPrivate;
610 
611     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
612     {
613         kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager,
614                                    &pKernelMIGManager->kernelMIGGpuInstance[GIIdx]);
615     }
616 
617     kmigmgrInitRegistryOverrides(pGpu, pKernelMIGManager);
618 
619     return NV_OK;
620 }
621 
622 void
623 kmigmgrDestruct_IMPL
624 (
625     KernelMIGManager *pKernelMIGManager
626 )
627 {
628     NvU32 GIIdx;
629     NvU32 CIIdx;
630 
631     portMemFree(pKernelMIGManager->pPrivate->staticInfo.pProfiles);
632     pKernelMIGManager->pPrivate->staticInfo.pProfiles = NULL;
633     portMemFree(pKernelMIGManager->pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
634     pKernelMIGManager->pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
635     portMemFree(pKernelMIGManager->pPrivate->staticInfo.pCIProfiles);
636     pKernelMIGManager->pPrivate->staticInfo.pCIProfiles = NULL;
637     portMemFree(pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo);
638     pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo = NULL;
639 
640     portMemFree(pKernelMIGManager->pPrivate);
641     pKernelMIGManager->pPrivate = NULL;
642 
643     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
644     {
645         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[GIIdx];
646 
647         // Shouldn't have any valid GPU instance
648         if (pKernelMIGGpuInstance->bValid)
649         {
650             NV_PRINTF(LEVEL_ERROR,
651                       "Deleting valid GPU instance with swizzId - %d. Should have been deleted before shutdown!\n",
652                       pKernelMIGGpuInstance->swizzId);
653         }
654 
655         for (CIIdx = 0;
656              CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
657              ++CIIdx)
658         {
659             MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
660 
661             // Shouldn't have any valid compute instance
662             if (pMIGComputeInstance->bValid)
663             {
664                 NV_PRINTF(LEVEL_ERROR,
665                           "Deleting valid compute instance - %d. Should have been deleted before shutdown!\n",
666                           CIIdx);
667             }
668         }
669     }
670 }
671 
672 /*!
673  * @brief   Handle KMIGMGR init which must occur after GPU post load.
674  *
675  * @param[in] pGpu
676  * @param[in] pUnusedData Unused callback data
677  */
678 static NV_STATUS
679 _kmigmgrHandlePostSchedulingEnableCallback
680 (
681     OBJGPU *pGpu,
682     void   *pUnusedData
683 )
684 {
685     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
686     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
687 
688     if (!IS_VIRTUAL(pGpu))
689     {
690         NvBool bTopLevelScrubberEnabled = NV_FALSE;
691         NvBool bTopLevelScrubberConstructed = NV_FALSE;
692 
693         memmgrGetTopLevelScrubberStatus(pGpu, pMemoryManager,
694             &bTopLevelScrubberEnabled, &bTopLevelScrubberConstructed);
695 
696         //
697         // This callback is handled as part of the same routine that triggers
698         // scrubber initialization. Unfortunately this callback depends on the
699         // scrubber being initialized first, and we cannot enforce that the scrubber
700         // callback always goes first. However, the trigger routine does support a
701         // retry mechanism that will allow us to get called back after all of the
702         // other callbacks in the list are completed. We signal for retry by
703         // returning NV_WARN_MORE_PROCESSING_REQUIRED if the scrubber is enabled but
        // hasn't been initialized yet. The warning will be quashed on the first
705         // attempt, but will then be reported and trigger initialization failure if
706         // it happens again on the retry.
707         //
        // Bug: 2997744, skipping the check here because top level scrubber creation is delayed until
        // GPU instances are created in a MIG-enabled guest
710         //
711         NV_CHECK_OR_RETURN(LEVEL_SILENT,
712                            !bTopLevelScrubberEnabled || bTopLevelScrubberConstructed,
713                            NV_WARN_MORE_PROCESSING_REQUIRED);
714     }
715 
716     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
717         memmgrSetPartitionableMem_HAL(pGpu, pMemoryManager));
718 
719     if ((pKernelMIGManager == NULL) || !kmigmgrIsMIGSupported(pGpu, pKernelMIGManager))
720     {
721         NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
722         return NV_ERR_NOT_SUPPORTED;
723     }
724 
725     if (!IS_MIG_ENABLED(pGpu) && !IS_VIRTUAL(pGpu) &&
726         pGpu->getProperty(pGpu, PDB_PROP_GPU_RESETLESS_MIG_SUPPORTED) &&
727         (gpumgrIsSystemMIGEnabled(gpuGetDBDF(pGpu)) || pKernelMIGManager->bMIGAutoOnlineEnabled))
728     {
729         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
730         NV2080_CTRL_GPU_SET_PARTITIONING_MODE_PARAMS params;
731 
732         portMemSet(&params, 0x0, sizeof(params));
733         params.partitioningMode = NV2080_CTRL_GPU_SET_PARTITIONING_MODE_REPARTITIONING_FAST_RECONFIG;
734         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
735             pRmApi->Control(pRmApi,
736                             pGpu->hInternalClient,
737                             pGpu->hInternalSubdevice,
738                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_PARTITIONING_MODE,
739                             &params,
740                             sizeof(params)));
741 
742         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
743             kmigmgrSetPartitioningMode(pGpu, pKernelMIGManager));
744     }
745 
746     if (IS_MIG_ENABLED(pGpu))
747     {
748         //
749         // Populate static GPU instance memory config which will be used to manage
750         // GPU instance memory
751         //
752         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
753         NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem));
754 
755         // Initialize static info derived from physical RM
756         NV_ASSERT_OK_OR_RETURN(kmigmgrLoadStaticInfo_HAL(pGpu, pKernelMIGManager));
757 
758         // KERNEL_ONLY variants require static info to detect reduced configs
759         kmigmgrDetectReducedConfig_HAL(pGpu, pKernelMIGManager);
760     }
761 
762     NV_ASSERT_OK(kmigmgrRestoreFromPersistence_HAL(pGpu, pKernelMIGManager));
763 
764     return NV_OK;
765 }
766 
static NV_STATUS
_kmigmgrHandlePreSchedulingDisableCallback
768 (
769     OBJGPU *pGpu,
770     void *pUnusedData
771 )
772 {
773     NvU32 GIIdx;
774     NvU32 CIIdx;
775     NV_STATUS rmStatus = NV_OK;
776     NvBool bDisable = NV_FALSE;
777     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
778 
779     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
780     {
781         if (pKernelMIGManager->kernelMIGGpuInstance[GIIdx].bValid)
782         {
783             kmigmgrDestroyGPUInstanceScrubber(pGpu, pKernelMIGManager, &pKernelMIGManager->kernelMIGGpuInstance[GIIdx]);
784         }
785     }
786 
787     if (IS_VIRTUAL(pGpu) && kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager))
788         return NV_OK;
789 
790     //
791     // Update persistent instance topology so that we can recreate it on next
792     // GPU attach.
793     //
794     NV_ASSERT_OK(kmigmgrSaveToPersistence(pGpu, pKernelMIGManager));
795 
796     if (!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu))
797         return NV_OK;
798 
799     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
800     {
801         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[GIIdx];
802         NvU32 swizzId;
803 
804         // Skip invalid gpu instances
805         if (!pKernelMIGGpuInstance->bValid)
806             continue;
807 
808         swizzId = pKernelMIGGpuInstance->swizzId;
809 
810         // Shouldn't be any valid gpu instances
811         NV_PRINTF(LEVEL_ERROR,
812                   "Invalidating valid gpu instance with swizzId = %d\n",
813                   swizzId);
814 
815         for (CIIdx = 0;
816              CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
817              ++CIIdx)
818         {
819             MIG_COMPUTE_INSTANCE *pMIGComputeInstance =
820                 &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
821 
822             // Skip invalid compute instances
823             if (!pMIGComputeInstance->bValid)
824                 continue;
825 
826             // Shouldn't be any valid compute instances
827             NV_PRINTF(LEVEL_ERROR,
828                       "Invalidating valid compute instance with id = %d\n",
829                       CIIdx);
830 
831             NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
832                 kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, CIIdx, NV_TRUE));
833 
834             if (IS_GSP_CLIENT(pGpu))
835             {
836                 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
837                 NVC637_CTRL_EXEC_PARTITIONS_DELETE_PARAMS params;
838 
839                 portMemSet(&params, 0, sizeof(params));
840                 params.execPartCount = 1;
841                 params.execPartId[0] = CIIdx;
842 
843                 NV_ASSERT_OK(
844                     pRmApi->Control(pRmApi,
845                                     pKernelMIGGpuInstance->instanceHandles.hClient,
846                                     pKernelMIGGpuInstance->instanceHandles.hSubscription,
847                                     NVC637_CTRL_CMD_EXEC_PARTITIONS_DELETE,
848                                     &params,
849                                     sizeof(params)));
850             }
851         }
852 
853         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
854             kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, swizzId, NV_TRUE));
855 
856         if (IS_GSP_CLIENT(pGpu))
857         {
858             RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
859             NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS params;
860 
861             portMemSet(&params, 0, sizeof(params));
862             params.partitionCount = 1;
863             params.partitionInfo[0].bValid = NV_FALSE;
864             params.partitionInfo[0].swizzId = swizzId;
865 
866             NV_ASSERT_OK(
867                 pRmApi->Control(pRmApi,
868                                 pGpu->hInternalClient,
869                                 pGpu->hInternalSubdevice,
870                                 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
871                                 &params,
872                                 sizeof(params)));
873         }
874 
        // There was an active gpu instance; we need to disable MIG later
876         bDisable = NV_TRUE;
877     }
878 
    // All GPU instances should have been invalidated and their swizzIds released by now
880     if (pKernelMIGManager->swizzIdInUseMask != 0x0)
881     {
882         NV_ASSERT(0);
883         NV_PRINTF(LEVEL_ERROR, "leaked swizzid mask 0x%llx !!\n", pKernelMIGManager->swizzIdInUseMask);
884     }
885 
886     if (bDisable)
887     {
888         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
889             kmigmgrSetMIGState(pGpu, pKernelMIGManager, NV_TRUE, NV_FALSE, NV_TRUE));
890     }
891 
892     return NV_OK;
893 }
894 
895 NV_STATUS
896 kmigmgrStateInitLocked_IMPL
897 (
898     OBJGPU *pGpu,
899     KernelMIGManager *pKernelMIGManager
900 )
901 {
902     //
903     // Configure MIG Mode based on devinit's determination of MIG enable
904     // preconditions being met or not. Devinit will set SW_SCRATCH bit if MIG
905     // mode was requested and was able to be supported / enabled.
906     //
907     if (kmigmgrIsDevinitMIGBitSet_HAL(pGpu, pKernelMIGManager))
908         pKernelMIGManager->bMIGEnabled = NV_TRUE;
909 
910     NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);
911 
912     // Setup a callback to initialize state at the very end of GPU post load
913     NV_ASSERT_OK(
914         kfifoAddSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
915             _kmigmgrHandlePostSchedulingEnableCallback, NULL,
916             _kmigmgrHandlePreSchedulingDisableCallback, NULL));
917 
918     return NV_OK;
919 }
920 
921 /*! State unload */
922 NV_STATUS
923 kmigmgrStateUnload_IMPL
924 (
925     OBJGPU *pGpu,
926     KernelMIGManager *pKernelMIGManager,
927     NvU32 flags
928 )
929 {
930     kmigmgrClearStaticInfo_HAL(pGpu, pKernelMIGManager);
931 
932     // Nothing to do if MIG is not supported
933     NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);
934 
935     kfifoRemoveSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
936         _kmigmgrHandlePostSchedulingEnableCallback, NULL,
937         _kmigmgrHandlePreSchedulingDisableCallback, NULL);
938 
939     return NV_OK;
940 }
941 
942 /*! Init registry overrides */
943 void
944 kmigmgrInitRegistryOverrides_IMPL
945 (
946     OBJGPU *pGpu,
947     KernelMIGManager *pKernelMIGManager
948 )
949 {
950 }
951 
952 /**
953  * @brief Retrieve data block for GPU instance at given slot
954  */
955 KERNEL_MIG_GPU_INSTANCE *
956 kmigmgrGetMIGGpuInstanceSlot_IMPL
957 (
958     OBJGPU *pGpu,
959     KernelMIGManager *pKernelMIGManager,
960     NvU32 i
961 )
962 {
963     NV_ASSERT_OR_RETURN(i < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance), NULL);
964     return &pKernelMIGManager->kernelMIGGpuInstance[i];
965 }
966 
967 /**
968  * @brief Returns true if MIG is supported.
 * Note that MIG is not supported on platforms that support ATS over NVLink.
970  */
971 NvBool
972 kmigmgrIsMIGSupported_IMPL
973 (
974     OBJGPU *pGpu,
975     KernelMIGManager *pKernelMIGManager
976 )
977 {
978     return pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED);
979 }
980 
981 /*!
 * @brief Determines whether MIG is enabled on a supported system
983  */
984 NvBool
985 kmigmgrIsMIGEnabled_IMPL
986 (
987     OBJGPU *pGpu,
988     KernelMIGManager *pKernelMIGManager
989 )
990 {
991     return kmigmgrIsMIGSupported(pGpu, pKernelMIGManager) && pKernelMIGManager->bMIGEnabled;
992 }
993 
994 /*!
995  * @brief Determines if MIG GPU instancing is enabled
996  */
997 NvBool
998 kmigmgrIsMIGGpuInstancingEnabled_IMPL
999 (
1000     OBJGPU *pGpu,
1001     KernelMIGManager *pKernelMIGManager
1002 )
1003 {
1004     return (IS_MIG_ENABLED(pGpu) &&
1005             (pKernelMIGManager->swizzIdInUseMask != 0));
1006 }
1007 
1008 /*!
1009  * @brief Determines if MIG memory partitioning is enabled
1010  */
1011 NvBool
1012 kmigmgrIsMIGMemPartitioningEnabled_IMPL
1013 (
1014     OBJGPU *pGpu,
1015     KernelMIGManager *pKernelMIGManager
1016 )
1017 {
1018     NvU32 swizzId;
1019 
1020     if (!IS_MIG_IN_USE(pGpu))
1021     {
1022         return NV_FALSE;
1023     }
1024 
1025     FOR_EACH_INDEX_IN_MASK(64, swizzId, pKernelMIGManager->swizzIdInUseMask)
1026     {
1027         if (kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
1028         {
1029             return NV_TRUE;
1030         }
1031     }
1032     FOR_EACH_INDEX_IN_MASK_END;
1033 
1034     return NV_FALSE;
1035 }
1036 
1037 /*!
1038  * @brief Determines if NvLink and P2P are compatible with MIG
1039  */
1040 NvBool
1041 kmigmgrIsMIGNvlinkP2PSupported_IMPL
1042 (
1043     OBJGPU *pGpu,
1044     KernelMIGManager *pKernelMIGManager
1045 )
1046 {
1047     //
    // No need to make a decision based on any override if MIG is not supported/enabled
    // on this chip
1050     //
1051     if (!IS_MIG_ENABLED(pGpu))
1052     {
1053         return NV_TRUE;
1054     }
1055 
1056     // MIG+NVLINK not supported by default
1057     return NV_FALSE;
1058 }
1059 
1060 /*! Retrieve immutable static data */
1061 const KERNEL_MIG_MANAGER_STATIC_INFO *
1062 kmigmgrGetStaticInfo_IMPL
1063 (
1064     OBJGPU *pGpu,
1065     KernelMIGManager *pKernelMIGManager
1066 )
1067 {
1068     KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate = (KERNEL_MIG_MANAGER_PRIVATE_DATA *)pKernelMIGManager->pPrivate;
1069     return ((pPrivate != NULL) && pPrivate->bInitialized) ? &pPrivate->staticInfo : NULL;
1070 }
1071 
1072 /*! Initialize static information queried from Physical RM */
1073 NV_STATUS
1074 kmigmgrLoadStaticInfo_KERNEL
1075 (
1076     OBJGPU *pGpu,
1077     KernelMIGManager *pKernelMIGManager
1078 )
1079 {
1080     KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate = (KERNEL_MIG_MANAGER_PRIVATE_DATA *)pKernelMIGManager->pPrivate;
1081     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1082     NV_STATUS status;
1083     NV2080_CTRL_INTERNAL_STATIC_MIGMGR_GET_PARTITIONABLE_ENGINES_PARAMS params = {0};
1084     NvU32 nv2080EngineMask[NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX];
1085 
1086     NV_ASSERT_OR_RETURN(pPrivate != NULL, NV_ERR_INVALID_STATE);
1087 
1088     if (pPrivate->bInitialized)
1089         return NV_OK;
1090 
1091     //
1092     // HACK
1093     // Some of the static data implementations depend on other static data. We
1094     // must publish early to make the data accessible as it becomes available.
1095     //
1096     pPrivate->bInitialized = NV_TRUE;
1097 
1098     portMemSet(pPrivate->staticInfo.partitionableEngineMask, 0x0, sizeof(pPrivate->staticInfo.partitionableEngineMask));
1099 
1100     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1101         pRmApi->Control(pRmApi,
1102                         pGpu->hInternalClient,
1103                         pGpu->hInternalSubdevice,
1104                         NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_PARTITIONABLE_ENGINES,
1105                         &params,
1106                         sizeof(params)),
1107         failed);
1108 
1109     ct_assert(NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX == 2);
1110 
1111     nv2080EngineMask[0] = NvU64_LO32(params.engineMask);
1112     nv2080EngineMask[1] = NvU64_HI32(params.engineMask);
1113 
1114     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1115         gpuGetRmEngineTypeCapMask(nv2080EngineMask,
1116                                   NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX,
1117                                   pPrivate->staticInfo.partitionableEngineMask),
1118         failed);
1119 
1120     pPrivate->staticInfo.pSkylineInfo = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSkylineInfo));
1121     NV_CHECK_OR_ELSE(LEVEL_ERROR,
1122         pPrivate->staticInfo.pSkylineInfo != NULL,
1123         status = NV_ERR_NO_MEMORY;
1124         goto failed;);
1125     portMemSet(pPrivate->staticInfo.pSkylineInfo, 0x0, sizeof(*pPrivate->staticInfo.pSkylineInfo));
1126 
1127     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1128         pRmApi->Control(pRmApi,
1129                         pGpu->hInternalClient,
1130                         pGpu->hInternalSubdevice,
1131                         NV2080_CTRL_CMD_INTERNAL_STATIC_GRMGR_GET_SKYLINE_INFO,
1132                         pPrivate->staticInfo.pSkylineInfo,
1133                         sizeof(*pPrivate->staticInfo.pSkylineInfo)),
1134         failed);
1135 
1136     pPrivate->staticInfo.pCIProfiles = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pCIProfiles));
1137     NV_CHECK_OR_ELSE(LEVEL_ERROR,
1138         pPrivate->staticInfo.pCIProfiles != NULL,
1139         status = NV_ERR_NO_MEMORY;
1140         goto failed;);
1141     portMemSet(pPrivate->staticInfo.pCIProfiles, 0x0, sizeof(*pPrivate->staticInfo.pCIProfiles));
1142 
1143     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1144         pRmApi->Control(pRmApi,
1145                         pGpu->hInternalClient,
1146                         pGpu->hInternalSubdevice,
1147                         NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_COMPUTE_PROFILES,
1148                         pPrivate->staticInfo.pCIProfiles,
1149                         sizeof(*pPrivate->staticInfo.pCIProfiles)),
1150         failed);
1151 
1152     pPrivate->staticInfo.pProfiles = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pProfiles));
1153     NV_CHECK_OR_ELSE(LEVEL_ERROR,
1154         pPrivate->staticInfo.pProfiles != NULL,
1155         status = NV_ERR_NO_MEMORY;
1156         goto failed;);
1157     portMemSet(pPrivate->staticInfo.pProfiles, 0x0, sizeof(*pPrivate->staticInfo.pProfiles));
1158 
1159     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
1160         pRmApi->Control(pRmApi,
1161                         pGpu->hInternalClient,
1162                         pGpu->hInternalSubdevice,
1163                         NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_PROFILES,
1164                         pPrivate->staticInfo.pProfiles,
1165                         sizeof(*pPrivate->staticInfo.pProfiles)),
1166         failed);
1167 
1168     pPrivate->staticInfo.pSwizzIdFbMemPageRanges = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));
1169     NV_CHECK_OR_ELSE(LEVEL_ERROR,
1170         pPrivate->staticInfo.pSwizzIdFbMemPageRanges != NULL,
1171         status = NV_ERR_NO_MEMORY;
1172         goto failed;);
1173     portMemSet(pPrivate->staticInfo.pSwizzIdFbMemPageRanges, 0x0, sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));
1174 
1175     status = pRmApi->Control(pRmApi,
1176                              pGpu->hInternalClient,
1177                              pGpu->hInternalSubdevice,
1178                              NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_SWIZZ_ID_FB_MEM_PAGE_RANGES,
1179                              pPrivate->staticInfo.pSwizzIdFbMemPageRanges,
1180                              sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));
1181 
1182     if (status == NV_ERR_NOT_SUPPORTED)
1183     {
        // Only supported on specific GPUs
1185         status = NV_OK;
1186         portMemFree(pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
1187         pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
1188     }
1189     else if (status != NV_OK)
1190     {
1191         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, status, failed);
1192     }
1193 
1194     return status;
1195 
1196 failed:
1197     portMemFree(pPrivate->staticInfo.pProfiles);
1198     pPrivate->staticInfo.pProfiles = NULL;
1199     portMemFree(pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
1200     pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
1201     portMemFree(pPrivate->staticInfo.pCIProfiles);
1202     pPrivate->staticInfo.pCIProfiles = NULL;
    portMemFree(pPrivate->staticInfo.pSkylineInfo);
    pPrivate->staticInfo.pSkylineInfo = NULL;
1205 
1206     pPrivate->bInitialized = NV_FALSE;
1207 
1208     return status;
1209 }
1210 
1211 /*!
 * @brief Clears static information set for vGPU
1213  */
1214 void
1215 kmigmgrClearStaticInfo_VF
1216 (
1217     OBJGPU *pGpu,
1218     KernelMIGManager *pKernelMIGManager
1219 )
1220 {
1221     NvU32 i;
1222 
1223     // Nothing to do
1224     if (!kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager))
1225         return;
1226 
1227     for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; ++i)
1228     {
1229         if (pKernelMIGManager->kernelMIGGpuInstance[i].pShare != NULL)
1230         {
1231             serverFreeShare(&g_resServ, pKernelMIGManager->kernelMIGGpuInstance[i].pShare);
1232             pKernelMIGManager->kernelMIGGpuInstance[i].pShare = NULL;
1233         }
1234 
1235         kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager, &pKernelMIGManager->kernelMIGGpuInstance[i]);
1236     }
1237 }
1238 
1239 /*!
1240  * @brief Disable RC Watchdog
1241  */
1242 NV_STATUS
1243 kmigmgrDisableWatchdog_IMPL
1244 (
1245     OBJGPU *pGpu,
1246     KernelMIGManager *pKernelMigManager
1247 )
1248 {
1249     KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
1250     NvU32 wdFlags = pKernelRc->watchdog.flags;
1251     NvS32 enableRequestsRefcount;
1252     NvS32 disableRequestsRefcount;
1253     NvS32 softDisableRequestsRefcount;
1254 
1255     krcWatchdogGetReservationCounts(pKernelRc,
1256                                     &enableRequestsRefcount,
1257                                     &disableRequestsRefcount,
1258                                     &softDisableRequestsRefcount);
1259 
1260     //
    // If clients have made requests to the watchdog, we can't enable MIG until
    // those clients have gone away: we disallow them from modifying watchdog
    // state while MIG is active, but they still need to release their
    // refcount on exit.
1265     //
1266     if ((enableRequestsRefcount != 0) || (disableRequestsRefcount != 0) ||
1267         (softDisableRequestsRefcount != 0))
1268     {
1269         NV_PRINTF(LEVEL_ERROR,
1270                   "Failed to disable watchdog with outstanding reservations - enable: %d disable: %d softDisable: %d.\n",
1271                   enableRequestsRefcount,
1272                   disableRequestsRefcount,
1273                   softDisableRequestsRefcount);
1274 
1275         return NV_ERR_STATE_IN_USE;
1276     }
1277 
1278     NV_CHECK_OR_RETURN(LEVEL_SILENT, (wdFlags & WATCHDOG_FLAGS_INITIALIZED) != 0x0, NV_OK);
1279 
1280     pKernelMigManager->bRestoreWatchdog = NV_TRUE;
1281     pKernelMigManager->bReenableWatchdog = (wdFlags & WATCHDOG_FLAGS_DISABLED) == 0x0;
1282 
1283     return krcWatchdogShutdown(pGpu, pKernelRc);
1284 }
1285 
1286 /*!
1287  * @brief Enable RC Watchdog if it was enabled before kmigmgrDisableWatchdog invocation
1288  */
1289 NV_STATUS
1290 kmigmgrRestoreWatchdog_IMPL
1291 (
1292     OBJGPU *pGpu,
1293     KernelMIGManager *pKernelMigManager
1294 )
1295 {
1296     KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
1297 
1298     NV_CHECK_OR_RETURN(LEVEL_SILENT, pKernelMigManager->bRestoreWatchdog, NV_OK);
1299 
1300     if (pKernelMigManager->bReenableWatchdog)
1301     {
1302         krcWatchdogEnable(pKernelRc, NV_FALSE /* bOverRide */);
1303     }
1304 
1305     pKernelMigManager->bRestoreWatchdog = NV_FALSE;
1306     pKernelMigManager->bReenableWatchdog = NV_FALSE;
1307 
1308     return krcWatchdogInit_HAL(pGpu, pKernelRc);
1309 }
1310 
1311 /*!
1312  * @brief   Function to set swizzId in use
1313  */
1314 NV_STATUS
1315 kmigmgrSetSwizzIdInUse_IMPL
1316 (
1317     OBJGPU *pGpu,
1318     KernelMIGManager *pKernelMIGManager,
1319     NvU32 swizzId
1320 )
1321 {
1322     // Validate that same ID is not already set and then set the ID
1323     NvU64 mask = NVBIT64(swizzId);
1324 
1325     if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
1326     {
1327         return NV_ERR_INVALID_ARGUMENT;
1328     }
1329 
1330     if (mask & pKernelMIGManager->swizzIdInUseMask)
1331     {
1332         NV_PRINTF(LEVEL_ERROR, "SwizzID - %d already in use\n", swizzId);
1333         DBG_BREAKPOINT();
1334         return NV_ERR_STATE_IN_USE;
1335     }
1336 
1337     pKernelMIGManager->swizzIdInUseMask |= mask;
1338 
1339     return NV_OK;
1340 }
1341 
1342 /*!
1343  * @brief   Function to mark swizzId free
1344  */
1345 NV_STATUS
1346 kmigmgrClearSwizzIdInUse_IMPL
1347 (
1348     OBJGPU *pGpu,
1349     KernelMIGManager *pKernelMIGManager,
1350     NvU32 swizzId
1351 )
1352 {
    // Validate that the ID is currently marked in use and then clear it
1354     NvU64 mask = NVBIT64(swizzId);
1355 
1356     if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
1357     {
1358         return NV_ERR_INVALID_ARGUMENT;
1359     }
1360 
1361     if (!(mask & pKernelMIGManager->swizzIdInUseMask))
1362     {
1363         NV_PRINTF(LEVEL_ERROR, "SwizzID - %d not in use\n", swizzId);
1364         DBG_BREAKPOINT();
1365         return NV_ERR_INVALID_STATE;
1366     }
1367 
1368     pKernelMIGManager->swizzIdInUseMask &= ~mask;
1369 
1370     return NV_OK;
1371 }
1372 
1373 /*!
 * @brief   Function to check whether a swizzId is in use
1375  */
1376 NvBool
1377 kmigmgrIsSwizzIdInUse_IMPL
1378 (
1379     OBJGPU *pGpu,
1380     KernelMIGManager *pKernelMIGManager,
1381     NvU32 swizzId
1382 )
1383 {
1384     NvU64 mask = NVBIT64(swizzId);
1385 
1386     if (mask & pKernelMIGManager->swizzIdInUseMask)
1387         return NV_TRUE;
1388 
1389     return NV_FALSE;
1390 }
1391 
1392 /*
1393  * @brief Return global swizzId mask
1394  */
1395 NvU64
1396 kmigmgrGetSwizzIdInUseMask_IMPL
1397 (
1398     OBJGPU *pGpu,
1399     KernelMIGManager *pKernelMIGManager
1400 )
1401 {
1402     return pKernelMIGManager->swizzIdInUseMask;
1403 }
1404 
1405 /*!
1406  * @brief   Marks the given engines as in use by some GPU instance
1407  */
1408 NV_STATUS
1409 kmigmgrSetEnginesInUse_IMPL
1410 (
1411     OBJGPU *pGpu,
1412     KernelMIGManager *pKernelMIGManager,
1413     ENGTYPE_BIT_VECTOR *pEngines
1414 )
1415 {
1416     ENGTYPE_BIT_VECTOR tempEngines;
1417 
1418     NV_ASSERT_OR_RETURN(pEngines != NULL, NV_ERR_INVALID_ARGUMENT);
1419 
1420     bitVectorAnd(&tempEngines, pEngines, &pKernelMIGManager->partitionableEnginesInUse);
1421     // Ensure no engine in given mask is marked as in-use
1422     NV_ASSERT_OR_RETURN(bitVectorTestAllCleared(&tempEngines), NV_ERR_STATE_IN_USE);
1423 
1424     // partitionableEnginesInUse |= pEngines
1425     bitVectorOr(&pKernelMIGManager->partitionableEnginesInUse,
1426                 &pKernelMIGManager->partitionableEnginesInUse,
1427                 pEngines);
1428     return NV_OK;
1429 }
1430 
1431 /*!
 * @brief   Marks the given engines as no longer in use by any GPU instance
1433  */
1434 NV_STATUS
1435 kmigmgrClearEnginesInUse_IMPL
1436 (
1437     OBJGPU *pGpu,
1438     KernelMIGManager *pKernelMIGManager,
1439     ENGTYPE_BIT_VECTOR *pEngines
1440 )
1441 {
1442     ENGTYPE_BIT_VECTOR tempEngines;
1443 
1444     NV_ASSERT_OR_RETURN(pEngines != NULL, NV_ERR_INVALID_ARGUMENT);
1445 
1446     bitVectorAnd(&tempEngines, pEngines, &pKernelMIGManager->partitionableEnginesInUse);
1447     // Ensure every engine in given mask is marked as in-use
1448     NV_ASSERT_OR_RETURN(bitVectorTestEqual(&tempEngines, pEngines), NV_ERR_STATE_IN_USE);
1449 
1450     // partitionableEnginesInUse &= ~(pEngines)
1451     bitVectorComplement(&tempEngines, pEngines);
1452     bitVectorAnd(&pKernelMIGManager->partitionableEnginesInUse,
1453                  &pKernelMIGManager->partitionableEnginesInUse,
1454                  &tempEngines);
1455     return NV_OK;
1456 }
1457 
1458 /*!
1459  * @brief   Checks whether given engine is in use by any GPU instance
1460  */
1461 NvBool
1462 kmigmgrIsEngineInUse_IMPL
1463 (
1464     OBJGPU *pGpu,
1465     KernelMIGManager *pKernelMIGManager,
1466     RM_ENGINE_TYPE rmEngineType
1467 )
1468 {
1469     return bitVectorTest(&pKernelMIGManager->partitionableEnginesInUse, rmEngineType);
1470 }
1471 
1472 /*
1473  * @brief   Determines whether RM_ENGINE_TYPE can be partitioned
1474  */
1475 NvBool
1476 kmigmgrIsEnginePartitionable_IMPL
1477 (
1478     OBJGPU *pGpu,
1479     KernelMIGManager *pKernelMIGManager,
1480     RM_ENGINE_TYPE rmEngineType
1481 )
1482 {
1483     return kmigmgrIsMIGSupported(pGpu, pKernelMIGManager) &&
1484            (RM_ENGINE_TYPE_IS_COPY(rmEngineType) ||
1485             RM_ENGINE_TYPE_IS_GR(rmEngineType) ||
1486             RM_ENGINE_TYPE_IS_NVDEC(rmEngineType) ||
1487             RM_ENGINE_TYPE_IS_NVENC(rmEngineType) ||
1488             RM_ENGINE_TYPE_IS_NVJPEG(rmEngineType) ||
1489             (rmEngineType == RM_ENGINE_TYPE_OFA));
1490 }
1491 
1492 /*!
1493  * @brief   Function to determine whether global RM_ENGINE_TYPE belongs to given
1494  *          gpu/compute instance.
1495  *
1496  * @return NV_TRUE if this engine falls within the given instance. NV_FALSE
1497  * otherwise. Non-partitioned engines fall within all instances.
1498  */
1499 NvBool
1500 kmigmgrIsEngineInInstance_IMPL
1501 (
1502     OBJGPU *pGpu,
1503     KernelMIGManager *pKernelMIGManager,
1504     RM_ENGINE_TYPE globalRmEngType,
1505     MIG_INSTANCE_REF ref
1506 )
1507 {
1508     RM_ENGINE_TYPE unused;
1509     return kmigmgrGetGlobalToLocalEngineType(pGpu, pKernelMIGManager, ref,
1510                                              globalRmEngType,
1511                                              &unused) == NV_OK;
1512 }
1513 
1514 /*!
1515  * @brief   Trim runlist buffer pools
1516  */
1517 void
1518 kmigmgrTrimInstanceRunlistBufPools_IMPL
1519 (
1520     OBJGPU *pGpu,
1521     KernelMIGManager *pKernelMIGManager,
1522     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
1523 )
1524 {
1525     RM_ENGINE_TYPE rmEngineType;
1526     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
1527 
1528     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId))
1529         return;
1530 
1531     if (!ctxBufPoolIsSupported(pGpu))
1532         return;
1533 
1534     for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++)
1535     {
1536         if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) ||
1537             !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) ||
1538             !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
1539         {
1540             continue;
1541         }
1542 
1543         if (kfifoGetRunlistBufPool(pGpu, pKernelFifo, rmEngineType) != NULL)
1544         {
1545             ctxBufPoolTrim(kfifoGetRunlistBufPool(pGpu, pKernelFifo, rmEngineType));
1546         }
1547     }
1548 }
1549 
1550 //
// Recreates the runlist buffers for engines belonging to this GPU instance, moving them
// out of non-partitionable memory and into the GPU instance's memory.
1553 //
1554 NV_STATUS
1555 kmigmgrCreateGPUInstanceRunlists_FWCLIENT
1556 (
1557     OBJGPU *pGpu,
1558     KernelMIGManager *pKernelMIGManager,
1559     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
1560 )
1561 {
1562     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
1563     NvU32 index;
1564     NvU32 runlistId;
1565     RM_ENGINE_TYPE rmEngineType;
1566     NvU32 engDesc;
1567     NV_STATUS status = NV_OK;
1568     NvU32 numEngines = kfifoGetNumEschedDrivenEngines(pKernelFifo);
1569     NvU32 maxRunlists = kfifoGetMaxNumRunlists_HAL(pGpu, pKernelFifo);
1570     NvU64 runlistAlign;
1571     NvU64 allocFlags;
1572     NvU32 attr;
1573     NV_ADDRESS_SPACE aperture;
1574     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1575     NV2080_CTRL_INTERNAL_FIFO_PROMOTE_RUNLIST_BUFFERS_PARAMS *pParams;
1576 
1577     // TODO: Mem partitioning check should suffice here
1578     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId) ||
1579         !ctxBufPoolIsSupported(pGpu))
1580     {
1581         return NV_OK;
1582     }
1583 
1584     kfifoRunlistGetBufAllocParams(pGpu, &aperture, &attr, &allocFlags);
1585     allocFlags |= MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL;
1586 
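    //
    // For each esched-driven engine, map it to its runlist and allocate that
    // runlist's buffers exactly once, tracking handled runlists in the GPU
    // instance's runlistIdMask.
    //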
1587     for (index = 0; index < numEngines; index++)
1588     {
1589         NV_ASSERT_OK_OR_GOTO(status,
1590             kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1591                                      ENGINE_INFO_TYPE_INVALID, index,
1592                                      ENGINE_INFO_TYPE_RUNLIST, &runlistId),
1593             failed);
1594 
1595         if ((runlistId >= maxRunlists) || (runlistId >= NV_NBITS_IN_TYPE(pKernelMIGGpuInstance->runlistIdMask)))
1596         {
1597             status = NV_ERR_INVALID_STATE;
1598             goto failed;
1599         }
1600 
        // Some engines share runlists, so skip if we have already dealt with this runlist
1602         if ((pKernelMIGGpuInstance->runlistIdMask & NVBIT64(runlistId)) != 0x0)
1603         {
1604             continue;
1605         }
1606 
1607         NV_ASSERT_OK_OR_GOTO(status,
1608             kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1609                                      ENGINE_INFO_TYPE_RUNLIST, runlistId,
1610                                      ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&rmEngineType),
1611             failed);
1612 
1613         NV_ASSERT_OK_OR_GOTO(status,
1614             kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1615                                      ENGINE_INFO_TYPE_RUNLIST, runlistId,
1616                                      ENGINE_INFO_TYPE_ENG_DESC, &engDesc),
1617             failed);
1618 
1619         // Check if this is a partitionable engine. Non-partitionable engine runlists can stay in RM reserved memory
1620         if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType))
1621         {
1622             continue;
1623         }
1624 
        // If the partitionable engine doesn't belong to this GPU instance, there is nothing to do
1626         if (!kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
1627         {
1628             continue;
1629         }
1630 
1631         //
1632         // Sched is only managed by Physical RM.
1633         // If running on GSP client, we will instead allocate the runlist buffers from the ctxbuf pool
1634         // and promote them to GSP later. GSP will skip the runlist buffer allocation during schedInit
1635         // and wait for the RPC to memdescDescribe the allocation from client RM.
1636         //
1637         // OBJSCHEDMGR is not valid in kernel RM. Allocate and store runlist buffers in OBJFIFO,
1638         // which will be sent to GSP to store in its schedmgr
1639         //
1640         NV_ASSERT_OK_OR_GOTO(status,
1641             kfifoRunlistAllocBuffers(pGpu, pKernelFifo,
1642                                      NV_TRUE,
1643                                      aperture,
1644                                      runlistId,
1645                                      attr,
1646                                      allocFlags,
1647                                      0,
1648                                      NV_TRUE,
1649                                      pKernelFifo->pppRunlistBufMemDesc[runlistId]),
1650             failed);
1651 
1652         // Add runlist to GPU instance
1653         pKernelMIGGpuInstance->runlistIdMask |= NVBIT64(runlistId);
1654     }
1655 
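    //
    // Describe each newly allocated runlist buffer in the promote params and send
    // them to Physical RM, which will memdescDescribe these client-allocated
    // buffers instead of allocating its own.
    //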
1656     runlistAlign = NVBIT64(kfifoRunlistGetBaseShift_HAL(pKernelFifo));
1657 
1658     pParams = portMemAllocNonPaged(sizeof(*pParams));
    NV_CHECK_OR_ELSE(LEVEL_ERROR, pParams != NULL,
        status = NV_ERR_NO_MEMORY;
        goto failed; );
1660 
1661     ct_assert(sizeof(pParams->runlistIdMask) == sizeof(pKernelMIGGpuInstance->runlistIdMask));
1662     pParams->runlistIdMask = pKernelMIGGpuInstance->runlistIdMask;
1663     pParams->swizzId = pKernelMIGGpuInstance->swizzId;
1664 
1665     for (runlistId = 0; runlistId < maxRunlists; runlistId++)
1666     {
1667         if (pParams->runlistIdMask & NVBIT64(runlistId))
1668         {
1669             for (index = 0; index < NUM_BUFFERS_PER_RUNLIST; index++)
1670             {
1671                 MEMORY_DESCRIPTOR *pSourceMemDesc = pKernelFifo->pppRunlistBufMemDesc[runlistId][index];
1672 
1673                 pParams->rlBuffers[runlistId][index].base = (NvU64)memdescGetPhysAddr(pSourceMemDesc, AT_GPU, 0);
1674                 pParams->rlBuffers[runlistId][index].size = pSourceMemDesc->ActualSize;
1675                 pParams->rlBuffers[runlistId][index].alignment = runlistAlign;
1676                 pParams->rlBuffers[runlistId][index].addressSpace = memdescGetAddressSpace(pSourceMemDesc);
1677                 pParams->rlBuffers[runlistId][index].cpuCacheAttrib = attr;
1678 
1679             }
1680         }
1681     }
1682 
1683     status = pRmApi->Control(pRmApi,
1684                              pGpu->hInternalClient,
1685                              pGpu->hInternalSubdevice,
1686                              NV2080_CTRL_CMD_INTERNAL_FIFO_PROMOTE_RUNLIST_BUFFERS,
1687                              pParams,
1688                              sizeof(*pParams));
1689 
1690     portMemFree(pParams);
1691 
1692     NV_ASSERT_OK_OR_GOTO(status, status, failed);
1693 
1694     //
1695     // Trim out any additional memory after runlist buffers are allocated
1696     // from ctx buf pools
1697     //
1698     kmigmgrTrimInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
1699 
1700     return NV_OK;
1701 
1702 failed:
1703     NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(status,
1704         kmigmgrDeleteGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance));
1705 
1706     return status;
1707 }
1708 
1709 //
1710 // Deletes runlist buffers for all partitionable engines from GPU instance's memory and
1711 // reallocates these runlist buffers in non-partitionable memory.
1712 //
1713 NV_STATUS
1714 kmigmgrDeleteGPUInstanceRunlists_FWCLIENT
1715 (
1716     OBJGPU *pGpu,
1717     KernelMIGManager *pKernelMIGManager,
1718     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
1719 )
1720 {
1721     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
1722     NvU32 runlistId;
1723     NV_STATUS status = NV_OK;
1724     NvU32 bufIdx;
1725     MEMORY_DESCRIPTOR **ppRlBuffer;
1726 
1727     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId) ||
1728         !ctxBufPoolIsSupported(pGpu))
1729     {
1730         NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance->runlistIdMask == 0, NV_ERR_INVALID_STATE);
1731         return NV_OK;
1732     }
1733 
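    //
    // Free and destroy every runlist buffer memdesc owned by this GPU instance,
    // then clear the corresponding runlist from the instance's runlistIdMask.
    //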
1734     FOR_EACH_INDEX_IN_MASK(64, runlistId, pKernelMIGGpuInstance->runlistIdMask)
1735     {
1736         for (bufIdx = 0; bufIdx < NUM_BUFFERS_PER_RUNLIST; bufIdx++)
1737         {
1738             ppRlBuffer = &(pKernelFifo->pppRunlistBufMemDesc[runlistId][bufIdx]);
1739 
1740             if (*ppRlBuffer != NULL)
1741             {
1742                 memdescFree(*ppRlBuffer);
1743                 memdescDestroy(*ppRlBuffer);
1744                 *ppRlBuffer = NULL;
1745             }
1746         }
1747 
1748         // remove runlist from GPU instance
1749         pKernelMIGGpuInstance->runlistIdMask &= ~(NVBIT64(runlistId));
1750 
1751     }
1752     FOR_EACH_INDEX_IN_MASK_END;
1753 
1754     return status;
1755 }
1756 
1757 /*!
1758  * @brief   Load MIG instance topology from persistence, if available.
1759  *          If MIG is disabled, this operation will be skipped with a warning.
1760  */
1761 NV_STATUS
1762 kmigmgrRestoreFromPersistence_PF
1763 (
1764     OBJGPU *pGpu,
1765     KernelMIGManager *pKernelMIGManager
1766 )
1767 {
1768     NV_STATUS status = NV_OK;
1769     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1770     GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL;
1771     NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pPartImportParams = NULL;
1772     NVC637_CTRL_EXEC_PARTITIONS_IMPORT_EXPORT_PARAMS *pExecPartImportParams = NULL;
1773     NvU32 GIIdx;
1774     NvU32 CIIdx;
1775     NvBool bTopologyValid;
1776     NvHandle hClient = NV01_NULL_OBJECT;
1777     NvHandle hDevice = NV01_NULL_OBJECT;
1778     NvHandle hSubdevice = NV01_NULL_OBJECT;
1779 
1780     NV_CHECK_OR_RETURN(LEVEL_SILENT,
1781                        gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave),
1782                        NV_OK);
1783 
1784     // Check to see whether there was actually anything saved
1785     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1786     {
1787         GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1788         if (pGPUInstanceSave->bValid)
1789             break;
1790     }
1791 
1792     bTopologyValid = (GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI));
1793     NV_CHECK_OR_RETURN(LEVEL_SILENT, bTopologyValid, NV_OK);
1794 
1795     if (!IS_MIG_ENABLED(pGpu))
1796     {
1797         NV_PRINTF(LEVEL_WARNING, "Skipping reinitialization of persistent MIG instances due to MIG disablement!\n");
1798         //
1799         // If we ended up here, we have inconsistent state in that there are instances to be restored
        // but MIG is disabled. This also means that the /proc filesystem is populated with nodes
        // for the instances that we are expected to restore but won't. Clean them up.
1802         //
1803         gpumgrUnregisterRmCapsForMIGGI(gpuGetDBDF(pGpu));
1804         return NV_OK;
1805     }
1806 
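    //
    // Allocate an internal client, device, and subdevice against which the GPU
    // instance and compute instance import controls will be issued.
    //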
1807     NV_ASSERT_OK_OR_RETURN(
1808         rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice));
1809 
1810     pPartImportParams = portMemAllocNonPaged(sizeof(*pPartImportParams));
1811     NV_CHECK_OR_ELSE(LEVEL_ERROR, pPartImportParams != NULL,
1812         status = NV_ERR_NO_MEMORY;
1813         goto cleanup; );
1814     pExecPartImportParams = portMemAllocNonPaged(sizeof(*pExecPartImportParams));
1815     NV_CHECK_OR_ELSE(LEVEL_ERROR, pExecPartImportParams != NULL,
1816         status = NV_ERR_NO_MEMORY;
1817         goto cleanup; );
1818 
1819     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1820     {
1821         GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1822         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance;
1823 
1824         if (!pGPUInstanceSave->bValid)
1825             continue;
1826 
1827         portMemSet(pPartImportParams, 0, sizeof(*pPartImportParams));
1828         pPartImportParams->swizzId = pGPUInstanceSave->swizzId;
1829         portMemCopy(&pPartImportParams->info, sizeof(pPartImportParams->info),
1830                     &pGPUInstanceSave->giInfo, sizeof(pGPUInstanceSave->giInfo));
1831 
1832         NV_ASSERT_OK_OR_GOTO(status,
1833             pRmApi->Control(pRmApi,
1834                             hClient,
1835                             hSubdevice,
1836                             NV2080_CTRL_CMD_INTERNAL_KMIGMGR_IMPORT_GPU_INSTANCE,
1837                             pPartImportParams,
1838                             sizeof(*pPartImportParams)),
1839             cleanup);
1840 
1841         NV_ASSERT_OK_OR_GOTO(status,
1842             kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, pGPUInstanceSave->swizzId, &pKernelMIGGpuInstance),
1843             cleanup);
1844 
1845         // Restore capability caps
1846         pKernelMIGGpuInstance->pOsRmCaps = pGPUInstanceSave->pOsRmCaps;
1847 
1848         for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pGPUInstanceSave->saveCI); ++CIIdx)
1849         {
1850             GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pGPUInstanceSave->saveCI[CIIdx];
1851             NvHandle hSubscription;
1852             NVC637_ALLOCATION_PARAMETERS alloc;
1853 
1854             if (!pComputeInstanceSave->bValid)
1855                 continue;
1856 
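            //
            // Subscribe to the restored GPU instance so that the compute instance
            // import control can be issued against the subscription object.
            //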
1857             portMemSet(&alloc, 0, sizeof(alloc));
1858             alloc.swizzId = pGPUInstanceSave->swizzId;
1859             NV_ASSERT_OK_OR_GOTO(status,
1860                 pRmApi->AllocWithSecInfo(pRmApi,
1861                                          hClient,
1862                                          hSubdevice,
1863                                          &hSubscription,
1864                                          AMPERE_SMC_PARTITION_REF,
1865                                          &alloc,
1866                                          sizeof(alloc),
1867                                          RMAPI_ALLOC_FLAGS_NONE,
1868                                          NULL,
1869                                          &pRmApi->defaultSecInfo),
1870                 cleanup);
1871 
1872             portMemSet(pExecPartImportParams, 0, sizeof(*pExecPartImportParams));
1873             pExecPartImportParams->id = pComputeInstanceSave->id;
1874             pExecPartImportParams->bCreateCap = NV_FALSE;
1875             portMemCopy(&pExecPartImportParams->info, sizeof(pExecPartImportParams->info),
1876                         &pComputeInstanceSave->ciInfo, sizeof(pComputeInstanceSave->ciInfo));
1877 
1878             NV_ASSERT_OK_OR_GOTO(status,
1879                 pRmApi->Control(pRmApi,
1880                                 hClient,
1881                                 hSubscription,
1882                                 NVC637_CTRL_CMD_EXEC_PARTITIONS_IMPORT,
1883                                 pExecPartImportParams,
1884                                 sizeof(*pExecPartImportParams)),
1885                 cleanup);
1886 
1887             // Restore capability caps
1888             pKernelMIGGpuInstance->MIGComputeInstance[pExecPartImportParams->id].pOsRmCaps = pComputeInstanceSave->pOsRmCaps;
1889 
1890             pRmApi->Free(pRmApi, hClient, hSubscription);
1891         }
1892     }
1893 
1894 cleanup:
1895     rmapiutilFreeClientAndDeviceHandles(pRmApi, &hClient, &hDevice, &hSubdevice);
1896     portMemFree(pPartImportParams);
1897     portMemFree(pExecPartImportParams);
1898 
1899     //
1900     // Let stateUnload handle an error teardown case, since it has to be
1901     // coordinated between CPU/GSP
1902     //
1903     return status;
1904 }
1905 
1906 /*!
1907  * @brief   Load MIG instance topology from persistence, if available.
1908  *          If MIG is disabled, this operation will be skipped with a warning.
1909  */
1910 NV_STATUS
1911 kmigmgrRestoreFromPersistence_VF
1912 (
1913     OBJGPU *pGpu,
1914     KernelMIGManager *pKernelMIGManager
1915 )
1916 {
1917     NV_STATUS status = NV_OK;
1918     GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL;
1919     NvU32 GIIdx;
1920     NvU32 CIIdx;
1921     NvBool bTopologyValid;
1922     NvBool bMemoryPartitioningNeeded;
1923     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
1924 
1925     NV_CHECK_OR_RETURN(LEVEL_SILENT,
1926                        gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave),
1927                        NV_OK);
1928 
1929     // Check to see whether there was actually anything saved
1930     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1931     {
1932         GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1933         if (pGPUInstanceSave->bValid)
1934             break;
1935     }
1936 
1937     bTopologyValid = (GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI));
1938     NV_CHECK_OR_RETURN(LEVEL_SILENT, bTopologyValid, NV_OK);
1939 
1940     if (!IS_MIG_ENABLED(pGpu))
1941     {
1942         NV_PRINTF(LEVEL_WARNING, "Skipping reinitialization of persistent MIG instances due to MIG disablement!\n");
1943         gpumgrUnregisterRmCapsForMIGGI(gpuGetDBDF(pGpu));
1944         return NV_OK;
1945     }
1946 
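    //
    // Use the first saved GPU instance's swizzId to decide whether memory
    // partitioning must be enabled along with the rest of the MIG state.
    //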
1947     bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pTopologySave->saveGI[0].swizzId);
1948 
1949     // Perform all initialization that must be done when MIG is first enabled
1950     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
1951         kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_TRUE, NV_FALSE));
1952 
1953     for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1954     {
1955         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
1956         GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1957         KMIGMGR_CREATE_GPU_INSTANCE_PARAMS restore =
1958         {
1959             .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE,
1960             .inst.restore.pGPUInstanceSave = pGPUInstanceSave
1961         };
1962         NvU32 swizzId;
1963 
1964         if (!pGPUInstanceSave->bValid)
1965             continue;
1966 
1967         // Create a GPU instance using the saved data
1968         NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING,
1969             kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &swizzId, restore, NV_TRUE, NV_FALSE),
1970             fail);
1971 
1972         NV_ASSERT_OK_OR_GOTO(status,
1973             kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGPUInstance),
1974             fail);
1975 
1976         // Restore capability caps
1977         pKernelMIGGPUInstance->pOsRmCaps = pGPUInstanceSave->pOsRmCaps;
1978 
1979         for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pGPUInstanceSave->saveCI); ++CIIdx)
1980         {
1981             GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pGPUInstanceSave->saveCI[CIIdx];
1982             KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS restore =
1983             {
1984                 .type = KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE,
1985                 .inst.restore.pComputeInstanceSave = pComputeInstanceSave
1986             };
1987             //
            // This id variable doesn't actually need to be initialized, since the callee
            // does not reference its value. However, GCC 13 warns about it, so WAR the
            // issue by initializing it.
1991             //
1992             NvU32 id = pComputeInstanceSave->id;
1993 
1994             if (!pComputeInstanceSave->bValid)
1995                 continue;
1996 
1997             // Create a compute instance on this GPU instance using the saved data
1998             NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING,
1999                 kmigmgrCreateComputeInstances_HAL(pGpu, pKernelMIGManager, pKernelMIGGPUInstance, NV_FALSE, restore, &id, NV_FALSE),
2000                 fail);
2001 
2002             // Restore capability caps
2003             pKernelMIGGPUInstance->MIGComputeInstance[id].pOsRmCaps = pComputeInstanceSave->pOsRmCaps;
2004         }
2005     }
2006 
2007     return NV_OK;
2008 
2009 fail:
2010 
2011     // Clean up anything we created and bail
2012     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
2013     {
2014         for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance); ++CIIdx)
2015         {
2016             MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGPUInstance->MIGComputeInstance[CIIdx];
2017 
2018             // Skip invalid compute instances
2019             if (!pMIGComputeInstance->bValid)
2020                 continue;
2021 
2022             NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR,
2023                 kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGPUInstance, CIIdx, NV_TRUE));
2024         }
2025 
2026         NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR,
2027             kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGPUInstance->swizzId, NV_TRUE));
2028     }
2029     FOR_EACH_VALID_GPU_INSTANCE_END();
2030 
2031     NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR,
2032         kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE));
2033 
2034     return status;
2035 }
2036 
/*!
 * @brief   Initialize a MIG GPU instance
2039  */
2040 void
2041 kmigmgrInitGPUInstanceInfo_IMPL
2042 (
2043     OBJGPU *pGpu,
2044     KernelMIGManager *pKernelMIGManager,
2045     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
2046 )
2047 {
2048     NvU32 i;
2049 
2050     bitVectorClrAll(&pKernelMIGGpuInstance->exclusiveEngMask);
2051     bitVectorClrAll(&pKernelMIGGpuInstance->sharedEngMask);
2052 
2053     for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++i)
2054     {
2055         NV_ASSERT(!pKernelMIGGpuInstance->MIGComputeInstance[i].bValid);
2056         pKernelMIGGpuInstance->MIGComputeInstance[i].pOsRmCaps = NULL;
2057         pKernelMIGGpuInstance->MIGComputeInstance[i].id = KMIGMGR_COMPUTE_INSTANCE_ID_INVALID;
2058     }
2059 
2060     pKernelMIGGpuInstance->swizzId = KMIGMGR_SWIZZID_INVALID;
2061     pKernelMIGGpuInstance->hMemory = NV01_NULL_OBJECT;
2062     pKernelMIGGpuInstance->pShare = NULL;
2063     pKernelMIGGpuInstance->pMemoryPartitionHeap = NULL;
2064     pKernelMIGGpuInstance->bValid = NV_FALSE;
2065     pKernelMIGGpuInstance->memRange = NV_RANGE_EMPTY;
2066     pKernelMIGGpuInstance->pMIGGpuInstance = NULL;
2067     pKernelMIGGpuInstance->pOsRmCaps = NULL;
2068     pKernelMIGGpuInstance->pProfile = NULL;
2069 
2070     portMemSet(&pKernelMIGGpuInstance->resourceAllocation, 0x0, sizeof(pKernelMIGGpuInstance->resourceAllocation));
2071 }
2072 
2073 /*!
2074  * @brief   Function to set device profiling in use
2075  */
2076 NV_STATUS
2077 kmigmgrSetDeviceProfilingInUse_IMPL
2078 (
2079     OBJGPU *pGpu,
2080     KernelMIGManager *pKernelMIGManager
2081 )
2082 {
2083     NV_ASSERT_OR_RETURN(!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager),
2084                         NV_ERR_STATE_IN_USE);
2085     pKernelMIGManager->bDeviceProfilingInUse = NV_TRUE;
2086     return NV_OK;
2087 }
2088 
2089 /*!
2090  * @brief   Function to clear device profiling in-use
2091  */
2092 void
2093 kmigmgrClearDeviceProfilingInUse_IMPL
2094 (
2095     OBJGPU *pGpu,
2096     KernelMIGManager *pKernelMIGManager
2097 )
2098 {
2099     pKernelMIGManager->bDeviceProfilingInUse = NV_FALSE;
2100 }
2101 
2102 /*!
2103  * @brief   Function to check if device profiling is in-use
2104  */
2105 NvBool
2106 kmigmgrIsDeviceProfilingInUse_IMPL
2107 (
2108     OBJGPU *pGpu,
2109     KernelMIGManager *pKernelMIGManager
2110 )
2111 {
2112     return pKernelMIGManager->bDeviceProfilingInUse;
2113 }
2114 
2115 /*!
 * @brief   Function to check if a specific client is subscribed to DeviceProfiling
2117  */
2118 NvBool
2119 kmigmgrIsClientUsingDeviceProfiling_IMPL
2120 (
2121     OBJGPU *pGpu,
2122     KernelMIGManager *pKernelMIGManager,
2123     NvHandle hClient
2124 )
2125 {
2126     RsClient *pRsClient;
2127     Device *pDevice;
2128     NV_STATUS status;
2129 
2130     NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_ENABLED(pGpu), NV_FALSE);
2131 
2132     if (!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager))
2133     {
2134         return NV_FALSE;
2135     }
2136 
2137     NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR,
2138         serverGetClientUnderLock(&g_resServ, hClient, &pRsClient),
2139         return NV_FALSE; );
2140 
2141     NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR,
2142         deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice),
2143         return NV_FALSE; );
2144 
2145     return kmigmgrIsDeviceUsingDeviceProfiling(pGpu, pKernelMIGManager, pDevice);
2146 }
2147 
2148 /*!
 * @brief   Function to check if a specific device is subscribed to DeviceProfiling
2150  */
2151 NvBool
2152 kmigmgrIsDeviceUsingDeviceProfiling_IMPL
2153 (
2154     OBJGPU *pGpu,
2155     KernelMIGManager *pKernelMIGManager,
2156     Device *pDevice
2157 )
2158 {
2159     RsClient *pRsClient;
2160     GPUInstanceSubscription *pGPUInstanceSubscription;
2161     Subdevice *pSubdevice;
2162     NV_STATUS status;
2163 
2164     NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_ENABLED(pGpu), NV_FALSE);
2165 
2166     if (!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager))
2167     {
2168         return NV_FALSE;
2169     }
2170 
2171     pRsClient = RES_GET_CLIENT(pDevice);
2172 
2173     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2174         subdeviceGetByInstance(pRsClient, RES_GET_HANDLE(pDevice), 0, &pSubdevice));
2175 
2176     NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR,
2177         gisubscriptionGetGPUInstanceSubscription(pRsClient, RES_GET_HANDLE(pSubdevice), &pGPUInstanceSubscription),
2178         return NV_FALSE; );
2179 
2180     return gisubscriptionIsDeviceProfiling(pGPUInstanceSubscription);
2181 }
2182 
2183 /*!
 * @brief   Enable all LCE engines for use by GPU instances
2185  */
2186 NV_STATUS
2187 kmigmgrEnableAllLCEs_IMPL
2188 (
2189     OBJGPU *pGpu,
2190     KernelMIGManager *pKernelMIGManager,
2191     NvBool bEnableAllLCEs
2192 )
2193 {
2194     KernelCE *pKCe = NULL;
2195 
2196     //
2197     // AMODEL support of CEs is faked. No actual work needs to be done for
2198     // AMODEL here, so just return NV_OK early to avoid triggering assertions.
2199     //
2200     NV_CHECK_OR_RETURN(LEVEL_SILENT, !IsAMODEL(pGpu), NV_OK);
2201 
2202     NV_ASSERT_OK_OR_RETURN(kceFindFirstInstance(pGpu, &pKCe));
2203 
2204     if (bEnableAllLCEs)
2205         NV_ASSERT_OK_OR_RETURN(kceUpdateClassDB_HAL(pGpu, pKCe));
2206     else
2207         NV_ASSERT_OK_OR_RETURN(kceTopLevelPceLceMappingsUpdate(pGpu, pKCe));
2208 
2209     return NV_OK;
2210 }
2211 
2212 /*!
2213  * @brief   Retrieves instance(s) associated with a device, if applicable
2214  */
2215 NV_STATUS
2216 kmigmgrGetInstanceRefFromDevice_IMPL
2217 (
2218     OBJGPU *pGpu,
2219     KernelMIGManager *pKernelMIGManager,
2220     Device *pDevice,
2221     MIG_INSTANCE_REF *pRef
2222 )
2223 {
2224     NV_STATUS status = NV_OK;
2225     RsClient *pRsClient;
2226     GPUInstanceSubscription *pGPUInstanceSubscription;
2227     ComputeInstanceSubscription *pComputeInstanceSubscription = NULL;
2228     Subdevice *pSubdevice;
2229     MIG_INSTANCE_REF ref;
2230 
2231     NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT);
2232     *pRef = kmigmgrMakeNoMIGReference();
2233 
2234     if (!IS_MIG_IN_USE(pGpu))
2235     {
2236         return NV_ERR_INVALID_STATE;
2237     }
2238 
2239     pRsClient = RES_GET_CLIENT(pDevice);
2240 
2241     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2242         subdeviceGetByInstance(pRsClient, RES_GET_HANDLE(pDevice), 0, &pSubdevice));
2243 
2244     NV_CHECK_OK_OR_RETURN(LEVEL_NOTICE,
2245         gisubscriptionGetGPUInstanceSubscription(pRsClient, RES_GET_HANDLE(pSubdevice),
2246                                                  &pGPUInstanceSubscription));
2247 
2248     ref.pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance;
2249 
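    //
    // If the subdevice also holds a compute instance subscription, narrow the
    // reference to that compute instance; otherwise return a GI-only reference.
    //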
2250     status = cisubscriptionGetComputeInstanceSubscription(pRsClient,
2251                                                           RES_GET_HANDLE(pGPUInstanceSubscription),
2252                                                           &pComputeInstanceSubscription);
2253     if (status == NV_OK)
2254     {
2255         ref = kmigmgrMakeCIReference(pGPUInstanceSubscription->pKernelMIGGpuInstance,
2256                                    pComputeInstanceSubscription->pMIGComputeInstance);
2257     }
2258     else
2259     {
2260         ref = kmigmgrMakeGIReference(pGPUInstanceSubscription->pKernelMIGGpuInstance);
2261         // Quash status, this is optional
2262         status = NV_OK;
2263     }
2264 
2265     NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_STATE);
2266     *pRef = ref;
2267     return status;
2268 }
2269 
2270 /*!
2271  * @brief   Retrieves instance(s) associated with a client, if applicable
2272  */
2273 NV_STATUS
2274 kmigmgrGetInstanceRefFromClient_IMPL
2275 (
2276     OBJGPU *pGpu,
2277     KernelMIGManager *pKernelMIGManager,
2278     NvHandle hClient,
2279     MIG_INSTANCE_REF *pRef
2280 )
2281 {
2282     RsClient *pRsClient;
2283     Device *pDevice;
2284 
2285     NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT);
2286     *pRef = kmigmgrMakeNoMIGReference();
2287 
2288     if (!IS_MIG_IN_USE(pGpu))
2289     {
2290         return NV_ERR_INVALID_STATE;
2291     }
2292 
2293     NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClient, &pRsClient));
2294 
2295     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2296         deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice));
2297 
2298     return kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager,
2299                                            pDevice, pRef);
2300 }
2301 
2302 /*!
2303  * @brief   Retrieves GPU instance heap associated with a device, if applicable
2304  */
2305 NV_STATUS
2306 kmigmgrGetMemoryPartitionHeapFromDevice_IMPL
2307 (
2308     OBJGPU           *pGpu,
2309     KernelMIGManager *pKernelMIGManager,
2310     Device           *pDevice,
2311     Heap            **ppMemoryPartitionHeap
2312 )
2313 {
2314     MIG_INSTANCE_REF ref;
2315     NV_STATUS rmStatus = NV_OK;
2316     NvHandle hClient;
2317 
2318     NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE);
2319 
2320     hClient = RES_GET_CLIENT_HANDLE(pDevice);
2321 
2322     rmStatus = kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref);
2323     if ((rmStatus != NV_OK) || !kmigmgrIsMIGReferenceValid(&ref))
2324     {
2325         RS_PRIV_LEVEL privLevel = rmclientGetCachedPrivilegeByHandle(hClient);
2326 
        // It's okay for kernel/root clients not to be associated with a GPU instance
2328         if (privLevel >= RS_PRIV_LEVEL_KERNEL)
2329         {
2330             rmStatus = NV_OK;
2331         }
2332         else
2333         {
2334             NV_PRINTF(LEVEL_ERROR,
2335                       "Failed to get GPU instance for non-privileged client hClient=0x%08x!\n",
2336                       hClient);
2337 
            // If we got here due to bogus GPU instance info, actually return an error
2339             if (rmStatus == NV_OK)
2340                 rmStatus = NV_ERR_INVALID_STATE;
2341         }
2342     }
2343     else
2344     {
2345         NV_ASSERT_OR_RETURN(ppMemoryPartitionHeap != NULL, NV_ERR_INVALID_ARGUMENT);
2346         *ppMemoryPartitionHeap = ref.pKernelMIGGpuInstance->pMemoryPartitionHeap;
2347         NV_PRINTF(LEVEL_INFO,
2348                   "GPU instance heap found for hClient = 0x%08x with swizzId = %d!\n",
2349                   hClient, ref.pKernelMIGGpuInstance->swizzId);
2350     }
2351 
2352     return rmStatus;
2353 }
2354 
2355 /*!
2356  * @brief   Retrieves swizzid associated with a client, if applicable
2357  */
2358 NV_STATUS
2359 kmigmgrGetSwizzIdFromDevice_IMPL
2360 (
2361     OBJGPU *pGpu,
2362     KernelMIGManager *pKernelMIGManager,
2363     Device *pDevice,
2364     NvU32 *pSwizzId
2365 )
2366 {
2367     MIG_INSTANCE_REF ref;
2368     NV_ASSERT_OK_OR_RETURN(
2369         kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
2370 
2371     *pSwizzId = ref.pKernelMIGGpuInstance->swizzId;
2372     return NV_OK;
2373 }
2374 
2375 /*!
 * @brief   Print out the properties of the specified MIG GPU instance
2377  */
2378 void
2379 kmigmgrPrintGPUInstanceInfo_IMPL
2380 (
2381     OBJGPU *pGpu,
2382     KernelMIGManager *pKernelMIGManager,
2383     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
2384 )
2385 {
2386 #if NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
2387     NV_STATUS status;
2388     const MIG_GPU_INSTANCE_MEMORY_CONFIG *pGPUInstanceMemConfig;
2389     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2390     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2391     NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
2392 
2393     NvU32 grCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2394                                               RM_ENGINE_TYPE_GR(0));
2395     NvU32 ceCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2396                                               RM_ENGINE_TYPE_COPY(0));
2397     NvU32 decCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2398                                                RM_ENGINE_TYPE_NVDEC(0));
2399     NvU32 encCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2400                                                RM_ENGINE_TYPE_NVENC(0));
2401     NvU32 jpgCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2402                                                RM_ENGINE_TYPE_NVJPG);
2403     NvU32 ofaCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2404                                                RM_ENGINE_TYPE_OFA);
2405 
2406 #define PADDING_STR "-----------------------------------------------------------------"
2407 
2408     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2409     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2410               "SwizzId",
2411               "SwizzId Table Mask",
2412               "Gpc Count");
2413     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2414     NV_PRINTF(LEVEL_INFO, "| %18d | %18s | %18d  |\n",
2415               pKernelMIGGpuInstance->swizzId,
2416               "NOT IMPLEMENTED",
2417               pKernelMIGGpuInstance->resourceAllocation.gpcCount);
2418     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2419     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2420               "OBJGR Count",
2421               "OBJCE Count",
2422               "NVDEC Count");
2423     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2424     NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18d  |\n",
2425               grCount,
2426               ceCount,
2427               decCount);
2428     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2429     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2430               "NVENC Count",
2431               "NVJPG Count",
2432               "NVOFA Count");
2433     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2434     NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18d  |\n",
2435               encCount,
2436               jpgCount,
2437               ofaCount);
2438     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2439     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2440               "VEID Offset",
2441               "VEID Count",
2442               "VEID-GR Map");
2443     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2444     NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18llx  |\n",
2445               pKernelMIGGpuInstance->resourceAllocation.veidOffset,
2446               pKernelMIGGpuInstance->resourceAllocation.veidCount,
2447               DRF_MASK64(pKernelMIGGpuInstance->resourceAllocation.veidCount : 0) << pKernelMIGGpuInstance->resourceAllocation.veidOffset);
2448     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2449     NV_PRINTF(LEVEL_INFO, "| %29s | %29s |\n",
2450               "Partitionable",
2451               "Partitionable");
2452     NV_PRINTF(LEVEL_INFO, "| %29s | %29s |\n",
2453               "Memory Start Addr",
2454               "Memory End Addr");
2455     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2456     NV_PRINTF(LEVEL_INFO, "| %29llx | %29llx |\n",
2457               partitionableMemoryRange.lo,
2458               partitionableMemoryRange.hi);
2459     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2460     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2461               "Local Instance",
2462               "Local Instance",
2463               "Local Instance");
2464     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2465               "Memory Start Addr",
2466               "Memory End Addr",
2467               "Size in Bytes");
2468     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2469     NV_PRINTF(LEVEL_INFO, "| %18llx | %18llx | %18llx  |\n",
2470               pKernelMIGGpuInstance->memRange.lo,
2471               pKernelMIGGpuInstance->memRange.hi,
2472               rangeLength(pKernelMIGGpuInstance->memRange));
2473     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2474     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2475               "Local Instance",
2476               "Local Instance",
2477               "Local Instance");
2478     NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s  |\n",
2479               "Start VMMU Seg.",
2480               "End VMMU Seg.",
2481               "Size in VMMU Seg.");
2482     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2483 
2484     NV_ASSERT_OK_OR_ELSE(status,
2485         kmemsysGetMIGGPUInstanceMemConfigFromSwizzId(pGpu, pKernelMemorySystem, pKernelMIGGpuInstance->swizzId, &pGPUInstanceMemConfig),
2486         return;);
2487     NV_PRINTF(LEVEL_INFO, "| %18llx | %18llx | %18llx  |\n",
2488               pGPUInstanceMemConfig->startingVmmuSegment,
2489               (pGPUInstanceMemConfig->startingVmmuSegment +
2490                pGPUInstanceMemConfig->memSizeInVmmuSegment) - 1,
2491               pGPUInstanceMemConfig->memSizeInVmmuSegment);
2492     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2493 #undef PADDING_STR
2494 #endif // NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
2495 }
2496 
2497 /*!
 * @brief   Function to set GPU instance information representing the provided swizzId.
2499  */
2500 NV_STATUS
2501 kmigmgrSetGPUInstanceInfo_IMPL
2502 (
2503     OBJGPU *pGpu,
2504     KernelMIGManager *pKernelMIGManager,
2505     NvU32 swizzId,
2506     KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params
2507 )
2508 {
2509     NvU32 i;
2510     NvHandle hMemory = NV01_NULL_OBJECT;
2511     NV_RANGE addrRange = NV_RANGE_EMPTY;
2512     NV_STATUS rmStatus = NV_OK;
2513     Heap *pMemoryPartitionHeap = NULL;
2514     NvU32 partitionFlag = (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST)
2515         ? params.inst.request.partitionFlag
2516         : params.inst.restore.pGPUInstanceSave->giInfo.partitionFlags;
2517 
2518     if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
2519     {
2520         return NV_ERR_INVALID_ARGUMENT;
2521     }
2522 
2523     for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; ++i)
2524     {
2525         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[i];
2526 
2527         // Find first invalid GPU instance and use it to save GPU instance data
2528         if (!pKernelMIGGpuInstance->bValid)
2529         {
2530             MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2531 
2532             rmStatus = memmgrAllocMIGGPUInstanceMemory_HAL(pGpu, pMemoryManager, swizzId,
2533                                                            &hMemory, &addrRange,
2534                                                            &pMemoryPartitionHeap);
2535             NV_CHECK_OR_RETURN(LEVEL_ERROR, rmStatus == NV_OK, rmStatus);
2536 
2537             // Mark GPU instance as valid as we use GPU instance Invalidation for cleanup
2538             pKernelMIGGpuInstance->bValid = NV_TRUE;
2539             pKernelMIGGpuInstance->swizzId = swizzId;
2540             pKernelMIGGpuInstance->hMemory = hMemory;
2541             pKernelMIGGpuInstance->memRange = addrRange;
2542             pKernelMIGGpuInstance->pMemoryPartitionHeap = pMemoryPartitionHeap;
2543             pKernelMIGGpuInstance->partitionFlag = partitionFlag;
2544 
2545             //
2546             // Offloading of VGPU to GSP requires that the memRange in KERNEL_MIG_GPU_INSTANCE
2547             // be populated, as the plugin will query only within GSP for GPU INSTANCE information.
2548             // CPU-RM is the entity which actually calculates and allocates memory, so with
2549             // VGPU offloaded, GSP-RM must be updated with the memRange info.
2550             //
2551             if (IS_GSP_CLIENT(pGpu) && !IS_VIRTUAL(pGpu) && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
2552             {
2553                 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
2554                 NV2080_CTRL_INTERNAL_KMIGMGR_PROMOTE_GPU_INSTANCE_MEM_RANGE_PARAMS memParams;
2555 
2556                 memParams.swizzId = pKernelMIGGpuInstance->swizzId;
2557                 memParams.memAddrRange.lo = pKernelMIGGpuInstance->memRange.lo;
2558                 memParams.memAddrRange.hi = pKernelMIGGpuInstance->memRange.hi;
2559                 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2560                     pRmApi->Control(pRmApi,
2561                                     pGpu->hInternalClient,
2562                                     pGpu->hInternalSubdevice,
2563                                     NV2080_CTRL_CMD_INTERNAL_KMIGMGR_PROMOTE_GPU_INSTANCE_MEM_RANGE,
2564                                     &memParams,
2565                                     sizeof(memParams)));
2566             }
2567 
2568             NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2569                 kmigmgrGetProfileByPartitionFlag(pGpu, pKernelMIGManager, partitionFlag, &pKernelMIGGpuInstance->pProfile));
2570 
2571             // Allocate RsShared for the GPU instance
2572             NV_ASSERT_OK_OR_RETURN(serverAllocShare(&g_resServ, classInfo(RsShared),
2573                                                     &pKernelMIGGpuInstance->pShare));
2574 
2575             // Get resources associated with this swizzId
2576             NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2577                 kmigmgrSwizzIdToResourceAllocation(pGpu, pKernelMIGManager, swizzId, params,
2578                                                    pKernelMIGGpuInstance,
2579                                                    &pKernelMIGGpuInstance->resourceAllocation));
2580 
2581             pKernelMIGGpuInstance->resourceAllocation.gfxGpcCount = pKernelMIGGpuInstance->pProfile->gfxGpcCount;
2582 
2583             // Set assigned engines as in use
2584             NV_ASSERT_OK_OR_RETURN(
2585                 kmigmgrSetEnginesInUse(pGpu, pKernelMIGManager, &pKernelMIGGpuInstance->resourceAllocation.engines));
2586 
2587             // Update engine tracking bitmasks for CI management later
2588             bitVectorClrAll(&pKernelMIGGpuInstance->exclusiveEngMask);
2589             bitVectorClrAll(&pKernelMIGGpuInstance->sharedEngMask);
2590 
2591             // Print GPU instance info for debug
2592             NV_PRINTF(LEVEL_INFO, "CREATING GPU instance\n");
2593             kmigmgrPrintGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
2594 
2595             break;
2596         }
2597     }
2598 
2599     NV_ASSERT_OR_RETURN(i < KMIGMGR_MAX_GPU_INSTANCES, NV_ERR_INSUFFICIENT_RESOURCES);
2600     return rmStatus;
2601 }
2602 
2603 /*!
 * @brief   Function to get GPU instance information representing the provided swizzId.
2605  */
2606 NV_STATUS
2607 kmigmgrGetGPUInstanceInfo_IMPL
2608 (
2609     OBJGPU *pGpu,
2610     KernelMIGManager *pKernelMIGManager,
2611     NvU32 swizzId,
2612     KERNEL_MIG_GPU_INSTANCE **ppKernelMIGGpuInstance
2613 )
2614 {
2615     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
2616 
2617     if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
2618     {
2619         return NV_ERR_INVALID_ARGUMENT;
2620     }
2621 
2622     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
2623     {
2624         if (pKernelMIGGPUInstance->swizzId == swizzId)
2625         {
2626             *ppKernelMIGGpuInstance = pKernelMIGGPUInstance;
2627             return NV_OK;
2628         }
2629     }
2630     FOR_EACH_VALID_GPU_INSTANCE_END();
2631 
2632     return NV_ERR_INVALID_ARGUMENT;
2633 }
2634 
2635 /*!
2636  * @brief   Function to convert local RM_ENGINE_TYPE to global
2637  *          RM_ENGINE_TYPE for partitionable engines
 *          Currently it supports GR, CE, NVDEC, NVENC, NVJPG
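 *          For example, a GPU instance that owns global GR1 and GR2 exposes
 *          them to its clients as local GR0 and GR1, respectively.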
2639  */
2640 NV_STATUS
2641 kmigmgrGetLocalToGlobalEngineType_IMPL
2642 (
2643     OBJGPU *pGpu,
2644     KernelMIGManager *pKernelMIGManager,
2645     MIG_INSTANCE_REF ref,
2646     RM_ENGINE_TYPE localEngType,
2647     RM_ENGINE_TYPE *pGlobalEngType
2648 )
2649 {
2650     NV_ASSERT_OR_RETURN(kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_ARGUMENT);
2651     NV_ASSERT_OR_RETURN(RM_ENGINE_TYPE_IS_VALID(localEngType),
2652                         NV_ERR_INVALID_ARGUMENT);
2653 
2654     if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, localEngType))
2655     {
2656         //
        // Return the same engineId as the local one if called for non-partitionable
        // 2080-type engines such as host engines, PMU, SEC, etc.
2659         //
2660         *pGlobalEngType = localEngType;
2661         return NV_OK;
2662     }
2663 
2664     if (ref.pMIGComputeInstance != NULL)
2665     {
2666         // Replace the CI-local input index with GI-local
2667         if (kmigmgrEngineTypeXlate(&ref.pMIGComputeInstance->resourceAllocation.localEngines, localEngType,
2668                                    &ref.pMIGComputeInstance->resourceAllocation.engines, &localEngType) != NV_OK)
2669         {
2670             NV_PRINTF(LEVEL_ERROR,
2671                        "Compute instance Local Engine type 0x%x is not allocated to Compute instance\n",
2672                        localEngType);
2673              return NV_ERR_INVALID_ARGUMENT;
2674         }
2675     }
2676 
2677     // Replace the GI-local input index with global
2678     if (kmigmgrEngineTypeXlate(&ref.pKernelMIGGpuInstance->resourceAllocation.localEngines, localEngType,
2679                                &ref.pKernelMIGGpuInstance->resourceAllocation.engines, &localEngType) != NV_OK)
2680     {
2681          NV_PRINTF(LEVEL_ERROR,
2682                    "GPU instance Local Engine type 0x%x is not allocated to GPU instance\n",
2683                    localEngType);
2684          return NV_ERR_INVALID_ARGUMENT;
2685     }
2686 
2687     *pGlobalEngType = localEngType;
2688     return NV_OK;
2689 }
2690 
2691 /*!
2692  * @brief   Function to convert global RM_ENGINE_TYPE to local
2693  *          RM_ENGINE_TYPE for partitionable engines
2694  *          Currently it supports GR, CE, NVDEC, NVENC, NVJPG
2695  */
2696 NV_STATUS
2697 kmigmgrGetGlobalToLocalEngineType_IMPL
2698 (
2699     OBJGPU *pGpu,
2700     KernelMIGManager *pKernelMIGManager,
2701     MIG_INSTANCE_REF ref,
2702     RM_ENGINE_TYPE globalEngType,
2703     RM_ENGINE_TYPE *pLocalEngType
2704 )
2705 {
2706     NV_ASSERT_OR_RETURN(kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_ARGUMENT);
2707     NV_ASSERT_OR_RETURN(RM_ENGINE_TYPE_IS_VALID(globalEngType),
2708                         NV_ERR_INVALID_ARGUMENT);
2709 
2710     if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, globalEngType))
2711     {
2712         //
        // Return the same engineId as the global one if called for non-partitionable
        // RM engine types such as host engines, PMU, SEC, etc.
2715         //
2716         *pLocalEngType = globalEngType;
2717         return NV_OK;
2718     }
2719 
2720     // Replace the global input index with GI-local
2721     if (kmigmgrEngineTypeXlate(&ref.pKernelMIGGpuInstance->resourceAllocation.engines, globalEngType,
2722                                &ref.pKernelMIGGpuInstance->resourceAllocation.localEngines, &globalEngType) != NV_OK)
2723     {
2724          NV_PRINTF(LEVEL_ERROR,
2725                    "Global Engine type 0x%x is not allocated to GPU instance\n",
2726                    globalEngType);
2727          return NV_ERR_INVALID_ARGUMENT;
2728     }
2729 
2730     if (ref.pMIGComputeInstance != NULL)
2731     {
2732         // Replace the GI-local input index with CI-local
2733         if (kmigmgrEngineTypeXlate(&ref.pMIGComputeInstance->resourceAllocation.engines, globalEngType,
2734                                    &ref.pMIGComputeInstance->resourceAllocation.localEngines, &globalEngType) != NV_OK)
2735         {
2736              NV_PRINTF(LEVEL_ERROR,
2737                        "GPU instance Local Engine type 0x%x is not allocated to compute instance\n",
2738                        globalEngType);
2739              return NV_ERR_INVALID_ARGUMENT;
2740         }
2741     }
2742 
2743     *pLocalEngType = globalEngType;
2744     return NV_OK;
2745 }
2746 
2747 /*!
2748  * @brief   Function to retrieve list of engine types belonging to this
2749  *          GPU instance. When MIG is enabled, GRCEs are filtered from the engine
2750  *          list, as well as any local GR engine indices outside of the range
2751  *          allocated to this GPU instance. When MIG is disabled, all non-legacy GR
 *          engines are filtered from the engine list, but no CEs are filtered.
2753  *
2754  * @param[IN]   pGpu
2755  * @param[IN]   pKernelMIGManager
2756  * @param[IN]   pSubdevice
2757  * @param[OUT]  pEngineTypes       Engine type list
2758  * @param[OUT]  pEngineCount       Engine type count
2759  *
2760  * @return NV_STATUS
2761  *         NV_OK on success
2762  *         NV_ERR_INVALID_ARGUMENT if invalid subdevice
2763  *         NV_ERR_INVALID_STATE if subdevice is not partitioned
2764  */
2765 NV_STATUS
2766 kmigmgrFilterEngineList_IMPL
2767 (
2768     OBJGPU *pGpu,
2769     KernelMIGManager *pKernelMIGManager,
2770     Subdevice *pSubdevice,
2771     RM_ENGINE_TYPE *pEngineTypes,
2772     NvU32 *pEngineCount
2773 )
2774 {
2775     MIG_INSTANCE_REF ref;
2776     NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
2777     NvU32 i;
2778 
2779     if (bMIGInUse)
2780     {
2781         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2782             kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, RES_GET_CLIENT_HANDLE(pSubdevice), &ref));
2783     }
2784 
2785     *pEngineCount = 0;
2786     for (i = 0; i < pGpu->engineDB.size; ++i)
2787     {
2788         RM_ENGINE_TYPE rmEngineType = pGpu->engineDB.pType[i];
2789         RM_ENGINE_TYPE newEngineType = rmEngineType;
2790         NvBool bAddEngine = NV_TRUE;
2791 
2792         if (bMIGInUse)
2793         {
2794             if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, ref))
2795             {
2796                 // Override the engine type with the local engine idx
2797                 NV_ASSERT_OK(kmigmgrGetGlobalToLocalEngineType(pGpu, pKernelMIGManager, ref,
2798                                                                rmEngineType,
2799                                                                &newEngineType));
2800             }
2801             else
2802             {
2803                 bAddEngine = NV_FALSE;
2804             }
2805         }
2806         else if (RM_ENGINE_TYPE_IS_GR(rmEngineType) &&
2807                 (0 != RM_ENGINE_TYPE_GR_IDX(rmEngineType)))
2808         {
2809             bAddEngine = NV_FALSE;
2810         }
2811 
2812         if (bAddEngine)
2813         {
2814             pEngineTypes[(*pEngineCount)++] = newEngineType;
2815         }
2816     }
2817 
2818     return NV_OK;
2819 }
2820 
2821 /**
2822  * @brief Removes all engines which are not in this client's GPU instance from the
 *        partner list.
2824  *
2825  * @param[IN]      pGpu
2826  * @param[IN]      pKernelMIGManager
2827  * @param[IN]      pSubdevice
2828  * @param[IN/OUT]  pPartnerListParams   Client Partner list params
2829  *
2830  * @return NV_STATUS
2831  *         NV_OK on success or MIG disabled
2832  *         NV_ERR_INVALID_ARGUMENT on bad pParams
2833  */
2834 NV_STATUS
2835 kmigmgrFilterEnginePartnerList_IMPL
2836 (
2837     OBJGPU *pGpu,
2838     KernelMIGManager *pKernelMIGManager,
2839     Subdevice *pSubdevice,
2840     NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams
2841 )
2842 {
2843     NvU32 i, j;
2844     MIG_INSTANCE_REF ref;
2845 
2846     NV_ASSERT_OR_RETURN(NULL != pPartnerListParams, NV_ERR_INVALID_ARGUMENT);
2847 
2848     // MIG disabled, nothing to do
2849     if (!IS_MIG_IN_USE(pGpu))
2850     {
2851         return NV_OK;
2852     }
2853 
2854     NV_ASSERT_OK_OR_RETURN(
2855         kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, RES_GET_CLIENT_HANDLE(pSubdevice), &ref));
2856 
2857     for (i = 0; i < pPartnerListParams->numPartners; ++i)
2858     {
2859         RM_ENGINE_TYPE rmEngineType = pPartnerListParams->partnerList[i];
2860 
2861         if (!kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, ref))
2862         {
2863             // Filter this entry from the partner list
2864             for (j = i; j < pPartnerListParams->numPartners - 1; ++j)
2865             {
2866                 pPartnerListParams->partnerList[j] = pPartnerListParams->partnerList[j + 1];
2867             }
2868 
2869             pPartnerListParams->numPartners--;
2870 
2871             // Break early to prevent underflow of i
2872             if (0 == pPartnerListParams->numPartners)
2873             {
2874                 break;
2875             }
2876 
2877             i--;
2878         }
2879     }
2880 
2881     return NV_OK;
2882 }
2883 
2884 /*!
2885  * @brief   Finds a GPU Instance profile matching the input request flag
2886  */
2887 NV_STATUS
2888 kmigmgrGetProfileByPartitionFlag_IMPL
2889 (
2890     OBJGPU *pGpu,
2891     KernelMIGManager *pKernelMIGManager,
2892     NvU32 partitionFlag,
2893     const NV2080_CTRL_INTERNAL_MIGMGR_PROFILE_INFO **ppProfile
2894 )
2895 {
2896     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
2897     NvU32 i;
2898 
2899     NV_ASSERT_OR_RETURN(pStaticInfo != NULL, NV_ERR_INVALID_STATE);
2900     NV_ASSERT_OR_RETURN(pStaticInfo->pProfiles != NULL, NV_ERR_INVALID_STATE);
2901 
2902     for (i = 0; i < pStaticInfo->pProfiles->count; ++i)
2903     {
2904         if (pStaticInfo->pProfiles->table[i].partitionFlag == partitionFlag)
2905         {
2906             *ppProfile = &pStaticInfo->pProfiles->table[i];
2907             return NV_OK;
2908         }
2909     }
2910 
2911     return NV_ERR_INVALID_STATE;
2912 }
2913 
/*!
2915  * @brief Determine illegal swizzIds based on global swizzId mask
2916  */
2917 NV_STATUS
2918 kmigmgrGetInvalidSwizzIdMask_IMPL
2919 (
2920     OBJGPU *pGpu,
2921     KernelMIGManager *pKernelMIGManager,
2922     NvU32 swizzId,
2923     NvU64 *pUnsupportedSwizzIdMask
2924 )
2925 {
2926     NvU64 i;
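    //
    // Each gpuSlice entry lists the swizzIds whose instances occupy that GPU
    // slice; swizzIds sharing a slice cannot coexist. For example, swizzId 3
    // covers slices 0 and 1, so allocating it marks swizzIds 0, 1, 7 and 8
    // (besides 3 itself) as unsupported.
    //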
2927     NvU64 gpuSlice[KGRMGR_MAX_GR] =
2928     {
2929         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(7)),
2930         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(8)),
2931         (NVBIT64(0) | NVBIT64(1) | NVBIT64(4) | NVBIT64(9)),
2932         (NVBIT64(0) | NVBIT64(1) | NVBIT64(4) | NVBIT64(10)),
2933         (NVBIT64(0) | NVBIT64(2) | NVBIT64(5) | NVBIT64(11)),
2934         (NVBIT64(0) | NVBIT64(2) | NVBIT64(5) | NVBIT64(12)),
2935         (NVBIT64(0) | NVBIT64(2) | NVBIT64(6) | NVBIT64(13)),
2936         (NVBIT64(0) | NVBIT64(2) | NVBIT64(6) | NVBIT64(14))
2937     };
2938 
2939     NV_ASSERT_OR_RETURN(NULL != pUnsupportedSwizzIdMask, NV_ERR_INVALID_ARGUMENT);
2940 
2941     // All bits corresponding to nonexistent swizzids are invalid
2942     *pUnsupportedSwizzIdMask = DRF_SHIFTMASK64(63:KMIGMGR_MAX_GPU_SWIZZID);
2943 
2944     for (i = 0; i < KGRMGR_MAX_GR; ++i)
2945     {
2946         if (0 != (gpuSlice[i] & NVBIT64(swizzId)))
2947         {
2948             *pUnsupportedSwizzIdMask |= gpuSlice[i];
2949         }
2950     }
2951 
2952     return NV_OK;
2953 }
2954 
2955 /*!
2956  * @brief Processes request to update partitioning mode to the given value.
2957  */
2958 NV_STATUS
2959 kmigmgrSetPartitioningMode_IMPL
2960 (
2961     OBJGPU *pGpu,
2962     KernelMIGManager *pKernelMIGManager
2963 )
2964 {
2965     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
2966     NV2080_CTRL_INTERNAL_GPU_GET_SMC_MODE_PARAMS params;
2967     KernelCcu *pKccu = GPU_GET_KERNEL_CCU(pGpu);
2968 
2969     portMemSet(&params, 0x0, sizeof(params));
2970     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
2971         pRmApi->Control(pRmApi,
2972                         pGpu->hInternalClient,
2973                         pGpu->hInternalSubdevice,
2974                         NV2080_CTRL_CMD_INTERNAL_GPU_GET_SMC_MODE,
2975                         &params,
2976                         sizeof(params)));
2977 
2978     // Should never have reached this far
2979     NV_ASSERT_OR_RETURN(params.smcMode != NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_UNSUPPORTED,
2980                         NV_ERR_INVALID_STATE);
2981 
2982     //
2983     // If pending state, do not update mode in response to request. Mode will be
2984     // updated on next GPU reset.
2985     //
2986     if ((params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_DISABLE_PENDING) ||
2987         (params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_ENABLE_PENDING))
2988     {
2989         return NV_OK;
2990     }
2991 
2992     pKernelMIGManager->bMIGEnabled = (params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_ENABLED);
2993 
2994     // MIG Mode might not have been enabled yet, so load static info if enabled
2995     if (IS_MIG_ENABLED(pGpu))
2996     {
2997         // Initialize static info derived from physical RM
2998         NV_ASSERT_OK_OR_RETURN(kmigmgrLoadStaticInfo_HAL(pGpu, pKernelMIGManager));
2999 
3000         //
3001         // Populate static GPU instance memory config which will be used to manage
3002         // GPU instance memory
3003         //
3004         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
3005         NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem));
3006     }
3007 
3008     if (pKccu)
3009     {
3010         kccuMigShrBufHandler_HAL(pGpu, pKccu, pKernelMIGManager->bMIGEnabled);
3011     }
3012     return NV_OK;
3013 }
3014 
/*!
 * @brief   Get a reference to the GPU instance / compute instance which
 *          contains the given engine. If no instance is found, an error is returned.
 */
3019 NV_STATUS
3020 kmigmgrGetMIGReferenceFromEngineType_IMPL
3021 (
3022     OBJGPU *pGpu,
3023     KernelMIGManager *pKernelMIGManager,
3024     RM_ENGINE_TYPE rmEngineType,
3025     MIG_INSTANCE_REF *pRef
3026 )
3027 {
3028     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
3029     MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
3030     NvU32 CIIdx;
3031 
3032     NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT);
3033     // Default to non-attributed channel
3034     *pRef = kmigmgrMakeNoMIGReference();
3035 
3036     // Bail out early if there are no instances to attribute to
3037     if (!IS_MIG_IN_USE(pGpu))
3038         return NV_ERR_NOT_SUPPORTED;
3039 
3040     //
3041     // if this happens to be an RM internal channel not bound to an engine,
3042     // attribute it to no instance
3043     //
3044     if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType))
3045         return NV_ERR_INVALID_ARGUMENT;
3046 
3047     // Engine is not partitionable, attribute to no instance
3048     if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType))
3049         return NV_ERR_INVALID_ARGUMENT;
3050 
3051     pKernelMIGGPUInstance = NULL;
3052     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
3053     {
3054         if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType,
3055                                       kmigmgrMakeGIReference(pKernelMIGGPUInstance)))
3056         {
3057             break;
3058         }
3059     }
3060     FOR_EACH_VALID_GPU_INSTANCE_END();
3061 
    // Engine was partitionable, but not in any of our GPU instances.
3063     if ((pKernelMIGGPUInstance == NULL) || !pKernelMIGGPUInstance->bValid)
3064         return NV_ERR_INVALID_STATE;
3065 
3066     *pRef = kmigmgrMakeGIReference(pKernelMIGGPUInstance);
3067 
3068     // Attempt to find a compute instance which contains this engine
3069     for (CIIdx = 0;
3070          CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance);
3071          ++CIIdx)
3072     {
3073         pMIGComputeInstance = &pKernelMIGGPUInstance->MIGComputeInstance[CIIdx];
3074 
3075         if (!pMIGComputeInstance->bValid)
3076             continue;
3077 
3078         if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType,
3079                                       kmigmgrMakeCIReference(pKernelMIGGPUInstance, pMIGComputeInstance)))
3080         {
3081             break;
3082         }
3083     }
3084 
3085     if (CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance))
3086         *pRef = kmigmgrMakeCIReference(pKernelMIGGPUInstance, pMIGComputeInstance);
3087 
3088     return NV_OK;
3089 }
3090 
3091 /*!
 * @brief Check if we are running on a reduced config GPU and, if so, set the corresponding flag
3093  */
3094 void
3095 kmigmgrDetectReducedConfig_KERNEL
3096 (
3097     OBJGPU *pGpu,
3098     KernelMIGManager *pKernelMIGManager
3099 )
3100 {
3101     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvU32 i;

    NV_ASSERT_OR_RETURN_VOID(pStaticInfo != NULL);
3103 
3104     for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; ++i)
3105     {
        // A reduced config A100 does not support the 1/8 compute size, so finding it means this is not a reduced config
3107         if (pStaticInfo->pCIProfiles->profiles[i].computeSize == NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH)
3108         {
3109             return;
3110         }
3111     }
3112 
3113     pKernelMIGManager->bIsA100ReducedConfig = NV_TRUE;
3114 }
3115 
3116 /*!
3117  * @brief   Get the CE in GI that can be used for scrubbing
3118  *
3119  * @param[IN]   pGpu
3120  * @param[IN]   pKernelMIGManager
3121  * @param[IN]   pDevice            Device subscribed to GI
 * @param[OUT]  ceInst             Scrubber CE engine index
3123  */
3124 NV_STATUS
3125 kmigmgrGetGPUInstanceScrubberCe_IMPL
3126 (
3127     OBJGPU *pGpu,
3128     KernelMIGManager *pKernelMIGManager,
3129     Device *pDevice,
3130     NvU32 *ceInst
3131 )
3132 {
3133     MIG_INSTANCE_REF ref;
3134     ENGTYPE_BIT_VECTOR ces;
3135 
3136     NV_ASSERT_OK_OR_RETURN(
3137         kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
3138 
3139     bitVectorClrAll(&ces);
3140     bitVectorSetRange(&ces, RM_ENGINE_RANGE_COPY());
3141     bitVectorAnd(&ces, &ces, &ref.pKernelMIGGpuInstance->resourceAllocation.engines);
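    // 'ces' now contains only the copy engines present in this GPU instance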
3142 
3143     NV_ASSERT_OR_RETURN(!bitVectorTestAllCleared(&ces), NV_ERR_INSUFFICIENT_RESOURCES);
3144 
3145     // Pick the first CE in the instance
3146     *ceInst = RM_ENGINE_TYPE_COPY_IDX(bitVectorCountTrailingZeros(&ces));
3147 
3148     return NV_OK;
3149 }
3150 
3151 /*!
 * @brief   Copy the cached GPU instance profile table into the user-provided
 *          params for DESCRIBE_PARTITIONS
3154  */
3155 NV_STATUS
3156 kmigmgrDescribeGPUInstances_IMPL
3157 (
3158     OBJGPU *pGpu,
3159     KernelMIGManager *pKernelMIGManager,
3160     NV2080_CTRL_GPU_DESCRIBE_PARTITIONS_PARAMS *pParams
3161 )
3162 {
3163     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
3164     NvU32 i;
3165     NvU32 entryCount;
3166 
3167     if ((pStaticInfo == NULL) || (pStaticInfo->pProfiles == NULL))
3168         return NV_ERR_NOT_SUPPORTED;
3169 
3170     entryCount = 0;
3171     for (i = 0; i < pStaticInfo->pProfiles->count; ++i)
3172     {
3173         {
3174             KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
3175             NV_RANGE addrRange = NV_RANGE_EMPTY;
3176             NvU32 swizzId;
3177             NvU32 memorySize = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE,
3178                                        pStaticInfo->pProfiles->table[i].partitionFlag);
3179 
3180             // Retrieve a valid id for this flag combination
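            // (swizzId 0 is the full partition, 1 the first half, 3 the first
            // quarter, and 7 the first eighth; any swizzId of the same size
            // class yields the same memory size here)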
3181             switch (memorySize)
3182             {
3183                 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_FULL:
3184                     swizzId = 0;
3185                     break;
3186                 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_HALF:
3187                     swizzId = 1;
3188                     break;
3189                 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_QUARTER:
3190                     swizzId = 3;
3191                     break;
3192                 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_EIGHTH:
3193                     swizzId = 7;
3194                     break;
3195                 default:
3196                     NV_ASSERT(0);
3197                     continue;
3198             }
3199 
3200             NV_ASSERT_OK(kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, &addrRange));
3201             pParams->partitionDescs[entryCount].memorySize = rangeLength(addrRange);
3202         }
3203 
3204         pParams->partitionDescs[entryCount].partitionFlag   = pStaticInfo->pProfiles->table[i].partitionFlag;
3205         pParams->partitionDescs[entryCount].grCount         = pStaticInfo->pProfiles->table[i].grCount;
3206         pParams->partitionDescs[entryCount].gfxGrCount      = pStaticInfo->pProfiles->table[i].gfxGrCount;
3207         pParams->partitionDescs[entryCount].gpcCount        = pStaticInfo->pProfiles->table[i].gpcCount;
3208         pParams->partitionDescs[entryCount].gfxGpcCount     = pStaticInfo->pProfiles->table[i].gfxGpcCount;
3209         pParams->partitionDescs[entryCount].virtualGpcCount = pStaticInfo->pProfiles->table[i].virtualGpcCount;
3210         pParams->partitionDescs[entryCount].veidCount       = pStaticInfo->pProfiles->table[i].veidCount;
3211         pParams->partitionDescs[entryCount].smCount         = pStaticInfo->pProfiles->table[i].smCount;
3212         pParams->partitionDescs[entryCount].ceCount         = pStaticInfo->pProfiles->table[i].ceCount;
3213         pParams->partitionDescs[entryCount].nvEncCount      = pStaticInfo->pProfiles->table[i].nvEncCount;
3214         pParams->partitionDescs[entryCount].nvDecCount      = pStaticInfo->pProfiles->table[i].nvDecCount;
3215         pParams->partitionDescs[entryCount].nvJpgCount      = pStaticInfo->pProfiles->table[i].nvJpgCount;
3216         pParams->partitionDescs[entryCount].nvOfaCount      = pStaticInfo->pProfiles->table[i].nvOfaCount;
3217 
3218         entryCount++;
3219     }
    pParams->descCount = entryCount;
3221 
3222     return NV_OK;
3223 }
3224 
3225 /*!
3226  * @brief   Saves MIG compute instance topology in provided structure
3227  */
3228 NV_STATUS
3229 kmigmgrSaveComputeInstances_IMPL
3230 (
3231     OBJGPU *pGpu,
3232     KernelMIGManager *pKernelMIGManager,
3233     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
3234     GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSaves
3235 )
3236 {
3237     NvU32 CIIdx;
3238     NvU32 ciCount = 0;
3239 
3240     // Sanity checks
3241     NV_ASSERT_OR_RETURN((pKernelMIGGpuInstance != NULL) && (pComputeInstanceSaves != NULL),
3242                         NV_ERR_INVALID_ARGUMENT);
3243 
3244     for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++CIIdx)
3245     {
3246         MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
3247         GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pComputeInstanceSaves[ciCount];
3248         NvU32 gpcIdx;
3249 
3250         // Skip invalid compute instances
3251         if (!pMIGComputeInstance->bValid)
3252             continue;
3253 
3254         portMemSet(pComputeInstanceSave, 0, sizeof(*pComputeInstanceSave));
3255         pComputeInstanceSave->bValid = NV_TRUE;
3256         pComputeInstanceSave->ciInfo.sharedEngFlags = pMIGComputeInstance->sharedEngFlag;
3257         pComputeInstanceSave->id = CIIdx;
3258         pComputeInstanceSave->pOsRmCaps = pMIGComputeInstance->pOsRmCaps;
3259         bitVectorToRaw(&pMIGComputeInstance->resourceAllocation.engines,
3260                        &pComputeInstanceSave->ciInfo.enginesMask,
3261                        sizeof(pComputeInstanceSave->ciInfo.enginesMask));
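        //
        // GSP clients save the mask of physical GPC IDs assigned to this CI;
        // otherwise a contiguous logical mask of gpcCount bits is saved.
        //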
3262         if (IS_GSP_CLIENT(pGpu))
3263         {
3264             for (gpcIdx = 0; gpcIdx < pMIGComputeInstance->resourceAllocation.gpcCount; ++gpcIdx)
3265             {
3266                  pComputeInstanceSave->ciInfo.gpcMask |=
3267                      NVBIT32(pMIGComputeInstance->resourceAllocation.gpcIds[gpcIdx]);
3268             }
3269         }
3270         else
3271         {
3272             pComputeInstanceSave->ciInfo.gpcMask = DRF_MASK(pMIGComputeInstance->resourceAllocation.gpcCount - 1 : 0);
3273         }
3274 
3275         pComputeInstanceSave->ciInfo.gfxGpcCount = pMIGComputeInstance->resourceAllocation.gfxGpcCount;
3276         pComputeInstanceSave->ciInfo.veidOffset = pMIGComputeInstance->resourceAllocation.veidOffset;
3277         pComputeInstanceSave->ciInfo.veidCount = pMIGComputeInstance->resourceAllocation.veidCount;
3278         pComputeInstanceSave->ciInfo.smCount = pMIGComputeInstance->resourceAllocation.smCount;
3279         pComputeInstanceSave->ciInfo.spanStart = pMIGComputeInstance->spanStart;
3280         pComputeInstanceSave->ciInfo.computeSize = pMIGComputeInstance->computeSize;
3281 
3282         portMemCopy(pComputeInstanceSave->ciInfo.uuid, sizeof(pComputeInstanceSave->ciInfo.uuid),
3283                     pMIGComputeInstance->uuid.uuid, sizeof(pMIGComputeInstance->uuid.uuid));
3284 
3285         ++ciCount;
3286     }
3287 
3288     return NV_OK;
3289 }
3290 
3291 /*!
 * @brief   Function to map a swizzId to the allowed GrIdx, physical GPC_IDs,
 *          physical CE_IDs, and VEIDs in a GPU instance
3294  *
3295  * @param[IN]   swizzId              SwizzId used by the GPU instance
3296  * @param[OUT]  pResourceAllocation  Structure containing engine configs for a
3297  *                                   GPU instance. This contains engineCount and
3298  *                                   engine Ids.
3299  */
3300 NV_STATUS
3301 kmigmgrSwizzIdToResourceAllocation_IMPL
3302 (
3303     OBJGPU *pGpu,
3304     KernelMIGManager *pKernelMIGManager,
3305     NvU32 swizzId,
3306     KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params,
3307     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
3308     MIG_RESOURCE_ALLOCATION *pResourceAllocation
3309 )
3310 {
3311     NV2080_CTRL_INTERNAL_KMIGMGR_EXPORTED_GPU_INSTANCE_INFO info;
3312     NvU32 tempGpcMask;
3313 
3314     NV_CHECK_OR_RETURN(LEVEL_ERROR, swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT);
3315 
3316     if (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST)
3317     {
3318         NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS export;
3319         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3320 
3321         portMemSet(&export, 0, sizeof(export));
3322         export.swizzId = swizzId;
3323 
        // Retrieve the info for the GPU instance that GSP-RM just created
3325         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
3326             pRmApi->Control(pRmApi,
3327                             pGpu->hInternalClient,
3328                             pGpu->hInternalSubdevice,
3329                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_EXPORT_GPU_INSTANCE,
3330                             &export,
3331                             sizeof(export)));
3332         info = export.info;
3333     }
3334     else
3335     {
3336         info = params.inst.restore.pGPUInstanceSave->giInfo;
3337     }
3338 
3339     pResourceAllocation->gpcCount = 0;
3340     tempGpcMask = info.gpcMask;
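    // Unpack the GPC mask into the list of GPC IDs, one set bit at a time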
3341     while (tempGpcMask != 0x0)
3342     {
3343         NvU32 gpcIdx = portUtilCountTrailingZeros32(tempGpcMask);
3344         pResourceAllocation->gpcIds[(pResourceAllocation->gpcCount)++] = gpcIdx;
3345         tempGpcMask &= ~(NVBIT32(gpcIdx));
3346     }
3347 
3348     pResourceAllocation->veidCount = info.veidCount;
3349     pResourceAllocation->veidOffset = info.veidOffset;
3350     pResourceAllocation->virtualGpcCount = info.virtualGpcCount;
3351 
3352     // Use profile SM count for filling the resource allocation
3353     pResourceAllocation->smCount = pKernelMIGGpuInstance->pProfile->smCount;
3354 
3355     bitVectorFromRaw(&pResourceAllocation->engines, info.enginesMask, sizeof(info.enginesMask));
3356 
3357     // Cache the local engine mask for this instance
3358     kmigmgrGetLocalEngineMask(&pResourceAllocation->engines, &pResourceAllocation->localEngines);
3359 
3360     return NV_OK;
3361 }
3362 
3363 // Create client and subdevice handles to make calls into this compute instance
3364 NV_STATUS
3365 kmigmgrAllocComputeInstanceHandles_IMPL
3366 (
3367     OBJGPU *pGpu,
3368     KernelMIGManager *pKernelMIGManager,
3369     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
3370     MIG_COMPUTE_INSTANCE *pMIGComputeInstance
3371 )
3372 {
3373     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
3374     NvHandle hGPUInstanceSubscription = NV01_NULL_OBJECT;
3375     NvHandle hComputeInstanceSubscription = NV01_NULL_OBJECT;
3376     NvHandle hClient;
3377     NvHandle hDevice;
3378     NvHandle hSubdevice;
3379     NV_STATUS status;
3380 
3381     NV_ASSERT_OK_OR_RETURN(
3382         rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice));
3383 
3384     {
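        // Subscribe the new client to the GPU instance (AMPERE_SMC_PARTITION_REF)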
3385         NVC637_ALLOCATION_PARAMETERS params;
3386         portMemSet(&params, 0, sizeof(params));
3387         params.swizzId = pKernelMIGGpuInstance->swizzId;
3388         NV_ASSERT_OK_OR_GOTO(status,
3389             pRmApi->Alloc(pRmApi, hClient, hSubdevice, &hGPUInstanceSubscription, AMPERE_SMC_PARTITION_REF, &params, sizeof(params)),
3390             failed);
3391     }
3392 
3393     {
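        // Subscribe to the compute instance within that GI (AMPERE_SMC_EXEC_PARTITION_REF)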
3394         NVC638_ALLOCATION_PARAMETERS params;
3395         portMemSet(&params, 0, sizeof(params));
3396         params.execPartitionId = pMIGComputeInstance->id;
3397         NV_ASSERT_OK_OR_GOTO(status,
3398             pRmApi->Alloc(pRmApi, hClient, hGPUInstanceSubscription, &hComputeInstanceSubscription, AMPERE_SMC_EXEC_PARTITION_REF, &params, sizeof(params)),
3399             failed);
3400     }
3401 
3402     pMIGComputeInstance->instanceHandles.hClient = hClient;
3403     pMIGComputeInstance->instanceHandles.hSubdevice = hSubdevice;
3404     pMIGComputeInstance->instanceHandles.hSubscription = hComputeInstanceSubscription;
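    //
    // Note: the GI subscription handle is not cached here; as a child of
    // hClient in the resource hierarchy, it is released when the client is freed.
    //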
3405 
3406     return NV_OK;
3407 
3408 failed:
3409     pRmApi->Free(pRmApi, hClient, hClient);
3410     return status;
3411 }
3412 
3413 /*!
3414  * @brief   create compute instances
3415  *
3416  * @param[IN]  pGpu
3417  * @param[IN]  pKernelMIGManager
3418  * @param[IN]  pKernelMIGGpuInstance
3419  * @param[IN]  bQuery                If NV_TRUE, don't save created instances
 * @param[IN]  params                List of requested compute instances to create
 * @param[OUT] pCIIDs                IDs of created instances
 * @param[IN]  bCreateCap            Flag stating whether MIG CI capabilities need to be created
3423  */
3424 NV_STATUS
3425 kmigmgrCreateComputeInstances_VF
3426 (
3427     OBJGPU *pGpu,
3428     KernelMIGManager *pKernelMIGManager,
3429     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
3430     NvBool bQuery,
3431     KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS params,
3432     NvU32 *pCIIDs,
3433     NvBool bCreateCap
3434 )
3435 {
3436     NV_STATUS status = NV_OK;
3437     NvU32 count;
3438     ENGTYPE_BIT_VECTOR shadowExclusiveEngMask;
3439     ENGTYPE_BIT_VECTOR shadowSharedEngMask;
3440     MIG_COMPUTE_INSTANCE *pComputeInstanceInfo;
3441     NvU32 CIIdx;
3442     NvU32 freeSlots;
3443     NvU32 createdInstances;
3444     NvU32 inUseGpcCount;
3445     NvU32 remainingGpcCount;
3446     NvU32 i;
3447     NvU64 shadowCTSInUseMask;
3448     NvU64 shadowVeidInUseMask;
3449     NvU32 maxVeidsPerGpc;
3450     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
3451     KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi = NULL;
3452     NvBool bIsCTSRequired = kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager);
3453 
3454     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
3455 
3456     count = (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
3457             ? params.inst.request.count
3458             : 1;
3459 
3460     NV_CHECK_OR_RETURN(LEVEL_SILENT, count != 0, NV_ERR_INVALID_ARGUMENT);
3461 
3462     pComputeInstanceInfo = portMemAllocNonPaged(sizeof(*pComputeInstanceInfo) *
3463                                                 KMIGMGR_MAX_COMPUTE_INSTANCES);
3464     NV_CHECK_OR_RETURN(LEVEL_NOTICE, pComputeInstanceInfo != NULL, NV_ERR_NO_MEMORY);
3465 
3466     portMemSet(pComputeInstanceInfo, 0, sizeof(*pComputeInstanceInfo) *
3467                                         KMIGMGR_MAX_COMPUTE_INSTANCES);
3468 
3469     pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
3470     NV_ASSERT_OR_ELSE(pConfigRequestPerCi != NULL, status = NV_ERR_NO_MEMORY; goto done;);
3471 
3472     portMemSet(pConfigRequestPerCi, 0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
3473 
3474     NV_ASSERT_OK_OR_GOTO(status,
3475         kgrmgrGetMaxVeidsPerGpc(pGpu, pKernelGraphicsManager, &maxVeidsPerGpc),
3476         done);
3477 
3478     // Check that there's enough open compute instance slots, and count used GPCs
3479     freeSlots = 0;
3480     inUseGpcCount = 0;
3481     for (CIIdx = 0;
3482          CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
3483          ++CIIdx)
3484     {
3485         MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
3486 
3487         if (pMIGComputeInstance->bValid)
3488         {
3489             NvU32 smCount = pMIGComputeInstance->resourceAllocation.smCount;
3490             NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE ciProfile;
3491 
3492             NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR,
3493                 kmigmgrGetComputeProfileFromSmCount(pGpu, pKernelMIGManager, smCount, &ciProfile),
3494                 goto done; );
3495 
3496             inUseGpcCount += ciProfile.gpcCount;
3497         }
3498         else
3499         {
3500             freeSlots++;
3501         }
3502     }
3503     NV_CHECK_OR_ELSE(LEVEL_SILENT, freeSlots >= count,
3504                      status = NV_ERR_INSUFFICIENT_RESOURCES; goto done);
3505 
3506     //
3507     // Check that we have enough spare GPCs. We're going to reuse the GPU Instance
3508     // configuration logic later on to do the actual allocation, so for now just
3509     // check the count.
3510     //
3511     NV_ASSERT_OR_ELSE(pKernelMIGGpuInstance->resourceAllocation.virtualGpcCount >= inUseGpcCount,
3512                       status = NV_ERR_INVALID_STATE; goto done);
3513     remainingGpcCount = pKernelMIGGpuInstance->resourceAllocation.virtualGpcCount - inUseGpcCount;
3514 
3515     //
3516     // Cache local copies of the resource pools, we'll commit them later if we
3517     // have to
3518     //
3519     bitVectorCopy(&shadowExclusiveEngMask, &pKernelMIGGpuInstance->exclusiveEngMask);
3520     bitVectorCopy(&shadowSharedEngMask, &pKernelMIGGpuInstance->sharedEngMask);
3521     shadowCTSInUseMask = pKernelMIGGpuInstance->ctsIdsInUseMask;
3522     shadowVeidInUseMask = pKernelGraphicsManager->veidInUseMask;
3523     for (CIIdx = 0; CIIdx < count; ++CIIdx)
3524     {
3525         NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pCIProfile;
3526         MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
3527         MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pMIGComputeInstance->resourceAllocation;
3528         NvU32 smCount =
3529                 (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
3530                 ? params.inst.request.pReqComputeInstanceInfo[CIIdx].smCount
3531                 : params.inst.restore.pComputeInstanceSave->ciInfo.smCount;
3532         NvU32 gpcCount =
3533                 (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
3534                 ? params.inst.request.pReqComputeInstanceInfo[CIIdx].gpcCount
3535                 : nvPopCount32(params.inst.restore.pComputeInstanceSave->ciInfo.gpcMask);
3536         pMIGComputeInstance->bValid = NV_TRUE;
3537         pMIGComputeInstance->sharedEngFlag =
3538                 (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
3539                 ? params.inst.request.pReqComputeInstanceInfo[CIIdx].sharedEngFlag
3540                 : params.inst.restore.pComputeInstanceSave->ciInfo.sharedEngFlags;
3541         NvU32 spanStart;
3542         NvU32 ctsId;
3543 
3544         if (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
3545         {
3546             spanStart = KMIGMGR_SPAN_OFFSET_INVALID;
3547             if (FLD_TEST_REF(NVC637_CTRL_DMA_EXEC_PARTITIONS_CREATE_REQUEST_AT_SPAN, _TRUE, params.inst.request.requestFlags))
3548             {
                //
                // Use the spanStart field if provided, else calculate the spanStart from the VEID offset passed in.
                // This is done specifically to accommodate legacy flows which have no knowledge of the new spanStart field.
                //
                spanStart = (params.inst.request.pReqComputeInstanceInfo[CIIdx].spanStart != 0)
                            ? params.inst.request.pReqComputeInstanceInfo[CIIdx].spanStart
                            : params.inst.request.pReqComputeInstanceInfo[CIIdx].veidStartOffset / maxVeidsPerGpc;
3556             }
3557         }
3558         else
3559         {
3560             spanStart = params.inst.restore.pComputeInstanceSave->ciInfo.spanStart;
3561         }
3562 
3563         pConfigRequestPerCi[CIIdx].veidSpanStart = spanStart;
3564         pCIProfile = &pConfigRequestPerCi[CIIdx].profile;
3565         ctsId = KMIGMGR_CTSID_INVALID;
3566         if ((kmigmgrGetComputeProfileFromSmCount(pGpu, pKernelMIGManager, smCount, pCIProfile) == NV_OK) ||
3567             (kmigmgrGetComputeProfileFromGpcCount(pGpu, pKernelMIGManager, gpcCount, pCIProfile) == NV_OK))
3568         {
3569             // CTS and Span allocation is done early to help prevent spurious requests
3570             if (bIsCTSRequired)
3571             {
3572                 if (spanStart != KMIGMGR_SPAN_OFFSET_INVALID)
3573                 {
3574                     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3575                         kmigmgrXlateSpanStartToCTSId(pGpu, pKernelMIGManager,
3576                                                      pCIProfile->computeSize,
3577                                                      spanStart,
3578                                                      &ctsId),
3579                         done);
3580 
3581                     NV_CHECK_OR_ELSE(LEVEL_ERROR,
3582                         kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
3583                                                 pKernelMIGGpuInstance->pProfile->validCTSIdMask,
3584                                                 shadowCTSInUseMask,
3585                                                 ctsId),
3586                         status = NV_ERR_STATE_IN_USE; goto done; );
3587 
3588                     shadowCTSInUseMask |= NVBIT64(ctsId);
3589                 }
3590                 else
3591                 {
3592                     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3593                         kmigmgrGetFreeCTSId(pGpu, pKernelMIGManager,
3594                                             &ctsId,
3595                                             pKernelMIGGpuInstance->pProfile->validCTSIdMask,
3596                                             shadowCTSInUseMask,
3597                                             pCIProfile->computeSize),
3598                         done);
3599                 }
3600 
3601                 pConfigRequestPerCi[CIIdx].veidSpanStart = kmigmgrGetSpanStartFromCTSId(pGpu, pKernelMIGManager, ctsId);
3602                 shadowCTSInUseMask |= NVBIT64(ctsId);
3603             }
3604         }
3605         else
3606         {
            // No CI profile was available, so populate one with the bare necessities
3608             pCIProfile->computeSize = KMIGMGR_COMPUTE_SIZE_INVALID;
3609             pCIProfile->gpcCount = gpcCount;
3610             pCIProfile->smCount = gpcCount * (pKernelMIGGpuInstance->pProfile->smCount / pKernelMIGGpuInstance->pProfile->gpcCount);
3611             pCIProfile->veidCount = maxVeidsPerGpc * gpcCount;
3612 
3613             // Force non-profile requests to go through VEID allocator
3614             pConfigRequestPerCi[CIIdx].veidSpanStart = KMIGMGR_SPAN_OFFSET_INVALID;
3615         }
3616 
3617         pConfigRequestPerCi[CIIdx].ctsId = ctsId;
3618 
3619         // Perform VEID request checks or use the best fit allocator to find a slot
3620         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3621             kgrmgrCheckVeidsRequest(pGpu, pKernelGraphicsManager,
3622                                     &shadowVeidInUseMask,
3623                                     pCIProfile->veidCount,
3624                                     &pConfigRequestPerCi[CIIdx].veidSpanStart,
3625                                     pKernelMIGGpuInstance),
3626             done);
3627 
3628         // Perform checks and VEID allocation
3629         if (!bIsCTSRequired)
3630         {
3631             //
3632             // Only perform explicit GPC checks if CTS alignment isn't required. A similar case
3633             // is covered by CTS requirements.
3634             //
3635             if (remainingGpcCount < pCIProfile->gpcCount)
3636             {
3637                 NV_PRINTF(LEVEL_ERROR,
3638                           "Not enough remaining GPCs (%d) for compute instance request (%d).\n",
3639                           remainingGpcCount, pCIProfile->gpcCount);
3640                 status = NV_ERR_INSUFFICIENT_RESOURCES;
3641                 goto done;
3642             }
3643             remainingGpcCount -= pCIProfile->gpcCount;
3644         }
3645 
3646         if (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE)
3647         {
3648             ENGTYPE_BIT_VECTOR engines;
3649             bitVectorClrAll(&pResourceAllocation->engines);
3650 
3651             // Set engines requested directly in resource allocation mask
3652             bitVectorFromRaw(&pResourceAllocation->engines,
3653                              params.inst.restore.pComputeInstanceSave->ciInfo.enginesMask,
3654                              sizeof(params.inst.restore.pComputeInstanceSave->ciInfo.enginesMask));
3655 
3656             // Sanity check that all engines requested exist in the GI engine mask
3657             bitVectorClrAll(&engines);
3658             bitVectorAnd(&engines, &pResourceAllocation->engines, &pKernelMIGGpuInstance->resourceAllocation.localEngines);
3659             NV_CHECK_OR_ELSE(LEVEL_ERROR,
3660                 bitVectorTestEqual(&engines, &pResourceAllocation->engines),
3661                 status = NV_ERR_INVALID_ARGUMENT; goto done;);
3662 
3663             // Set Shared/Exclusive Engine Masks for GRs restored
3664             bitVectorClrAll(&engines);
3665             bitVectorSetRange(&engines, RM_ENGINE_RANGE_GR());
3666             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3667 
3668             // Only 1 GR can be requested per compute instance
3669             NV_CHECK_OR_ELSE(LEVEL_ERROR,
3670                 (kmigmgrCountEnginesOfType(&engines, RM_ENGINE_TYPE_GR(0)) == 1),
3671                 status = NV_ERR_INVALID_ARGUMENT; goto done;);
3672 
3673             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NONE) != 0x0)
3674                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3675             else
3676             {
3677                 ENGTYPE_BIT_VECTOR tempVector;
3678 
3679                 // Exclusive engine mask should not intersect with the current exclusive mask
3680                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3681                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3682                     bitVectorTestAllCleared(&tempVector),
3683                     status = NV_ERR_STATE_IN_USE; goto done;);
3684                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3685             }
3686 
3687             // Set Shared/Exclusive Engine Masks for CEs restored
3688             bitVectorClrAll(&engines);
3689             bitVectorSetRange(&engines, RM_ENGINE_RANGE_COPY());
3690             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3691             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_CE) != 0x0)
3692                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3693             else
3694             {
3695                 ENGTYPE_BIT_VECTOR tempVector;
3696 
3697                 // Exclusive engine mask should not intersect with the current exclusive mask
3698                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3699                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3700                     bitVectorTestAllCleared(&tempVector),
3701                     status = NV_ERR_STATE_IN_USE; goto done;);
3702                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3703             }
3704 
3705             // Set Shared/Exclusive Engine Masks for NVDECs restored
3706             bitVectorClrAll(&engines);
3707             bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVDEC());
3708             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3709             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVDEC) != 0x0)
3710                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3711             else
3712             {
3713                 ENGTYPE_BIT_VECTOR tempVector;
3714 
3715                 // Exclusive engine mask should not intersect with the current exclusive mask
3716                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3717                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3718                     bitVectorTestAllCleared(&tempVector),
3719                     status = NV_ERR_STATE_IN_USE; goto done;);
3720                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3721             }
3722 
3723             // Set Shared/Exclusive Engine Masks for NVENCs restored
3724             bitVectorClrAll(&engines);
3725             bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVENC());
3726             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3727             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVENC) != 0x0)
3728                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3729             else
3730             {
3731                 ENGTYPE_BIT_VECTOR tempVector;
3732 
3733                 // Exclusive engine mask should not intersect with the current exclusive mask
3734                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3735                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3736                     bitVectorTestAllCleared(&tempVector),
3737                     status = NV_ERR_STATE_IN_USE; goto done;);
3738                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3739             }
3740 
3741             // Set Shared/Exclusive Engine Masks for NVJPEGs restored
3742             bitVectorClrAll(&engines);
3743             bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVJPEG());
3744             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3745             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVJPG) != 0x0)
3746                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3747             else
3748             {
3749                 ENGTYPE_BIT_VECTOR tempVector;
3750 
3751                 // Exclusive engine mask should not intersect with the current exclusive mask
3752                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3753                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3754                     bitVectorTestAllCleared(&tempVector),
3755                     status = NV_ERR_STATE_IN_USE; goto done;);
3756                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3757             }
3758 
3759             // Set Shared/Exclusive Engine Masks for OFAs restored
3760             bitVectorClrAll(&engines);
3761             bitVectorSetRange(&engines, rangeMake(RM_ENGINE_TYPE_OFA, RM_ENGINE_TYPE_OFA));
3762             bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
3763             if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_OFA) != 0x0)
3764                 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
3765             else
3766             {
3767                 ENGTYPE_BIT_VECTOR tempVector;
3768 
3769                 // Exclusive engine mask should not intersect with the current exclusive mask
3770                 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
3771                 NV_CHECK_OR_ELSE(LEVEL_ERROR,
3772                     bitVectorTestAllCleared(&tempVector),
3773                     status = NV_ERR_STATE_IN_USE; goto done;);
3774                 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
3775             }
3776         }
3777         else
3778         {
3779             NvU32 grCount = 1;
3780             NvU32 ceCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].ceCount;
3781             NvU32 decCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvDecCount;
3782             NvU32 encCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvEncCount;
3783             NvU32 jpgCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvJpgCount;
3784             NvU32 ofaCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].ofaCount;
3785 
3786             bitVectorClrAll(&pResourceAllocation->engines);
3787 
3788             // Allocate the GR engines for this compute instance
3789             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3790                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3791                                                ((pMIGComputeInstance->sharedEngFlag &
3792                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NONE) != 0x0),
3793                                                RM_ENGINE_RANGE_GR(),
3794                                                grCount,
3795                                                &pResourceAllocation->engines,
3796                                                &shadowExclusiveEngMask,
3797                                                &shadowSharedEngMask,
3798                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3799 
3800             // Allocate the Copy engines for this compute instance
3801             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3802                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3803                                                ((pMIGComputeInstance->sharedEngFlag &
3804                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_CE) != 0x0),
3805                                                RM_ENGINE_RANGE_COPY(),
3806                                                ceCount,
3807                                                &pResourceAllocation->engines,
3808                                                &shadowExclusiveEngMask,
3809                                                &shadowSharedEngMask,
3810                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3811 
3812             // Allocate the NVDEC engines for this compute instance
3813             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3814                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3815                                                ((pMIGComputeInstance->sharedEngFlag &
3816                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVDEC) != 0x0),
3817                                                RM_ENGINE_RANGE_NVDEC(),
3818                                                decCount,
3819                                                &pResourceAllocation->engines,
3820                                                &shadowExclusiveEngMask,
3821                                                &shadowSharedEngMask,
3822                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3823 
3824             // Allocate the NVENC engines for this compute instance
3825             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3826                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3827                                                ((pMIGComputeInstance->sharedEngFlag &
3828                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVENC) != 0x0),
3829                                                RM_ENGINE_RANGE_NVENC(),
3830                                                encCount,
3831                                                &pResourceAllocation->engines,
3832                                                &shadowExclusiveEngMask,
3833                                                &shadowSharedEngMask,
3834                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3835 
3836             // Allocate the NVJPG engines for this compute instance
3837             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3838                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3839                                                ((pMIGComputeInstance->sharedEngFlag &
3840                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVJPG) != 0x0),
3841                                                RM_ENGINE_RANGE_NVJPEG(),
3842                                                jpgCount,
3843                                                &pResourceAllocation->engines,
3844                                                &shadowExclusiveEngMask,
3845                                                &shadowSharedEngMask,
3846                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3847 
3848             // Allocate the NVOFA engines for this compute instance
3849             NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
3850                 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines,
3851                                                ((pMIGComputeInstance->sharedEngFlag &
3852                                                 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_OFA) != 0x0),
3853                                                rangeMake(RM_ENGINE_TYPE_OFA, RM_ENGINE_TYPE_OFA),
3854                                                ofaCount,
3855                                                &pResourceAllocation->engines,
3856                                                &shadowExclusiveEngMask,
3857                                                &shadowSharedEngMask,
3858                                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
3859         }
3860 
3861         // Cache local mask of engine IDs for this compute instance
3862         kmigmgrGetLocalEngineMask(&pResourceAllocation->engines,
3863                                   &pResourceAllocation->localEngines);
3864     }
3865 
3866     // Commit the allocations to the instance
3867     if (!bQuery)
3868     {
3869         NvU32 swizzId = pKernelMIGGpuInstance->swizzId;
3870         KMIGMGR_CONFIGURE_INSTANCE_REQUEST configRequestsPerCiOrdered[KMIGMGR_MAX_COMPUTE_INSTANCES] = {0};
3871         NvU32 updateEngMask;
3872         NvU32 updateEngMaskShadow;
3873 
3874         // Populate configure GPU instance parameters with compute instance info
3875         updateEngMask = 0x0;
3876 
3877         for (CIIdx = 0; CIIdx < count; ++CIIdx)
3878         {
3879             MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
3880             MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
3881             RM_ENGINE_TYPE localEngineType;
3882 
3883             //
3884             // Xlate from CI-local GR 0 to GI-local GR idx
3885             // We can't use kmigmgrGetLocalToGlobalEngineType because these
3886             // compute instances aren't committed yet
3887             //
3888             NV_ASSERT_OK(
3889                 kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
3890                                        &pComputeResourceAllocation->engines, &localEngineType));
3891 
3892             updateEngMask |= NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType));
3893         }
3894 
3895         //
3896         // Reorder the entries in pConfigRequestPerCi per the GR engine assigned to each CI
        // (sorted from lower GR to higher GR), so kmigmgrConfigureGPUInstance can configure
3898         // each CI with correct GR.
3899         //
3900         updateEngMaskShadow = updateEngMask;
3901         i = 0;
3902         while (updateEngMaskShadow != 0)
3903         {
3904             for (CIIdx = 0; CIIdx < count; ++CIIdx)
3905             {
3906                 RM_ENGINE_TYPE localRmEngineType;
3907                 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
3908                 MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
3909                 NV_ASSERT_OK(
3910                     kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
3911                                            &pComputeResourceAllocation->engines, &localRmEngineType));
3912 
3913                 if (portUtilCountTrailingZeros32(updateEngMaskShadow) == RM_ENGINE_TYPE_GR_IDX(localRmEngineType))
3914                 {
3915                     configRequestsPerCiOrdered[i] = pConfigRequestPerCi[CIIdx];
3916                     updateEngMaskShadow &= ~NVBIT32(RM_ENGINE_TYPE_GR_IDX(localRmEngineType));
3917                     i++;
3918                     break;
3919                 }
3920             }
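            // The inner loop must have matched a CI for the lowest remaining GR bit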
3921             NV_ASSERT(CIIdx < count);
3922         }
3923 
3924         // Configure the GR engines for each compute instance
3925         status = kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId,
3926                                              configRequestsPerCiOrdered,
3927                                              updateEngMask);
3928 
3929         // Do our best to deconfigure the engines we configured so far, then bail
3930         if (status != NV_OK)
3931         {
3932             portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
3933             // Quash status. This is best-effort cleanup
3934             (void)kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId,
3935                                               pConfigRequestPerCi,
3936                                               updateEngMask);
3937 
3938             goto done;
3939         }
3940 
3941         // Update the GI pools with the result of this allocation
3942         bitVectorCopy(&pKernelMIGGpuInstance->exclusiveEngMask, &shadowExclusiveEngMask);
3943         bitVectorCopy(&pKernelMIGGpuInstance->sharedEngMask, &shadowSharedEngMask);
3944 
        // Update each compute instance's GPC IDs and VEID info
3946         for (CIIdx = 0; CIIdx < count; ++CIIdx)
3947         {
3948             MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
3949             MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
3950             MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
3951             NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pCIProfile;
3952             RM_ENGINE_TYPE globalEngineType;
3953             NvU32 globalGrIdx;
3954 
3955             //
3956             // Xlate from CI-local GR 0 to global GR idx
3957             // We can't use kmigmgrGetLocalToGlobalEngineType because these
3958             // compute instances aren't committed yet
3959             //
3960             NV_ASSERT_OK(
3961                 kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
3962                                        &pComputeResourceAllocation->engines, &globalEngineType));
3963 
3964             NV_ASSERT_OK(
3965                 kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
3966                                        &pResourceAllocation->engines, &globalEngineType));
3967             globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);
3968             pCIProfile = &pConfigRequestPerCi[CIIdx].profile;
3969 
3970             pComputeResourceAllocation->gpcCount = pCIProfile->gpcCount;
3971             pComputeResourceAllocation->smCount = pCIProfile->smCount;
3972             if (pCIProfile->computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
3973             {
3974                 pComputeResourceAllocation->veidCount = pCIProfile->veidCount;
3975             }
3976             else
3977             {
3978                 pComputeResourceAllocation->veidCount = (pResourceAllocation->veidCount / pResourceAllocation->gpcCount) *
3979                                                          pComputeResourceAllocation->virtualGpcCount;
3980             }
3981 
3982             pMIGComputeInstance->spanStart = pConfigRequestPerCi[CIIdx].veidSpanStart;
3983             pMIGComputeInstance->computeSize = pConfigRequestPerCi[CIIdx].profile.computeSize;
3984 
3985             kgrmgrGetVeidBaseForGrIdx(pGpu, pKernelGraphicsManager, globalGrIdx, &pComputeResourceAllocation->veidOffset);
3986 
            // Convert the global VEID base into an offset local to this GPU instance
            pComputeResourceAllocation->veidOffset = pComputeResourceAllocation->veidOffset - pResourceAllocation->veidOffset;
3988         }
3989 
3990         // Copy over the local cached compute instance info
3991         createdInstances = 0;
3992         for (CIIdx = 0;
3993              CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
3994              ++CIIdx)
3995         {
3996             if (pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
3997                 continue;
3998 
3999             if ((params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE) &&
4000                 (params.inst.restore.pComputeInstanceSave->id != CIIdx))
4001             {
4002                 continue;
4003             }
4004 
4005             if (FLD_TEST_REF(NVC637_CTRL_DMA_EXEC_PARTITIONS_CREATE_REQUEST_WITH_PART_ID, _TRUE, params.inst.request.requestFlags) &&
4006                 (pCIIDs[0] != CIIdx))
4007             {
4008                 continue;
4009             }
4010 
4011             NV_ASSERT(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id ==
4012                       KMIGMGR_COMPUTE_INSTANCE_ID_INVALID);
4013 
4014             portMemCopy(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx],
4015                         sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
4016                         &pComputeInstanceInfo[createdInstances],
4017                         sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]));
4018 
4019             pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id = CIIdx;
4020 
4021             pCIIDs[createdInstances++] = CIIdx;
4022 
4023             if (createdInstances == count)
4024                 break;
4025         }
4026 
4027         for (i = 0; i < createdInstances; ++i)
4028         {
4029             MIG_RESOURCE_ALLOCATION *pResourceAllocation;
4030             MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
4031             MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
4032             RM_ENGINE_TYPE globalEngineType;
4033             NvU32 globalGrIdx;
4034 
4035             //
            // As per the current design, the index into the MIGComputeInstance
            // array is the same as the compute instance ID.
4038             //
4039             CIIdx = pCIIDs[i];
4040 
4041             pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
4042 
4043             pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
4044             pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
4045 
4046             NV_ASSERT_OK(
4047                 kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
4048                                        &pComputeResourceAllocation->engines, &globalEngineType));
4049             NV_ASSERT_OK(
4050                 kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
4051                                        &pResourceAllocation->engines, &globalEngineType));
4052             globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);
4053 
4054             NV_ASSERT(pMIGComputeInstance->id == CIIdx);
4055 
4056             //
            // Register the instance with the capability framework only if explicitly
            // requested. Otherwise, we rely on the persistent state.
4059             //
4060             if (bCreateCap)
4061             {
4062                 // Register compute instance with the capability framework
4063                 NV_ASSERT_OK_OR_GOTO(status,
4064                     osRmCapRegisterSmcExecutionPartition(pKernelMIGGpuInstance->pOsRmCaps,
4065                                                          &pMIGComputeInstance->pOsRmCaps,
4066                                                          pMIGComputeInstance->id),
4067                     cleanup_created_instances);
4068             }
4069 
4070             // Populate UUID
4071             NV_ASSERT_OK_OR_GOTO(status,
4072                 kmigmgrGenerateComputeInstanceUuid_HAL(pGpu, pKernelMIGManager, swizzId, globalGrIdx,
4073                                                        &pMIGComputeInstance->uuid),
4074                 cleanup_created_instances);
4075 
4076             // Allocate RsShared for the instance
4077             NV_ASSERT_OK_OR_GOTO(
4078                 status,
4079                 serverAllocShare(&g_resServ, classInfo(RsShared),
4080                                  &pMIGComputeInstance->pShare),
4081                 cleanup_created_instances);
4082 
4083             // Allocate subscribed handles for this instance
4084             NV_ASSERT_OK_OR_GOTO(status,
4085                 kmigmgrAllocComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance),
4086                 cleanup_created_instances);
4087 
4088             {
4089                 KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, globalGrIdx);
4090                 fecsSetRoutingInfo(pGpu,
4091                                    pKernelGraphics,
4092                                    pMIGComputeInstance->instanceHandles.hClient,
4093                                    pMIGComputeInstance->instanceHandles.hSubdevice,
4094                                    0);
4095 
4096                 NV_ASSERT_OK_OR_GOTO(status,
4097                     kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics),
4098                     cleanup_created_instances);
4099             }
4100         }
4101     }
4102 
4103     status = NV_OK;
4104     goto done;
4105 
4106 cleanup_created_instances:
4107     for (i = 0; i < createdInstances; ++i)
4108     {
4109         (void)kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance,
4110                                            pCIIDs[i], NV_FALSE);
4111     }
4112 
4113 done:
4114     portMemFree(pComputeInstanceInfo);
4115     portMemFreeStackOrHeap(pConfigRequestPerCi);
4116 
4117     return status;
4118 }
4119 
4120 /*!
4121  * @brief   create compute instances for CPU-RM
4122  *
4123  * @param[IN]  pGpu
4124  * @param[IN]  pKernelMIGManager
4125  * @param[IN]  pKernelMIGGpuInstance
4126  * @param[IN]  bQuery                If NV_TRUE, don't save created instances
 * @param[IN]  params                List of requested compute instances to create
 * @param[OUT] pCIIDs                IDs of created instances
 * @param[IN]  bCreateCap            Flag stating whether MIG CI capabilities need to be created
4130  */
4131 NV_STATUS
4132 kmigmgrCreateComputeInstances_FWCLIENT
4133 (
4134     OBJGPU *pGpu,
4135     KernelMIGManager *pKernelMIGManager,
4136     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4137     NvBool bQuery,
4138     KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS params,
4139     NvU32 *pCIIDs,
4140     NvBool bCreateCap
4141 )
4142 {
4143     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
4144     NV_STATUS status = NV_OK;
4145     KernelGraphics *pKernelGraphics;
4146     MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
4147     MIG_RESOURCE_ALLOCATION *pResourceAllocation;
4148     MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
4149     NVC637_CTRL_EXEC_PARTITIONS_EXPORTED_INFO info;
4150     NvU32 CIIdx = pCIIDs[0];
4151     NvU32 tempGpcMask;
4152     KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi;
4153     RM_ENGINE_TYPE localEngineType;
4154     RM_ENGINE_TYPE globalEngineType;
4155     NvU32 globalGrIdx;
4156     NvU32 maxVeidsPerGpc;
4157     NvU64 shadowVeidInUseMask;
4158 
4159     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
4160     NV_ASSERT_OR_RETURN(params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE, NV_ERR_INVALID_ARGUMENT);
4161     NV_ASSERT_OR_RETURN(params.inst.restore.pComputeInstanceSave != NULL, NV_ERR_INVALID_ARGUMENT);
4162     NV_ASSERT_OR_RETURN(params.inst.restore.pComputeInstanceSave->bValid, NV_ERR_INVALID_ARGUMENT);
4163 
    // CPU-RM will always restore the CI state created by GSP-RM, so this will always be a commit operation
4165     NV_ASSERT_OR_RETURN(!bQuery, NV_ERR_INVALID_ARGUMENT);
4166 
4167     pMIGComputeInstance = portMemAllocNonPaged(sizeof(*pMIGComputeInstance));
4168     NV_CHECK_OR_RETURN(LEVEL_NOTICE, pMIGComputeInstance != NULL, NV_ERR_NO_MEMORY);
4169 
4170     portMemSet(pMIGComputeInstance, 0, sizeof(*pMIGComputeInstance));
4171 
4172     pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
4173     pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
4174 
4175     NV_ASSERT_OR_RETURN(!pMIGComputeInstance->bValid, NV_ERR_INVALID_STATE);
4176 
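    //
    // Scratch array of per-CI configure requests. Only entry 0 is populated in
    // this restore path (a single compute instance is restored per call), but
    // the array is sized for the maximum so it can be handed to
    // kmigmgrConfigureGPUInstance unchanged.
    //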
4177     pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
4178     NV_ASSERT_OR_RETURN(pConfigRequestPerCi != NULL, NV_ERR_NO_MEMORY);
4179 
4180     portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
4181 
4182     NV_ASSERT_OK_OR_GOTO(status,
4183         kgrmgrGetMaxVeidsPerGpc(pGpu, GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu), &maxVeidsPerGpc),
4184         done);
4185 
4186     info = params.inst.restore.pComputeInstanceSave->ciInfo;
4187 
4188     if (kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager))
4189     {
4190 
4191         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
4192             kmigmgrXlateSpanStartToCTSId(pGpu, pKernelMIGManager,
4193                                          info.computeSize,
4194                                          info.spanStart,
4195                                          &pConfigRequestPerCi[0].ctsId),
4196             done);
4197 
        NV_CHECK_OR_ELSE(LEVEL_ERROR,
            kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
                                    pKernelMIGGpuInstance->pProfile->validCTSIdMask,
                                    pKernelMIGGpuInstance->ctsIdsInUseMask,
                                    pConfigRequestPerCi[0].ctsId),
            status = NV_ERR_STATE_IN_USE; goto done; );
4204     }
4205     else
4206     {
4207         pConfigRequestPerCi[0].ctsId = KMIGMGR_CTSID_INVALID;
4208     }
4209 
4210     portMemCopy(pMIGComputeInstance->uuid.uuid, sizeof(pMIGComputeInstance->uuid.uuid),
4211                 info.uuid, sizeof(info.uuid));
4212     pMIGComputeInstance->sharedEngFlag = info.sharedEngFlags;
4213 
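    // Unpack each set bit of the saved GPC mask into this CI's list of GPC IDs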
4214     pComputeResourceAllocation->gpcCount = 0;
4215     tempGpcMask = info.gpcMask;
4216     while (tempGpcMask != 0x0)
4217     {
4218         NvU32 gpcIdx = portUtilCountTrailingZeros32(tempGpcMask);
4219         pComputeResourceAllocation->gpcIds[(pComputeResourceAllocation->gpcCount)++] = gpcIdx;
4220         tempGpcMask &= ~(NVBIT32(gpcIdx));
4221     }
4222     pComputeResourceAllocation->gfxGpcCount = info.gfxGpcCount;
4223     pComputeResourceAllocation->veidCount = info.veidCount;
4224     pComputeResourceAllocation->veidOffset = info.veidOffset;
4225     pComputeResourceAllocation->smCount = info.smCount;
4226     pMIGComputeInstance->computeSize = info.computeSize;
4227 
4228     bitVectorFromRaw(&pComputeResourceAllocation->engines, info.enginesMask, sizeof(info.enginesMask));
4229 
4230     // Cache the local engine mask for this CI
4231     kmigmgrGetLocalEngineMask(&pComputeResourceAllocation->engines, &pComputeResourceAllocation->localEngines);
4232 
4233     pMIGComputeInstance->bValid = NV_TRUE;
4234     pMIGComputeInstance->id = CIIdx;
4235 
4236     // Populate configure GPU instance parameters with compute instance info
4237 
4238     //
4239     // Xlate from CI-local GR 0 to GI-local GR idx
4240     // We can't use kmigmgrGetLocalToGlobalEngineType because these
4241     // compute instances aren't committed yet
4242     //
4243     NV_ASSERT_OK(
4244         kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
4245                                &pComputeResourceAllocation->engines, &localEngineType));
4246 
    // Create a pseudo-profile based upon info retrieved from GSP-RM
4248     pConfigRequestPerCi[0].profile.computeSize = info.computeSize;
4249     pConfigRequestPerCi[0].profile.smCount     = pComputeResourceAllocation->smCount;
4250     pConfigRequestPerCi[0].profile.gpcCount    = pComputeResourceAllocation->gpcCount;
4251     pConfigRequestPerCi[0].profile.veidCount   = pComputeResourceAllocation->veidCount;
4252     pConfigRequestPerCi[0].veidSpanStart       = info.spanStart;
4253 
4254     shadowVeidInUseMask = pKernelGraphicsManager->veidInUseMask;
4255     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
4256         kgrmgrCheckVeidsRequest(pGpu, pKernelGraphicsManager,
4257                                 &shadowVeidInUseMask,
4258                                 pConfigRequestPerCi[0].profile.veidCount,
4259                                 &pConfigRequestPerCi[0].veidSpanStart,
4260                                 pKernelMIGGpuInstance),
4261         done);
4262 
4263     // Configure the GR engines for each compute instance
4264     status = kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId,
4265                                          pConfigRequestPerCi,
4266                                          NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType)));
4267 
4268     // Do our best to deconfigure the engines we configured so far, then bail
4269     if (status != NV_OK)
4270     {
4271         portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
4272         // Quash status. This is best-effort cleanup
4273         (void)kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId,
4274                                           pConfigRequestPerCi,
4275                                           NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType)));
4276 
4277         goto done;
4278     }
4279 
4280     NV_ASSERT(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id == KMIGMGR_COMPUTE_INSTANCE_ID_INVALID);
4281 
4282     pMIGComputeInstance->spanStart = pConfigRequestPerCi[0].veidSpanStart;
4283 
4284     portMemCopy(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx],
4285                 sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
4286                 pMIGComputeInstance,
4287                 sizeof(*pMIGComputeInstance));
4288 
4289     //
    // Register the instance with the capability framework only if explicitly
    // requested. Otherwise, we rely on the persistent state.
4292     //
4293     if (bCreateCap)
4294     {
4295         // Register compute instance with the capability framework
4296         NV_ASSERT_OK_OR_GOTO(status,
4297             osRmCapRegisterSmcExecutionPartition(pKernelMIGGpuInstance->pOsRmCaps,
4298                                                  &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].pOsRmCaps,
4299                                                  pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id),
4300             cleanup_created_instances);
4301     }
4302 
4303     // Allocate RsShared for the instance
4304     NV_ASSERT_OK_OR_GOTO(status,
4305         serverAllocShare(&g_resServ, classInfo(RsShared),
4306                          &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].pShare),
4307         cleanup_created_instances);
4308 
4309     // Allocate subscribed handles for this instance
4310     if (!IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
4311     {
        NV_ASSERT_OK_OR_GOTO(status,
            kmigmgrAllocComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
            cleanup_created_instances);
4315 
4316         NV_ASSERT_OK(
4317             kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
4318                                    &pComputeResourceAllocation->engines, &globalEngineType));
4319         NV_ASSERT_OK(
4320             kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
4321                                    &pResourceAllocation->engines, &globalEngineType));
4322         globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);
4323 
4324         pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, globalGrIdx);
4325         fecsSetRoutingInfo(pGpu,
4326                            pKernelGraphics,
4327                            pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].instanceHandles.hClient,
4328                            pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].instanceHandles.hSubdevice,
4329                            0);
4330 
4331         NV_ASSERT_OK_OR_GOTO(status,
4332             kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics),
4333             cleanup_created_instances);
4334     }
4335 
4336     status = NV_OK;
4337     goto done;
4338 
4339 cleanup_created_instances:
4340     (void)kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance,
4341                                        CIIdx, NV_FALSE);
4342 done:
4343     portMemFreeStackOrHeap(pConfigRequestPerCi);
4344     portMemFree(pMIGComputeInstance);
4345     return status;
4346 }
4347 
4348 // Delete created instance handles if they exist
4349 void
4350 kmigmgrFreeComputeInstanceHandles_IMPL
4351 (
4352     OBJGPU *pGpu,
4353     KernelMIGManager *pKernelMIGManager,
4354     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4355     MIG_COMPUTE_INSTANCE *pMIGComputeInstance
4356 )
4357 {
4358     if (pMIGComputeInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
4359     {
4360         RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
4361 
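        //
        // Freeing the client handle also frees everything allocated under it
        // (the subscription and subdevice), so the child handles only need to
        // be reset locally below.
        //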
4362         pRmApi->Free(pRmApi, pMIGComputeInstance->instanceHandles.hClient, pMIGComputeInstance->instanceHandles.hClient);
4363         pMIGComputeInstance->instanceHandles.hClient = NV01_NULL_OBJECT;
4364         pMIGComputeInstance->instanceHandles.hSubdevice = NV01_NULL_OBJECT;
4365         pMIGComputeInstance->instanceHandles.hSubscription = NV01_NULL_OBJECT;
4366     }
4367 }
4368 
4369 /*!
4370  * @brief   Releases the engines owned by this Compute Instance of the given class
4371  *          of engine (GR, COPY, etc) to the GPU Instance resource pools.
4372  */
4373 void
4374 kmigmgrReleaseComputeInstanceEngines_IMPL
4375 (
4376     OBJGPU *pGpu,
4377     KernelMIGManager *pKernelMIGManager,
4378     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4379     MIG_COMPUTE_INSTANCE *pMIGComputeInstance
4380 )
4381 {
4382     RM_ENGINE_TYPE globalEngineType;
4383     RM_ENGINE_TYPE localEngineType;
4384     ENGTYPE_BIT_VECTOR *pGlobalMask;
4385     ENGTYPE_BIT_VECTOR *pLocalMask;
4386 
4387     NV_ASSERT_OR_RETURN_VOID(pKernelMIGGpuInstance != NULL);
4388     NV_ASSERT_OR_RETURN_VOID(pMIGComputeInstance != NULL);
4389 
4390     pGlobalMask = &pKernelMIGGpuInstance->resourceAllocation.engines;
4391     pLocalMask = &pKernelMIGGpuInstance->resourceAllocation.localEngines;
4392 
4393     // Iterate over both global/local masks at the same time
4394     FOR_EACH_IN_BITVECTOR_PAIR(pGlobalMask, globalEngineType, pLocalMask, localEngineType)
4395     {
4396         NvU32 CIIdx;
4397 
4398         // Skip anything not owned by this compute instance
4399         if (!bitVectorTest(&pMIGComputeInstance->resourceAllocation.engines, localEngineType))
4400             continue;
4401 
4402         //
4403         // Clear this engine from the exclusive ownership mask. If it was being
4404         // shared, it already isn't in the exclusive ownership mask, so doing
4405         // this for all engines in this compute instance isn't harmful.
4406         //
4407         bitVectorClr(&pKernelMIGGpuInstance->exclusiveEngMask, globalEngineType);
4408 
4409         // If this engine was exclusively owned, nothing else to do
4410         if (!bitVectorTest(&pKernelMIGGpuInstance->sharedEngMask, globalEngineType))
4411             continue;
4412 
4413         // Determine if any other compute instance owns this engine
4414         for (CIIdx = 0;
4415              CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
4416              ++CIIdx)
4417         {
4418             if (!pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
4419                 continue;
4420 
4421             if (bitVectorTest(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].resourceAllocation.engines,
4422                               localEngineType))
4423             {
4424                 break;
4425             }
4426         }
4427 
4428         // If engine is still owned by someone, don't mark it unused
4429         if (CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance))
4430             continue;
4431 
4432         // mark this engine as no longer being shared by anyone
4433         bitVectorClr(&pKernelMIGGpuInstance->sharedEngMask, globalEngineType);
4434     }
4435     FOR_EACH_IN_BITVECTOR_PAIR_END();
4436 }
4437 
4438 /*!
4439  * @brief   Function to delete Compute Instance
4440  *
4441  * @param[IN]  pGpu
4442  * @param[IN]  pKernelMIGManager
4443  * @param[IN]  pKernelMIGGpuInstance
4444  * @param[IN]  CIID                  Compute Instance ID
4445  * @param[IN]  bUnload               NV_TRUE if called during gpu state unload path
4446  */
4447 NV_STATUS
4448 kmigmgrDeleteComputeInstance_IMPL
4449 (
4450     OBJGPU *pGpu,
4451     KernelMIGManager *pKernelMIGManager,
4452     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4453     NvU32 CIID,
4454     NvBool bUnload
4455 )
4456 {
4457     MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
4458     MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
4459     ENGTYPE_BIT_VECTOR grEngines;
4460     NvU32 swizzId;
4461     KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi;
4462     NvU32 updateEngMask;
4463     NV_STATUS status = NV_OK;
4464 
4465     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
4466     NV_ASSERT_OR_RETURN(CIID < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance),
4467                         NV_ERR_INVALID_ARGUMENT);
4468 
4469     // Make sure that the targeted compute instance is still valid
4470     NV_CHECK_OR_RETURN(LEVEL_SILENT,
4471                        pKernelMIGGpuInstance->MIGComputeInstance[CIID].bValid,
4472                        NV_WARN_NOTHING_TO_DO);
4473 
4474     pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIID];
4475     pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
4476 
4477     //
    // The initial refCount is set to 1 when the instance is created, and every
    // subsequent client subscription increments it.
4480     //
4481     if ((pMIGComputeInstance->pShare != NULL) &&
4482         (serverGetShareRefCount(&g_resServ, pMIGComputeInstance->pShare) > 2))
4483     {
4484         NV_PRINTF(LEVEL_ERROR,
4485                   "Compute Instance with id - %d still in use by other clients\n",
4486                   CIID);
4487 
4488         return NV_ERR_STATE_IN_USE;
4489     }
4490 
4491     if (!bUnload)
4492     {
4493         //
4494         // Unregister instance from the capability framework only if
4495         // it is explicitly destroyed i.e. not during GPU state unload path.
4496         //
4497         // Note that the saved instance persistent state will be freed by
4498         // _gpumgrUnregisterRmCapsForMIGCI during driver unload.
4499         //
4500         osRmCapUnregister(&pMIGComputeInstance->pOsRmCaps);
4501     }
4502 
4503     // Deconfigure the GR engine for this compute instance
4504     swizzId = pKernelMIGGpuInstance->swizzId;
4505 
4506     pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
4507     NV_ASSERT_OR_RETURN(pConfigRequestPerCi != NULL, NV_ERR_NO_MEMORY);
4508 
4509     portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
4510 
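    //
    // Identify the (single) GR engine assigned to this CI. The configure
    // request array was zeroed above, so passing it for that engine drives
    // kmigmgrConfigureGPUInstance down its invalidation path, tearing down the
    // GR/VEID assignment.
    //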
4511     bitVectorClrAll(&grEngines);
4512     bitVectorSetRange(&grEngines, RM_ENGINE_RANGE_GR());
4513     bitVectorAnd(&grEngines, &grEngines, &pComputeResourceAllocation->engines);
4514     NV_ASSERT_OR_ELSE(!bitVectorTestAllCleared(&grEngines), status = NV_ERR_INVALID_STATE; goto done;);
4515     updateEngMask = NVBIT32(RM_ENGINE_TYPE_GR_IDX(bitVectorCountTrailingZeros(&grEngines)));
4516     NV_ASSERT_OK_OR_GOTO(status,
4517         kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId, pConfigRequestPerCi, updateEngMask),
4518         done);
4519 
4520     {
4521         RM_ENGINE_TYPE globalRmEngType;
4522         MIG_INSTANCE_REF ref = kmigmgrMakeCIReference(pKernelMIGGpuInstance, pMIGComputeInstance);
4523         NV_ASSERT_OK_OR_GOTO(status,
4524             kmigmgrGetLocalToGlobalEngineType(pGpu, pKernelMIGManager, ref,
4525                                               RM_ENGINE_TYPE_GR(0),
4526                                               &globalRmEngType),
4527             done);
4528 
4529         // Free up the internal handles for this compute instance
4530         kmigmgrFreeComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance);
4531 
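        //
        // With the CI-level handles gone, point FECS trace routing for this GR
        // back at the GPU instance's own client/subdevice.
        //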
4532         fecsSetRoutingInfo(pGpu,
4533                            GPU_GET_KERNEL_GRAPHICS(pGpu, RM_ENGINE_TYPE_GR_IDX(globalRmEngType)),
4534                            pKernelMIGGpuInstance->instanceHandles.hClient,
4535                            pKernelMIGGpuInstance->instanceHandles.hSubdevice,
4536                            RM_ENGINE_TYPE_GR_IDX(bitVectorCountTrailingZeros(&grEngines)));
4537 
4538         if (pMIGComputeInstance->pShare != NULL)
4539         {
4540             serverFreeShare(&g_resServ, pMIGComputeInstance->pShare);
4541             pMIGComputeInstance->pShare = NULL;
4542         }
4543     }
4544 
4545     // Mark this compute instance as invalid
4546     pMIGComputeInstance->bValid = NV_FALSE;
4547 
4548     // Release this compute instance's engines
4549     kmigmgrReleaseComputeInstanceEngines(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance);
4550 
4551     // Now that we no longer need it, clear the shared engine flag
4552     pMIGComputeInstance->sharedEngFlag = 0x0;
4553     pMIGComputeInstance->id = KMIGMGR_COMPUTE_INSTANCE_ID_INVALID;
4554 
4555     pMIGComputeInstance->pOsRmCaps = NULL;
4556 
4557 done:
4558     portMemFreeStackOrHeap(pConfigRequestPerCi);
4559 
4560     return status;
4561 }
4562 
4563 /*!
4564  * @brief print out the CI configuration of this GI
4565  */
4566 static void
4567 _kmigmgrPrintComputeInstances
4568 (
4569     OBJGPU *pGpu,
4570     KernelMIGManager *pKernelMIGManager,
4571     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
4572 )
4573 {
4574 #if NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
4575 #define PADDING_STR "----------------------------------------------------"
4576     RM_ENGINE_TYPE rmEngineType;
4577     NvU32 CIIdx;
4578 
4579     NV_PRINTF(LEVEL_INFO, "\n");
4580     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
4581     NV_PRINTF(LEVEL_INFO, "| %14s | %14s | %14s |\n",
4582               "SwizzId",
4583               "GR Count",
4584               "Gpc Count");
4585     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
4586     NV_PRINTF(LEVEL_INFO, "| %14d | %14d | %14d |\n",
4587               pKernelMIGGpuInstance->swizzId,
4588               kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines, RM_ENGINE_TYPE_GR(0)),
4589               pKernelMIGGpuInstance->resourceAllocation.gpcCount);
4590 
4591     for (CIIdx = 0;
4592          CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
4593          ++CIIdx)
4594     {
4595         MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
4596 
4597         if (!pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
4598         {
4599             continue;
4600         }
4601 
4602         pComputeResourceAllocation = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].resourceAllocation;
4603 
4604         NV_ASSERT_OK(
4605             kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
4606                                    &pComputeResourceAllocation->engines, &rmEngineType));
4607 
4608         NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
4609         if (IS_GSP_CLIENT(pGpu))
4610         {
4611             NvU32 gpcIdx;
4612             NvU32 gpcMask = 0x0;
4613 
4614             for (gpcIdx = 0; gpcIdx < pComputeResourceAllocation->gpcCount; ++gpcIdx)
4615             {
4616                  gpcMask |= NVBIT32(pComputeResourceAllocation->gpcIds[gpcIdx]);
4617             }
4618             NV_PRINTF(LEVEL_INFO, "| %23s | %23s |\n",
4619                       "Gr Engine IDX",
4620                       "GPC Mask");
4621             NV_PRINTF(LEVEL_INFO, "| %23d | %23X |\n",
4622                       RM_ENGINE_TYPE_GR_IDX(rmEngineType),
4623                       gpcMask);
4624         }
4625         else
4626         {
4627             // gpcMask is not meaningful in VGPU, thus only printing gpcCount
4628             NV_PRINTF(LEVEL_INFO, "| %23s | %23s |\n",
4629                       "Gr Engine IDX",
4630                       "GPC Count");
            NV_PRINTF(LEVEL_INFO, "| %23d | %23d |\n",
4632                       RM_ENGINE_TYPE_GR_IDX(rmEngineType),
4633                       pComputeResourceAllocation->gpcCount);
4634         }
4635     }
4636     NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
4637 
4638 #undef PADDING_STR
4639 #endif // NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
4640 }
4641 
4642 /*!
4643  * @brief   Function to configure a specific GPU instance by setting available
4644  *          GPCs with requested GR Engines
4645  *
4646  * @param[IN]   pGpu
 * @param[IN]   pKernelMIGManager
 * @param[IN]   swizzId              SwizzId for this GPU instance
 * @param[IN]   pConfigRequestsPerCi Per-CI configuration requests (profile,
 *                                   CTS ID, VEID span) for the GR engines in
 *                                   this instance
 * @param[IN]   updateEngMask        Entry valid flag for each engine in instance
4652  *
4653  * @return  Returns NV_STATUS
4654  *          NV_OK
4655  *          NV_ERR_INVALID_ARGUMENT
4656  *          NV_WARN_NOTHING_TO_DO
4657  *          NV_ERR_INSUFFICIENT_RESOURCES
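 *
 * Illustrative usage (mirroring the delete path in this file, not a separate
 * API): to tear down the GR assignment for one engine, a caller passes an
 * all-zero request array so that profile.gpcCount == 0 for the targeted
 * engine, which routes this function down its invalidation path:
 *
 *   portMemSet(pConfigRequestPerCi, 0x0,
 *              sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
 *   status = kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId,
 *                                        pConfigRequestPerCi, updateEngMask);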
4658  */
4659 NV_STATUS
4660 kmigmgrConfigureGPUInstance_IMPL
4661 (
4662     OBJGPU *pGpu,
4663     KernelMIGManager *pKernelMIGManager,
4664     NvU32 swizzId,
4665     const KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestsPerCi,
4666     NvU32 updateEngMask
4667 )
4668 {
4669     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
4670     NV_STATUS status = NV_OK;
4671     NvU32 i;
4672     NvU32 j;
4673     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL;
4674     NvBool bAssigning;
4675     RM_ENGINE_TYPE checkGrs[RM_ENGINE_TYPE_GR_SIZE];
4676     NvU32 checkGrCount = 0;
4677     RM_ENGINE_TYPE rmEngineType;
4678     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
4679     NvBool bIsCTSRequired = kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager);
4680     NvU32 localIdx;
4681 
4682     // Sanity check the GPU instance requested to be configured
4683     if (!kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId))
4684     {
4685         NV_PRINTF(LEVEL_ERROR, "Invalid swizzId - %d.\n", swizzId);
4686         return NV_ERR_INVALID_ARGUMENT;
4687     }
4688 
4689     status = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGpuInstance);
4690     NV_CHECK_OR_RETURN(LEVEL_SILENT, status == NV_OK, status);
4691 
4692     bAssigning = NV_FALSE;
4693     portMemSet(checkGrs, 0, sizeof(checkGrs));
4694 
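    //
    // First pass: validate each targeted GR engine's request (a valid CTS ID
    // when CTS alignment is required, otherwise a GPC count that fits within
    // the instance) and record whether any engine is actually being assigned.
    //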
4695     i = 0;
4696     localIdx = 0;
4697     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
4698     {
4699         NvU32 engineIdx;
4700         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
4701             continue;
4702 
4703         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
4704 
4705         // Skip over invalid entries
4706         if (!(updateEngMask & NVBIT32(i)))
4707         {
4708             i++;
4709             continue;
4710         }
4711 
4712         // Resource checks are verified by CTS ID assignment when required, else use GPC count
4713         if (bIsCTSRequired)
4714         {
4715             NV_CHECK_OR_RETURN(LEVEL_ERROR,
4716                 pConfigRequestsPerCi[localIdx].ctsId != KMIGMGR_CTSID_INVALID,
4717                 NV_ERR_INSUFFICIENT_RESOURCES);
4718         }
4719         else
4720         {
4721             // Make sure no requested GPC count is greater than instance GPC count
4722             if (pConfigRequestsPerCi[localIdx].profile.gpcCount > pKernelMIGGpuInstance->resourceAllocation.gpcCount)
4723             {
4724                  NV_PRINTF(LEVEL_ERROR,
4725                            "Invalid GPC count - %d requested for GrIdx - %d.\n",
4726                            pConfigRequestsPerCi[localIdx].profile.gpcCount,
4727                            engineIdx);
4728                  return NV_ERR_INVALID_ARGUMENT;
4729             }
4730         }
4731 
4732         bAssigning = bAssigning || pConfigRequestsPerCi[localIdx].profile.gpcCount > 0;
4733         checkGrs[checkGrCount++] = rmEngineType;
4734 
4735         localIdx++;
4736         i++;
4737     }
4738     FOR_EACH_IN_BITVECTOR_END();
4739 
4740     //
4741     // Return an error if there are any channels on any engines targeted by this
4742     // request
4743     //
4744     NV_CHECK_OR_RETURN(LEVEL_SILENT,
4745                        !kfifoEngineListHasChannel(pGpu, pKernelFifo, checkGrs, checkGrCount),
4746                        NV_ERR_STATE_IN_USE);
4747 
4748     if (!bAssigning)
4749     {
4750         // Invalidate targeted engines
4751         i = 0;
4752         FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
4753         {
4754             NvU32 engineIdx;
4755 
4756             if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
4757                 continue;
4758 
4759             engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
4760 
4761             if (updateEngMask & NVBIT32(i))
4762             {
4763                 NV_ASSERT_OK_OR_RETURN(
4764                     kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx));
4765             }
4766 
4767             i++;
4768         }
4769         FOR_EACH_IN_BITVECTOR_END();
4770 
4771         return NV_OK;
4772     }
4773 
4774     //
4775     // Client passes the logical GR-IDs while RM works with physical GR-IDs
4776     // Walk the list of physical GRs associated with this GPU instance and then
4777     // set GPCs as requested
4778     //
4779     i = 0;
4780     localIdx = 0;
4781     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
4782     {
4783         NvU32 engineIdx;
4784         NvU32 gpcCount = pConfigRequestsPerCi[localIdx].profile.gpcCount;
4785 
4786         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
4787             continue;
4788 
4789         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
4790 
4791         if (!(updateEngMask & NVBIT32(i)))
4792         {
4793             i++;
4794             continue;
4795         }
4796 
4797         if (gpcCount == 0)
4798         {
4799             localIdx++;
4800             i++;
4801             continue;
4802         }
4803 
4804         // Update the GR to VEID mapping
4805         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
4806             kgrmgrAllocVeidsForGrIdx(pGpu,
4807                                      pKernelGraphicsManager,
4808                                      engineIdx,
4809                                      pConfigRequestsPerCi[localIdx].veidSpanStart,
4810                                      pConfigRequestsPerCi[localIdx].profile.veidCount,
4811                                      pKernelMIGGpuInstance),
4812             cleanup);
4813 
        localIdx++;
        i++;
4815     }
4816     FOR_EACH_IN_BITVECTOR_END();
4817 
4818     _kmigmgrPrintComputeInstances(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
4819 
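    //
    // Final pass: for every engine that received GPCs, mark its CTS ID in use
    // (when required), re-pull the kernel graphics static info for this
    // swizzId, and record the sizes of the local GR ctx buffers.
    //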
4820     i = 0;
4821     localIdx = 0;
4822     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
4823     {
4824         NvU32 engineIdx;
4825         NvU32 gpcCount = pConfigRequestsPerCi[localIdx].profile.gpcCount;
4826         KernelGraphics *pKGr;
4827 
4828         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
4829             continue;
4830 
4831         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
4832 
4833         if (!(updateEngMask & NVBIT32(i)))
4834         {
4835             i++;
4836             continue;
4837         }
4838 
4839         if (gpcCount == 0)
4840         {
4841             localIdx++;
4842             i++;
4843             continue;
4844         }
4845 
4846         if (bIsCTSRequired)
4847             kmigmgrSetCTSIdInUse(pKernelMIGGpuInstance, pConfigRequestsPerCi[localIdx].ctsId, engineIdx, NV_TRUE);
4848 
4849         pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
4850         // Re-pull public static data for kernel graphics
4851         status = kgraphicsLoadStaticInfo_HAL(pGpu, pKGr, pKernelMIGGpuInstance->swizzId);
4852         if (status != NV_OK)
4853             goto cleanup;
4854 
4855         // record sizes of local GR ctx buffers for this GR
4856         status = kgrmgrDiscoverMaxLocalCtxBufInfo(pGpu, pKernelGraphicsManager, pKGr, swizzId);
4857         if (status != NV_OK)
4858             goto cleanup;
4859 
        localIdx++;
        i++;
4861     }
4862     FOR_EACH_IN_BITVECTOR_END();
4863 
4864     return status;
4865 
4866 cleanup:
4867 
4868     j = 0;
4869     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
4870     {
4871         NvU32 engineIdx;
4872 
4873         // Rollback all previous validations
4874         if (j == i)
4875             break;
4876 
4877         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
4878             continue;
4879 
4880         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
4881 
4882         if (updateEngMask & NVBIT32(j))
4883         {
4884             NV_PRINTF(LEVEL_ERROR,
4885                       "Failed to configure GPU instance. Invalidating GRID - %d\n",
4886                       engineIdx);
4887 
4888             // Invalidate assignments to this GR, clear global state
4889             kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx);
4890         }
4891 
4892         j++;
4893     }
4894     FOR_EACH_IN_BITVECTOR_END();
4895 
4896     return status;
4897 }
4898 
4899 // invalidate GR to GPC mappings
4900 NV_STATUS
4901 kmigmgrInvalidateGrGpcMapping_IMPL
4902 (
4903     OBJGPU *pGpu,
4904     KernelMIGManager *pKernelMIGManager,
4905     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4906     NvU32 grIdx
4907 )
4908 {
4909     NV_STATUS status = NV_OK;
4910     NvU32 gfid;
4911     NvBool bCallingContextPlugin;
4912     KernelGraphics *pKernelGraphics;
4913 
4914     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
4915     NV_ASSERT_OK_OR_RETURN(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin));
4916     if (bCallingContextPlugin)
4917     {
4918         gfid = GPU_GFID_PF;
4919     }
4920 
4921     // Release CTS-ID fields
4922     if (kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager))
4923         kmigmgrSetCTSIdInUse(pKernelMIGGpuInstance, KMIGMGR_CTSID_INVALID, grIdx, NV_FALSE);
4924 
    // Free global ctx buffers; these will need to be regenerated
4926     pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, grIdx);
4927     fecsBufferTeardown(pGpu, pKernelGraphics);
4928     kgraphicsFreeGlobalCtxBuffers(pGpu, pKernelGraphics, gfid);
4929 
4930     // clear cached ctx buf sizes
4931     kgraphicsClearCtxBufferInfo(pGpu, pKernelGraphics);
4932 
4933     return status;
4934 }
4935 
4936 // invalidate a GR engine
4937 NV_STATUS
4938 kmigmgrInvalidateGr_IMPL
4939 (
4940     OBJGPU *pGpu,
4941     KernelMIGManager *pKernelMIGManager,
4942     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
4943     NvU32 grIdx
4944 )
4945 {
4946     KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, grIdx);
4947     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
4948 
4949     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
4950         kmigmgrInvalidateGrGpcMapping(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, grIdx));
4951 
4952     kgrmgrClearVeidsForGrIdx(pGpu, pKernelGraphicsManager, grIdx);
4953 
4954     kgraphicsInvalidateStaticInfo(pGpu, pKGr);
4955     return NV_OK;
4956 }
4957 
4958 /*!
4959  * @brief   Function to invalidate a gpu instance
4960  *
4961  * @param[IN]   pGpu
4962  * @param[IN]   pKernelMIGManager
4963  * @param[IN]   swizzId             swizzId which is getting invalidated
4964  * @param[IN]   bUnload             NV_TRUE if called from gpu state unload path
4965  *
4966  * @return  Returns NV_STATUS
4967  *          NV_OK
4968  *          NV_ERR_INVALID_ARGUMENT   No GPC associated with Gr
4969  */
4970 NV_STATUS
4971 kmigmgrInvalidateGPUInstance_IMPL
4972 (
4973     OBJGPU *pGpu,
4974     KernelMIGManager *pKernelMIGManager,
4975     NvU32 swizzId,
4976     NvBool bUnload
4977 )
4978 {
4979     NV_STATUS rmStatus = NV_OK;
4980     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4981     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL;
4982     NvU32 i;
4983     RM_ENGINE_TYPE rmEngineType;
4984     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
4985 
4986     // Sanity checks
4987     rmStatus = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGpuInstance);
4988     if (rmStatus != NV_OK)
4989     {
4990         // Didn't find requested gpu instance
4991         NV_PRINTF(LEVEL_ERROR, "No valid gpu instance with SwizzId - %d found\n",
4992                   swizzId);
4993         return rmStatus;
4994     }
4995 
4996     // Make sure that no client is using this gpu instance
4997     if (!kmigmgrIsGPUInstanceReadyToBeDestroyed(pKernelMIGGpuInstance))
4998     {
4999         NV_PRINTF(LEVEL_ERROR,
5000                   "Gpu instance with SwizzId - %d still in use by other clients\n",
5001                   swizzId);
5002 
5003         kmigmgrPrintSubscribingClients(pGpu, pKernelMIGManager, swizzId);
5004         return NV_ERR_STATE_IN_USE;
5005     }
5006 
5007     for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++i)
5008     {
5009         if (pKernelMIGGpuInstance->MIGComputeInstance[i].bValid)
5010         {
5011             NV_PRINTF(LEVEL_ERROR,
5012                       "Cannot destroy gpu instance %u with valid compute instance %d \n",
5013                       swizzId, i);
5014 
5015             return NV_ERR_STATE_IN_USE;
5016         }
5017     }
5018 
5019     NV_PRINTF(LEVEL_INFO, "FREEING GPU INSTANCE\n");
5020     kmigmgrPrintGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5021 
5022     if (!bUnload)
5023     {
5024         //
5025         // Unregister gpu instance from the capability framework only if
5026         // it is explicitly destroyed i.e. not during GPU state unload path.
5027         //
5028         // Note that the saved gpu instance persistent state will be freed by
5029         // _gpumgrUnregisterRmCapsForSmcPartitions during driver unload.
5030         //
5031         osRmCapUnregister(&pKernelMIGGpuInstance->pOsRmCaps);
5032     }
5033 
5034     // Remove GR->GPC mappings in GPU instance Info
5035     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
5036     {
5037         NvU32 engineIdx;
5038         KernelGraphics *pKernelGraphics;
5039 
5040         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
5041             continue;
5042 
5043         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
5044 
5045         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5046             kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx));
5047 
5048         pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
5049         fecsClearRoutingInfo(pGpu, pKernelGraphics);
5050     }
5051     FOR_EACH_IN_BITVECTOR_END();
5052 
    // Delete client handle after all GRs are invalidated
5054     kmigmgrFreeGPUInstanceHandles(pKernelMIGGpuInstance);
5055 
5056     NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5057         kmigmgrClearEnginesInUse(pGpu, pKernelMIGManager, &pKernelMIGGpuInstance->resourceAllocation.engines));
5058 
    // Destroy GR ctx buffer pools
5060     kmigmgrDestroyGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5061 
5062     if (kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId))
5063     {
5064         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5065             kmigmgrClearSwizzIdInUse(pGpu, pKernelMIGManager, swizzId));
5066     }
5067 
5068     // Sanity check that requested swizzID is not set in swizzIdMask
5069     NV_ASSERT_OR_ELSE(!(NVBIT64(swizzId) & pKernelMIGManager->swizzIdInUseMask), rmStatus = NV_ERR_INVALID_STATE);
5070 
5071     NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5072         kmemsysInitMIGMemoryPartitionTable_HAL(pGpu, pKernelMemorySystem));
5073 
5074     // Destroy gpu instance scrubber
5075     kmigmgrDestroyGPUInstanceScrubber(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5076 
5077     // Destroy gpu instance pool for page table mem
5078     kmigmgrDestroyGPUInstancePool(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5079 
5080     // Delete gpu instance engine runlists
5081     NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5082         kmigmgrDeleteGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance));
5083 
5084     // Destroy runlist buffer pools
5085     kmigmgrDestroyGPUInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5086 
5087     // Free gpu instance memory
5088     NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5089         memmgrFreeMIGGPUInstanceMemory(pGpu, pMemoryManager, swizzId, pKernelMIGGpuInstance->hMemory, &pKernelMIGGpuInstance->pMemoryPartitionHeap));
5090 
5091     if (pKernelMIGGpuInstance->pShare != NULL)
5092     {
5093         serverFreeShare(&g_resServ, pKernelMIGGpuInstance->pShare);
5094         pKernelMIGGpuInstance->pShare = NULL;
5095     }
5096 
5097     // Initialize gpu instance info to initial value
5098     kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
5099 
5100     return rmStatus;
5101 }
5102 
5103 /*!
5104  * @brief   Init gpu instance scrubber
5105  */
5106 NV_STATUS
5107 kmigmgrInitGPUInstanceScrubber_IMPL
5108 (
5109     OBJGPU *pGpu,
5110     KernelMIGManager *pKernelMIGManager,
5111     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5112 )
5113 {
5114     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5115 
5116     if (!IsSLIEnabled(pGpu) &&
5117          memmgrIsScrubOnFreeEnabled(pMemoryManager) &&
5118          memmgrIsPmaInitialized(pMemoryManager))
5119     {
5120         NV_ASSERT_OK_OR_RETURN(scrubberConstruct(pGpu, pKernelMIGGpuInstance->pMemoryPartitionHeap));
5121         pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized = NV_TRUE;
5122     }
5123 
5124     return NV_OK;
5125 }
5126 
5127 /*!
5128  * @brief   Destroy gpu instance scrubber
5129  */
5130 void
5131 kmigmgrDestroyGPUInstanceScrubber_IMPL
5132 (
5133     OBJGPU *pGpu,
5134     KernelMIGManager *pKernelMIGManager,
5135     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5136 )
5137 {
5138     OBJMEMSCRUB *pMemscrub = NULL;
5139     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5140 
5141     if (!pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized)
5142         return;
5143 
5144     if (!IsSLIEnabled(pGpu) &&
5145          memmgrIsScrubOnFreeEnabled(pMemoryManager) &&
5146          memmgrIsPmaInitialized(pMemoryManager))
5147     {
5148         pMemscrub = pKernelMIGGpuInstance->pMemoryPartitionHeap->pmaObject.pScrubObj;
5149         scrubberDestruct(pGpu, pKernelMIGGpuInstance->pMemoryPartitionHeap, pMemscrub);
5150         pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized = NV_FALSE;
5151     }
5152 }
5153 
5154 /*!
5155  * @brief   Releases GR buffer memory back from global buffer pools and destroys
5156  *          these pools for all GR engines that belong to this gpu instance.
5157  */
5158 void
5159 kmigmgrDestroyGPUInstanceGrBufPools_IMPL
5160 (
5161     OBJGPU *pGpu,
5162     KernelMIGManager *pKernelMIGManager,
5163     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5164 )
5165 {
5166     RM_ENGINE_TYPE rmEngineType;
5167 
5168     if (!ctxBufPoolIsSupported(pGpu))
5169         return;
5170 
5171     NV_ASSERT(pKernelMIGGpuInstance != NULL);
5172 
5173     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
5174     {
5175         NvU32 engineIdx;
5176         KernelGraphics *pKernelGraphics;
5177 
5178         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
5179             continue;
5180 
5181         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
5182         pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
5183 
5184         kgraphicsDestroyCtxBufPool(pGpu, pKernelGraphics);
5185     }
5186     FOR_EACH_IN_BITVECTOR_END();
5187 }
5188 
5189 /*!
5190  * @brief   Destroy per-gpu instance memory pool for client page tables
5191  */
5192 void
5193 kmigmgrDestroyGPUInstancePool_IMPL
5194 (
5195     OBJGPU *pGpu,
5196     KernelMIGManager *pKernelMIGManager,
5197     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5198 )
5199 {
5200     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5201 
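    //
    // When client page tables are not PMA-managed or this instance needs no
    // memory partitioning, no pool should exist; the asserts below fall
    // through to destroy_pool only if one unexpectedly does.
    //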
5202     if (!memmgrIsPmaInitialized(pMemoryManager) ||
5203         !memmgrAreClientPageTablesPmaManaged(pMemoryManager))
5204     {
5205         NV_ASSERT_OR_GOTO((pKernelMIGGpuInstance->pPageTableMemPool == NULL), destroy_pool);
5206         return;
5207     }
5208 
5209     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId))
5210     {
5211         NV_ASSERT_OR_GOTO((pKernelMIGGpuInstance->pPageTableMemPool == NULL), destroy_pool);
5212         return;
5213     }
5214 
5215     if (pKernelMIGGpuInstance->pPageTableMemPool == NULL)
5216     {
5217         NV_PRINTF(LEVEL_INFO, "page table memory pool not setup\n");
5218         return;
5219     }
5220 
5221 destroy_pool:
5222     rmMemPoolDestroy(pKernelMIGGpuInstance->pPageTableMemPool);
5223     pKernelMIGGpuInstance->pPageTableMemPool = NULL;
5224 }
5225 
5226 /*!
5227  * @brief   Releases runlist buffer memory back from runlist buffer pools and destroys the
 *          runlist buffer pools for engines that belong to this gpu instance.
5229  */
5230 void
5231 kmigmgrDestroyGPUInstanceRunlistBufPools_IMPL
5232 (
5233     OBJGPU *pGpu,
5234     KernelMIGManager *pKernelMIGManager,
5235     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5236 )
5237 {
5238     RM_ENGINE_TYPE rmEngineType;
5239     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
5240 
5241     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId))
5242         return;
5243 
5244     if (!ctxBufPoolIsSupported(pGpu))
5245         return;
5246 
5247     for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++)
5248     {
5249         if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) ||
5250             !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) ||
5251             !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
5252         {
5253             continue;
5254         }
5255 
5256         if (pKernelFifo->pRunlistBufPool[rmEngineType] != NULL)
5257         {
5258             ctxBufPoolRelease(pKernelFifo->pRunlistBufPool[rmEngineType]);
5259             ctxBufPoolDestroy(&pKernelFifo->pRunlistBufPool[rmEngineType]);
5260         }
5261     }
5262 }
5263 
5264 /*!
5265  * @brief   Print out clients subscribing to specified gpu instance
5266  */
5267 void
5268 kmigmgrPrintSubscribingClients_IMPL
5269 (
5270     OBJGPU *pGpu,
5271     KernelMIGManager *pKernelMIGManager,
5272     NvU32 swizzId
5273 )
5274 {
5275     RmClient **ppClient;
5276     for (ppClient = serverutilGetFirstClientUnderLock();
5277          ppClient != NULL;
5278          ppClient = serverutilGetNextClientUnderLock(ppClient))
5279     {
5280         RmClient *pClient = *ppClient;
5281         RsClient *pRsClient = staticCast(pClient, RsClient);
5282         NvHandle hClient = pRsClient->hClient;
5283         MIG_INSTANCE_REF ref;
5284         RS_PRIV_LEVEL privLevel = rmclientGetCachedPrivilege(pClient);
5285 
5286         NV_STATUS status = kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager,
5287                                                            hClient,
5288                                                            &ref);
5289 
5290         if (status != NV_OK)
5291             continue;
5292 
5293         if (ref.pKernelMIGGpuInstance->swizzId != swizzId)
5294             continue;
5295 
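        //
        // privLevel is only consumed by the NV_PRINTF below, which may compile
        // out; the void cast keeps it from being flagged as unused.
        //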
5296         (void)privLevel;
5297         NV_PRINTF(LEVEL_INFO, "%s client %x currently subscribed to swizzId %u\n",
5298                   (privLevel >= RS_PRIV_LEVEL_KERNEL) ? "Kernel" : "Usermode",
5299                   hClient, swizzId);
5300     }
5301 }
5302 
5303 /*!
5304  * @brief   Function to enable/disable MIG mode
5305  *
5306  * @param[IN]   pGpu
5307  * @param[IN]   pKernelMIGManager
5308  * @param[IN]   bMemoryPartitioningNeeded   Is Memory partitioning required?
5309  * @param[IN]   bEnable                     Enable/Disable MIG
5310  * @param[IN]   bUnload                     RM unload path
5311  *
5312  * @return  Returns NV_STATUS
5313  *          NV_OK
5314  *          NV_WARN_NOTHING_TO_DO
5315  *          NV_ERR_INVALID_STATE
5316  */
5317 NV_STATUS
5318 kmigmgrSetMIGState_VF
5319 (
5320     OBJGPU *pGpu,
5321     KernelMIGManager *pKernelMIGManager,
5322     NvBool bMemoryPartitioningNeeded,
5323     NvBool bEnable,
5324     NvBool bUnload
5325 )
5326 {
5327     if (bEnable)
5328     {
5329         KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
5330 
5331         kgraphicsInvalidateStaticInfo(pGpu, pKGr);
5332     }
5333 
5334     return NV_OK;
5335 }
5336 
5337 /*!
5338  * @brief   Function to enable/disable MIG mode
5339  *
5340  * @param[IN]   pGpu
5341  * @param[IN]   pKernelMIGManager
5342  * @param[IN]   bMemoryPartitioningNeeded   Is Memory partitioning required?
5343  * @param[IN]   bEnable                     Enable/Disable MIG
5344  * @param[IN]   bUnload                     RM unload path
5345  *
5346  * @return  Returns NV_STATUS
5347  *          NV_OK
5348  *          NV_WARN_NOTHING_TO_DO
5349  *          NV_ERR_INVALID_STATE
5350  */
5351 NV_STATUS
5352 kmigmgrSetMIGState_FWCLIENT
5353 (
5354     OBJGPU *pGpu,
5355     KernelMIGManager *pKernelMIGManager,
5356     NvBool bMemoryPartitioningNeeded,
5357     NvBool bEnable,
5358     NvBool bUnload
5359 )
5360 {
5361     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
5362     NV_STATUS rmStatus = NV_OK;
5363     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
5364     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5365     NvBool bPrevMIGState = pKernelMIGManager->bMIGEnabled;
5366 
5367     if (bEnable)
5368     {
5369         KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
5370 
5371         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5372             kgrmgrDiscoverMaxGlobalCtxBufSizes(pGpu, pKernelGraphicsManager, pKGr, bMemoryPartitioningNeeded),
5373             done);
5374 
5375         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5376             kmigmgrDisableWatchdog(pGpu, pKernelMIGManager),
5377             cleanup_disableWatchdog);
5378 
5379         // Before enabling MIG, deconfigure GR0 in legacy mode
5380         kgraphicsInvalidateStaticInfo(pGpu, pKGr);
5381 
5382         //
5383         // Destroy all global ctx buffers, we will need to recreate them in
5384         // partitionable memory later.
5385         //
5386         fecsBufferTeardown(pGpu, pKGr);
5387 
5388         kgraphicsFreeGlobalCtxBuffers(pGpu, pKGr, GPU_GFID_PF);
5389 
5390         //
5391         // Save the pre-MIG top-level scrubber status for later
5392         // Destroy the top level scrubber if it exists
5393         //
5394         NV_ASSERT_OK_OR_GOTO(rmStatus,
5395             memmgrSaveAndDestroyTopLevelScrubber(pGpu, pMemoryManager),
5396             cleanup_destroyTopLevelScrubber);
5397 
5398         //
5399         // Preexisting channel and memory allocation checks should be done after
        // all buffers (like global Gr buffers) and pre-created channels (like scrubber, watchdog, etc.)
5401         // are destroyed.
5402         //
5403         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5404             kmigmgrCreateGPUInstanceCheck_HAL(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded),
5405             cleanup_createPartitionCheck);
5406 
5407         // On Nvswitch based systems, suspend gpu fabric probe on nvlink inband
5408         gpuFabricProbeSuspend(pGpu->pGpuFabricProbeInfoKernel);
5409 
5410         // Ensure NVLINK is shutdown before enabling MIG
5411         if (!kmigmgrIsMIGNvlinkP2PSupportOverridden(pGpu, pKernelMIGManager) ||
5412             bMemoryPartitioningNeeded)
5413         {
5414 #if (defined(DEBUG) || defined(DEVELOP))
5415             KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
5416 
5417             if (pKernelNvlink != NULL)
5418             {
5419                 NvU32 linkId;
5420 
5421                 //TODO: Remove below code once a more robust SRT is available to test for this condition
5422                 FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
5423                 {
5424                     NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params;
5425 
5426                     params.linkId = linkId;
5427                     params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_DL_LINK_MODE;
5428                     NV_CHECK_OK(rmStatus, LEVEL_ERROR,
5429                         knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
5430                                             NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK,
5431                                             (void *)&params, sizeof(params)));
5432 
                    // Warn if the link is in neither the SLEEP nor the OFF state
                    if ((params.callbackType.callbackParams.getDlLinkMode.mode != NV2080_NVLINK_CORE_LINK_STATE_SLEEP) &&
                        (params.callbackType.callbackParams.getDlLinkMode.mode != NV2080_NVLINK_CORE_LINK_STATE_OFF))
                    {
                        NV_PRINTF(LEVEL_ERROR, "Nvlink %d is not asleep upon entering MIG mode!\n", linkId);
5437                     }
5438                 }
5439                 FOR_EACH_INDEX_IN_MASK_END
5440             }
5441             rmStatus = NV_OK;
5442 #endif
5443             NV_ASSERT_OK_OR_GOTO(rmStatus,
5444                 gpuDeleteClassFromClassDBByClassId(pGpu, NV50_P2P),
5445                 cleanup_disableNvlink);
5446         }
5447 
        // Enable ctx buf pool before allocating any resources that use it.
5449         if (bMemoryPartitioningNeeded)
5450         {
5451             pGpu->setProperty(pGpu, PDB_PROP_GPU_MOVE_CTX_BUFFERS_TO_PMA, NV_TRUE);
5452         }
5453 
5454         // Add the MIG-specific classes
5455         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5456             gpuAddClassToClassDBByClassId(pGpu, AMPERE_SMC_PARTITION_REF));
5457 
5458         if (rmStatus != NV_OK)
5459             goto cleanup_addClassToClassDB;
5460 
5461         // Allocate handles for memory partitioning if needed
5462         if (bMemoryPartitioningNeeded)
5463         {
5464             NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5465                 memmgrAllocMIGMemoryAllocationInternalHandles(pGpu, pMemoryManager),
5466                 cleanup_memsysConfigL2EvictLast);
5467         }
5468 
5469         // initialize pKernelFifo->pppRunlistBufMemDesc based on max possible # of runlists.
5470         {
5471             MEMORY_DESCRIPTOR ***pppMemDesc = NULL;
5472             NvU32 maxRunlists = kfifoGetMaxNumRunlists_HAL(pGpu, pKernelFifo);
5473             NvU32 rowSize = sizeof(pppMemDesc) * maxRunlists;
5474             NvU32 arrSize =  rowSize * NV2080_CTRL_INTERNAL_FIFO_MAX_RUNLIST_BUFFERS;
5475             NvU32 i;
5476 
5477             // Should not have already been initialized
5478             NV_ASSERT(pKernelFifo->pppRunlistBufMemDesc == NULL);
5479 
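            //
            // Layout: an array of maxRunlists row pointers, each aimed into a
            // single flat allocation of
            // maxRunlists * NV2080_CTRL_INTERNAL_FIFO_MAX_RUNLIST_BUFFERS
            // memdesc pointers.
            //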
5480             pppMemDesc = portMemAllocNonPaged(rowSize);
5481             NV_ASSERT_OR_ELSE(pppMemDesc != NULL, rmStatus = NV_ERR_NO_MEMORY; goto cleanup_initialize_runlistBufMemDesc;);
5482             portMemSet(pppMemDesc, 0, rowSize);
5483 
5484             *pppMemDesc = portMemAllocNonPaged(arrSize);
5485             NV_ASSERT_OR_ELSE(*pppMemDesc != NULL, rmStatus = NV_ERR_NO_MEMORY; goto cleanup_initialize_runlistBufMemDesc;);
5486             portMemSet(*pppMemDesc, 0, arrSize);
5487 
5488             // Set up pointers for the 2D array
5489             for (i = 0; i < maxRunlists; i++)
5490             {
5491                 pppMemDesc[i] = *pppMemDesc + (NV2080_CTRL_INTERNAL_FIFO_MAX_RUNLIST_BUFFERS * i);
5492             }
5493 
5494             pKernelFifo->pppRunlistBufMemDesc = pppMemDesc;
5495         }
5496 
5497         //
5498         // Populate static GPU instance memory config which will be used to manage
5499         // GPU instance memory
5500         //
5501         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
5502         NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem));
5503     }
5504     else
5505     {
5506         if (bMemoryPartitioningNeeded)
5507         {
5508             memmgrFreeMIGMemoryAllocationInternalHandles(pGpu, pMemoryManager);
5509         }
5510 
5511 cleanup_initialize_runlistBufMemDesc:
5512 
5513         if (pKernelFifo->pppRunlistBufMemDesc != NULL)
5514         {
5515             portMemFree(*(pKernelFifo->pppRunlistBufMemDesc));
5516             portMemFree(pKernelFifo->pppRunlistBufMemDesc);
5517         }
5518 
5519         pKernelFifo->pppRunlistBufMemDesc = NULL;
5520 
5521 cleanup_memsysConfigL2EvictLast:
5522 
5523 cleanup_addClassToClassDB:
5524         // Delete the MIG GR classes as MIG is disabled
5525         NV_ASSERT_OK(
5526             gpuDeleteClassFromClassDBByClassId(pGpu, AMPERE_SMC_PARTITION_REF));
5527 
5528         //
5529         // Disable ctx buf pool after freeing any resources that uses it.
5530         // Leave enabled on platforms that support it outside MIG.
5531         //
5532         pGpu->setProperty(pGpu, PDB_PROP_GPU_MOVE_CTX_BUFFERS_TO_PMA,
5533             gpuIsCtxBufAllocInPmaSupported_HAL(pGpu));
5534 
5535         //
5536         // HACK: GSP-RM always enables/disables LCEs during MIG enable/disable.
5537         //       Client-RM must always follow it to update its settings accordingly,
5538         //       so it should only call it for MIG disable (and not as part of MIG
5539         //       enable).
5540         //
5541         if (!bEnable)
5542         {
5543             NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5544                 kmigmgrEnableAllLCEs(pGpu, pKernelMIGManager, NV_FALSE));
5545         }
5546 
5547 cleanup_disableNvlink:
5548         // Add P2P class back to class DB as memory partitioning is disabled
5549         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5550             gpuAddClassToClassDBByClassId(pGpu, NV50_P2P));
5551 
5552         //
5553         // On Nvswitch based systems, resume the gpu fabric probe
5554         // request on nvlink inband to register the GPU with the nvswitch fabric
5555         //
5556         if (pGpu->pGpuFabricProbeInfoKernel != NULL)
5557         {
5558             NV_ASSERT_OK(gpuFabricProbeResume(pGpu->pGpuFabricProbeInfoKernel));
5559         }
5560 
5561 cleanup_createPartitionCheck:
5562         if (!bUnload)
5563         {
5564             // Init top level scrubber if it existed before
5565             NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5566                 memmgrInitSavedTopLevelScrubber(pGpu, pMemoryManager));
5567         }
5568 cleanup_destroyTopLevelScrubber:
5569 
5570         // Set kmigmgr state to reflect MIG disabled while reconfiguring for NON-MIG
5571         pKernelMIGManager->bMIGEnabled = NV_FALSE;
5572 
5573         if (!bUnload)
5574         {
5575             KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
5576 
5577             // Since MIG is now disabled, reconfigure GR0 in legacy mode
5578             NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
5579                 kgraphicsLoadStaticInfo(pGpu, pKGr, KMIGMGR_SWIZZID_INVALID));
5580             NV_ASSERT_OK(
5581                 kmigmgrRestoreWatchdog(pGpu, pKernelMIGManager));
5582         }
5583 
5584         //
5585         // Restore previous kmigmgr MIG state. kmigmgrSetMIGState should not
5586         // permanently modify bMIGEnabled. Restore the value to whatever was
5587         // present on entry to this function.
5588         //
5589         pKernelMIGManager->bMIGEnabled = bPrevMIGState;
5590 
5591 cleanup_disableWatchdog:
5592         goto done;
5593     }
5594 
5595 done:
5596     //
5597     // Restore previous kmigmgr MIG state. kmigmgrSetMIGState should not
5598     // permanently modify bMIGEnabled. Restore the value to whatever was
5599     // present on entry to this function.
5600     //
5601     pKernelMIGManager->bMIGEnabled = bPrevMIGState;
5602     return rmStatus;
5603 }
5604 
5605 /*!
5606  * @brief   Function to create or destroy GPU instance
5607  *
5608  * @param[IN]   pGpu
5609  * @param[IN]   pKernelMIGManager
5610  * @param[OUT]  pSwizzId            Output swizzId allocated for this gpu instance
5611  * @param[IN]   params              Gpu instance creation parameters
5612  * @param[IN]   bValid              Flag stating whether the gpu instance is being created or destroyed
5613  * @param[IN]   bCreateCap          Flag stating whether MIG capabilities need to be created
5614  */
5615 NV_STATUS
5616 kmigmgrCreateGPUInstance_IMPL
5617 (
5618     OBJGPU *pGpu,
5619     KernelMIGManager *pKernelMIGManager,
5620     NvU32 *pSwizzId,
5621     KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params,
5622     NvBool bValid,
5623     NvBool bCreateCap
5624 )
5625 {
5626     NV_STATUS rmStatus = NV_OK;
5627 
5628     // If making a gpu instance valid, memory should be allocated accordingly
5629     if (bValid)
5630     {
5631         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL;
5632         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
5633         RM_ENGINE_TYPE rmEngineType;
5634 
5635         //
5636         // Determine SwizzID for this gpu instance. If this isn't a restore, this
5637         // has already been determined by physical RM.
5638         //
5639         if (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE)
5640         {
5641             NvU32 swizzId = params.inst.restore.pGPUInstanceSave->swizzId;
5642             NV_ASSERT_OR_RETURN(!kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId),
5643                                 NV_ERR_INVALID_STATE);
5644             *pSwizzId = swizzId;
5645         }
5646 
5647         //
5648         // HACK: GSP-RM updated the PCE-LCE mappings while setting MIG state.
5649         //       Client-RM does not get a chance to refresh its mappings until the
5650         //       first gpu instance creation, so do it now.
5651         //
5652         if ((pKernelMIGManager->swizzIdInUseMask == 0x0) && IS_GSP_CLIENT(pGpu))
5653         {
5654             NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5655                 kmigmgrEnableAllLCEs(pGpu, pKernelMIGManager, NV_TRUE), invalidate);
5656         }
5657 
5658         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5659             kmigmgrSetGPUInstanceInfo(pGpu, pKernelMIGManager, *pSwizzId, params), invalidate);
5660 
5661         // Mark swizzId as "in-use" in cached mask
5662         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5663             kmigmgrSetSwizzIdInUse(pGpu, pKernelMIGManager, *pSwizzId), invalidate);
5664 
5665         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5666             kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, *pSwizzId, &pKernelMIGGpuInstance), invalidate);
5667 
5668         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5669             kmigmgrAllocGPUInstanceHandles(pGpu, *pSwizzId, pKernelMIGGpuInstance), invalidate);
5670 
5671         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5672             kmigmgrInitGPUInstanceBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate);
5673 
5674         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT,
5675             kmigmgrCreateGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate);
5676 
5677         NV_ASSERT_OK_OR_GOTO(rmStatus,
5678             kmemsysInitMIGMemoryPartitionTable_HAL(pGpu, pKernelMemorySystem), invalidate);
5679 
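        //
        // Route FECS tracing for each GR engine assigned to this instance through the
        // instance's internal client/subdevice handles, using the instance-local GR index.
        //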
5680         FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
5681         {
5682             NvU32 engineIdx;
5683             KernelGraphics *pKernelGraphics;
5684             RM_ENGINE_TYPE localEngineType;
5685 
5686             if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
5687                 continue;
5688 
5689             engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
5690             pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
5691 
5692             NV_ASSERT_OK_OR_GOTO(rmStatus,
5693                 kmigmgrGetGlobalToLocalEngineType(pGpu,
5694                                                   pKernelMIGManager,
5695                                                   kmigmgrMakeGIReference(pKernelMIGGpuInstance),
5696                                                   rmEngineType,
5697                                                   &localEngineType),
5698                 invalidate);
5699 
5700             fecsSetRoutingInfo(pGpu,
5701                                pKernelGraphics,
5702                                pKernelMIGGpuInstance->instanceHandles.hClient,
5703                                pKernelMIGGpuInstance->instanceHandles.hSubdevice,
5704                                RM_ENGINE_TYPE_GR_IDX(localEngineType));
5705         }
5706         FOR_EACH_IN_BITVECTOR_END();
5707 
5708         // Init gpu instance pool for page table mem
5709         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5710             kmigmgrInitGPUInstancePool(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate);
5711 
5712         // Init gpu instance scrubber
5713         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5714             kmigmgrInitGPUInstanceScrubber(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate);
5715 
5716         //
5717         // Register the gpu instance with the capability framework only if explicitly
5718         // requested. Otherwise, we rely on the persistent state.
5719         //
5720         if (bCreateCap)
5721         {
5722             NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
5723                 osRmCapRegisterSmcPartition(pGpu->pOsRmCaps, &pKernelMIGGpuInstance->pOsRmCaps,
5724                                             pKernelMIGGpuInstance->swizzId), invalidate);
5725         }
5726     }
5727     else
5728     {
5729         NV_PRINTF(LEVEL_INFO, "Invalidating swizzId - %d.\n", *pSwizzId);
5730 
5731         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
5732             kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, *pSwizzId, NV_FALSE));
5733     }
5734 
5735     return rmStatus;
5736 
5737 invalidate:
5738     kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, *pSwizzId, NV_FALSE);
5739 
5740     return rmStatus;
5741 }
5742 
5743 /*
5744  * @brief   Init per-gpu instance memory pool so that memory for client page tables
5745  *          can be allocated from this memory pool
5746  */
5747 NV_STATUS
5748 kmigmgrInitGPUInstancePool_IMPL
5749 (
5750     OBJGPU *pGpu,
5751     KernelMIGManager *pKernelMIGManager,
5752     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5753 )
5754 {
5755     KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
5756     const GMMU_FMT *pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
5757     NvU32 version;
5758     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5759 
5760     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
5761 
5762     if (!memmgrIsPmaInitialized(pMemoryManager) ||
5763         !memmgrAreClientPageTablesPmaManaged(pMemoryManager))
5764     {
5765         return NV_OK;
5766     }
5767 
5768     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId))
5769         return NV_OK;
5770 
5771     NV_ASSERT_OR_RETURN(pFmt != NULL, NV_ERR_INVALID_ARGUMENT);
5772     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance->pMemoryPartitionHeap != NULL, NV_ERR_INVALID_STATE);
5773 
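    // Pick the page table pool configuration matching the GMMU format version in use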
5774     version = ((pFmt->version == GMMU_FMT_VERSION_1) ? POOL_CONFIG_GMMU_FMT_1 : POOL_CONFIG_GMMU_FMT_2);
5775 
5776     NV_ASSERT_OK_OR_RETURN(
5777         rmMemPoolSetup((void*)&pKernelMIGGpuInstance->pMemoryPartitionHeap->pmaObject,
5778                        &pKernelMIGGpuInstance->pPageTableMemPool, version));
5779 
5780     // Allocate the pool in CPR in case of Confidential Compute
5781     if (gpuIsCCFeatureEnabled(pGpu))
5782     {
5783         rmMemPoolAllocateProtectedMemory(pKernelMIGGpuInstance->pPageTableMemPool, NV_TRUE);
5784     }
5785 
5786     return NV_OK;
5787 }
5788 
5789 /*
5790  * @brief   Initializes ctx buf pools for runlist buffer and GR global ctx buffers
5791  *          for engines that belong to this gpu instance.
5792  */
5793 NV_STATUS
5794 kmigmgrInitGPUInstanceBufPools_IMPL
5795 (
5796     OBJGPU *pGpu,
5797     KernelMIGManager *pKernelMIGManager,
5798     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5799 )
5800 {
5801     Heap *pHeap;
5802     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
5803     NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
5804     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
5805     pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap;
5806     NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE);
5807 
5808     if (!ctxBufPoolIsSupported(pGpu))
5809         return NV_OK;
5810 
5811     //
5812     // We have to drop GPU lock before making allocations from PMA
5813     // as RM allocations can trigger UVM evictions.
5814     // However, in this case we can skip dropping GPU lock as gpu instance PMA
5815     // isn't visible to UVM yet.
5816     // This is just a sanity check to make sure this assumption is correct and
5817     // allocation from PMA cannot trigger UVM evictions.
5818     //
5819     // When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
5820     // and hence free memory can not be expected to be same as total memory.
5821     //
5822     if (memmgrIsPmaInitialized(pMemoryManager) &&
5823         (pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig) == NV_OK) &&
5824         !(pmaConfig & PMA_QUERY_NUMA_ONLINED))
5825     {
5826         NvU64 freeSpace, totalSpace;
5827         pmaGetFreeMemory(&pHeap->pmaObject, &freeSpace);
5828         pmaGetTotalMemory(&pHeap->pmaObject, &totalSpace);
5829         if (freeSpace != totalSpace)
5830         {
5831             NV_PRINTF(LEVEL_ERROR, "Assumption that PMA is empty at this time is broken\n");
5832             NV_PRINTF(LEVEL_ERROR, "free space = 0x%llx bytes total space = 0x%llx bytes\n",
5833                 freeSpace, totalSpace);
5834             NV_PRINTF(LEVEL_ERROR, "This means PMA allocations may trigger UVM evictions at this point causing deadlocks!\n");
5835             return NV_ERR_INVALID_STATE;
5836         }
5837     }
5838 
5839     NV_ASSERT_OK_OR_RETURN(kmigmgrInitGPUInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance));
5840     NV_ASSERT_OK_OR_RETURN(kmigmgrInitGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance));
5841     return NV_OK;
5842 }
5843 
5844 /*
5845  * Initializes the runlist buffer pools for engines that belong to this gpu instance.
5846  * Also reserves memory for runlist buffers in these pools.
5847  * Later, runlists will be allocated from these pools.
5848  */
5849 NV_STATUS
5850 kmigmgrInitGPUInstanceRunlistBufPools_IMPL
5851 (
5852     OBJGPU *pGpu,
5853     KernelMIGManager *pKernelMIGManager,
5854     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5855 )
5856 {
5857     RM_ENGINE_TYPE    rmEngineType;
5858     KernelFifo       *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
5859     CTX_BUF_INFO      runlistBufInfo[NUM_BUFFERS_PER_RUNLIST] = {0};
5860     NvU64             rlSize;
5861     NvU64             rlAlign;
5862     NvU32             swizzId;
5863     NvU32             i;
5864     NvU32             runlistId;
5865     Heap             *pHeap;
5866 
5867     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
5868     swizzId = pKernelMIGGpuInstance->swizzId;
5869     pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap;
5870     NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE);
5871 
5872     if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
5873         return NV_OK;
5874 
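    //
    // Create and reserve a runlist buffer pool for every partitionable engine
    // assigned to this gpu instance.
    //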
5875     for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++)
5876     {
5877         if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) ||
5878             !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) ||
5879             !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
5880         {
5881             continue;
5882         }
5883 
5884         // Get runlist ID for Engine type.
5885         NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
5886                                                         ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType,
5887                                                         ENGINE_INFO_TYPE_RUNLIST, &runlistId));
5888 
5889         //
5890         // Ctx buf pools only support HW runlists today.
5891         // We assume TSGs are supported on all runlists, which is true for Ampere.
5892         //
5893         for (i = 0; i < NUM_BUFFERS_PER_RUNLIST; i++)
5894         {
5895             NV_ASSERT_OK_OR_RETURN(kfifoGetRunlistBufInfo(pGpu, pKernelFifo, runlistId, NV_TRUE,
5896                                    0, &rlSize, &rlAlign));
5897             runlistBufInfo[i].size = rlSize;
5898             runlistBufInfo[i].align = rlAlign;
5899             runlistBufInfo[i].attr = RM_ATTR_PAGE_SIZE_DEFAULT;
5900             runlistBufInfo[i].bContig = NV_TRUE;
5901         }
5902 
5903         NV_ASSERT_OK_OR_RETURN(ctxBufPoolInit(pGpu, pHeap, &pKernelFifo->pRunlistBufPool[rmEngineType]));
5904         NV_ASSERT_OR_RETURN(pKernelFifo->pRunlistBufPool[rmEngineType] != NULL, NV_ERR_INVALID_STATE);
5905 
5906         //
5907         // Skip the scrubber for runlist buffer allocations since the gpu instance scrubber is not set up yet
5908         // and it will be destroyed before the runlist buffer pool is deleted.
5909         //
5910         ctxBufPoolSetScrubSkip(pKernelFifo->pRunlistBufPool[rmEngineType], NV_TRUE);
5911         NV_ASSERT_OK_OR_RETURN(ctxBufPoolReserve(pGpu, pKernelFifo->pRunlistBufPool[rmEngineType], &runlistBufInfo[0], NUM_BUFFERS_PER_RUNLIST));
5912     }
5913 
5914     return NV_OK;
5915 }
5916 
5917 /*
5918  * @brief   Initializes gr buffer pools for all GR engines that belong to this gpu instance
5919  *          Also reserves memory for global GR buffers into these pools.
5920  */
5921 NV_STATUS
5922 kmigmgrInitGPUInstanceGrBufPools_IMPL
5923 (
5924     OBJGPU *pGpu,
5925     KernelMIGManager *pKernelMIGManager,
5926     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
5927 )
5928 {
5929     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
5930     GR_GLOBALCTX_BUFFER    bufId;
5931     NvU32                  bufCount;
5932     CTX_BUF_INFO           globalCtxBufInfo[GR_GLOBALCTX_BUFFER_COUNT];
5933     Heap                  *pHeap = NULL;
5934     NV_STATUS              rmStatus = NV_OK;
5935     RM_ENGINE_TYPE         rmEngineType;
5936 
5937     NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
5938     pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap;
5939     NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE);
5940 
5941     bufCount = 0;
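    //
    // Gather size/alignment requirements for every supported global ctx buffer and
    // record them with the graphics manager before reserving pool space below.
    //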
5942     FOR_EACH_IN_ENUM(GR_GLOBALCTX_BUFFER, bufId)
5943     {
5944         if (kgrmgrIsGlobalCtxBufSupported(bufId, NV_FALSE))
5945         {
5946             const CTX_BUF_INFO *pBufInfo = kgrmgrGetGlobalCtxBufInfo(pGpu, pKernelGraphicsManager, bufId);
5947             NV_ASSERT_OR_RETURN(pBufInfo != NULL, NV_ERR_INVALID_STATE);
5948 
5949             globalCtxBufInfo[bufCount] = *pBufInfo;
5950 
5951             if ((bufId == GR_GLOBALCTX_BUFFER_FECS_EVENT) || (bufId == GR_GLOBAL_BUFFER_GLOBAL_PRIV_ACCESS_MAP))
5952             {
5953                 globalCtxBufInfo[bufCount].bContig = NV_TRUE;
5954             }
5955             else if ((bufId == GR_GLOBALCTX_BUFFER_PRIV_ACCESS_MAP) || (bufId == GR_GLOBALCTX_BUFFER_UNRESTRICTED_PRIV_ACCESS_MAP))
5956             {
5957                 globalCtxBufInfo[bufCount].bContig = gpuIsClientRmAllocatedCtxBufferEnabled(pGpu);
5958             }
5959             kgrmgrSetGlobalCtxBufInfo(pGpu, pKernelGraphicsManager, bufId,
5960                                       globalCtxBufInfo[bufCount].size,
5961                                       globalCtxBufInfo[bufCount].align,
5962                                       globalCtxBufInfo[bufCount].attr,
5963                                       globalCtxBufInfo[bufCount].bContig);
5964             bufCount++;
5965         }
5966     }
5967     FOR_EACH_IN_ENUM_END;
5968 
5969     FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
5970     {
5971         NvU32 engineIdx;
5972         KernelGraphics *pKernelGraphics;
5973         CTX_BUF_POOL_INFO *pGrCtxBufPool;
5974 
5975         if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
5976             continue;
5977 
5978         engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);
5979         pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
5980 
5981         NV_ASSERT_OK_OR_GOTO(rmStatus,
5982             kgraphicsInitCtxBufPool(pGpu, pKernelGraphics, pHeap),
5983             failed);
5984 
5985         pGrCtxBufPool = kgraphicsGetCtxBufPool(pGpu, pKernelGraphics);
5986 
5987         if (pGrCtxBufPool == NULL)
5988         {
5989             rmStatus = NV_ERR_INVALID_STATE;
5990             goto failed;
5991         }
5992 
5993         //
5994         // Skip the scrubber for GR buffer allocations since the gpu instance scrubber is not set up yet
5995         // and it will be destroyed before the GR buffer pool is deleted.
5996         //
5997         ctxBufPoolSetScrubSkip(pGrCtxBufPool, NV_TRUE);
5998         NV_ASSERT_OK_OR_GOTO(
5999             rmStatus,
6000             ctxBufPoolReserve(pGpu, pGrCtxBufPool, &globalCtxBufInfo[0], bufCount),
6001             failed);
6002     }
6003     FOR_EACH_IN_BITVECTOR_END();
6004 
6005     return NV_OK;
6006 
6007 failed:
6008     kmigmgrDestroyGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);
6009     return rmStatus;
6010 }
6011 
6012 /*!
6013  * @brief   Save MIG instance topology to persistence, if available.
6014  */
6015 NV_STATUS
6016 kmigmgrSaveToPersistence_IMPL
6017 (
6018     OBJGPU *pGpu,
6019     KernelMIGManager *pKernelMIGManager
6020 )
6021 {
6022     GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL;
6023     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
6024     NvU32 gpcIdx;
6025     NvU32 savedGIIdx;
6026 
6027     NV_CHECK_OR_RETURN(LEVEL_SILENT,
6028                        gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave),
6029                        NV_OK);
6030 
6031     // Clear existing topology, if any.
6032     portMemSet(pTopologySave->saveGI, 0, sizeof(pTopologySave->saveGI));
6033 
6034     // Update the MIG enablement bit
6035     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_RESETLESS_MIG_SUPPORTED))
6036     {
6037         gpumgrSetSystemMIGEnabled(gpuGetDBDF(pGpu), pKernelMIGManager->bMIGEnabled);
6038     }
6039 
6040     // If there are no instances then don't bother checking anything.
6041     NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_IN_USE(pGpu), NV_OK);
6042 
6043     savedGIIdx = 0;
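    //
    // Record each active gpu instance's engines, GPC mask, VEID assignment, and
    // compute instances into the persistent topology save.
    //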
6044     FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
6045     {
6046         GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[savedGIIdx];
6047 
6048         pGPUInstanceSave->bValid = NV_TRUE;
6049         pGPUInstanceSave->swizzId = pKernelMIGGPUInstance->swizzId;
6050         pGPUInstanceSave->pOsRmCaps = pKernelMIGGPUInstance->pOsRmCaps;
6051         pGPUInstanceSave->giInfo.partitionFlags = pKernelMIGGPUInstance->partitionFlag;
6052         bitVectorToRaw(&pKernelMIGGPUInstance->resourceAllocation.engines,
6053                        pGPUInstanceSave->giInfo.enginesMask, sizeof(pGPUInstanceSave->giInfo.enginesMask));
6054         for (gpcIdx = 0; gpcIdx < pKernelMIGGPUInstance->resourceAllocation.gpcCount; ++gpcIdx)
6055         {
6056              pGPUInstanceSave->giInfo.gpcMask |= NVBIT32(pKernelMIGGPUInstance->resourceAllocation.gpcIds[gpcIdx]);
6057         }
6058         pGPUInstanceSave->giInfo.veidOffset = pKernelMIGGPUInstance->resourceAllocation.veidOffset;
6059         pGPUInstanceSave->giInfo.veidCount = pKernelMIGGPUInstance->resourceAllocation.veidCount;
6060         pGPUInstanceSave->giInfo.virtualGpcCount = pKernelMIGGPUInstance->resourceAllocation.virtualGpcCount;
6061 
6062         NV_ASSERT_OK_OR_RETURN(kmigmgrSaveComputeInstances(pGpu, pKernelMIGManager, pKernelMIGGPUInstance,
6063                                                            pGPUInstanceSave->saveCI));
6064 
6065         ++savedGIIdx;
6066     }
6067     FOR_EACH_VALID_GPU_INSTANCE_END();
6068 
6069     return NV_OK;
6070 }
6071 
6072 /*!
6073  * @brief  Update the MIG CI config for CPU-RM if a compute instance is created
6074  * by a guest and the RPC is handled directly by GSP-RM
6075  */
6076 NV_STATUS
6077 kmigmgrUpdateCiConfigForVgpu_IMPL
6078 (
6079     OBJGPU *pGpu,
6080     KernelMIGManager *pKernelMIGManager,
6081     NvU32 execPartCount,
6082     NvU32 *pExecPartId,
6083     NvU32 gfid,
6084     NvBool bDelete
6085 )
6086 {
6087     return NV_ERR_NOT_SUPPORTED;
6088 }
6089 
6090 // Control call for getting active gpu instance Ids
6091 NV_STATUS
6092 subdeviceCtrlCmdGpuGetActivePartitionIds_IMPL
6093 (
6094     Subdevice *pSubdevice,
6095     NV2080_CTRL_GPU_GET_ACTIVE_PARTITION_IDS_PARAMS *pParams
6096 )
6097 {
6098     OBJGPU           *pGpu = GPU_RES_GET_GPU(pSubdevice);
6099     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6100     NvU64             validSwizzIdMask;
6101 
6102     pParams->partitionCount = 0;
6103 
6104     ct_assert(NV2080_CTRL_GPU_MAX_PARTITIONS == KMIGMGR_MAX_GPU_INSTANCES);
6105 
6106     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6107 
6108     if ((pKernelMIGManager == NULL) || !pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED))
6109     {
6110         NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
6111         return NV_ERR_NOT_SUPPORTED;
6112     }
6113 
6114     if (!IS_MIG_ENABLED(pGpu))
6115     {
6116         NV_PRINTF(LEVEL_INFO, "MIG Mode has not been turned on.\n");
6117         return NV_ERR_NOT_SUPPORTED;
6118     }
6119 
6120     //
6121     // The device_monitoring swizzId is always available in the system, even when
6122     // the GPU is not split into MIG instances.
6123     //
6124     pParams->swizzId[pParams->partitionCount++] = NVC637_DEVICE_LEVEL_SWIZZID;
6125 
6126     // Populate all active swizzIDs
6127     validSwizzIdMask = pKernelMIGManager->swizzIdInUseMask;
6128     while(validSwizzIdMask != 0x0)
6129     {
6130         pParams->swizzId[pParams->partitionCount] = portUtilCountTrailingZeros64(validSwizzIdMask);
6131         validSwizzIdMask &= ~NVBIT64(pParams->swizzId[pParams->partitionCount]);
6132         pParams->partitionCount++;
6133     }
6134 
6135     return NV_OK;
6136 }
6137 
6138 //
6139 // Control call to determine the number of gpu instances of the given size which
6140 // can still be created, given the current configuration of the GPU.
6141 //
6142 NV_STATUS
6143 subdeviceCtrlCmdGpuGetPartitionCapacity_IMPL
6144 (
6145     Subdevice *pSubdevice,
6146     NV2080_CTRL_GPU_GET_PARTITION_CAPACITY_PARAMS *pParams
6147 )
6148 {
6149     NV_STATUS         status = NV_OK;
6150     OBJGPU           *pGpu = GPU_RES_GET_GPU(pSubdevice);
6151     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6152     NvHandle          hClient = RES_GET_CLIENT_HANDLE(pSubdevice);
6153 
6154     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6155 
6156     NV_CHECK_OR_RETURN(LEVEL_INFO, IS_MIG_ENABLED(pGpu), NV_ERR_NOT_SUPPORTED);
6157 
6158     if (IS_VIRTUAL(pGpu))
6159     {
6160         // This is not supported in legacy MIG vGPU policy
6161         if (kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager))
6162             return NV_ERR_NOT_SUPPORTED;
6163 
6164         if (!pParams->bStaticInfo)
6165         {
6166             CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6167 
6168             NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE);
6169 
6170             // Only expose current capacity to admins or capable clients.
6171             if (!rmclientIsCapableOrAdminByHandle(hClient,
6172                                                   NV_RM_CAP_SYS_SMC_CONFIG,
6173                                                   pCallContext->secInfo.privLevel))
6174             {
6175                 return NV_ERR_INSUFFICIENT_PERMISSIONS;
6176             }
6177 
6178             if (!kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, pParams->partitionFlag) ||
6179                 !FLD_TEST_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, _FULL, pParams->partitionFlag))
6180             {
6181                 pParams->partitionCount = 0;
6182                 pParams->availableSpansCount = 0;
6183             }
6184             else
6185             {
6186                 if (IS_MIG_IN_USE(pGpu))
6187                 {
6188                     pParams->partitionCount = 0;
6189                     pParams->availableSpansCount = 0;
6190                 }
6191                 else
6192                 {
6193                     pParams->partitionCount = 1;
6194                     pParams->availableSpansCount = 1;
6195                     pParams->availableSpans[0].lo = NV_RANGE_EMPTY.lo;
6196                     pParams->availableSpans[0].hi = NV_RANGE_EMPTY.hi;
6197                 }
6198             }
6199         }
6200 
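        // The total capacity below is reported regardless of pParams->bStaticInfo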
6201         if (!kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, pParams->partitionFlag) ||
6202             !FLD_TEST_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, _FULL, pParams->partitionFlag))
6203         {
6204             pParams->totalPartitionCount = 0;
6205             pParams->totalSpansCount = 0;
6206         }
6207         else
6208         {
6209             pParams->totalPartitionCount = 1;
6210             pParams->totalSpansCount = 1;
6211             pParams->totalSpans[0].lo = NV_RANGE_EMPTY.lo;
6212             pParams->totalSpans[0].hi = NV_RANGE_EMPTY.hi;
6213         }
6214 
6215         return NV_OK;
6216     }
6217 
6218     status = NV_ERR_NOT_SUPPORTED;
6219 
6220     return status;
6221 }
6222 
6223 //
6224 // Control call to provide information about gpu instances which can be created on
6225 // this GPU.
6226 //
6227 NV_STATUS
6228 subdeviceCtrlCmdGpuDescribePartitions_IMPL
6229 (
6230     Subdevice *pSubdevice,
6231     NV2080_CTRL_GPU_DESCRIBE_PARTITIONS_PARAMS *pParams
6232 )
6233 {
6234     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
6235     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6236 
6237     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6238 
6239     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED))
6240     {
6241         NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
6242         return NV_ERR_NOT_SUPPORTED;
6243     }
6244 
6245     if (!IS_MIG_ENABLED(pGpu))
6246     {
6247         NV_PRINTF(LEVEL_ERROR, "Entered MIG API with MIG disabled.\n");
6248     }
6249 
6250     return kmigmgrDescribeGPUInstances(pGpu, pKernelMIGManager, pParams);
6251 }
6252 
6253 //
6254 // Control call to set the global partitioning mode for this GPU. This call may
6255 // require a PF-FLR to be performed on the GPU before work may be submitted on
6256 // the GPU.
6257 //
6258 NV_STATUS
6259 subdeviceCtrlCmdGpuSetPartitioningMode_IMPL
6260 (
6261     Subdevice *pSubdevice,
6262     NV2080_CTRL_GPU_SET_PARTITIONING_MODE_PARAMS *pParams
6263 )
6264 {
6265     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
6266     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6267     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
6268 
6269     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6270 
6271     if (IS_VIRTUAL(pGpu))
6272     {
6273         return NV_ERR_NOT_SUPPORTED;
6274     }
6275 
6276     if ((pKernelMIGManager == NULL) || !kmigmgrIsMIGSupported(pGpu, pKernelMIGManager))
6277     {
6278         NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
6279         return NV_ERR_NOT_SUPPORTED;
6280     }
6281 
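    // Forward the request to physical RM, then update the kernel MIG manager's
    // partitioning mode state to match.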
6282     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
6283         pRmApi->Control(pRmApi,
6284                         pGpu->hInternalClient,
6285                         pGpu->hInternalSubdevice,
6286                         NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_PARTITIONING_MODE,
6287                         pParams,
6288                         sizeof(*pParams)));
6289 
6290     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
6291         kmigmgrSetPartitioningMode(pGpu, pKernelMIGManager));
6292 
6293     return NV_OK;
6294 }
6295 
6296 /*!
6297  * @brief  Process a single request to create / destroy a gpu instance.
6298  *         Handles enabling / disabling MIG mode on entry/exit.
6299  */
6300 static NV_STATUS
6301 _kmigmgrProcessGPUInstanceEntry
6302 (
6303     OBJGPU *pGpu,
6304     KernelMIGManager *pKernelMIGManager,
6305     NV2080_CTRL_GPU_SET_PARTITION_INFO *pEntry
6306 )
6307 {
6308     NV_STATUS status = NV_OK;
6309     NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS *pParams = portMemAllocNonPaged(sizeof(*pParams));
6310     CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6311     RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams;
6312     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
6313 
6314     NV_CHECK_OR_RETURN(LEVEL_ERROR, pParams != NULL, NV_ERR_NO_MEMORY);
6315 
6316     pParams->partitionCount = 1;
6317     pParams->partitionInfo[0] = *pEntry;
6318 
6319     //
6320     // Mirrored GPU Instance Management:
6321     // 1: CPU enable MIG
6322     // 2: GSP enable MIG
6323     // 3: GSP create gpu instance
6324     // 4: CPU create gpu instance
6325     // 5: CPU delete gpu instance
6326     // 6: GSP delete gpu instance
6327     // 7: GSP disable MIG
6328     // 8: CPU disable MIG
6329     //
6330 
6331     // Step 1, 2: If this is the first gpu instance, enable MIG
6332     if (pEntry->bValid && (pKernelMIGManager->swizzIdInUseMask == 0x0))
6333     {
6334         NvBool bMemoryPartitioningRequested = kmigmgrIsMemoryPartitioningRequested_HAL(pGpu, pKernelMIGManager, pEntry->partitionFlag);
6335 
6336         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
6337             kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningRequested, NV_TRUE, NV_FALSE),
6338             cleanup_params);
6339     }
6340 
6341     if (pEntry->bValid)
6342     {
6343         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
6344             pRmApi->Control(pRmApi,
6345                             pRmCtrlParams->hClient,
6346                             pRmCtrlParams->hObject,
6347                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
6348                             pParams,
6349                             sizeof(*pParams)),
6350             cleanup_smc_state);
6351         pEntry->swizzId = pParams->partitionInfo[0].swizzId;
6352     }
6353 
6354     if (IS_GSP_CLIENT(pGpu))
6355     {
6356         KMIGMGR_CREATE_GPU_INSTANCE_PARAMS request =
6357         {
6358             .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST,
6359             .inst.request.partitionFlag = pEntry->partitionFlag,
6360             .inst.request.bUsePlacement =
6361                 FLD_TEST_REF(NV2080_CTRL_GPU_PARTITION_FLAG_PLACE_AT_SPAN, _ENABLE,
6362                              pEntry->partitionFlag),
6363             .inst.request.placement = rangeMake(pEntry->placement.lo, pEntry->placement.hi)
6364         };
6365         request.inst.request.partitionFlag = FLD_SET_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _PLACE_AT_SPAN, _DISABLE,
6366                                                          request.inst.request.partitionFlag);
6367 
6368         // Step 3, 4, 5, 6: Create / delete gpu instance
6369         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
6370             kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &pEntry->swizzId, request, pEntry->bValid,
6371                                      NV_TRUE /* create MIG capabilities */),
6372             cleanup_rpc);
6373     }
6374 
6375     if (!pEntry->bValid)
6376     {
6377         NV_ASSERT_OK_OR_GOTO(status,
6378             pRmApi->Control(pRmApi,
6379                             pRmCtrlParams->hClient,
6380                             pRmCtrlParams->hObject,
6381                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
6382                             pParams,
6383                             sizeof(*pParams)),
6384             cleanup_params);
6385     }
6386 
6387     // Step 7, 8: If this is the last gpu instance to go, disable MIG
6388     if (pKernelMIGManager->swizzIdInUseMask == 0x0)
6389     {
6390         NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->partitionInfo[0].swizzId);
6391 
6392         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
6393             kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE),
6394             cleanup_params);
6395     }
6396 
6397     portMemFree(pParams);
6398     return status;
6399 
6400 cleanup_rpc:
6401     if (pEntry->bValid)
6402     {
6403         // Reuse the same RPC information we prepared earlier, but flip the bValid bit
6404         pParams->partitionInfo[0].bValid = NV_FALSE;
6405         NV_ASSERT_OK(pRmApi->Control(pRmApi,
6406                                      pRmCtrlParams->hClient,
6407                                      pRmCtrlParams->hObject,
6408                                      NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
6409                                      pParams,
6410                                      sizeof(*pParams)));
6411     }
6412 
6413 cleanup_smc_state:
6414     if (pEntry->bValid && (pKernelMIGManager->swizzIdInUseMask == 0x0))
6415     {
6416         NvBool bMemoryPartitioningRequested = kmigmgrIsMemoryPartitioningRequested_HAL(pGpu, pKernelMIGManager, pEntry->partitionFlag);
6417 
6418         NV_ASSERT_OK(
6419             kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningRequested, NV_FALSE, NV_FALSE));
6420     }
6421 
6422 cleanup_params:
6423     portMemFree(pParams);
6424     return status;
6425 }
6426 
6427 /*!
6428  * @brief  Control call for dividing GPU into requested gpu instances
6429  *
6430  * @returns NV_OK if successful.
6431  *          NV_ERR_INVALID_ARGUMENT if parameter is not found
6432  *          NV_ERR_NOT_SUPPORTED if parameter is not supported
6433  *
6434  */
6435 NV_STATUS
6436 subdeviceCtrlCmdGpuSetPartitions_IMPL
6437 (
6438     Subdevice *pSubdevice,
6439     NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS *pParams
6440 )
6441 {
6442     NV_STATUS         rmStatus = NV_OK;
6443     NvU32             i;
6444     NvU32             j;
6445     OBJGPU           *pGpu = GPU_RES_GET_GPU(pSubdevice);
6446     NvHandle          hClient = RES_GET_CLIENT_HANDLE(pSubdevice);
6447     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6448     CALL_CONTEXT     *pCallContext = resservGetTlsCallContext();
6449 
6450     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6451 
6452     NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE);
6453 
6454     if (!rmclientIsCapableOrAdminByHandle(hClient,
6455                                           NV_RM_CAP_SYS_SMC_CONFIG,
6456                                           pCallContext->secInfo.privLevel))
6457     {
6458         NV_PRINTF(LEVEL_ERROR, "Non-privileged context issued privileged cmd\n");
6459         return NV_ERR_INSUFFICIENT_PERMISSIONS;
6460     }
6461 
6462     NV_CHECK_OR_RETURN(LEVEL_INFO, IS_MIG_ENABLED(pGpu), NV_ERR_NOT_SUPPORTED);
6463 
6464     // Sanity checks
6465     if (pParams->partitionCount > KMIGMGR_MAX_GPU_INSTANCES)
6466     {
6467         return NV_ERR_INVALID_ARGUMENT;
6468     }
6469     else if (0 == pParams->partitionCount)
6470     {
6471         return NV_WARN_NOTHING_TO_DO;
6472     }
6473 
6474     for (i = 0; i < pParams->partitionCount; i++)
6475     {
6476         if (pParams->partitionInfo[i].bValid)
6477         {
6478             NvU32 partitionFlag = FLD_SET_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _PLACE_AT_SPAN, _DISABLE,
6479                                               pParams->partitionInfo[i].partitionFlag);
6480             NV_CHECK_OR_RETURN(LEVEL_ERROR,
6481                 kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, partitionFlag),
6482                 NV_ERR_NOT_SUPPORTED);
6483         }
6484     }
6485 
6486     // This is not supported in vGPU
6487     if (IS_VIRTUAL(pGpu))
6488     {
6489         return NV_ERR_NOT_SUPPORTED;
6490     }
6491 
6492     for (i = 0; i < pParams->partitionCount; i++)
6493     {
6494         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_INFO,
6495             _kmigmgrProcessGPUInstanceEntry(pGpu, pKernelMIGManager, &pParams->partitionInfo[i]),
6496             cleanup);
6497     }
6498 
6499     //
6500     // Generate a subdevice event stating something has changed in GPU instance
6501     // config. Clients currently do not care about changes and their scope
6502     //
6503     gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_SMC_CONFIG_UPDATE, NULL, 0, 0, 0);
6504 
6505     return rmStatus;
6506 
6507 cleanup:
6508     // Invalidate the gpu instances which have already been created
6509     for (j = 0; j < i; j++)
6510     {
6511         pParams->partitionInfo[j].bValid = !pParams->partitionInfo[j].bValid;
6512         NV_ASSERT_OK(
6513             _kmigmgrProcessGPUInstanceEntry(pGpu, pKernelMIGManager, &pParams->partitionInfo[j]));
6514         pParams->partitionInfo[j].bValid = !pParams->partitionInfo[j].bValid;
6515     }
6516 
6517     return rmStatus;
6518 }
6519 
6520 // Control call for getting specific gpu instance info
6521 NV_STATUS
6522 subdeviceCtrlCmdGpuGetPartitions_IMPL
6523 (
6524     Subdevice *pSubdevice,
6525     NV2080_CTRL_GPU_GET_PARTITIONS_PARAMS *pParams
6526 )
6527 {
6528     NV_STATUS                             rmStatus = NV_OK;
6529     NvU32                                 i;
6530     OBJGPU                               *pGpu = GPU_RES_GET_GPU(pSubdevice);
6531     KernelMIGManager                     *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6532     MIG_INSTANCE_REF                      ref;
6533     NvU64                                 validSwizzIdMask;
6534     NvHandle                              hClient = RES_GET_CLIENT_HANDLE(pSubdevice);
6535     NV2080_CTRL_GPU_GET_PARTITIONS_PARAMS *pRpcParams = NULL;
6536 
6537     ct_assert(NV2080_CTRL_GPU_MAX_PARTITIONS == KMIGMGR_MAX_GPU_INSTANCES);
6538     ct_assert(NV2080_CTRL_GPU_MAX_GPC_PER_SMC == KGRMGR_MAX_GPC);
6539 
6540     LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
6541 
6542     pRpcParams = portMemAllocNonPaged(sizeof(*pRpcParams));
6543     NV_CHECK_OR_RETURN(LEVEL_INFO, pRpcParams != NULL, NV_ERR_NO_MEMORY);
6544 
6545     *pRpcParams = *pParams;
6546 
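    //
    // On non-virtual configurations, query physical RM first; the per-GR breakdown,
    // span, and error fields filled in below are taken from this response.
    //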
6547     if (!IS_VIRTUAL(pGpu))
6548     {
6549         CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6550         RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams;
6551         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
6552 
6553 
6554         NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_WARNING,
6555             pRmApi->Control(pRmApi,
6556                             pRmCtrlParams->hClient,
6557                             pRmCtrlParams->hObject,
6558                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_GET_GPU_INSTANCES,
6559                             pRpcParams,
6560                             sizeof(*pRpcParams)), done);
6561     }
6562 
6563     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED))
6564     {
6565         NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
6566         rmStatus = NV_ERR_NOT_SUPPORTED;
6567         goto done;
6568     }
6569 
6570     if (!IS_MIG_ENABLED(pGpu))
6571         NV_PRINTF(LEVEL_INFO, "Entered MIG API with MIG disabled.\n");
6572 
6573     if (!IS_MIG_IN_USE(pGpu))
6574     {
6575         // set the valid gpu instance count to "0" and return
6576         pParams->validPartitionCount = 0;
6577         rmStatus = NV_OK;
6578         goto done;
6579     }
6580 
6581     // See if all gpu instances are requested and get info for all gpu instance
6582     if (pParams->bGetAllPartitionInfo)
6583     {
6584         CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6585 
6586         NV_ASSERT_OR_ELSE(pCallContext != NULL,
6587                           rmStatus = NV_ERR_INVALID_STATE; goto done);
6588 
6589         if (!rmclientIsCapableOrAdminByHandle(hClient,
6590                                               NV_RM_CAP_SYS_SMC_CONFIG,
6591                                               pCallContext->secInfo.privLevel))
6592         {
6593             NV_PRINTF(LEVEL_ERROR,
6594                       "Non privileged client requesting global gpu instance info\n");
6595             rmStatus = NV_ERR_INSUFFICIENT_PERMISSIONS;
6596             goto done;
6597         }
6598 
6599         // Take all in-use swizzIds into consideration
6600         validSwizzIdMask = pKernelMIGManager->swizzIdInUseMask;
6601     }
6602     else
6603     {
6604         rmStatus = kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref);
6605         if (rmStatus != NV_OK)
6606         {
6607             // set the valid gpu instance count to "0" and return
6608             pParams->validPartitionCount = 0;
6609             rmStatus = NV_OK;
6610             goto done;
6611         }
6612 
6613         validSwizzIdMask = NVBIT64(ref.pKernelMIGGpuInstance->swizzId);
6614     }
6615 
6616     pParams->validPartitionCount = 0;
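    // Walk the selected swizzIds, lowest first, filling one queryPartitionInfo entry per gpu instance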
6617     for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; i++)
6618     {
6619         MIG_RESOURCE_ALLOCATION *pResourceAllocation;
6620         NvU32 swizzId = portUtilCountTrailingZeros64(validSwizzIdMask);
6621         NvU32 j;
6622         RM_ENGINE_TYPE rmEngineType;
6623 
6624         rmStatus = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &ref.pKernelMIGGpuInstance);
6625         if (rmStatus != NV_OK)
6626         {
6627             NV_PRINTF(LEVEL_ERROR,
6628                       "Unable to get gpu instance info for swizzId - %d\n",
6629                       swizzId);
6630             goto done;
6631         }
6632 
6633         pResourceAllocation = &ref.pKernelMIGGpuInstance->resourceAllocation;
6634 
6635         pParams->queryPartitionInfo[i].partitionFlag = ref.pKernelMIGGpuInstance->partitionFlag;
6636         pParams->queryPartitionInfo[i].swizzId = ref.pKernelMIGGpuInstance->swizzId;
6637         pParams->queryPartitionInfo[i].grEngCount =
6638             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_GR(0));
6639         pParams->queryPartitionInfo[i].smCount = ref.pKernelMIGGpuInstance->pProfile->smCount;
6640         pParams->queryPartitionInfo[i].veidCount = pResourceAllocation->veidCount;
6641         pParams->queryPartitionInfo[i].ceCount =
6642             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_COPY(0));
6643         pParams->queryPartitionInfo[i].gpcCount = pResourceAllocation->gpcCount;
6644         pParams->queryPartitionInfo[i].gfxGpcCount = pResourceAllocation->gfxGpcCount;
6645         pParams->queryPartitionInfo[i].virtualGpcCount = pResourceAllocation->virtualGpcCount;
6646         pParams->queryPartitionInfo[i].nvDecCount =
6647             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVDEC(0));
6648         pParams->queryPartitionInfo[i].nvEncCount =
6649             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVENC(0));
6650         pParams->queryPartitionInfo[i].nvJpgCount =
6651             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVJPG);
6652         pParams->queryPartitionInfo[i].nvOfaCount =
6653             kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_OFA);
6654         pParams->queryPartitionInfo[i].memSize = rangeLength(ref.pKernelMIGGpuInstance->memRange);
6655         pParams->queryPartitionInfo[i].validCTSIdMask = ref.pKernelMIGGpuInstance->pProfile->validCTSIdMask;
6656         pParams->queryPartitionInfo[i].bValid = NV_TRUE;
6657 
6658         {
6659             NV_ASSERT_OR_ELSE(pRpcParams->queryPartitionInfo[i].bValid,
6660                               rmStatus = NV_ERR_INVALID_STATE; goto done);
6661             NV_ASSERT_OR_ELSE(
6662                 pParams->queryPartitionInfo[i].swizzId == pRpcParams->queryPartitionInfo[i].swizzId,
6663                 rmStatus = NV_ERR_INVALID_STATE; goto done);
6664 
6665             // Fill GPCs associated with every GR
6666             j = 0;
6667             FOR_EACH_IN_BITVECTOR(&pResourceAllocation->engines, rmEngineType)
6668             {
6669                 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
6670                     continue;
6671 
6672                 pParams->queryPartitionInfo[i].gpcsPerGr[j] = pRpcParams->queryPartitionInfo[i].gpcsPerGr[j];
6673                 pParams->queryPartitionInfo[i].gfxGpcPerGr[j] = pRpcParams->queryPartitionInfo[i].gfxGpcPerGr[j];
6674                 pParams->queryPartitionInfo[i].veidsPerGr[j] = pRpcParams->queryPartitionInfo[i].veidsPerGr[j];
6675                 pParams->queryPartitionInfo[i].virtualGpcsPerGr[j] = pRpcParams->queryPartitionInfo[i].virtualGpcsPerGr[j];
6676 
6677                 j++;
6678             }
6679             FOR_EACH_IN_BITVECTOR_END();
6680 
6681             // Take the value provided by physical
6682             pParams->queryPartitionInfo[i].bPartitionError = pRpcParams->queryPartitionInfo[i].bPartitionError;
6683             pParams->queryPartitionInfo[i].span = pRpcParams->queryPartitionInfo[i].span;
6684         }
6685 
6686         ++pParams->validPartitionCount;
6687 
6688         validSwizzIdMask &= ~NVBIT64(swizzId);
6689         if (validSwizzIdMask == 0)
6690         {
6691             break;
6692         }
6693     }
6694 
6695 done:
6696     portMemFree(pRpcParams);
6697 
6698     return rmStatus;
6699 }
6700 
6701 NV_STATUS
6702 subdeviceCtrlCmdInternalKMIGmgrExportGPUInstance_IMPL
6703 (
6704     Subdevice *pSubdevice,
6705     NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pParams
6706 )
6707 {
6708     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
6709     CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6710     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
6711 
6712     // No gpu instances to export
6713     if (!IS_MIG_IN_USE(pGpu))
6714         return NV_ERR_NOT_SUPPORTED;
6715 
6716     // An unprivileged client has no use case for import/export
6717     if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pSubdevice),
6718                                           NV_RM_CAP_SYS_SMC_CONFIG,
6719                                           pCallContext->secInfo.privLevel))
6720     {
6721         return NV_ERR_INSUFFICIENT_PERMISSIONS;
6722     }
6723 
6724     // Guest RM does not support import/export
6725     if (IS_VIRTUAL(pGpu))
6726     {
6727         return NV_ERR_NOT_SUPPORTED;
6728     }
6729 
6730     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
6731         pRmApi->Control(pRmApi,
6732                         pGpu->hInternalClient,
6733                         pGpu->hInternalSubdevice,
6734                         NV2080_CTRL_CMD_INTERNAL_MIGMGR_EXPORT_GPU_INSTANCE,
6735                         pParams,
6736                         sizeof(*pParams)));
6737 
6738     return NV_OK;
6739 }
6740 
6741 NV_STATUS
6742 subdeviceCtrlCmdInternalKMIGmgrImportGPUInstance_IMPL
6743 (
6744     Subdevice *pSubdevice,
6745     NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pParams
6746 )
6747 {
6748     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
6749     NV_STATUS status = NV_OK;
6750     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6751     CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
6752     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
6753 
6754     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED))
6755         return NV_ERR_NOT_SUPPORTED;
6756 
6757     NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE);
6758 
6759     // An unprivileged client has no use case for import/export
6760     if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pSubdevice),
6761                                           NV_RM_CAP_SYS_SMC_CONFIG,
6762                                           pCallContext->secInfo.privLevel))
6763     {
6764         return NV_ERR_INSUFFICIENT_PERMISSIONS;
6765     }
6766 
6767     // Guest RM does not support import/export
6768     if (IS_VIRTUAL(pGpu))
6769     {
6770         return NV_ERR_NOT_SUPPORTED;
6771     }
6772 
6773     if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
6774     {
6775         NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->swizzId);
6776 
6777         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
6778             kmigmgrSetMIGState(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu), bMemoryPartitioningNeeded, NV_TRUE, NV_FALSE));
6779     }
6780 
6781     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
6782         pRmApi->Control(pRmApi,
6783                         pGpu->hInternalClient,
6784                         pGpu->hInternalSubdevice,
6785                         NV2080_CTRL_CMD_INTERNAL_MIGMGR_IMPORT_GPU_INSTANCE,
6786                         pParams,
6787                         sizeof(*pParams)),
6788         cleanup_mig_state);
6789 
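    //
    // For GSP clients, mirror the imported instance in kernel-RM by replaying it
    // as a RESTORE-type gpu instance creation.
    //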
6790     if (IS_GSP_CLIENT(pGpu))
6791     {
6792         GPUMGR_SAVE_GPU_INSTANCE *pSave = portMemAllocNonPaged(sizeof(*pSave));
6793         NV_CHECK_OR_ELSE(LEVEL_ERROR,
6794             pSave != NULL,
6795             status = NV_ERR_NO_MEMORY;
6796             goto cleanup_mig_state;);
6797 
6798         KMIGMGR_CREATE_GPU_INSTANCE_PARAMS restore =
6799         {
6800             .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE,
6801             .inst.restore.pGPUInstanceSave = pSave,
6802         };
6803         pSave->bValid = NV_TRUE;
6804         pSave->swizzId = pParams->swizzId;
6805         pSave->pOsRmCaps = NULL;
6806         portMemCopy(&(pSave->giInfo), sizeof(pSave->giInfo), &pParams->info, sizeof(pParams->info));
6807 
6808         status = kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &pParams->swizzId, restore, NV_TRUE, NV_FALSE);
6809 
6810         portMemFree(pSave);
6811         NV_CHECK_OR_GOTO(LEVEL_ERROR, status == NV_OK, cleanup_rpc);
6812     }
6813 
6814     return NV_OK;
6815 
6816 cleanup_rpc:
6817     {
6818         NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS params;
6819 
6820         portMemSet(&params, 0, sizeof(params));
6821         params.partitionCount = 1;
6822         params.partitionInfo[0].bValid = NV_FALSE;
6823         params.partitionInfo[0].swizzId = pParams->swizzId;
6824 
6825         NV_ASSERT_OK(
6826             pRmApi->Control(pRmApi,
6827                             pGpu->hInternalClient,
6828                             pGpu->hInternalSubdevice,
6829                             NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
6830                             &params,
6831                             sizeof(params)));
6832     }
6833 
6834 cleanup_mig_state:
6835     if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0)
6836     {
6837         NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->swizzId);
6838 
6839         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
6840             kmigmgrSetMIGState(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu), bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE));
6841     }
6842 
6843     return status;
6844 }
6845 
6846 NV_STATUS
6847 subdeviceCtrlCmdGpuGetComputeProfiles_IMPL
6848 (
6849     Subdevice *pSubdevice,
6850     NV2080_CTRL_GPU_GET_COMPUTE_PROFILES_PARAMS *pParams
6851 )
6852 {
6853     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
6854     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
6855     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
6856     NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice);
6857     NvU32 maxSmCount = NV_U32_MAX;
6858     MIG_INSTANCE_REF ref;
6859     NvU32 entryCount;
6860     NvU32 i;
6861 
6862     if (!IS_MIG_ENABLED(pGpu))
6863         return NV_ERR_INVALID_STATE;
6864 
6865     //
6866     // Grab the MIG partition reference if available. The instance profile's SM count
6867     // is used to filter out compute profiles which wouldn't fit on the GI anyway.
6868     // Failure to get a reference is not fatal, since compute profiles for the whole-GPU
6869     // view can still be queried without subscribing to a specific GPU instance.
6870     //
6871     if (kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref) == NV_OK)
6872     {
6873         maxSmCount = ref.pKernelMIGGpuInstance->pProfile->smCount;
6874     }
6875 
6876     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_INVALID_STATE);
6877     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo->pCIProfiles != NULL, NV_ERR_INVALID_STATE);
6878     NV_ASSERT(pStaticInfo->pCIProfiles->profileCount <= NV_ARRAY_ELEMENTS(pParams->profiles));
6879 
6880     entryCount = 0;
6881     for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
6882     {
6883         if (pStaticInfo->pCIProfiles->profiles[i].smCount > maxSmCount)
6884             continue;
6885 
6886         // If there are any duplicate compute profiles (i.e. same GPC and SM counts), skip broadcasting the
6887         // profile out.
6888         if ((entryCount > 0) &&
6889             (pParams->profiles[entryCount - 1].gfxGpcCount == pStaticInfo->pCIProfiles->profiles[i].gfxGpcCount) &&
6890             (pParams->profiles[entryCount - 1].gpcCount == pStaticInfo->pCIProfiles->profiles[i].gpcCount) &&
6891             (pParams->profiles[entryCount - 1].smCount == pStaticInfo->pCIProfiles->profiles[i].smCount))
6892         {
6893            continue;
6894         }
6895 
6896         pParams->profiles[entryCount].computeSize = pStaticInfo->pCIProfiles->profiles[i].computeSize;
6897         pParams->profiles[entryCount].gfxGpcCount = pStaticInfo->pCIProfiles->profiles[i].gfxGpcCount;
6898         pParams->profiles[entryCount].gpcCount    = pStaticInfo->pCIProfiles->profiles[i].physicalSlots;
6899         pParams->profiles[entryCount].smCount     = pStaticInfo->pCIProfiles->profiles[i].smCount;
6900         pParams->profiles[entryCount].veidCount   = pStaticInfo->pCIProfiles->profiles[i].veidCount;
6901         entryCount++;
6902     }
6903     pParams->profileCount = entryCount;
6904     return NV_OK;
6905 }
6906 
6907 /*!
 * @brief   Function to get the next computeSize flag, either smaller or larger than
 *          the passed-in flag.
 *
 * @param[IN]     bGetNextSmallest   Flag controlling whether the next smallest or
 *                                   next largest compute size is returned
 * @param[IN]     computeSize        Base computeSize to look up
 *
 * @return        a.) If computeSize is KMIGMGR_COMPUTE_SIZE_INVALID:
 *                   1.) NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL if bGetNextSmallest
 *                   2.) NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH if !bGetNextSmallest
 *                b.) Otherwise, the next smaller/larger compute size relative to computeSize,
 *                    based upon bGetNextSmallest, or KMIGMGR_COMPUTE_SIZE_INVALID if none exists
6920  */
6921 NvU32
6922 kmigmgrGetNextComputeSize_IMPL
6923 (
6924     NvBool bGetNextSmallest,
6925     NvU32 computeSize
6926 )
6927 {
6928     const NvU32 computeSizeFlags[] =
6929     {
6930         KMIGMGR_COMPUTE_SIZE_INVALID,
6931         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL,
6932         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF,
6933         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF,
6934         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER,
6935         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER,
6936         NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH,
6937         KMIGMGR_COMPUTE_SIZE_INVALID
6938     };
6939 
6940     NV_ASSERT_OR_RETURN(computeSize <= KMIGMGR_COMPUTE_SIZE_INVALID, KMIGMGR_COMPUTE_SIZE_INVALID);
6941 
6942     if (computeSize == KMIGMGR_COMPUTE_SIZE_INVALID)
6943     {
6944         return (bGetNextSmallest) ? computeSizeFlags[1] : computeSizeFlags[NV_ARRAY_ELEMENTS(computeSizeFlags) - 2];
6945     }
6946     else
6947     {
6948         NvU32 i;
6949 
6950         for (i = 1; i < NV_ARRAY_ELEMENTS(computeSizeFlags) - 1; i++)
6951             if (computeSizeFlags[i] == computeSize)
6952                 return (bGetNextSmallest) ? computeSizeFlags[i + 1] : computeSizeFlags[i - 1];
6953 
6954         // Requested input flag was not found
6955         return KMIGMGR_COMPUTE_SIZE_INVALID;
6956     }
6957 }
6958 
6959 /*!
6960  * @brief   Function to lookup a skyline for a given compute size
6961  *
6962  * @param[IN]   pGpu
6963  * @param[IN]   pKernelMIGManager
6964  * @param[IN]   computeSize             Compute size to find skyline for
 * @param[OUT]  ppSkyline               Double pointer set to the NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO
 *                                      entry associated with the given compute size
6967  */
6968 NV_STATUS
6969 kmigmgrGetSkylineFromSize_IMPL
6970 (
6971     OBJGPU *pGpu,
6972     KernelMIGManager *pKernelMIGManager,
6973     NvU32 computeSize,
6974     const NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO **ppSkyline
6975 )
6976 {
6977     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
6978     NvU32 i;
6979 
6980     NV_ASSERT_OR_RETURN(ppSkyline != NULL, NV_ERR_INVALID_ARGUMENT);
6981     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
6982     NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pSkylineInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
6983 
6984     for (i = 0; i < pStaticInfo->pSkylineInfo->validEntries; i++)
6985     {
6986         if (pStaticInfo->pSkylineInfo->skylineTable[i].computeSizeFlag == computeSize)
6987         {
6988             *ppSkyline = &pStaticInfo->pSkylineInfo->skylineTable[i];
6989             return NV_OK;
6990         }
6991     }
6992     NV_PRINTF(LEVEL_INFO, "No skyline for with compute size %d\n", computeSize);
6993     return NV_ERR_OBJECT_NOT_FOUND;
6994 }
6995 
6996 /*!
6997  * @brief   Function to lookup a compute profile for a given compute size
6998  *
6999  * @param[IN]   pGpu
7000  * @param[IN]   pKernelMIGManager
 * @param[IN]   computeSize             Compute size to find a compute profile for
 * @param[OUT]  pProfile                Pointer to NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE struct filled with
 *                                      a copy of the compute profile info associated with the compute size
7004  */
7005 NV_STATUS
7006 kmigmgrGetComputeProfileFromSize_IMPL
7007 (
7008     OBJGPU *pGpu,
7009     KernelMIGManager *pKernelMIGManager,
7010     NvU32 computeSize,
7011     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
7012 )
7013 {
7014     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
7015     NvU32 i;
7016 
7017     NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
7018     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
7019     NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);
7020 
7021     for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
7022     {
7023         if (pStaticInfo->pCIProfiles->profiles[i].computeSize == computeSize)
7024         {
7025             portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
7026             return NV_OK;
7027         }
7028     }
7029     NV_PRINTF(LEVEL_INFO, "Found no Compute Profile for computeSize=%d\n", computeSize);
7030     return NV_ERR_OBJECT_NOT_FOUND;
7031 }
7032 
7033 /*!
7034  * @brief   Function to lookup a compute profile for a given SM count
7035  *
7036  * @param[IN]   pGpu
7037  * @param[IN]   pKernelMIGManager
7038  * @param[IN]   smCount                 SM Count to look up the associated compute profile
 * @param[OUT]  pProfile                Pointer to NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE struct filled with
 *                                      a copy of the compute profile info associated with the SM count
7041  */
7042 NV_STATUS
7043 kmigmgrGetComputeProfileFromSmCount_IMPL
7044 (
7045     OBJGPU *pGpu,
7046     KernelMIGManager *pKernelMIGManager,
7047     NvU32 smCount,
7048     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
7049 )
7050 {
7051     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
7052     NvU32 i;
7053 
7054     NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
7055     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
7056     NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);
7057 
7058     for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
7059     {
7060         if (pStaticInfo->pCIProfiles->profiles[i].smCount == smCount)
7061         {
7062             portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
7063             return NV_OK;
7064         }
7065     }
7066     NV_PRINTF(LEVEL_ERROR, "Found no Compute Profile for smCount=%d\n", smCount);
7067     return NV_ERR_OBJECT_NOT_FOUND;
7068 }
7069 
7070 
7071 /*!
7072  * @brief   Function to lookup a compute profile for a given GPC count. This function converts
7073  *          the provided gpcCount into a COMPUTE_SIZE partition flag which is then looked up
7074  *          in the static info compute profile list.
7075  *
7076  * @param[IN]   pGpu
7077  * @param[IN]   pKernelMIGManager
7078  * @param[IN]   gpcCount                GPC Count to look up the associated compute profile
 * @param[OUT]  pProfile                Pointer to NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE struct filled with
 *                                      a copy of the compute profile info associated with the GPC count
7081  */
7082 NV_STATUS
7083 kmigmgrGetComputeProfileFromGpcCount_IMPL
7084 (
7085     OBJGPU *pGpu,
7086     KernelMIGManager *pKernelMIGManager,
7087     NvU32 gpcCount,
7088     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
7089 )
7090 {
7091     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
7092     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
7093     NvBool bReducedConfig = kmigmgrIsA100ReducedConfig(pGpu, pKernelMIGManager);
7094     NvU32 compSize;
7095     NvU32 maxGpc;
7096     NvU32 i;
7097 
7098     NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
7099     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
7100     NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);
7101 
7102     maxGpc = pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS].data;
7103     if (bReducedConfig)
7104         maxGpc /= 2;
7105 
7106     if (gpcCount <= (maxGpc / 8))
7107         compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH;
7108     else if (gpcCount <= (maxGpc / 4))
7109         compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER;
7110     else if (gpcCount <= ((maxGpc / 2) - 1))
7111         compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF;
7112     else if (gpcCount <= (maxGpc / 2))
7113         compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF;
7114     else
7115         compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL;
7116 
7117     for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
7118     {
7119         if (pStaticInfo->pCIProfiles->profiles[i].computeSize == compSize)
7120         {
7121             portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
7122             return NV_OK;
7123         }
7124     }
7125 
7126     return NV_ERR_OBJECT_NOT_FOUND;
7127 }
7128 
7129 /*!
7130  * @brief   Function to lookup a compute profile for a given cts ID
7131  *
7132  * @param[IN]   pGpu
7133  * @param[IN]   pKernelMIGManager
7134  * @param[IN]   ctsId                   CTS ID to find compute profile for
 * @param[OUT]  pProfile                Pointer to NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE struct filled with
 *                                      a copy of the compute profile info associated with the CTS ID
7137  */
7138 NV_STATUS
7139 kmigmgrGetComputeProfileFromCTSId_IMPL
7140 (
7141     OBJGPU *pGpu,
7142     KernelMIGManager *pKernelMIGManager,
7143     NvU32 ctsId,
7144     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
7145 )
7146 {
7147     const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
7148     NvU32 computeSize;
7149 
7150     NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
7151     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
7152     NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);
7153 
7154     computeSize = kmigmgrGetComputeSizeFromCTSId(ctsId);
7155     return kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager, computeSize, pProfile);
7156 }
7157 
7158 /*!
7159  * @brief   Function which returns a mask of CTS IDs which are not usable when the input CTS
7160  *          ID is in-use.
7161  *
7162  * @param[IN]   pGpu
7163  * @param[IN]   pKernelMIGManager
7164  * @param[IN]   ctsId              Input CTS ID to look-up invalid mask for
 * @param[OUT]  pInvalidCTSIdMask  Output mask of CTS IDs not usable with the input ID
7166  */
7167 NV_STATUS
7168 kmigmgrGetInvalidCTSIdMask_IMPL
7169 (
7170     OBJGPU *pGpu,
7171     KernelMIGManager *pKernelMIGManager,
7172     NvU32 ctsId,
7173     NvU64 *pInvalidCTSIdMask
7174 )
7175 {
7176     //
7177     // +---------------------------------------+
7178     // |                   0                   |
7179     // +-------------------+-------------------+
7180     // |         1         |         2         |
7181     // +-------------------+-------------------+
7182     // |         3         |         4         |
7183     // +---------+---------+---------+---------+
7184     // |    5    |    6    |    7    |    8    |
7185     // +---------+---------+---------+---------+
7186     // |    9    |    10   |    11   |    12   |
7187     // +----+----+----+----+----+----+----+----+
7188     // | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
7189     // +----+----+----+----+----+----+----+----+
7190     //
7191     NvU64 gpcSlot[KGRMGR_MAX_GR] =
7192     {
7193         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(5) | NVBIT64(9)  | NVBIT64(13)),
7194         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(5) | NVBIT64(9)  | NVBIT64(14)),
7195         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(6) | NVBIT64(10) | NVBIT64(15)),
7196         (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(6) | NVBIT64(10) | NVBIT64(16)),
7197         (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(7) | NVBIT64(11) | NVBIT64(17)),
7198         (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(7) | NVBIT64(11) | NVBIT64(18)),
7199         (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(8) | NVBIT64(12) | NVBIT64(19)),
7200         (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(8) | NVBIT64(12) | NVBIT64(20))
7201     };
7202     NvU64 i;
7203 
7204     NV_ASSERT_OR_RETURN(NULL != pInvalidCTSIdMask, NV_ERR_INVALID_ARGUMENT);
7205 
7206     // All bits corresponding to nonexistent CTS ids are invalid
7207     *pInvalidCTSIdMask = DRF_SHIFTMASK64(63:KMIGMGR_MAX_GPU_CTSID);
7208 
7209     for (i = 0; i < KGRMGR_MAX_GR; ++i)
7210     {
7211         if (0 != (gpcSlot[i] & NVBIT64(ctsId)))
7212         {
7213             *pInvalidCTSIdMask |= gpcSlot[i];
7214         }
7215     }
7216 
7217     return NV_OK;
7218 }
7219 
7220 /*!
7221  * @brief Returns the range of possible CTS IDs for a given compute size flag
7222  */
7223 NV_RANGE
7224 kmigmgrComputeProfileSizeToCTSIdRange_IMPL
7225 (
7226     NvU32 computeSize
7227 )
7228 {
7229     switch (computeSize)
7230     {
7231         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL:
7232             return rangeMake(0,0);
7233 
7234         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF:
7235             return rangeMake(1,2);
7236 
7237         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF:
7238             return rangeMake(3,4);
7239 
7240         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER:
7241             return rangeMake(5,8);
7242 
7243         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER:
7244             return rangeMake(9,12);
7245 
7246         case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH:
7247             return rangeMake(13,20);
7248 
7249         default:
7250             return NV_RANGE_EMPTY;
7251     }
7252 }
7253 
7254 /*!
7255  * @brief   Function to get next free CTS ID
7256  *
7257  * @param[IN]   pGpu
 * @param[IN]   pKernelMIGManager
7259  * @param[OUT]  pCtsId              CTS ID to be used if NV_OK returned
7260  * @param[IN]   globalValidCtsMask  Mask of CTS IDs which could possibly be allocated
7261  * @param[IN]   ctsIdsInUseMask     Mask of CTS IDs currently in use
7262  * @param[IN]   profileSize         Profile size to get a CTS ID for
7263  *
7264  * @return  Returns NV_STATUS
7265  *          NV_OK
7266  *          NV_ERR_INVALID_ARGUMENT        If un-supported partition size is
7267  *                                         requested
7268  *          NV_ERR_INSUFFICIENT_RESOURCES  If a CTS ID cannot be assigned
7269  */
7270 NV_STATUS
7271 kmigmgrGetFreeCTSId_IMPL
7272 (
7273     OBJGPU *pGpu,
7274     KernelMIGManager *pKernelMIGManager,
7275     NvU32 *pCtsId,
7276     NvU64 globalValidCtsMask,
7277     NvU64 ctsIdsInUseMask,
7278     NvU32 profileSize
7279 )
7280 {
7281     NV_RANGE ctsRange = kmigmgrComputeProfileSizeToCTSIdRange(profileSize);
7282     NvU64 validMask;
7283     NvU32 maxRemainingCapacity;
7284     NvU32 idealCTSId;
7285     NvU32 ctsId;
7286     NvU64 shadowValidCTSIdMask;
7287 
7288     NV_CHECK_OR_RETURN(LEVEL_WARNING, !rangeIsEmpty(ctsRange), NV_ERR_INSUFFICIENT_RESOURCES);
7289     NV_ASSERT_OR_RETURN(pCtsId != NULL, NV_ERR_INVALID_ARGUMENT);
7290 
7291     // construct a mask of all non-floorswept ctsIds
7292     validMask = globalValidCtsMask;
7293 
7294     // Remove all ctsIds with slices currently in use
7295     FOR_EACH_INDEX_IN_MASK(64, ctsId, ctsIdsInUseMask)
7296     {
7297         NvU64 invalidMask;
7298 
7299         NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask));
7300 
7301         validMask &= ~invalidMask;
7302     }
7303     FOR_EACH_INDEX_IN_MASK_END;
7304 
7305     // compute valid ctsIds for this request that can still be assigned
7306     shadowValidCTSIdMask = validMask;
7307     validMask &= DRF_SHIFTMASK64(ctsRange.hi:ctsRange.lo);
7308 
7309     // If there are no valid, open ctsIds, then bail here
7310     NV_CHECK_OR_RETURN(LEVEL_SILENT, validMask != 0x0, NV_ERR_INSUFFICIENT_RESOURCES);
7311 
7312     // Determine which available CTS ids will reduce the remaining capacity the least
7313     maxRemainingCapacity = 0;
7314     idealCTSId = portUtilCountTrailingZeros64(validMask);
7315     FOR_EACH_INDEX_IN_MASK(64, ctsId, validMask)
7316     {
7317         NvU64 invalidMask;
7318         NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask));
7319 
7320         NvU32 remainingCapacity = nvPopCount64(shadowValidCTSIdMask & ~invalidMask);
7321 
7322         if (remainingCapacity > maxRemainingCapacity)
7323         {
7324             maxRemainingCapacity = remainingCapacity;
7325             idealCTSId = ctsId;
7326         }
7327     }
7328     FOR_EACH_INDEX_IN_MASK_END;
7329 
7330     *pCtsId = idealCTSId;
7331     return NV_OK;
7332 }
7333 
7334 /*! @brief  This function determines whether or not CTS alignment and slot requirements are needed.
7335  *          For PF, this is determined by whether or not a MINI_QUARTER skyline exists.
7336  */
7337 NvBool
7338 kmigmgrIsCTSAlignmentRequired_PF
7339 (
7340     OBJGPU *pGpu,
7341     KernelMIGManager *pKernelMIGManager
7342 )
7343 {
7344     const NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO *pUnused;
7345 
7346     // CTS alignment is always required when a unique MINI_QUARTER is present
7347     return (kmigmgrGetSkylineFromSize(pGpu, pKernelMIGManager,
7348                 NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER, &pUnused) == NV_OK);
7349 }
7350 
7351 /*! @brief  This function determines whether or not CTS alignment and slot requirements are needed.
7352  *          For VF, this is determined by whether or not a MINI_QUARTER compute profile exists.
7353  */
7354 NvBool
7355 kmigmgrIsCTSAlignmentRequired_VF
7356 (
7357     OBJGPU *pGpu,
7358     KernelMIGManager *pKernelMIGManager
7359 )
7360 {
7361     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE unused;
7362 
7363     // CTS alignment is always required when a unique MINI_QUARTER is present
7364     return (kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager,
7365                 NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER, &unused) == NV_OK);
7366 }
7367 
7368 /*!
7369  * @brief  Returns the computeSize flag of a given CTS ID
7370  */
7371 NvU32
7372 kmigmgrGetComputeSizeFromCTSId_IMPL
7373 (
7374     NvU32 ctsId
7375 )
7376 {
7377     NvU32 computeSize = kmigmgrGetNextComputeSize(NV_TRUE, KMIGMGR_COMPUTE_SIZE_INVALID);
7378 
7379     while (computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
7380     {
7381         NV_RANGE range = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);
7382         if ((range.lo <= ctsId) && (ctsId <= range.hi))
7383             break;
7384         computeSize = kmigmgrGetNextComputeSize(NV_TRUE, computeSize);
7385     }
7386 
7387     return computeSize;
7388 }
7389 
7390 /*!
7391  * @brief Returns Compute size of the smallest supported compute profile
7392  */
7393 NvU32
7394 kmigmgrSmallestComputeProfileSize_IMPL
7395 (
7396     OBJGPU *pGpu,
7397     KernelMIGManager *pKernelMIGManager
7398 )
7399 {
7400     NvU32 computeSize = kmigmgrGetNextComputeSize(NV_FALSE, KMIGMGR_COMPUTE_SIZE_INVALID);
7401 
7402     while (computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
7403     {
7404         NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE unused;
7405         if (kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager, computeSize, &unused) == NV_OK)
7406             break;
7407         computeSize = kmigmgrGetNextComputeSize(NV_FALSE, computeSize);
7408     }
7409 
7410     return computeSize;
7411 }
7412 
7413 /*!
7414  * @brief  Sets/resets various CTS tracking structures in a GPU instance
7415  *         based upon whether bInUse is set
7416  *
7417  * @param[IN]   pKernelMIGGpuInstance
7418  * @param[IN]   ctsId                 CTS ID to be set/reset
7419  * @param[IN]   grId                  Global GR engine targeted for CTS ID
7420  * @param[IN]   bInUse                Flag indicating to set/reset cts tracking structures
7421  *
7422  */
7423 void
7424 kmigmgrSetCTSIdInUse_IMPL
7425 (
7426     KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
7427     NvU32 ctsId,
7428     NvU32 grId,
7429     NvBool bInUse
7430 )
7431 {
7432     NV_ASSERT_OR_RETURN_VOID(pKernelMIGGpuInstance != NULL);
7433 
7434     if (bInUse)
7435     {
7436         pKernelMIGGpuInstance->grCtsIdMap[grId] = ctsId;
7437 
7438         // Nothing to set in ctsIdInUseMask if KMIGMGR_CTSID_INVALID passed in
7439         NV_ASSERT_OR_RETURN_VOID(ctsId != KMIGMGR_CTSID_INVALID);
7440 
7441         pKernelMIGGpuInstance->ctsIdsInUseMask |= NVBIT64(ctsId);
7442     }
7443     else
7444     {
7445         //
7446         // Take CTS ID directly from gr mapping array to ensure both structures
7447         // remain in-sync.
7448         //
7449         ctsId = pKernelMIGGpuInstance->grCtsIdMap[grId];
7450 
7451         // Nothing to do if nothing was set
7452         NV_CHECK_OR_RETURN_VOID(LEVEL_WARNING, ctsId != KMIGMGR_CTSID_INVALID);
7453 
7454         pKernelMIGGpuInstance->ctsIdsInUseMask &= ~NVBIT64(ctsId);
7455         pKernelMIGGpuInstance->grCtsIdMap[grId] = KMIGMGR_CTSID_INVALID;
7456     }
7457 }
7458 
7459 /*!
 * @brief  Translates a spanStart and computeSize to the corresponding CTS ID.
 *         When KMIGMGR_COMPUTE_SIZE_INVALID is passed in, this function still
 *         returns NV_OK but sets *pCtsId to KMIGMGR_CTSID_INVALID.
7463  *
7464  * @param[IN]   pGpu
7465  * @param[IN]   pKernelMIGManager
 * @param[IN]   computeSize        Compute size whose CTS ID range the span is translated into
 * @param[IN]   spanStart          Requested span start
 * @param[OUT]  pCtsId             Output CTS ID within computeSize's range
7469  *
7470  */
7471 NV_STATUS
7472 kmigmgrXlateSpanStartToCTSId_IMPL
7473 (
7474     OBJGPU *pGpu,
7475     KernelMIGManager *pKernelMIGManager,
7476     NvU32 computeSize,
7477     NvU32 spanStart,
7478     NvU32 *pCtsId
7479 )
7480 {
7481     NV_RANGE computeSizeIdRange;
7482     NvU64 computeSizeIdMask;
7483     NvU64 slotBasisMask;
7484     NvU32 slotsPerCTS;
7485 
7486     NV_ASSERT_OR_RETURN(pCtsId != NULL, NV_ERR_INVALID_ARGUMENT);
7487 
7488     //
7489     // Initialize output to invalid CTS ID, as KMIGMGR_COMPUTE_SIZE_INVALID may have been passed in
7490     // which is ok. It Is the callers rsponsibility to check for the CTS ID validitiy.
7491     //
7492     *pCtsId = KMIGMGR_CTSID_INVALID;
7493 
7494     NV_CHECK_OR_RETURN(LEVEL_WARNING, computeSize != KMIGMGR_COMPUTE_SIZE_INVALID, NV_OK);
7495     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetSlotBasisMask(pGpu, pKernelMIGManager, &slotBasisMask));
7496 
7497     // Validate that the spanStart does not exceed the basis slot count (which constitutes the acceptable span range)
7498     NV_CHECK_OR_RETURN(LEVEL_ERROR, spanStart < nvPopCount64(slotBasisMask), NV_ERR_INVALID_ARGUMENT);
7499 
7500     computeSizeIdRange = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);
7501 
    // Grab the first CTS ID for computeSize, as it doesn't really matter which one we choose here.
7503     NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, computeSizeIdRange.lo, &computeSizeIdMask));
7504 
7505     // slots per CTSID is number of basis IDs marked in the invalid mask for this ID
7506     slotsPerCTS = nvPopCount64(computeSizeIdMask & slotBasisMask);
7507 
7508     if ((spanStart % slotsPerCTS) != 0)
7509     {
7510         NV_PRINTF(LEVEL_ERROR, "Compute span start of %d is not aligned\n", spanStart);
7511         return NV_ERR_INVALID_ARGUMENT;
7512     }
7513 
7514     *pCtsId = computeSizeIdRange.lo + (spanStart / slotsPerCTS);
7515 
7516     // The ID returned should be within the computeSize's range at this point
7517     NV_ASSERT((computeSizeIdRange.lo <= *pCtsId) && (*pCtsId <= computeSizeIdRange.hi));
7518 
7519     return NV_OK;
7520 }
7521 
7522 /*!
 * @brief  Retrieves the mask of CTS IDs which are used to derive other properties
 *         such as spans, offsets, and capacities.
7525  *
7526  * @param[IN]   pGpu
7527  * @param[IN]   pKernelMIGManager
 * @param[OUT]  pMask              Mask of all CTS IDs which form the profile slot basis
7529  */
7530 NV_STATUS
7531 kmigmgrGetSlotBasisMask_IMPL
7532 (
7533     OBJGPU *pGpu,
7534     KernelMIGManager *pKernelMIGManager,
7535     NvU64 *pMask
7536 )
7537 {
7538     NV_RANGE slotBasisIdRange;
7539     NvU32 slotBasisComputeSize;
7540 
7541     NV_CHECK_OR_RETURN(LEVEL_ERROR, pMask != NULL, NV_ERR_INVALID_ARGUMENT);
7542 
7543     slotBasisComputeSize = kmigmgrSmallestComputeProfileSize(pGpu, pKernelMIGManager);
7544     slotBasisIdRange = kmigmgrComputeProfileSizeToCTSIdRange(slotBasisComputeSize);
7545 
7546     NV_ASSERT_OR_RETURN(!rangeIsEmpty(slotBasisIdRange), NV_ERR_INVALID_STATE);
7547 
7548     *pMask = DRF_SHIFTMASK64(slotBasisIdRange.hi:slotBasisIdRange.lo);
7549 
7550     return NV_OK;
7551 }
7552 
7553 /*!
7554  * @brief  Translates a CTS ID to the corresponding spanStart of the CTS
7555  *
7556  * @param[IN]   pGpu
7557  * @param[IN]   pKernelMIGManager
7558  * @param[IN]   ctsId
7559  *
7560  */
7561 NvU32
7562 kmigmgrGetSpanStartFromCTSId_IMPL
7563 (
7564     OBJGPU *pGpu,
7565     KernelMIGManager *pKernelMIGManager,
7566     NvU32 ctsId
7567 )
7568 {
7569     NvU32 computeSize = kmigmgrGetComputeSizeFromCTSId(ctsId);
7570     NV_RANGE computeSizeIdRange;
7571     NvU64 computeSizeIdMask;
7572     NvU64 slotBasisMask;
7573     NvU32 slotsPerCTS;
7574 
7575     NV_CHECK_OR_RETURN(LEVEL_WARNING, computeSize != KMIGMGR_COMPUTE_SIZE_INVALID, 0);
7576 
7577     computeSizeIdRange = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);
7578 
7579     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetSlotBasisMask(pGpu, pKernelMIGManager, &slotBasisMask));
7580 
    // Grab the first CTS ID for computeSize, as it doesn't really matter which one we choose here.
7582     NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, computeSizeIdRange.lo, &computeSizeIdMask));
7583 
7584     // slots per CTSID is number of basis IDs marked in the invalid mask for this ID
7585     slotsPerCTS = nvPopCount64(computeSizeIdMask & slotBasisMask);
7586 
7587     return (ctsId - computeSizeIdRange.lo) * slotsPerCTS;
7588 }
7589 
7590 /*!
7591  * @brief   Function checking whether the passed-in ctsId is available given the
7592  *          current states of ctsIdValidMask and ctsIdInUseMask
7593  *
7594  * @param[IN]   pGpu
7595  * @param[IN]   pKernelMIGManager
7596  * @param[IN]   ctsIdValidMask  Valid CTS ID mask to compare against
7597  * @param[IN]   ctsIdInUseMask  Mask of CTS IDs which are marked as being used
 * @param[IN]   ctsId           CTS ID to check
7599  */
7600 NvBool
7601 kmigmgrIsCTSIdAvailable_IMPL
7602 (
7603     OBJGPU *pGpu,
7604     KernelMIGManager *pKernelMIGManager,
7605     NvU64 ctsIdValidMask,
7606     NvU64 ctsIdInUseMask,
7607     NvU32 ctsId
7608 )
7609 {
7610     NvU64 invalidMask = 0x0;
7611     NvU32 i;
7612 
7613     FOR_EACH_INDEX_IN_MASK(64, i, ctsIdInUseMask)
7614     {
7615         NvU64 mask;
7616 
7617         NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, i, &mask));
7618 
7619         invalidMask |= mask;
7620     }
7621     FOR_EACH_INDEX_IN_MASK_END;
7622     return !!((ctsIdValidMask & ~invalidMask) & NVBIT64(ctsId));
7623 }
7624