1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/prelude.h"
25 
26 
27 #include <class/cl0002.h>
28 #include <class/cl0005.h>
29 #include <class/cl003e.h> // NV01_MEMORY_SYSTEM
30 #include <class/cl0040.h> // NV01_MEMORY_LOCAL_USER
31 #include <class/cl0080.h>
32 #include <class/cl503b.h>
33 #include <class/cl50a0.h> // NV50_MEMORY_VIRTUAL
34 #include <class/cl90e6.h>
35 #include <class/cl90f1.h>
36 #include <class/cla06f.h>
37 #include <class/clb069.h>
38 #include <class/clb069sw.h>
39 #include <class/clb06f.h>
40 #include <class/clb0b5.h>
41 #include <class/clb0b5sw.h>
42 #include <class/clb0c0.h>
43 #include <class/clb1c0.h>
44 #include <class/clc06f.h>
45 #include <class/clc076.h>
46 #include <class/clc0b5.h>
47 #include <class/clc0c0.h>
48 #include <class/clc1b5.h>
49 #include <class/clc1c0.h>
50 #include <class/clc361.h>
51 #include <class/clc365.h>
52 #include <class/clc369.h>
53 #include <class/clc36f.h>
54 #include <class/clc3b5.h>
55 #include <class/clc3c0.h>
56 #include <class/clc46f.h>
57 #include <class/clc4c0.h>
58 #include <class/clc56f.h>
59 #include <class/clc572.h> // PHYSICAL_CHANNEL_GPFIFO
60 #include <class/clc574.h> // UVM_CHANNEL_RETAINER
61 #include <class/clc5b5.h>
62 #include <class/clc5c0.h>
63 #include <class/clc637.h>
64 #include <class/clc6b5.h>
65 #include <class/clc6c0.h>
66 #include <class/clc7b5.h>
67 #include <class/clc7c0.h>
68 #include <class/clc661.h> // HOPPER_USERMODE_A
69 #include <class/clc8b5.h> // HOPPER_DMA_COPY_A
70 #include <class/clcbc0.h> // HOPPER_COMPUTE_A
71 
72 #include <ctrl/ctrl0000/ctrl0000gpu.h>
73 #include <ctrl/ctrl0000/ctrl0000system.h>
74 #include <ctrl/ctrl0080/ctrl0080fifo.h>
75 #include <ctrl/ctrl0080/ctrl0080gpu.h>
76 #include <ctrl/ctrl2080/ctrl2080fb.h>
77 #include <ctrl/ctrl2080/ctrl2080fifo.h>
78 #include <ctrl/ctrl2080/ctrl2080gpu.h>
79 #include <ctrl/ctrl2080/ctrl2080gr.h>
80 #include <ctrl/ctrl90e6.h>
81 #include <ctrl/ctrl90f1.h>
82 #include <ctrl/ctrla06f.h>
83 #include <ctrl/ctrlb069.h>
84 #include <ctrl/ctrlc365.h>
85 #include <ctrl/ctrlc369.h>
86 #include <ctrl/ctrlc36f.h>
87 
88 #include <ampere/ga100/dev_runlist.h>
89 #include <containers/queue.h>
90 #include <core/locks.h>
91 #include <gpu/bus/kern_bus.h>
92 #include <gpu/device/device.h>
93 #include <gpu/gpu.h>
94 #include <gpu/mem_mgr/heap.h>
95 #include <gpu/mem_mgr/mem_mgr.h>
96 #include <gpu/mem_mgr/virt_mem_allocator.h>
97 #include <gpu/mem_sys/kern_mem_sys.h>
98 #include <gpu/mmu/kern_gmmu.h>
99 #include <gpu/subdevice/subdevice.h>
100 #include <gpu_mgr/gpu_mgr.h>
101 #include <kepler/gk104/dev_timer.h>
102 #include <kernel/gpu/fifo/kernel_channel.h>
103 #include <kernel/gpu/fifo/kernel_channel_group.h>
104 #include <kernel/gpu/fifo/kernel_channel_group_api.h>
105 #include <kernel/gpu/fifo/kernel_ctxshare.h>
106 #include <kernel/gpu/gr/kernel_graphics.h>
107 #include <kernel/gpu/mig_mgr/gpu_instance_subscription.h>
108 #include <kernel/gpu/mig_mgr/kernel_mig_manager.h>
109 #include <kernel/gpu/nvlink/kernel_nvlink.h>
110 #include <mem_mgr/fabric_vaspace.h>
111 #include <mem_mgr/fla_mem.h>
112 #include <mem_mgr/gpu_vaspace.h>
113 #include <mem_mgr/vaspace.h>
114 #include <mmu/gmmu_fmt.h>
115 #include <nv_uvm_types.h>
116 #include <objrpc.h>
117 #include <os/os.h>
118 #include <resserv/rs_client.h>
119 #include <rmapi/client.h>
120 #include <rmapi/nv_gpu_ops.h>
121 #include <rmapi/rs_utils.h>
122 #include <turing/tu102/dev_vm.h>
123 #include <gpu/mem_mgr/vaspace_api.h>
124 #include <vgpu/rpc.h>
125 
126 #include <pascal/gp100/dev_mmu.h>
127 
128 #define NV_GPU_OPS_NUM_GPFIFO_ENTRIES_DEFAULT 1024
129 #define NV_GPU_SMALL_PAGESIZE (4 * 1024)
130 
131 #define PAGE_SIZE_DEFAULT UVM_PAGE_SIZE_DEFAULT
132 
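// Descriptor for a GPU memory allocation; tracked in a btree through btreeNode
// (see the allocations/physAllocations trees in gpuAddressSpace below).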
133 typedef struct
134 {
135     NODE btreeNode;
136     NvU64 address;
137     NvHandle handle;
138     NvU64 size;
    // childHandle tightly couples a physical allocation with a VA memdesc.
    // A VA memdesc is considered a parent memdesc, i.e. its childHandle is non-zero (valid).
    //    - If childHandle is non-zero, there is a corresponding PA allocation present.
    //    - If childHandle is zero, the VA memdesc is in an invalid state.
143     NvHandle childHandle;
144 } gpuMemDesc;
145 
146 typedef struct
147 {
    NvU64    pageSize;           // Default is 4K or 64K; otherwise a 2M page size is used.
149     NvU64    alignment;
150 } gpuVaAllocInfo;
151 
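// Descriptor for a CPU mapping of a GPU allocation; tracked in the
// gpuAddressSpace cpuMappings btree, keyed by the CPU pointer.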
152 typedef struct
153 {
154     NODE btreeNode;
155     NvU64 cpuPointer;
156     NvHandle handle;
157 } cpuMappingDesc;
158 
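// Per-GPU RM device state shared by the gpuDevice instances of a session;
// tracked in the session's devices btree, keyed by deviceInstance.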
159 typedef struct
160 {
161     NODE btreeNode;
162     PORT_RWLOCK *btreeLock;
163     NvHandle deviceHandle;
164     PNODE subDevices;
165     NvU32 subDeviceCount;
166     NvU32 arch;
167     NvU32 implementation;
168 } deviceDesc;
169 
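// Per-subdevice RM state, refcounted across gpuDevice instances; tracked in
// deviceDesc::subDevices, keyed by subdeviceInstance.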
170 typedef struct
171 {
172     NODE btreeNode;
173     NvHandle subDeviceHandle;
174     NvU64 refCount;
175     struct
176     {
177         NvHandle                 handle;
178 
179         // Pointer to the SMC partition information. It is used as a flag to
180         // indicate that the SMC information has been initialized.
181         KERNEL_MIG_GPU_INSTANCE *info;
182     } smcPartition;
183     NvU32              eccOffset;
184     NvU32              eccMask;
185     void               *eccReadLocation;
186     NvHandle           eccMasterHandle;
187     NvHandle           eccCallbackHandle;
188     NvBool             bEccInitialized;
189     NvBool             bEccEnabled;
190     NvBool             eccErrorNotifier;
191     NVOS10_EVENT_KERNEL_CALLBACK_EX eccDbeCallback;
192 
    // The region below is used for controlling channels on the GPU.
    // Example: Volta maps the doorbell work submission register in this
    // region.
196     NvHandle           clientRegionHandle;
197     volatile void      *clientRegionMapping;
198 } subDeviceDesc;
199 
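// A session owns an RM client handle plus the btree of devices attached to it.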
200 struct gpuSession
201 {
202     NvHandle handle;
203     PNODE devices;
204     PORT_RWLOCK *btreeLock;
205 };
206 
207 
208 MAKE_MAP(MemdescMap, PMEMORY_DESCRIPTOR);
209 
210 struct gpuDevice
211 {
212     deviceDesc         *rmDevice;
213     subDeviceDesc      *rmSubDevice;
214 
215     // same as rmDevice->deviceHandle
216     NvHandle           handle;
217 
218     // same as rmSubDevice->subDeviceHandle
219     NvHandle           subhandle;
220 
221     NvU32              deviceInstance;
222     NvU32              subdeviceInstance;
223     NvU32              gpuId;
224     NvU32              hostClass;
225     NvU32              ceClass;
226     NvU32              sec2Class;
227     NvU32              computeClass;
228     NvU32              faultBufferClass;
229     NvU32              accessCounterBufferClass;
230     NvBool             isTccMode;
231     NvBool             isWddmMode;
232     struct gpuSession  *session;
233     NvU8               gpuUUID[NV_GPU_UUID_LEN];
234     gpuFbInfo          fbInfo;
235     UVM_LINK_TYPE      sysmemLink;
236     NvU32              sysmemLinkRateMBps;
237     NvBool             connectedToSwitch;
238 
239     MemdescMap         kern2PhysDescrMap;
240 
241     PORT_MUTEX         *pPagingChannelRpcMutex;
242 };
243 
244 struct gpuAddressSpace
245 {
246     NvHandle              handle;
247     struct gpuDevice     *device;
248     PNODE                 allocations;
249     PORT_RWLOCK           *allocationsLock;
250     PNODE                 cpuMappings;
251     PORT_RWLOCK           *cpuMappingsLock;
252     PNODE                 physAllocations;
253     PORT_RWLOCK           *physAllocationsLock;
254     NvU64                 vaBase;
255     NvU64                 vaSize;
256     // Dummy BAR1 allocation required on PCIe systems when GPPut resides in
257     // sysmem.
258     struct
259     {
260         NvU64             refCount;
261         NvU64             gpuAddr;
262         volatile void     *cpuAddr;
263     } dummyGpuAlloc;
264 };
265 
266 struct gpuChannel
267 {
268     NvHandle                     channelHandle;
269     NvU32                        hwRunlistId;
270     NvU32                        hwChannelId;
271     UVM_GPU_CHANNEL_ENGINE_TYPE  engineType;
272 
273     // If engineType is CE, engineIndex is a zero-based offset from
274     // RM_ENGINE_TYPE_COPY0. If engineType is GR, engineIndex is a
275     // zero-based offset from NV2080_ENGINE_TYPE_GR0.
276     NvU32                        engineIndex;
277     struct gpuAddressSpace       *vaSpace;
278     NvU64                        gpFifo;
279     NvNotification               *errorNotifier;
280     NvU64                        errorNotifierOffset;
281     NvU64                        *gpFifoEntries;
282     NvU32                        fifoEntries;
283     KeplerAControlGPFifo         *controlPage;
284     struct gpuObject             *nextAttachedEngine;
285     NvHandle                     hFaultCancelSwMethodClass;
286     volatile unsigned            *workSubmissionOffset;
287     NvU32                        workSubmissionToken;
288     volatile NvU32               *pWorkSubmissionToken;
289     NvHandle                     hUserdPhysHandle;
290     NvU64                        userdGpuAddr;
291     UVM_BUFFER_LOCATION          gpFifoLoc;
292     UVM_BUFFER_LOCATION          gpPutLoc;
293     NvBool                       retainedDummyAlloc;
294 };
295 
296 // Add 3 to include local ctx buffer, patch context buffer and PM ctxsw buffer
297 ct_assert(UVM_GPU_CHANNEL_MAX_RESOURCES >= (GR_GLOBALCTX_BUFFER_COUNT + 3));
298 
299 // A retained channel is a user client's channel which has been registered with
300 // the UVM driver.
301 struct gpuRetainedChannel_struct
302 {
303     struct gpuDevice            *device;
304     deviceDesc                  *rmDevice;
305     subDeviceDesc               *rmSubDevice;
306     struct gpuSession           *session;
307     OBJGPU                      *pGpu;
308     MEMORY_DESCRIPTOR           *instanceMemDesc;
309     MEMORY_DESCRIPTOR           *resourceMemDesc[UVM_GPU_CHANNEL_MAX_RESOURCES];
310     UVM_GPU_CHANNEL_ENGINE_TYPE channelEngineType;
311     NvU32                       resourceCount;
312     NvU32                       chId;
313     NvU32                       runlistId;
314     NvU32                       grIdx;
315 
316     // Dup of user's TSG (if one exists) under our RM client
317     NvHandle                    hDupTsg;
318 
319     // Dup to context share object
320     NvHandle                    hDupKernelCtxShare;
321 
322     // Handle for object that retains chId and instance mem
323     NvHandle                    hChannelRetainer;
324 };
325 
326 struct gpuObject
327 {
328     NvHandle         handle;
329     NvU32            type;
330     struct gpuObject *next;
331 };
332 
333 struct allocFlags
334 {
335     NvBool bGetKernelVA;
336     NvBool bfixedAddressAllocate;
337 };
338 
339 struct ChannelAllocInfo
340 {
341     NV_CHANNEL_ALLOC_PARAMS gpFifoAllocParams;
342     gpuAllocInfo gpuAllocInfo;
343 };
344 
345 struct systemP2PCaps
346 {
    // peerIds[i] contains gpu[i]'s peer ID for gpu[1 - i]
348     NvU32 peerIds[2];
349 
350     // true if the two GPUs are direct NvLink or PCIe peers
351     NvU32 accessSupported : 1;
352 
353     // true if the two GPUs are indirect (NvLink) peers
354     NvU32 indirectAccessSupported : 1;
355 
356     // true if the two GPUs are direct NvLink peers
357     NvU32 nvlinkSupported : 1;
358 
359     NvU32 atomicSupported : 1;
360 
361     // optimalNvlinkWriteCEs[i] contains the index of the optimal CE to use when
362     // writing from gpu[i] to gpu[1 - i]
363     NvU32 optimalNvlinkWriteCEs[2];
364 };
365 
366 static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *pHandle, OBJVASPACE **ppVaspace);
367 static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace,
368                                    NvBool isSystemMemory,
369                                    NvLength length,
370                                    NvU64 *gpuOffset,
371                                    struct allocFlags flags,
372                                    gpuAllocInfo *allocInfo);
373 static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc);
374 static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc);
375 static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc);
376 static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode);
377 static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace,
378                                          NvU64 allocationVa,
379                                          NvBool bPhysical,
380                                          NvHandle *pHandle);
381 static NV_STATUS findDeviceClasses(NvHandle hRoot,
382                                    NvHandle hDevice,
383                                    NvHandle hSubdevice,
384                                    NvU32 *hostClass,
385                                    NvU32 *ceClass,
386                                    NvU32 *computeClass,
387                                    NvU32 *faultBufferClass,
388                                    NvU32 *accessCounterBufferClass,
389                                    NvU32 *sec2Class);
390 static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps);
391 static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace,
392                                 NvU64 vaOffset);
393 static NvBool isDeviceVoltaPlus(const struct gpuDevice *device);
394 static NvBool isDeviceTuringPlus(const struct gpuDevice *device);
395 static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device);
396 static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device);
397 static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
398                                         NvHandle handle,
399                                         void *ptr,
400                                         NvU32 flags);
401 static NV_STATUS allocNvlinkStatusForSubdevice(struct gpuDevice *device,
402                                                NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut);
403 static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
404                                       NvBool *atomicSupported,
405                                       NvU32 *linkBandwidthMBps);
406 static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
407                                          NvU32 *linkBandwidthMBps);
408 static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory);
409 static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel);
410 static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device,
411                                                  NvHandle hClient,
412                                                  NvHandle hKernelChannel,
413                                                  gpuRetainedChannel *retainedChannel,
414                                                  gpuChannelInstanceInfo *channelInstanceInfo);
415 static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel);
416 
/*
 * _nvGpuOpsLocksAcquire() acquires the RM API lock according to rmApiLockFlags
 * and then examines numLocksNeeded. If it is 0, no GPU locks are acquired. If
 * it is 1, the GPU lock for deviceInstance1 is acquired. If it is 2, the GPU
 * locks for both deviceInstance1 and deviceInstance2 are acquired. For any
 * other value, all GPU locks are acquired.
 *
 * The function attempts to grab the needed GPU locks and writes the resulting
 * mask into acquiredLocks->gpuMask. If any needed GPU lock cannot be acquired,
 * the written mask is 0 and the function returns NV_ERR_INVALID_LOCK_STATE. In
 * that case, all locks that were taken have already been released and the
 * caller does not need to release any locks.
 */
430 
431 typedef struct nvGpuOpsLockSet
432 {
433     NvBool isRmLockAcquired;
434     NvBool isRmSemaAcquired;
435     GPU_MASK gpuMask;
436     RsClient *pClientLocked;
437 } nvGpuOpsLockSet;
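
//
// Illustrative sketch only (not used in this file): the intended
// acquire/release pattern for an operation touching a single GPU, assuming the
// caller already has a valid hClient and deviceInstance.
//
//     nvGpuOpsLockSet acquiredLocks;
//     status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, NULL,
//                                    1, deviceInstance, 0, &acquiredLocks);
//     if (status != NV_OK)
//         return status;     // nothing is held on failure
//
//     // ... perform the GPU ops work ...
//
//     _nvGpuOpsLocksRelease(&acquiredLocks);
//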
438 
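// Release whichever locks are recorded in acquiredLocks (GPU group locks,
// client lock, RM API lock, RM semaphore), in the reverse order of
// acquisition.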
439 static void _nvGpuOpsLocksRelease(nvGpuOpsLockSet *acquiredLocks)
440 {
441     OBJSYS *pSys;
442     pSys = SYS_GET_INSTANCE();
443 
444     if (acquiredLocks->gpuMask != 0)
445     {
446         rmGpuGroupLockRelease(acquiredLocks->gpuMask, GPUS_LOCK_FLAGS_NONE);
447         acquiredLocks->gpuMask = 0;
448     }
449 
450     if (acquiredLocks->pClientLocked != NULL)
451     {
452         serverReleaseClient(&g_resServ, LOCK_ACCESS_WRITE, acquiredLocks->pClientLocked);
453         acquiredLocks->pClientLocked = NULL;
454     }
455 
456     if (acquiredLocks->isRmLockAcquired == NV_TRUE)
457     {
458         rmapiLockRelease();
459         acquiredLocks->isRmLockAcquired = NV_FALSE;
460     }
461 
462     if (acquiredLocks->isRmSemaAcquired == NV_TRUE)
463     {
464         osReleaseRmSema(pSys->pSema, NULL);
465         acquiredLocks->isRmSemaAcquired = NV_FALSE;
466     }
467 }
468 
469 static NV_STATUS _nvGpuOpsLocksAcquire(NvU32 rmApiLockFlags,
470                                        NvHandle hClient,
471                                        RsClient **ppClient,
472                                        NvU32 numLocksNeeded,
473                                        NvU32 deviceInstance1,
474                                        NvU32 deviceInstance2,
475                                        nvGpuOpsLockSet *acquiredLocks)
476 {
477     NV_STATUS status;
478     OBJSYS  *pSys;
479     GPU_MASK gpuMaskRequested;
480     GPU_MASK gpuMaskAcquired;
481 
482     acquiredLocks->isRmSemaAcquired = NV_FALSE;
483     acquiredLocks->isRmLockAcquired = NV_FALSE;
484     acquiredLocks->gpuMask = 0;
485     acquiredLocks->pClientLocked = NULL;
486 
487     pSys = SYS_GET_INSTANCE();
488     if (pSys == NULL)
489     {
490         return NV_ERR_GENERIC;
491     }
492 
493     status = osAcquireRmSema(pSys->pSema);
494     if (status != NV_OK)
495     {
496         return status;
497     }
498     acquiredLocks->isRmSemaAcquired = NV_TRUE;
499 
500     status = rmapiLockAcquire(rmApiLockFlags, RM_LOCK_MODULES_GPU_OPS);
501     if (status != NV_OK)
502     {
503         _nvGpuOpsLocksRelease(acquiredLocks);
504         return status;
505     }
506     acquiredLocks->isRmLockAcquired = NV_TRUE;
507 
508     if (hClient != NV01_NULL_OBJECT)
509     {
510         status = serverAcquireClient(&g_resServ, hClient, LOCK_ACCESS_WRITE, &acquiredLocks->pClientLocked);
511 
512         if (status != NV_OK)
513         {
514             _nvGpuOpsLocksRelease(acquiredLocks);
515             return status;
516         }
517 
518         if (ppClient != NULL)
519             *ppClient = acquiredLocks->pClientLocked;
520     }
521 
522     //
523     // Determine the GPU lock mask we need. If we are asked for 0, 1, or 2 locks
524     // then we should use neither, just the first, or both deviceInstance
525     // parameters, respectively. If any other number of locks is requested, we
    // acquire all of the lockable GPUs.
527     //
528     // We cannot simply determine the mask outside of this function and pass in
529     // the mask, because gpumgrGetDeviceGpuMask requires that we hold the RM API
530     // lock. Otherwise, SLI rewiring could preempt lock acquisition and render
531     // the mask invalid.
532     //
533     gpuMaskRequested = 0;
534 
535     if (numLocksNeeded > 2)
536     {
537         gpuMaskRequested = GPUS_LOCK_ALL;
538     }
539     else
540     {
541         if (numLocksNeeded > 0)
542         {
543             gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance1);
544         }
545 
546         if (numLocksNeeded > 1)
547         {
548             gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance2);
549         }
550     }
551 
552     //
553     // The gpuMask parameter to rmGpuGroupLockAcquire is both input and output,
554     // so we have to copy in what we want here to make comparisons later.
555     //
556     gpuMaskAcquired = gpuMaskRequested;
557     if (gpuMaskRequested != 0)
558     {
559         status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
560                                        GPUS_LOCK_FLAGS_NONE,
561                                        RM_LOCK_MODULES_GPU_OPS, &gpuMaskAcquired);
562     }
563     acquiredLocks->gpuMask = gpuMaskAcquired;
564 
565     //
566     // If we cannot acquire all the locks requested, we release all the locks
567     // we *were* able to get and bail out here. There is never a safe way to
568     // proceed with a GPU ops function with fewer locks than requested. If there
569     // was a safe way to proceed, the client should have asked for fewer locks
570     // in the first place.
571     //
572     // That said, callers sometimes want "all available GPUs", and then the call
573     // to rmGpuGroupLockAcquire will mask off invalid GPUs for us. Hence the
574     // exception for GPUS_LOCK_ALL.
575     //
576     if (gpuMaskAcquired != gpuMaskRequested && gpuMaskRequested != GPUS_LOCK_ALL)
577     {
578         status = NV_ERR_INVALID_LOCK_STATE;
579     }
580 
581     if (status != NV_OK)
582     {
583         _nvGpuOpsLocksRelease(acquiredLocks);
584     }
585 
586     return status;
587 }
588 
589 static NV_STATUS _nvGpuOpsLocksAcquireAll(NvU32 rmApiLockFlags,
590                                           NvHandle hClient, RsClient **ppClient,
591                                           nvGpuOpsLockSet *acquiredLocks)
592 {
593     return _nvGpuOpsLocksAcquire(rmApiLockFlags, hClient, ppClient, 3, 0, 0, acquiredLocks);
594 }
595 
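// Allocate an RM client (NV01_ROOT) and revoke RS_ACCESS_DUP_OBJECT in its
// share policy so that other clients cannot dup objects owned by this client.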
596 static NV_STATUS nvGpuOpsCreateClient(RM_API *pRmApi, NvHandle *hClient)
597 {
598     NV_STATUS status;
599     RS_SHARE_POLICY sharePolicy;
600 
601     *hClient = NV01_NULL_OBJECT;
602     status = pRmApi->Alloc(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
603                            hClient, NV01_ROOT, hClient);
604     if (status != NV_OK)
605     {
606         return status;
607     }
608 
609     // Override default system share policy. Prohibit sharing of any and all
610     // objects owned by this client.
611     portMemSet(&sharePolicy, 0, sizeof(sharePolicy));
612     sharePolicy.type = RS_SHARE_TYPE_ALL;
613     sharePolicy.action = RS_SHARE_ACTION_FLAG_REVOKE;
614     RS_ACCESS_MASK_ADD(&sharePolicy.accessMask, RS_ACCESS_DUP_OBJECT);
615 
616     status = pRmApi->Share(pRmApi, *hClient, *hClient, &sharePolicy);
617     if (status != NV_OK)
618     {
619         pRmApi->Free(pRmApi, *hClient, *hClient);
620     }
621 
622     return status;
623 }
624 
625 NV_STATUS nvGpuOpsCreateSession(struct gpuSession **session)
626 {
627     struct gpuSession *gpuSession = NULL;
628     NV_STATUS status;
629     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
630     PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
631 
632     gpuSession = portMemAllocNonPaged(sizeof(*gpuSession));
633     if (gpuSession == NULL)
634         return NV_ERR_NO_MEMORY;
635 
636     portMemSet(gpuSession, 0, sizeof(*gpuSession));
637 
638     status = nvGpuOpsCreateClient(pRmApi, &gpuSession->handle);
639     if (status != NV_OK)
640     {
641         portMemFree(gpuSession);
642         return status;
643     }
644 
645     gpuSession->devices = NULL;
646     gpuSession->btreeLock = portSyncRwLockCreate(pAlloc);
647     *session = (gpuSession);
648     return status;
649 }
650 
651 NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session)
652 {
653     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
654 
655     if (!session)
656         return NV_OK;
657 
658     // Sanity Check: There should not be any attached devices with the session!
659     NV_ASSERT(!session->devices);
660 
661     // freeing session will free everything under it
662     pRmApi->Free(pRmApi, session->handle, session->handle);
663     portSyncRwLockDestroy(session->btreeLock);
664     portMemFree(session);
665     return NV_OK;
666 }
667 
668 static void *gpuBar0BaseAddress(OBJGPU *pGpu)
669 {
670     DEVICE_MAPPING *pMapping = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);
671 
672     NV_ASSERT(pMapping);
673 
674     return pMapping->gpuNvAddr;
675 }
676 
677 static void eccErrorCallback(void *pArg, void *pData, NvHandle hEvent,
678                              NvU32 data, NvU32 status)
679 {
680     subDeviceDesc *rmSubDevice = (subDeviceDesc *)pArg;
681 
682     NV_ASSERT(rmSubDevice);
683 
684     rmSubDevice->eccErrorNotifier = NV_TRUE;
685 }
686 
687 static NvBool deviceNeedsDummyAlloc(struct gpuDevice *device)
688 {
689     // The dummy mapping is needed so the client can issue a read to flush out
690     // any CPU BAR1 PCIE writes prior to updating GPPUT. This is only needed
691     // when the bus is non-coherent and when not in ZeroFB (where there can't be
692     // any BAR1 mappings).
693     return device->sysmemLink < UVM_LINK_TYPE_NVLINK_2 && !device->fbInfo.bZeroFb;
694 }
695 
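// Lazily allocate and CPU-map a small, refcounted vidmem buffer for the VA
// space. Clients read it to flush pending BAR1 writes before updating GPPUT;
// see deviceNeedsDummyAlloc().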
696 static NV_STATUS nvGpuOpsVaSpaceRetainDummyAlloc(struct gpuAddressSpace *vaSpace)
697 {
698     struct gpuDevice *device;
699     NV_STATUS status = NV_OK;
700     gpuAllocInfo allocInfo = {0};
701     struct allocFlags flags = {0};
702 
703     device = vaSpace->device;
704     NV_ASSERT(device);
705     NV_ASSERT(deviceNeedsDummyAlloc(device));
706 
707     if (vaSpace->dummyGpuAlloc.refCount > 0)
708         goto done;
709 
710     flags.bGetKernelVA = NV_FALSE;
711     status = nvGpuOpsGpuMalloc(vaSpace,
712                                NV_FALSE,
713                                NV_GPU_SMALL_PAGESIZE,
714                                &vaSpace->dummyGpuAlloc.gpuAddr,
715                                flags,
716                                &allocInfo);
717     if (status != NV_OK)
718         return status;
719 
720     status = nvGpuOpsMemoryCpuMap(vaSpace,
721                                   vaSpace->dummyGpuAlloc.gpuAddr,
722                                   NV_GPU_SMALL_PAGESIZE,
723                                   (void **)&vaSpace->dummyGpuAlloc.cpuAddr,
724                                   PAGE_SIZE_DEFAULT);
725     if (status != NV_OK)
726         nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);
727 
728 done:
729     if (status == NV_OK)
730     {
731         ++vaSpace->dummyGpuAlloc.refCount;
732         NV_ASSERT(vaSpace->dummyGpuAlloc.gpuAddr);
733         NV_ASSERT(vaSpace->dummyGpuAlloc.cpuAddr);
734     }
735 
736     return status;
737 }
738 
739 static void nvGpuOpsVaSpaceReleaseDummyAlloc(struct gpuAddressSpace *vaSpace)
740 {
741     NV_ASSERT(deviceNeedsDummyAlloc(vaSpace->device));
742     NV_ASSERT(vaSpace->dummyGpuAlloc.refCount != 0);
743 
744     if (--vaSpace->dummyGpuAlloc.refCount > 0)
745         return;
746 
747     if (vaSpace->dummyGpuAlloc.cpuAddr)
748         nvGpuOpsMemoryCpuUnMap(vaSpace, (void *)vaSpace->dummyGpuAlloc.cpuAddr);
749 
750     if (vaSpace->dummyGpuAlloc.gpuAddr)
751         nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);
752 
753     vaSpace->dummyGpuAlloc.cpuAddr = NULL;
754     vaSpace->dummyGpuAlloc.gpuAddr = 0;
755 }
756 
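// Stop all channels bound to the given VA space (leaving them bound) using
// NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS.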
757 static NV_STATUS nvGpuOpsDisableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
758 {
759     NV_STATUS   status = NV_OK;
760     OBJVASPACE *pVAS = NULL;
761     Device     *pDevice;
762     RsClient   *pClient;
763     RS_ORDERED_ITERATOR it;
764     RM_API     *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
765     NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};
766 
767     if (vaSpace == NULL)
768         return NV_ERR_INVALID_ARGUMENT;
769 
770     status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
771     if (status != NV_OK)
772         return status;
773 
774     status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
775     if (status != NV_OK)
776         return status;
777 
778     GPU_RES_SET_THREAD_BC_STATE(pDevice);
779 
780     status = vaspaceGetByHandleOrDeviceDefault(pClient,
781                                                vaSpace->device->handle,
782                                                vaSpace->handle,
783                                                &pVAS);
784     if ((status != NV_OK) || (pVAS == NULL))
785         return NV_ERR_INVALID_ARGUMENT;
786 
787     // Stop all channels under the VAS, but leave them bound.
788     it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
789     while (clientRefOrderedIterNext(pClient, &it))
790     {
791         KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);
792 
793         NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
794         if (pKernelChannel->pVAS != pVAS)
795             continue;
796 
797         NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
798         disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
799         disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
800         disableParams.numChannels++;
801     }
802 
803     if (disableParams.numChannels == 0)
804         return status;
805 
806     disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_TRUE;
807     status = pRmApi->Control(pRmApi,
808                              vaSpace->device->session->handle,
809                              vaSpace->device->subhandle,
810                              NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
811                              &disableParams,
812                              sizeof(disableParams));
813     return status;
814 }
815 
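// Re-enable the channels bound to the given VA space that were stopped by
// nvGpuOpsDisableVaSpaceChannels().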
816 static NV_STATUS nvGpuOpsEnableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
817 {
818     NV_STATUS    status = NV_OK;
819     OBJVASPACE  *pVAS = NULL;
820     Device      *pDevice;
821     RsClient    *pClient;
822     RS_ORDERED_ITERATOR it;
823     NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};
824     RM_API      *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
825 
826     if (vaSpace == NULL)
827         return NV_ERR_INVALID_ARGUMENT;
828 
829     status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
830     if (status != NV_OK)
831         return status;
832 
833     status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
834     if (status != NV_OK)
835         return status;
836 
837     GPU_RES_SET_THREAD_BC_STATE(pDevice);
838 
839     status = vaspaceGetByHandleOrDeviceDefault(pClient,
840                                                vaSpace->device->handle,
841                                                vaSpace->handle,
842                                                &pVAS);
843     if ((status != NV_OK) || (pVAS == NULL))
844         return NV_ERR_INVALID_ARGUMENT;
845 
846     it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
847     while (clientRefOrderedIterNext(pClient, &it))
848     {
849         KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);
850 
851         NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
852         if (pKernelChannel->pVAS != pVAS)
853             continue;
854 
855         NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
856         disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
857         disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
858         disableParams.numChannels++;
859     }
860 
861     if (disableParams.numChannels == 0)
862         return status;
863 
864     disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_FALSE;
865     status = pRmApi->Control(pRmApi,
866                              vaSpace->device->session->handle,
867                              vaSpace->device->subhandle,
868                              NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
869                              &disableParams,
870                              sizeof(disableParams));
871     return status;
872 }
873 
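// Find the session's existing per-GPU RM device or allocate a new
// NV01_DEVICE_0 for it, tracked in session->devices by deviceInstance.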
874 static NV_STATUS nvGpuOpsRmDeviceCreate(struct gpuDevice *device)
875 {
876     NV_STATUS status;
877     NV0080_ALLOC_PARAMETERS nv0080AllocParams = { 0 };
878     deviceDesc *rmDevice = NULL;
879     struct gpuSession *session = device->session;
880     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
881     PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
882     OBJGPU *pGpu;
883 
884     // Find the existing rmDevice.
885     // Otherwise, allocate an rmDevice.
886     portSyncRwLockAcquireRead(session->btreeLock);
887     status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
888     portSyncRwLockReleaseRead(session->btreeLock);
889     if (status == NV_OK)
890     {
891         NV_ASSERT(rmDevice);
892         device->rmDevice = rmDevice;
893         device->handle = rmDevice->deviceHandle;
894         return NV_OK;
895     }
896 
897     rmDevice = portMemAllocNonPaged(sizeof(*rmDevice));
898     if (rmDevice == NULL)
899         return NV_ERR_INSUFFICIENT_RESOURCES;
900 
901     portMemSet(rmDevice, 0, sizeof(*rmDevice));
902 
903     nv0080AllocParams.deviceId = device->deviceInstance;
904     nv0080AllocParams.hClientShare = session->handle;
905     device->handle = NV01_NULL_OBJECT;
906     status =  pRmApi->Alloc(pRmApi,
907                             session->handle,
908                             session->handle,
909                             &device->handle,
910                             NV01_DEVICE_0,
911                             &nv0080AllocParams);
912     if (status != NV_OK)
913         goto cleanup_device_desc;
914 
915     device->rmDevice = rmDevice;
916     rmDevice->deviceHandle = device->handle;
917     rmDevice->subDevices = NULL;
918     rmDevice->subDeviceCount = 0;
919 
920     portSyncRwLockAcquireWrite(session->btreeLock);
921     status = trackDescriptor(&session->devices, device->deviceInstance, rmDevice);
922     portSyncRwLockReleaseWrite(session->btreeLock);
923     if (status != NV_OK)
924         goto cleanup_device;
925 
926     // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
927     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
928     if (status != NV_OK)
929         goto cleanup_device;
930     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
931     rmapiLockRelease();
932     if (status != NV_OK)
933         goto cleanup_device;
934 
935     rmDevice->arch = gpuGetChipArch(pGpu);
936     rmDevice->implementation = gpuGetChipImpl(pGpu);
937     rmDevice->btreeLock = portSyncRwLockCreate(pAlloc);
938 
939     return NV_OK;
940 
941 cleanup_device:
942     pRmApi->Free(pRmApi, session->handle, device->handle);
943 cleanup_device_desc:
944     portMemFree(rmDevice);
945     return status;
946 }
947 
948 static void nvGpuOpsRmDeviceDestroy(struct gpuDevice *device)
949 {
950     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
951     deviceDesc *rmDevice = device->rmDevice;
952 
953     NV_ASSERT(rmDevice != NULL);
954 
955     if (rmDevice->subDeviceCount == 0)
956     {
957         struct gpuSession *session = device->session;
958         portSyncRwLockAcquireWrite(session->btreeLock);
959         deleteDescriptor(&session->devices, device->deviceInstance, (void**)&rmDevice);
960         pRmApi->Free(pRmApi, session->handle, rmDevice->deviceHandle);
961         portSyncRwLockDestroy(rmDevice->btreeLock);
962         portMemFree(rmDevice);
963         portSyncRwLockReleaseWrite(session->btreeLock);
964     }
965 }
966 
967 static void gpuDeviceRmSubDeviceDeinitEcc(struct gpuDevice *device)
968 {
969     NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbeParams = {0};
970     subDeviceDesc *rmSubDevice = device->rmSubDevice;
971     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
972 
973     if (!rmSubDevice->bEccInitialized || !rmSubDevice->bEccEnabled)
974         return;
975 
976     // Disable all notifications specific to ECC on this device
977     eventDbeParams.event = NV2080_NOTIFIERS_ECC_DBE;
978     eventDbeParams.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_DISABLE;
979 
980     pRmApi->Control(pRmApi,
981                     device->session->handle,
982                     device->subhandle,
983                     NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
984                     (void *)&eventDbeParams,
985                     sizeof(eventDbeParams));
986 
987     if (!isDeviceTuringPlus(device))
988     {
989         gpuDeviceUnmapCpuFreeHandle(device,
990                                     rmSubDevice->eccMasterHandle,
991                                     rmSubDevice->eccReadLocation,
992                                     DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
993     }
994 
995     rmSubDevice->eccReadLocation = NULL;
996 
997     if (rmSubDevice->eccCallbackHandle)
998         pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->eccCallbackHandle);
999 
1000     rmSubDevice->bEccEnabled = NV_FALSE;
1001     rmSubDevice->bEccInitialized = NV_FALSE;
1002 }
1003 
1004 //
1005 // Initialize the ECC state for an RM subdevice
1006 //
1007 // This can only be done once per RM subdevice as GF100_SUBDEVICE_MASTER can
1008 // only be allocated once.
1009 //
1010 static NV_STATUS gpuDeviceRmSubDeviceInitEcc(struct gpuDevice *device)
1011 {
1012     NV_STATUS status = NV_OK;
1013     NvU32 i = 0;
1014     int tempPtr = 0;
1015 
1016     struct
1017     {
1018         NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
1019         NV90E6_CTRL_MASTER_GET_ECC_INTR_OFFSET_MASK_PARAMS eccMask;
1020         NV90E6_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK_PARAMS errContIntrMask;
1021         NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbe;
1022         NV0005_ALLOC_PARAMETERS allocDbe;
1023     } *pParams = NULL;
1024     OBJGPU *pGpu = NULL;
1025     NvBool supportedOnAnyUnits = NV_FALSE;
1026     subDeviceDesc *rmSubDevice = device->rmSubDevice;
1027     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1028 
1029     NV_ASSERT(device);
1030 
1031     // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
1032     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1033     if (status != NV_OK)
1034         return status;
1035     status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
1036     rmapiLockRelease();
1037     if (status != NV_OK)
1038         return status;
1039 
1040     rmSubDevice->eccOffset = 0;
1041     rmSubDevice->eccMask   = 0;
1042     rmSubDevice->eccReadLocation = NULL;
1043     rmSubDevice->eccMasterHandle = 0;
1044     rmSubDevice->bEccInitialized = NV_FALSE;
1045     rmSubDevice->bEccEnabled = NV_FALSE;
1046 
1047     // Do not initialize ECC for this device if SMC is enabled, but no partition
1048     // was subscribed to.  This will be the case for select devices created
1049     // on behalf of the UVM driver.
1050     if (IS_MIG_IN_USE(pGpu) && rmSubDevice->smcPartition.info == NULL)
1051         return NV_OK;
1052 
1053     pParams = portMemAllocNonPaged(sizeof(*pParams));
1054     if (pParams == NULL)
1055     {
1056         return NV_ERR_NO_MEMORY;
1057     }
1058 
1059     portMemSet(pParams, 0, sizeof(*pParams));
1060 
1061     // Check ECC before doing anything here
1062     status = pRmApi->Control(pRmApi,
1063                              device->session->handle,
1064                              device->subhandle,
1065                              NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
1066                              &pParams->eccStatus,
1067                              sizeof(pParams->eccStatus));
1068 
1069     if (status == NV_ERR_NOT_SUPPORTED)
1070     {
1071         // Nothing to do if ECC not supported
1072         rmSubDevice->bEccEnabled = NV_FALSE;
1073         status = NV_OK;
1074         goto done;
1075     }
1076     else if (status != NV_OK)
1077     {
1078         goto done;
1079     }
1080 
1081     //
    // ECC is considered supported only if it is enabled for all supported
    // units and there is at least one supported unit.
1084     //
1085     rmSubDevice->bEccEnabled = NV_TRUE;
1086 
1087     for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
1088     {
1089         // Check the ECC status only on the units supported by HW
1090         if (pParams->eccStatus.units[i].supported)
1091         {
1092             supportedOnAnyUnits = NV_TRUE;
1093             if (!pParams->eccStatus.units[i].enabled)
1094                 rmSubDevice->bEccEnabled = NV_FALSE;
1095         }
1096     }
1097 
1098     if (!supportedOnAnyUnits)
1099         rmSubDevice->bEccEnabled = NV_FALSE;
1100 
1101     if (!rmSubDevice->bEccEnabled)
1102     {
1103         // ECC not enabled, early-out
1104         status = NV_OK;
1105         goto done;
1106     }
1107 
    // Allocate the GF100_SUBDEVICE_MASTER object for the ECC interrupt tree
1109     rmSubDevice->eccMasterHandle = NV01_NULL_OBJECT;
1110     status = pRmApi->Alloc(pRmApi, device->session->handle,
1111                            device->subhandle,
1112                            &rmSubDevice->eccMasterHandle,
1113                            GF100_SUBDEVICE_MASTER,
1114                            &tempPtr);
1115     if (status != NV_OK)
1116         goto done;
1117 
1118     if (isDeviceTuringPlus(device))
1119     {
1120         rmSubDevice->eccReadLocation = gpuBar0BaseAddress(pGpu);
1121         status = pRmApi->Control(pRmApi,
1122                                  device->session->handle,
1123                                  rmSubDevice->eccMasterHandle,
1124                                  NV90E6_CTRL_CMD_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK,
1125                                  &pParams->errContIntrMask,
1126                                  sizeof(pParams->errContIntrMask));
1127         if (status != NV_OK)
1128             goto done;
1129 
1130         rmSubDevice->eccOffset = GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_ERR_CONT);
1131         rmSubDevice->eccMask = pParams->errContIntrMask.eccMask;
1132     }
1133     else
1134     {
1135         // Map memory
1136         status = pRmApi->MapToCpu(pRmApi,
1137                                   device->session->handle,
1138                                   device->subhandle,
1139                                   rmSubDevice->eccMasterHandle, 0,
1140                                   sizeof(GF100MASTERMap),
1141                                   (void **)(&rmSubDevice->eccReadLocation),
1142                                   DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
1143         if (status != NV_OK)
1144             goto done;
1145 
1146         NV_ASSERT(rmSubDevice->eccReadLocation);
1147 
1148         status = pRmApi->Control(pRmApi,
1149                                  device->session->handle,
1150                                  rmSubDevice->eccMasterHandle,
1151                                  NV90E6_CTRL_CMD_MASTER_GET_ECC_INTR_OFFSET_MASK,
1152                                  &pParams->eccMask,
1153                                  sizeof(pParams->eccMask));
1154         if (status != NV_OK)
1155             goto done;
1156 
        // Fill in the mask and offset returned by the control call
1158         rmSubDevice->eccOffset = pParams->eccMask.offset;
1159         rmSubDevice->eccMask   = pParams->eccMask.mask;
1160     }
1161 
1162     // Setup callback for ECC DBE
1163     rmSubDevice->eccDbeCallback.func = eccErrorCallback;
1164     rmSubDevice->eccDbeCallback.arg = rmSubDevice;
1165 
1166     pParams->allocDbe.hParentClient = device->session->handle;
1167     pParams->allocDbe.hClass = NV01_EVENT_KERNEL_CALLBACK_EX;
1168     pParams->allocDbe.notifyIndex = NV2080_NOTIFIERS_ECC_DBE;
1169     pParams->allocDbe.data = NV_PTR_TO_NvP64(&rmSubDevice->eccDbeCallback);
1170 
1171     rmSubDevice->eccCallbackHandle = NV01_NULL_OBJECT;
1172     status = pRmApi->Alloc(pRmApi, device->session->handle,
1173                            device->subhandle,
1174                            &rmSubDevice->eccCallbackHandle,
1175                            NV01_EVENT_KERNEL_CALLBACK_EX,
1176                            &pParams->allocDbe);
1177 
1178     if (status != NV_OK)
1179         goto done;
1180 
1181     pParams->eventDbe.event = NV2080_NOTIFIERS_ECC_DBE;
1182     pParams->eventDbe.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_SINGLE;
1183 
1184     status = pRmApi->Control(pRmApi,
1185                              device->session->handle,
1186                              device->subhandle,
1187                              NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
1188                              &pParams->eventDbe,
1189                              sizeof(pParams->eventDbe));
1190     if (status != NV_OK)
1191         goto done;
1192 
1193 done:
1194     portMemFree(pParams);
1195 
1196     if (status == NV_OK)
1197         rmSubDevice->bEccInitialized = NV_TRUE;
1198     else
1199         gpuDeviceRmSubDeviceDeinitEcc(device);
1200 
1201     return status;
1202 }
1203 
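// Look up the swizzId of the GPU instance referenced by an existing
// GPUInstanceSubscription handle owned by hClient.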
1204 static NV_STATUS getSwizzIdFromSmcPartHandle(RM_API *pRmApi,
1205                                              NvHandle hClient,
1206                                              NvHandle hGPUInstanceSubscription,
1207                                              NvU32 *swizzId)
1208 {
1209     NV_STATUS status;
1210     RsResourceRef *pSmcResourceRef;
1211     GPUInstanceSubscription *pGPUInstanceSubscription;
1212 
1213     // get GPUInstanceSubscription handle
1214     // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
1215     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1216     if (status != NV_OK)
1217         return status;
1218     status = serverutilGetResourceRef(hClient, hGPUInstanceSubscription, &pSmcResourceRef);
1219     rmapiLockRelease();
1220     if (status != NV_OK)
1221         return status;
1222 
1223     pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
1224     if (!pGPUInstanceSubscription)
1225         return NV_ERR_INVALID_OBJECT;
1226 
1227     *swizzId = pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId;
1228 
1229     return NV_OK;
1230 }
1231 
1232 //
1233 // Determine an SMC partition's swizzId given a user subscription
1234 //
1235 // This requires temporarily duplicating the handle to validate it, as well
1236 // as to prevent removal of the partition for the duration of the look-up.
1237 // However, neither the partition, nor the swizzId uniquely identifying
1238 // it (within the scope of its parent GPU) are guaranteed to remain valid, and
1239 // callers of this function must be prepared for removal of the partition
1240 // between nvGpuOpsGetGpuInfo() and nvGpuOpsDeviceCreate().
1241 //
1242 static NV_STATUS getSwizzIdFromUserSmcPartHandle(RM_API *pRmApi,
1243                                                  NvHandle hClient,
1244                                                  NvHandle hParent,
1245                                                  NvHandle hUserClient,
1246                                                  NvHandle hUserGPUInstanceSubscription,
1247                                                  NvU32 *swizzId)
1248 {
1249     NV_STATUS status;
1250     NvHandle dupedGPUInstanceSubscription;
1251 
1252     // TODO: Acquired because serverutilGenResourceHandle expects RMAPI lock. Necessary?
1253     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1254     if (status != NV_OK)
1255         return status;
1256     status = serverutilGenResourceHandle(hClient, &dupedGPUInstanceSubscription);
1257     rmapiLockRelease();
1258     if (status != NV_OK)
1259         return status;
1260 
1261     status = pRmApi->DupObject(pRmApi,
1262                                hClient,
1263                                hParent,
1264                                &dupedGPUInstanceSubscription,
1265                                hUserClient,
1266                                hUserGPUInstanceSubscription,
1267                                NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
1268     if (status != NV_OK)
1269         return status;
1270 
1271     // get GPUInstanceSubscription handle
1272     status = getSwizzIdFromSmcPartHandle(pRmApi, hClient, dupedGPUInstanceSubscription,
1273                                          swizzId);
1274 
1275     pRmApi->Free(pRmApi, hClient, dupedGPUInstanceSubscription);
1276 
1277     return status;
1278 }
1279 
1280 static void nvGpuOpsRmSmcPartitionDestroy(struct gpuDevice *device)
1281 {
1282     subDeviceDesc *rmSubDevice = device->rmSubDevice;
1283 
1284     if (rmSubDevice->smcPartition.info != NULL)
1285     {
1286         RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1287 
1288         pRmApi->Free(pRmApi,
1289                      device->session->handle,
1290                      rmSubDevice->smcPartition.handle);
1291 
1292         rmSubDevice->smcPartition.info = NULL;
1293     }
1294 }
1295 
1296 static NV_STATUS nvGpuOpsRmSmcPartitionCreate(struct gpuDevice *device, const gpuInfo *pGpuInfo)
1297 {
1298     NV_STATUS status;
1299     OBJGPU *pGpu = NULL;
1300     subDeviceDesc *rmSubDevice = device->rmSubDevice;
1301     NvHandle dupUserHandle;
1302     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1303     struct gpuSession *session = device->session;
1304     RsResourceRef *pSmcResourceRef;
1305     GPUInstanceSubscription *pGPUInstanceSubscription;
1306     NvU32 swizzId;
1307 
1308     NV_ASSERT(rmSubDevice->smcPartition.info == NULL);
1309 
1310     if (!pGpuInfo->smcEnabled)
1311         return NV_ERR_INVALID_ARGUMENT;
1312 
1313     // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
1314     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1315     if (status != NV_OK)
1316         return status;
1317     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
1318     rmapiLockRelease();
1319     if (status != NV_OK)
1320         return status;
1321 
1322     // Allocate the SMC partition object
1323 
    // If SMC GPU partitioning is no longer in use, it was disabled after the
    // partition was detected in nvGpuOpsGetGpuInfo.
1326     if (!IS_MIG_IN_USE(pGpu))
1327         return NV_ERR_INVALID_STATE;
1328 
1329     status = pRmApi->DupObject(pRmApi,
1330                                session->handle,
1331                                rmSubDevice->subDeviceHandle,
1332                                &dupUserHandle,
1333                                pGpuInfo->smcUserClientInfo.hClient,
1334                                pGpuInfo->smcUserClientInfo.hSmcPartRef,
1335                                NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
1336     if (status != NV_OK)
1337         return status;
1338 
1339     status = getSwizzIdFromSmcPartHandle(pRmApi,
1340                                          session->handle,
1341                                          dupUserHandle,
1342                                          &swizzId);
1343     if (status != NV_OK)
1344         goto cleanup_dup_user_handle;
1345 
1346     // The swizzId changed since the call to nvGpuOpsGetGpuInfo: either the
1347     // object identified by smcUser*Handle changed, or else its configuration
1348     // was altered.
1349     if (swizzId != pGpuInfo->smcSwizzId)
1350     {
1351         status = NV_ERR_INVALID_STATE;
1352         goto cleanup_dup_user_handle;
1353     }
1354 
1355     rmSubDevice->smcPartition.handle = dupUserHandle;
1356 
1357     // get GPUInstanceSubscription handle
1358     // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
1359     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1360     if (status != NV_OK)
1361         goto cleanup_dup_user_handle;
1362     status = serverutilGetResourceRef(session->handle, rmSubDevice->smcPartition.handle, &pSmcResourceRef);
1363     rmapiLockRelease();
1364     if (status != NV_OK)
1365         goto cleanup_dup_user_handle;
1366 
1367     pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
1368     NV_ASSERT(pGPUInstanceSubscription != NULL);
1369 
1370     NV_ASSERT(pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId == pGpuInfo->smcSwizzId);
1371 
1372     rmSubDevice->smcPartition.info = pGPUInstanceSubscription->pKernelMIGGpuInstance;
1373 
1374     return NV_OK;
1375 
1376 cleanup_dup_user_handle:
1377     pRmApi->Free(pRmApi, session->handle, dupUserHandle);
1378 
1379     return status;
1380 }
1381 
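// Find the existing refcounted rmSubDevice for this subdevice instance or
// allocate a new NV20_SUBDEVICE_0 and track it under the parent rmDevice.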
1382 static NV_STATUS nvGpuOpsRmSubDeviceCreate(struct gpuDevice *device)
1383 {
1384     NV_STATUS status;
1385     NV2080_ALLOC_PARAMETERS nv2080AllocParams = { 0 };
1386     deviceDesc *rmDevice = NULL;
1387     subDeviceDesc *rmSubDevice = NULL;
1388     struct gpuSession *session = device->session;
1389     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1390 
1391     NV_ASSERT(session);
1392 
    // Look up the rmDevice needed to create an rmSubDevice.
1394     portSyncRwLockAcquireRead(session->btreeLock);
1395     status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
1396     if (status != NV_OK)
1397     {
1398         portSyncRwLockReleaseRead(session->btreeLock);
1399         return status;
1400     }
1401 
1402     NV_ASSERT(rmDevice);
1403     NV_ASSERT(rmDevice->deviceHandle == device->handle);
1404 
1405     // Find the existing rmSubDevice.
1406     // Otherwise, allocate an rmSubDevice.
1407     portSyncRwLockAcquireWrite(rmDevice->btreeLock);
1408     if (findDescriptor(rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice) == NV_OK)
1409     {
1410         NV_ASSERT(rmSubDevice);
1411         device->rmSubDevice = rmSubDevice;
1412         device->subhandle = rmSubDevice->subDeviceHandle;
1413         rmSubDevice->refCount++;
1414         portSyncRwLockReleaseWrite(rmDevice->btreeLock);
1415         portSyncRwLockReleaseRead(session->btreeLock);
1416         return NV_OK;
1417     }
1418 
1419     rmSubDevice = portMemAllocNonPaged(sizeof(*rmSubDevice));
1420     if (rmSubDevice == NULL)
1421        return NV_ERR_INSUFFICIENT_RESOURCES;
1422 
1423     portMemSet(rmSubDevice, 0, sizeof(*rmSubDevice));
1424 
1425     device->rmSubDevice = rmSubDevice;
1426     rmSubDevice->refCount = 1;
1427     nv2080AllocParams.subDeviceId = device->subdeviceInstance;
1428     device->subhandle = NV01_NULL_OBJECT;
1429     status = pRmApi->Alloc(pRmApi,
1430                            session->handle,
1431                            device->handle,
1432                            &device->subhandle,
1433                            NV20_SUBDEVICE_0,
1434                            &nv2080AllocParams);
1435     if (status != NV_OK)
1436         goto cleanup_subdevice_desc;
1437     rmSubDevice->subDeviceHandle = device->subhandle;
1438 
1439     status = trackDescriptor(&rmDevice->subDevices, device->subdeviceInstance, rmSubDevice);
1440     if (status != NV_OK)
1441         goto cleanup_subdevice;
1442 
1443     rmDevice->subDeviceCount++;
1444 
1445     portSyncRwLockReleaseWrite(rmDevice->btreeLock);
1446     portSyncRwLockReleaseRead(session->btreeLock);
1447     return NV_OK;
1448 
1449 cleanup_subdevice:
1450     pRmApi->Free(pRmApi, session->handle, device->subhandle);
1451 cleanup_subdevice_desc:
1452     portMemFree(rmSubDevice);
1453     portSyncRwLockReleaseWrite(rmDevice->btreeLock);
1454     portSyncRwLockReleaseRead(session->btreeLock);
1455     return status;
1456 }
1457 
1458 static NvBool isDevicePascalPlus(const struct gpuDevice *device)
1459 {
1460     NV_ASSERT(device->rmDevice);
1461     return device->rmDevice->arch >= GPU_ARCHITECTURE_PASCAL;
1462 }
1463 
1464 static NvBool isDeviceVoltaPlus(const struct gpuDevice *device)
1465 {
1466     NV_ASSERT(device->rmDevice);
1467     return device->rmDevice->arch >= GPU_ARCHITECTURE_VOLTA;
1468 }
1469 
1470 static NvBool isDeviceTuringPlus(const struct gpuDevice *device)
1471 {
1472     NV_ASSERT(device->rmDevice);
1473     return device->rmDevice->arch >= GPU_ARCHITECTURE_TURING;
1474 }
1475 
1476 static NvBool isDeviceAmperePlus(const struct gpuDevice *device)
1477 {
1478     NV_ASSERT(device->rmDevice);
1479     return device->rmDevice->arch >= GPU_ARCHITECTURE_AMPERE;
1480 }
1481 
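// Translate an RM-reported NVLink version into the corresponding UVM link
// type. Minor revisions map to the same UVM type (2.0/2.2 -> NVLINK_2,
// 3.0/3.1 -> NVLINK_3).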
1482 static UVM_LINK_TYPE rmControlToUvmNvlinkVersion(NvU32 nvlinkVersion)
1483 {
1484     if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
1485         return UVM_LINK_TYPE_NONE;
1486     else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0)
1487         return UVM_LINK_TYPE_NVLINK_1;
1488     else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_0 ||
1489              nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_2)
1490         return UVM_LINK_TYPE_NVLINK_2;
1491     else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_0 ||
1492              nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_1)
1493         return UVM_LINK_TYPE_NVLINK_3;
1494     else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
1495         return UVM_LINK_TYPE_NVLINK_4;
1496 
1497     NV_ASSERT(0);
1498     return (NvU32)-1;
1499 }
1500 
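// Query framebuffer properties for the device: usable heap size, the heap
// size reserved by RM, whether the GPU is zero-FB, and the highest
// allocatable FB address across all FB regions.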
1501 static NV_STATUS queryFbInfo(struct gpuDevice *device)
1502 {
1503     NV_STATUS nvStatus = NV_OK;
1504     NV2080_CTRL_FB_GET_INFO_PARAMS fbInfoParams;
1505     NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS *fbRegionInfoParams;
1506     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1507     NvU32 i;
1508 
1509     struct fbInputParams
1510     {
1511         NV2080_CTRL_FB_INFO heapSize;
1512         NV2080_CTRL_FB_INFO reservedHeapSize;
1513         NV2080_CTRL_FB_INFO zeroFb;
1514     } fbParams;
1515 
1516     fbRegionInfoParams = portMemAllocNonPaged(sizeof(*fbRegionInfoParams));
1517     if (fbRegionInfoParams == NULL)
1518         return NV_ERR_NO_MEMORY;
1519 
1520     portMemSet(fbRegionInfoParams, 0, sizeof(*fbRegionInfoParams));
1521     portMemSet(&fbInfoParams, 0, sizeof(fbInfoParams));
1522     portMemSet(&fbParams, 0, sizeof(fbParams));
1523 
1524     // Set up the list of parameters we are looking to extract
1525     fbParams.heapSize.index         = NV2080_CTRL_FB_INFO_INDEX_HEAP_SIZE;
1526     fbParams.reservedHeapSize.index = NV2080_CTRL_FB_INFO_INDEX_VISTA_RESERVED_HEAP_SIZE;
1527     fbParams.zeroFb.index           = NV2080_CTRL_FB_INFO_INDEX_FB_IS_BROKEN;
1528 
1529     fbInfoParams.fbInfoListSize = sizeof(fbParams) / sizeof(fbParams.heapSize);
1530     fbInfoParams.fbInfoList = NV_PTR_TO_NvP64(&fbParams);
1531 
1532     nvStatus = pRmApi->Control(pRmApi,
1533                                device->session->handle,
1534                                device->subhandle,
1535                                NV2080_CTRL_CMD_FB_GET_INFO,
1536                                &fbInfoParams,
1537                                sizeof(fbInfoParams));
1538     if (nvStatus != NV_OK)
1539         goto out;
1540 
1541     nvStatus = pRmApi->Control(pRmApi,
1542                                device->session->handle,
1543                                device->subhandle,
1544                                NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO,
1545                                fbRegionInfoParams,
1546                                sizeof(*fbRegionInfoParams));
1547     if (nvStatus != NV_OK)
1548         goto out;
1549 
1550     device->fbInfo.heapSize         = fbParams.heapSize.data;
1551     device->fbInfo.reservedHeapSize = fbParams.reservedHeapSize.data;
1552     device->fbInfo.bZeroFb          = (NvBool)fbParams.zeroFb.data;
1553 
1554     device->fbInfo.maxAllocatableAddress = 0;
1555 
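    // The highest allocatable address is the largest limit reported across
    // all FB regions.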
1556     for (i = 0; i < fbRegionInfoParams->numFBRegions; ++i)
1557     {
1558         device->fbInfo.maxAllocatableAddress = NV_MAX(device->fbInfo.maxAllocatableAddress,
1559                                                       fbRegionInfoParams->fbRegion[i].limit);
1560     }
1561 
1562 out:
1563     portMemFree(fbRegionInfoParams);
1564     return nvStatus;
1565 }
1566 
// Return the maximum speed from the PCIe link capabilities of the given GPU,
// in megabytes per second.
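// For example, a Gen3 x16 link works out to ((8000 * 16 * 128) / 130) / 8,
// i.e. roughly 15753 MB/s, using the computation in the switch below.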
1569 static NV_STATUS getPCIELinkRateMBps(struct gpuDevice *device, NvU32 *pcieLinkRate)
1570 {
1571     // PCI Express Base Specification: https://www.pcisig.com/specifications/pciexpress
1572     const NvU32 PCIE_1_ENCODING_RATIO_TOTAL = 10;
1573     const NvU32 PCIE_1_ENCODING_RATIO_EFFECTIVE = 8;
1574     const NvU32 PCIE_2_ENCODING_RATIO_TOTAL = 10;
1575     const NvU32 PCIE_2_ENCODING_RATIO_EFFECTIVE = 8;
1576     const NvU32 PCIE_3_ENCODING_RATIO_TOTAL = 130;
1577     const NvU32 PCIE_3_ENCODING_RATIO_EFFECTIVE = 128;
1578     const NvU32 PCIE_4_ENCODING_RATIO_TOTAL = 130;
1579     const NvU32 PCIE_4_ENCODING_RATIO_EFFECTIVE = 128;
1580     const NvU32 PCIE_5_ENCODING_RATIO_TOTAL = 130;
1581     const NvU32 PCIE_5_ENCODING_RATIO_EFFECTIVE = 128;
1582     const NvU32 PCIE_6_ENCODING_RATIO_TOTAL = 256;
1583     const NvU32 PCIE_6_ENCODING_RATIO_EFFECTIVE = 242;
1584 
1585     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1586     NV2080_CTRL_BUS_INFO busInfo = {0};
1587     NV2080_CTRL_BUS_GET_INFO_PARAMS busInfoParams = {0};
1588     NvU32 linkRate = 0;
1589     NvU32 lanes;
1590 
1591     busInfo.index = NV2080_CTRL_BUS_INFO_INDEX_PCIE_GPU_LINK_CAPS;
1592     busInfoParams.busInfoListSize = 1;
1593     busInfoParams.busInfoList = NV_PTR_TO_NvP64(&busInfo);
1594 
1595     NV_STATUS status = pRmApi->Control(pRmApi,
1596                                        device->session->handle,
1597                                        device->subhandle,
1598                                        NV2080_CTRL_CMD_BUS_GET_INFO,
1599                                        &busInfoParams,
1600                                        sizeof(busInfoParams));
1601     if (status != NV_OK)
1602     {
1603         NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
1604                   __LINE__, nvstatusToString(status));
1605         return status;
1606     }
1607 
1608     lanes = DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_WIDTH, busInfo.data);
1609 
1610     // Bug 2606540: RM reports PCIe transfer rate in GT/s but labels it as Gbps
1611     switch (DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_SPEED, busInfo.data))
1612     {
1613         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_2500MBPS:
1614             linkRate = ((2500 * lanes * PCIE_1_ENCODING_RATIO_EFFECTIVE)
1615                 / PCIE_1_ENCODING_RATIO_TOTAL) / 8;
1616             break;
1617         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_5000MBPS:
1618             linkRate = ((5000 * lanes * PCIE_2_ENCODING_RATIO_EFFECTIVE)
1619                 / PCIE_2_ENCODING_RATIO_TOTAL) / 8;
1620             break;
1621         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_8000MBPS:
1622             linkRate = ((8000 * lanes * PCIE_3_ENCODING_RATIO_EFFECTIVE)
1623                 / PCIE_3_ENCODING_RATIO_TOTAL) / 8;
1624             break;
1625         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_16000MBPS:
1626             linkRate = ((16000 * lanes * PCIE_4_ENCODING_RATIO_EFFECTIVE)
1627                 / PCIE_4_ENCODING_RATIO_TOTAL) / 8;
1628             break;
1629         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_32000MBPS:
1630             linkRate = ((32000 * lanes * PCIE_5_ENCODING_RATIO_EFFECTIVE)
1631                 / PCIE_5_ENCODING_RATIO_TOTAL) / 8;
1632             break;
1633         case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_64000MBPS:
1634             linkRate = ((64000 * lanes * PCIE_6_ENCODING_RATIO_EFFECTIVE)
1635                 / PCIE_6_ENCODING_RATIO_TOTAL) / 8;
1636             break;
1637         default:
1638             status = NV_ERR_INVALID_STATE;
1639             NV_PRINTF(LEVEL_ERROR, "Unknown PCIe speed\n");
1640     }
1641 
1642     *pcieLinkRate = linkRate;
1643 
1644     return status;
1645 }
1646 
1647 NV_STATUS nvGpuOpsDeviceCreate(struct gpuSession *session,
1648                                const gpuInfo *pGpuInfo,
1649                                const NvProcessorUuid *gpuUuid,
1650                                struct gpuDevice **outDevice,
1651                                NvBool bCreateSmcPartition)
1652 {
1653     NV_STATUS status;
1654     struct gpuDevice *device = NULL;
1655     NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
1656     NV2080_CTRL_BUS_GET_INFO_V2_PARAMS *busInfoParams;
1657     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
1658     NvU32 nvlinkVersion;
1659     NvU32 sysmemLink;
1660     NvU32 linkBandwidthMBps;
1661     NvU32 sysmemConnType;
1662     NvBool atomicSupported;
1663     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1664     OBJGPU *pGpu;
1665 
1666     device = portMemAllocNonPaged(sizeof(*device));
1667     if (device == NULL)
1668         return NV_ERR_INSUFFICIENT_RESOURCES;
1669     portMemSet(device, 0, sizeof(*device));
1670     device->session = session;
1671 
1672     portMemCopy(&gpuIdInfoParams.gpuUuid, NV_UUID_LEN, gpuUuid->uuid, NV_UUID_LEN);
1673     gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
1674     status = pRmApi->Control(pRmApi,
1675                              session->handle,
1676                              session->handle,
1677                              NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
1678                              &gpuIdInfoParams,
1679                              sizeof(gpuIdInfoParams));
1680     if (status != NV_OK)
1681         goto cleanup_device_obj;
1682 
1683     device->deviceInstance = gpuIdInfoParams.deviceInstance;
1684     device->subdeviceInstance = gpuIdInfoParams.subdeviceInstance;
1685     device->gpuId = gpuIdInfoParams.gpuId;
1686 
1687     status = nvGpuOpsRmDeviceCreate(device);
1688     if (status != NV_OK)
1689         goto cleanup_device_obj;
1690 
1691     status = nvGpuOpsRmSubDeviceCreate(device);
1692     if (status != NV_OK)
1693         goto cleanup_rm_device;
1694 
1695     if (bCreateSmcPartition)
1696     {
1697         status = nvGpuOpsRmSmcPartitionCreate(device, pGpuInfo);
1698         if (status != NV_OK)
1699             goto cleanup_rm_subdevice;
1700     }
1701 
    // Create the work submission info mapping:
    //  * If SMC is disabled, create it for the device.
    //  * If SMC is enabled, create it only for SMC partitions.
1705     if (isDeviceVoltaPlus(device) && (!pGpuInfo->smcEnabled || bCreateSmcPartition))
1706     {
1707         status = gpuDeviceMapUsermodeRegion(device);
1708         if (status != NV_OK)
1709             goto cleanup_smc_partition;
1710     }
1711 
1712     status = gpuDeviceRmSubDeviceInitEcc(device);
1713     if (status != NV_OK)
1714         goto cleanup_subdevice_usermode;
1715 
1716     status = queryFbInfo(device);
1717     if (status != NV_OK)
1718         goto cleanup_ecc;
1719 
1720     device->isTccMode = NV_FALSE;
1721 
1722     // Non-TCC mode on Windows implies WDDM mode.
1723     device->isWddmMode = !device->isTccMode;
1724 
1725     status = findDeviceClasses(session->handle,
1726                                device->handle,
1727                                device->subhandle,
1728                                &device->hostClass,
1729                                &device->ceClass,
1730                                &device->computeClass,
1731                                &device->faultBufferClass,
1732                                &device->accessCounterBufferClass,
1733                                &device->sec2Class);
1734     if (status != NV_OK)
1735         goto cleanup_ecc;
1736 
1737     busInfoParams = portMemAllocNonPaged(sizeof(*busInfoParams));
1738     if (busInfoParams == NULL)
1739     {
1740         status = NV_ERR_INSUFFICIENT_RESOURCES;
1741         goto cleanup_ecc;
1742     }
1743     portMemSet(busInfoParams, 0, sizeof(*busInfoParams));
1744     busInfoParams->busInfoListSize = 1;
1745     busInfoParams->busInfoList[0].index = NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE;
1746     status = pRmApi->Control(pRmApi,
1747                              device->session->handle,
1748                              device->subhandle,
1749                              NV2080_CTRL_CMD_BUS_GET_INFO_V2,
1750                              busInfoParams,
1751                              sizeof(*busInfoParams));
1752     if (status != NV_OK)
1753     {
1754         portMemFree(busInfoParams);
1755         goto cleanup_ecc;
1756     }
1757 
1758     sysmemConnType = busInfoParams->busInfoList[0].data;
1759     portMemFree(busInfoParams);
1760 
1761     sysmemLink = UVM_LINK_TYPE_NONE;
1762     switch (sysmemConnType)
1763     {
1764         case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_NVLINK:
1765         {
1766             status = allocNvlinkStatusForSubdevice(device, &nvlinkStatus);
1767             if (status != NV_OK)
1768                 goto cleanup_ecc;
1769 
1770             nvlinkVersion = getNvlinkConnectionToNpu(nvlinkStatus,
1771                                                      &atomicSupported,
1772                                                      &linkBandwidthMBps);
1773 
1774             sysmemLink = rmControlToUvmNvlinkVersion(nvlinkVersion);
1775 
1776             portMemFree(nvlinkStatus);
1777             nvlinkStatus = NULL;
1778             break;
1779         }
1780         case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_PCIE:
1781         {
1782             sysmemLink = UVM_LINK_TYPE_PCIE;
1783             status = getPCIELinkRateMBps(device, &linkBandwidthMBps);
1784             if (status != NV_OK)
1785                 goto cleanup_ecc;
1786             break;
1787         }
1788         default:
1789         {
1790             NV_PRINTF(LEVEL_ERROR, "Unsupported sysmem connection type: %d\n",
1791                      sysmemConnType);
1792             NV_ASSERT(0);
1793             break;
1794         }
1795     }
1796 
1797     NV_PRINTF(LEVEL_INFO, "sysmem link type: %d bw: %u\n", sysmemLink, linkBandwidthMBps);
1798 
1799     NV_ASSERT(sysmemLink != UVM_LINK_TYPE_NONE);
1800     device->sysmemLink = sysmemLink;
1801     device->sysmemLinkRateMBps = linkBandwidthMBps;
1802 
1803     status = allocNvlinkStatusForSubdevice(device, &nvlinkStatus);
1804     if (status != NV_OK)
1805         goto cleanup_ecc;
1806     nvlinkVersion = getNvlinkConnectionToSwitch(nvlinkStatus,
1807                                                 &linkBandwidthMBps);
1808 
1809     if (rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NONE)
1810     {
1811         NV_ASSERT(rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NVLINK_1);
1812 
1813         // If the GPU is ever connected to the CPU via a switch, sysmemLink
1814         // and sysmemLinkRateMBps need to be updated accordingly.
1815         NV_ASSERT(sysmemConnType != NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_NVLINK);
1816 
1817         device->connectedToSwitch = NV_TRUE;
1818     }
1819 
1820     portMemFree(nvlinkStatus);
1821 
1822     mapInit(&device->kern2PhysDescrMap, portMemAllocatorGetGlobalNonPaged());
1823 
1824     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1825     if (status != NV_OK)
1826         goto cleanup_ecc;
1827     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
1828     rmapiLockRelease();
1829     if (status != NV_OK)
1830         goto cleanup_ecc;
1831 
1832     if (IS_VIRTUAL_WITH_HEAVY_SRIOV(pGpu))
1833     {
1834         device->pPagingChannelRpcMutex = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged());
1835         if (device->pPagingChannelRpcMutex == NULL)
1836         {
1837             status = NV_ERR_NO_MEMORY;
1838             goto cleanup_ecc;
1839         }
1840     }
1841 
1842     *outDevice = device;
1843     return NV_OK;
1844 
1845 cleanup_ecc:
1846     gpuDeviceRmSubDeviceDeinitEcc(device);
1847 cleanup_subdevice_usermode:
1848     gpuDeviceDestroyUsermodeRegion(device);
1849 cleanup_smc_partition:
1850     nvGpuOpsRmSmcPartitionDestroy(device);
1851 cleanup_rm_subdevice:
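    // nvGpuOpsDeviceDestroy() drops the subdevice reference and frees the
    // device object itself, so clear the local pointer so that the remaining
    // cleanup labels below do not touch freed memory.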
1852     nvGpuOpsDeviceDestroy(device);
1853     device = NULL;
1854 cleanup_rm_device:
1855     if (device)
1856         nvGpuOpsRmDeviceDestroy(device);
1857 cleanup_device_obj:
1858     portMemFree(device);
1859     return status;
1860 }
1861 
1862 NV_STATUS nvGpuOpsDeviceDestroy(struct gpuDevice *device)
1863 {
1864     deviceDesc *rmDevice = device->rmDevice;
1865     subDeviceDesc *rmSubDevice = device->rmSubDevice;
1866     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1867 
1868     rmSubDevice->refCount--;
1869 
1870     if (rmSubDevice->refCount == 0)
1871     {
1872         gpuDeviceDestroyUsermodeRegion(device);
1873 
1874         gpuDeviceRmSubDeviceDeinitEcc(device);
1875 
1876         nvGpuOpsRmSmcPartitionDestroy(device);
1877 
1878         portSyncRwLockAcquireWrite(rmDevice->btreeLock);
1879         rmDevice->subDeviceCount--;
1880         deleteDescriptor(&rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice);
1881         pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->subDeviceHandle);
1882         portMemFree(rmSubDevice);
1883         portSyncRwLockReleaseWrite(rmDevice->btreeLock);
1884 
1885         nvGpuOpsRmDeviceDestroy(device);
1886     }
1887 
1888     mapDestroy(&device->kern2PhysDescrMap);
1889 
1890     if (device->pPagingChannelRpcMutex != NULL)
1891         portSyncMutexDestroy(device->pPagingChannelRpcMutex);
1892 
1893     portMemFree(device);
1894     return NV_OK;
1895 }
1896 
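// Hand ownership of the replayable page fault interrupt between RM and the
// caller: bOwnInterrupts == NV_TRUE gives the interrupt to the caller (RM
// stops owning it), NV_FALSE returns ownership to RM.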
1897 NV_STATUS nvGpuOpsOwnPageFaultIntr(struct gpuDevice *device,
1898                                    NvBool bOwnInterrupts)
1899 {
1900     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1901     NV2080_CTRL_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP_PARAMS changeParams = {0};
1902     changeParams.bOwnedByRm = !bOwnInterrupts;
1903     return pRmApi->Control(pRmApi,
1904                            device->session->handle,
1905                            device->subhandle,
1906                            NV2080_CTRL_CMD_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP,
1907                            &changeParams,
1908                            sizeof(changeParams));
1909 }
1910 
1911 static NV_STATUS getAddressSpaceInfo(struct gpuAddressSpace *vaSpace,
1912                                      OBJGPU *pGpu,
1913                                      UvmGpuAddressSpaceInfo *vaSpaceInfo)
1914 {
1915     NV_STATUS status;
1916     NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS params = {0};
1917     OBJVASPACE *pVAS = NULL;
1918     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1919     struct gpuDevice *device = vaSpace->device;
1920     struct gpuSession *session = device->session;
1921     subDeviceDesc *rmSubDevice = device->rmSubDevice;
1922 
1923     params.hVASpace = vaSpace->handle;
1924     status = pRmApi->Control(pRmApi,
1925                              session->handle,
1926                              device->handle,
1927                              NV0080_CTRL_CMD_DMA_ADV_SCHED_GET_VA_CAPS,
1928                              &params,
1929                              sizeof(params));
1930     if (status != NV_OK)
1931         return status;
1932 
1933     vaSpaceInfo->bigPageSize = params.bigPageSize;
1934 
1935     // TODO: Acquired because resserv expects RMAPI lock. Necessary?
1936     {
1937         RsClient *pClient;
1938         status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1939         if (status != NV_OK)
1940             return status;
1941 
1942         status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
1943         if (status == NV_OK)
1944         {
1945             status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, vaSpace->handle, &pVAS);
1946         }
1947         rmapiLockRelease();
1948         if (status != NV_OK)
1949             return status;
1950     }
1951 
1952     vaSpaceInfo->atsEnabled = vaspaceIsAtsEnabled(pVAS);
1953 
1954     if (isDeviceTuringPlus(vaSpace->device))
1955     {
1956         //
1957         // On Turing+ use the VIRTUAL_FUNCTION so this works fine in hosts and
1958         // guests
1959         //
1960         void *bar0Mapping = gpuBar0BaseAddress(pGpu);
1961         vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_0));
1962         vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_1));
1963     }
1964     else if (isDeviceVoltaPlus(vaSpace->device))
1965     {
1966         NV_ASSERT(rmSubDevice->clientRegionMapping);
1967 
1968         //
1969         // On Volta prefer USERMODE mappings for better passthrough
1970         // performance on some hypervisors (see CL23003453 for more details)
1971         //
1972         vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_0);
1973         vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_1);
1974     }
1975     else
1976     {
1977         void *bar0Mapping = gpuBar0BaseAddress(pGpu);
1978         vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_0);
1979         vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_1);
1980     }
1981 
1982     if (IS_MIG_IN_USE(pGpu))
1983     {
1984         KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = rmSubDevice->smcPartition.info;
1985         MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
1986 
1987         vaSpaceInfo->maxSubctxCount = pResourceAllocation->veidCount;
1988         vaSpaceInfo->smcGpcCount = pResourceAllocation->gpcCount;
1989     }
1990     else
1991     {
1992         NV2080_CTRL_FIFO_GET_INFO_PARAMS *fifoGetInfoParams;
1993 
1994         //
1995         // NV2080_CTRL_FIFO_GET_INFO_PARAMS takes over 2KB, so we use a heap
1996         // allocation
1997         //
1998         fifoGetInfoParams = portMemAllocNonPaged(sizeof(*fifoGetInfoParams));
1999         if (fifoGetInfoParams == NULL)
2000             return NV_ERR_NO_MEMORY;
2001 
2002         fifoGetInfoParams->fifoInfoTblSize = 1;
2003         fifoGetInfoParams->fifoInfoTbl[0].index = NV2080_CTRL_FIFO_INFO_INDEX_MAX_SUBCONTEXT_PER_GROUP;
2004 
2005         status = pRmApi->Control(pRmApi,
2006                                  session->handle,
2007                                  rmSubDevice->subDeviceHandle,
2008                                  NV2080_CTRL_CMD_FIFO_GET_INFO,
2009                                  fifoGetInfoParams,
2010                                  sizeof(*fifoGetInfoParams));
2011 
2012         vaSpaceInfo->maxSubctxCount = fifoGetInfoParams->fifoInfoTbl[0].data;
2013 
2014         portMemFree(fifoGetInfoParams);
2015 
2016         if (status != NV_OK)
2017             return status;
2018     }
2019 
2020     return NV_OK;
2021 }
2022 
2023 // This function will create a new address space object of type FERMI_VASPACE_A.
2024 NV_STATUS nvGpuOpsAddressSpaceCreate(struct gpuDevice *device,
2025                                      NvU64 vaBase,
2026                                      NvU64 vaSize,
2027                                      struct gpuAddressSpace **vaSpace,
2028                                      UvmGpuAddressSpaceInfo *vaSpaceInfo)
2029 {
2030     NV_STATUS status;
2031     struct gpuAddressSpace *gpuVaSpace = NULL;
2032     OBJGPU *pGpu = NULL;
2033     NV_VASPACE_ALLOCATION_PARAMETERS vaParams = {0};
2034     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2035     PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
2036 
2037     gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
2038     if (gpuVaSpace == NULL)
2039         return NV_ERR_NO_MEMORY;
2040 
2041     portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));
2042     gpuVaSpace->vaBase = vaBase;
2043     gpuVaSpace->vaSize = vaSize;
2044     gpuVaSpace->handle = NV01_NULL_OBJECT;
2045     gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
2046     gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
2047     gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);
2048 
2049     *vaSpace = NULL;
2050     portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));
2051 
2052     // Create a new vaSpace object
    vaParams.index  = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW;
2054     vaParams.vaBase = gpuVaSpace->vaBase;
2055     vaParams.vaSize = gpuVaSpace->vaSize;
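    // A non-zero, caller-provided size selects shared management of that VA
    // range between the caller and RM; with a zero size the default flags are
    // used and RM picks the base and size itself (see the comment after the
    // Alloc call below).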
2056     vaParams.flags  = gpuVaSpace->vaSize ?
2057                       NV_VASPACE_ALLOCATION_FLAGS_SHARED_MANAGEMENT :
2058                       NV_VASPACE_ALLOCATION_FLAGS_NONE;
2059 
2060     // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
2061     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    // The VA space has not been allocated yet, so skip the handle free.
    if (status != NV_OK)
        goto cleanup_struct;
2064     status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
2065     rmapiLockRelease();
    if (status != NV_OK)
    {
        goto cleanup_struct;
    }
2070 
    status = pRmApi->Alloc(pRmApi,
                           device->session->handle,
                           device->handle,
                           &gpuVaSpace->handle, FERMI_VASPACE_A,
                           &vaParams);
2076     if (status != NV_OK)
2077     {
2078         goto cleanup_struct;
2079     }
2080 
    // If the caller did not provide a base and size, RM has filled them in now.
2082     gpuVaSpace->vaBase = vaParams.vaBase;
2083     gpuVaSpace->vaSize = vaParams.vaSize;
2084     gpuVaSpace->device = device;
2085 
2086     status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
2087     if (status != NV_OK)
2088     {
2089         goto cleanup_vaspace;
2090     }
2091 
2092     *vaSpace = gpuVaSpace;
2093     return status;
2094 
2095 cleanup_vaspace:
2096     pRmApi->Free(pRmApi, device->session->handle, gpuVaSpace->handle);
2097 
2098 cleanup_struct:
2099     portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
2100     portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
2101     portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
2102     portMemFree(gpuVaSpace);
2103     return status;
2104 }
2105 
2106 NV_STATUS nvGpuOpsDupAddressSpace(struct gpuDevice *device,
2107                                   NvHandle hUserClient,
2108                                   NvHandle hUserVASpace,
2109                                   struct gpuAddressSpace **vaSpace,
2110                                   UvmGpuAddressSpaceInfo *vaSpaceInfo)
2111 {
2112     NV_STATUS status = NV_OK;
2113     struct gpuAddressSpace *gpuVaSpace = NULL;
2114     struct gpuSession *session = device->session;
2115     OBJVASPACE *pVAS = NULL;
2116     OBJGPU *pGpu = NULL;
2117     RsResourceRef *pVaSpaceRef;
2118     RsResourceRef *pDeviceRef;
2119     Device *pDevice = NULL;
2120     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2121     PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
2122 
2123     *vaSpace = NULL;
2124     portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));
2125 
2126     // TODO - Move this check to RMDupObject later.
2127     // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
2128     // Find the device associated with the hUserVASpace and verify that the UUID belongs to it.
2129     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2130     if (status != NV_OK)
2131         return status;
2132     status = serverutilGetResourceRef(hUserClient, hUserVASpace, &pVaSpaceRef);
2133     rmapiLockRelease();
2134     if (status != NV_OK)
2135         return status;
2136 
2137     if (!dynamicCast(pVaSpaceRef->pResource, VaSpaceApi))
2138         return NV_ERR_INVALID_OBJECT;
2139 
2140     // The parent must be valid and a device if this is a VA space handle
2141     // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
2142     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2143     if (status != NV_OK)
2144         return status;
2145     status = serverutilGetResourceRef(hUserClient, pVaSpaceRef->pParentRef->hResource, &pDeviceRef);
2146     rmapiLockRelease();
2147     NV_ASSERT(status == NV_OK);
2148 
2149     pDevice = dynamicCast(pDeviceRef->pResource, Device);
2150     NV_ASSERT(pDevice != NULL);
2151 
2152     if (pDevice->deviceInst != device->deviceInstance)
2153         return NV_ERR_OTHER_DEVICE_FOUND;
2154 
2155     gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
2156     if (gpuVaSpace == NULL)
2157         return NV_ERR_NO_MEMORY;
2158 
2159     portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));
2160 
2161     gpuVaSpace->device = device;
2162     gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
2163     gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
2164     gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);
2165 
2166     // dup the vaspace
2167     gpuVaSpace->handle = NV01_NULL_OBJECT;
2168     status = pRmApi->DupObject(pRmApi,
2169                                session->handle,
2170                                device->handle,
2171                                &gpuVaSpace->handle,
2172                                hUserClient,
2173                                hUserVASpace,
2174                                NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
2175     if (status != NV_OK)
2176         goto cleanup_vaspace;
2177 
2178     // TODO: Acquired because these functions expect RMAPI lock. Necessary?
2179     {
2180         RsClient *pClient;
2181         status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2182         if (status != NV_OK)
2183             goto cleanup_dup_vaspace;
2184 
2185         status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
2186         if (status == NV_OK)
2187         {
2188             status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
2189             if (status == NV_OK)
2190             {
2191                 status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, gpuVaSpace->handle, &pVAS);
2192             }
2193         }
2194         rmapiLockRelease();
2195         if (status != NV_OK)
2196             goto cleanup_dup_vaspace;
2197     }
2198 
2199     if (!vaspaceIsExternallyOwned(pVAS))
2200     {
2201         status = NV_ERR_INVALID_FLAGS;
2202         goto cleanup_dup_vaspace;
2203     }
2204 
2205     status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
2206     if (status != NV_OK)
2207         goto cleanup_dup_vaspace;
2208 
2209     *vaSpace = gpuVaSpace;
2210 
2211     return NV_OK;
2212 
2213 cleanup_dup_vaspace:
2214     pRmApi->Free(pRmApi, session->handle, gpuVaSpace->handle);
2215 cleanup_vaspace:
2216     portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
2217     portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
2218     portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
2219     portMemFree(gpuVaSpace);
2220     return status;
2221 }
2222 
// Get the NVLink connection status for the given device. On success, the
// caller is responsible for freeing the returned memory.
2225 static NV_STATUS allocNvlinkStatusForSubdevice(struct gpuDevice *device,
2226                                                NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut)
2227 {
2228     NV_STATUS status;
2229     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
2230     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2231 
2232     *nvlinkStatusOut = NULL;
2233 
2234     nvlinkStatus = portMemAllocNonPaged(sizeof(*nvlinkStatus));
2235     if (nvlinkStatus == NULL)
2236         return NV_ERR_NO_MEMORY;
2237 
2238     portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
2239     status = pRmApi->Control(pRmApi,
2240                              device->session->handle,
2241                              device->subhandle,
2242                              NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS,
2243                              nvlinkStatus,
2244                              sizeof(*nvlinkStatus));
2245     if (status == NV_ERR_NOT_SUPPORTED)
2246     {
2247         portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
2248     }
2249     else if (status != NV_OK)
2250     {
2251         portMemFree(nvlinkStatus);
2252         NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
2253                   __LINE__, nvstatusToString(status));
2254         return status;
2255     }
2256 
2257     *nvlinkStatusOut = nvlinkStatus;
2258 
2259     return NV_OK;
2260 }
2261 
2262 // If the given NvLink connection has a GPU device as an endpoint, return the
// version of the NvLink connection with that GPU, and the maximum
2264 // unidirectional bandwidth in megabytes per second. Otherwise, return
2265 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2266 static NvU32 getNvlinkConnectionToGpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
2267                                       OBJGPU *pGpu,
2268                                       NvU32 *linkBandwidthMBps)
2269 {
2270     NvU32 i;
2271 
2272     NvU32 version  = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2273     NvU32 domain   = gpuGetDomain(pGpu);
2274     NvU16 bus      = gpuGetBus(pGpu);
2275     NvU16 device   = gpuGetDevice(pGpu);
2276     NvU32 bwMBps   = 0;
2277 
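    // Sum the line rate over every enabled, connected, non-loopback link whose
    // remote endpoint is the given GPU; all matching links are expected to
    // report the same NVLink version.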
2278     for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
2279     {
2280         if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
2281             continue;
2282 
2283         if (!nvlinkStatus->linkInfo[i].connected)
2284             continue;
2285 
2286         // Skip loopback/loopout connections
2287         if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
2288             continue;
2289 
2290         if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
2291         {
2292             if ((nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceIdFlags &
2293                  NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI) == 0)
2294             {
2295                 NV_PRINTF(LEVEL_ERROR, "No PCI information for GPU.\n");
2296                 continue;
2297             }
2298 
2299             if ((domain                   == nvlinkStatus->linkInfo[i].remoteDeviceInfo.domain) &&
2300                 (bus                      == nvlinkStatus->linkInfo[i].remoteDeviceInfo.bus)    &&
2301                 (device                   == nvlinkStatus->linkInfo[i].remoteDeviceInfo.device) &&
2302                 (pGpu->idInfo.PCIDeviceID == nvlinkStatus->linkInfo[i].remoteDeviceInfo.pciDeviceId))
2303             {
2304                 NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;
2305 
2306                 NV_ASSERT(NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_P2P_ATOMICS));
2307 
2308                 if (bwMBps == 0)
2309                     version = nvlinkStatus->linkInfo[i].nvlinkVersion;
2310 
2311                 bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
2312                 NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
2313             }
2314         }
2315     }
2316 
2317     *linkBandwidthMBps = bwMBps;
2318     if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2319         NV_ASSERT(*linkBandwidthMBps == 0);
2320 
2321     return version;
2322 }
2323 
2324 // If the given NvLink connection has a NPU device as an endpoint, return the
// version of the NvLink connection with that NPU, and the maximum
2326 // unidirectional bandwidth in megabytes per second. Otherwise, return
2327 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2328 static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
2329                                       NvBool *atomicSupported,
2330                                       NvU32 *linkBandwidthMBps)
2331 {
2332     NvU32 i;
2333     NvU32 bwMBps   = 0;
2334     NvU32 version  = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2335 
2336     *atomicSupported = NV_FALSE;
2337 
2338     for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
2339     {
2340         if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
2341             continue;
2342 
2343         if (!nvlinkStatus->linkInfo[i].connected)
2344             continue;
2345 
2346         // Skip loopback/loopout connections
2347         if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
2348             continue;
2349 
2350         if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_NPU)
2351         {
2352             NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;
2353             NvBool atomicCap = !!NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_SYSMEM_ATOMICS);
2354 
2355             if (bwMBps == 0)
2356             {
2357                 *atomicSupported = atomicCap;
2358                 version = nvlinkStatus->linkInfo[i].nvlinkVersion;
2359             }
2360             bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
2361             NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
2362             NV_ASSERT(*atomicSupported == atomicCap);
2363         }
2364     }
2365 
2366     *linkBandwidthMBps = bwMBps;
2367     if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2368         NV_ASSERT(*linkBandwidthMBps == 0);
2369 
2370     return version;
2371 }
2372 
2373 // If the given NvLink connection has a switch as an endpoint, return the
2374 // version of the NvLink connection with that switch, and the maximum
2375 // unidirectional bandwidth in megabytes per second. Otherwise, return
2376 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2377 static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
2378                                          NvU32 *linkBandwidthMBps)
2379 {
2380     NvU32 i;
2381     NvU32 bwMBps   = 0;
2382     NvU32 version  = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2383 
2384     for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
2385     {
2386         if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
2387             continue;
2388 
2389         if (!nvlinkStatus->linkInfo[i].connected)
2390             continue;
2391 
2392         // Skip loopback/loopout connections
2393         if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
2394             continue;
2395 
2396         if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_SWITCH)
2397         {
2398             if (bwMBps == 0)
2399                 version = nvlinkStatus->linkInfo[i].nvlinkVersion;
2400 
2401             bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
2402             NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
2403        }
2404     }
2405 
2406     *linkBandwidthMBps = bwMBps;
2407     if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2408         NV_ASSERT(*linkBandwidthMBps == 0);
2409 
2410     return version;
2411 }
2412 
2413 // Compute whether the non-peer GPUs with the given NVLink connections can
2414 // communicate through P9 NPUs
2415 static NV_STATUS gpusHaveNpuNvlink(NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1,
2416                                    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2,
2417                                    NvU32 *nvlinkVersion,
2418                                    NvU32 *linkBandwidthMBps)
2419 {
2420     NvU32 nvlinkVersion1, nvlinkVersion2;
2421     NvU32 tmpLinkBandwidthMBps;
2422     NvBool atomicSupported1, atomicSupported2;
2423 
2424     *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2425 
2426     nvlinkVersion1 = getNvlinkConnectionToNpu(nvlinkStatus1,
2427                                               &atomicSupported1,
2428                                               &tmpLinkBandwidthMBps);
2429     nvlinkVersion2 = getNvlinkConnectionToNpu(nvlinkStatus2,
2430                                               &atomicSupported2,
2431                                               &tmpLinkBandwidthMBps);
2432 
2433     if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID ||
2434         nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2435         return NV_OK;
2436 
2437     // Non-peer GPU communication over NPU is only supported on NVLink 2.0 or
2438     // greater
2439     if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
2440         nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0)
2441     {
2442         // NVLink1 devices cannot be mixed with other versions. NVLink3
2443         // supports mixing NVLink2 and NVLink3 devices
2444         NV_ASSERT(nvlinkVersion1 == nvlinkVersion2);
2445         return NV_OK;
2446     }
2447 
2448     NV_ASSERT(atomicSupported1);
2449     NV_ASSERT(atomicSupported2);
2450 
2451     // We do not explore the whole connectivity graph. We assume that NPUs
2452     // connected to NVLink2 (and greater) can forward memory requests so that
2453     // if GPU A is connected to NPU M and GPU B is connected to NPU N, A can
2454     // access B.
2455     *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2);
2456 
2457     // Link bandwidth not provided because the intermediate link rate could
2458     // vary a lot with system topologies & current load, making this bandwidth
2459     // obsolete.
2460     *linkBandwidthMBps = 0;
2461 
2462     return NV_OK;
2463 }
2464 
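// Helper that issues NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2 for the
// (device1, device2) pair; the results are returned in p2pCapsParams.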
2465 static NV_STATUS rmSystemP2PCapsControl(struct gpuDevice *device1,
2466                                         struct gpuDevice *device2,
2467                                         NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams)
2468 {
2469     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2470 
2471     portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
2472     p2pCapsParams->gpuIds[0] = device1->gpuId;
2473     p2pCapsParams->gpuIds[1] = device2->gpuId;
2474     p2pCapsParams->gpuCount = 2;
2475 
2476     NvHandle handle = device1->session->handle;
2477     NV_STATUS status = pRmApi->Control(pRmApi,
2478                                        handle,
2479                                        handle,
2480                                        NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2,
2481                                        p2pCapsParams,
2482                                        sizeof(*p2pCapsParams));
2483     if (status != NV_OK)
2484     {
2485         NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
2486                   __LINE__, nvstatusToString(status));
2487     }
2488 
2489     return status;
2490 }
2491 
2492 // Get R/W/A access capabilities and the link type between the two given GPUs
2493 static NV_STATUS getSystemP2PCaps(struct gpuDevice *device1,
2494                                   struct gpuDevice *device2,
2495                                   struct systemP2PCaps *p2pCaps)
2496 {
2497     NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL;
2498     NV_STATUS status = NV_OK;
2499 
2500     p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams));
2501     if (p2pCapsParams == NULL)
2502     {
2503         status = NV_ERR_NO_MEMORY;
2504         goto done;
2505     }
2506 
2507     status = rmSystemP2PCapsControl(device1, device2, p2pCapsParams);
2508     if (status != NV_OK)
2509         goto done;
2510 
2511     portMemSet(p2pCaps, 0, sizeof(*p2pCaps));
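    // busPeerIds is a gpuCount x gpuCount table indexed as [i * gpuCount + j];
    // the two entries below are the peer IDs for the device1<->device2 pair,
    // one for each direction.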
2512     p2pCaps->peerIds[0] = p2pCapsParams->busPeerIds[0 * 2 + 1];
2513     p2pCaps->peerIds[1] = p2pCapsParams->busPeerIds[1 * 2 + 0];
2514     p2pCaps->nvlinkSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_NVLINK_SUPPORTED, p2pCapsParams->p2pCaps);
2515     p2pCaps->atomicSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_ATOMICS_SUPPORTED, p2pCapsParams->p2pCaps);
2516     p2pCaps->indirectAccessSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_INDIRECT_NVLINK_SUPPORTED,
2517                                                  p2pCapsParams->p2pCaps);
2518 
2519     // TODO: Bug 1768805: Check both reads and writes since RM seems to be
2520     //       currently incorrectly reporting just the P2P write cap on some
2521     //       systems that cannot support P2P at all. See the bug for more
2522     //       details.
2523     if (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps) &&
2524         REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps))
2525     {
2526         NV_ASSERT(!p2pCaps->indirectAccessSupported);
2527 
2528         p2pCaps->accessSupported = NV_TRUE;
2529     }
2530 
2531     if (p2pCaps->nvlinkSupported || p2pCaps->indirectAccessSupported)
2532     {
2533         // Exactly one CE is expected to be recommended for transfers between
2534         // NvLink peers
2535         NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1);
2536 
2537         // Query the write mask only; UVM has no use for the read mask
2538         p2pCaps->optimalNvlinkWriteCEs[0] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs);
2539 
2540         // Query the P2P capabilities of device2->device1, which may be
2541         // different from those of device1->device2
2542         status = rmSystemP2PCapsControl(device2, device1, p2pCapsParams);
2543         if (status != NV_OK)
2544             goto done;
2545 
2546         NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1);
2547 
2548         p2pCaps->optimalNvlinkWriteCEs[1] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs);
2549     }
2550 
2551 done:
2552     portMemFree(p2pCapsParams);
2553     return status;
2554 }
2555 
2556 // Return the NVLink P2P capabilities of the peer GPUs with the given devices
2557 static NV_STATUS getNvlinkP2PCaps(struct gpuDevice *device1,
2558                                   struct gpuDevice *device2,
2559                                   NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1,
2560                                   NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2,
2561                                   NvU32 *nvlinkVersion,
2562                                   NvU32 *linkBandwidthMBps)
2563 {
2564     NvU32 nvlinkVersion1, nvlinkVersion2;
2565     NvU32 linkBandwidthMBps1, linkBandwidthMBps2;
2566 
2567     *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2568 
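    // When both GPUs sit behind an NVLink switch, evaluate their links to the
    // switch; otherwise look for a direct NVLink connection between the two
    // GPUs.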
2569     if (device1->connectedToSwitch && device2->connectedToSwitch)
2570     {
2571         nvlinkVersion1 = getNvlinkConnectionToSwitch(nvlinkStatus1,
2572                                                      &linkBandwidthMBps1);
2573         nvlinkVersion2 = getNvlinkConnectionToSwitch(nvlinkStatus2,
2574                                                      &linkBandwidthMBps2);
2575     }
2576     else
2577     {
2578         OBJGPU *pGpu1, *pGpu2;
2579 
2580         pGpu1 = gpumgrGetGpuFromId(device1->gpuId);
2581         if (!pGpu1)
2582             return NV_ERR_OBJECT_NOT_FOUND;
2583 
2584         pGpu2 = gpumgrGetGpuFromId(device2->gpuId);
2585         if (!pGpu2)
2586             return NV_ERR_OBJECT_NOT_FOUND;
2587 
2588         nvlinkVersion1 = getNvlinkConnectionToGpu(nvlinkStatus1,
2589                                                   pGpu2,
2590                                                   &linkBandwidthMBps1);
2591         nvlinkVersion2 = getNvlinkConnectionToGpu(nvlinkStatus2,
2592                                                   pGpu1,
2593                                                   &linkBandwidthMBps2);
2594     }
2595 
2596     if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID ||
2597         nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2598     {
2599         *linkBandwidthMBps = 0;
2600         return NV_OK;
2601     }
2602 
2603     // NVLink1 devices cannot be mixed with other versions. NVLink3 supports
2604     // mixing NVLink2 and NVLink3 devices. NVLink4 devices cannot be mixed with
2605     // prior NVLink versions.
2606     if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
2607         nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
2608         nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0 ||
2609         nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
2610     {
2611         NV_ASSERT(nvlinkVersion1 == nvlinkVersion2);
2612         NV_ASSERT(linkBandwidthMBps1 == linkBandwidthMBps2);
2613     }
2614 
2615     *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2);
2616     *linkBandwidthMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2);
2617 
2618     return NV_OK;
2619 }
2620 
2621 NV_STATUS nvGpuOpsGetP2PCaps(struct gpuDevice *device1,
2622                              struct gpuDevice *device2,
2623                              getP2PCapsParams *p2pCapsParams)
2624 {
2625     NV_STATUS status = NV_OK;
2626     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1 = NULL;
2627     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2 = NULL;
2628     struct systemP2PCaps p2pCaps;
2629 
2630     if (!device1 || !device2)
2631         return NV_ERR_INVALID_ARGUMENT;
2632 
2633     if (device1->session != device2->session)
2634         return NV_ERR_INVALID_ARGUMENT;
2635 
2636     if (!p2pCapsParams)
2637         return NV_ERR_INVALID_ARGUMENT;
2638 
2639     status = allocNvlinkStatusForSubdevice(device1, &nvlinkStatus1);
2640     if (status != NV_OK)
2641         goto cleanup;
2642 
2643     status = allocNvlinkStatusForSubdevice(device2, &nvlinkStatus2);
2644     if (status != NV_OK)
2645         goto cleanup;
2646 
2647     portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
2648     p2pCapsParams->peerIds[0]      = (NvU32)-1;
2649     p2pCapsParams->peerIds[1]      = (NvU32)-1;
2650     p2pCapsParams->p2pLink         = UVM_LINK_TYPE_NONE;
2651     p2pCapsParams->indirectAccess  = NV_FALSE;
2652 
2653     status = getSystemP2PCaps(device1, device2, &p2pCaps);
2654     if (status != NV_OK)
2655         goto cleanup;
2656 
2657     if (p2pCaps.indirectAccessSupported)
2658     {
2659         NvU32 nvlinkVersion;
2660         NvU32 linkBandwidthMBps;
2661         NvU32 p2pLink;
2662 
2663         status = gpusHaveNpuNvlink(nvlinkStatus1,
2664                                    nvlinkStatus2,
2665                                    &nvlinkVersion,
2666                                    &linkBandwidthMBps);
2667         if (status != NV_OK)
2668             goto cleanup;
2669 
2670         p2pLink = rmControlToUvmNvlinkVersion(nvlinkVersion);
2671 
2672         NV_ASSERT(p2pLink >= UVM_LINK_TYPE_NVLINK_2);
2673         NV_ASSERT(linkBandwidthMBps == 0);
2674 
2675         p2pCapsParams->indirectAccess           = NV_TRUE;
2676         p2pCapsParams->p2pLink                  = p2pLink;
2677         p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0];
2678         p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1];
2679         p2pCapsParams->totalLinkLineRateMBps    = linkBandwidthMBps;
2680     }
2681     else if (p2pCaps.accessSupported)
2682     {
2683         p2pCapsParams->peerIds[0] = p2pCaps.peerIds[0];
2684         p2pCapsParams->peerIds[1] = p2pCaps.peerIds[1];
2685 
2686         if (p2pCaps.nvlinkSupported)
2687         {
2688             NvU32 nvlinkVersion;
2689             NvU32 linkBandwidthMBps;
2690 
2691             NV_ASSERT(p2pCaps.atomicSupported);
2692 
2693             status = getNvlinkP2PCaps(device1,
2694                                       device2,
2695                                       nvlinkStatus1,
2696                                       nvlinkStatus2,
2697                                       &nvlinkVersion,
2698                                       &linkBandwidthMBps);
2699             if (status != NV_OK)
2700                 goto cleanup;
2701 
2702             p2pCapsParams->p2pLink                  = rmControlToUvmNvlinkVersion(nvlinkVersion);
2703             p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0];
2704             p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1];
2705 
2706             NV_ASSERT(p2pCapsParams->p2pLink != UVM_LINK_TYPE_NONE);
2707             NV_ASSERT(linkBandwidthMBps != 0);
2708 
2709             p2pCapsParams->totalLinkLineRateMBps    = linkBandwidthMBps;
2710         }
2711         else
2712         {
2713             NvU32 linkBandwidthMBps1, linkBandwidthMBps2;
2714 
2715             status = getPCIELinkRateMBps(device1, &linkBandwidthMBps1);
2716             if (status != NV_OK)
2717                 goto cleanup;
2718 
2719             status = getPCIELinkRateMBps(device2, &linkBandwidthMBps2);
2720             if (status != NV_OK)
2721                 goto cleanup;
2722 
2723             p2pCapsParams->p2pLink               = UVM_LINK_TYPE_PCIE;
2724             p2pCapsParams->totalLinkLineRateMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2);
2725         }
2726     }
2727 
2728 cleanup:
2729     portMemFree(nvlinkStatus1);
2730     portMemFree(nvlinkStatus2);
2731 
2732     return status;
2733 }
2734 
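// Check whether the GPU identified by gpuId can access memory owned by
// memOwnerGpuId over P2P (both reads and writes must be supported) and return
// the peer ID it would use for that access.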
2735 static NV_STATUS nvGpuOpsGetExternalAllocP2pInfo(struct gpuSession *session,
2736                                                  NvU32 memOwnerGpuId,
2737                                                  NvU32 gpuId,
2738                                                  NvBool *isPeerSupported,
2739                                                  NvU32 *peerId)
2740 {
2741     NV_STATUS status = NV_OK;
2742     NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL;
2743     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2744 
2745     NV_ASSERT(gpuId != memOwnerGpuId);
2746 
2747     p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams));
2748     if (p2pCapsParams == NULL)
2749     {
2750         status = NV_ERR_NO_MEMORY;
2751         goto done;
2752     }
2753 
2754     portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
2755     p2pCapsParams->gpuIds[0] = gpuId;
2756     p2pCapsParams->gpuIds[1] = memOwnerGpuId;
2757     p2pCapsParams->gpuCount = 2;
2758 
2759     status = pRmApi->Control(pRmApi,
2760                              session->handle,
2761                              session->handle,
2762                              NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2,
2763                              p2pCapsParams,
2764                              sizeof(*p2pCapsParams));
2765     if (status != NV_OK)
2766         goto done;
2767 
2768     *isPeerSupported =
2769             (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps) &&
2770              REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps));
2771 
2772     *peerId = p2pCapsParams->busPeerIds[0 * 2 + 1];
2773 
2774 done:
2775     portMemFree(p2pCapsParams);
2776     return status;
2777 }
2778 
2779 static GMMU_APERTURE nvGpuOpsGetExternalAllocAperture(PMEMORY_DESCRIPTOR pMemDesc,
2780                                                       NvBool isIndirectPeerSupported,
2781                                                       NvBool isPeerSupported)
2782 {
2783     // Don't support both direct and indirect peers
2784     NV_ASSERT(!(isIndirectPeerSupported && isPeerSupported));
2785 
2786     // Get the aperture
2787     if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
2788     {
2789         if (isIndirectPeerSupported)
2790             return GMMU_APERTURE_SYS_COH;
2791 
2792         if (isPeerSupported)
2793             return GMMU_APERTURE_PEER;
2794 
2795         return GMMU_APERTURE_VIDEO;
2796     }
    else if ((memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) ||
             (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2))
2800     {
2801         return GMMU_APERTURE_PEER;
2802     }
2803     else
2804     {
2805         return GMMU_APERTURE_SYS_COH;
2806     }
2807 }
2808 
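//
// Decide whether the mapping should be GPU-uncached (volatile). With the default
// caching type this follows the memdesc's peer or local cache attribute; otherwise
// the caller's explicit caching type wins.
//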
2809 static NvBool nvGpuOpsGetExternalAllocVolatility(PMEMORY_DESCRIPTOR pMemDesc,
2810                                                  GMMU_APERTURE aperture,
2811                                                  NvBool isIndirectPeerSupported,
2812                                                  UvmRmGpuCachingType cachingType)
2813 {
2814     if (cachingType == UvmRmGpuCachingTypeDefault)
2815     {
2816         if (aperture == GMMU_APERTURE_PEER || isIndirectPeerSupported)
2817             return (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE;
2818         else
2819             return (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE;
2820     }
2821     else if (cachingType == UvmRmGpuCachingTypeForceUncached)
2822     {
2823         return NV_TRUE;
2824     }
2825     else
2826     {
2827         return NV_FALSE;
2828     }
2829 }
2830 
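//
// Derive the read-only and atomic attributes from the requested mapping type.
// Allocations flagged as device read-only may only be mapped read-only.
//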
2831 static NV_STATUS nvGpuOpsGetExternalAllocMappingAttribute(UvmRmGpuMappingType mappingType,
2832                                                           PMEMORY_DESCRIPTOR pMemDesc,
2833                                                           NvBool *readOnly,
2834                                                           NvBool *atomic)
2835 {
2836     *readOnly = NV_FALSE;
2837     *atomic = NV_FALSE;
2838 
2839     if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_DEVICE_READ_ONLY))
2840     {
2841         if (mappingType != UvmRmGpuMappingTypeDefault &&
2842             mappingType != UvmRmGpuMappingTypeReadOnly)
2843             return NV_ERR_INVALID_ACCESS_TYPE;
2844 
2845         *readOnly = NV_TRUE;
2846         *atomic = NV_FALSE;
2847     }
2848     else
2849     {
2850         *readOnly = (mappingType == UvmRmGpuMappingTypeReadOnly);
2851         *atomic = (mappingType == UvmRmGpuMappingTypeDefault ||
2852                    mappingType == UvmRmGpuMappingTypeReadWriteAtomic);
2853     }
2854 
2855     return NV_OK;
2856 }
2857 
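//
// Compute the PTE kind for the external mapping, honoring the caller's
// compression, format, and element-size overrides. Compressed kinds are
// downgraded to their uncompressed equivalent when the memory has no comptags.
//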
2858 static NV_STATUS nvGpuOpsGetPteKind(OBJGPU *pMappingGpu,
2859                                     MemoryManager *pMemoryManager,
2860                                     PMEMORY_DESCRIPTOR pMemDesc,
2861                                     Memory *pMemory,
2862                                     gpuExternalMappingInfo *pGpuExternalMappingInfo,
2863                                     NvU32 *newKind)
2864 {
2865     NV_STATUS               status              = NV_OK;
2866     FB_ALLOC_PAGE_FORMAT    fbAllocPageFormat   = {0};
2867     NvU32                   ctagId;
2868 
2869     if (pGpuExternalMappingInfo->compressionType == UvmRmGpuCompressionTypeEnabledNoPlc)
2870     {
2871         if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind))
2872         {
2873             status = memmgrChooseKind_HAL(pMappingGpu,
2874                                           pMemoryManager,
2875                                           &fbAllocPageFormat,
2876                                           NVOS32_ATTR_COMPR_DISABLE_PLC_ANY,
2877                                           newKind);
2878         }
2879         else
2880         {
2881             status = NV_ERR_INVALID_ARGUMENT;
2882         }
2883 
2884         if (status != NV_OK)
2885             return status;
2886     }
2887 
2888     if (pGpuExternalMappingInfo->formatType != UvmRmGpuFormatTypeDefault)
2889     {
2890         NV_ASSERT(pGpuExternalMappingInfo->elementBits != UvmRmGpuFormatElementBitsDefault);
2891 
2892         fbAllocPageFormat.attr = pMemory->Attr;
2893         fbAllocPageFormat.attr2 = pMemory->Attr2;
2894         fbAllocPageFormat.flags = pMemory->Flags;
2895         fbAllocPageFormat.type = pMemory->Type;
2896 
2897         switch (pGpuExternalMappingInfo->formatType)
2898         {
2899             case UvmRmGpuFormatTypeBlockLinear:
2900                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _FORMAT, _BLOCK_LINEAR, fbAllocPageFormat.attr);
2901                 break;
2902             default:
2903                 break;
2904         }
2905 
2906         switch (pGpuExternalMappingInfo->elementBits)
2907         {
2908             case UvmRmGpuFormatElementBits8:
2909                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _8, fbAllocPageFormat.attr);
2910                 break;
2911             case UvmRmGpuFormatElementBits16:
2912                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _16, fbAllocPageFormat.attr);
2913                 break;
2914             // CUDA does not support 24-bit width
2915             case UvmRmGpuFormatElementBits32:
2916                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _32, fbAllocPageFormat.attr);
2917                 break;
2918             case UvmRmGpuFormatElementBits64:
2919                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _64, fbAllocPageFormat.attr);
2920                 break;
2921             case UvmRmGpuFormatElementBits128:
2922                 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _128, fbAllocPageFormat.attr);
2923                 break;
2924             default:
2925                 break;
2926         }
2927 
2928         status = memmgrChooseKind_HAL(pMappingGpu, pMemoryManager, &fbAllocPageFormat,
2929                                       DRF_VAL(OS32, _ATTR, _COMPR, fbAllocPageFormat.attr),
2930                                       newKind);
2931         if (status != NV_OK)
2932         {
2933             NV_PRINTF(LEVEL_ERROR, "Invalid kind type (%x)\n", *newKind);
2934             return status;
2935         }
2936 
2937         //
2938         // Check that the memory descriptor already has allocated comptags
2939         // if the new mapping enables compression. Downgrade the kind if no
2940         // comptags are present.
2941         //
2942         ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc));
2943         if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind) && !ctagId)
2944             *newKind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, *newKind, NV_FALSE);
2945 
2946         if (*newKind == NV_MMU_PTE_KIND_INVALID)
2947             return NV_ERR_INVALID_ARGUMENT;
2948     }
2949     else
2950     {
2951         NV_ASSERT((pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBitsDefault) ||
2952                   (pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBits8));
2953     }
2954 
2955     return NV_OK;
2956 }
2957 
2958 static
2959 NV_STATUS
2960 nvGpuOpsMemGetPageSize
2961 (
2962     OBJGPU *pGpu,
2963     MEMORY_DESCRIPTOR *pMemDesc,
2964     NvU64  *pPageSize
2965 )
2966 {
2967     NvU64 pageSize;
2968     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2969     NV_STATUS status;
2970 
2971     pageSize = memdescGetPageSize(pMemDesc, AT_GPU);
2972     if (pageSize == 0)
2973     {
2974         status = memmgrSetMemDescPageSize_HAL(pGpu,
2975                                               pMemoryManager,
2976                                               pMemDesc,
2977                                               AT_GPU,
2978                                               RM_ATTR_PAGE_SIZE_DEFAULT);
2979         if (status != NV_OK)
2980             return status;
2981 
2982         pageSize = memdescGetPageSize(pMemDesc, AT_GPU);
2983         NV_ASSERT(pageSize != 0);
2984     }
2985 
2986     *pPageSize = pageSize;
2987 
2988     return NV_OK;
2989 }
2990 
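//
// Fill pGpuExternalMappingInfo->pteBuffer with PTEs covering [offset, offset + size)
// of the external allocation, writing as many entries as fit in the buffer and
// reporting the rest through numRemainingPtes.
//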
2991 static
2992 NV_STATUS
2993 nvGpuOpsBuildExternalAllocPtes
2994 (
2995     OBJVASPACE *pVAS,
2996     OBJGPU     *pMappingGpu,
2997     MEMORY_DESCRIPTOR *pMemDesc,
2998     Memory     *pMemory,
2999     NvU64       offset,
3000     NvU64       size,
3001     NvBool      isIndirectPeerSupported,
3002     NvBool      isPeerSupported,
3003     NvU32       peerId,
3004     gpuExternalMappingInfo *pGpuExternalMappingInfo
3005 )
3006 {
3007     NV_STATUS               status              = NV_OK;
3008     OBJGVASPACE            *pGVAS               = NULL;
3009     const GMMU_FMT         *pFmt                = NULL;
3010     const GMMU_FMT_PTE     *pPteFmt             = NULL;
3011     const MMU_FMT_LEVEL    *pLevelFmt           = NULL;
3012     GMMU_APERTURE           aperture;
3013     COMPR_INFO              comprInfo;
3014     GMMU_ENTRY_VALUE        pte                 = {{0}};
3015 
3016     NvU64         fabricBaseAddress   = NVLINK_INVALID_FABRIC_ADDR;
3017     NvU32         kind;
3018     NvU64         pageSize;
3019     NvU32         skipPteCount;
3020     NvBool        vol, atomic, readOnly;
3021     NvBool        encrypted, privileged;
3022     NvU64         iter, physAddr, mappingSize, pteCount;
3023     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu);
3024     KernelGmmu    *pKernelGmmu = GPU_GET_KERNEL_GMMU(pMappingGpu);
3025     NvU64          allocSize;
3026     NvBool         isCompressedKind;
3027     NvU64         *physicalAddresses = NULL;
3028     NvU32          newKind, oldKind;
3029     NvBool         kindChanged = NV_FALSE;
3030     NvU64          gpaOffset;
3031     NvBool         *isPLCable = NULL;
3032     NvU64          *guestPhysicalAddress = NULL;
3033     NvU64          mappingPageSize = pGpuExternalMappingInfo->mappingPageSize;
3034 
3035     NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
3036 
3037     status = nvGpuOpsMemGetPageSize(pMappingGpu,
3038                                     pMemDesc,
3039                                     &pageSize);
3040     if (status != NV_OK)
3041         return status;
3042 
3043     //
    // Default mappingPageSize to the allocation's page size if passed as 0.
    // If mappingPageSize is non-zero, it must evenly divide pageSize and
    // cannot be larger than the allocation's page size.
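    // For example, a 2MB allocation may be mapped with a 64KB granularity, but
    // a 64KB allocation cannot be mapped with a 2MB granularity.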
3047     //
3048     if (mappingPageSize == 0)
3049     {
3050         mappingPageSize = pageSize;
3051     }
3052     else if ((mappingPageSize > pageSize) ||
3053              (pageSize % mappingPageSize != 0))
3054     {
3055         return NV_ERR_INVALID_ARGUMENT;
3056     }
3057 
    // memdescGetSize() returns the requested size of the allocation, but the
    // actual allocation size can be larger than the requested size due to
    // alignment requirements, so make sure the correct size is used. Note that
    // the alignment can be greater than the pageSize.
3062     allocSize = RM_ALIGN_UP(pMemDesc->ActualSize, pageSize);
3063 
3064     if (offset >= allocSize)
3065         return NV_ERR_INVALID_BASE;
3066 
3067     if ((offset + size) > allocSize)
3068         return NV_ERR_INVALID_LIMIT;
3069 
3070     if ((size & (mappingPageSize - 1)) != 0)
3071         return NV_ERR_INVALID_ARGUMENT;
3072 
3073     if ((offset & (mappingPageSize - 1)) != 0)
3074         return NV_ERR_INVALID_ARGUMENT;
3075 
3076     pGVAS = dynamicCast(pVAS, OBJGVASPACE);
3077 
3078     // Get the GMMU format
3079     pFmt = gvaspaceGetGmmuFmt(pGVAS, pMappingGpu);
3080     pPteFmt = (GMMU_FMT_PTE*)pFmt->pPte;
3081     pLevelFmt = mmuFmtFindLevelWithPageShift(pFmt->pRoot, BIT_IDX_64(mappingPageSize));
3082 
3083     oldKind = newKind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
3084     if (pMemory)
3085     {
3086         //
3087         // The physical memory layout can be specified after allocation using
3088         // UvmMapExternalAllocation, so the kind attribute needs to be computed
3089         // again
3090         //
3091         status = nvGpuOpsGetPteKind(pMappingGpu, pMemoryManager, pMemDesc, pMemory,
3092                                     pGpuExternalMappingInfo, &newKind);
3093 
3094         if (status != NV_OK)
3095             return status;
3096 
3097         if (oldKind != newKind)
3098         {
3099             memdescSetPteKindForGpu(pMemDesc, pMappingGpu, newKind);
3100             kindChanged = NV_TRUE;
3101         }
3102     }
3103 
3104     // Get the CompTag range and Kind.
3105     status = memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMappingGpu, 0, &kind, &comprInfo);
3106     if (status != NV_OK)
3107         return status;
3108 
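    // The kind override above was only needed for memmgrGetKindComprForGpu_HAL();
    // restore the memdesc's original kind.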
3109     if (kindChanged)
3110         memdescSetPteKindForGpu(pMemDesc, pMappingGpu, oldKind);
3111 
3112     aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, isIndirectPeerSupported, isPeerSupported);
3113 
3114     vol = nvGpuOpsGetExternalAllocVolatility(pMemDesc, aperture, isIndirectPeerSupported,
3115                                              pGpuExternalMappingInfo->cachingType);
3116 
3117     status = nvGpuOpsGetExternalAllocMappingAttribute(pGpuExternalMappingInfo->mappingType,
3118                                                       pMemDesc,
3119                                                       &readOnly,
3120                                                       &atomic);
3121     if (status != NV_OK)
3122         return status;
3123 
3124     encrypted = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ENCRYPTED);
3125 
3126     privileged = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GPU_PRIVILEGED);
3127 
3128     mappingSize = size ? size : allocSize;
3129 
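    // skipPteCount is the PTE entry size in NvU64 words; it is used below to
    // stride through the caller's pteBuffer.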
3130     skipPteCount = pLevelFmt->entrySize / sizeof(NvU64);
3131 
3132     isCompressedKind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind);
3133 
3134     //
3135     // Specifying mapping page size for compressed
3136     // allocations is not yet supported.
3137     //
3138     if (isCompressedKind && (pGpuExternalMappingInfo->mappingPageSize != 0) &&
3139         (pGpuExternalMappingInfo->mappingPageSize != pageSize))
3140     {
3141         return NV_ERR_NOT_SUPPORTED;
3142     }
3143 
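    // Emit as many PTEs as fit in the caller's pteBuffer, capped at the number
    // needed to cover the mapping; the remainder is reported via numRemainingPtes.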
3144     pteCount = NV_MIN((pGpuExternalMappingInfo->pteBufferSize / pLevelFmt->entrySize),
3145                       (mappingSize / mappingPageSize));
3146     if (!pteCount)
3147         return NV_ERR_BUFFER_TOO_SMALL;
3148 
3149     if (pFmt->version == GMMU_FMT_VERSION_3)
3150     {
3151         NvU32 ptePcfSw  = 0;
3152         NvU32 ptePcfHw  = 0;
3153 
        nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
        gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
        nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);

        ptePcfSw |= vol         ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
        ptePcfSw |= readOnly    ? (1 << SW_MMU_PCF_RO_IDX)       : 0;
        ptePcfSw |= !atomic     ? (1 << SW_MMU_PCF_NOATOMIC_IDX) : 0;
        ptePcfSw |= !privileged ? (1 << SW_MMU_PCF_REGULAR_IDX)  : 0;

        if (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC)
        {
            ptePcfSw |= (1 << SW_MMU_PCF_ACE_IDX);
        }

        NV_CHECK_OR_RETURN(LEVEL_ERROR,
                           (kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                           NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pPteFmt->fldPtePcf, ptePcfHw, pte.v8);
3172     }
3173     else
3174     {
3175         if (nvFieldIsValid32(&pPteFmt->fldValid.desc))
3176             nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
3177 
3178         if (nvFieldIsValid32(&pPteFmt->fldVolatile.desc))
3179             nvFieldSetBool(&pPteFmt->fldVolatile, vol, pte.v8);
3180 
3181         if (nvFieldIsValid32(&pPteFmt->fldPrivilege.desc))
3182             nvFieldSetBool(&pPteFmt->fldPrivilege, privileged, pte.v8);
3183 
3184         if (nvFieldIsValid32(&pPteFmt->fldEncrypted.desc))
3185             nvFieldSetBool(&pPteFmt->fldEncrypted, encrypted, pte.v8);
3186 
3187         if (nvFieldIsValid32(&pPteFmt->fldReadOnly.desc))
3188             nvFieldSetBool(&pPteFmt->fldReadOnly, readOnly, pte.v8);
3189 
3190         if (nvFieldIsValid32(&pPteFmt->fldWriteDisable.desc))
3191             nvFieldSetBool(&pPteFmt->fldWriteDisable, readOnly, pte.v8);
3192 
3193         if (nvFieldIsValid32(&pPteFmt->fldReadDisable.desc))
3194             nvFieldSetBool(&pPteFmt->fldReadDisable, NV_FALSE, pte.v8);
3195 
3196         if (nvFieldIsValid32(&pPteFmt->fldAtomicDisable.desc))
3197             nvFieldSetBool(&pPteFmt->fldAtomicDisable, !atomic, pte.v8);
3198 
3199         gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
3200 
3201         if (!isCompressedKind)
3202         {
3203             nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);
3204             nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
3205             if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
3206                 nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);
3207         }
3208     }
3209 
3210     if (aperture == GMMU_APERTURE_PEER)
3211     {
3212         FlaMemory* pFlaMemory = dynamicCast(pMemory, FlaMemory);
3213         nvFieldSet32(&pPteFmt->fldPeerIndex, peerId, pte.v8);
3214 
        if ((memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) ||
            (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2) || pFlaMemory)
3218         {
            //
            // Any fabric memory descriptors are pre-encoded with the fabric base address;
            // use NVLINK_INVALID_FABRIC_ADDR to avoid encoding it twice.
            //
3223             fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
3224         }
3225         else
3226         {
3227             KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMemDesc->pGpu);
3228             if (pKernelNvlink == NULL)
3229             {
3230                 fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
3231             }
3232             else
3233             {
3234                 fabricBaseAddress = knvlinkGetUniqueFabricBaseAddress(pMemDesc->pGpu, pKernelNvlink);
3235             }
3236         }
3237     }
3238 
3239     //
3240     // Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
3241     // To avoid it, allocate an array for the physical addresses and use the
3242     // flavors of the APIs that work on multiple addresses at a time.
3243     //
3244     // Notably the pteBuffer array could be re-used for that, but it gets a bit
3245     // tricky if skipPteCount is greater than 1 so just keep it simple.
3246     //
3247     physicalAddresses = portMemAllocNonPaged((NvU32)pteCount * sizeof(*physicalAddresses));
3248     if (physicalAddresses == NULL)
3249         return NV_ERR_NO_MEMORY;
3250 
3251     //
3252     // Ask for physical addresses for the GPU being mapped as it may not be the
3253     // same as the GPU owning the memdesc. This matters for sysmem as accessing
3254     // it requires IOMMU mappings to be set up and these are different for each
3255     // GPU. The IOMMU mappings are currently added by nvGpuOpsDupMemory().
3256     //
3257     memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, offset, mappingPageSize,
3258                               pteCount, physicalAddresses);
3259     kgmmuEncodePhysAddrs(pKernelGmmu, aperture, physicalAddresses, fabricBaseAddress, pteCount);
3260 
3261 
    //
    // Find out whether each of the given physical addresses needs a PLCable kind.
    //
3265     if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
3266         gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu) &&
3267         isCompressedKind &&
3268         !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
3269     {
3270         guestPhysicalAddress = portMemAllocNonPaged((NvU32)pteCount * sizeof(*guestPhysicalAddress));
3271         if (guestPhysicalAddress == NULL)
3272         {
3273             status = NV_ERR_NO_MEMORY;
3274             goto done;
3275         }
3276 
3277         portMemSet(guestPhysicalAddress, 0, ((NvU32)pteCount * sizeof(*guestPhysicalAddress)));
3278 
3279         gpaOffset = offset;
3280         for (iter = 0; iter < pteCount; iter++)
3281         {
3282             guestPhysicalAddress[iter] = gpaOffset;
3283             gpaOffset += mappingPageSize;
3284         }
3285 
3286         isPLCable = portMemAllocNonPaged((NvU32)pteCount * sizeof(*isPLCable));
3287         if (isPLCable == NULL)
3288         {
3289             status = NV_ERR_NO_MEMORY;
3290             goto done;
3291         }
3292 
3293         portMemSet(isPLCable, 0, ((NvU32)pteCount * sizeof(*isPLCable)));
3294 
3295         NV_RM_RPC_GET_PLCABLE_ADDRESS_KIND(pMappingGpu, guestPhysicalAddress, mappingPageSize, (NvU32)pteCount,
3296                                            isPLCable, status);
3297         if (status != NV_OK)
3298             goto done;
3299     }
3300 
3301     for (iter = 0; iter < pteCount; iter++)
3302     {
3303         physAddr = physicalAddresses[iter];
3304 
3305         gmmuFieldSetAddress(gmmuFmtPtePhysAddrFld(pPteFmt, aperture),
3306                             physAddr,
3307                             pte.v8);
3308 
3309         if (isCompressedKind)
3310         {
            // We have to reset the pte.v8 fields in case of partially compressed allocations.
            // Otherwise, non-compressed PTEs would pick up bits from compressed PTEs.
3313             if (pFmt->version <= GMMU_FMT_VERSION_2)
3314             {
3315                 NvBool bIsWarApplied = NV_FALSE;
3316                 NvU32  savedKind = comprInfo.kind;
3317                 MemoryManager  *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu);
3318                 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu);
3319                 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
3320                     kmemsysGetStaticConfig(pMappingGpu, pKernelMemorySystem);
3321 
3322                 nvFieldSet32(&pPteFmt->fldKind, 0, pte.v8);
3323                 nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
3324                 if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
3325                     nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);
3326 
3327                 if (pMemorySystemConfig->bUseRawModeComptaglineAllocation &&
3328                     pMemorySystemConfig->bDisablePlcForCertainOffsetsBug3046774 &&
3329                         !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
3330                 {
3331                     NvBool bEnablePlc = NV_TRUE;
3332 
3333                     if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
3334                         gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu))
3335                     {
3336                         bEnablePlc = isPLCable[iter];
3337                     }
3338                     else
3339                     {
3340                         bEnablePlc = kmemsysIsPagePLCable_HAL(pMappingGpu, pKernelMemorySystem,
3341                                                               offset, mappingPageSize);
3342                     }
3343 
3344                     if (!bEnablePlc)
3345                     {
3346                         bIsWarApplied = NV_TRUE;
3347                         memmgrGetDisablePlcKind_HAL(pMemoryManager, &comprInfo.kind);
3348                     }
3349                 }
3350 
3351                 kgmmuFieldSetKindCompTags(GPU_GET_KERNEL_GMMU(pMappingGpu), pFmt, pLevelFmt, &comprInfo, physAddr,
3352                                           offset, mmuFmtVirtAddrToEntryIndex(pLevelFmt, offset), pte.v8);
                //
                // Restore the kind to PLC if it was changed, since the kind is associated
                // with the entire surface, while the WAR applies to individual pages in
                // the surface.
                //
                if (bIsWarApplied)
                    comprInfo.kind = savedKind;
3358             }
3359         }
3360 
3361         portMemCopy(&pGpuExternalMappingInfo->pteBuffer[iter * skipPteCount], pLevelFmt->entrySize, pte.v8, pLevelFmt->entrySize);
3362 
3363         offset += mappingPageSize;
3364     }
3365 
3366     pGpuExternalMappingInfo->numWrittenPtes = pteCount;
3367     pGpuExternalMappingInfo->numRemainingPtes = (mappingSize / mappingPageSize) - pteCount;
3368     pGpuExternalMappingInfo->pteSize = pLevelFmt->entrySize;
3369 
3370 done:
3371     portMemFree(physicalAddresses);
3372 
3373     portMemFree(guestPhysicalAddress);
3374 
3375     portMemFree(isPLCable);
3376 
3377     return status;
3378 }
3379 
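//
// Produce the PTEs UVM needs to map an externally-allocated RM memory object:
// look up the memory by handle, work out whether the mapping GPU reaches it
// locally, over a direct or indirect peer, or through the fabric, and then
// build the PTEs accordingly.
//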
3380 NV_STATUS nvGpuOpsGetExternalAllocPtes(struct gpuAddressSpace *vaSpace,
3381                                        NvHandle hMemory,
3382                                        NvU64 offset,
3383                                        NvU64 size,
3384                                        gpuExternalMappingInfo *pGpuExternalMappingInfo)
3385 {
3386     NV_STATUS status = NV_OK;
3387     nvGpuOpsLockSet acquiredLocks;
3388     THREAD_STATE_NODE threadState;
3389     Memory *pMemory = NULL;
3390     PMEMORY_DESCRIPTOR pMemDesc = NULL;
3391     OBJGPU *pMappingGpu = NULL;
3392     NvU32 peerId = 0;
3393     NvBool isSliSupported = NV_FALSE;
3394     NvBool isPeerSupported = NV_FALSE;
3395     NvBool isIndirectPeerSupported = NV_FALSE;
3396     OBJVASPACE *pVAS = NULL;
3397     FlaMemory *pFlaMemory = NULL;
3398     OBJGPU    *pSrcGpu = NULL;
3399     OBJGPU    *pPeerGpu = NULL;
3400     RsClient  *pClient;
3401     MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
3402     FABRIC_VASPACE *pFabricVAS = NULL;
3403 
3404     if (!pGpuExternalMappingInfo || !hMemory || !vaSpace)
3405         return NV_ERR_INVALID_ARGUMENT;
3406 
3407     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
3408     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
3409                                       vaSpace->device->session->handle,
3410                                       &pClient,
3411                                       &acquiredLocks);
3412     if (status != NV_OK)
3413     {
3414         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
3415         return status;
3416     }
3417 
3418     status = vaspaceGetByHandleOrDeviceDefault(pClient,
3419                                                vaSpace->device->handle,
3420                                                vaSpace->handle,
3421                                                &pVAS);
3422     if (status != NV_OK)
3423         goto done;
3424 
3425     status = nvGpuOpsGetMemoryByHandle(vaSpace->device->session->handle,
3426                                        hMemory,
3427                                        &pMemory);
3428     if (status != NV_OK)
3429         goto done;
3430 
3431     // RM client allocations can't have multiple subDevice memdescs.
3432     pMemDesc = pMemory->pMemDesc;
3433     NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
3434 
3435     // Do not support mapping on anything other than sysmem/vidmem/fabric!
3436     if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) &&
3437         (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM)  &&
3438         (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_MC) &&
3439         (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_V2))
3440     {
3441         status = NV_ERR_NOT_SUPPORTED;
3442         goto done;
3443     }
3444 
3445     status = CliSetGpuContext(vaSpace->device->session->handle,
3446                               vaSpace->device->handle,
3447                               &pMappingGpu,
3448                               NULL);
3449     if (status != NV_OK)
3450         goto done;
3451 
3452     pAdjustedMemDesc = pMemDesc;
3453     pFabricVAS       = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
3454     if (pFabricVAS != NULL)
3455     {
3456         status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
3457         if (status != NV_OK)
3458             goto done;
3459     }
3460 
3461     // Check if P2P supported
    if ((memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) ||
        (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2))
3465     {
3466         KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMappingGpu);
3467 
3468         isPeerSupported = NV_TRUE;
3469         pPeerGpu        = pAdjustedMemDesc->pGpu;
3470         peerId          = BUS_INVALID_PEER;
3471 
3472         if (!memIsGpuMapAllowed(pMemory, pMappingGpu))
3473         {
3474             NV_PRINTF(LEVEL_ERROR,
3475                       "Mapping Gpu is not attached to the given memory object\n");
3476             status = NV_ERR_INVALID_STATE;
3477             goto freeGpaMemdesc;
3478         }
3479 
3480         if (pPeerGpu != NULL)
3481         {
3482             if ((pKernelNvlink != NULL) &&
3483                 knvlinkIsNvlinkP2pSupported(pMappingGpu, pKernelNvlink, pPeerGpu))
3484             {
3485                 peerId = kbusGetPeerId_HAL(pMappingGpu, GPU_GET_KERNEL_BUS(pMappingGpu), pPeerGpu);
3486             }
3487         }
3488         else
3489         {
3490             peerId = kbusGetNvSwitchPeerId_HAL(pMappingGpu,
3491                                                GPU_GET_KERNEL_BUS(pMappingGpu));
3492         }
3493 
3494         if (peerId == BUS_INVALID_PEER)
3495         {
3496             status = NV_ERR_INVALID_STATE;
3497             goto freeGpaMemdesc;
3498         }
3499     }
3500     else if (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FBMEM &&
3501              (pAdjustedMemDesc->pGpu->gpuId != pMappingGpu->gpuId ||
3502               dynamicCast(pMemory, FlaMemory)))
3503     {
3504         if (gpumgrCheckIndirectPeer(pAdjustedMemDesc->pGpu, pMappingGpu))
3505         {
3506             isIndirectPeerSupported = NV_TRUE;
3507         }
3508         else
3509         {
3510             pFlaMemory = dynamicCast(pMemory, FlaMemory);
3511             if (pFlaMemory != NULL)
3512             {
3513                 pSrcGpu = gpumgrGetGpu(pFlaMemory->peerGpuInst);
3514                 if (!pSrcGpu)
3515                 {
3516                     status = NV_ERR_INVALID_ARGUMENT;
3517                     goto freeGpaMemdesc;
3518                 }
3519             }
3520 
            status = nvGpuOpsGetExternalAllocP2pInfo(vaSpace->device->session,
                                                     (pFlaMemory) ? (pSrcGpu->gpuId) : (pAdjustedMemDesc->pGpu->gpuId),
                                                     pMappingGpu->gpuId,
                                                     &isPeerSupported,
                                                     &peerId);
3526             if (status != NV_OK)
3527                 goto freeGpaMemdesc;
3528         }
3529 
3530         //
        // If the GPUs are in the same SLI group, don't do peer mappings even if the GPUs are
        // different. In an SLI config, if a caller tries to map memory on a GPU other than the
        // one associated with the memdesc, always return a local VIDMEM mapping because RM
        // shares a memdesc among such GPUs for client allocations.
        // Note: This check could be avoided if we knew that pMemDesc->pGpu is always the SLI
        // master, i.e. the same as the pGPU returned by CliSetGpuContext.
3536         //
3537         if (!pFlaMemory && pAdjustedMemDesc->pGpu->deviceInstance == pMappingGpu->deviceInstance)
3538         {
3539             isPeerSupported = NV_FALSE;
3540             isSliSupported = NV_TRUE;
3541         }
3542 
3543         // Even if the RM returns P2P or indirect peer supported, make sure the GPUs are not from different SLI groups. See Bug# 759980.
3544         if ((isPeerSupported || isIndirectPeerSupported) &&
3545             (IsSLIEnabled(pMappingGpu) || IsSLIEnabled(pAdjustedMemDesc->pGpu)))
3546         {
3547             status = NV_ERR_NOT_SUPPORTED;
3548             goto freeGpaMemdesc;
3549         }
3550 
3551         NV_ASSERT(!(isPeerSupported && isSliSupported));
3552 
        // If a caller is trying to map VIDMEM between GPUs with no P2P support that are not in the same SLI group, error out.
3554         if (!isPeerSupported && !isIndirectPeerSupported && !isSliSupported)
3555         {
3556             status = NV_ERR_NOT_SUPPORTED;
3557             goto freeGpaMemdesc;
3558         }
3559     }
3560 
3561     status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pAdjustedMemDesc, pMemory, offset, size,
3562                                             isIndirectPeerSupported, isPeerSupported, peerId,
3563                                             pGpuExternalMappingInfo);
3564 
3565 freeGpaMemdesc:
3566     if (pAdjustedMemDesc != pMemDesc)
3567         fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc);
3568 
3569 done:
3570     _nvGpuOpsLocksRelease(&acquiredLocks);
3571     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
3572     return status;
3573 }
3574 
3575 void nvGpuOpsAddressSpaceDestroy(struct gpuAddressSpace *vaSpace)
3576 {
3577     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3578 
3579     NV_ASSERT(vaSpace->dummyGpuAlloc.refCount == 0);
3580 
    // Free all the virtual allocations
3582     if (vaSpace->allocations)
3583     {
3584         portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
3585         destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
3586                                     vaSpace->allocations);
3587         portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
3588     }
3589 
3590     // free all the physical allocations
3591     if (vaSpace->physAllocations)
3592     {
3593         portSyncRwLockAcquireWrite(vaSpace->physAllocationsLock);
3594         destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
3595                                     vaSpace->physAllocations);
3596         portSyncRwLockReleaseWrite(vaSpace->physAllocationsLock);
3597     }
3598 
3599     // Destroy CPU mappings
3600     if (vaSpace->cpuMappings)
3601     {
3602         portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
3603         btreeDestroyData(vaSpace->cpuMappings);
3604         portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
3605     }
3606 
3607     if (vaSpace->handle)
3608         pRmApi->Free(pRmApi, vaSpace->device->session->handle, vaSpace->handle);
3609 
3610     portSyncRwLockDestroy(vaSpace->allocationsLock);
3611     portSyncRwLockDestroy(vaSpace->cpuMappingsLock);
3612     portSyncRwLockDestroy(vaSpace->physAllocationsLock);
3613 
3614     portMemFree(vaSpace);
3615 }
3616 
3617 static NV_STATUS nvGpuOpsAllocPhysical(struct gpuDevice *device,
3618                                        NvBool isSystemMemory,
3619                                        NvLength length,
3620                                        NvU64 *paOffset,
3621                                        gpuAllocInfo *allocInfo)
3622 {
3623     NV_MEMORY_ALLOCATION_PARAMS memAllocParams = {0};
3624     NV_STATUS status = NV_OK;
3625     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3626 
3627     NvHandle physHandle  = 0;
3628 
3629     NV_ASSERT(allocInfo);
3630     NV_ASSERT(device);
3631     NV_ASSERT(paOffset);
3632 
    // Allocate the physical memory in either sysmem or FB.
3634     memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;
3635 
3636     // Physical allocations don't expect vaSpace handles
3637     memAllocParams.hVASpace = 0;
3638 
3639     // Reset previous offset
3640     memAllocParams.offset = 0;
3641 
3642     memAllocParams.size = length;
3643     memAllocParams.type = NVOS32_TYPE_IMAGE;
3644     memAllocParams.attr = isSystemMemory ?
3645                                       DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) :
3646                                       DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
3647 
3648     // Always enable caching for System Memory as all the currently supported
3649     // platforms are IO coherent.
3650     memAllocParams.attr |= isSystemMemory ?
3651                                        DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED):
3652                                        DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
3653 
    // Allocate a contiguous allocation if requested by the client
3655     memAllocParams.attr |= allocInfo->bContiguousPhysAlloc ?
3656                                        DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS):
3657                                        DRF_DEF(OS32, _ATTR, _PHYSICALITY, _DEFAULT);
3658 
3659     // Set pageSize for PA-allocation. RM default is Big page size
3660     switch (allocInfo->pageSize)
3661     {
3662         case RM_PAGE_SIZE:
3663             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
3664             break;
3665         case RM_PAGE_SIZE_64K:
3666         case RM_PAGE_SIZE_128K:
3667             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
3668             break;
3669         case RM_PAGE_SIZE_HUGE:
3670             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3671             memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
3672             break;
3673         case RM_PAGE_SIZE_512M:
3674             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3675             memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB);
3676             break;
3677         default:
3678             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
3679             break;
3680     }
3681 
3682     // Do we need to allocate at top of FB
3683     if (allocInfo->bMemGrowsDown)
3684         memAllocParams.flags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;
3685 
3686     // Ask RM to allocate persistent video memory
3687     if (!isSystemMemory && allocInfo->bPersistentVidmem)
3688         memAllocParams.flags |= NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM;
3689 
3690     //
    // Vid heap ctrl has a different policy compared to other internal APIs:
    // it expects the GPU lock to not be held. This means we have to drop the GPU lock
    // here. It is safe in this scenario because we still hold the API lock and nothing
    // from a GPU interrupt can change anything in the OPS state.
3695     //
3696 
3697     physHandle = NV01_NULL_OBJECT;
3698     NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi,
3699                                                 device->session->handle,
3700                                                 isSystemMemory ? device->handle : device->subhandle,
3701                                                 &physHandle,
3702                                                 isSystemMemory ? NV01_MEMORY_SYSTEM : NV01_MEMORY_LOCAL_USER,
3703                                                 &memAllocParams), done);
3704     if (allocInfo->bContiguousPhysAlloc)
3705         allocInfo->gpuPhysOffset = memAllocParams.offset;
3706 
3707     allocInfo->hPhysHandle = physHandle;
3708     *paOffset = (NvU64)allocInfo->gpuPhysOffset;
3709 
3710 done:
3711 
3712     if (status != NV_OK)
3713         pRmApi->Free(pRmApi, device->session->handle, physHandle);
3714 
3715     return status;
3716 }
3717 
3718 // The call allocates a virtual memory and associates a PA with it.
3719 static NV_STATUS nvGpuOpsAllocVirtual(struct gpuAddressSpace *vaSpace,
3720                                       NvLength length,
3721                                       NvU64 *vaOffset,
3722                                       NvHandle physHandle,
3723                                       struct allocFlags flags,
3724                                       gpuVaAllocInfo *allocInfo)
3725 {
3726     NV_MEMORY_ALLOCATION_PARAMS memAllocParams = { 0 };
3727     NV_STATUS status;
3728     gpuMemDesc *memDesc = NULL;
3729     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3730 
3731     NV_ASSERT(allocInfo);
3732     NV_ASSERT(vaSpace);
3733     NV_ASSERT(vaOffset);
3734     NV_ASSERT(physHandle);
3735 
3736     memDesc = portMemAllocNonPaged(sizeof(*memDesc));
3737     if (memDesc == NULL)
3738         return NV_ERR_NO_MEMORY;
3739 
3740     // first allocate the virtual memory
3741 
3742     memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;
3743     memAllocParams.size = length;
3744     memAllocParams.type = NVOS32_TYPE_IMAGE;
3745     memAllocParams.alignment = allocInfo->alignment ? allocInfo->alignment : NV_GPU_SMALL_PAGESIZE;
3746     memAllocParams.flags = NVOS32_ALLOC_FLAGS_VIRTUAL |
3747                                        NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE |
3748                                        NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED;
3749 
3750     // Set pageSize for VA-allocation. RM default is Big page size
3751     switch (allocInfo->pageSize)
3752     {
3753         case RM_PAGE_SIZE:
3754             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
3755             break;
3756         case RM_PAGE_SIZE_64K:
3757         case RM_PAGE_SIZE_128K:
3758             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
3759             break;
3760         case RM_PAGE_SIZE_HUGE:
3761             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3762             memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
3763             break;
3764         case RM_PAGE_SIZE_512M:
3765             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3766             memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB);
3767             break;
3768         default:
3769             memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
3770             break;
3771     }
3772 
3773     memAllocParams.hVASpace = vaSpace->handle;
3774 
3775     memDesc->handle = NV01_NULL_OBJECT;
3776     NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi,
3777                                                 vaSpace->device->session->handle,
3778                                                 vaSpace->device->handle,
3779                                                 &memDesc->handle,
3780                                                 NV50_MEMORY_VIRTUAL,
3781                                                 &memAllocParams), done);
3782     memDesc->address = (NvU64)memAllocParams.offset;
3783     memDesc->size = length;
3784     memDesc->childHandle = physHandle;
3785 
3786     portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
3787     status = trackDescriptor(&vaSpace->allocations, memDesc->address, memDesc);
3788     portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
3789 
3790     if (status != NV_OK)
3791         goto done;
3792 
3793     // return the allocated GPU VA
3794     *vaOffset = memDesc->address;
3795 
3796 done:
3797 
3798     if (status != NV_OK)
3799         pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle);
3800 
3801     if ((status != NV_OK) && (memDesc != NULL))
3802         portMemFree(memDesc);
3803 
3804     return status;
3805 }
3806 
// Will need to support an offset within the allocation.
3808 static NV_STATUS nvGpuOpsMapGpuMemory(struct gpuAddressSpace *vaSpace,
3809                                       NvU64 vaOffset,
3810                                       NvLength length,
3811                                       NvU64 pageSize,
3812                                       NvU64 *gpuOffset,
3813                                       struct allocFlags flags)
3814 {
3815     gpuMemDesc *memDescVa = NULL;
3816     NV_STATUS status;
3817     NvU64 mappedVa = 0;
3818     NvU32 mapFlags = 0;
3819     NvU64 mapPageSize = 0;
3820     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3821 
3822     if (!vaSpace || !gpuOffset)
3823         return NV_ERR_INVALID_ARGUMENT;
3824 
3825     portSyncRwLockAcquireRead(vaSpace->allocationsLock);
3826     status = findDescriptor(vaSpace->allocations, vaOffset, (void**)&memDescVa);
3827     portSyncRwLockReleaseRead(vaSpace->allocationsLock);
3828     if (status != NV_OK)
3829         return status;
3830 
3831     NV_ASSERT(memDescVa);
3832     NV_ASSERT(memDescVa->handle);
3833     NV_ASSERT(memDescVa->childHandle);
3834 
3835     if (pageSize == RM_PAGE_SIZE)
3836     {
3837         mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB);
3838     }
3839     else if (pageSize == RM_PAGE_SIZE_HUGE)
3840     {
3841         // TODO: this flag is ignored, remove it once it is deprecated
3842         mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE);
3843     }
3844     else
3845     {
3846        mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT);
3847     }
3848 
    // Build the mapping flags
3850     mapFlags |= ((flags.bGetKernelVA) ? DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) :
3851             DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _NONE));
3852     mapFlags |= mapPageSize;
3853 
3854     // Always enable snooping as that's what's needed for sysmem allocations and
3855     // it's ignored for vidmem.
3856     mapFlags |= DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
3857 
    // Map the virtual allocation onto the physical allocation
3859     status = pRmApi->Map(pRmApi,
3860                          vaSpace->device->session->handle,
3861                          vaSpace->device->handle,
3862                          memDescVa->handle,
3863                          memDescVa->childHandle,
3864                          0,
3865                          length,
3866                          mapFlags,
3867                          &mappedVa);
3868     if (status != NV_OK)
3869         return status;
3870 
3871     NV_ASSERT(memDescVa->address == mappedVa);
3872 
3873     *gpuOffset = memDescVa->address;
3874 
3875     return NV_OK;
3876 }
3877 
3878 //
3879 // This function provides a gpu virtual address to a physical region
3880 // that can either be in sysmem or vidmem.
3881 //
3882 static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace,
3883                                    NvBool isSystemMemory,
3884                                    NvLength length,
3885                                    NvU64 *gpuOffset,
3886                                    struct allocFlags flags,
3887                                    gpuAllocInfo *allocInfo)
3888 {
3889     NV_STATUS status;
3890     NvU64 vaOffset = 0;
3891     NvU64 paOffset = 0;
3892     gpuVaAllocInfo vaAllocInfo = { 0 };
3893     NvHandle paMemDescHandle;
3894     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3895 
3896     NV_ASSERT(allocInfo);
3897     NV_ASSERT(vaSpace);
3898     NV_ASSERT(gpuOffset);
3899 
    // Allocate the physical memory first so that we can associate the PA with the
    // memDesc of the VA. This simplifies tracking of the VA and PA handles.
3902     status = nvGpuOpsAllocPhysical(vaSpace->device, isSystemMemory, length,
3903                                    &paOffset, allocInfo);
3904     if (status != NV_OK)
3905         return status;
3906 
3907     NV_ASSERT(allocInfo->hPhysHandle);
3908 
3909     paMemDescHandle = allocInfo->hPhysHandle;
3910     vaAllocInfo.pageSize = allocInfo->pageSize;
3911     vaAllocInfo.alignment = allocInfo->alignment;
3912 
3913     status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, paMemDescHandle,
3914                                   flags, &vaAllocInfo);
3915     if (status != NV_OK)
3916         goto cleanup_physical;
3917 
3918     status = nvGpuOpsMapGpuMemory(vaSpace, vaOffset, length,
3919                                   allocInfo->pageSize, gpuOffset, flags);
3920     if (status != NV_OK)
3921         goto cleanup_virtual;
3922 
3923     return NV_OK;
3924 
3925 cleanup_virtual:
3926     nvGpuOpsFreeVirtual(vaSpace, vaOffset);
3927 cleanup_physical:
3928     pRmApi->Free(pRmApi, vaSpace->device->session->handle, paMemDescHandle);
3929     return status;
3930 }
3931 
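// Unlink the VA tracking entry and free the virtual memory object. The associated
// physical allocation (childHandle) is not freed here.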
3932 static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace, NvU64 vaOffset)
3933 {
3934     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3935     gpuMemDesc *memDescVa = NULL;
3936     portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
3937     deleteDescriptor(&vaSpace->allocations, vaOffset, (void**)&memDescVa);
3938     portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
3939     NV_ASSERT(memDescVa);
3940     pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDescVa->handle);
3941     portMemFree(memDescVa);
3942 }
3943 
3944 NV_STATUS nvGpuOpsMemoryAllocFb(struct gpuAddressSpace *vaSpace,
3945                                 NvLength length,
3946                                 NvU64 *gpuOffset,
3947                                 gpuAllocInfo *allocInfo)
3948 {
3949     gpuAllocInfo allocInfoTemp = {0};
3950     gpuAllocInfo *pAllocInfo;
3951     struct allocFlags flags = {0};
3952 
3953     if (!vaSpace || !gpuOffset)
3954         return NV_ERR_INVALID_ARGUMENT;
3955 
3956     // Use default settings if user hasn't provided one.
3957     if (allocInfo == NULL)
3958     {
3959         pAllocInfo = &allocInfoTemp;
3960     }
3961     else
3962     {
3963         pAllocInfo = allocInfo;
3964     }
3965 
3966     return nvGpuOpsGpuMalloc(vaSpace, NV_FALSE, length, gpuOffset, flags,
3967                                pAllocInfo);
3968 }
3969 
3970 NV_STATUS nvGpuOpsMemoryAllocSys(struct gpuAddressSpace *vaSpace,
3971                                  NvLength length,
3972                                  NvU64 *gpuOffset,
3973                                  gpuAllocInfo *allocInfo)
3974 {
3975     gpuAllocInfo allocInfoTemp = {0};
3976     gpuAllocInfo *pAllocInfo;
3977     struct allocFlags flags = {0};
3978 
3979     if (!vaSpace || !gpuOffset)
3980         return NV_ERR_INVALID_ARGUMENT;
3981 
3982     // Use default settings if user hasn't provided one.
3983     if (allocInfo == NULL)
3984     {
3985         pAllocInfo = &allocInfoTemp;
3986     }
3987     else
3988     {
3989         pAllocInfo = allocInfo;
3990     }
3991 
3992     return nvGpuOpsGpuMalloc(vaSpace, NV_TRUE, length, gpuOffset, flags,
3993                              pAllocInfo);
3994 }
3995 
3996 NV_STATUS nvGpuOpsMemoryReopen(struct gpuAddressSpace *vaSpace,
3997                                NvHandle hSrcClient,
3998                                NvHandle hSrcAllocation,
3999                                NvLength length,
4000                                NvU64 *gpuOffset)
4001 {
4002     NV_STATUS status;
4003     NvHandle hAllocation = 0;
4004     gpuVaAllocInfo allocInfoTemp = { 0 };
4005     struct allocFlags flags = { 0 };
4006     NvU64 vaOffset;
4007     NvHandle hVirtual = 0;
4008     RsResourceRef *pResourceRef;
4009     NvU64 addressOffset = 0;
4010     NvHandle hParent;
4011     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4012 
4013     // find device type
4014     // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
4015     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
4016     if (status != NV_OK)
4017         return status;
4018 
4019     status = serverutilGetResourceRef(hSrcClient, hSrcAllocation, &pResourceRef);
4020     if (status != NV_OK)
4021     {
4022         rmapiLockRelease();
4023         return NV_ERR_OBJECT_NOT_FOUND;
4024     }
4025 
4026     if (!dynamicCast(pResourceRef->pResource, Memory))
4027     {
4028         rmapiLockRelease();
4029         return NV_ERR_INVALID_OBJECT_HANDLE;
4030     }
4031 
4032     hParent = pResourceRef->pParentRef ? pResourceRef->pParentRef->hResource : 0;
4033 
4034     status = serverutilGetResourceRef(hSrcClient, hParent, &pResourceRef);
4035     rmapiLockRelease();
4036     if (status != NV_OK || !dynamicCast(pResourceRef->pResource, Device))
4037         return NV_ERR_GENERIC;
4038 
4039     if (!vaSpace || !gpuOffset || !hSrcAllocation || !hSrcClient)
4040         return NV_ERR_INVALID_ARGUMENT;
4041 
4042     // Dup the physical memory object
4043     hAllocation = NV01_NULL_OBJECT;
4044     status = pRmApi->DupObject(pRmApi,
4045                                vaSpace->device->session->handle,
4046                                vaSpace->device->handle,
4047                                &hAllocation,
4048                                hSrcClient,
4049                                hSrcAllocation,
4050                                NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
4051     if (status != NV_OK)
4052         return status;
4053 
4054     // Associate the duped object with the newly created virtual memory object
4055     status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, hAllocation,
4056                                   flags, &allocInfoTemp);
4057     if (status != NV_OK)
4058         goto cleanup_dup;
4059 
4060     status = getHandleForVirtualAddr(vaSpace, vaOffset, NV_FALSE, &hVirtual);
4061     if (status != NV_OK)
4062         goto cleanup_virt_allocation;
4063 
4064     // map the memory
4065     status = pRmApi->Map(pRmApi,
4066                          vaSpace->device->session->handle,
4067                          vaSpace->device->handle,
4068                          hVirtual,
4069                          hAllocation,
4070                          0,
4071                          length,
4072                          0,
4073                          &addressOffset);
4074     if (status != NV_OK)
4075         goto cleanup_virt_allocation;
4076 
    NV_ASSERT((vaOffset == addressOffset) && "nvGpuOpsMemoryReopen: VA offset mismatch!");
4078 
4079     // return the mapped GPU pointer
4080     *gpuOffset = vaOffset;
4081 
4082     return NV_OK;
4083 
4084 cleanup_virt_allocation:
4085     nvGpuOpsFreeVirtual(vaSpace, vaOffset);
4086 cleanup_dup:
4087     pRmApi->Free(pRmApi, vaSpace->device->session->handle, hAllocation);
4088     return status;
4089 }
4090 
4091 NV_STATUS nvGpuOpsPmaAllocPages(void *pPma, NvLength pageCount, NvU64 pageSize,
4092                                 gpuPmaAllocationOptions *pPmaAllocOptions,
4093                                 NvU64 *pPages)
4094 {
4095     NV_STATUS status;
4096     gpuPmaAllocationOptions pmaAllocOptionsTemp = {0};
4097     gpuPmaAllocationOptions *pAllocInfo;
4098     THREAD_STATE_NODE threadState;
4099 
4100     if (!pPma || !pPages)
4101         return NV_ERR_INVALID_ARGUMENT;
4102 
4103     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4104 
4105     // Use default settings if user hasn't provided one.
4106     if (NULL == pPmaAllocOptions)
4107     {
4108         pAllocInfo = &pmaAllocOptionsTemp;
4109     }
4110     else
4111     {
4112         pAllocInfo = pPmaAllocOptions;
4113     }
4114 
4115     // Invoke PMA module to alloc pages.
4116     status = pmaAllocatePages((PMA *)pPma,
4117                               pageCount,
4118                               pageSize,
4119                               (PMA_ALLOCATION_OPTIONS *)pAllocInfo,
4120                               pPages);
4121 
4122     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4123     return status;
4124 }
4125 
4126 //
4127 // When this API is called from UVM as part of PMA eviction, the thread state
4128 // should have been initialized already and recursive re-init needs to be
4129 // skipped as it's not supported.
4130 //
4131 NV_STATUS nvGpuOpsPmaPinPages(void *pPma,
4132                               NvU64 *pPages,
4133                               NvLength pageCount,
4134                               NvU64 pageSize,
4135                               NvU32 flags)
4136 {
4137     NV_STATUS status;
4138     THREAD_STATE_NODE threadState;
4139     NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;
4140 
4141     if (!pPma || !pPages)
4142         return NV_ERR_INVALID_ARGUMENT;
4143 
4144     if (!pmaEvictionCall)
4145         threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4146 
4147     // Invoke PMA module to Pin pages.
4148     status = pmaPinPages((PMA *)pPma, pPages, pageCount, pageSize);
4149 
4150     if (!pmaEvictionCall)
4151         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4152     return status;
4153 }
4154 
4155 NV_STATUS nvGpuOpsPmaUnpinPages(void *pPma,
4156                                 NvU64 *pPages,
4157                                 NvLength pageCount,
4158                                 NvU64 pageSize)
4159 {
    NV_STATUS status;
    THREAD_STATE_NODE threadState;

    if (!pPma || !pPages)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4166 
4167     // Invoke PMA module to Unpin pages.
4168     status = pmaUnpinPages((PMA *)pPma, pPages, pageCount, pageSize);
4169 
4170     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4171     return status;
4172 }
4173 
4174 //
4175 // When this API is called from UVM as part of PMA eviction, the thread state
4176 // should have been initialized already and recursive re-init needs to be
4177 // skipped as it's not supported.
4178 //
4179 void nvGpuOpsPmaFreePages(void *pPma,
4180                           NvU64 *pPages,
4181                           NvLength pageCount,
4182                           NvU64 pageSize,
4183                           NvU32 flags)
4184 {
4185     THREAD_STATE_NODE threadState;
4186     NvU32 pmaFreeFlag = ((flags & UVM_PMA_FREE_IS_ZERO) ? PMA_FREE_SKIP_SCRUB : 0);
4187     NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;
4188 
    if (!pPma || !pPages)
        return;

    if (!pmaEvictionCall)
        threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4194 
4195     // Invoke PMA module to free pages.
4196     if (flags & UVM_PMA_ALLOCATE_CONTIGUOUS)
4197         pmaFreePages((PMA *)pPma, pPages, 1, pageCount * pageSize, pmaFreeFlag);
4198     else
4199         pmaFreePages((PMA *)pPma, pPages, pageCount, pageSize, pmaFreeFlag);
4200 
4201     if (!pmaEvictionCall)
4202         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4203 }
4204 
4205 static NV_STATUS nvGpuOpsChannelGetHwChannelId(struct gpuChannel *channel,
4206                                                NvU32 *hwChannelId)
4207 {
4208     NV0080_CTRL_FIFO_GET_CHANNELLIST_PARAMS params = {0};
4209     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4210 
4211     params.numChannels        = 1;
4212     params.pChannelHandleList = NV_PTR_TO_NvP64(&channel->channelHandle);
4213     params.pChannelList       = NV_PTR_TO_NvP64(hwChannelId);
4214 
4215     return pRmApi->Control(pRmApi,
4216                            channel->vaSpace->device->session->handle,
4217                            channel->vaSpace->device->handle,
4218                            NV0080_CTRL_CMD_FIFO_GET_CHANNELLIST,
4219                            &params,
4220                            sizeof(params));
4221 }
4222 
4223 static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
4224                                         NvHandle handle,
4225                                         void *ptr,
4226                                         NvU32 flags)
4227 {
4228     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4229     struct gpuSession *session = device->session;
4230 
4231     // Unmap the pointer
4232     if (ptr)
4233     {
4234         NV_STATUS status;
4235         const NvU32 pid = osGetCurrentProcess();
4236 
4237         status = pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, handle, ptr, flags, pid);
4238         NV_ASSERT(status == NV_OK);
4239     }
4240 
4241     // Free the handle
4242     if (handle)
4243         pRmApi->Free(pRmApi, session->handle, handle);
4244 }
4245 
4246 static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device)
4247 {
4248     subDeviceDesc *rmSubDevice = device->rmSubDevice;
4249 
4250     gpuDeviceUnmapCpuFreeHandle(device,
4251                                 rmSubDevice->clientRegionHandle,
4252                                 (void *)rmSubDevice->clientRegionMapping,
4253                                 0);
4254 }
4255 
4256 static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device)
4257 {
4258     NV_STATUS status = NV_OK;
4259     NvHandle regionHandle = 0;
4260     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4261     struct gpuSession *session = device->session;
4262     subDeviceDesc *rmSubDevice = device->rmSubDevice;
4263     NvU32 usermodeClass = VOLTA_USERMODE_A;
4264     void *pParams = NULL;
4265     NV_HOPPER_USERMODE_A_PARAMS hopperParams =
4266     {
4267         .bBar1Mapping = NV_TRUE,
4268         .bPriv = NV_FALSE
4269     };
4270 
4271     if (device->rmDevice->arch >= GPU_ARCHITECTURE_HOPPER)
4272     {
4273         usermodeClass = HOPPER_USERMODE_A;
4274         pParams = &hopperParams;
4275     }
4276 
4277     NV_ASSERT(isDeviceVoltaPlus(device));
4278     NV_ASSERT(rmSubDevice->clientRegionHandle == 0 && rmSubDevice->clientRegionMapping == NULL);
4279 
4280     regionHandle = NV01_NULL_OBJECT;
4281     status = pRmApi->Alloc(pRmApi,
4282                            session->handle,
4283                            device->subhandle,
4284                            &regionHandle,
4285                            usermodeClass,
4286                            pParams);
4287     if (NV_OK != status)
4288         return status;
4289 
4290     status = pRmApi->MapToCpu(pRmApi,
4291                               session->handle,
4292                               device->subhandle,
4293                               regionHandle,
4294                               0,
4295                               NVC361_NV_USERMODE__SIZE,
4296                               (void **)(&rmSubDevice->clientRegionMapping),
4297                               DRF_DEF(OS33, _FLAGS, _ACCESS, _WRITE_ONLY));
4298     if (NV_OK != status)
4299         goto failure_case;
4300 
4301     rmSubDevice->clientRegionHandle = regionHandle;
4302     return status;
4303 
4304 failure_case:
4305     pRmApi->Free(pRmApi, device->session->handle, regionHandle);
4306     return status;
4307 }
4308 
4309 //
4310 // In Volta+, a channel can submit work by "ringing a doorbell" on the gpu after
4311 // updating the GP_PUT. The doorbell is a register mapped in the client's address
4312 // space and can be shared by all channels in that address space. Each channel writes
4313 // a channel-specific token to the doorbell to trigger the work.
4314 //
4315 static NV_STATUS nvGpuOpsGetWorkSubmissionInfo(struct gpuAddressSpace *vaSpace,
4316                                                struct gpuChannel *channel)
4317 {
4318     NV_STATUS status = NV_OK;
4319     NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0};
4320     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4321     struct gpuDevice *device = vaSpace->device;
4322     struct gpuSession *session = device->session;
4323     subDeviceDesc *rmSubDevice = device->rmSubDevice;
4324 
4325     // Only valid for VOLTA+ (sub)Devices.
4326     NV_ASSERT(isDeviceVoltaPlus(vaSpace->device));
4327 
4328     // Now get the token for submission on given channel.
4329     status = pRmApi->Control(pRmApi,
4330                              session->handle,
4331                              channel->channelHandle,
4332                              NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
4333                              &params,
4334                              sizeof(params));
4335     if (status != NV_OK)
4336         return status;
4337 
4338     channel->workSubmissionOffset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_NOTIFY_CHANNEL_PENDING);
4339     channel->workSubmissionToken = params.workSubmitToken;
4340 
4341     //
4342     // pWorkSubmissionToken cannot be NULL even if errorNotifier is NULL.
4343     // errorNotifier is checked for NULL previously, so just an assert is
4344     // sufficient.
4345     //
4346     NV_ASSERT_OR_RETURN((channel->errorNotifier != NULL), NV_ERR_INVALID_POINTER);
4347 
4348     channel->pWorkSubmissionToken =
4349         (NvU32 *)((NvU8 *)channel->errorNotifier +
4350             (NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN * sizeof(NvNotification)) +
4351             NV_OFFSETOF(NvNotification, info32));
4352 
4353     return status;
4354 }
4355 
4356 static NvBool channelNeedsDummyAlloc(struct gpuChannel *channel)
4357 {
4358     return channel->gpPutLoc == UVM_BUFFER_LOCATION_SYS && deviceNeedsDummyAlloc(channel->vaSpace->device);
4359 }
4360 
4361 static NV_STATUS channelRetainDummyAlloc(struct gpuChannel *channel, gpuChannelInfo *channelInfo)
4362 {
4363     struct gpuAddressSpace *vaSpace = channel->vaSpace;
4364     NV_STATUS status;
4365 
4366     if (!channelNeedsDummyAlloc(channel))
4367         return NV_OK;
4368 
4369     status = nvGpuOpsVaSpaceRetainDummyAlloc(vaSpace);
4370     if (status != NV_OK)
4371         return status;
4372 
4373     channel->retainedDummyAlloc = NV_TRUE;
4374     channelInfo->dummyBar1Mapping = vaSpace->dummyGpuAlloc.cpuAddr;
4375 
4376     return NV_OK;
4377 }
4378 
4379 static void channelReleaseDummyAlloc(struct gpuChannel *channel)
4380 {
4381     if (channel != NULL && channel->retainedDummyAlloc)
4382     {
4383         NV_ASSERT(channelNeedsDummyAlloc(channel));
4384         nvGpuOpsVaSpaceReleaseDummyAlloc(channel->vaSpace);
4385     }
4386 }
4387 
4388 static RM_ENGINE_TYPE channelEngineType(const struct gpuChannel *channel)
4389 {
4390     if (channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE)
4391         return RM_ENGINE_TYPE_COPY(channel->engineIndex);
4392     else if (channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
4393         return RM_ENGINE_TYPE_SEC2;
4394     else
4395         return RM_ENGINE_TYPE_GR(channel->engineIndex);
4396 }
4397 
4398 static NV_STATUS channelAllocate(struct gpuAddressSpace *vaSpace,
4399                                  UVM_GPU_CHANNEL_ENGINE_TYPE engineType,
4400                                  const gpuChannelAllocParams *params,
4401                                  struct gpuChannel **channelHandle,
4402                                  gpuChannelInfo *channelInfo)
4403 {
4404     NV_STATUS status;
4405     struct gpuChannel *channel = NULL;
4406     struct gpuDevice *device = NULL;
4407     struct gpuSession *session = NULL;
4408     void *cpuMap = NULL;
4409     NvHandle hErrorNotifier;
4410     struct ChannelAllocInfo *pAllocInfo = NULL;
4411     void *gpfifoCtrl = NULL;
4412     PCLI_DMA_MAPPING_INFO pDmaMappingInfo = NULL;
4413     struct allocFlags flags = {0};
4414     OBJGPU *pGpu = NULL;
4415     KernelFifo *pKernelFifo = NULL;
4416     NvU32 pid = osGetCurrentProcess();
4417     NvU32 subdeviceInstance;
4418     UVM_BUFFER_LOCATION gpFifoLoc;
4419     UVM_BUFFER_LOCATION gpPutLoc;
4420     NvLength gpFifoSize, errorNotifierSize;
4421     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4422 
4423     if (!vaSpace || !channelHandle || !params || !channelInfo)
4424         return NV_ERR_INVALID_ARGUMENT;
4425 
4426     if (params->numGpFifoEntries == 0)
4427         return NV_ERR_INVALID_ARGUMENT;
4428 
4429     if (engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_CE &&
4430         engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_GR &&
4431         engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
4432         return NV_ERR_INVALID_ARGUMENT;
4433 
4434     // TODO: Bug 2458492: Ampere-SMC Verify GR/CE indices within partition/SMC Engine
4435 
4436     device = vaSpace->device;
4437     NV_ASSERT(device);
4438     session = device->session;
4439     NV_ASSERT(session);
4440 
4441     // Set location defaults
4442     gpFifoLoc = UVM_BUFFER_LOCATION_SYS;
4443     if (device->fbInfo.bZeroFb)
4444         gpPutLoc = UVM_BUFFER_LOCATION_SYS;
4445     else
4446         gpPutLoc = UVM_BUFFER_LOCATION_VID;
4447 
4448     if (isDeviceVoltaPlus(device))
4449     {
4450         if (params->gpFifoLoc > UVM_BUFFER_LOCATION_VID)
4451             return NV_ERR_INVALID_ARGUMENT;
4452         if (params->gpPutLoc > UVM_BUFFER_LOCATION_VID)
4453             return NV_ERR_INVALID_ARGUMENT;
4454 
4455         if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT)
4456             gpFifoLoc = params->gpFifoLoc;
4457         if (params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
4458             gpPutLoc = params->gpPutLoc;
4459     }
4460     else
4461     {
4462         // GPFIFO needs to be placed in sysmem on Pascal and
4463         // pre-Pascal devices (Bug 1750713)
4464         if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT || params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
4465             return NV_ERR_INVALID_ARGUMENT;
4466     }
4467 
4468     // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
4469     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
4470     if (status != NV_OK)
4471         return status;
4472     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
4473     rmapiLockRelease();
4474     if (status != NV_OK)
4475         return status;
4476 
4477     pAllocInfo = portMemAllocNonPaged(sizeof(*pAllocInfo));
4478     if (pAllocInfo == NULL)
4479         return NV_ERR_NO_MEMORY;
4480 
4481     portMemSet(pAllocInfo, 0, sizeof(*pAllocInfo));
4482 
4483     subdeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
4484 
4485     channel = portMemAllocNonPaged(sizeof(*channel));
4486     if (channel == NULL)
4487     {
4488         status = NV_ERR_NO_MEMORY;
4489         goto cleanup_free_memory;
4490     }
4491 
4492     portMemSet(channel, 0, sizeof(*channel));
4493 
4494     channel->vaSpace = vaSpace;
4495     channel->fifoEntries = params->numGpFifoEntries;
4496     channel->gpFifoLoc   = gpFifoLoc;
4497     channel->gpPutLoc    = gpPutLoc;
4498 
4499     // Remember which engine we are using, so that RC recovery can reset it if
4500     // it hangs:
4501     channel->engineType = engineType;
4502     channel->engineIndex = params->engineIndex;
4503 
4504     gpFifoSize = (NvLength)params->numGpFifoEntries * NVA06F_GP_ENTRY__SIZE;
4505 
4506     // If the allocation is vidmem ask RM to allocate persistent vidmem
4507     pAllocInfo->gpuAllocInfo.bPersistentVidmem = NV_TRUE;
4508 
    // 1. Allocate the GPFIFO entries. Don't pass any special flags.
4510     flags.bGetKernelVA = NV_FALSE;
4511     status = nvGpuOpsGpuMalloc(vaSpace,
4512                                gpFifoLoc == UVM_BUFFER_LOCATION_SYS,
4513                                gpFifoSize,
4514                                &channel->gpFifo,
4515                                flags,
4516                                &pAllocInfo->gpuAllocInfo);
4517     if (status != NV_OK)
4518         goto cleanup_free_memory;
4519 
4520     // 2. Map the gpfifo entries
4521     status = nvGpuOpsMemoryCpuMap(vaSpace,
4522                                   channel->gpFifo,
4523                                   gpFifoSize,
4524                                   &cpuMap,
4525                                   PAGE_SIZE_DEFAULT);
4526     if (status != NV_OK)
4527         goto cleanup_free_gpfifo_entries;
4528 
4529     channel->gpFifoEntries = (NvU64 *) cpuMap;
4530 
4531     //
4532     // 3. Allocate memory for the error notifier. Make the allocation
4533     // sufficiently large to also accommodate any other channel
4534     // notifiers, and request a kernel VA and CPU caching.
4535     //
4536 
4537     flags.bGetKernelVA = NV_TRUE;
4538     errorNotifierSize = sizeof(NvNotification) *
4539                         NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1;
4540     status = nvGpuOpsGpuMalloc(vaSpace,
4541                                NV_TRUE,
4542                                errorNotifierSize,
4543                                &channel->errorNotifierOffset,
4544                                flags,
4545                                &pAllocInfo->gpuAllocInfo);
4546     if (status != NV_OK)
4547         goto cleanup_unmap_gpfifo_entries;
4548 
4549     NV_ASSERT(channel->errorNotifierOffset);
4550 
4551     status = getHandleForVirtualAddr(vaSpace,
4552                                      channel->errorNotifierOffset,
4553                                      NV_FALSE /*virtual*/,
4554                                      &hErrorNotifier);
4555     if (status != NV_OK)
4556         goto cleanup_free_virtual;
4557 
4558     // 4. Find and share the VA with UVM driver
4559 
4560     // TODO: Acquired because CliGetDmaMappingInfo expects RMAPI lock. Necessary?
4561     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
4562     if (status != NV_OK)
4563         goto cleanup_free_virtual;
4564 
4565     if (!CliGetDmaMappingInfo(session->handle,
4566                               device->handle,
4567                               hErrorNotifier,
4568                               channel->errorNotifierOffset,
4569                               gpumgrGetDeviceGpuMask(device->deviceInstance),
4570                               &pDmaMappingInfo))
4571     {
4572         rmapiLockRelease();
4573         status = NV_ERR_GENERIC;
4574         goto cleanup_free_virtual;
4575     }
4576 
4577     rmapiLockRelease();
4578 
4579     //
4580     // RM uses the parent subdevice index to fill the notifier on SYSMEM. So use the same.
4581     // NOTE: the same assumption does not hold for VIDMEM allocations.
4582     //
4583     channel->errorNotifier = (NvNotification*)pDmaMappingInfo->KernelVAddr[subdeviceInstance];
4584     if (!channel->errorNotifier)
4585     {
4586         status = NV_ERR_GENERIC;
4587         goto cleanup_free_virtual;
4588     }
4589 
4590     // Let's allocate the channel
4591     pAllocInfo->gpFifoAllocParams.hObjectError  = hErrorNotifier;
4592     status = getHandleForVirtualAddr(vaSpace,
4593                                      channel->gpFifo,
4594                                      NV_FALSE /*virtual*/,
4595                                      &pAllocInfo->gpFifoAllocParams.hObjectBuffer);
4596     if (status != NV_OK)
4597         goto cleanup_free_virtual;
4598 
4599     pAllocInfo->gpFifoAllocParams.gpFifoOffset  = channel->gpFifo;
4600     pAllocInfo->gpFifoAllocParams.gpFifoEntries = channel->fifoEntries;
4601     // If zero then it will attach to the device address space
4602     pAllocInfo->gpFifoAllocParams.hVASpace = vaSpace->handle;
4603     pAllocInfo->gpFifoAllocParams.engineType = gpuGetNv2080EngineType(channelEngineType(channel));
4604 
4605     if (isDeviceVoltaPlus(device))
4606     {
4607 
4608         flags.bGetKernelVA = NV_FALSE;
4609         status = nvGpuOpsGpuMalloc(vaSpace,
4610                                    gpPutLoc == UVM_BUFFER_LOCATION_SYS,
4611                                    sizeof(KeplerAControlGPFifo),
4612                                    &channel->userdGpuAddr,
4613                                    flags,
4614                                    &pAllocInfo->gpuAllocInfo);
4615         if (status != NV_OK)
4616             goto cleanup_free_virtual;
4617 
4618         channel->hUserdPhysHandle = pAllocInfo->gpuAllocInfo.hPhysHandle;
4619 
4620         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
4621         pAllocInfo->gpFifoAllocParams.hUserdMemory[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = channel->hUserdPhysHandle;
4622         pAllocInfo->gpFifoAllocParams.userdOffset[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = 0;
4623         SLI_LOOP_END
4624 
4625         status = nvGpuOpsMemoryCpuMap(vaSpace,
4626                                       channel->userdGpuAddr,
4627                                       sizeof(KeplerAControlGPFifo),
4628                                       &gpfifoCtrl,
4629                                       PAGE_SIZE_DEFAULT);
4630         if (status != NV_OK)
4631             goto cleanup_free_virtual;
4632     }
4633 
4634     channel->channelHandle = NV01_NULL_OBJECT;
4635     status = pRmApi->Alloc(pRmApi, session->handle,
4636                            device->handle,
4637                            &channel->channelHandle,
4638                            device->hostClass,
4639                            &pAllocInfo->gpFifoAllocParams);
4640     if (status != NV_OK)
4641     {
4642         goto cleanup_free_virtual;
4643     }
4644 
4645     // Query runlist ID
4646     pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
4647     status = kfifoEngineInfoXlate_HAL(pGpu,
4648                                       pKernelFifo,
4649                                       ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
4650                                       (NvU32)channelEngineType(channel),
4651                                       ENGINE_INFO_TYPE_RUNLIST,
4652                                       &channel->hwRunlistId);
4653     if (status != NV_OK)
4654         goto cleanup_free_virtual;
4655 
4656     // Query channel ID
4657     status = nvGpuOpsChannelGetHwChannelId(channel, &channel->hwChannelId);
4658     if (status != NV_OK)
4659         goto cleanup_free_channel;
4660 
4661     // Map USERD (controlPage)
4662     if (!isDeviceVoltaPlus(device))
4663     {
4664         status = pRmApi->MapToCpu(pRmApi,
4665                                   session->handle,
4666                                   device->subhandle,
4667                                   channel->channelHandle,
4668                                   0,
4669                                   sizeof(KeplerAControlGPFifo),
4670                                   &gpfifoCtrl,
4671                                   0);
4672         if (status != NV_OK)
4673             goto cleanup_free_channel;
4674     }
4675 
4676     channel->controlPage = gpfifoCtrl;
4677 
4678     status = channelRetainDummyAlloc(channel, channelInfo);
4679     if (status != NV_OK)
4680         goto cleanup_free_controlpage;
4681 
4682     // Allocate the SW method class for fault cancel
4683     if (isDevicePascalPlus(device) && (engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2))
4684     {
4685         channel->hFaultCancelSwMethodClass = NV01_NULL_OBJECT;
4686         status = pRmApi->Alloc(pRmApi,
4687                                session->handle,
4688                                channel->channelHandle,
4689                                &channel->hFaultCancelSwMethodClass,
4690                                GP100_UVM_SW,
4691                                NULL);
4692         if (status != NV_OK)
4693             goto cleanup_free_controlpage;
4694     }
4695 
4696     portMemFree(pAllocInfo);
4697 
4698     *channelHandle = channel;
4699     channelInfo->gpGet = &channel->controlPage->GPGet;
4700     channelInfo->gpPut = &channel->controlPage->GPPut;
4701     channelInfo->gpFifoEntries = channel->gpFifoEntries;
4702     channelInfo->channelClassNum = device->hostClass;
4703     channelInfo->numGpFifoEntries = channel->fifoEntries;
4704     channelInfo->errorNotifier = channel->errorNotifier;
4705     channelInfo->hwRunlistId = channel->hwRunlistId;
4706     channelInfo->hwChannelId = channel->hwChannelId;
4707 
4708     return NV_OK;
4709 
4710 cleanup_free_controlpage:
4711     if (!isDeviceVoltaPlus(device) && (gpfifoCtrl != NULL))
4712         pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, channel->channelHandle, gpfifoCtrl, 0, pid);
4713 cleanup_free_channel:
4714     pRmApi->Free(pRmApi, session->handle, channel->channelHandle);
4715 cleanup_free_virtual:
4716     if (isDeviceVoltaPlus(device))
4717     {
4718         if (gpfifoCtrl != NULL)
4719             nvGpuOpsMemoryCpuUnMap(vaSpace, gpfifoCtrl);
4720 
4721         if (channel->userdGpuAddr != 0)
4722             nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr);
4723     }
4724 
4725     nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset);
4726 cleanup_unmap_gpfifo_entries:
4727     nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries);
4728 cleanup_free_gpfifo_entries:
4729     nvGpuOpsMemoryFree(vaSpace, channel->gpFifo);
4730 cleanup_free_memory:
4731     channelReleaseDummyAlloc(channel);
4732     portMemFree(channel);
4733     portMemFree(pAllocInfo);
4734 
4735     return status;
4736 }
4737 
4738 static NV_STATUS engineAllocate(struct gpuChannel *channel, gpuChannelInfo *channelInfo, UVM_GPU_CHANNEL_ENGINE_TYPE engineType)
4739 {
4740     NV_STATUS status = NV_OK;
4741     struct gpuObject *object = NULL;
4742     NVB0B5_ALLOCATION_PARAMETERS ceAllocParams = {0};
4743     NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS channelGrpParams = {0};
4744     struct gpuAddressSpace *vaSpace = NULL;
4745     struct gpuDevice *device = NULL;
4746     struct gpuSession *session = NULL;
4747     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4748     NvU32 class;
4749     void *params;
4750 
4751     NV_ASSERT(channel);
4752     NV_ASSERT(channelInfo);
4753     NV_ASSERT(channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE ||
4754         channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);
4755 
4756     // TODO: Bug 2458492: Ampere-SMC Verify GR/CE indices within partition
4757 
4758     vaSpace = channel->vaSpace;
4759     NV_ASSERT(vaSpace);
4760     device = vaSpace->device;
4761     NV_ASSERT(device);
4762     session = device->session;
4763     NV_ASSERT(session);
4764 
4765     object = portMemAllocNonPaged(sizeof(*object));
4766     if (object == NULL)
4767         return NV_ERR_NO_MEMORY;
4768 
4769     object->handle = NV01_NULL_OBJECT;
4770 
4771     if (engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE)
4772     {
4773         ceAllocParams.version = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
4774         ceAllocParams.engineType = NV2080_ENGINE_TYPE_COPY(channel->engineIndex);
4775         params = &ceAllocParams;
4776         class = device->ceClass;
4777     }
4778     else
4779     {
4780         params = NULL;
4781         class = device->sec2Class;
4782     }
4783 
4784     status = pRmApi->Alloc(pRmApi, session->handle,
4785                        channel->channelHandle,
4786                        &object->handle,
4787                        class,
4788                        params);
4789 
4790     if (status != NV_OK)
4791         goto cleanup_free_memory;
4792 
    // On Volta+ GPUs, the channel has a work submission offset that is used as a doorbell.
4794     if (isDeviceVoltaPlus(device))
4795     {
4796         status = nvGpuOpsGetWorkSubmissionInfo(vaSpace, channel);
4797         if (status != NV_OK)
4798             goto cleanup_free_engine;
4799 
4800         channelInfo->workSubmissionOffset = channel->workSubmissionOffset;
4801         channelInfo->workSubmissionToken = channel->workSubmissionToken;
4802         channelInfo->pWorkSubmissionToken = channel->pWorkSubmissionToken;
4803     }
4804 
4805     // Schedule the channel
4806     channelGrpParams.bEnable = NV_TRUE;
4807     status = pRmApi->Control(pRmApi,
4808                              session->handle,
4809                              channel->channelHandle,
4810                              NVA06F_CTRL_CMD_GPFIFO_SCHEDULE,
4811                              &channelGrpParams,
4812                              sizeof(channelGrpParams));
4813 
4814     if (status != NV_OK)
4815         goto cleanup_free_engine;
4816 
4817     object->next = channel->nextAttachedEngine;
4818     channel->nextAttachedEngine = object;
4819     object->type = class;
4820 
4821     return NV_OK;
4822 
4823 cleanup_free_engine:
4824     pRmApi->Free(pRmApi, session->handle, object->handle);
4825 cleanup_free_memory:
4826     portMemFree(object);
4827     return status;
4828 }
4829 
4830 NV_STATUS nvGpuOpsChannelAllocate(struct gpuAddressSpace *vaSpace,
4831                                   const gpuChannelAllocParams *params,
4832                                   struct gpuChannel **channelHandle,
4833                                   gpuChannelInfo *channelInfo)
4834 {
4835     NV_STATUS status;
4836     UVM_GPU_CHANNEL_ENGINE_TYPE channelType = params->engineType;
4837 
4838     NV_ASSERT_OR_RETURN((channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2), NV_ERR_NOT_SUPPORTED);
4839 
4840     status = channelAllocate(vaSpace, channelType, params,
4841                              channelHandle, channelInfo);
4842     if (status != NV_OK)
4843         return status;
4844 
4845     status = engineAllocate(*channelHandle, channelInfo, channelType);
4846     if (status != NV_OK)
4847         nvGpuOpsChannelDestroy(*channelHandle);
4848 
4849     return status;
4850 }
4851 
4852 void nvGpuOpsChannelDestroy(struct gpuChannel *channel)
4853 {
4854     struct gpuObject *nextEngine;
4855     struct gpuObject *currEngine;
4856     NvU32 pid = osGetCurrentProcess();
4857     struct gpuAddressSpace *vaSpace = NULL;
4858     struct gpuDevice *device = NULL;
4859     struct gpuSession *session = NULL;
4860     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4861 
4862     if (!channel)
4863         return;
4864 
4865     vaSpace = channel->vaSpace;
4866     NV_ASSERT(vaSpace);
4867     device = vaSpace->device;
4868     NV_ASSERT(device);
4869     session = device->session;
4870     NV_ASSERT(session);
4871 
4872     // destroy the engines under this channel
4873     if (channel->nextAttachedEngine)
4874     {
4875         currEngine = channel->nextAttachedEngine;
4876         nextEngine = currEngine;
4877         do
4878         {
4879             currEngine = nextEngine;
4880             nextEngine = currEngine->next;
4881             pRmApi->Free(pRmApi, session->handle, currEngine->handle);
4882             portMemFree(currEngine);
4883         } while (nextEngine != NULL);
4884     }
4885 
4886     // Tear down the channel
4887     if (isDevicePascalPlus(device))
4888         pRmApi->Free(pRmApi, session->handle, channel->hFaultCancelSwMethodClass);
4889 
4890     if (isDeviceVoltaPlus(device))
4891     {
4892         nvGpuOpsMemoryCpuUnMap(vaSpace, (void *)channel->controlPage);
4893         nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr);
4894     }
4895     else
4896     {
4897         pRmApi->UnmapFromCpu(pRmApi,
4898                              session->handle,
4899                              device->subhandle,
4900                              channel->channelHandle,
4901                              (void *)channel->controlPage,
4902                              0,
4903                              pid);
4904     }
4905 
4906     // Free the channel
4907     pRmApi->Free(pRmApi, session->handle, channel->channelHandle);
4908 
4909     nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset);
4910 
4911     nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries);
4912 
4913     nvGpuOpsMemoryFree(vaSpace, channel->gpFifo);
4914 
4915     channelReleaseDummyAlloc(channel);
4916 
4917     portMemFree(channel);
4918 }
4919 
4920 static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc)
4921 {
4922     PNODE btreeNode;
4923     NV_ASSERT(desc);
4924     NV_ASSERT(pRoot);
4925 
4926     btreeNode = (PNODE)desc;
4927 
4928     btreeNode->keyStart = key;
4929     btreeNode->keyEnd = key;
4930     btreeNode->Data = desc;
4931     return btreeInsert(btreeNode, pRoot);
4932 }
4933 
4934 static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc)
4935 {
4936     PNODE btreeNode = NULL;
4937     NV_STATUS status = NV_OK;
4938 
4939     NV_ASSERT(desc);
4940 
4941     status = btreeSearch(key, &btreeNode, pRoot);
4942     if (status != NV_OK)
4943         return status;
4944 
4945     *desc = btreeNode->Data;
4946     return NV_OK;
4947 }
4948 
4949 static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc)
4950 {
4951     PNODE btreeNode = NULL;
4952     NV_STATUS status = NV_OK;
4953 
4954     NV_ASSERT(desc);
4955     NV_ASSERT(pRoot);
4956 
4957     status = btreeSearch(key, &btreeNode, *pRoot);
4958     if (status != NV_OK)
        return status;
4960 
4961     *desc = btreeNode->Data;
    return btreeUnlink(btreeNode, pRoot);
4964 }
4965 
4966 static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode)
4967 {
4968     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4969     gpuMemDesc *memDesc = NULL;
4970 
4971     if (pNode == NULL)
4972         return NV_OK;
4973 
4974     destroyAllGpuMemDescriptors(hClient, pNode->left);
4975     destroyAllGpuMemDescriptors(hClient, pNode->right);
4976 
4977     memDesc = (gpuMemDesc*)pNode->Data;
4978     if (memDesc->childHandle)
4979         pRmApi->Free(pRmApi, hClient, memDesc->childHandle);
4980 
4981     if (memDesc->handle)
4982         pRmApi->Free(pRmApi, hClient, memDesc->handle);
4983 
4984     portMemFree(pNode->Data);
4985 
4986     return NV_OK;
4987 }
4988 
// Returns the physical (childHandle) or virtual (handle) memdesc handle associated with a VA.
4990 static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace,
4991                                          NvU64 allocationAddress,
4992                                          NvBool bPhysical,
4993                                          NvHandle *pHandle)
4994 {
4995     NV_STATUS status = NV_OK;
4996     gpuMemDesc *memDesc = NULL;
4997 
4998     NV_ASSERT(vaSpace);
4999     NV_ASSERT(pHandle);
5000 
5001     portSyncRwLockAcquireRead(vaSpace->allocationsLock);
5002     status = findDescriptor(vaSpace->allocations, allocationAddress, (void**)&memDesc);
5003     portSyncRwLockReleaseRead(vaSpace->allocationsLock);
5004     if (status != NV_OK)
5005         return status;
5006 
5007     NV_ASSERT(memDesc);
5008 
5009     *pHandle =  bPhysical ? memDesc->childHandle : memDesc->handle;
5010 
5011     if (!*pHandle)
5012         return NV_ERR_GENERIC;
5013 
5014     return NV_OK;
5015 }
5016 
5017 //
5018 // Returns a cpu mapping to the provided GPU Offset
5019 //
5020 NV_STATUS nvGpuOpsMemoryCpuMap(struct gpuAddressSpace *vaSpace,
5021                                NvU64 memory,
5022                                NvLength length,
5023                                void **cpuPtr,
5024                                NvU64 pageSize)
5025 {
5026     gpuMemDesc *memDesc = NULL;
5027     cpuMappingDesc *cpuMapDesc = NULL;
5028     NV_STATUS status;
5029     void *pMappedAddr = NULL;
5030     NvP64 mappedAddr = 0;
5031     NvU32 flags = 0;
5032     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
5033 
5034     if (!vaSpace || !cpuPtr)
5035         return NV_ERR_INVALID_ARGUMENT;
5036 
5037     cpuMapDesc = portMemAllocNonPaged(sizeof(*cpuMapDesc));
5038     if (cpuMapDesc == NULL)
5039         return NV_ERR_GENERIC;
5040 
5041     portSyncRwLockAcquireRead(vaSpace->allocationsLock);
5042     status = findDescriptor(vaSpace->allocations, memory, (void**)&memDesc);
5043     portSyncRwLockReleaseRead(vaSpace->allocationsLock);
5044     if (status != NV_OK)
5045         goto cleanup_desc;
5046 
5047     NV_ASSERT(memDesc);
5048     NV_ASSERT(memDesc->childHandle);
5049 
5050     //
5051     // Set correct page size for Bar mappings.
5052     //
5053     if (pageSize == RM_PAGE_SIZE)
5054     {
5055         flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB);
5056     }
5057     else if (pageSize == RM_PAGE_SIZE_HUGE)
5058     {
5059         // TODO: this flag is ignored, remove it once it is deprecated
5060         flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE);
5061     }
5062     else
5063     {
5064         flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT);
5065     }
5066 
5067     //
5068     // If the length passed in is zero we will force the mapping
5069     // to the size that was used for allocation of the passed in
5070     // NvU64
5071     //
5072     status = pRmApi->MapToCpu(pRmApi,
5073                               vaSpace->device->session->handle,
5074                               vaSpace->device->subhandle,
5075                               memDesc->childHandle,
5076                               0,
5077                               length != 0 ? length : memDesc->size,
5078                               &pMappedAddr,
5079                               flags);
5080     if (status != NV_OK)
5081         goto cleanup_desc;
5082 
5083     mappedAddr = NV_PTR_TO_NvP64(pMappedAddr);
5084 
5085     cpuMapDesc->cpuPointer = (NvUPtr) mappedAddr;
5086     cpuMapDesc->handle = memDesc->childHandle;
5087     cpuMapDesc->btreeNode.keyStart = (NvU64)cpuMapDesc->cpuPointer;
5088     cpuMapDesc->btreeNode.keyEnd = (NvU64)cpuMapDesc->cpuPointer;
5089     cpuMapDesc->btreeNode.Data = (void *) cpuMapDesc;
5090 
5091     // Track CPU memdesc
5092     portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
5093     status = btreeInsert(&cpuMapDesc->btreeNode, &vaSpace->cpuMappings);
5094     portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
5095     if (status != NV_OK)
5096         goto cleanup_desc;
5097 
    // The CPU address can be used as the key because the BAR1 address space is unique.
5099     *cpuPtr = NvP64_VALUE(mappedAddr);
5100 
5101     return NV_OK;
5102 
5103 cleanup_desc:
5104     portMemFree(cpuMapDesc);
5105     return status;
5106 }
5107 
5108 void nvGpuOpsMemoryCpuUnMap(struct gpuAddressSpace *vaSpace, void *cpuPtr)
5109 {
    unsigned pid = 0;
5111     cpuMappingDesc *mappingDesc = NULL;
5112     PNODE btreeNode;
5113     NV_STATUS status = NV_OK;
5114     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
5115 
5116     if (!vaSpace || !cpuPtr)
5117         return;
5118 
5119     portSyncRwLockAcquireRead(vaSpace->cpuMappingsLock);
5120     status = btreeSearch((NvUPtr)cpuPtr, &btreeNode, vaSpace->cpuMappings);
5121     portSyncRwLockReleaseRead(vaSpace->cpuMappingsLock);
5122     if (status != NV_OK)
5123         return;
5124 
5125     mappingDesc = (cpuMappingDesc *)btreeNode->Data;
5126     if (mappingDesc)
5127     {
5128         pid = osGetCurrentProcess();
5129         status = pRmApi->UnmapFromCpu(pRmApi,
5130                                       vaSpace->device->session->handle,
5131                                       vaSpace->device->subhandle,
5132                                       mappingDesc->handle,
5133                                       NvP64_VALUE(((NvP64)mappingDesc->cpuPointer)),
5134                                       0,
5135                                       pid);
5136         NV_ASSERT(status == NV_OK);
5137     }
5138 
5139     portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
5140     btreeUnlink(btreeNode, &vaSpace->cpuMappings);
5141     portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
5142 
5143     portMemFree(mappingDesc);
5144     return;
5145 }
5146 
// This function frees both the physical and the virtual memory allocations.
// It is the counterpart of nvGpuOpsGpuMalloc().
5149 void nvGpuOpsMemoryFree(struct gpuAddressSpace *vaSpace, NvU64 pointer)
5150 {
5151     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
5152     gpuMemDesc *memDesc = NULL;
5153 
5154     NV_ASSERT(vaSpace);
5155 
5156     portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
5157     deleteDescriptor(&vaSpace->allocations, pointer, (void**)&memDesc);
5158     portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
5159 
5160     NV_ASSERT(memDesc);
5161     NV_ASSERT(memDesc->childHandle);
5162     NV_ASSERT(memDesc->handle);
5163 
5164     // Free physical allocation
5165     pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->childHandle);
5166 
5167     // Free virtual allocation
5168     pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle);
5169 
5170     portMemFree(memDesc);
5171 }
5172 
5173 
5174 
5175 NV_STATUS nvGpuOpsQueryCesCaps(struct gpuDevice *device,
5176                                gpuCesCaps *cesCaps)
5177 {
5178     NV_STATUS status;
5179     nvGpuOpsLockSet acquiredLocks;
5180     THREAD_STATE_NODE threadState;
5181 
5182     if (!device || !cesCaps)
5183         return NV_ERR_INVALID_ARGUMENT;
5184 
5185     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5186     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
5187                                       device->session->handle,
5188                                       NULL,
5189                                       &acquiredLocks);
5190     if (status != NV_OK)
5191     {
5192         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5193         return status;
5194     }
5195 
5196     // Refresh CE information, which may have changed if a GPU has been
5197     // initialized by RM for the first time
5198     status = queryCopyEngines(device, cesCaps);
5199     _nvGpuOpsLocksRelease(&acquiredLocks);
5200     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5201     return status;
5202 }
5203 
5204 NV_STATUS nvGpuOpsQueryCaps(struct gpuDevice *device, gpuCaps *caps)
5205 {
5206     NV_STATUS status;
5207     nvGpuOpsLockSet acquiredLocks;
5208     THREAD_STATE_NODE threadState;
5209     OBJGPU *pGpu = NULL;
5210     KernelMemorySystem *pKernelMemorySystem;
5211     NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS infoParams = {0};
5212     struct gpuSession *session = device->session;
5213     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
5214 
5215     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5216     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, device->session->handle, NULL, &acquiredLocks);
5217     if (status != NV_OK)
5218     {
5219         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5220         return status;
5221     }
5222 
5223     caps->sysmemLink = device->sysmemLink;
5224     caps->sysmemLinkRateMBps = device->sysmemLinkRateMBps;
5225     caps->connectedToSwitch = device->connectedToSwitch;
5226 
5227     infoParams.gpuId = device->gpuId;
5228     status = pRmApi->Control(pRmApi,
5229                              session->handle,
5230                              session->handle,
5231                              NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2,
5232                              &infoParams,
5233                              sizeof(infoParams));
5234     if (status != NV_OK)
5235     {
5236         _nvGpuOpsLocksRelease(&acquiredLocks);
5237         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5238         return status;
5239     }
5240 
5241     if (infoParams.numaId != NV0000_CTRL_NO_NUMA_NODE)
5242     {
5243         caps->numaEnabled = NV_TRUE;
5244         caps->numaNodeId = infoParams.numaId;
5245     }
5246 
5247     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
5248     if (status != NV_OK)
5249     {
5250         _nvGpuOpsLocksRelease(&acquiredLocks);
5251         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5252         return status;
5253     }
5254 
5255     pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
5256     if (!pKernelMemorySystem)
5257     {
5258         _nvGpuOpsLocksRelease(&acquiredLocks);
5259         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5260         return NV_ERR_OBJECT_NOT_FOUND;
5261     }
5262 
5263     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
5264     {
5265         caps->systemMemoryWindowStart = pKernelMemorySystem->coherentCpuFbBase;
5266         caps->systemMemoryWindowSize = pKernelMemorySystem->coherentCpuFbEnd -
5267             pKernelMemorySystem->coherentCpuFbBase;
5268     }
5269     else
5270     {
5271         caps->systemMemoryWindowStart = 0;
5272         caps->systemMemoryWindowSize = 0;
5273     }
5274 
5275     if (device->connectedToSwitch)
5276     {
5277         KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
5278         if (pKernelNvlink == NULL)
5279         {
5280             caps->nvswitchMemoryWindowStart = NVLINK_INVALID_FABRIC_ADDR;
5281         }
5282         else
5283         {
5284             caps->nvswitchMemoryWindowStart = knvlinkGetUniqueFabricBaseAddress(
5285                                                             pGpu, pKernelNvlink);
5286         }
5287     }
5288 
5289     _nvGpuOpsLocksRelease(&acquiredLocks);
5290     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5291     return NV_OK;
5292 }
5293 
5294 static NV_STATUS findVaspaceFromPid(unsigned pid, unsigned gpuId,
5295                  NvHandle *hClient, NvHandle *hDevice,
5296                  NvHandle *hSubdevice, NvHandle *hVaSpace)
5297 {
5298     //
5299     // This function iterates through all the vaspace objects under the client,
5300     // that matches the pid argument, and returns any address space that is
5301     // tagged as UVM.
5302     //
5303     Device *pDevice = NULL;
5304     Subdevice *pSubDevice = NULL;
5305     OBJVASPACE *pVAS = NULL;
5306     OBJGPU *pGpu;
5307     unsigned hDeviceLocal = 0;
5308     unsigned hSubDeviceLocal = 0;
5309     NV_STATUS status;
5310     RmClient **ppClient;
5311     RmClient  *pClient;
5312     RsClient  *pRsClient;
5313 
5314     for (ppClient = serverutilGetFirstClientUnderLock();
5315          ppClient;
5316          ppClient = serverutilGetNextClientUnderLock(ppClient))
5317     {
5318         pClient = *ppClient;
5319         pRsClient = staticCast(pClient, RsClient);
5320         if (pClient->ProcID == pid)
5321         {
5322             pGpu = gpumgrGetGpuFromId(gpuId);
5323             if (!pGpu)
5324                 return NV_ERR_INVALID_ARGUMENT;
5325 
5326             pSubDevice = CliGetSubDeviceInfoFromGpu(pRsClient->hClient,
5327                                                     pGpu);
5328 
5329             status = deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice);
5330             if (status == NV_OK)
5331             {
5332                 hDeviceLocal = RES_GET_HANDLE(pDevice);
5333 
5334                 if (pSubDevice != NULL)
5335                     hSubDeviceLocal = RES_GET_HANDLE(pSubDevice);
5336 
5337                 *hClient = pRsClient->hClient;
5338                 *hDevice = hDeviceLocal;
5339                 *hSubdevice = hSubDeviceLocal;
5340 
5341                 if (pDevice->vaMode !=
5342                     NV_DEVICE_ALLOCATION_VAMODE_MULTIPLE_VASPACES)
5343                 {
5344                     status = vaspaceGetByHandleOrDeviceDefault(pRsClient, hDeviceLocal, 0, &pVAS);
5345                     if ((status != NV_OK) || (pVAS == NULL))
5346                         return NV_ERR_GENERIC;
5347 
5348                     //
5349                     // TODO: Bug 1632484:
5350                     // Check to see if pVAS is UVM_MANAGED, once
5351                     // that vaspace property is introduced.
5352                     // No need to check FaultCapable.
5353                     //
5354                     if ((vaspaceIsMirrored(pVAS)) ||
5355                         (vaspaceIsFaultCapable(pVAS)))
5356                     {
5357                         //
5358                         // This means that this client is
5359                         // using the vaspace associated to its device
5360                         //
5361                         *hVaSpace = 0;
5362                         return NV_OK;
5363                     }
5364                 }
5365 
5366                 //
5367                 // if the default VASPACE is not tagged as UVM
5368                 // will search for all vaspace objects under
5369                 // this client for this device to find the first
5370                 // vaspace that is tagged as UVM.
5371                 //
5372                 if (findUvmAddressSpace(*hClient, pGpu->gpuInstance, hVaSpace, &pVAS) == NV_OK)
5373                 {
5374                     return NV_OK;
5375                 }
5376             }
5377         }
5378     }
5379     return NV_ERR_GENERIC;
5380 }
5381 
5382 //
5383 // This function will look through all the vaspaces under a client for a device and return
5384 // the one that is tagged as UVM, or NULL if there is no UVM vaspace.
5385 //
5386 static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *phVaSpace, OBJVASPACE **ppVASpace)
5387 {
5388     RsResourceRef *pResourceRef;
5389     RS_ITERATOR    iter;
5390     NvU32          gpuMask = NVBIT(gpuInstance);
5391 
5392     iter = serverutilRefIter(hClient, NV01_NULL_OBJECT, classId(VaSpaceApi), RS_ITERATE_DESCENDANTS, NV_TRUE);
5393 
5394     while (clientRefIterNext(iter.pClient, &iter))
5395     {
5396         pResourceRef = iter.pResourceRef;
5397 
5398         *ppVASpace = dynamicCast(pResourceRef->pResource, VaSpaceApi)->pVASpace;
5399         *phVaSpace = pResourceRef->hResource;
5400 
5401         if ((vaspaceIsMirrored(*ppVASpace) || vaspaceIsExternallyOwned(*ppVASpace)) &&
5402             (((*ppVASpace)->gpuMask & gpuMask) == gpuMask))
5403         {
5404             return NV_OK;
5405         }
5406     }
5407     *phVaSpace = 0;
5408     *ppVASpace = NULL;
5409     return NV_ERR_INVALID_ARGUMENT;
5410 }
5411 
5412 // Make sure UVM_GPU_NAME_LENGTH has the same length as
5413 // NV2080_GPU_MAX_NAME_STRING_LENGTH.
5414 ct_assert(NV2080_GPU_MAX_NAME_STRING_LENGTH == UVM_GPU_NAME_LENGTH);
5415 
5416 static void getGpcTpcInfo(OBJGPU *pGpu, gpuInfo *pGpuInfo)
5417 {
5418     KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
5419 
5420     pGpuInfo->maxTpcPerGpcCount = 0;
5421     pGpuInfo->maxGpcCount = 0;
5422     pGpuInfo->gpcCount = 0;
5423     pGpuInfo->tpcCount = 0;
5424 
5425     NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.bInitialized);
5426     NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo != NULL);
5427 
5428     pGpuInfo->maxTpcPerGpcCount =
5429         pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_TPC_PER_GPC].data;
5430     pGpuInfo->maxGpcCount =
5431         pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS].data;
5432     pGpuInfo->gpcCount =
5433         nvPopCount32(pKernelGraphicsManager->legacyKgraphicsStaticInfo.floorsweepingMasks.gpcMask);
5434 
5435     //
5436     // When MIG GPU partitioning is enabled, compute the upper bound on the number
5437     // of TPCs that may be available in this partition, to enable UVM to
5438     // conservatively size relevant data structures.
5439     //
5440     if (IS_MIG_IN_USE(pGpu))
5441     {
5442         pGpuInfo->tpcCount = pGpuInfo->gpcCount * pGpuInfo->maxTpcPerGpcCount;
5443     }
5444     else
5445     {
5446         KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
5447         const KGRAPHICS_STATIC_INFO *pKernelGraphicsStaticInfo = kgraphicsGetStaticInfo(pGpu, pKernelGraphics);
5448 
5449         NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsStaticInfo != NULL);
5450         pGpuInfo->tpcCount = pKernelGraphicsStaticInfo->pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_SHADER_PIPE_SUB_COUNT].data;
5451     }
5452 }
5453 
5454 static NV_STATUS queryVirtMode(NvHandle hClient, NvHandle hDevice, NvU32 *virtMode)
5455 {
5456     NV_STATUS status = NV_OK;
5457     *virtMode = UVM_VIRT_MODE_NONE;
5458     return status;
5459 }
5460 
5461 NV_STATUS nvGpuOpsGetGpuInfo(const NvProcessorUuid *pUuid,
5462                              const gpuClientInfo *pGpuClientInfo,
5463                              gpuInfo *pGpuInfo)
5464 {
5465     NV_STATUS                            status;
5466     NV0080_ALLOC_PARAMETERS              nv0080AllocParams = {0};
5467     NV2080_ALLOC_PARAMETERS              nv2080AllocParams = {0};
5468     NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
5469     NV2080_CTRL_MC_GET_ARCH_INFO_PARAMS  archInfoParams = {0};
5470     NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS gpuNameParams = {0};
5471     NvHandle                             clientHandle           = 0;
5472     NvHandle                             deviceHandle           = 1;
5473     NvHandle                             subDeviceHandle        = 2;
5474     NvBool                               isClientAllocated      = NV_FALSE;
5475     NvBool                               isDeviceAllocated      = NV_FALSE;
5476     NvBool                               isSubdeviceAllocated   = NV_FALSE;
5477     NV0080_CTRL_GPU_GET_NUM_SUBDEVICES_PARAMS subDevParams = { 0 };
5478     NV2080_CTRL_GPU_GET_SIMULATION_INFO_PARAMS simulationInfoParams = {0};
5479     OBJGPU                              *pGpu = NULL;
5480     RM_API                              *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
5481     NvU32                                dummy;
5482 
5483     pGpu = gpumgrGetGpuFromUuid(pUuid->uuid,
5484                                 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) |
5485                                 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY));
5486     if (!pGpu)
5487     {
5488         return NV_ERR_GPU_UUID_NOT_FOUND;
5489     }
5490 
5491     if (!osIsGpuAccessible(pGpu))
5492     {
5493         return NV_ERR_INSUFFICIENT_PERMISSIONS;
5494     }
5495 
5496     status = nvGpuOpsCreateClient(pRmApi, &clientHandle);
5497     if (status != NV_OK)
5498     {
5499         return status;
5500     }
5501 
5502     isClientAllocated = NV_TRUE;
5503 
5504     portMemCopy(&gpuIdInfoParams.gpuUuid, sizeof(*pUuid), pUuid, sizeof(*pUuid));
5505 
5506     gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
5507     status = pRmApi->Control(pRmApi,
5508                              clientHandle,
5509                              clientHandle,
5510                              NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
5511                              &gpuIdInfoParams,
5512                              sizeof(gpuIdInfoParams));
5513     if (NV_OK != status)
5514         goto cleanup;
5515 
5516     nv0080AllocParams.deviceId = gpuIdInfoParams.deviceInstance;
5517 
5518     status = pRmApi->Alloc(pRmApi,
5519                            clientHandle,
5520                            clientHandle,
5521                            &deviceHandle,
5522                            NV01_DEVICE_0,
5523                            &nv0080AllocParams);
5524     if (NV_OK != status)
5525         goto cleanup;
5526 
5527     isDeviceAllocated = NV_TRUE;
5528 
5529     nv2080AllocParams.subDeviceId = gpuIdInfoParams.subdeviceInstance;
5530     status = pRmApi->Alloc(pRmApi,
5531                            clientHandle,
5532                            deviceHandle,
5533                            &subDeviceHandle,
5534                            NV20_SUBDEVICE_0,
5535                            &nv2080AllocParams);
5536     if (NV_OK != status)
5537         goto cleanup;
5538 
5539     isSubdeviceAllocated = NV_TRUE;
5540 
5541     portMemCopy(&pGpuInfo->uuid, sizeof(*pUuid), pUuid, sizeof(*pUuid));
5542 
5543     status = pRmApi->Control(pRmApi,
5544                              clientHandle,
5545                              subDeviceHandle,
5546                              NV2080_CTRL_CMD_MC_GET_ARCH_INFO,
5547                              &archInfoParams,
5548                              sizeof(archInfoParams));
5549     if (NV_OK != status)
5550         goto cleanup;
5551 
5552     pGpuInfo->gpuArch = archInfoParams.architecture;
5553     pGpuInfo->gpuImplementation = archInfoParams.implementation;
5554 
5555     gpuNameParams.gpuNameStringFlags = NV2080_CTRL_GPU_GET_NAME_STRING_FLAGS_TYPE_ASCII;
5556     status = pRmApi->Control(pRmApi,
5557                              clientHandle,
5558                              subDeviceHandle,
5559                              NV2080_CTRL_CMD_GPU_GET_NAME_STRING,
5560                              &gpuNameParams,
5561                              sizeof(gpuNameParams));
5562     if (NV_OK != status)
5563         goto cleanup;
5564 
5565     portStringCopy(pGpuInfo->name, sizeof(pGpuInfo->name),
5566                    (const char *)gpuNameParams.gpuNameString.ascii,
5567                    sizeof(gpuNameParams.gpuNameString.ascii));
5568 
5569     status = queryVirtMode(clientHandle, deviceHandle, &pGpuInfo->virtMode);
5570     if (status != NV_OK)
5571         goto cleanup;
5572 
5573     pGpuInfo->gpuInTcc = NV_FALSE;
5574 
5575     status = findDeviceClasses(clientHandle,
5576                                deviceHandle,
5577                                subDeviceHandle,
5578                                &pGpuInfo->hostClass,
5579                                &pGpuInfo->ceClass,
5580                                &pGpuInfo->computeClass,
5581                                &dummy,
5582                                &dummy,
5583                                &dummy);
5584     if (status != NV_OK)
5585         goto cleanup;
5586 
5587     status = pRmApi->Control(pRmApi,
5588                              clientHandle,
5589                              deviceHandle,
5590                              NV0080_CTRL_CMD_GPU_GET_NUM_SUBDEVICES,
5591                              &subDevParams,
5592                              sizeof(subDevParams));
5593     if (status != NV_OK)
5594         goto cleanup;
5595 
5596     pGpuInfo->subdeviceCount = subDevParams.numSubDevices;
5597 
5598     getGpcTpcInfo(pGpu, pGpuInfo);
5599 
5600     if (IS_MIG_IN_USE(pGpu))
5601     {
5602         NvU32 swizzId;
5603 
5604         NV_ASSERT(pGpuInfo->subdeviceCount == 1);
5605 
5606         status = getSwizzIdFromUserSmcPartHandle(pRmApi,
5607                                                  clientHandle,
5608                                                  deviceHandle,
5609                                                  pGpuClientInfo->hClient,
5610                                                  pGpuClientInfo->hSmcPartRef,
5611                                                  &swizzId);
5612         if (status != NV_OK)
5613             goto cleanup;
5614 
5615         pGpuInfo->smcEnabled              = NV_TRUE;
5616         pGpuInfo->smcSwizzId              = swizzId;
5617         pGpuInfo->smcUserClientInfo.hClient     = pGpuClientInfo->hClient;
5618         pGpuInfo->smcUserClientInfo.hSmcPartRef = pGpuClientInfo->hSmcPartRef;
5619     }
5620 
5621     status = pRmApi->Control(pRmApi,
5622                              clientHandle,
5623                              subDeviceHandle,
5624                              NV2080_CTRL_CMD_GPU_GET_SIMULATION_INFO,
5625                              &simulationInfoParams,
5626                              sizeof(simulationInfoParams));
5627     if (status != NV_OK)
5628         goto cleanup;
5629 
5630     pGpuInfo->isSimulated = (simulationInfoParams.type != NV2080_CTRL_GPU_GET_SIMULATION_INFO_TYPE_NONE);
5631 
5632 cleanup:
5633     if (isSubdeviceAllocated)
5634         pRmApi->Free(pRmApi, clientHandle, subDeviceHandle);
5635 
5636     if (isDeviceAllocated)
5637         pRmApi->Free(pRmApi, clientHandle, deviceHandle);
5638 
5639     if (isClientAllocated)
5640         pRmApi->Free(pRmApi, clientHandle, clientHandle);
5641 
5642     return status;
5643 }
5644 
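//
// Translate a GPU UUID into its RM deviceInstance and subdeviceInstance.
// A temporary RM client is created for the NV0000 UUID-info control call
// and freed before returning.
//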
5645 NV_STATUS nvGpuOpsGetGpuIds(const NvU8 *pUuid,
5646                             unsigned uuidLength,
5647                             NvU32 *pDeviceId,
5648                             NvU32 *pSubdeviceId)
5649 {
5650     NV_STATUS nvStatus;
5651     nvGpuOpsLockSet acquiredLocks;
5652     THREAD_STATE_NODE threadState;
5653     NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
5654     NvHandle clientHandle = 0;
5655     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
5656 
5657     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5658     nvStatus = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
5659     if (nvStatus != NV_OK)
5660     {
5661         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5662         return nvStatus;
5663     }
5664 
5665     nvStatus = nvGpuOpsCreateClient(pRmApi, &clientHandle);
5666     if (nvStatus != NV_OK)
5667     {
5668         _nvGpuOpsLocksRelease(&acquiredLocks);
5669         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5670         return nvStatus;
5671     }
5672 
5673     portMemCopy(&gpuIdInfoParams.gpuUuid, uuidLength, pUuid, uuidLength);
5674 
5675     gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
5676     nvStatus = pRmApi->Control(pRmApi,
5677                                clientHandle,
5678                                clientHandle,
5679                                NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
5680                                &gpuIdInfoParams,
5681                                sizeof(gpuIdInfoParams));
5682     if (NV_OK == nvStatus)
5683     {
5684         *pDeviceId = gpuIdInfoParams.deviceInstance;
5685         *pSubdeviceId = gpuIdInfoParams.subdeviceInstance;
5686     }
5687 
5688     pRmApi->Free(pRmApi, clientHandle, clientHandle);
5689 
5690     _nvGpuOpsLocksRelease(&acquiredLocks);
5691     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5692     return nvStatus;
5693 }
5694 
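// Service pending interrupts on all engines of the device's subdevice.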
5695 NV_STATUS nvGpuOpsServiceDeviceInterruptsRM(struct gpuDevice *device)
5696 {
5697     NV_STATUS status;
5698     nvGpuOpsLockSet acquiredLocks;
5699     THREAD_STATE_NODE threadState;
5700     NV2080_CTRL_MC_SERVICE_INTERRUPTS_PARAMS params = {0};
5701     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
5702 
5703     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5704     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle, NULL, &acquiredLocks);
5705     if (status != NV_OK)
5706     {
5707         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5708         return status;
5709     }
5710 
5711     params.engines = NV2080_CTRL_MC_ENGINE_ID_ALL;
5712     status = pRmApi->Control(pRmApi,
5713                              device->session->handle,
5714                              device->subhandle,
5715                              NV2080_CTRL_CMD_MC_SERVICE_INTERRUPTS,
5716                              &params,
5717                              sizeof(params));
5718 
5719     _nvGpuOpsLocksRelease(&acquiredLocks);
5720     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5721     return status;
5722 }
5723 
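//
// Report in *bEccDbeSet whether any ECC unit on the channel's GPU has a
// non-zero double-bit error (DBE) count. The query is skipped (and
// *bEccDbeSet left NV_FALSE) when ECC is not enabled on the device.
//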
5724 NV_STATUS nvGpuOpsCheckEccErrorSlowpath(struct gpuChannel *channel,
5725                                         NvBool *bEccDbeSet)
5726 {
5727     NV_STATUS status = NV_OK;
5728     nvGpuOpsLockSet acquiredLocks;
5729     THREAD_STATE_NODE threadState;
5730     NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
5731     NvU32 i = 0;
5732     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
5733 
5734     if (!channel || !bEccDbeSet)
5735     {
5736         return NV_ERR_INVALID_ARGUMENT;
5737     }
5738 
5739     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5740     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
5741                                       channel->vaSpace->device->session->handle,
5742                                       NULL,
5743                                       &acquiredLocks);
5744     if (status != NV_OK)
5745     {
5746         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5747         return status;
5748     }
5749 
5750     *bEccDbeSet = NV_FALSE;
5751 
5752     // Only query the ECC status if ECC is enabled on this device
5753     if (channel->vaSpace->device->rmSubDevice->bEccEnabled)
5754     {
5755         portMemSet(&eccStatus, 0, sizeof(eccStatus));
5756 
5757         status = pRmApi->Control(pRmApi,
5758                                  channel->vaSpace->device->session->handle,
5759                                  channel->vaSpace->device->subhandle,
5760                                  NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
5761                                  &eccStatus,
5762                                  sizeof(eccStatus));
5763         if (status != NV_OK)
5764         {
5765             _nvGpuOpsLocksRelease(&acquiredLocks);
5766             threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5767             return NV_ERR_GENERIC;
5768         }
5769 
5770         for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
5771         {
5772             if (eccStatus.units[i].dbe.count != 0)
5773             {
5774                 *bEccDbeSet = NV_TRUE;
5775             }
5776         }
5777     }
5778 
5779     _nvGpuOpsLocksRelease(&acquiredLocks);
5780     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5781     return status;
5782 }
5783 
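//
// Fill pGpuMemoryInfo from pMemDesc as seen by pMappingGpu: page size,
// size, contiguity, encoded physical address for contiguous allocations,
// PTE kind, aperture (sysmem vs. vidmem), and the owning GPU's UUID when
// the descriptor descends from a device.
//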
5784 static NV_STATUS nvGpuOpsFillGpuMemoryInfo(PMEMORY_DESCRIPTOR pMemDesc,
5785                                            OBJGPU *pMappingGpu,
5786                                            gpuMemoryInfo *pGpuMemoryInfo)
5787 {
5788     NV_STATUS status;
5789     PMEMORY_DESCRIPTOR pRootMemDesc = memdescGetRootMemDesc(pMemDesc, NULL);
5790     OBJGPU *pGpu = (pMemDesc->pGpu == NULL) ? pMappingGpu : pMemDesc->pGpu;
5791 
5792     status = nvGpuOpsMemGetPageSize(pMappingGpu,
5793                                     pMemDesc,
5794                                     &pGpuMemoryInfo->pageSize);
5795     if (status != NV_OK)
5796         return status;
5797 
5798     pGpuMemoryInfo->size = memdescGetSize(pMemDesc);
5799 
5800     pGpuMemoryInfo->contig = memdescGetContiguity(pMemDesc, AT_GPU);
5801 
5802     if (pGpuMemoryInfo->contig)
5803     {
5804         GMMU_APERTURE aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, NV_FALSE, NV_FALSE);
5805         NvU64 physAddr;
5806 
5807         memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, 0, 0, 1, &physAddr);
5808 
5809         pGpuMemoryInfo->physAddr =
5810             kgmmuEncodePhysAddr(GPU_GET_KERNEL_GMMU(pGpu), aperture, physAddr, NVLINK_INVALID_FABRIC_ADDR);
5811     }
5812 
5813     pGpuMemoryInfo->kind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
5814 
5815     pGpuMemoryInfo->sysmem = (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM);
5816 
5817     pGpuMemoryInfo->deviceDescendant = pRootMemDesc->pGpu != NULL;
5818 
5819     if (pGpuMemoryInfo->deviceDescendant)
5820     {
5821         NvU8 *uuid;
5822         NvU32 uuidLength, flags;
5823         NV_STATUS status;
5824         flags = DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) |
5825                 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY);
5826 
5827         // on success, allocates memory for uuid
5828         status = gpuGetGidInfo(pGpu, &uuid, &uuidLength, flags);
5829         if (status != NV_OK)
5830             return status;
5831 
5832         portMemCopy(&pGpuMemoryInfo->uuid, uuidLength, uuid, uuidLength);
5833         portMemFree(uuid);
5834     }
5835 
5836     return NV_OK;
5837 }
5838 
5839 static NvBool memdescIsSysmem(PMEMORY_DESCRIPTOR pMemDesc)
5840 {
5841     return memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM;
5842 }
5843 
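//
// Dup the physical allocation (hClient, hPhysMemory) under the device's
// session client and return the new handle in *hDupMemory, optionally
// filling *pGpuMemoryInfo. Sysmem and indirect-peer allocations also get
// an IOMMU mapping for the mapping GPU here; it is released when the
// duped handle is freed in nvGpuOpsFreeDupedHandle().
//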
5844 static NV_STATUS dupMemory(struct gpuDevice *device,
5845                            NvHandle hClient,
5846                            NvHandle hPhysMemory,
5847                            NvU32 flags,
5848                            NvHandle *hDupMemory,
5849                            gpuMemoryInfo *pGpuMemoryInfo)
5850 {
5851     NV_STATUS status = NV_OK;
5852     nvGpuOpsLockSet acquiredLocks;
5853     THREAD_STATE_NODE threadState;
5854     NvHandle  dupedMemHandle;
5855     Memory *pMemory =  NULL;
5856     PMEMORY_DESCRIPTOR pMemDesc = NULL;
5857     MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
5858     FABRIC_VASPACE *pFabricVAS = NULL;
5859     OBJGPU *pMappingGpu;
5860     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
5861     RsResourceRef *pResourceRef;
5862     RsResourceRef *pParentRef;
5863     struct gpuSession *session;
5864     NvHandle hParent;
5865     NvHandle hSubDevice;
5866     NvBool bIsIndirectPeer = NV_FALSE;
5867 
5868     if (!device || !hDupMemory)
5869         return NV_ERR_INVALID_ARGUMENT;
5870 
5871     NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));
5872 
5873     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
5874 
5875     // RS-TODO use dual client locking
5876     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
5877     if (status != NV_OK)
5878     {
5879         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5880         return status;
5881     }
5882 
5883     status = CliSetSubDeviceContext(device->session->handle,
5884                                     device->subhandle,
5885                                     &hSubDevice,
5886                                     &pMappingGpu);
5887 
5888     if (status != NV_OK)
5889         goto done;
5890 
5891     // Get all the necessary information about the memory
5892     status = nvGpuOpsGetMemoryByHandle(hClient,
5893                                        hPhysMemory,
5894                                        &pMemory);
5895     if (status != NV_OK)
5896         goto done;
5897 
5898     // RM client allocations can't have multiple memdescs.
5899     pMemDesc = pMemory->pMemDesc;
5900     NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
5901 
5902     pAdjustedMemDesc = pMemDesc;
5903     pFabricVAS       = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
5904     if (pFabricVAS != NULL)
5905     {
5906         status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
5907         if (status != NV_OK)
5908             goto done;
5909     }
5910 
5911     if (memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FBMEM &&
5912         memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_SYSMEM &&
5913         memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_MC &&
5914         memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_V2)
5915     {
5916         status = NV_ERR_NOT_SUPPORTED;
5917         goto freeGpaMemdesc;
5918     }
5919 
5920     // For SYSMEM or indirect peer mappings
5921     bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
5922     if (bIsIndirectPeer ||
5923         memdescIsSysmem(pAdjustedMemDesc))
5924     {
5925         // For sysmem allocations, the dup done below is very shallow and in
5926         // particular doesn't create IOMMU mappings required for the mapped GPU
5927         // to access the memory. That's a problem if the mapped GPU is different
5928         // from the GPU that the allocation was created under. Add them
5929         // explicitly here and remove them when the memory is freed in
5930         // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the
5931         // mappings so it's ok to call it if the mappings are already there.
5932         //
5933         // TODO: Bug 1811060: Add native support for this use-case in RM API.
5934         status = memdescMapIommu(pAdjustedMemDesc, pMappingGpu->busInfo.iovaspaceId);
5935         if (status != NV_OK)
5936             goto freeGpaMemdesc;
5937     }
5938 
5939     session = device->session;
5940 
5941     if (pGpuMemoryInfo)
5942     {
5943         RsClient *pClient;
5944         status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
5945         if (status != NV_OK)
5946             goto freeGpaMemdesc;
5947 
5948         status = nvGpuOpsFillGpuMemoryInfo(pAdjustedMemDesc, pMappingGpu, pGpuMemoryInfo);
5949         if (status != NV_OK)
5950             goto freeGpaMemdesc;
5951     }
5952 
5953     pResourceRef = RES_GET_REF(pMemory);
5954     pParentRef = pResourceRef->pParentRef;
5955 
5956     // TODO: Bug 2479851: temporarily detect the type of the parent of the
5957     // memory object (device or subdevice). Once CUDA switches to subdevices,
5958     // we will use subdevice handles unconditionally here.
5959     if (dynamicCast(pParentRef->pResource, Subdevice))
5960     {
5961         hParent = device->subhandle;
5962     }
5963     else if (dynamicCast(pParentRef->pResource, RsClientResource))
5964     {
5965         NvBool bAssert = (
5966                           (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) ||
5967                           (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2));
5968 
5969         NV_ASSERT(bAssert);
5970 
5971         hParent = session->handle;
5972     }
5973     else
5974     {
5975         NV_ASSERT(dynamicCast(pParentRef->pResource, Device));
5976         hParent = device->handle;
5977     }
5978 
5979     dupedMemHandle = NV01_NULL_OBJECT;
5980     status = pRmApi->DupObject(pRmApi,
5981                                session->handle,
5982                                hParent,
5983                                &dupedMemHandle,
5984                                hClient,
5985                                hPhysMemory,
5986                                flags);
5987     if (status != NV_OK)
5988         goto freeGpaMemdesc;
5989 
5990     *hDupMemory = dupedMemHandle;
5991 
5992 freeGpaMemdesc:
5993     if (pAdjustedMemDesc != pMemDesc)
5994         fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc);
5995 
5996 done:
5997     _nvGpuOpsLocksRelease(&acquiredLocks);
5998     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5999     return status;
6000 }
6001 
6002 NV_STATUS nvGpuOpsDupMemory(struct gpuDevice *device,
6003                             NvHandle hClient,
6004                             NvHandle hPhysMemory,
6005                             NvHandle *hDupMemory,
6006                             gpuMemoryInfo *pGpuMemoryInfo)
6007 {
6008     return dupMemory(device,
6009                      hClient,
6010                      hPhysMemory,
6011                      NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE,
6012                      hDupMemory,
6013                      pGpuMemoryInfo);
6014 }
6015 
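//
// Dup the physical memory backing srcAddress in srcVaSpace into the
// destination device, then allocate and map a virtual range for it in
// dstVaSpace, returning the new GPU VA in *dstAddress.
//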
6016 NV_STATUS nvGpuOpsDupAllocation(struct gpuAddressSpace *srcVaSpace,
6017                                 NvU64 srcAddress,
6018                                 struct gpuAddressSpace *dstVaSpace,
6019                                 NvU64 dstVaAlignment,
6020                                 NvU64 *dstAddress)
6021 {
6022     NV_STATUS status;
6023     NvHandle dstPhysHandle = 0;
6024     NvHandle srcPhysHandle = 0;
6025     NvU64 tmpDstAddress = 0;
6026     gpuMemoryInfo gpuMemoryInfo = {0};
6027     gpuVaAllocInfo allocInfo = {0};
6028     struct allocFlags flags = {0};
6029     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
6030 
6031     NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != 0, NV_ERR_INVALID_ARGUMENT);
6032     NV_CHECK_OR_RETURN(LEVEL_ERROR, dstVaSpace != 0, NV_ERR_INVALID_ARGUMENT);
6033 
6034     NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != dstVaSpace, NV_ERR_INVALID_ARGUMENT);
6035     NV_CHECK_OR_RETURN(LEVEL_ERROR, srcAddress != 0, NV_ERR_INVALID_ARGUMENT);
6036     NV_CHECK_OR_RETURN(LEVEL_ERROR, dstAddress != NULL, NV_ERR_INVALID_ARGUMENT);
6037 
6038     // If the given combination of source VA space and address does not
6039     // correspond to a previous allocation, the physical handle retrieval fails
6040     status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &srcPhysHandle);
6041     if (status != NV_OK)
6042         return status;
6043 
6044     // Dupe the physical allocation, and return information about the associated
6045     // memory descriptor
6046     //
6047     // Passing NV04_DUP_HANDLE_FLAGS_NONE allows duping across MIG partitions
6048     status = dupMemory(dstVaSpace->device,
6049                        srcVaSpace->device->session->handle,
6050                        srcPhysHandle,
6051                        NV04_DUP_HANDLE_FLAGS_NONE,
6052                        &dstPhysHandle,
6053                        &gpuMemoryInfo);
6054 
6055     if (status != NV_OK)
6056         return status;
6057 
6058     // Vidmem dups across GPUs are not currently supported
6059     if (!gpuMemoryInfo.sysmem && (srcVaSpace->device != dstVaSpace->device))
6060     {
6061         status = NV_ERR_NOT_SUPPORTED;
6062         goto cleanup_dup;
6063     }
6064 
6065     // The virtual allocation and mapping use the size and page size of the duped
6066     // memory descriptor; the alignment is the caller-provided dstVaAlignment.
6067     allocInfo.pageSize = gpuMemoryInfo.pageSize;
6068     allocInfo.alignment = dstVaAlignment;
6069 
6070     status = nvGpuOpsAllocVirtual(dstVaSpace,
6071                                   gpuMemoryInfo.size,
6072                                   dstAddress,
6073                                   dstPhysHandle,
6074                                   flags,
6075                                   &allocInfo);
6076     if (status != NV_OK)
6077         goto cleanup_dup;
6078 
6079     // Map the entire memory
6080     status = nvGpuOpsMapGpuMemory(dstVaSpace,
6081                                   *dstAddress,
6082                                   gpuMemoryInfo.size,
6083                                   gpuMemoryInfo.pageSize,
6084                                   &tmpDstAddress,
6085                                   flags);
6086 
6087     if (status != NV_OK)
6088         goto cleanup_virt_allocation;
6089 
6090     NV_ASSERT(tmpDstAddress == *dstAddress);
6091 
6092     return NV_OK;
6093 
6094 cleanup_virt_allocation:
6095     nvGpuOpsFreeVirtual(dstVaSpace, *dstAddress);
6096 
6097 cleanup_dup:
6098     pRmApi->Free(pRmApi, dstVaSpace->device->session->handle, dstPhysHandle);
6099     return status;
6100 }
6101 
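//
// Copy the GPU's GID (binary format) into gpuGuid. Fails if guidLength
// does not match the GID length reported by RM.
//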
6102 NV_STATUS nvGpuOpsGetGuid(NvHandle hClient, NvHandle hDevice,
6103                           NvHandle hSubDevice, NvU8 *gpuGuid,
6104                           unsigned guidLength)
6105 {
6106     NV_STATUS status;
6107     nvGpuOpsLockSet acquiredLocks;
6108     THREAD_STATE_NODE threadState;
6109     NV2080_CTRL_GPU_GET_GID_INFO_PARAMS getGidParams = {0};
6110     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6111 
6112     if (!gpuGuid)
6113         return NV_ERR_INVALID_ARGUMENT;
6114 
6115     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6116     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, hClient, NULL, &acquiredLocks);
6117     if (status != NV_OK)
6118     {
6119         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6120         return status;
6121     }
6122 
6123     getGidParams.index = 0;
6124     getGidParams.flags = NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY;
6125     status = pRmApi->Control(pRmApi,
6126                              hClient, hSubDevice,
6127                              NV2080_CTRL_CMD_GPU_GET_GID_INFO,
6128                              &getGidParams,
6129                              sizeof(getGidParams));
6130 
6131     if ((guidLength != getGidParams.length) || (status != NV_OK))
6132     {
6133         _nvGpuOpsLocksRelease(&acquiredLocks);
6134         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6135         return NV_ERR_INVALID_ARGUMENT;
6136     }
6137 
6138     portMemCopy(gpuGuid, guidLength, &getGidParams.data, guidLength);
6139 
6140     _nvGpuOpsLocksRelease(&acquiredLocks);
6141     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6142     return status;
6143 }
6144 
6145 // Make sure UVM_COPY_ENGINE_COUNT_MAX is at least the number of copy engines
6146 // supported by RM.
6147 ct_assert(UVM_COPY_ENGINE_COUNT_MAX >= NV2080_ENGINE_TYPE_COPY_SIZE);
6148 
6149 static void setCeCaps(const NvU8 *rmCeCaps, gpuCeCaps *ceCaps)
6150 {
6151     ceCaps->grce        = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_GRCE);
6152     ceCaps->shared      = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SHARED);
6153     ceCaps->sysmemRead  = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_READ);
6154     ceCaps->sysmemWrite = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_WRITE);
6155     ceCaps->nvlinkP2p   = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_NVLINK_P2P);
6156     ceCaps->sysmem      = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM);
6157     ceCaps->p2p         = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_P2P);
6158 }
6159 
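//
// Fill cesCaps with the capabilities of every copy engine on the device.
// The engine list is queried in two passes (count first, then the list),
// and each CE's caps table and PCE mask are fetched individually.
//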
6160 static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps)
6161 {
6162     NV_STATUS status = NV_OK;
6163     NV2080_CTRL_GPU_GET_ENGINES_PARAMS getEnginesParams = {0};
6164     NvU32 *engineList;
6165     NvU32 i;
6166     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6167 
6168     status = pRmApi->Control(pRmApi,
6169                              gpu->session->handle,
6170                              gpu->subhandle,
6171                              NV2080_CTRL_CMD_GPU_GET_ENGINES,
6172                              &getEnginesParams,
6173                              sizeof(getEnginesParams));
6174     if (status != NV_OK)
6175         return status;
6176 
6177     engineList = portMemAllocNonPaged(
6178                         sizeof(*engineList) * getEnginesParams.engineCount);
6179     if (engineList == NULL)
6180         return NV_ERR_NO_MEMORY;
6181 
6182     getEnginesParams.engineList = NV_PTR_TO_NvP64(engineList);
6183 
6184     status = pRmApi->Control(pRmApi,
6185                              gpu->session->handle,
6186                              gpu->subhandle,
6187                              NV2080_CTRL_CMD_GPU_GET_ENGINES,
6188                              &getEnginesParams,
6189                              sizeof(getEnginesParams));
6190     if (status != NV_OK)
6191         goto done;
6192 
6193     portMemSet(cesCaps, 0, sizeof(*cesCaps));
6194 
6195     for (i = 0; i < getEnginesParams.engineCount; i++)
6196     {
6197         NV2080_CTRL_CE_GET_CAPS_PARAMS ceParams = {0};
6198         NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS pceMaskParams = {0};
6199         NvU8 rmCeCaps[NV2080_CTRL_CE_CAPS_TBL_SIZE] = {0};
6200         UvmGpuCopyEngineCaps *ceCaps;
6201         NvU32 ceIndex;
6202 
6203         if (!NV2080_ENGINE_TYPE_IS_COPY(engineList[i]))
6204             continue;
6205 
6206         ceIndex = NV2080_ENGINE_TYPE_COPY_IDX(engineList[i]);
6207         if (ceIndex >= NV2080_ENGINE_TYPE_COPY_SIZE)
6208             continue;
6209 
6210         ceParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex);
6211         ceParams.capsTblSize = NV2080_CTRL_CE_CAPS_TBL_SIZE;
6212         ceParams.capsTbl = NV_PTR_TO_NvP64(rmCeCaps);
6213 
6214         status = pRmApi->Control(pRmApi,
6215                                  gpu->session->handle,
6216                                  gpu->subhandle,
6217                                  NV2080_CTRL_CMD_CE_GET_CAPS,
6218                                  &ceParams,
6219                                  sizeof(ceParams));
6220         if (status != NV_OK)
6221         {
6222             NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
6223                       __LINE__, nvstatusToString(status));
6224             goto done;
6225         }
6226 
6227         ceCaps = cesCaps->copyEngineCaps + ceIndex;
6228         setCeCaps(rmCeCaps, ceCaps);
6229 
6230         pceMaskParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex);
6231         pceMaskParams.pceMask = 0;
6232         status = pRmApi->Control(pRmApi,
6233                                  gpu->session->handle,
6234                                  gpu->subhandle,
6235                                  NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
6236                                  &pceMaskParams,
6237                                  sizeof(pceMaskParams));
6238         if (status != NV_OK)
6239         {
6240             NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
6241                       __LINE__, nvstatusToString(status));
6242             goto done;
6243         }
6244         ceCaps->cePceMask = pceMaskParams.pceMask;
6245 
6246         ceCaps->supported = NV_TRUE;
6247     }
6248 
6249 done:
6250     portMemFree(engineList);
6251     return status;
6252 }
6253 
6254 static NvBool isClassHost(NvU32 class)
6255 {
6256     NvBool bHostClass = NV_FALSE;
6257     CLI_CHANNEL_CLASS_INFO classInfo;
6258     CliGetChannelClassInfo(class, &classInfo);
6259     bHostClass = (classInfo.classType == CHANNEL_CLASS_TYPE_GPFIFO);
6260     return bHostClass;
6261 }
6262 
6263 static NvBool isClassCE(NvU32 class)
6264 {
6265     switch (class)
6266     {
6267         case MAXWELL_DMA_COPY_A:
6268         case PASCAL_DMA_COPY_A:
6269         case PASCAL_DMA_COPY_B:
6270         case VOLTA_DMA_COPY_A:
6271         case TURING_DMA_COPY_A:
6272         case AMPERE_DMA_COPY_A:
6273         case AMPERE_DMA_COPY_B:
6274         case HOPPER_DMA_COPY_A:
6275             return NV_TRUE;
6276 
6277         default:
6278             return NV_FALSE;
6279     }
6280 }
6281 
6282 static NvBool isClassSec2(NvU32 class)
6283 {
6284     switch (class)
6285     {
6286 
6287         default:
6288             return NV_FALSE;
6289     }
6290 }
6291 
6292 static NvBool isClassCompute(NvU32 class)
6293 {
6294     switch (class)
6295     {
6296         case MAXWELL_COMPUTE_A:
6297         case MAXWELL_COMPUTE_B:
6298         case PASCAL_COMPUTE_A:
6299         case PASCAL_COMPUTE_B:
6300         case VOLTA_COMPUTE_A:
6301         case VOLTA_COMPUTE_B:
6302         case TURING_COMPUTE_A:
6303         case AMPERE_COMPUTE_A:
6304         case AMPERE_COMPUTE_B:
6305         case HOPPER_COMPUTE_A:
6306             return NV_TRUE;
6307 
6308         default:
6309             return NV_FALSE;
6310     }
6311 }
6312 
6313 static NvBool isClassFaultBuffer(NvU32 class)
6314 {
6315     switch (class)
6316     {
6317         case MAXWELL_FAULT_BUFFER_A:
6318         case MMU_FAULT_BUFFER:
6319             return NV_TRUE;
6320 
6321         default:
6322             return NV_FALSE;
6323     }
6324 }
6325 
6326 static NvBool isClassAccessCounterBuffer(NvU32 class)
6327 {
6328     switch (class)
6329     {
6330         case ACCESS_COUNTER_NOTIFY_BUFFER:
6331             return NV_TRUE;
6332 
6333         default:
6334             return NV_FALSE;
6335     }
6336 }
6337 
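//
// Walk the device's class list and return the highest supported class in
// each category: host (GPFIFO), CE, compute, fault buffer, access counter
// buffer and SEC2. The list is queried in two passes (count, then list).
//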
6338 static NV_STATUS findDeviceClasses(NvHandle hRoot,
6339                                    NvHandle hDevice,
6340                                    NvHandle hSubdevice,
6341                                    NvU32 *hostClass,
6342                                    NvU32 *ceClass,
6343                                    NvU32 *computeClass,
6344                                    NvU32 *faultBufferClass,
6345                                    NvU32 *accessCounterBufferClass,
6346                                    NvU32 *sec2Class)
6347 {
6348     NvU32 *classList;
6349     NV_STATUS status = NV_OK;
6350     NV0080_CTRL_GPU_GET_CLASSLIST_PARAMS classParams = {0};
6351     NvU32 i = 0;
6352     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
6353 
6354     *hostClass                = 0;
6355     *ceClass                  = 0;
6356     *computeClass             = 0;
6357     *faultBufferClass         = 0;
6358     *accessCounterBufferClass = 0;
6359 
6360     status = pRmApi->Control(pRmApi,
6361                              hRoot,
6362                              hDevice,
6363                              NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
6364                              &classParams,
6365                              sizeof(classParams));
6366     if (status != NV_OK)
6367         return status;
6368 
6369     classList = portMemAllocNonPaged(
6370                         (sizeof(NvU32) * classParams.numClasses));
6371 
6372     if (classList == NULL)
6373     {
6374         return NV_ERR_INSUFFICIENT_RESOURCES;
6375     }
6376 
6377     classParams.classList = NV_PTR_TO_NvP64(classList);
6378     status = pRmApi->Control(pRmApi,
6379                              hRoot,
6380                              hDevice,
6381                              NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
6382                              &classParams,
6383                              sizeof(classParams));
6384 
6385     if (status != NV_OK)
6386         goto Cleanup_classlist;
6387 
6388     for (i = 0; i < classParams.numClasses; i++)
6389     {
6390         if (classList[i] == PHYSICAL_CHANNEL_GPFIFO)
6391             continue;
6392         if (isClassHost(classList[i]))
6393             *hostClass = NV_MAX(*hostClass, classList[i]);
6394         else if (isClassCE(classList[i]))
6395             *ceClass = NV_MAX(*ceClass, classList[i]);
6396         else if (isClassCompute(classList[i]))
6397             *computeClass = NV_MAX(*computeClass, classList[i]);
6398         else if (isClassFaultBuffer(classList[i]))
6399             *faultBufferClass = NV_MAX(*faultBufferClass, classList[i]);
6400         else if (isClassAccessCounterBuffer(classList[i]))
6401         {
6402             NV_ASSERT(accessCounterBufferClass);
6403             *accessCounterBufferClass = NV_MAX(*accessCounterBufferClass, classList[i]);
6404         }
6405         else if (isClassSec2(classList[i]))
6406             *sec2Class = NV_MAX(*sec2Class, classList[i]);
6407     }
6408 
6409 Cleanup_classlist:
6410     portMemFree(classList);
6411     return status;
6412 }
6413 
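//
// Given a process id and a GPU UUID, return the RM client, device and
// subdevice handles that process holds for that GPU. A temporary client
// is created only to translate the UUID into a gpuId and is freed before
// returning.
//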
6414 NV_STATUS nvGpuOpsGetClientInfoFromPid(unsigned pid,
6415                                        const NvU8 *gpuUuid,
6416                                        NvHandle *hClient,
6417                                        NvHandle *hDevice,
6418                                        NvHandle *hSubDevice)
6419 {
6420     NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
6421     unsigned                             gpuId          = 0;
6422     NvHandle                             hPidClient     = 0;
6423     NvHandle                             hPidDevice     = 0;
6424     NvHandle                             hPidVaSpace    = 0;
6425     NvHandle                             hPidSubDevice  = 0;
6426     NvHandle                             clientHandle   = 0;
6427     NV_STATUS                            status;
6428     nvGpuOpsLockSet                      acquiredLocks;
6429     THREAD_STATE_NODE                    threadState;
6430     RM_API                              *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6431 
6432     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6433     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
6434     if (status != NV_OK)
6435     {
6436         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6437         return status;
6438     }
6439 
6440     status = nvGpuOpsCreateClient(pRmApi, &clientHandle);
6441     if (status != NV_OK)
6442     {
6443         _nvGpuOpsLocksRelease(&acquiredLocks);
6444         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6445         return status;
6446     }
6447 
6448     // find the gpuId from the given uuid
6449     portMemCopy(&gpuIdInfoParams.gpuUuid, NV_GPU_UUID_LEN, gpuUuid, NV_GPU_UUID_LEN);
6450     gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
6451     status = pRmApi->Control(pRmApi,
6452                              clientHandle,
6453                              clientHandle,
6454                              NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
6455                              &gpuIdInfoParams,
6456                              sizeof(gpuIdInfoParams));
6457     if (status != NV_OK)
6458         goto cleanup;
6459 
6460     gpuId = gpuIdInfoParams.gpuId;
6461 
6462     status = findVaspaceFromPid(pid, gpuId, &hPidClient,
6463                                 &hPidDevice, &hPidSubDevice, &hPidVaSpace);
6464 
6465     // free the temporary client we just created
6466     pRmApi->Free(pRmApi, clientHandle, clientHandle);
6467     if (status != NV_OK)
6468         goto cleanup;
6469 
6470     *hClient = hPidClient;
6471     *hDevice = hPidDevice;
6472     *hSubDevice = hPidSubDevice;
6473     _nvGpuOpsLocksRelease(&acquiredLocks);
6474     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6475     return NV_OK;
6476 
6477 cleanup:
6478     *hClient = 0;
6479     *hDevice = 0;
6480     *hSubDevice = 0;
6481     _nvGpuOpsLocksRelease(&acquiredLocks);
6482     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6483     return status;
6484 }
6485 
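//
// Point the VA space at an externally managed page directory via
// NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY. For externally owned VA spaces
// the channels are stopped while the PDB is switched and rescheduled
// afterwards.
//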
6486 NV_STATUS nvGpuOpsSetPageDirectory(struct gpuAddressSpace *vaSpace,
6487                                    NvU64 physAddress,
6488                                    unsigned numEntries,
6489                                    NvBool bVidMemAperture, NvU32 pasid)
6490 {
6491     NV_STATUS status;
6492     nvGpuOpsLockSet acquiredLocks;
6493     THREAD_STATE_NODE threadState;
6494     NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS params = {0};
6495     OBJGPU *pGpu = NULL;
6496     OBJVASPACE *pVAS = NULL;
6497     RsClient *pClient;
6498     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6499 
6500     if (!vaSpace || !numEntries)
6501         return NV_ERR_INVALID_ARGUMENT;
6502 
6503     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6504     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
6505                                       vaSpace->device->session->handle,
6506                                       &pClient,
6507                                       &acquiredLocks);
6508     if (status != NV_OK)
6509     {
6510         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6511         return status;
6512     }
6513 
6514     status = CliSetGpuContext(vaSpace->device->session->handle,
6515                               vaSpace->device->handle,
6516                               &pGpu,
6517                               NULL);
6518     if (status != NV_OK)
6519     {
6520         _nvGpuOpsLocksRelease(&acquiredLocks);
6521         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6522         return status;
6523     }
6524 
6525     status = vaspaceGetByHandleOrDeviceDefault(pClient,
6526                                                vaSpace->device->handle,
6527                                                vaSpace->handle,
6528                                                &pVAS);
6529     if ((status != NV_OK) || (pVAS == NULL))
6530     {
6531         _nvGpuOpsLocksRelease(&acquiredLocks);
6532         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6533         return NV_ERR_INVALID_ARGUMENT;
6534     }
6535 
6536     if (vaspaceIsExternallyOwned(pVAS))
6537     {
6538         // make sure there is no PDB set if already externally owned
6539         if ((NULL != vaspaceGetPageDirBase(pVAS, pGpu)))
6540         {
6541             _nvGpuOpsLocksRelease(&acquiredLocks);
6542             threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6543             return NV_ERR_NOT_SUPPORTED;
6544         }
6545 
6546         // Stop all channels under the VAS
6547         status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
6548         if (status != NV_OK)
6549         {
6550             //
6551             // If stopping any channels failed, reenable the channels which were
6552             // able to be stopped before bailing
6553             //
6554             nvGpuOpsEnableVaSpaceChannels(vaSpace);
6555             _nvGpuOpsLocksRelease(&acquiredLocks);
6556             threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6557             return status;
6558         }
6559     }
6560 
6561     params.physAddress = physAddress;
6562     params.numEntries = numEntries;
6563     params.hVASpace = vaSpace->handle;
6564     params.flags = bVidMemAperture ?
6565                    DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _VIDMEM) :
6566                    DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _SYSMEM_COH);
6567     params.flags |= DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS,
6568                             _ALL_CHANNELS, _TRUE);
6569     params.pasid = pasid;
6570 
6571     // Always do Unicast by passing non-zero subDeviceId!
6572     params.subDeviceId = vaSpace->device->subdeviceInstance + 1;
6573 
6574     status = pRmApi->Control(pRmApi,
6575                              vaSpace->device->session->handle,
6576                              vaSpace->device->handle,
6577                              NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY,
6578                              &params,
6579                              sizeof(params));
6580 
6581     if (vaspaceIsExternallyOwned(pVAS))
6582     {
6583         // Reschedule all channels in this VAS
6584         nvGpuOpsEnableVaSpaceChannels(vaSpace);
6585     }
6586 
6587     _nvGpuOpsLocksRelease(&acquiredLocks);
6588     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6589     return status;
6590 }
6591 
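//
// Undo nvGpuOpsSetPageDirectory() via
// NV0080_CTRL_CMD_DMA_UNSET_PAGE_DIRECTORY, again stopping and
// rescheduling the VA space's channels when it is externally owned.
//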
6592 NV_STATUS nvGpuOpsUnsetPageDirectory(struct gpuAddressSpace *vaSpace)
6593 {
6594     NV_STATUS status;
6595     nvGpuOpsLockSet acquiredLocks;
6596     THREAD_STATE_NODE threadState;
6597     NV0080_CTRL_DMA_UNSET_PAGE_DIRECTORY_PARAMS params = {0};
6598     OBJGPU *pGpu = NULL;
6599     OBJVASPACE *pVAS = NULL;
6600     RsClient *pClient;
6601     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6602 
6603     if (!vaSpace)
6604         return NV_ERR_INVALID_ARGUMENT;
6605 
6606     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6607     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
6608                                       vaSpace->device->session->handle,
6609                                       &pClient,
6610                                       &acquiredLocks);
6611     if (status != NV_OK)
6612     {
6613         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6614         return status;
6615     }
6616 
6617     status = CliSetGpuContext(vaSpace->device->session->handle,
6618                               vaSpace->device->handle,
6619                               &pGpu,
6620                               NULL);
6621     if (status != NV_OK)
6622     {
6623         _nvGpuOpsLocksRelease(&acquiredLocks);
6624         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6625         return status;
6626     }
6627 
6628     status = vaspaceGetByHandleOrDeviceDefault(pClient,
6629                                                vaSpace->device->handle,
6630                                                vaSpace->handle,
6631                                                &pVAS);
6632     if ((status != NV_OK) || (pVAS == NULL))
6633     {
6634         _nvGpuOpsLocksRelease(&acquiredLocks);
6635         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6636         return NV_ERR_INVALID_ARGUMENT;
6637     }
6638 
6639     if (vaspaceIsExternallyOwned(pVAS))
6640     {
6641         // Stop all channels under the VAS
6642         status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
6643         if (status != NV_OK)
6644         {
6645             //
6646             // If stopping any channels failed, reenable the channels which were
6647             // able to be stopped before bailing
6648             //
6649             nvGpuOpsEnableVaSpaceChannels(vaSpace);
6650             _nvGpuOpsLocksRelease(&acquiredLocks);
6651             threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6652             return status;
6653         }
6654     }
6655 
6656     params.hVASpace = vaSpace->handle;
6657 
6658     // Always do Unicast by passing non-zero subDeviceId!
6659     params.subDeviceId = vaSpace->device->subdeviceInstance + 1;
6660 
6661     status = pRmApi->Control(pRmApi,
6662                              vaSpace->device->session->handle,
6663                              vaSpace->device->handle,
6664                              NV0080_CTRL_CMD_DMA_UNSET_PAGE_DIRECTORY,
6665                              &params,
6666                              sizeof(params));
6667 
6668     if (vaspaceIsExternallyOwned(pVAS))
6669     {
6670         // Reschedule all channels in this VAS
6671         nvGpuOpsEnableVaSpaceChannels(vaSpace);
6672     }
6673 
6674     _nvGpuOpsLocksRelease(&acquiredLocks);
6675     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6676     return status;
6677 }
6678 
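// Return the GMMU format descriptor for the VA space in *pFmt.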
6679 NV_STATUS nvGpuOpsGetGmmuFmt(struct gpuAddressSpace *vaSpace, void **pFmt)
6680 {
6681     NV_STATUS status = NV_OK;
6682     nvGpuOpsLockSet acquiredLocks;
6683     THREAD_STATE_NODE threadState;
6684     NV90F1_CTRL_VASPACE_GET_GMMU_FORMAT_PARAMS params = {0};
6685     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6686 
6687     if (!vaSpace || !pFmt)
6688         return NV_ERR_INVALID_ARGUMENT;
6689 
6690     if (!vaSpace->handle)
6691         return NV_ERR_INVALID_OBJECT_HANDLE;
6692 
6693     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6694     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
6695                                       vaSpace->device->session->handle,
6696                                       NULL,
6697                                       &acquiredLocks);
6698     if (status != NV_OK)
6699     {
6700         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6701         return status;
6702     }
6703 
6704     *pFmt = NULL;
6705     params.hSubDevice = vaSpace->device->subhandle;
6706 
6707     status = pRmApi->Control(pRmApi,
6708                              vaSpace->device->session->handle,
6709                              vaSpace->handle,
6710                              NV90F1_CTRL_CMD_VASPACE_GET_GMMU_FORMAT,
6711                              &params,
6712                              sizeof(params));
6713     if (status == NV_OK)
6714         *pFmt = (void *)params.pFmt;
6715 
6716     _nvGpuOpsLocksRelease(&acquiredLocks);
6717     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6718     return status;
6719 }
6720 
6721 NV_STATUS nvGpuOpsInvalidateTlb(struct gpuAddressSpace *vaSpace)
6722 {
6723     NV2080_CTRL_DMA_INVALIDATE_TLB_PARAMS params = {0};
6724     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
6725 
6726     if (!vaSpace)
6727         return NV_ERR_INVALID_ARGUMENT;
6728 
6729     params.hVASpace = vaSpace->handle;
6730     return pRmApi->Control(pRmApi,
6731                            vaSpace->device->session->handle,
6732                            vaSpace->device->subhandle,
6733                            NV2080_CTRL_CMD_DMA_INVALIDATE_TLB,
6734                            &params,
6735                            sizeof(params));
6736 }
6737 
6738 NV_STATUS nvGpuOpsGetFbInfo(struct gpuDevice *device, gpuFbInfo *fbInfo)
6739 {
6740     NV_STATUS status;
6741     nvGpuOpsLockSet acquiredLocks;
6742     THREAD_STATE_NODE threadState;
6743 
6744     if (!device || !fbInfo)
6745         return NV_ERR_INVALID_ARGUMENT;
6746 
6747     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6748     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
6749                                       device->session->handle,
6750                                       NULL,
6751                                       &acquiredLocks);
6752     if (status != NV_OK)
6753     {
6754         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6755         return status;
6756     }
6757 
6758     portMemCopy(fbInfo, sizeof(*fbInfo), &device->fbInfo, sizeof(*fbInfo));
6759 
6760     _nvGpuOpsLocksRelease(&acquiredLocks);
6761     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6762     return NV_OK;
6763 }
6764 
6765 NV_STATUS nvGpuOpsGetEccInfo(struct gpuDevice *device, gpuEccInfo *eccInfo)
6766 {
6767     subDeviceDesc *rmSubDevice;
6768 
6769     if (!device || !eccInfo)
6770         return NV_ERR_INVALID_ARGUMENT;
6771 
6772     rmSubDevice = device->rmSubDevice;
6773 
6774     if (!rmSubDevice->bEccInitialized)
6775         return NV_ERR_NOT_SUPPORTED;
6776 
6777     eccInfo->eccMask = rmSubDevice->eccMask;
6778     eccInfo->eccOffset = rmSubDevice->eccOffset;
6779     eccInfo->eccReadLocation = rmSubDevice->eccReadLocation;
6780     eccInfo->bEccEnabled = rmSubDevice->bEccEnabled;
6781     eccInfo->eccErrorNotifier = &rmSubDevice->eccErrorNotifier;
6782 
6783     return NV_OK;
6784 }
6785 
6786 //
6787 // Do not acquire the GPU locks, as all nvGpuOpsFreeDupedHandle() does is
6788 // call pRmApi->Free(), which drops the GPU locks if acquired (and
6789 // re-acquires them later).
6790 //
6791 NV_STATUS nvGpuOpsFreeDupedHandle(struct gpuDevice *device,
6792                                   NvHandle hPhysHandle)
6793 {
6794     NV_STATUS status = NV_OK;
6795     nvGpuOpsLockSet acquiredLocks;
6796     THREAD_STATE_NODE threadState;
6797     Memory *pMemory = NULL;
6798     OBJGPU *pMappingGpu = NULL;
6799     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
6800     NvHandle hClient;
6801     NvHandle hSubDevice;
6802 
6803     if (!device)
6804         return NV_ERR_INVALID_ARGUMENT;
6805 
6806     hClient = device->session->handle;
6807 
6808     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
6809     status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, NULL, 0, 0, 0, &acquiredLocks);
6810     if (status != NV_OK)
6811     {
6812         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6813         return status;
6814     }
6815 
6816     status = CliSetSubDeviceContext(device->session->handle,
6817                                     device->subhandle,
6818                                     &hSubDevice,
6819                                     &pMappingGpu);
6820     if (status != NV_OK)
6821         goto out;
6822 
6823     status = nvGpuOpsGetMemoryByHandle(device->session->handle,
6824                                        hPhysHandle,
6825                                        &pMemory);
6826     if (status != NV_OK)
6827         // On success, gpuGetGidInfo() allocates memory for uuid; it is freed below.
6828 
6829     if (memdescIsSysmem(pMemory->pMemDesc))
6830     {
6831         // Release the mappings acquired in nvGpuOpsDupMemory().
6832         //
6833         // TODO: Bug 1811060: Add native support for this use-case in RM API.
6834         memdescUnmapIommu(pMemory->pMemDesc, pMappingGpu->busInfo.iovaspaceId);
6835     }
6836 
6837 out:
6838     pRmApi->Free(pRmApi, device->session->handle, hPhysHandle);
6839     _nvGpuOpsLocksRelease(&acquiredLocks);
6840     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
6841     return status;
6842 }
6843 
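//
// Set up the MMU fault buffer for the device: allocate the fault buffer
// object, query its size, map it to the CPU, register the non-replayable
// shadow buffer on Volta+, and record the replayable buffer's register
// mappings in pFaultInfo.
//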
6844 NV_STATUS nvGpuOpsInitFaultInfo(struct gpuDevice *device,
6845                                 gpuFaultInfo *pFaultInfo)
6846 {
6847     struct gpuSession *session = device->session;
6848     NV_STATUS status = NV_OK;
6849     NVB069_ALLOCATION_PARAMETERS faultBufferAllocParams = {0};
6850     NVB069_CTRL_FAULTBUFFER_GET_SIZE_PARAMS sizeParams = {0};
6851     NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappingsParams = {0};
6852     void *bufferAddress = NULL;
6853     NvU32 faultBufferSize = 0;
6854     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
6855 
6856     pFaultInfo->faultBufferHandle = NV01_NULL_OBJECT;
6857     status = pRmApi->Alloc(pRmApi,
6858                            session->handle,
6859                            device->subhandle,
6860                            &pFaultInfo->faultBufferHandle,
6861                            device->faultBufferClass,
6862                            &faultBufferAllocParams);
6863     if (status != NV_OK)
6864         goto cleanup;
6865 
6866 
6867     {
6868         // Get the Size of the fault buffer
6869         status = pRmApi->Control(pRmApi,
6870                                  session->handle,
6871                                  pFaultInfo->faultBufferHandle,
6872                                  NVB069_CTRL_CMD_FAULTBUFFER_GET_SIZE,
6873                                  &sizeParams,
6874                                  sizeof(sizeParams));
6875         if (status != NV_OK)
6876             goto cleanup_fault_buffer;
6877 
6878         faultBufferSize = sizeParams.faultBufferSize;
6879 
6880         // Map the fault buffer pointer to CPU
6881         status = pRmApi->MapToCpu(pRmApi,
6882                                   session->handle,
6883                                   device->subhandle,
6884                                   pFaultInfo->faultBufferHandle,
6885                                   0,
6886                                   pFaultInfo->replayable.bufferSize,
6887                                   &bufferAddress,
6888                                   0);
6889         if (status != NV_OK)
6890             goto cleanup_fault_buffer;
6891     }
6892 
6893     if (isDeviceVoltaPlus(device))
6894     {
6895         NVC369_CTRL_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF_PARAMS nonReplayableFaultsParams = {0};
6896 
6897         status = pRmApi->Control(pRmApi,
6898                                  session->handle,
6899                                  pFaultInfo->faultBufferHandle,
6900                                  NVC369_CTRL_CMD_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF,
6901                                  &nonReplayableFaultsParams,
6902                                  sizeof(nonReplayableFaultsParams));
6903         if (status != NV_OK)
6904             goto cleanup_fault_buffer;
6905 
6906         pFaultInfo->nonReplayable.shadowBufferAddress = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBuffer);
6907         pFaultInfo->nonReplayable.shadowBufferContext = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBufferContext);
6908         pFaultInfo->nonReplayable.bufferSize          = nonReplayableFaultsParams.bufferSize;
6909     }
6910 
6911     registermappingsParams.faultBufferType = NVB069_CTRL_FAULT_BUFFER_REPLAYABLE;
6912     status = pRmApi->Control(pRmApi,
6913                              session->handle,
6914                              pFaultInfo->faultBufferHandle,
6915                              NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS,
6916                              &registermappingsParams,
6917                              sizeof(registermappingsParams));
6918     if (status != NV_OK)
6919         goto cleanup_fault_buffer;
6920 
6921     pFaultInfo->replayable.pFaultBufferGet        = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferGet;
6922     pFaultInfo->replayable.pFaultBufferPut        = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferPut;
6923     pFaultInfo->replayable.pFaultBufferInfo       = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferInfo;
6924     pFaultInfo->replayable.pPmcIntr               = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntr;
6925     pFaultInfo->replayable.pPmcIntrEnSet          = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnSet;
6926     pFaultInfo->replayable.pPmcIntrEnClear        = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnClear;
6927     pFaultInfo->replayable.replayableFaultMask    = registermappingsParams.replayableFaultMask;
6928     pFaultInfo->replayable.pPrefetchCtrl          = (NvU32*)(NvUPtr)registermappingsParams.pPrefetchCtrl;
6929     pFaultInfo->replayable.bufferSize             = faultBufferSize;
6930     pFaultInfo->replayable.bufferAddress          = bufferAddress;
6931 
6932     return NV_OK;
6933 
6934 cleanup_fault_buffer:
6935     {
6936         gpuDeviceUnmapCpuFreeHandle(device,
6937                                     pFaultInfo->faultBufferHandle,
6938                                     pFaultInfo->replayable.bufferAddress,
6939                                     0);
6940     }
6941 cleanup:
6942     portMemSet(pFaultInfo, 0, sizeof(*pFaultInfo));
6943     return status;
6944 }
6945 
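//
// Set up the access counter notification buffer for the device: allocate
// the buffer object, map it to the CPU, query its size, and record the
// register mappings and base DMA sysmem address in pAccessCntrInfo.
//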
6946 NV_STATUS nvGpuOpsInitAccessCntrInfo(struct gpuDevice *device,
6947                                      gpuAccessCntrInfo *pAccessCntrInfo)
6948 {
6949     struct gpuSession *session = device->session;
6950     NV_STATUS status = NV_OK;
6951     NvU32 accessCntrBufferAllocParams = 0;
6952     NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_SIZE_PARAMS sizeParams = {0};
6953     NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappings;
6954     void *bufferAddress;
6955     NV0080_CTRL_BIF_GET_DMA_BASE_SYSMEM_ADDR_PARAMS getDmaBaseSysmemAddrParams = {0};
6956     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
6957     OBJGPU *pGpu = NULL;
6958 
6959     // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
6960     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
6961     if (status != NV_OK)
6962         return status;
6963     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
6964     rmapiLockRelease();
6965     if (status != NV_OK)
6966         return status;
6967 
6968     pAccessCntrInfo->accessCntrBufferHandle = NV01_NULL_OBJECT;
6969     status = pRmApi->Alloc(pRmApi,
6970                            session->handle,
6971                            device->subhandle,
6972                            &pAccessCntrInfo->accessCntrBufferHandle,
6973                            device->accessCounterBufferClass,
6974                            &accessCntrBufferAllocParams);
6975 
6976     if (status != NV_OK)
6977         goto cleanup;
6978 
6979     status = pRmApi->MapToCpu(pRmApi, session->handle, device->subhandle, pAccessCntrInfo->accessCntrBufferHandle,
6980                               0, pAccessCntrInfo->bufferSize, &bufferAddress, 0);
6981 
6982     if (status != NV_OK)
6983         goto cleanup_access_ctr_buffer;
6984 
6985     pAccessCntrInfo->bufferAddress = bufferAddress;
6986 
6987     status = pRmApi->Control(pRmApi,
6988                              session->handle,
6989                              pAccessCntrInfo->accessCntrBufferHandle,
6990                              NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_SIZE,
6991                              &sizeParams,
6992                              sizeof(sizeParams));
6993 
6994     if (status != NV_OK)
6995         goto cleanup_access_ctr_buffer;
6996 
6997     pAccessCntrInfo->bufferSize = sizeParams.accessCntrBufferSize;
6998 
6999     status = pRmApi->Control(pRmApi,
7000                              session->handle,
7001                              pAccessCntrInfo->accessCntrBufferHandle,
7002                              NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS,
7003                              &registermappings,
7004                              sizeof(registermappings));
7005     if (status != NV_OK)
7006         goto cleanup_access_ctr_buffer;
7007 
7008     status = pRmApi->Control(pRmApi,
7009                              session->handle,
7010                              device->handle,
7011                              NV0080_CTRL_CMD_BIF_GET_DMA_BASE_SYSMEM_ADDR,
7012                              &getDmaBaseSysmemAddrParams,
7013                              sizeof(getDmaBaseSysmemAddrParams));
7014     if (status != NV_OK)
7015         goto cleanup_access_ctr_buffer;
7016 
7017     pAccessCntrInfo->pAccessCntrBufferGet  = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferGet;
7018     pAccessCntrInfo->pAccessCntrBufferPut  = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferPut;
7019     pAccessCntrInfo->pAccessCntrBufferFull = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferFull;
7020     pAccessCntrInfo->pHubIntr              = (NvU32*)(NvUPtr)registermappings.pHubIntr;
7021     pAccessCntrInfo->pHubIntrEnSet         = (NvU32*)(NvUPtr)registermappings.pHubIntrEnSet;
7022     pAccessCntrInfo->pHubIntrEnClear       = (NvU32*)(NvUPtr)registermappings.pHubIntrEnClear;
7023     pAccessCntrInfo->accessCounterMask     = registermappings.accessCntrMask;
7024     pAccessCntrInfo->baseDmaSysmemAddr     = getDmaBaseSysmemAddrParams.baseDmaSysmemAddr;
7025 
7026     return NV_OK;
7027 
7028 cleanup_access_ctr_buffer:
7029     gpuDeviceUnmapCpuFreeHandle(device,
7030                                 pAccessCntrInfo->accessCntrBufferHandle,
7031                                 pAccessCntrInfo->bufferAddress,
7032                                 0);
7033 cleanup:
7034     pAccessCntrInfo->accessCntrBufferHandle = 0;
7035     pAccessCntrInfo->bufferAddress = 0;
7036     return status;
7037 }
7038 
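// Translate the UVM access counter granularity enum into the corresponding
// NVC365 control value.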
7039 static NV_STATUS
7040 getAccessCounterGranularityValue(UVM_ACCESS_COUNTER_GRANULARITY granularity, NvU32 *value)
7041 {
7042     *value = 0;
7043 
7044     switch (granularity)
7045     {
7046         case UVM_ACCESS_COUNTER_GRANULARITY_64K:
7047             *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_64K;
7048             break;
7049         case UVM_ACCESS_COUNTER_GRANULARITY_2M:
7050             *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_2M;
7051             break;
7052         case UVM_ACCESS_COUNTER_GRANULARITY_16M:
7053             *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16M;
7054             break;
7055         case UVM_ACCESS_COUNTER_GRANULARITY_16G:
7056             *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16G;
7057             break;
7058         default:
7059             return NV_ERR_INVALID_ARGUMENT;
7060     };
7061 
7062     return NV_OK;
7063 }
7064 
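// Translate the UVM access counter use-limit enum into the corresponding
// NVC365 control value.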
7065 static NV_STATUS
7066 getAccessCounterLimitValue(UVM_ACCESS_COUNTER_USE_LIMIT limit, NvU32 *value)
7067 {
7068     *value = 0;
7069 
7070     switch (limit)
7071     {
7072         case UVM_ACCESS_COUNTER_USE_LIMIT_NONE:
7073             *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_NONE;
7074             break;
7075         case UVM_ACCESS_COUNTER_USE_LIMIT_QTR:
7076             *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_QTR;
7077             break;
7078         case UVM_ACCESS_COUNTER_USE_LIMIT_HALF:
7079             *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_HALF;
7080             break;
7081         case UVM_ACCESS_COUNTER_USE_LIMIT_FULL:
7082             *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_FULL;
7083             break;
7084         default:
7085             return NV_ERR_INVALID_ARGUMENT;
7086     };
7087 
7088     return NV_OK;
7089 }
7090 
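// Program the access counter configuration (granularities, use limits and
// notification threshold), then enable the notification buffer with interrupt
// ownership handed to the caller (NOT_RM) rather than to RM.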
7091 NV_STATUS nvGpuOpsEnableAccessCntr(struct gpuDevice *device,
7092                                    gpuAccessCntrInfo *pAccessCntrInfo,
7093                                    gpuAccessCntrConfig *pAccessCntrConfig)
7094 {
7095     NV_STATUS status = NV_OK;
7096     NVC365_CTRL_ACCESS_CNTR_SET_CONFIG_PARAMS setConfigParams = { 0 };
7097     NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 };
7098     struct gpuSession *session = device->session;
7099     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
7100 
7101     status = getAccessCounterGranularityValue(pAccessCntrConfig->mimcGranularity, &setConfigParams.mimcGranularity);
7102     if (status != NV_OK)
7103         return status;
7104 
7105     status = getAccessCounterGranularityValue(pAccessCntrConfig->momcGranularity, &setConfigParams.momcGranularity);
7106     if (status != NV_OK)
7107         return status;
7108 
7109     status = getAccessCounterLimitValue(pAccessCntrConfig->mimcUseLimit, &setConfigParams.mimcLimit);
7110     if (status != NV_OK)
7111         return status;
7112 
7113     status = getAccessCounterLimitValue(pAccessCntrConfig->momcUseLimit, &setConfigParams.momcLimit);
7114     if (status != NV_OK)
7115         return status;
7116 
7117     setConfigParams.threshold = pAccessCntrConfig->threshold;
7118     setConfigParams.cmd = NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_GRANULARITY |
7119                           NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_GRANULARITY |
7120                           NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_LIMIT |
7121                           NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_LIMIT |
7122                           NVC365_CTRL_ACCESS_COUNTER_SET_THRESHOLD;
7123 
7124     status = pRmApi->Control(pRmApi,
7125                              session->handle,
7126                              pAccessCntrInfo->accessCntrBufferHandle,
7127                              NVC365_CTRL_CMD_ACCESS_CNTR_SET_CONFIG,
7128                              &setConfigParams,
7129                              sizeof(setConfigParams));
7130     if (status != NV_OK)
7131         return status;
7132 
7133     enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_NOT_RM;
7134     enableParams.enable        = NV_TRUE;
7135 
7136     status = pRmApi->Control(pRmApi,
7137                              session->handle,
7138                              pAccessCntrInfo->accessCntrBufferHandle,
7139                              NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE,
7140                              &enableParams,
7141                              sizeof(enableParams));
7142     return status;
7143 }
7144 
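// Disable the access counter notification buffer and return interrupt
// ownership to RM.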
7145 NV_STATUS nvGpuOpsDisableAccessCntr(struct gpuDevice *device,
7146                                     gpuAccessCntrInfo *pAccessCntrInfo)
7147 {
7148     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
7149     NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 };
7150 
7151     enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_RM;
7152     enableParams.enable        = NV_FALSE;
7153     return pRmApi->Control(pRmApi,
7154                            device->session->handle,
7155                            pAccessCntrInfo->accessCntrBufferHandle,
7156                            NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE,
7157                            &enableParams,
7158                            sizeof(enableParams));
7159 }
7160 
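// Tear down the CPU mapping and the buffer object created by
// nvGpuOpsInitAccessCntrInfo(), and clear the cached info.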
7161 NV_STATUS nvGpuOpsDestroyAccessCntrInfo(struct gpuDevice *device,
7162                                         gpuAccessCntrInfo *pAccessCntrInfo)
7163 {
7164     gpuDeviceUnmapCpuFreeHandle(device,
7165                                 pAccessCntrInfo->accessCntrBufferHandle,
7166                                 pAccessCntrInfo->bufferAddress,
7167                                 0);
7168     portMemSet(pAccessCntrInfo, 0, sizeof(gpuAccessCntrInfo));
7169     return NV_OK;
7170 }
7171 
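// Tear down the fault buffer state: unregister the non-replayable shadow
// buffer on Volta+ if one was registered, unmap and free the fault buffer
// object, and clear the cached info.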
7172 NV_STATUS nvGpuOpsDestroyFaultInfo(struct gpuDevice *device,
7173                                    gpuFaultInfo *pFaultInfo)
7174 {
7175     NV_STATUS status = NV_OK;
7176     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
7177 
7178     if (pFaultInfo->faultBufferHandle && isDeviceVoltaPlus(device))
7179     {
7180         NVC369_CTRL_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF_PARAMS params = {0};
7181 
7182         params.pShadowBuffer = NV_PTR_TO_NvP64(pFaultInfo->nonReplayable.shadowBufferAddress);
7183 
7184         status = pRmApi->Control(pRmApi,
7185                                  device->session->handle,
7186                                  pFaultInfo->faultBufferHandle,
7187                                  NVC369_CTRL_CMD_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF,
7188                                  &params,
7189                                  sizeof(params));
7190         NV_ASSERT(status == NV_OK);
7191     }
7192 
7193     {
7194         gpuDeviceUnmapCpuFreeHandle(device,
7195                                     pFaultInfo->faultBufferHandle,
7196                                     pFaultInfo->replayable.bufferAddress,
7197                                     0);
7198     }
7199 
7200     portMemSet(pFaultInfo, 0, sizeof(gpuFaultInfo));
7201     return status;
7202 }
7203 
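// Report whether the client shadow buffer for non-replayable faults currently
// holds any unconsumed fault entries.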
7204 NV_STATUS nvGpuOpsHasPendingNonReplayableFaults(gpuFaultInfo *pFaultInfo,
7205                                                 NvBool *hasPendingFaults)
7206 {
7207     GMMU_SHADOW_FAULT_BUF *pQueue =
7208         (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress;
7209 
7210     if (!pQueue || !hasPendingFaults)
7211         return NV_ERR_INVALID_ARGUMENT;
7212 
7213     *hasPendingFaults = !queueIsEmpty(pQueue);
7214 
7215     return NV_OK;
7216 }
7217 
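// Drain every pending entry from the client shadow fault buffer into the
// caller-provided buffer, NVC369_BUF_SIZE bytes per fault, and report how many
// entries were copied.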
7218 NV_STATUS nvGpuOpsGetNonReplayableFaults(gpuFaultInfo *pFaultInfo,
7219                                          void *faultBuffer,
7220                                          NvU32 *numFaults)
7221 {
7222     GMMU_SHADOW_FAULT_BUF *pQueue =
7223         (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress;
7224     QueueContext *pQueueCtx =
7225         (QueueContext *) pFaultInfo->nonReplayable.shadowBufferContext;
7226 
7227     if (!pQueue || !faultBuffer || !numFaults)
7228         return NV_ERR_INVALID_ARGUMENT;
7229 
7230     *numFaults = 0;
7231 
7232     // Copy all faults in the client shadow fault buffer to the given buffer
7233     while (queuePopAndCopyNonManaged(pQueue, pQueueCtx, faultBuffer))
7234     {
7235         ++(*numFaults);
7236         faultBuffer = (char *)faultBuffer + NVC369_BUF_SIZE;
7237     }
7238 
7239     return NV_OK;
7240 }
7241 
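// Ask GMMU to issue a flush of the replayable fault buffer on the GPU that
// backs the given device.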
7242 NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device)
7243 {
7244     NV_STATUS   status;
7245     NvHandle    hClient = device->session->handle;
7246     RsClient   *pClient;
7247     Device     *pDevice;
7248     OBJGPU     *pGpu;
7249     KernelGmmu *pKernelGmmu;
7250 
7251     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
7252     if (status != NV_OK)
7253         return NV_ERR_INVALID_ARGUMENT;
7254 
7255     status = deviceGetByHandle(pClient, device->handle, &pDevice);
7256     if (status != NV_OK)
7257         return NV_ERR_INVALID_ARGUMENT;
7258 
7259     GPU_RES_SET_THREAD_BC_STATE(pDevice);
7260 
7261     pGpu = GPU_RES_GET_GPU(pDevice);
7262     pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
7263 
7264     return kgmmuIssueReplayableFaultBufferFlush_HAL(pGpu, pKernelGmmu);
7265 }
7266 
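// Validate that the given client/channel pair belongs to the provided VA
// space, and return the channel's GPU (adjusted for the caller's subdevice)
// along with its KernelChannel object.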
7267 static NV_STATUS nvGpuOpsVerifyChannel(struct gpuAddressSpace *vaSpace,
7268                                        NvHandle hClient,
7269                                        NvHandle hKernelChannel,
7270                                        OBJGPU **pGpu,
7271                                        KernelChannel **ppKernelChannel)
7272 {
7273     NV_STATUS status = NV_OK;
7274     NvHandle hDevice, hSubDevice;
7275     OBJVASPACE *pVAS = NULL;
7276     OBJGPU *pVaSpaceGpu;
7277     RsClient *pClient;
7278 
7279     NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
7280 
7281     status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
7282     if (status != NV_OK)
7283         return status;
7284 
7285     status = vaspaceGetByHandleOrDeviceDefault(pClient,
7286                                                vaSpace->device->handle,
7287                                                vaSpace->handle,
7288                                                &pVAS);
7289     if (status != NV_OK)
7290         return status;
7291 
7292     status = CliGetKernelChannel(hClient, hKernelChannel, ppKernelChannel);
7293     if (status != NV_OK)
7294         return NV_ERR_INVALID_OBJECT_HANDLE;
7295 
7296     hDevice = RES_GET_HANDLE(GPU_RES_GET_DEVICE(*ppKernelChannel));
7297     status = CliSetGpuContext(hClient, hDevice, pGpu, NULL);
7298     if (status != NV_OK)
7299         return status;
7300 
7301     if ((*ppKernelChannel)->pVAS != pVAS)
7302     {
7303         if (CliSetGpuContext(vaSpace->device->session->handle,
7304                              vaSpace->device->handle,
7305                              &pVaSpaceGpu,
7306                              NULL) == NV_OK && pVaSpaceGpu != *pGpu)
7307             return NV_ERR_OTHER_DEVICE_FOUND;
7308 
7309         return NV_ERR_INVALID_CHANNEL;
7310     }
7311 
7312     // In SLI config, RM's internal allocations such as channel instance
    // are tracked with a memdesc per subdevice. Hence, get the correct pGpu.
7314     status = CliSetSubDeviceContext(vaSpace->device->session->handle,
7315                                     vaSpace->device->subhandle,
7316                                     &hSubDevice,
7317                                     pGpu);
7318     if (status != NV_OK)
7319         return status;
7320 
7321     return NV_OK;
7322 }
7323 
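// Classify the channel's engine for UVM: GR and SEC2 are reported explicitly,
// anything else is treated as CE.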
7324 static NV_STATUS nvGpuOpsGetChannelEngineType(OBJGPU *pGpu,
7325                                               KernelChannel *pKernelChannel,
7326                                               UVM_GPU_CHANNEL_ENGINE_TYPE *engineType)
7327 {
7328     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
7329     NvU32 engDesc;
7330     RM_ENGINE_TYPE rmEngineType;
7331     NV_STATUS status;
7332 
7333     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
7334 
7335     status = kchannelGetEngine_HAL(pGpu, pKernelChannel, &engDesc);
7336     if (status != NV_OK)
7337         return status;
7338 
7339     status = kfifoEngineInfoXlate_HAL(pGpu,
7340                                       pKernelFifo,
7341                                       ENGINE_INFO_TYPE_ENG_DESC,
7342                                       engDesc,
7343                                       ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
7344                                       (NvU32 *)&rmEngineType);
7345     if (status != NV_OK)
7346         return status;
7347 
7348     if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
7349         *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_GR;
7350     else if (rmEngineType == RM_ENGINE_TYPE_SEC2)
7351         *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2;
7352     else
7353         *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
7354 
7355     return NV_OK;
7356 }
7357 
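// Take an extra reference on the memory descriptor, also bumping its
// allocation count when the descriptor currently owns an allocation.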
7358 static void _memdescRetain(MEMORY_DESCRIPTOR *pMemDesc)
7359 {
7360     if (pMemDesc->Allocated > 0)
7361     {
7362         pMemDesc->Allocated++;
7363     }
7364 
7365     memdescAddRef(pMemDesc);
7366 }
7367 
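// Look up the channel's instance block memory descriptor and report its
// physical base address and aperture (sysmem vs vidmem) to the caller.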
7368 static NV_STATUS nvGpuOpsGetChannelInstanceMemInfo(gpuRetainedChannel *retainedChannel,
7369                                                    gpuChannelInstanceInfo *channelInstanceInfo)
7370 {
7371     PMEMORY_DESCRIPTOR pMemDesc = NULL;
7372     NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo;
7373     NV_STATUS status;
7374     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu);
7375     KernelChannel *pKernelChannel = NULL;
7376     CHID_MGR *pChidMgr = kfifoGetChidMgr(retainedChannel->pGpu,
7377                                          pKernelFifo,
7378                                          retainedChannel->runlistId);
7379 
7380     pKernelChannel = kfifoChidMgrGetKernelChannel(retainedChannel->pGpu,
7381                                                   pKernelFifo,
7382                                                   pChidMgr,
7383                                                   channelInstanceInfo->chId);
7384     NV_CHECK_OR_RETURN(LEVEL_ERROR, pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);
7385 
7386     status = kfifoChannelGetFifoContextMemDesc_HAL(retainedChannel->pGpu,
7387                                                    pKernelFifo,
7388                                                    pKernelChannel,
7389                                                    FIFO_CTX_INST_BLOCK,
7390                                                    &pMemDesc);
7391     if (status != NV_OK)
7392         return status;
7393 
7394     pMemDesc = memdescGetMemDescFromGpu(pMemDesc, retainedChannel->pGpu);
7395 
7396     kfifoFillMemInfo(pKernelFifo, pMemDesc, &instanceMemInfo);
7397 
7398     if (instanceMemInfo.aperture == NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_INVALID)
7399         return NV_ERR_INVALID_OBJECT_HANDLE;
7400 
7401     retainedChannel->instanceMemDesc = pMemDesc;
7402     channelInstanceInfo->base = instanceMemInfo.base;
7403     channelInstanceInfo->sysmem = (instanceMemInfo.aperture != NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM);
7404 
7405     return NV_OK;
7406 }
7407 
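// Retain the channel's TSG by duping it into our client (dropping the GPU
// lock around the dup if we currently hold it), and report the TSG ID and the
// maximum subcontext count.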
7408 static NV_STATUS nvGpuOpsGetChannelTsgInfo(gpuRetainedChannel *retainedChannel,
7409                                            gpuChannelInstanceInfo *channelInstanceInfo,
7410                                            KernelChannel *pKernelChannel)
7411 {
7412     OBJGPU             *pGpu = retainedChannel->pGpu;
7413     KernelFifo         *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
7414     RM_API             *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
7415     KernelChannelGroup *pKernelChannelGroup;
7416     NvHandle            hDupTsg;
7417     NvU32               tsgMaxSubctxCount;
7418     NV_STATUS           status;
7419     NvBool              bLockAcquire = NV_FALSE;
7420 
7421     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
7422     pKernelChannelGroup = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup;
7423     NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_STATE);
7424 
7425     if (rmGpuLockIsOwner())
7426     {
7427         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
7428         bLockAcquire = NV_TRUE;
7429     }
7430     // Take a reference on the TSG ID by duping the TSG. Note that this is
7431     // the easy way out because we dup more than strictly necessary: every
7432     // channel registered under the same TSG will re-dup that TSG. In
7433     // practice there's very little overhead to re-duping the TSG for each
7434     // channel.
7435     hDupTsg = NV01_NULL_OBJECT;
7436     status  = pRmApi->DupObject(pRmApi,
7437         retainedChannel->session->handle,
7438         retainedChannel->rmDevice->deviceHandle,
7439         &hDupTsg,
7440         RES_GET_CLIENT_HANDLE(pKernelChannel),
7441         RES_GET_HANDLE(pKernelChannel->pKernelChannelGroupApi),
7442         NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
7443     if (status != NV_OK)
7444     {
7445         if (bLockAcquire)
7446         {
7447             NV_ASSERT_OK_OR_RETURN(rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS));
7448         }
7449         return status;
7450     }
7451 
7452     if (bLockAcquire)
7453     {
7454         if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS)) != NV_OK)
7455         {
7456             pRmApi->Free(pRmApi, retainedChannel->session->handle, hDupTsg);
7457             return status;
7458         }
7459     }
7460 
7461     tsgMaxSubctxCount = kfifoChannelGroupGetLocalMaxSubcontext_HAL(
7462         pGpu, pKernelFifo,
7463         pKernelChannelGroup,
7464         pKernelChannelGroup->bLegacyMode);
7465 
7466     channelInstanceInfo->bTsgChannel = NV_TRUE;
7467     channelInstanceInfo->tsgId = pKernelChannelGroup->grpID;
7468     channelInstanceInfo->tsgMaxSubctxCount = tsgMaxSubctxCount;
7469 
7470     retainedChannel->hDupTsg = hDupTsg;
7471 
7472     return NV_OK;
7473 }
7474 
7475 
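// For GR channels on Ampere+ devices with MIG enabled, report the channel's
// GR engine index as the SMC engine ID, along with the VEID offset derived
// from the engine's MMU fault ID; otherwise both values stay zero.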
7476 static NV_STATUS nvGpuOpsGetChannelSmcInfo(gpuRetainedChannel *retainedChannel,
7477                                            gpuChannelInstanceInfo *channelInstanceInfo,
7478                                            KernelChannel *pKernelChannel,
7479                                            struct gpuDevice *device)
7480 {
7481     channelInstanceInfo->smcEngineId         = 0;
7482     channelInstanceInfo->smcEngineVeIdOffset = 0;
7483 
7484     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
7485 
7486     if (isDeviceAmperePlus(device) && retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR)
7487     {
7488         OBJGPU *pGpu = retainedChannel->pGpu;
7489 
7490         if (IS_MIG_IN_USE(pGpu))
7491         {
7492             NvU32 grFaultId;
7493             NvU32 grMmuFaultEngId;
7494 
7495             const NvU32 grIdx = RM_ENGINE_TYPE_GR_IDX(kchannelGetEngineType(pKernelChannel));
7496 
7497             NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu,
7498                                                             GPU_GET_KERNEL_FIFO(pGpu),
7499                                                             ENGINE_INFO_TYPE_ENG_DESC,
7500                                                             ENG_GR(grIdx),
7501                                                             ENGINE_INFO_TYPE_MMU_FAULT_ID,
7502                                                             &grFaultId));
7503 
7504             grMmuFaultEngId = kgmmuGetGraphicsEngineId_HAL(GPU_GET_KERNEL_GMMU(pGpu));
7505             NV_ASSERT(grFaultId >= grMmuFaultEngId);
7506 
7507             channelInstanceInfo->smcEngineId         = grIdx;
7508             channelInstanceInfo->smcEngineVeIdOffset = grFaultId - grMmuFaultEngId;
7509         }
7510     }
7511 
7512     return NV_OK;
7513 }
7514 
7515 
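// Retain the channel's subcontext (kctxshare) when subcontexts are supported
// and the channel was created with a user TSG, and report its VEID
// (subctxId). See the comments below for the cases that are skipped.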
7516 static void nvGpuOpsGetChannelSubctxInfo(gpuRetainedChannel *retainedChannel,
7517                                          gpuChannelInstanceInfo *channelInstanceInfo,
7518                                          KernelChannel *pKernelChannel)
7519 {
7520     OBJGPU *pGpu = retainedChannel->pGpu;
7521     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
7522     NvHandle hDupKernelCtxShare = NV01_NULL_OBJECT;
7523     RM_API *pRmApi;
7524     NV_STATUS status = NV_OK;
7525 
7526     NV_ASSERT_OR_RETURN_VOID(pKernelChannel != NULL);
7527 
7528     pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
7529 
7530     // Subcontexts are parented by the TSG, so we must have a reference on the
7531     // TSG in order to retain the subcontext. The exception is if this channel
7532     // was allocated without a TSG, in which case RM creates an internal TSG and
7533     // subcontext which we shouldn't attempt to retain. In that case, we will
7534     // have skipped duping the TSG earlier and hDupTsg == 0.
7535     //
7536     // pKernelChannelGroup->bLegacyMode means that the subcontext was
7537     // created internally by RM, not by the user.
7538     if (kfifoIsSubcontextSupported(pKernelFifo) &&
7539         pKernelChannel->pKernelCtxShareApi &&
7540         retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR &&
7541         retainedChannel->hDupTsg &&
7542         !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bLegacyMode)
7543     {
7544 
7545         status = pRmApi->DupObject(pRmApi,
7546                                    retainedChannel->session->handle,
7547                                    retainedChannel->hDupTsg,
7548                                    &hDupKernelCtxShare,
7549                                    RES_GET_CLIENT_HANDLE(pKernelChannel),
7550                                    RES_GET_HANDLE(pKernelChannel->pKernelCtxShareApi),
7551                                    NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
7552 
7553         NV_ASSERT(status == NV_OK);
7554         retainedChannel->hDupKernelCtxShare = hDupKernelCtxShare;
7555 
7556         // Faults report the VEID (aka subcontext ID), so we need to retain the
7557         // subcontext ID. We do that by taking a reference on the entire
7558         // subcontext object.
7559         //
7560         // pKernelCtxShare->pShareData is a pointer to the broadcast kctxshare data object
7561         // We get VEID for this retained channel's GPU through that.
7562 
        // It might be better to go through the handle we just duped for this, but it is not clear how to do so.
7564         channelInstanceInfo->subctxId = pKernelChannel->pKernelCtxShareApi->pShareData->subctxId;
7565         channelInstanceInfo->bInSubctx = NV_TRUE;
7566 
7567         // Make sure that we saw our GPU
7568         NV_ASSERT(channelInstanceInfo->bInSubctx);
7569         NV_ASSERT(channelInstanceInfo->subctxId < channelInstanceInfo->tsgMaxSubctxCount);
7570     }
7571     else
7572     {
7573         channelInstanceInfo->subctxId = 0;
7574         channelInstanceInfo->bInSubctx = NV_FALSE;
7575     }
7576 }
7577 
7578 // This function verifies that the instance pointer of the retainedChannel still
7579 // refers to a valid channel.
7580 static NV_STATUS nvGpuOpsGetChannelData(gpuRetainedChannel *retainedChannel,
7581                                         KernelChannel **ppKernelChannel)
7582 {
7583     NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo;
7584     INST_BLOCK_DESC inst;
7585     KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu);
7586 
7587     kfifoFillMemInfo(pKernelFifo, retainedChannel->instanceMemDesc, &instanceMemInfo);
7588 
7589     switch (instanceMemInfo.aperture)
7590     {
7591         case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_COH:
7592             inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
7593             break;
7594         case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_NCOH:
7595             inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;
7596             break;
7597         case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM:
7598             inst.aperture = NV_MMU_PTE_APERTURE_VIDEO_MEMORY;
7599             break;
7600         default:
7601             return NV_ERR_INVALID_CHANNEL;
7602     }
7603 
7604     inst.address = instanceMemInfo.base;
7605     inst.gfid = GPU_GFID_PF;      // Run in VF context w/o GFID
7606 
7607     return kfifoConvertInstToKernelChannel_HAL(retainedChannel->pGpu,
7608                                               pKernelFifo,
7609                                               &inst,
7610                                               ppKernelChannel);
7611 }
7612 
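// Retain a user channel on behalf of UVM: validate it against the VA space,
// gather its instance/TSG/SMC/subcontext information, keep it alive via a
// UVM_CHANNEL_RETAINER object, query its work submission token (also used as
// the CLEAR_FAULTED token), and retain its context buffer resources.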
7613 NV_STATUS nvGpuOpsRetainChannel(struct gpuAddressSpace *vaSpace,
7614                                 NvHandle hClient,
7615                                 NvHandle hKernelChannel,
7616                                 gpuRetainedChannel **retainedChannel,
7617                                 gpuChannelInstanceInfo *channelInstanceInfo)
7618 {
7619     nvGpuOpsLockSet acquiredLocks;
7620     THREAD_STATE_NODE threadState;
7621     KernelChannel *pKernelChannel = NULL;
7622     OBJGPU *pGpu = NULL;
7623     gpuRetainedChannel *channel = NULL;
7624     NV_STATUS status = NV_OK;
7625     struct gpuDevice *device;
7626     subDeviceDesc *rmSubDevice;
7627     NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0};
7628     NV_UVM_CHANNEL_RETAINER_ALLOC_PARAMS channelRetainerParams = {0};
7629     RM_API *pRmApi = NULL;
7630     NvHandle hChannelParent = 0;
7631 
7632     if (!vaSpace || !channelInstanceInfo)
7633         return NV_ERR_INVALID_ARGUMENT;
7634 
7635     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
7636     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
7637                                       hClient,
7638                                       NULL,
7639                                       &acquiredLocks);
7640     if (status != NV_OK)
7641     {
7642         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7643         return status;
7644     }
7645 
7646     device = vaSpace->device;
7647     rmSubDevice = device->rmSubDevice;
7648 
7649     status = nvGpuOpsVerifyChannel(vaSpace, hClient, hKernelChannel, &pGpu,
7650                                    &pKernelChannel);
7651     if (status != NV_OK)
7652     {
7653         _nvGpuOpsLocksRelease(&acquiredLocks);
7654         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7655         return status;
7656     }
7657 
7658     portMemSet(channelInstanceInfo, 0, sizeof(*channelInstanceInfo));
7659 
7660     channel = portMemAllocNonPaged(sizeof(*channel));
7661     if (channel == NULL)
7662     {
7663         status = NV_ERR_NO_MEMORY;
7664         _nvGpuOpsLocksRelease(&acquiredLocks);
7665         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7666         return status;
7667     }
7668 
7669     portMemSet(channel, 0, sizeof(*channel));
7670     channel->device = device;
7671     channel->rmDevice = device->rmDevice;
7672     channel->rmSubDevice = rmSubDevice;
7673     channel->session = device->session;
7674     channel->pGpu = pGpu;
7675 
7676     channelInstanceInfo->runlistId = kchannelGetRunlistId(pKernelChannel);
7677     channelInstanceInfo->chId = pKernelChannel->ChID;
7678     channel->chId = pKernelChannel->ChID;
7679     channel->runlistId = kchannelGetRunlistId(pKernelChannel);
7680 
7681     status = nvGpuOpsGetChannelEngineType(pGpu, pKernelChannel, &channel->channelEngineType);
7682     if (status != NV_OK)
7683         goto error;
7684 
7685     status = nvGpuOpsGetChannelInstanceMemInfo(channel, channelInstanceInfo);
7686     if (status != NV_OK)
7687         goto error;
7688 
7689     status = nvGpuOpsGetChannelTsgInfo(channel, channelInstanceInfo,
7690                                        pKernelChannel);
7691     if (status != NV_OK)
7692         goto error;
7693 
7694     status = nvGpuOpsGetChannelSmcInfo(channel, channelInstanceInfo,
7695                                        pKernelChannel, device);
7696     if (status != NV_OK)
7697         goto error;
7698 
7699     nvGpuOpsGetChannelSubctxInfo(channel, channelInstanceInfo, pKernelChannel);
7700 
7701     pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
7702 
7703     if (channelInstanceInfo->bTsgChannel)
7704         hChannelParent = channel->hDupTsg;
7705     else
7706         hChannelParent = channel->rmDevice->deviceHandle;
7707 
7708     channelRetainerParams.hClient = hClient;
7709     channelRetainerParams.hChannel = hKernelChannel;
7710 
7711     NV_PRINTF(LEVEL_INFO, "%s:Channel duping is not supported. Fall back to UVM_CHANNEL_RETAINER\n",
7712               __FUNCTION__);
7713 
7714     status = pRmApi->Alloc(pRmApi,
7715                            device->session->handle,
7716                            hChannelParent,
7717                           &channel->hChannelRetainer,
7718                            UVM_CHANNEL_RETAINER,
7719                           &channelRetainerParams);
7720     if (status != NV_OK)
7721         goto error;
7722 
7723     // Now get the token for submission on given channel.
7724     status = pRmApi->Control(pRmApi,
7725                              hClient,
7726                              hKernelChannel,
7727                              NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
7728                              &params,
7729                              sizeof(params));
7730 
7731     if (status != NV_OK)
7732     {
7733         NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
7734                   __LINE__, nvstatusToString(status));
7735         goto error;
7736     }
7737 
    // On Turing+ GPUs, the CLEAR_FAULTED method requires an RM-provided handle
7739     // to identify the channel.
7740     //
7741     // TODO: Bug 1905719: We are currently using the channel handle that is
7742     // used for the work submission usermode doorbell mechanism. However, the
7743     // values may differ in the future, so we may need a dedicated API to get
7744     // the channel handle for CLEAR_FAULTED in RM.
7745     channelInstanceInfo->clearFaultedToken = params.workSubmitToken;
7746 
7747     if (isDeviceAmperePlus(device))
7748     {
7749         void *bar0Mapping = gpuBar0BaseAddress(pGpu);
7750         NvU32 chramPri;
7751         NvU32 runlistPri;
7752 
7753         NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu,
7754                                                               GPU_GET_KERNEL_FIFO(pGpu),
7755                                                               ENGINE_INFO_TYPE_RUNLIST,
7756                                                               kchannelGetRunlistId(pKernelChannel),
7757                                                               ENGINE_INFO_TYPE_CHRAM_PRI_BASE,
7758                                                               &chramPri), error);
7759 
7760         chramPri += NV_CHRAM_CHANNEL(pKernelChannel->ChID);
7761 
7762         channelInstanceInfo->pChramChannelRegister = (NvU32 *)((NvU8*)bar0Mapping + chramPri);
7763 
7764         NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu,
7765                                                               GPU_GET_KERNEL_FIFO(pGpu),
7766                                                               ENGINE_INFO_TYPE_RUNLIST,
7767                                                               kchannelGetRunlistId(pKernelChannel),
7768                                                               ENGINE_INFO_TYPE_RUNLIST_PRI_BASE,
7769                                                               &runlistPri), error);
7770 
7771         channelInstanceInfo->pRunlistPRIBaseRegister = (NvU32 *)((NvU8*)bar0Mapping + runlistPri);
7772     }
7773 
7774     status = _nvGpuOpsRetainChannelResources(device,
7775                                              hClient,
7776                                              hKernelChannel,
7777                                              channel,
7778                                              channelInstanceInfo);
7779     if (status != NV_OK)
7780     {
7781         NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
7782                   __LINE__, nvstatusToString(status));
7783         goto error;
7784     }
7785 
7786     channelInstanceInfo->channelEngineType = channel->channelEngineType;
7787     *retainedChannel = channel;
7788 
7789     _nvGpuOpsLocksRelease(&acquiredLocks);
7790     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7791     return NV_OK;
7792 
7793 error:
7794     _nvGpuOpsReleaseChannel(channel);
7795     _nvGpuOpsLocksRelease(&acquiredLocks);
7796     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7797     return status;
7798 }
7799 
7800 static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel)
7801 {
7802     NV_STATUS status = NV_OK;
7803     struct gpuSession *session;
7804     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
7805 
7806     if (!retainedChannel)
7807         return;
7808 
7809     _nvGpuOpsReleaseChannelResources(retainedChannel);
7810 
7811     session = retainedChannel->session;
7812     NV_ASSERT(session);
7813 
7814     if (retainedChannel->hChannelRetainer)
7815     {
7816         status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hChannelRetainer);
7817         NV_ASSERT(status == NV_OK);
7818     }
7819 
7820     // Release the subcontext if we retained it. Subcontexts are parented by the
7821     // TSG, so we must release the subcontext before releasing the TSG.
7822     if (retainedChannel->hDupKernelCtxShare)
7823     {
7824         NV_ASSERT(retainedChannel->hDupTsg);
7825         status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupKernelCtxShare);
7826         NV_ASSERT(status == NV_OK);
7827     }
7828 
7829     if (retainedChannel->hDupTsg)
7830     {
7831         status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupTsg);
7832         NV_ASSERT(status == NV_OK);
7833     }
7834 
7835 
7836     // Releasing the channel ID can only fail if the ID is no longer valid,
7837     // which indicates a bug elsewhere.
7838     NV_ASSERT(status == NV_OK);
7839 
7840     portMemFree(retainedChannel);
7841 }
7842 
7843 void nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel)
7844 {
7845     nvGpuOpsLockSet acquiredLocks;
7846     THREAD_STATE_NODE threadState;
7847     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    // TODO: can we lock fewer GPUs with the channel information?
7849     if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
7850                                  retainedChannel->session->handle,
7851                                  NULL,
7852                                  &acquiredLocks) != NV_OK)
7853     {
7854         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7855         return;
7856     }
7857     _nvGpuOpsReleaseChannel(retainedChannel);
7858     _nvGpuOpsLocksRelease(&acquiredLocks);
7859     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
7860 }
7861 
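// Drop a reference on a shadow memory descriptor, removing it from the
// device's kernel-to-physical descriptor map when the last reference goes
// away.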
7862 static void
7863 _shadowMemdescDestroy(gpuRetainedChannel *retainedChannel,
7864                       MEMORY_DESCRIPTOR *pMemDesc)
7865 {
7866     if (pMemDesc->RefCount == 1)
7867     {
7868         mapRemoveByKey(&retainedChannel->device->kern2PhysDescrMap, (NvU64) pMemDesc);
7869     }
7870 
7871     memdescDestroy(pMemDesc);
7872 }
7873 
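// Search the device's kernel-to-physical descriptor map for an existing
// shadow descriptor of the given RM buffer handle; if one is found, retain it
// and return it.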
7874 NvBool _memDescFindAndRetain(gpuRetainedChannel *retainedChannel,
7875                              MEMORY_DESCRIPTOR *pBufferHandle,
7876                              MEMORY_DESCRIPTOR **ppMemDesc)
7877 {
7878     MEMORY_DESCRIPTOR *pMemDesc = NULL;
7879     MemdescMapIter iter = mapIterAll(&retainedChannel->device->kern2PhysDescrMap);
7880     while (mapIterNext(&iter))
7881     {
7882         MEMORY_DESCRIPTOR **ppValue = iter.pValue;
7883         if (pBufferHandle == *ppValue)
7884         {
7885             NvU64 key = mapKey(&retainedChannel->device->kern2PhysDescrMap, ppValue);
7886             pMemDesc = (MEMORY_DESCRIPTOR *) key;
7887             break;
7888         }
7889     }
7890 
7891     if (pMemDesc != NULL)
7892     {
7893         _memdescRetain(pMemDesc);
7894         *ppMemDesc = pMemDesc;
7895         return NV_TRUE;
7896     }
7897     return NV_FALSE;
7898 }
7899 
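// Create (or reuse) a shadow memory descriptor describing a falcon (SEC2)
// context buffer from the info returned by
// NV2080_CTRL_CMD_FLCN_GET_CTX_BUFFER_INFO. Only contiguous buffers are
// expected here.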
7900 static NV_STATUS
7901 _shadowMemdescCreateFlcn(gpuRetainedChannel *retainedChannel,
7902                      NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pCtxBufferInfo,
7903                      MEMORY_DESCRIPTOR **ppMemDesc)
7904 {
7905     MEMORY_DESCRIPTOR *pMemDesc = NULL;
7906     MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle;
7907     NV_STATUS status = NV_OK;
7908 
7909     NV_ASSERT_OR_RETURN(pCtxBufferInfo->bIsContigous, NV_ERR_INVALID_STATE);
7910 
7911     if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc))
7912         return status;
7913 
7914     status = memdescCreate(&pMemDesc,
7915         retainedChannel->pGpu,
7916         pCtxBufferInfo->size,
7917         pCtxBufferInfo->alignment,
7918         pCtxBufferInfo->bIsContigous,
7919         pCtxBufferInfo->aperture,
7920         NV_MEMORY_CACHED,
7921         MEMDESC_FLAGS_NONE
7922     );
7923     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
7924 
7925     memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize);
7926 
7927     memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pCtxBufferInfo->physAddr, pCtxBufferInfo->size);
7928 
7929     (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap,
7930                           (NvU64) pMemDesc,
7931                           &pBufferHandle);
7932     *ppMemDesc = pMemDesc;
7933 
7934     return status;
7935 }
7936 
7937 
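// Create (or reuse) a shadow memory descriptor for a GR context buffer,
// fetching its physical pages in batches via
// NV2080_CTRL_CMD_KGR_GET_CTX_BUFFER_PTES.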
7938 static NV_STATUS
7939 _shadowMemdescCreate(gpuRetainedChannel *retainedChannel,
7940                      NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo,
7941                      MEMORY_DESCRIPTOR **ppMemDesc)
7942 {
7943     NvU32 j;
7944     NvU64 pageSize = pCtxBufferInfo->pageSize;
7945     NvU32 numBufferPages = NV_ROUNDUP(pCtxBufferInfo->size, pageSize) / pageSize;
7946     MEMORY_DESCRIPTOR *pMemDesc = NULL;
7947     MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle;
7948     NV2080_CTRL_KGR_GET_CTX_BUFFER_PTES_PARAMS *pParams = NULL;
7949     NvU64 *pPages = NULL;
7950     NV_STATUS status = NV_OK;
7951     KernelChannel *pKernelChannel;
7952     RM_API *pRmApi;
7953 
7954     if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc))
7955         goto done;
7956 
7957     pPages = portMemAllocNonPaged(sizeof(*pPages) * numBufferPages);
7958     if (pPages == NULL)
7959     {
7960         status = NV_ERR_NO_MEMORY;
7961         goto done;
7962     }
7963 
7964     status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
7965     if (status != NV_OK)
7966     {
7967         goto done;
7968     }
7969 
7970     pParams = portMemAllocNonPaged(sizeof(*pParams));
7971     if (pParams == NULL)
7972     {
7973         status = NV_ERR_NO_MEMORY;
7974         goto done;
7975     }
7976 
7977     portMemSet(pParams, 0, sizeof(*pParams));
7978 
7979     pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
7980     pParams->hChannel = RES_GET_HANDLE(pKernelChannel);
7981     pParams->bufferType = pCtxBufferInfo->bufferType;
7982 
7983     pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
7984 
7985     for (j = 0; j < numBufferPages;)
7986     {
7987         pParams->firstPage = j;
7988         status = pRmApi->Control(pRmApi,
7989                                  retainedChannel->session->handle,
7990                                  retainedChannel->rmSubDevice->subDeviceHandle,
7991                                  NV2080_CTRL_CMD_KGR_GET_CTX_BUFFER_PTES,
7992                                  pParams,
7993                                  sizeof(*pParams));
7994         if (status != NV_OK)
7995         {
7996             goto done;
7997         }
7998 
7999         NV_ASSERT(j + pParams->numPages <= numBufferPages);
8000 
8001         if (pCtxBufferInfo->bIsContigous)
8002         {
8003             pPages[0] = (NvU64)pParams->physAddrs[0];
8004             break;
8005         }
8006 
8007         portMemCopy(&pPages[j], pParams->numPages * sizeof(*pPages),
8008                     pParams->physAddrs, pParams->numPages * sizeof(*pPages));
8009         j += pParams->numPages;
8010     }
8011 
8012     NV_ASSERT(pParams->bNoMorePages);
8013 
8014     status = memdescCreate(&pMemDesc,
8015         retainedChannel->pGpu,
8016         pCtxBufferInfo->size,
8017         pCtxBufferInfo->alignment,
8018         pCtxBufferInfo->bIsContigous,
8019         pCtxBufferInfo->aperture,
8020         NV_MEMORY_CACHED,
8021         MEMDESC_FLAGS_NONE
8022     );
8023     if (status != NV_OK)
8024     {
8025         goto done;
8026     }
8027 
8028 
8029     memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize);
8030 
8031     if (pCtxBufferInfo->bIsContigous)
8032     {
8033         memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pPages[0], pCtxBufferInfo->size);
8034     }
8035     else
8036     {
8037         memdescFillPages(pMemDesc, 0, pPages, numBufferPages, pCtxBufferInfo->pageSize);
8038     }
8039 
8040     (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap,
8041                           (NvU64) pMemDesc,
8042                           &pBufferHandle);
8043     *ppMemDesc = pMemDesc;
8044 
8045 done:
8046     portMemFree(pParams);
8047     portMemFree(pPages);
8048     return status;
8049 }
8050 
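// Retain the channel's engine context buffers: query them from RM (GR or
// SEC2; CE channels have none), fill in the caller's resource info array and
// create a shadow memory descriptor for each buffer.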
8051 static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device,
8052                                                  NvHandle hClient,
8053                                                  NvHandle hKernelChannel,
8054                                                  gpuRetainedChannel *retainedChannel,
8055                                                  gpuChannelInstanceInfo *channelInstanceInfo)
8056 {
8057     NV_STATUS status = NV_OK;
8058     NV2080_CTRL_GR_GET_CTX_BUFFER_INFO_PARAMS *pParams = NULL;
8059     NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pFlcnParams = NULL;
8060     gpuChannelResourceInfo *channelResourceInfo = channelInstanceInfo->resourceInfo;
8061     KernelChannel *pKernelChannel;
8062     RM_API *pRmApi;
8063     NvU32 channelEngineType = retainedChannel->channelEngineType;
8064     NvU32 i;
8065     NvU32 j;
8066 
8067     NV_ASSERT(channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE ||
8068               channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR ||
8069               channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);
8070 
8071     // CE channels have 0 resources, so they skip this step
8072     if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE)
8073     {
8074         goto done;
8075     }
8076 
8077     status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
8078     if (status != NV_OK)
8079     {
8080         goto done;
8081     }
8082 
8083     if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
8084     {
8085         // get engine context memdesc, then get its PTEs.
8086         MEMORY_DESCRIPTOR *pMemDesc = NULL;
8087 
8088         // single buffer
8089         NV_ASSERT_OR_GOTO(NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo) >= 1, done);
8090 
8091         pFlcnParams = portMemAllocNonPaged(sizeof(*pFlcnParams));
8092         if (pFlcnParams == NULL)
8093         {
8094             status = NV_ERR_NO_MEMORY;
8095             goto done;
8096         }
8097         pFlcnParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
8098         pFlcnParams->hChannel = RES_GET_HANDLE(pKernelChannel);
8099 
8100         pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
8101 
8102         // This RM CTRL refcounts all the resource memdescs.
8103         status = pRmApi->Control(pRmApi,
8104                                  retainedChannel->session->handle,
8105                                  retainedChannel->rmSubDevice->subDeviceHandle,
8106                                  NV2080_CTRL_CMD_FLCN_GET_CTX_BUFFER_INFO,
8107                                  pFlcnParams,
8108                                  sizeof(*pFlcnParams));
8109         if (status != NV_OK)
8110             goto done;
8111 
8112         gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[0].resourceInfo;
8113 
8114         channelResourceInfo[0].resourceDescriptor = pFlcnParams->bufferHandle;
8115         channelResourceInfo[0].alignment = pFlcnParams->alignment;
8116         pGpuMemoryInfo->pageSize = pFlcnParams->pageSize;
8117         pGpuMemoryInfo->size = pFlcnParams->size;
8118         pGpuMemoryInfo->contig = pFlcnParams->bIsContigous;
8119         pGpuMemoryInfo->physAddr = pFlcnParams->physAddr;
8120         pGpuMemoryInfo->kind = pFlcnParams->kind;
8121         pGpuMemoryInfo->sysmem = pFlcnParams->aperture == ADDR_SYSMEM;
8122         pGpuMemoryInfo->deviceDescendant = pFlcnParams->bDeviceDescendant;
8123 
8124         portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid),
8125             pFlcnParams->uuid, sizeof(pFlcnParams->uuid));
8126 
8127         status = _shadowMemdescCreateFlcn(retainedChannel, pFlcnParams, &pMemDesc);
8128         if (status != NV_OK)
8129             goto done;
8130 
8131         channelResourceInfo[0].resourceDescriptor = (NvP64) pMemDesc;
8132         retainedChannel->resourceMemDesc[0] =  pMemDesc;
8133 
8134         channelInstanceInfo->resourceCount = 1;
8135         retainedChannel->resourceCount = 1;
8136         goto done;
8137     }
8138 
8139     pParams = portMemAllocNonPaged(sizeof(*pParams));
8140     if (pParams == NULL)
8141     {
8142         status = NV_ERR_NO_MEMORY;
8143         goto done;
8144     }
8145 
8146     pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
8147     pParams->hChannel = RES_GET_HANDLE(pKernelChannel);
8148 
8149     pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
8150 
8151     // This RM CTRL refcounts all the resource memdescs.
8152     status = pRmApi->Control(pRmApi,
8153                              retainedChannel->session->handle,
8154                              retainedChannel->rmSubDevice->subDeviceHandle,
8155                              NV2080_CTRL_CMD_GR_GET_CTX_BUFFER_INFO,
8156                              pParams,
8157                              sizeof(*pParams));
8158     if (status != NV_OK)
8159         goto done;
8160 
8161     NV_ASSERT(pParams->bufferCount <= NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo));
8162 
8163     for (i = 0; i < pParams->bufferCount; i++)
8164     {
8165         MEMORY_DESCRIPTOR *pMemDesc = NULL;
8166         NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo = &pParams->ctxBufferInfo[i];
8167         gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[i].resourceInfo;
8168 
8169         channelResourceInfo[i].resourceDescriptor = pCtxBufferInfo->bufferHandle;
8170         channelResourceInfo[i].resourceId = pCtxBufferInfo->bufferType;
8171         channelResourceInfo[i].alignment = pCtxBufferInfo->alignment;
8172         pGpuMemoryInfo->pageSize = pCtxBufferInfo->pageSize;
8173         pGpuMemoryInfo->size = pCtxBufferInfo->size;
8174         pGpuMemoryInfo->contig = pCtxBufferInfo->bIsContigous;
8175         pGpuMemoryInfo->physAddr = pCtxBufferInfo->physAddr;
8176         pGpuMemoryInfo->kind = pCtxBufferInfo->kind;
8177         pGpuMemoryInfo->sysmem = pCtxBufferInfo->aperture == ADDR_SYSMEM;
8178         pGpuMemoryInfo->deviceDescendant = pCtxBufferInfo->bDeviceDescendant;
8179 
8180         portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid),
8181                     pCtxBufferInfo->uuid, sizeof(pCtxBufferInfo->uuid));
8182 
8183         status = _shadowMemdescCreate(retainedChannel, pCtxBufferInfo, &pMemDesc);
8184         if (status != NV_OK)
8185             goto cleanup;
8186 
8187         channelResourceInfo[i].resourceDescriptor = (NvP64) pMemDesc;
8188         retainedChannel->resourceMemDesc[i] =  pMemDesc;
8189     }
8190 
8191     channelInstanceInfo->resourceCount = pParams->bufferCount;
8192     retainedChannel->resourceCount = pParams->bufferCount;
8193 
8194 cleanup:
8195     if (status != NV_OK)
8196     {
8197         for (j = 0; j < i; j++)
8198         {
8199             _shadowMemdescDestroy(retainedChannel, retainedChannel->resourceMemDesc[j]);
8200         }
8201     }
8202 
8203 done:
8204     portMemFree(pParams);
8205     portMemFree(pFlcnParams);
8206     return status;
8207 }
8208 
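// Drop the references taken on the channel's context buffer shadow
// descriptors by _nvGpuOpsRetainChannelResources().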
8209 static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel)
8210 {
8211     NvU32 i;
8212     NvU32 descriptorCount = retainedChannel->resourceCount;
8213 
8214     for (i = 0; i < descriptorCount; i++)
8215     {
8216         MEMORY_DESCRIPTOR *pMemDesc = retainedChannel->resourceMemDesc[i];
8217 
8218         _shadowMemdescDestroy(retainedChannel, pMemDesc);
8219     }
8220 }
8221 
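// Build the PTEs needed to map a retained channel resource (identified by its
// shadow memory descriptor) into the given VA space. Only sysmem and vidmem
// resources on the mapping GPU are supported, and a caller-specified mapping
// page size is not.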
8222 NV_STATUS nvGpuOpsGetChannelResourcePtes(struct gpuAddressSpace *vaSpace,
8223                                          NvP64 resourceDescriptor,
8224                                          NvU64 offset,
8225                                          NvU64 size,
8226                                          gpuExternalMappingInfo *pGpuExternalMappingInfo)
8227 {
8228     NV_STATUS status = NV_OK;
8229     nvGpuOpsLockSet acquiredLocks;
8230     THREAD_STATE_NODE threadState;
8231     NvHandle hSubDevice;
8232     PMEMORY_DESCRIPTOR pMemDesc = NULL;
8233     OBJGPU *pMappingGpu = NULL;
8234     OBJVASPACE *pVAS = NULL;
8235     RsClient *pClient;
8236 
8237     if (!vaSpace || !resourceDescriptor || !pGpuExternalMappingInfo)
8238         return NV_ERR_INVALID_ARGUMENT;
8239 
8240     if (pGpuExternalMappingInfo->mappingPageSize != 0)
8241     {
8242         return NV_ERR_NOT_SUPPORTED;
8243     }
8244 
8245     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
8246     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
8247                                       vaSpace->device->session->handle,
8248                                       NULL,
8249                                       &acquiredLocks);
8250     if (status != NV_OK)
8251     {
8252         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8253         return status;
8254     }
8255 
8256     pMemDesc = (MEMORY_DESCRIPTOR *) NvP64_VALUE(resourceDescriptor);
8257 
8258     status = CliSetSubDeviceContext(vaSpace->device->session->handle,
8259                                     vaSpace->device->subhandle,
8260                                     &hSubDevice,
8261                                     &pMappingGpu);
8262     if (status != NV_OK)
8263     {
8264         _nvGpuOpsLocksRelease(&acquiredLocks);
8265         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8266         return status;
8267     }
8268 
8269     if (pMemDesc->pGpu != pMappingGpu)
8270     {
8271         _nvGpuOpsLocksRelease(&acquiredLocks);
8272         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8273         return NV_ERR_NOT_SUPPORTED;
8274     }
8275 
8276     // Do not support mapping on anything other than sysmem/vidmem!
8277     if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) &&
8278         (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM))
8279     {
8280         _nvGpuOpsLocksRelease(&acquiredLocks);
8281         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8282         return NV_ERR_NOT_SUPPORTED;
8283     }
8284 
8285     status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
8286     if (status != NV_OK)
8287     {
8288         _nvGpuOpsLocksRelease(&acquiredLocks);
8289         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8290         return status;
8291     }
8292 
8293     status = vaspaceGetByHandleOrDeviceDefault(pClient,
8294                                                vaSpace->device->handle,
8295                                                vaSpace->handle,
8296                                                &pVAS);
8297     if (status != NV_OK)
8298     {
8299         _nvGpuOpsLocksRelease(&acquiredLocks);
8300         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8301         return status;
8302     }
8303 
8304     status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pMemDesc, NULL,
8305                                             offset, size, NV_FALSE, NV_FALSE,
8306                                             0, pGpuExternalMappingInfo);
8307 
8308     _nvGpuOpsLocksRelease(&acquiredLocks);
8309     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8310     return status;
8311 }
8312 
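// Bind the retained channel's context buffers at the virtual addresses chosen
// by the caller by promoting them with NV2080_CTRL_CMD_GPU_PROMOTE_CTX, then
// mark the channel's context as bound.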
8313 NV_STATUS nvGpuOpsBindChannelResources(gpuRetainedChannel *retainedChannel,
8314                                        gpuChannelResourceBindParams *channelResourceBindParams)
8315 {
8316     NV_STATUS status = NV_OK;
8317     nvGpuOpsLockSet acquiredLocks;
8318     THREAD_STATE_NODE threadState;
8319     NV2080_CTRL_GPU_PROMOTE_CTX_PARAMS *pParams;
8320     NvU32 i;
8321     KernelChannel *pKernelChannel = NULL;
8322     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
8323 
8324     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
8325 
8326     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
8327                                       retainedChannel->session->handle,
8328                                       NULL,
8329                                       &acquiredLocks);
8330     if (status != NV_OK)
8331     {
8332         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8333         return status;
8334     }
8335 
8336     status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
8337     if (status != NV_OK)
8338     {
8339         _nvGpuOpsLocksRelease(&acquiredLocks);
8340         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8341         return status;
8342     }
8343 
    // Promote (bind) the channel resources. CE channels have 0 resources, so they skip this step
8345     if (retainedChannel->resourceCount != 0)
8346     {
8347         RM_ENGINE_TYPE rmEngineType;
8348 
8349         pParams = portMemAllocNonPaged(sizeof(*pParams));
8350         if (pParams == NULL)
8351         {
            status = NV_ERR_NO_MEMORY;
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
8356         }
8357 
8358         portMemSet(pParams, 0, sizeof(*pParams));
8359 
8360         pParams->hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
8361         pParams->hObject = RES_GET_HANDLE(pKernelChannel);
8362         pParams->entryCount = retainedChannel->resourceCount;
8363 
8364         status = kfifoEngineInfoXlate_HAL(retainedChannel->pGpu,
8365                                           GPU_GET_KERNEL_FIFO(retainedChannel->pGpu),
8366                                           ENGINE_INFO_TYPE_RUNLIST,
8367                                           retainedChannel->runlistId,
8368                                           ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
8369                                           (NvU32 *)&rmEngineType);
8370 
8371         pParams->engineType = gpuGetNv2080EngineType(rmEngineType);
8372 
8373         for (i = 0; i < retainedChannel->resourceCount; i++)
8374         {
8375             if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
8376                 pParams->promoteEntry[i].bufferId = channelResourceBindParams[i].resourceId;
8377 
8378             pParams->promoteEntry[i].gpuVirtAddr = channelResourceBindParams[i].resourceVa;
8379         }
8380 
8381         status = pRmApi->Control(pRmApi,
8382                                  retainedChannel->session->handle,
8383                                  retainedChannel->rmSubDevice->subDeviceHandle,
8384                                  NV2080_CTRL_CMD_GPU_PROMOTE_CTX,
8385                                  pParams,
8386                                  sizeof(*pParams));
8387 
8388         portMemFree(pParams);
8389     }
8390 
8391     if (NV_OK == status)
8392     {
8393         pKernelChannel->bIsContextBound = NV_TRUE;
8394     }
8395 
8396     _nvGpuOpsLocksRelease(&acquiredLocks);
8397     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8398     return status;
8399 }
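
/*
 * Illustrative usage of nvGpuOpsBindChannelResources (sketch only, not part of
 * the driver): a UVM caller that has retained a GR channel would typically fill
 * one gpuChannelResourceBindParams entry per retained resource and bind them in
 * a single call. The names below (bindParams, resourceIds, mappedVa,
 * MAX_RESOURCES) are hypothetical and used purely for illustration:
 *
 *     gpuChannelResourceBindParams bindParams[MAX_RESOURCES];
 *     for (i = 0; i < retainedChannel->resourceCount; i++)
 *     {
 *         bindParams[i].resourceId = resourceIds[i]; // from the retain step
 *         bindParams[i].resourceVa = mappedVa[i];    // VA chosen by the caller
 *     }
 *     status = nvGpuOpsBindChannelResources(retainedChannel, bindParams);
 */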
8400 
8401 // nvGpuOpsRetainChannelResources only increments the ref-counts of the memdescs under the channel.
8402 // It does not prevent the user from freeing the associated hClient and hChannel handles, which means
8403 // the instance pointer may no longer be associated with a user object at this point.
8404 // If the instance pointer still has an associated channel, the channel is preempted and disabled.
8405 // Otherwise that must have already happened, so we just need to drop the ref counts on the resources
8406 void nvGpuOpsStopChannel(gpuRetainedChannel *retainedChannel,
8407                          NvBool bImmediate)
8408 {
8409     NV_STATUS status = NV_OK;
8410     nvGpuOpsLockSet acquiredLocks;
8411     THREAD_STATE_NODE threadState;
8412     KernelChannel *pKernelChannel = NULL;
8413     RsResourceRef *pResourceRef;
8414     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
8415     NVA06F_CTRL_STOP_CHANNEL_PARAMS stopChannelParams = {0};
8416 
8417     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
8418 
8419     if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
8420                                  retainedChannel->session->handle,
8421                                  NULL,
8422                                  &acquiredLocks) != NV_OK)
8423     {
8424         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8425         return;
8426     }
8427 
8428     status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
8429     if (status != NV_OK)
8430     {
8431         _nvGpuOpsLocksRelease(&acquiredLocks);
8432         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8433         return;
8434     }
8435 
8436     // Verify this channel handle is still valid
8437     status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pKernelChannel), RES_GET_HANDLE(pKernelChannel), &pResourceRef);
8438     if (status != NV_OK)
8439     {
8440         NV_ASSERT(0);
8441         _nvGpuOpsLocksRelease(&acquiredLocks);
8442         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8443         return;
8444     }
8445 
8446     stopChannelParams.bImmediate = bImmediate;
8447     NV_ASSERT_OK(
8448         pRmApi->Control(pRmApi,
8449                         RES_GET_CLIENT_HANDLE(pKernelChannel),
8450                         RES_GET_HANDLE(pKernelChannel),
8451                         NVA06F_CTRL_CMD_STOP_CHANNEL,
8452                         &stopChannelParams,
8453                         sizeof(stopChannelParams)));
8454 
8455     pKernelChannel->bIsContextBound = NV_FALSE;
8456 
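    // For GR channels, additionally ask RM to evict the promoted context (the
    // counterpart of NV2080_CTRL_CMD_GPU_PROMOTE_CTX used at bind time); other
    // engine types (e.g. CE) have no promoted context and skip this step.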
8457     if (retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR)
8458     {
8459         NV2080_CTRL_GPU_EVICT_CTX_PARAMS params;
8460 
8461         portMemSet(&params, 0, sizeof(params));
8462         params.engineType = NV2080_ENGINE_TYPE_GR(0);
8463         params.hClient = retainedChannel->session->handle;
8464         params.hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
8465         params.hObject = RES_GET_HANDLE(pKernelChannel);
8466 
8467         NV_ASSERT_OK(
8468             pRmApi->Control(pRmApi,
8469                             retainedChannel->session->handle,
8470                             retainedChannel->rmSubDevice->subDeviceHandle,
8471                             NV2080_CTRL_CMD_GPU_EVICT_CTX,
8472                             &params,
8473                             sizeof(params)));
8474     }
8475 
8476     _nvGpuOpsLocksRelease(&acquiredLocks);
8477     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8478 }
8479 
8480 // Make sure the UVM and PMA structs are in sync
8481 // The following location(s) need to be synced as well:
8482 // - uvm8_pmm_gpu.c:uvm8_test_pmm_query_pma_stats
8483 ct_assert(sizeof(UvmPmaStatistics) == sizeof(PMA_STATS));
8484 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numPages2m) == NV_OFFSETOF(PMA_STATS, num2mbPages));
8485 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages64k) == NV_OFFSETOF(PMA_STATS, numFreeFrames));
8486 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages2m) == NV_OFFSETOF(PMA_STATS, numFree2mbPages));
8487 
/*!
 *  Retrieve the PMA (Physical Memory Allocator) object initialized by RM
 *  for the given device.
 *
 *  @param[in]  device          device handle obtained in a prior call
 *                              to nvGpuOpsRmDeviceCreate.
 *
 *  @param[out] pPmaObject      Void pointer to the RM PMA object of the
 *                              associated GPU. Cannot be NULL.
 *  @param[out] pPmaStats       Pointer to the UVM PMA statistics object of the
 *                              associated GPU. Cannot be NULL.
 *
 * @returns     NV_OK on success,
 *              NV_ERR_OBJECT_NOT_FOUND if the GPU, its heap, or an initialized
 *              PMA object cannot be found for the device,
 *              or another NV_STATUS error if lock acquisition or the MIG heap
 *              lookup fails.
 */
8505 NV_STATUS nvGpuOpsGetPmaObject(struct gpuDevice *device,
8506                                void **pPmaObject,
8507                                const UvmPmaStatistics **pPmaStats)
8508 {
8509     nvGpuOpsLockSet     acquiredLocks;
8510     THREAD_STATE_NODE   threadState;
8511     OBJGPU             *pGpu    = NULL;
8512     Heap               *pHeap   = NULL;
8513     MemoryManager *pMemoryManager;
8514     struct gpuSession *session = device->session;
8515     NV_STATUS status;
8516 
8517     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
8518     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, session->handle, NULL, &acquiredLocks);
8519     if (status != NV_OK)
8520     {
8521         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8522         return status;
8523     }
8524 
8525     status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
8526     if (status != NV_OK)
8527     {
8528         _nvGpuOpsLocksRelease(&acquiredLocks);
8529         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8530         return NV_ERR_OBJECT_NOT_FOUND;
8531     }
8532 
8533     pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
8534     if (pMemoryManager == NULL)
8535     {
8536         _nvGpuOpsLocksRelease(&acquiredLocks);
8537         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8538         return NV_ERR_OBJECT_NOT_FOUND;
8539     }
8540 
8541     if (IS_MIG_IN_USE(pGpu))
8542     {
8543         KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
8544 
        status = kmigmgrGetMemoryPartitionHeapFromClient(pGpu, pKernelMIGManager, session->handle, &pHeap);
        if (status != NV_OK)
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
        }
8548     }
8549     else
8550         pHeap = GPU_GET_HEAP(pGpu);
8551 
8552     if (pHeap == NULL)
8553     {
8554         _nvGpuOpsLocksRelease(&acquiredLocks);
8555         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8556         return NV_ERR_OBJECT_NOT_FOUND;
8557     }
8558 
8559     if (!memmgrIsPmaInitialized(pMemoryManager))
8560     {
8561         _nvGpuOpsLocksRelease(&acquiredLocks);
8562         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8563         return NV_ERR_OBJECT_NOT_FOUND;
8564     }
8565 
8566     *pPmaObject = (void *)&pHeap->pmaObject;
8567     *pPmaStats = (const UvmPmaStatistics *)&pHeap->pmaObject.pmaStats;
8568 
8569     _nvGpuOpsLocksRelease(&acquiredLocks);
8570     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
8571     return NV_OK;
8572 }
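
/*
 * Illustrative usage of nvGpuOpsGetPmaObject (sketch only, not part of the
 * driver): a caller typically retrieves the PMA object once per device and
 * then reads the published statistics directly. The variable names below are
 * hypothetical:
 *
 *     void *pPma;
 *     const UvmPmaStatistics *pStats;
 *     if (nvGpuOpsGetPmaObject(device, &pPma, &pStats) == NV_OK)
 *         free2mPages = pStats->numFreePages2m; // field layout checked by the ct_asserts above
 */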
8573 
8574 NV_STATUS nvGpuOpsP2pObjectCreate(struct gpuDevice *device1,
8575                                   struct gpuDevice *device2,
8576                                   NvHandle *hP2pObject)
8577 {
8578     NV_STATUS status;
8579     NV503B_ALLOC_PARAMETERS p2pAllocParams = {0};
8580     NvHandle hTemp = 0;
8581     struct systemP2PCaps p2pCaps;
8582     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
8583     struct gpuSession *session;
8584 
8585     if (!device1 || !device2 || !hP2pObject)
8586         return NV_ERR_INVALID_ARGUMENT;
8587 
8588     if (device1->session != device2->session)
8589         return NV_ERR_INVALID_ARGUMENT;
8590 
8591     status = getSystemP2PCaps(device1, device2, &p2pCaps);
8592     if (status != NV_OK)
8593         return status;
8594 
8595     if (!p2pCaps.accessSupported)
8596         return NV_ERR_NOT_SUPPORTED;
8597 
8598     p2pAllocParams.hSubDevice = device1->subhandle;
8599     p2pAllocParams.hPeerSubDevice = device2->subhandle;
8600 
8601     session = device1->session;
8602     hTemp = NV01_NULL_OBJECT;
8603     status = pRmApi->Alloc(pRmApi, session->handle, session->handle, &hTemp, NV50_P2P, &p2pAllocParams);
8604     if (status == NV_OK)
8605         *hP2pObject = hTemp;
8606 
8607     return status;
8608 }
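
/*
 * Illustrative pairing of the P2P object calls (sketch only, not part of the
 * driver): a P2P object is created for a device pair within one session and
 * later freed through that session. devA and devB below are hypothetical
 * gpuDevice pointers belonging to the same session:
 *
 *     NvHandle hP2p = 0;
 *     if (nvGpuOpsP2pObjectCreate(devA, devB, &hP2p) == NV_OK)
 *     {
 *         // ... use peer access between devA and devB ...
 *         nvGpuOpsP2pObjectDestroy(devA->session, hP2p);
 *     }
 */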
8609 
8610 NV_STATUS nvGpuOpsP2pObjectDestroy(struct gpuSession *session,
8611                                    NvHandle hP2pObject)
8612 {
8613     NV_STATUS status = NV_OK;
8614     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
8615     NV_ASSERT(session);
8616 
8617     status = pRmApi->Free(pRmApi, session->handle, hP2pObject);
8618     NV_ASSERT(status == NV_OK);
8619     return status;
8620 }
8621 
8622 NV_STATUS nvGpuOpsReportNonReplayableFault(struct gpuDevice *device,
8623                                            const void *pFaultPacket)
8624 {
8625     NV_STATUS status = NV_OK;
8626     NV2080_CTRL_GPU_REPORT_NON_REPLAYABLE_FAULT_PARAMS params;
8627     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
8628 
8629     if (device == NULL || pFaultPacket == NULL)
8630         return NV_ERR_INVALID_ARGUMENT;
8631 
8632     portMemSet(&params, 0, sizeof(params));
8633 
8634     portMemCopy(&params.faultPacket.data,
8635                 NV2080_CTRL_GPU_FAULT_PACKET_SIZE,
8636                 pFaultPacket,
8637                 NV2080_CTRL_GPU_FAULT_PACKET_SIZE);
8638 
8639     status = pRmApi->Control(pRmApi,
8640                              device->session->handle,
8641                              device->subhandle,
8642                              NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT,
8643                              &params,
8644                              sizeof(params));
8645     if (status != NV_OK)
8646     {
8647         NV_PRINTF(LEVEL_ERROR,
                  "%s: NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT returned error %s!\n",
8649                   __FUNCTION__, nvstatusToString(status));
8650     }
8651 
8652     return status;
8653 }
8654 
8655 NV_STATUS nvGpuOpsPagingChannelAllocate(struct gpuDevice *device,
8656                                         const gpuPagingChannelAllocParams *params,
8657                                         gpuPagingChannelHandle *channelHandle,
8658                                         gpuPagingChannelInfo *channelInfo)
8659 {
8660     NV_STATUS status, status2;
8661     UvmGpuPagingChannel *channel = NULL;
8662     Device *pDevice;
8663     RsClient *pClient;
8664     NvHandle hClient;
8665     NvLength errorNotifierSize;
8666     NvU64 paOffset;
8667     gpuAllocInfo allocInfo = {0};
8668     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
8669     NvU32 pid = osGetCurrentProcess();
8670 
8671     if (!device || !params || !channelHandle || !channelInfo)
8672         return NV_ERR_INVALID_ARGUMENT;
8673 
8674     if (!NV2080_ENGINE_TYPE_IS_COPY(NV2080_ENGINE_TYPE_COPY(params->engineIndex)))
8675         return NV_ERR_INVALID_ARGUMENT;
8676 
8677     hClient = device->session->handle;
8678     NV_ASSERT(hClient);
8679 
8680     channel = portMemAllocNonPaged(sizeof(*channel));
8681     if (!channel)
8682         return NV_ERR_NO_MEMORY;
8683 
8684     portMemSet(channel, 0, sizeof(*channel));
8685     channel->device = device;
8686 
8687     errorNotifierSize = sizeof(NvNotification) *
8688                         NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1;
8689     status = nvGpuOpsAllocPhysical(device,
8690                                    NV_TRUE,
8691                                    errorNotifierSize,
8692                                    &paOffset,
8693                                    &allocInfo);
8694     if (status != NV_OK)
8695         goto cleanup_free_channel;
8696 
8697     channel->errorNotifierHandle = allocInfo.hPhysHandle;
8698     NV_ASSERT(channel->errorNotifierHandle);
8699 
8700     status = pRmApi->MapToCpu(pRmApi,
8701                               hClient,
8702                               device->subhandle,
8703                               channel->errorNotifierHandle,
8704                               0,
8705                               errorNotifierSize,
8706                               (void **)&channel->errorNotifier,
8707                               0);
8708     if (status != NV_OK)
8709         goto cleanup_free_error_notifier;
8710 
8711     NV_ASSERT(channel->errorNotifier);
8712 
    // Ideally, we would acquire three locks here (in this order):
    // a. RM API lock
    // b. device->handle GPU lock
    // c. RPC lock
    // (b) The GPU lock is optional because RM will acquire all needed locks automatically.
    // (c) The RPC lock is optional because currently there is no scenario in which channel allocation/destruction
    // can run concurrently with any other SR-IOV heavy API that results in an RPC (Map/Unmap/PushStream).
    //
    // However, if we acquired the GPU locks, NV_RM_RPC_UVM_PAGING_CHANNEL_ALLOCATE would fail.
    // This is because PAGING_CHANNEL_ALLOCATE allocates AMPERE_CHANNEL_GPFIFO_A, which in turn allocates
    // a KernelChannelGroupApi. The KernelChannelGroupApi allocation would fail because
    // 'TSG alloc should be called without acquiring GPU lock';
    // KernelChannelGroupApi acquires the GPU locks manually after allocating the TSG.
    //
    // The TSG allocation requirement just described not only precludes the acquisition
    // of any GPU lock in this function, but also the acquisition of the RPC lock,
    // because it would result in a lock order violation: the RPC lock is acquired
    // before the GPU lock. As a result, nvGpuOpsPagingChannelAllocate only acquires
    // the RM API lock, and so does nvGpuOpsPagingChannelDestroy.
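    //
    // In practice, then, the locking in this function and in
    // nvGpuOpsPagingChannelDestroy reduces to:
    //
    //     rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    //     ... client/device lookup and channel setup ...
    //     rmapiLockRelease();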
8732     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
8733     if (status != NV_OK)
8734         goto cleanup_unmap_error_notifier;
8735 
8736     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
8737     if (status != NV_OK)
8738         goto cleanup_under_rmapi_lock;
8739 
8740     status = deviceGetByHandle(pClient, device->handle, &pDevice);
8741     if (status != NV_OK)
8742         goto cleanup_under_rmapi_lock;
8743 
8744     channel->pDevice = pDevice;
8745 
8746     GPU_RES_SET_THREAD_BC_STATE(pDevice);
8747 
8748     if (status != NV_OK)
8749         goto cleanup_under_rmapi_lock;
8750 
8751     rmapiLockRelease();
8752 
8753     *channelHandle = channel;
8754 
8755     channelInfo->shadowErrorNotifier = channel->errorNotifier;
8756 
8757     return NV_OK;
8758 
8759 cleanup_under_rmapi_lock:
8760     rmapiLockRelease();
8761 
8762 cleanup_unmap_error_notifier:
8763     status2 = pRmApi->UnmapFromCpu(pRmApi,
8764                                    hClient,
8765                                    device->subhandle,
8766                                    channel->errorNotifierHandle,
8767                                    (void *)channel->errorNotifier,
8768                                    0,
8769                                    pid);
8770     NV_ASSERT(status2 == NV_OK);
8771 
8772 cleanup_free_error_notifier:
8773     pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle);
8774 
8775 cleanup_free_channel:
8776     portMemFree(channel);
8777 
8778     return status;
8779 }
8780 
8781 void nvGpuOpsPagingChannelDestroy(UvmGpuPagingChannel *channel)
8782 {
8783     NV_STATUS status;
8784     struct gpuDevice *device;
8785     Device *pDevice;
8786     RsClient *pClient;
8787     NvHandle hClient;
8788     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
8789     NvU32 pid = osGetCurrentProcess();
8790 
8791     NV_ASSERT(channel);
8792 
8793     device = channel->device;
8794     NV_ASSERT(device);
8795 
8796     hClient = device->session->handle;
8797     NV_ASSERT(hClient);
8798 
8799     // We acquire only RM API lock here. See comment in nvGpuOpsPagingChannelAllocate.
8800     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
8801     NV_ASSERT(status == NV_OK);
8802     if (status != NV_OK)
8803     {
8804         NV_PRINTF(LEVEL_ERROR,
8805                   "%s: rmapiLockAcquire returned error %s!\n",
8806                   __FUNCTION__, nvstatusToString(status));
8807         goto cleanup;
8808     }
8809 
8810     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
8811     NV_ASSERT(status == NV_OK);
8812     if (status != NV_OK)
8813     {
8814         NV_PRINTF(LEVEL_ERROR,
8815                   "%s: serverGetClientUnderLock returned error %s!\n",
8816                   __FUNCTION__, nvstatusToString(status));
8817         goto cleanup_under_rmapi_lock;
8818     }
8819 
8820     status = deviceGetByHandle(pClient, device->handle, &pDevice);
8821     NV_ASSERT(status == NV_OK);
8822     if (status != NV_OK)
8823     {
8824         NV_PRINTF(LEVEL_ERROR,
8825                   "%s: deviceGetByHandle returned error %s!\n",
8826                   __FUNCTION__, nvstatusToString(status));
8827         goto cleanup_under_rmapi_lock;
8828     }
8829 
8830     GPU_RES_SET_THREAD_BC_STATE(pDevice);
8831 
8832 cleanup_under_rmapi_lock:
8833     rmapiLockRelease();
8834 
8835 cleanup:
8836     status = pRmApi->UnmapFromCpu(pRmApi,
8837                                   hClient,
8838                                   device->subhandle,
8839                                   channel->errorNotifierHandle,
8840                                   (void *)channel->errorNotifier,
8841                                   0,
8842                                   pid);
8843     NV_ASSERT(status == NV_OK);
8844     if (status != NV_OK)
8845     {
8846         NV_PRINTF(LEVEL_ERROR,
8847                   "%s: UnmapFromCpu returned error %s!\n",
8848                   __FUNCTION__, nvstatusToString(status));
8849     }
8850 
8851     pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle);
8852     portMemFree(channel);
8853 }
8854 
8855 NV_STATUS nvGpuOpsPagingChannelsMap(struct gpuAddressSpace *srcVaSpace,
8856                                     NvU64 srcAddress,
8857                                     struct gpuDevice *device,
8858                                     NvU64 *dstAddress)
8859 {
8860     NV_STATUS status;
8861     Device *pDevice;
8862     RsClient *pClient;
8863     NvHandle hAllocation;
8864     NvHandle hClient;
8865     nvGpuOpsLockSet acquiredLocks;
8866 
8867     if (!srcVaSpace || !device || !dstAddress)
8868         return NV_ERR_INVALID_ARGUMENT;
8869 
8870     hClient = device->session->handle;
8871     NV_ASSERT(hClient);
8872 
8874     status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation);
8875     if (status != NV_OK)
8876         return status;
8877 
8878     status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2,
8879                                    device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks);
8880     if (status != NV_OK)
8881         return status;
8882 
8883     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
8884     if (status != NV_OK)
8885         goto exit_under_locks;
8886 
8887     status = deviceGetByHandle(pClient, device->handle, &pDevice);
8888     if (status != NV_OK)
8889         goto exit_under_locks;
8890 
8891     GPU_RES_SET_THREAD_BC_STATE(pDevice);
8892 
8893     portSyncMutexAcquire(device->pPagingChannelRpcMutex);
8894 
8895     portSyncMutexRelease(device->pPagingChannelRpcMutex);
8896 
8897 exit_under_locks:
8898     _nvGpuOpsLocksRelease(&acquiredLocks);
8899 
8900     return status;
8901 }
8902 
8903 void nvGpuOpsPagingChannelsUnmap(struct gpuAddressSpace *srcVaSpace,
8904                                  NvU64 srcAddress,
8905                                  struct gpuDevice *device)
8906 {
8907     NV_STATUS status;
8908     Device *pDevice;
8909     RsClient *pClient;
8910     NvHandle hAllocation;
8911     NvHandle hClient;
8912     nvGpuOpsLockSet acquiredLocks;
8913 
8914     NV_ASSERT(srcVaSpace && device);
8915     if (!srcVaSpace || !device)
8916         return;
8917 
8918     hClient = device->session->handle;
8919     NV_ASSERT(hClient);
8920 
8921     status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation);
8922     NV_ASSERT(status == NV_OK);
8923     if (status != NV_OK)
8924     {
8925         NV_PRINTF(LEVEL_ERROR,
8926                   "%s: getHandleForVirtualAddr returned error %s!\n",
8927                   __FUNCTION__, nvstatusToString(status));
8928         return;
8929     }
8930 
8931     status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2,
8932                                    device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks);
8933     if (status != NV_OK)
8934     {
8935         NV_PRINTF(LEVEL_ERROR,
8936                   "%s: _nvGpuOpsLocksAcquire returned error %s!\n",
8937                   __FUNCTION__, nvstatusToString(status));
8938         return;
8939     }
8940 
8941     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
8942     NV_ASSERT(status == NV_OK);
8943     if (status != NV_OK)
8944     {
8945         NV_PRINTF(LEVEL_ERROR,
8946                   "%s: serverGetClientUnderLock returned error %s!\n",
8947                   __FUNCTION__, nvstatusToString(status));
8948         goto exit_under_locks;
8949     }
8950 
8951     status = deviceGetByHandle(pClient, device->handle, &pDevice);
8952     NV_ASSERT(status == NV_OK);
8953     if (status != NV_OK)
8954     {
8955         NV_PRINTF(LEVEL_ERROR,
8956                   "%s: deviceGetByHandle returned error %s!\n",
8957                   __FUNCTION__, nvstatusToString(status));
8958         goto exit_under_locks;
8959     }
8960 
8961     GPU_RES_SET_THREAD_BC_STATE(pDevice);
8962 
8963     portSyncMutexAcquire(device->pPagingChannelRpcMutex);
8964 
8965     portSyncMutexRelease(device->pPagingChannelRpcMutex);
8966 
8967 exit_under_locks:
8968     _nvGpuOpsLocksRelease(&acquiredLocks);
8969 }
8970 
8971 NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
8972                                           char *methodStream,
8973                                           NvU32 methodStreamSize)
8974 {
8975     NV_STATUS status = NV_OK;
8976     struct gpuDevice *device = NULL;
8977 
8978     if (!channel || !methodStream)
8979         return NV_ERR_INVALID_ARGUMENT;
8980     if (methodStreamSize == 0)
8981         return NV_OK;
8982 
8983     device = channel->device;
8984     NV_ASSERT(device);
8985 
8986     GPU_RES_SET_THREAD_BC_STATE(channel->pDevice);
8987 
8988     portSyncMutexAcquire(device->pPagingChannelRpcMutex);
8989 
8990     portSyncMutexRelease(device->pPagingChannelRpcMutex);
8991 
8992     return status;
8993 }
8994 
8995 static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory)
8996 {
8997     RsClient *pRsClient = NULL;
8998 
8999     NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ,
9000         hClient, &pRsClient));
9001 
9002     return memGetByHandle(pRsClient,
9003                           hMemory,
9004                           ppMemory);
9005 }
9006 
9007