1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #define NVOC_KERNEL_SM_DEBUGGER_SESSION_H_PRIVATE_ACCESS_ALLOWED
25 
26 // FIXME XXX
27 #define NVOC_KERNEL_GRAPHICS_OBJECT_H_PRIVATE_ACCESS_ALLOWED
28 
29 #include "kernel/os/os.h"
30 #include "kernel/core/locks.h"
31 #include "kernel/gpu/gr/kernel_sm_debugger_session.h"
32 #include "kernel/gpu/gr/kernel_graphics_object.h"
33 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
34 #include "kernel/gpu/subdevice/subdevice.h"
35 #include "kernel/gpu/device/device.h"
36 #include "libraries/resserv/rs_client.h"
37 #include "kernel/rmapi/rs_utils.h"
38 #include "virtualization/hypervisor/hypervisor.h"
39 
40 #include "ctrl/ctrl83de/ctrl83dedebug.h"
41 
42 #include "class/cl0080.h"
43 #include "class/clc637.h"
44 #include "class/cl2080.h"
45 #include "class/cl83de.h"
46 
//
// Validate that two clients carry matching security tokens.
//
// The check only runs when PDB_PROP_SYS_VALIDATE_CLIENT_HANDLE is set on the
// system object and (secInfo).privLevel is below RS_PRIV_LEVEL_USER_ROOT.
// The result of osValidateClientTokens() is stored in `status`, which is then
// handed to NV_ASSERT_OR_RETURN(status == NV_OK, status); the macro is
// therefore only usable inside a function whose return type is NV_STATUS.
//
// There is intentionally no semicolon after `while (0)`: the caller supplies
// it, so the expansion is exactly one statement and remains safe inside an
// unbraced if/else.
//
#define VALIDATE_MATCHING_SEC_TOKENS(handle1, handle2, secInfo, status)                         \
    do {                                                                                        \
        OBJSYS *pSys = SYS_GET_INSTANCE();                                                      \
        if (pSys->getProperty(pSys, PDB_PROP_SYS_VALIDATE_CLIENT_HANDLE) &&                     \
            ((secInfo).privLevel < RS_PRIV_LEVEL_USER_ROOT))                                    \
        {                                                                                       \
            (status) = osValidateClientTokens((void*)rmclientGetSecurityTokenByHandle(handle1), \
                                              (void*)rmclientGetSecurityTokenByHandle(handle2));\
            NV_ASSERT_OR_RETURN((status) == NV_OK, (status));                                   \
        }                                                                                       \
    } while (0)
59 
60 static NV_STATUS _ShareDebugger(KernelSMDebuggerSession *, RsResourceRef *, RsResourceRef *);
61 
62 void
63 dbgSessionRemoveDependant_IMPL
64 (
65     RmDebuggerSession *pDbgSession,
66     RsResourceRef     *pResourceRef
67 )
68 {
69     RsSession *pSession = dynamicCast(pDbgSession, RsSession);
70 
71     // Freeing a KernelSMDebuggerSession dependant should just call the destructor normally
72     if (pSession->bValid && (pResourceRef->externalClassId == GT200_DEBUGGER))
73         ksmdbgssnFreeCallback(dynamicCast(pResourceRef->pResource, KernelSMDebuggerSession));
74 
75     sessionRemoveDependant_IMPL(staticCast(pDbgSession, RsSession), pResourceRef);
76 }
77 
78 void
79 dbgSessionRemoveDependency_IMPL
80 (
81     RmDebuggerSession *pDbgSession,
82     RsResourceRef     *pResourceRef
83 )
84 {
85     RsSession *pSession = dynamicCast(pDbgSession, RsSession);
86 
87     //
88     // Call all registered KernelSMDebuggerSessions' free callbacks (destructor basically)
89     // when the underlying KernelGraphicsObject goes away. This invalidates the KernelSMDebuggerSession
90     // and causes all control calls on it to fail since the KernelGraphicsObject dependancy has disappeared.
91     //
92     if (pSession->bValid)
93     {
94         RsResourceRefListIter it;
95 
96         it = listIterAll(&pSession->dependants);
97 
98         while (listIterNext(&it))
99         {
100             RsResourceRef *pDependency = *(it.pValue);
101 
102             if (pDependency->externalClassId == GT200_DEBUGGER)
103                 ksmdbgssnFreeCallback(dynamicCast(pDependency->pResource, KernelSMDebuggerSession));
104         }
105     }
106 
107     // This call will invalidate the RmDebuggerSession
108     sessionRemoveDependency_IMPL(staticCast(pDbgSession, RsSession), pResourceRef);
109 }
110 
111 static NV_STATUS
112 _ksmdbgssnInitClient
113 (
114     OBJGPU *pGpu,
115     KernelSMDebuggerSession *pKernelSMDebuggerSession
116 )
117 {
118     NV0080_ALLOC_PARAMETERS nv0080AllocParams;
119     NV2080_ALLOC_PARAMETERS nv2080AllocParams;
120     NV_STATUS status = NV_OK;
121     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
122     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
123     NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
124 
125     pKernelSMDebuggerSession->hInternalClient = NV01_NULL_OBJECT;
126 
127     // Allocate a (kernel-space) client.
128     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
129         pRmApi->AllocWithHandle(pRmApi,
130                                 NV01_NULL_OBJECT,
131                                 NV01_NULL_OBJECT,
132                                 NV01_NULL_OBJECT,
133                                 NV01_ROOT,
134                                 &pKernelSMDebuggerSession->hInternalClient,
135                                 sizeof(pKernelSMDebuggerSession->hInternalClient)),
136         failed);
137 
138     // Allocate a device.
139     NV_ASSERT_OK_OR_GOTO(status,
140         serverutilGenResourceHandle(pKernelSMDebuggerSession->hInternalClient,
141                                     &pKernelSMDebuggerSession->hInternalDevice),
142         failed);
143     portMemSet(&nv0080AllocParams, 0, sizeof(nv0080AllocParams));
144     nv0080AllocParams.deviceId = gpuGetDeviceInstance(pGpu);
145     nv0080AllocParams.hClientShare = pKernelSMDebuggerSession->hInternalClient;
146     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
147         pRmApi->AllocWithHandle(pRmApi,
148                                 pKernelSMDebuggerSession->hInternalClient,
149                                 pKernelSMDebuggerSession->hInternalClient,
150                                 pKernelSMDebuggerSession->hInternalDevice,
151                                 NV01_DEVICE_0,
152                                 &nv0080AllocParams,
153                                 sizeof(nv0080AllocParams)),
154         failed);
155 
156     // Allocate a subdevice.
157     NV_ASSERT_OK_OR_GOTO(status,
158         serverutilGenResourceHandle(pKernelSMDebuggerSession->hInternalClient,
159                                     &pKernelSMDebuggerSession->hInternalSubdevice),
160         failed);
161     portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
162     nv2080AllocParams.subDeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
163     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
164         pRmApi->AllocWithHandle(pRmApi,
165                                 pKernelSMDebuggerSession->hInternalClient,
166                                 pKernelSMDebuggerSession->hInternalDevice,
167                                 pKernelSMDebuggerSession->hInternalSubdevice,
168                                 NV20_SUBDEVICE_0,
169                                 &nv2080AllocParams,
170                                 sizeof(nv2080AllocParams)),
171         failed);
172 
173     if (bMIGInUse)
174     {
175         NVC637_ALLOCATION_PARAMETERS nvC637AllocParams;
176         MIG_INSTANCE_REF ref;
177 
178         portMemSet(&nvC637AllocParams, 0, sizeof(nvC637AllocParams));
179         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
180             kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, pKernelSMDebuggerSession->hDebuggerClient, &ref),
181             failed);
182 
183         NV_ASSERT_OK_OR_GOTO(status,
184             serverutilGenResourceHandle(pKernelSMDebuggerSession->hInternalClient,
185                                         &pKernelSMDebuggerSession->hInternalSubscription),
186             failed);
187         nvC637AllocParams.swizzId = ref.pKernelMIGGpuInstance->swizzId;
188         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
189             pRmApi->AllocWithHandle(pRmApi,
190                                     pKernelSMDebuggerSession->hInternalClient,
191                                     pKernelSMDebuggerSession->hInternalSubdevice,
192                                     pKernelSMDebuggerSession->hInternalSubscription,
193                                     AMPERE_SMC_PARTITION_REF,
194                                     &nvC637AllocParams,
195                                     sizeof(nvC637AllocParams)),
196             failed);
197     }
198 
199     return NV_OK;
200 failed:
201     if (pKernelSMDebuggerSession->hInternalClient != NV01_NULL_OBJECT)
202     {
203         pRmApi->Free(pRmApi, pKernelSMDebuggerSession->hInternalClient, pKernelSMDebuggerSession->hInternalClient);
204         pKernelSMDebuggerSession->hInternalClient = NV01_NULL_OBJECT;
205         pKernelSMDebuggerSession->hInternalDevice = NV01_NULL_OBJECT;
206         pKernelSMDebuggerSession->hInternalSubdevice = NV01_NULL_OBJECT;
207         pKernelSMDebuggerSession->hInternalSubscription = NV01_NULL_OBJECT;
208         pKernelSMDebuggerSession->hInternalMemMapping = NV01_NULL_OBJECT;
209     }
210 
211     return status;
212 }
213 
214 NV_STATUS
215 ksmdbgssnConstruct_IMPL
216 (
217     KernelSMDebuggerSession      *pKernelSMDebuggerSession,
218     CALL_CONTEXT                 *pCallContext,
219     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
220 )
221 {
222     NV83DE_ALLOC_PARAMETERS  *pNv83deAllocParams = pParams->pAllocParams;
223     OBJGPU                   *pGpu;
224     NvHandle                  hAppChannel;
225     NvHandle                  hAppClient;
226     NvHandle                  hClass3dObject;
227     NvHandle                  hKernelSMDebuggerSession;
228     NvHandle                  hSubdevice;
229     NV_STATUS                 status = NV_OK;
230     RsClient                 *pAppClient;
231     Subdevice                *pSubdevice;
232     RsResourceRef            *pGrResourceRef;
233     RsResourceRef            *pParentRef;
234 
235     // The app using the new interface should initialize this to 0.
236     if (pNv83deAllocParams->hDebuggerClient_Obsolete)
237     {
238         NV_ASSERT_FAILED("Old Nv83deAllocParams interface not supported");
239         return NV_ERR_INVALID_ARGUMENT;
240     }
241 
242     hAppClient = pNv83deAllocParams->hAppClient;
243     hClass3dObject = pNv83deAllocParams->hClass3dObject;
244     hKernelSMDebuggerSession = pParams->hResource;
245 
246     // If given a zero hAppClient, assume the client meant to target the calling hClient.
247     if (hAppClient == NV01_NULL_OBJECT)
248     {
249         hAppClient = pParams->hClient;
250     }
251 
252     // Validate + lookup the application client
253     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
254         serverGetClientUnderLock(&g_resServ, hAppClient, &pAppClient));
255 
256     status = clientGetResourceRef(pAppClient, hClass3dObject, &pGrResourceRef);
257 
258     if (status != NV_OK)
259     {
260        NV_PRINTF(LEVEL_ERROR, "hObject 0x%x not found for client 0x%x\n",
261                  pParams->hParent, pParams->hClient);
262        return NV_ERR_INVALID_OBJECT_PARENT;
263     }
264 
265     //
266     // On GSP, the security token is either the GFID or NULL. When the security token is
267     // set to the GFID this will properly constrain debugger access to wtihin a single
268     // Guest on GHV environments. When it is NULL, this allows access to any client in
269     // the system but in order to take advantage of this CPU-RM would already have
270     // to have been compromised anyway.
271     //
272     // On legacy vGPU systems, the security token will match the Guest's vGPU plugin. So you'd
273     // only be able to access other resources allocated by the same Guest.
274     //
275     if (RMCFG_FEATURE_PLATFORM_GSP || hypervisorIsVgxHyper())
276     {
277         API_SECURITY_INFO *pSecInfo = pParams->pSecInfo;
278 
279         VALIDATE_MATCHING_SEC_TOKENS((pCallContext->pClient->hClient), hAppClient,
280                                       *pSecInfo, status);
281     }
282     else
283     {
284         RS_ACCESS_MASK debugAccessMask;
285 
286         //
287         // On CPU-RM and Guest RM systems check that debugging rights were shared.
288         //
289         // Check that the application client allowed debugging rights for the debugger
290         // client on the compute object (i.e. the current client allocating this object).
291         //
292         //
293         RS_ACCESS_MASK_CLEAR(&debugAccessMask);
294         RS_ACCESS_MASK_ADD(&debugAccessMask, RS_ACCESS_DEBUG);
295 
296         status = rsAccessCheckRights(pGrResourceRef, pCallContext->pClient,
297                                      &debugAccessMask);
298 
299         NV_CHECK_OR_ELSE(LEVEL_ERROR,
300                 status == NV_OK,
301                 NV_PRINTF(LEVEL_ERROR, "Current user does not have debugging rights on the compute object. Status = 0x%x\n", status);
302                 return NV_ERR_INSUFFICIENT_PERMISSIONS;);
303     }
304 
305     pKernelSMDebuggerSession->pObject = dynamicCast(pGrResourceRef->pResource, KernelGraphicsObject);
306     if (pKernelSMDebuggerSession->pObject == NULL)
307     {
308         return NV_ERR_INVALID_OBJECT;
309     }
310 
311     pParentRef  = pGrResourceRef->pParentRef;
312     hAppChannel = pParentRef->hResource;
313 
314     // Ensure that debugger session is created under same device as the object under debug
315     pGpu = GPU_RES_GET_GPU(pKernelSMDebuggerSession);
316     NV_CHECK_OR_RETURN(LEVEL_ERROR, pGpu == GPU_RES_GET_GPU(pKernelSMDebuggerSession->pObject),
317                            NV_ERR_INVALID_ARGUMENT);
318 
319     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, subdeviceGetByGpu(pAppClient, pGpu, &pSubdevice));
320 
321     GPU_RES_SET_THREAD_BC_STATE(pSubdevice);
322 
323     hSubdevice = RES_GET_HANDLE(pSubdevice);
324 
325     // Initialize the object info
326     pKernelSMDebuggerSession->hChannelClient  = pAppClient->hClient;
327     pKernelSMDebuggerSession->hDebugger       = hKernelSMDebuggerSession;
328     pKernelSMDebuggerSession->hDebuggerClient = pCallContext->pClient->hClient;
329     pKernelSMDebuggerSession->hChannel        = hAppChannel;
330     pKernelSMDebuggerSession->hSubdevice      = hSubdevice;
331 
332     // Insert it into this Object's debugger list
333     if (listAppendValue(&pKernelSMDebuggerSession->pObject->activeDebuggers, &pKernelSMDebuggerSession) == NULL)
334     {
335         NV_PRINTF(LEVEL_ERROR,
336                   "Failed to insert Debugger into channel list, handle = 0x%x\n",
337                   pKernelSMDebuggerSession->hDebugger);
338         return NV_ERR_INSUFFICIENT_RESOURCES;
339     }
340 
341     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
342         _ksmdbgssnInitClient(pGpu, pKernelSMDebuggerSession));
343 
344     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
345         _ShareDebugger(pKernelSMDebuggerSession, pCallContext->pResourceRef, pGrResourceRef));
346 
347     return status;
348 }
349 
350 /**
351  * @brief Binds a debugger object to the given KernelGraphicsObject's RsSession object
352  *        or allocates a new one if it's not currently referencing one.
353  *
354  * @param[in]     pKernelSMDebuggerSession Underlying debugger object
355  * @param[in,out] pGrResourceRef  RsResourceRef for the channel that will be
356  *                                bound to an RsSession if one isn't already
357  *                                there.
358  * @param[in,out] pDebuggerRef    RsResourceRef for the debugger object that will
359  *                                be bound to a new RsSession or the channel's
360  *                                existing one.
361  *
362  * @return NV_OK on success, error code on failure
363  */
364 static NV_STATUS
365 _ShareDebugger
366 (
367     KernelSMDebuggerSession *pKernelSMDebuggerSession,
368     RsResourceRef           *pDebuggerRef,
369     RsResourceRef           *pGrResourceRef
370 )
371 {
372     NV_STATUS  status = NV_OK;
373     RsSession *pRsSession;
374 
375     // Allocate a new RsSession if the KernelGraphicsObject doesn't reference one already
376     if (pGrResourceRef->pDependantSession == NULL)
377     {
378         RsShared *pShared = NULL;
379 
380         status = serverAllocShare(&g_resServ, classInfo(RmDebuggerSession), &pShared);
381         if (status != NV_OK)
382             return status;
383 
384         pKernelSMDebuggerSession->pDebugSession = dynamicCast(pShared, RmDebuggerSession);
385         pRsSession = staticCast(pKernelSMDebuggerSession->pDebugSession, RsSession);
386 
387         // Add KernelGraphicsObject as a dependency
388         sessionAddDependency(pRsSession, pGrResourceRef);
389 
390         // Add debugger object as a dependant of the new RsSession object
391         sessionAddDependant(pRsSession, pDebuggerRef);
392 
393         //
394         // Decrease ref count if newly allocated, we only want the ref
395         // count for the RsSession object to be 2 in this case
396         //
397         serverFreeShare(&g_resServ, pShared);
398     }
399     else
400     {
401         pKernelSMDebuggerSession->pDebugSession = dynamicCast(pGrResourceRef->pDependantSession,
402                                                      RmDebuggerSession);
403 
404         if (pKernelSMDebuggerSession->pDebugSession == NULL)
405         {
406             NV_PRINTF(LEVEL_ERROR, "KernelGraphicsObject already a dependent of a non-debugger session\n");
407             return NV_ERR_INVALID_STATE;
408         }
409 
410         // Add debugger object as a dependant of the existing RsSession object
411         pRsSession = staticCast(pKernelSMDebuggerSession->pDebugSession, RsSession);
412         sessionAddDependant(pRsSession, pDebuggerRef);
413     }
414 
415     return NV_OK;
416 }
417 
418 //
419 // Empty destructor since the destruction is done in the free callback which is invoked
420 // by Resource Server when the RmDebuggerSession shared object is invalidated due to either
421 // the KernelSMDebuggerSession being freed or the underlying KernelGraphicsObject dependancy being freed.
422 //
423 void
424 ksmdbgssnDestruct_IMPL
425 (
426     KernelSMDebuggerSession *pKernelSMDebuggerSession
427 )
428 {}
429 
430 //
431 // The free callback will always be invoked before the destructor for either the KernelSMDebuggerSession
432 // (empty since we clean up here) and before the KernelGraphicsObject dependancy's destructor. This is a bit
433 // different from how other Resource Server classes clean up since there is a dependancy owned
434 // by a different RM client.
435 //
436 void
437 ksmdbgssnFreeCallback_IMPL
438 (
439     KernelSMDebuggerSession *pKernelSMDebuggerSession
440 )
441 {
442     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
443 
444     // This should free the entire hierarchy of objects.
445     pRmApi->Free(pRmApi, pKernelSMDebuggerSession->hInternalClient, pKernelSMDebuggerSession->hInternalClient);
446 
447     // Remove it from the pObject debugger list
448     listRemoveFirstByValue(&pKernelSMDebuggerSession->pObject->activeDebuggers, &pKernelSMDebuggerSession);
449 
450 }
451 
452 NV_STATUS
453 ksmdbgssnInternalControlForward_IMPL
454 (
455     KernelSMDebuggerSession *pKernelSMDebuggerSession,
456     NvU32 command,
457     void *pParams,
458     NvU32 size
459 )
460 {
461     return gpuresInternalControlForward_IMPL(staticCast(pKernelSMDebuggerSession, GpuResource), command, pParams, size);
462 }
463 
464 NvHandle
465 ksmdbgssnGetInternalObjectHandle_IMPL(KernelSMDebuggerSession *pKernelSMDebuggerSession)
466 {
467     return NV01_NULL_OBJECT;
468 }
469