1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 
25 /*!
26  * @file
27  * @brief This file contains the functions managing the FmSession
28  */
29 
30 #include "core/core.h"
31 #include "os/os.h"
32 #include "compute/fm_session_api.h"
33 #include "class/cl000f.h"
34 #include "resserv/rs_client.h"
35 #include "core/system.h"
36 #include "core/locks.h"
37 #include "compute/fabric.h"
38 #include "Nvcm.h"
39 #include "gpu_mgr/gpu_mgr.h"
40 #include "kernel/gpu/gpu.h"
41 #include "ctrl/ctrl2080/ctrl2080internal.h"
42 #include "rmapi/client.h"
43 
44 static void
_clearOutstandingComputeChannels(void)45 _clearOutstandingComputeChannels(void)
46 {
47     OBJGPU *pGpu = NULL;
48     NvU32 gpuMask = 0;
49     NvU32 gpuCount = 0;
50     NvU32 gpuInstance = 0;
51     RM_API *pRmApi;
52 
53     NV_ASSERT(rmGpuLockIsOwner());
54 
55     gpumgrGetGpuAttachInfo(&gpuCount, &gpuMask);
56 
57     while ((pGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
58     {
59         pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
60 
61         if (pRmApi->Control(pRmApi,
62                             pGpu->hInternalClient,
63                             pGpu->hInternalSubdevice,
64                             NV2080_CTRL_CMD_INTERNAL_RECOVER_ALL_COMPUTE_CONTEXTS,
65                             NULL,
66                             0) != NV_OK)
67         {
68             NV_PRINTF(LEVEL_ERROR,
69                       "Failed to recover all compute channels for GPU %d\n",
70                       pGpu->gpuInstance);
71         }
72     }
73 }
74 
75 static void
_clearFmState(void)76 _clearFmState(void)
77 {
78     OBJSYS *pSys = SYS_GET_INSTANCE();
79     Fabric *pFabric = SYS_GET_FABRIC(pSys);
80     NvU32 flags = fabricGetFmSessionFlags(pFabric);
81 
82     if (!pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED))
83     {
84         NV_PRINTF(LEVEL_INFO,
85                   "Fabric manager state is already cleared.\n");
86         return;
87     }
88 
89     pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED, NV_FALSE);
90 
91     NV_PRINTF(LEVEL_INFO, "Fabric manager state is cleared.\n");
92 
93     if (FLD_TEST_REF(NV000F_FLAGS_CHANNEL_RECOVERY, _ENABLED, flags))
94     {
95         _clearOutstandingComputeChannels();
96     }
97 }
98 
99 NV_STATUS
fmsessionapiConstruct_IMPL(FmSessionApi * pFmSessionApi,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)100 fmsessionapiConstruct_IMPL
101 (
102     FmSessionApi                 *pFmSessionApi,
103     CALL_CONTEXT                 *pCallContext,
104     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
105 )
106 {
107     OBJSYS *pSys = SYS_GET_INSTANCE();
108     Fabric *pFabric = SYS_GET_FABRIC(pSys);
109     NvHandle hClient = pCallContext->pClient->hClient;
110     NV000F_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;
111     NV_STATUS status;
112 
113     NV_ASSERT_OR_RETURN(RMCFG_FEATURE_KERNEL_RM, NV_ERR_NOT_SUPPORTED);
114 
115     osRmCapInitDescriptor(&pFmSessionApi->dupedCapDescriptor);
116 
117     if ((pCallContext->secInfo.privLevel >= RS_PRIV_LEVEL_KERNEL)
118         && !RMCFG_FEATURE_PLATFORM_MODS)
119     {
120         NV_PRINTF(LEVEL_ERROR,
121                   "only supported for usermode clients\n");
122         return NV_ERR_NOT_SUPPORTED;
123     }
124 
125     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED))
126     {
127         NV_PRINTF(LEVEL_ERROR, "duplicate object creation\n");
128         return NV_ERR_STATE_IN_USE;
129     }
130 
131     status = osRmCapAcquire(NULL, NV_RM_CAP_EXT_FABRIC_MGMT,
132                             pAllocParams->capDescriptor,
133                             &pFmSessionApi->dupedCapDescriptor);
134 
135     //
136     // On platforms where capability isn't implemented,
137     // enforce the admin-only check.
138     //
139     if (status == NV_ERR_NOT_SUPPORTED)
140     {
141         if (rmclientIsAdminByHandle(hClient, pCallContext->secInfo.privLevel))
142         {
143             status = NV_OK;
144         }
145         else
146         {
147             NV_PRINTF(LEVEL_ERROR, "insufficient permissions\n");
148             return NV_ERR_INSUFFICIENT_PERMISSIONS;
149         }
150     }
151     else if (status != NV_OK)
152     {
153          NV_PRINTF(LEVEL_ERROR, "Capability validation failed\n");
154          return status;
155     }
156 
157     if (pFabric != NULL)
158     {
159         fabricSetFmSessionFlags(pFabric, pAllocParams->flags);
160     }
161 
162     pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED, NV_TRUE);
163 
164     return NV_OK;
165 }
166 
167 void
fmsessionapiDestruct_IMPL(FmSessionApi * pFmSessionApi)168 fmsessionapiDestruct_IMPL
169 (
170     FmSessionApi *pFmSessionApi
171 )
172 {
173     OBJSYS *pSys = SYS_GET_INSTANCE();
174 
175     NV_PRINTF(LEVEL_INFO, "Fabric manager is shutting down.\n");
176 
177     _clearFmState();
178 
179     osRmCapRelease(pFmSessionApi->dupedCapDescriptor);
180     pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED, NV_FALSE);
181 }
182 
183 NV_STATUS
fmsessionapiCtrlCmdSetFmState_IMPL(FmSessionApi * pFmSessionApi)184 fmsessionapiCtrlCmdSetFmState_IMPL
185 (
186     FmSessionApi *pFmSessionApi
187 )
188 {
189     OBJSYS *pSys = SYS_GET_INSTANCE();
190 
191     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED))
192     {
193         NV_PRINTF(LEVEL_INFO,
194                   "Fabric manager state is already set.\n");
195         return NV_OK;
196     }
197 
198     pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED, NV_TRUE);
199 
200     NV_PRINTF(LEVEL_INFO, "Fabric manager state is set.\n");
201 
202     return NV_OK;
203 }
204 
205 NV_STATUS
fmsessionapiCtrlCmdClearFmState_IMPL(FmSessionApi * pFmSessionApi)206 fmsessionapiCtrlCmdClearFmState_IMPL
207 (
208     FmSessionApi *pFmSessionApi
209 )
210 {
211     _clearFmState();
212 
213     return NV_OK;
214 }
215