1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24
25 /*!
26 * @file
27 * @brief This file contains the functions managing the FmSession
28 */
29
30 #include "core/core.h"
31 #include "os/os.h"
32 #include "compute/fm_session_api.h"
33 #include "class/cl000f.h"
34 #include "resserv/rs_client.h"
35 #include "core/system.h"
36 #include "core/locks.h"
37 #include "compute/fabric.h"
38 #include "Nvcm.h"
39 #include "gpu_mgr/gpu_mgr.h"
40 #include "kernel/gpu/gpu.h"
41 #include "ctrl/ctrl2080/ctrl2080internal.h"
42 #include "rmapi/client.h"
43
44 static void
_clearOutstandingComputeChannels(void)45 _clearOutstandingComputeChannels(void)
46 {
47 OBJGPU *pGpu = NULL;
48 NvU32 gpuMask = 0;
49 NvU32 gpuCount = 0;
50 NvU32 gpuInstance = 0;
51 RM_API *pRmApi;
52
53 NV_ASSERT(rmGpuLockIsOwner());
54
55 gpumgrGetGpuAttachInfo(&gpuCount, &gpuMask);
56
57 while ((pGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
58 {
59 pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
60
61 if (pRmApi->Control(pRmApi,
62 pGpu->hInternalClient,
63 pGpu->hInternalSubdevice,
64 NV2080_CTRL_CMD_INTERNAL_RECOVER_ALL_COMPUTE_CONTEXTS,
65 NULL,
66 0) != NV_OK)
67 {
68 NV_PRINTF(LEVEL_ERROR,
69 "Failed to recover all compute channels for GPU %d\n",
70 pGpu->gpuInstance);
71 }
72 }
73 }
74
75 static void
_clearFmState(void)76 _clearFmState(void)
77 {
78 OBJSYS *pSys = SYS_GET_INSTANCE();
79 Fabric *pFabric = SYS_GET_FABRIC(pSys);
80 NvU32 flags = fabricGetFmSessionFlags(pFabric);
81
82 if (!pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED))
83 {
84 NV_PRINTF(LEVEL_INFO,
85 "Fabric manager state is already cleared.\n");
86 return;
87 }
88
89 pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED, NV_FALSE);
90
91 NV_PRINTF(LEVEL_INFO, "Fabric manager state is cleared.\n");
92
93 if (FLD_TEST_REF(NV000F_FLAGS_CHANNEL_RECOVERY, _ENABLED, flags))
94 {
95 _clearOutstandingComputeChannels();
96 }
97 }
98
99 NV_STATUS
fmsessionapiConstruct_IMPL(FmSessionApi * pFmSessionApi,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)100 fmsessionapiConstruct_IMPL
101 (
102 FmSessionApi *pFmSessionApi,
103 CALL_CONTEXT *pCallContext,
104 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
105 )
106 {
107 OBJSYS *pSys = SYS_GET_INSTANCE();
108 Fabric *pFabric = SYS_GET_FABRIC(pSys);
109 NvHandle hClient = pCallContext->pClient->hClient;
110 NV000F_ALLOCATION_PARAMETERS *pAllocParams = pParams->pAllocParams;
111 NV_STATUS status;
112
113 NV_ASSERT_OR_RETURN(RMCFG_FEATURE_KERNEL_RM, NV_ERR_NOT_SUPPORTED);
114
115 osRmCapInitDescriptor(&pFmSessionApi->dupedCapDescriptor);
116
117 if ((pCallContext->secInfo.privLevel >= RS_PRIV_LEVEL_KERNEL)
118 && !RMCFG_FEATURE_PLATFORM_MODS)
119 {
120 NV_PRINTF(LEVEL_ERROR,
121 "only supported for usermode clients\n");
122 return NV_ERR_NOT_SUPPORTED;
123 }
124
125 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED))
126 {
127 NV_PRINTF(LEVEL_ERROR, "duplicate object creation\n");
128 return NV_ERR_STATE_IN_USE;
129 }
130
131 status = osRmCapAcquire(NULL, NV_RM_CAP_EXT_FABRIC_MGMT,
132 pAllocParams->capDescriptor,
133 &pFmSessionApi->dupedCapDescriptor);
134
135 //
136 // On platforms where capability isn't implemented,
137 // enforce the admin-only check.
138 //
139 if (status == NV_ERR_NOT_SUPPORTED)
140 {
141 if (rmclientIsAdminByHandle(hClient, pCallContext->secInfo.privLevel))
142 {
143 status = NV_OK;
144 }
145 else
146 {
147 NV_PRINTF(LEVEL_ERROR, "insufficient permissions\n");
148 return NV_ERR_INSUFFICIENT_PERMISSIONS;
149 }
150 }
151 else if (status != NV_OK)
152 {
153 NV_PRINTF(LEVEL_ERROR, "Capability validation failed\n");
154 return status;
155 }
156
157 if (pFabric != NULL)
158 {
159 fabricSetFmSessionFlags(pFabric, pAllocParams->flags);
160 }
161
162 pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED, NV_TRUE);
163
164 return NV_OK;
165 }
166
167 void
fmsessionapiDestruct_IMPL(FmSessionApi * pFmSessionApi)168 fmsessionapiDestruct_IMPL
169 (
170 FmSessionApi *pFmSessionApi
171 )
172 {
173 OBJSYS *pSys = SYS_GET_INSTANCE();
174
175 NV_PRINTF(LEVEL_INFO, "Fabric manager is shutting down.\n");
176
177 _clearFmState();
178
179 osRmCapRelease(pFmSessionApi->dupedCapDescriptor);
180 pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED, NV_FALSE);
181 }
182
183 NV_STATUS
fmsessionapiCtrlCmdSetFmState_IMPL(FmSessionApi * pFmSessionApi)184 fmsessionapiCtrlCmdSetFmState_IMPL
185 (
186 FmSessionApi *pFmSessionApi
187 )
188 {
189 OBJSYS *pSys = SYS_GET_INSTANCE();
190
191 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED))
192 {
193 NV_PRINTF(LEVEL_INFO,
194 "Fabric manager state is already set.\n");
195 return NV_OK;
196 }
197
198 pSys->setProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_INITIALIZED, NV_TRUE);
199
200 NV_PRINTF(LEVEL_INFO, "Fabric manager state is set.\n");
201
202 return NV_OK;
203 }
204
205 NV_STATUS
fmsessionapiCtrlCmdClearFmState_IMPL(FmSessionApi * pFmSessionApi)206 fmsessionapiCtrlCmdClearFmState_IMPL
207 (
208 FmSessionApi *pFmSessionApi
209 )
210 {
211 _clearFmState();
212
213 return NV_OK;
214 }
215