1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #ifndef NVLINK_INBAND_MSG_HDR_H
25 #define NVLINK_INBAND_MSG_HDR_H
26
27 /*
28 * Messages do not have individual versioning, instead a strict ABI is maintained. When a change is
29 * required on existing message, instead of modifying corresponding message structure, a completely
30 * new message type (like INBAND_MSG_TYPE_XXX_V1, INBAND_MSG_TYPE_XXX_V2) and corresponding message
31 * definition structure needs to be added. Do not modify existing structs in any way.
32 *
33 * Messages may contain fields which are debug only and must be used for logging purpose. Such
34 * fields shouldn't be trusted.
35 *
36 * - Avoid use of enums or bitfields. Always use fixed types.
37 * - Avoid conditional fields in the structs.
38 * - Avoid nested and complex structs. Keep them simple and flat for ease of encoding and decoding.
39 * - Avoid embedded pointers. Flexible arrays at the end of the struct are allowed.
 * - Always use the packed struct to typecast inband messages.
41 * - Always have reserved flags or fields to CYA given the stable ABI conditions.
42 */
43
44 /* Align to byte boundaries */
45 #pragma pack(push, 1)
46
47 #include "nvtypes.h"
48 #include "nvmisc.h"
49 #include "nvCpuUuid.h"
50 #include "nvstatus.h"
51 #include "nvstatuscodes.h"
52
/* Maximum size in bytes of a single encoded in-band message */
#define NVLINK_INBAND_MAX_MSG_SIZE 5120
/* Magic value carried in nvlink_inband_msg_header_t::magicId */
#define NVLINK_INBAND_MSG_MAGIC_ID_FM 0xadbc

/* Nvlink Inband messages types */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REQ 0
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP 1
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ 2
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP 3
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ 4
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ_V2 5
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_UPDATE_REQ 6
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_REQ 7
/* Replay responses deliberately reuse the non-replay response type IDs */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_RSP NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_REQ 8
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_RSP NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP
/* One greater than the highest assigned message type value */
#define NVLINK_INBAND_MSG_TYPE_MAX 9
69
70 /* Nvlink Inband message packet header */
/* Fixed header preceding every in-band message payload (byte-packed, stable ABI) */
typedef struct
{
    NvU16 magicId;     /* Identifier to represent in-band msg, will be NVLINK_INBAND_MSG_MAGIC_ID_FM */
    NvU64 requestId;   /* Unique Id for a request and response will carry same id */
    NV_STATUS status;  /* High level status of the message/request */
    NvU16 type;        /* Type of encoded message. One of NVLINK_INBAND_MSG_TYPE_xxx */
    NvU32 length;      /* Length of encoded message */
    NvU8 reserved[8];  /* For future use. Must be initialized to zero */
} nvlink_inband_msg_header_t;
80
/* GPU capability bits reported via nvlink_inband_gpu_probe_req_t::gpuCapMask */
#define NVLINK_INBAND_GPU_PROBE_CAPS_SRIOV_ENABLED NVBIT(0)
#define NVLINK_INBAND_GPU_PROBE_CAPS_PROBE_UPDATE NVBIT(1)
#define NVLINK_INBAND_GPU_PROBE_CAPS_EGM_SUPPORT NVBIT(2)

/* Add more caps as needed in the future */

/* NVLink bandwidth modes reported via the bwMode field of probe requests */
#define NVLINK_INBAND_BW_MODE_FULL 0
#define NVLINK_INBAND_BW_MODE_OFF 1
#define NVLINK_INBAND_BW_MODE_MIN 2
#define NVLINK_INBAND_BW_MODE_HALF 3
#define NVLINK_INBAND_BW_MODE_3QUARTER 4
92
/* Payload of NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REQ */
typedef struct
{
    NvU64 pciInfo;            /* Encoded as Domain(63:32):Bus(15:8):Device(0:7). (debug only)
                                 NOTE(review): "Device(0:7)" is inconsistent with the other
                                 high:low ranges — likely means Device(7:0); confirm. */
    NvU8  moduleId;           /* GPIO based physical/module ID of the GPU. (debug only) */
    NvUuid gpuUuid;           /* UUID of the GPU. (debug only) */
    NvU64 discoveredLinkMask; /* GPU's discovered NVLink mask info. (debug only) */
    NvU64 enabledLinkMask;    /* GPU's currently enabled NvLink mask info. (debug only) */

    NvU32 gpuCapMask;         /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
    NvU8  bwMode;             /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
    NvU8  reserved[31];       /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_req_t;
105
/* Complete wire message: common header immediately followed by the probe request payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_gpu_probe_req_t probeReq;
} nvlink_inband_gpu_probe_req_msg_t;
111
/* Fabric Manager capability bits reported via nvlink_inband_gpu_probe_rsp_t::fmCaps */
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1 NVBIT64(0)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1 NVBIT64(1)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_MIN NVBIT64(2)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_HALF NVBIT64(3)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_3QUARTER NVBIT64(4)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V2 NVBIT64(5)
#define NVLINK_INBAND_FM_CAPS_EGM_ENABLED NVBIT64(6)

/*
 * Degraded-bandwidth field within the fabricHealthMask words below.
 * The first define is a high:low bit range (presumably for use with the
 * NV DRF-style field macros — see nvmisc.h); the rest are the values
 * that field may take.
 */
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW 1:0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED 0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE 1
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE 2
124
/* Payload of NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP */
typedef struct
{
    NvU64 gpuHandle;            /* Unique handle assigned by initialization entity for this GPU */
    NvU32 gfId;                 /* GFID which supports NVLink */
    NvU64 fmCaps;               /* Capability of FM e.g. what features FM support. One or more
                                   NVLINK_INBAND_FM_CAPS_xxx bits */
    NvUuid clusterUuid;         /* Cluster UUID to which this node belongs */
    NvU16 fabricPartitionId;    /* Partition ID if the GPU belongs to a fabric partition */
    NvU64 gpaAddress;           /* GPA starting address for the GPU */
    NvU64 gpaAddressRange;      /* GPU GPA address range */
    NvU64 flaAddress;           /* FLA starting address for the GPU */
    NvU64 flaAddressRange;      /* GPU FLA address range */
    NvU32 linkMaskToBeReduced;  /* bit mask of unused NVLink ports for P2P */
    NvU32 cliqueId;             /* Fabric Clique Id */
    NvU32 fabricHealthMask;     /* Mask containing bits indicating various fabric health parameters */
    NvU32 gpaAddressEGMHi;      /* GPA Address for EGM. Don't use if EGM support is not present in GFM */
    NvU8 reserved[16];          /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_rsp_t;
142
/* Complete wire message: common header immediately followed by the probe response payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_gpu_probe_rsp_t probeRsp;
} nvlink_inband_gpu_probe_rsp_msg_t;
148
/* Payload of NVLINK_INBAND_MSG_TYPE_GPU_PROBE_UPDATE_REQ */
typedef struct
{
    NvU64 gpuHandle;        /* Unique handle assigned by initialization entity for this GPU */
    NvU32 cliqueId;         /* Fabric Clique Id */
    NvU32 fabricHealthMask; /* Mask containing bits indicating various fabric health parameters */
    NvU8 reserved[32];      /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_update_req_t;
156
/* Complete wire message: common header immediately followed by the probe update payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_gpu_probe_update_req_t probeUpdate;
} nvlink_inband_gpu_probe_update_req_msg_t;
162
/* Payload of NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ (V1 multicast team setup) */
typedef struct
{
    NvU64 mcAllocSize;    /* Multicast allocation size requested */
    NvU32 flags;          /* For future use. Must be initialized to zero */
    NvU8  reserved[8];    /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;  /* Number of GPUs in this team */
    NvU64 gpuHandles[];   /* Array of probed handles; flexible array, must remain last */
} nvlink_inband_mc_team_setup_req_t;
171
/* Complete wire message: common header immediately followed by the V1 team setup payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_mc_team_setup_req_t mcTeamSetupReq;
} nvlink_inband_mc_team_setup_req_msg_t;
177
/* Payload of NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ_V2 (adds per-node response keys) */
typedef struct
{
    NvU64 mcAllocSize;         /* Multicast allocation size requested */
    NvU32 flags;               /* For future use. Must be initialized to zero */
    NvU8  reserved[8];         /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;       /* Number of GPUs in this team */
    NvU16 numKeys;             /* Number of keys (a.k.a request ID) used by FM to send response */
    NvU64 gpuHandlesAndKeys[]; /* Array of probed handles and keys; flexible array, must remain
                                  last. Total element count is numGpuHandles + numKeys. */

    /*
     * The array will be grouped and ordered as: <allGpuHandlesOfNodeA, allGpuHandlesOfNodeB,...
     * keyForNodeA, keyForNodeB>. The first group of gpuHandles will belong to the exporter node,
     * which will be followed by the importer nodes.
     *
     * Test case: If the exporter and importer nodes are same, then the message will
     * have multiple keys belonging to the same node as: <allGpuHandlesOfNodeA,...
     * key1ForNodeA, key2ForNodeA>. Even though all gpuHandles belong to the same node, the
     * first key should be considered from the exporter node and the rest from the importer
     * nodes.
     */
} nvlink_inband_mc_team_setup_req_v2_t;
199
/* Complete wire message: common header immediately followed by the V2 team setup payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_mc_team_setup_req_v2_t mcTeamSetupReq;
} nvlink_inband_mc_team_setup_req_v2_msg_t;
205
/* Payload of NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP */
typedef struct
{
    NvU64 mcTeamHandle;  /* Unique handle assigned for this Multicast team */
                         /* Should be zero if the response is sent to the importer nodes */
    NvU32 flags;         /* For future use. Must be initialized to zero */
    NvU8  reserved[8];   /* For future use. Must be initialized to zero */
    NvU64 mcAddressBase; /* FLA starting address assigned for the Multicast slot */
    NvU64 mcAddressSize; /* Should be same as mcAllocSize from the corresponding setup request */
} nvlink_inband_mc_team_setup_rsp_t;
215
/* Complete wire message: common header immediately followed by the team setup response payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_mc_team_setup_rsp_t mcTeamSetupRsp;
} nvlink_inband_mc_team_setup_rsp_msg_t;
221
/* Payload of NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ; no release response is defined */
typedef struct
{
    NvU64 mcTeamHandle;  /* Unique handle assigned for the Multicast team */
    NvU32 flags;         /* For future use. Must be initialized to zero */
    NvU8  reserved[8];   /* For future use. Must be initialized to zero */
} nvlink_inband_mc_team_release_req_t;
228
/* Complete wire message: common header immediately followed by the team release payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_mc_team_release_req_t mcTeamReleaseReq;
} nvlink_inband_mc_team_release_req_msg_t;
234
/* Payload of NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_REQ: a probe request re-sent
   together with the previously assigned gpuHandle (e.g. after migration) */
typedef struct
{
    /* Fields to be replayed */
    NvU64 gpuHandle;          /* Unique handle that was provided by FM pre-migration. */

    /* Other fields from the request */
    NvU64 pciInfo;            /* Encoded as Domain(63:32):Bus(15:8):Device(0:7). (debug only)
                                 NOTE(review): "Device(0:7)" is inconsistent with the other
                                 high:low ranges — likely means Device(7:0); confirm. */
    NvU8  moduleId;           /* GPIO based physical/module ID of the GPU. (debug only) */
    NvUuid gpuUuid;           /* UUID of the GPU. (debug only) */
    NvU64 discoveredLinkMask; /* GPU's discovered NVLink mask info. (debug only) */
    NvU64 enabledLinkMask;    /* GPU's currently enabled NvLink mask info. (debug only) */

    NvU32 gpuCapMask;         /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
    NvU8  bwMode;             /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
    NvU8  reserved[31];       /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_replay_req_t;
251
/* Complete wire message: common header immediately followed by the probe replay payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_gpu_probe_replay_req_t probeReplayReq;
} nvlink_inband_gpu_probe_replay_req_msg_t;
257
/* Probe replay responses reuse the probe response layout (and its message type ID) */
typedef nvlink_inband_gpu_probe_rsp_t nvlink_inband_gpu_probe_replay_rsp_t;
typedef nvlink_inband_gpu_probe_rsp_msg_t nvlink_inband_gpu_probe_replay_rsp_msg_t;
260
/* Payload of NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_REQ: a V2 team setup request
   re-sent together with the previously assigned team handle and FLA assignment */
typedef struct
{
    /* Fields to be replayed */
    NvU64 mcTeamHandle;        /* Unique handle assigned for this Multicast team */
    NvU64 mcAddressBase;       /* FLA starting address assigned for the Multicast slot */
    NvU64 mcAddressSize;       /* Size of FLA assigned to the Multicast slot */

    /* Other fields from the request */
    NvU64 mcAllocSize;         /* Multicast allocation size requested */
    NvU32 flags;               /* For future use. Must be initialized to zero */
    NvU8  reserved[8];         /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;       /* Number of GPUs in this team */
    NvU16 numKeys;             /* Number of keys (a.k.a request ID) used by FM to send response */
    NvU64 gpuHandlesAndKeys[]; /* Array of probed handles and keys; flexible array, must remain
                                  last. Ordered as described for the V2 setup request. */
} nvlink_inband_mc_team_setup_replay_req_t;
276
277
/* Complete wire message: common header immediately followed by the team setup replay payload */
typedef struct
{
    nvlink_inband_msg_header_t msgHdr;
    nvlink_inband_mc_team_setup_replay_req_t mcTeamSetupReplayReq;
} nvlink_inband_mc_team_setup_replay_req_msg_t;
283
/* Team setup replay responses reuse the team setup response layout (and its message type ID) */
typedef nvlink_inband_mc_team_setup_rsp_t nvlink_inband_mc_team_setup_replay_rsp_t;
typedef nvlink_inband_mc_team_setup_rsp_msg_t nvlink_inband_mc_team_setup_replay_rsp_msg_t;
286
287 #pragma pack(pop)
288
289 /********************* Don't add any message structs after this line ******************************/
290
291 /* Helpers */
nvlinkInitInbandMsgHdr(nvlink_inband_msg_header_t * pMsgHdr,NvU16 type,NvU32 len,NvU64 requestId)292 static NV_INLINE void nvlinkInitInbandMsgHdr
293 (
294 nvlink_inband_msg_header_t *pMsgHdr,
295 NvU16 type,
296 NvU32 len,
297 NvU64 requestId
298 )
299 {
300 NvU8 i;
301
302 pMsgHdr->requestId = requestId;
303 pMsgHdr->magicId = NVLINK_INBAND_MSG_MAGIC_ID_FM;
304 pMsgHdr->type = type;
305 pMsgHdr->length = len;
306 pMsgHdr->status = NV_OK;
307
308 for (i = 0; i < sizeof(pMsgHdr->reserved); i++)
309 pMsgHdr->reserved[i] = 0;
310 }
311
312 #endif
313