1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef NVLINK_INBAND_MSG_HDR_H
25 #define NVLINK_INBAND_MSG_HDR_H
26 
27 /*
 * Messages do not have individual versioning; instead a strict ABI is maintained. When a change is
 * required to an existing message, do not modify the corresponding message structure. Instead, add
 * a completely new message type (like INBAND_MSG_TYPE_XXX_V1, INBAND_MSG_TYPE_XXX_V2) and a
 * corresponding message definition structure. Do not modify existing structs in any way.
32  *
33  * Messages may contain fields which are debug only and must be used for logging purpose. Such
34  * fields shouldn't be trusted.
35  *
36  * - Avoid use of enums or bitfields. Always use fixed types.
37  * - Avoid conditional fields in the structs.
38  * - Avoid nested and complex structs. Keep them simple and flat for ease of encoding and decoding.
39  * - Avoid embedded pointers. Flexible arrays at the end of the struct are allowed.
 * - Always use the packed struct to typecast inband messages.
 * - Always have reserved flags or fields to allow future extension, given the stable ABI conditions.
42  */
43 
44 /* Align to byte boundaries */
45 #pragma pack(push, 1)
46 
47 #include "nvtypes.h"
48 #include "nvmisc.h"
49 #include "nvCpuUuid.h"
50 #include "nvstatus.h"
51 #include "nvstatuscodes.h"
52 
/* Upper bound on the size of one inband message (presumably in bytes - confirm with users). */
#define NVLINK_INBAND_MAX_MSG_SIZE     5120

/* Value nvlinkInitInbandMsgHdr() writes into nvlink_inband_msg_header_t::magicId. */
#define NVLINK_INBAND_MSG_MAGIC_ID_FM  0xadbc
55 
/*
 * NVLink inband message types, carried in nvlink_inband_msg_header_t::type.
 *
 * Per the ABI rules at the top of this file, a changed message gets a new type
 * id (see ..._SETUP_REQ_V2) rather than a modified struct.
 */

/* GPU probe */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REQ             0
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP             1

/* Multicast team setup / release */
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ         2
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP         3
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ       4
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ_V2      5

/* GPU probe update */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_UPDATE_REQ      6

/* Replay requests; the replay responses reuse the type id of the regular response */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_REQ      7
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_RSP      NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_REQ  8
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_RSP  NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP

/* One more than the highest type id assigned above */
#define NVLINK_INBAND_MSG_TYPE_MAX                       9
69 
70 /* Nvlink Inband message packet header */
71 typedef struct
72 {
73     NvU16     magicId;           /* Identifier to represent in-band msg, will be NVLINK_INBAND_MSG_MAGIC_ID */
74     NvU64     requestId;         /* Unique Id for a request and response will carry same id */
75     NV_STATUS status;            /* High level status of the message/request */
76     NvU16     type;              /* Type of encoded message. One of NVLINK_INBAND_MSG_TYPE_xxx */
77     NvU32     length;            /* Length of encoded message */
78     NvU8      reserved[8];       /* For future use. Must be initialized to zero */
79 } nvlink_inband_msg_header_t;
80 
/*
 * GPU capability flags for the gpuCapMask field of the probe request structs.
 * Each flag takes its own NVBIT() position. Add more caps as needed in the future.
 */
#define NVLINK_INBAND_GPU_PROBE_CAPS_SRIOV_ENABLED NVBIT(0)
#define NVLINK_INBAND_GPU_PROBE_CAPS_PROBE_UPDATE  NVBIT(1)
#define NVLINK_INBAND_GPU_PROBE_CAPS_EGM_SUPPORT   NVBIT(2)
86 
/* NVLink bandwidth modes; the bwMode field of the probe request structs holds one of these. */
#define NVLINK_INBAND_BW_MODE_FULL     0
#define NVLINK_INBAND_BW_MODE_OFF      1
#define NVLINK_INBAND_BW_MODE_MIN      2
#define NVLINK_INBAND_BW_MODE_HALF     3
#define NVLINK_INBAND_BW_MODE_3QUARTER 4
92 
93 typedef struct
94 {
95     NvU64  pciInfo;              /* Encoded as Domain(63:32):Bus(15:8):Device(0:7). (debug only) */
96     NvU8   moduleId;             /* GPIO based physical/module ID of the GPU. (debug only) */
97     NvUuid gpuUuid;              /* UUID of the GPU. (debug only) */
98     NvU64  discoveredLinkMask;   /* GPU's discovered NVLink mask info. (debug only) */
99     NvU64  enabledLinkMask;      /* GPU's currently enabled NvLink mask info. (debug only) */
100 
101     NvU32  gpuCapMask;           /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
102     NvU8   bwMode;               /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
103     NvU8   reserved[31];         /* For future use. Must be initialized to zero */
104 } nvlink_inband_gpu_probe_req_t;
105 
106 typedef struct
107 {
108     nvlink_inband_msg_header_t           msgHdr;
109     nvlink_inband_gpu_probe_req_t        probeReq;
110 } nvlink_inband_gpu_probe_req_msg_t;
111 
/*
 * FM capability flags, one NVBIT64() position each. Presumably these are the
 * bits reported in nvlink_inband_gpu_probe_rsp_t::fmCaps - confirm with users.
 */
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1   NVBIT64(0)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1 NVBIT64(1)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_MIN        NVBIT64(2)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_HALF       NVBIT64(3)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_3QUARTER   NVBIT64(4)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V2   NVBIT64(5)
#define NVLINK_INBAND_FM_CAPS_EGM_ENABLED        NVBIT64(6)
119 
/*
 * Degraded-bandwidth field of the fabricHealthMask members.
 *
 * The first define is a high:low bit range (bits 1:0), not an expression; the
 * three defines after it are the values that field can take.
 * NOTE(review): presumably consumed through the DRF-style field macros of
 * nvmisc.h - confirm against the users of this header.
 */
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW 1:0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED 0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE          1
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE         2
124 
125 typedef struct
126 {
127     NvU64  gpuHandle;             /* Unique handle assigned by initialization entity for this GPU */
128     NvU32  gfId;                  /* GFID which supports NVLink */
129     NvU64  fmCaps;                /* Capability of FM e.g. what features FM support. */
130     NvUuid clusterUuid;           /* Cluster UUID to which this node belongs */
131     NvU16  fabricPartitionId;     /* Partition ID if the GPU belongs to a fabric partition */
132     NvU64  gpaAddress;            /* GPA starting address for the GPU */
133     NvU64  gpaAddressRange;       /* GPU GPA address range */
134     NvU64  flaAddress;            /* FLA starting address for the GPU */
135     NvU64  flaAddressRange;       /* GPU FLA address range */
136     NvU32  linkMaskToBeReduced;   /* bit mask of unused NVLink ports for P2P */
137     NvU32  cliqueId;              /* Fabric Clique Id */
138     NvU32  fabricHealthMask;      /* Mask containing bits indicating various fabric health parameters */
139     NvU32  gpaAddressEGMHi;       /* GPA Address for EGM. Don't use if EGM support is not present in GFM */
140     NvU8   reserved[16];          /* For future use. Must be initialized to zero */
141 } nvlink_inband_gpu_probe_rsp_t;
142 
143 typedef struct
144 {
145     nvlink_inband_msg_header_t           msgHdr;
146     nvlink_inband_gpu_probe_rsp_t        probeRsp;
147 } nvlink_inband_gpu_probe_rsp_msg_t;
148 
149 typedef struct
150 {
151     NvU64  gpuHandle;             /* Unique handle assigned by initialization entity for this GPU */
152     NvU32  cliqueId;              /* Fabric Clique Id*/
153     NvU32  fabricHealthMask;      /* Mask containing bits indicating various fabric health parameters */
154     NvU8   reserved[32];          /* For future use. Must be initialized to zero */
155 } nvlink_inband_gpu_probe_update_req_t;
156 
157 typedef struct
158 {
159     nvlink_inband_msg_header_t               msgHdr;
160     nvlink_inband_gpu_probe_update_req_t     probeUpdate;
161 } nvlink_inband_gpu_probe_update_req_msg_t;
162 
163 typedef struct
164 {
165     NvU64 mcAllocSize;           /* Multicast allocation size requested */
166     NvU32 flags;                 /* For future use. Must be initialized to zero */
167     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
168     NvU16 numGpuHandles;         /* Number of GPUs in this team */
169     NvU64 gpuHandles[];          /* Array of probed handles, should be last */
170 } nvlink_inband_mc_team_setup_req_t;
171 
172 typedef struct
173 {
174     nvlink_inband_msg_header_t           msgHdr;
175     nvlink_inband_mc_team_setup_req_t    mcTeamSetupReq;
176 } nvlink_inband_mc_team_setup_req_msg_t;
177 
178 typedef struct
179 {
180     NvU64 mcAllocSize;           /* Multicast allocation size requested */
181     NvU32 flags;                 /* For future use. Must be initialized to zero */
182     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
183     NvU16 numGpuHandles;         /* Number of GPUs in this team */
184     NvU16 numKeys;               /* Number of keys (a.k.a request ID) used by FM to send response */
185     NvU64 gpuHandlesAndKeys[];   /* Array of probed handles and keys, should be last */
186 
187     /*
188      * The array will be grouped and ordered as: <allGpuHandlesOfNodeA, allGpuHandlesOfNodeB,...
189      * keyForNodeA, keyForNodeB>. The first group of gpuHandles will belong to the exporter node,
190      * which will be followed by the importer nodes.
191      *
192      * Test case: If the exporter and importer nodes are same, then the message will
193      * have multiple keys belonging to the same node as: <allGpuHandlesOfNodeA,...
194      * key1ForNodeA, key2ForNodeA>. Even though all gpuHandles belong to the same node, the
195      * first key should be considered from the exporter node and the rest from the importer
196      * nodes.
197      */
198 } nvlink_inband_mc_team_setup_req_v2_t;
199 
200 typedef struct
201 {
202     nvlink_inband_msg_header_t           msgHdr;
203     nvlink_inband_mc_team_setup_req_v2_t mcTeamSetupReq;
204 } nvlink_inband_mc_team_setup_req_v2_msg_t;
205 
206 typedef struct
207 {
208     NvU64 mcTeamHandle;          /* Unique handle assigned for this Multicast team */
209                                  /* Should be zero if the response is sent to the importer nodes */
210     NvU32 flags;                 /* For future use. Must be initialized to zero */
211     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
212     NvU64 mcAddressBase;         /* FLA starting address assigned for the Multicast slot */
213     NvU64 mcAddressSize;         /* Should be same as mcAllocSize */
214 } nvlink_inband_mc_team_setup_rsp_t;
215 
216 typedef struct
217 {
218     nvlink_inband_msg_header_t           msgHdr;
219     nvlink_inband_mc_team_setup_rsp_t    mcTeamSetupRsp;
220 } nvlink_inband_mc_team_setup_rsp_msg_t;
221 
222 typedef struct
223 {
224     NvU64 mcTeamHandle;          /* Unique handle assigned for the Multicast team */
225     NvU32 flags;                 /* For future use. Must be initialized to zero */
226     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
227 } nvlink_inband_mc_team_release_req_t;
228 
229 typedef struct
230 {
231     nvlink_inband_msg_header_t           msgHdr;
232     nvlink_inband_mc_team_release_req_t  mcTeamReleaseReq;
233 } nvlink_inband_mc_team_release_req_msg_t;
234 
235 typedef struct
236 {
237     /* Fields to be replayed */
238     NvU64  gpuHandle;            /* Unique handle that was provided by FM pre-migration. */
239 
240     /* Other fields from the request */
241     NvU64  pciInfo;              /* Encoded as Domain(63:32):Bus(15:8):Device(0:7). (debug only) */
242     NvU8   moduleId;             /* GPIO based physical/module ID of the GPU. (debug only) */
243     NvUuid gpuUuid;              /* UUID of the GPU. (debug only) */
244     NvU64  discoveredLinkMask;   /* GPU's discovered NVLink mask info. (debug only) */
245     NvU64  enabledLinkMask;      /* GPU's currently enabled NvLink mask info. (debug only) */
246 
247     NvU32  gpuCapMask;           /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
248     NvU8   bwMode;               /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
249     NvU8   reserved[31];         /* For future use. Must be initialized to zero */
250 } nvlink_inband_gpu_probe_replay_req_t;
251 
252 typedef struct
253 {
254     nvlink_inband_msg_header_t           msgHdr;
255     nvlink_inband_gpu_probe_replay_req_t probeReplayReq;
256 } nvlink_inband_gpu_probe_replay_req_msg_t;
257 
258 typedef nvlink_inband_gpu_probe_rsp_t nvlink_inband_gpu_probe_replay_rsp_t;
259 typedef nvlink_inband_gpu_probe_rsp_msg_t nvlink_inband_gpu_probe_replay_rsp_msg_t;
260 
261 typedef struct
262 {
263     /* Fields to be replayed */
264     NvU64 mcTeamHandle;          /* Unique handle assigned for this Multicast team */
265     NvU64 mcAddressBase;         /* FLA starting address assigned for the Multicast slot */
266     NvU64 mcAddressSize;         /* Size of FLA assigned to the Multicast slot */
267 
268     /* Other fields from the request */
269     NvU64 mcAllocSize;           /* Multicast allocation size requested */
270     NvU32 flags;                 /* For future use. Must be initialized to zero */
271     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
272     NvU16 numGpuHandles;         /* Number of GPUs in this team */
273     NvU16 numKeys;               /* Number of keys (a.k.a request ID) used by FM to send response */
274     NvU64 gpuHandlesAndKeys[];   /* Array of probed handles and keys, should be last */
275 } nvlink_inband_mc_team_setup_replay_req_t;
276 
277 
278 typedef struct
279 {
280     nvlink_inband_msg_header_t               msgHdr;
281     nvlink_inband_mc_team_setup_replay_req_t mcTeamSetupReplayReq;
282 } nvlink_inband_mc_team_setup_replay_req_msg_t;
283 
284 typedef nvlink_inband_mc_team_setup_rsp_t nvlink_inband_mc_team_setup_replay_rsp_t;
285 typedef nvlink_inband_mc_team_setup_rsp_msg_t nvlink_inband_mc_team_setup_replay_rsp_msg_t;
286 
287 #pragma pack(pop)
288 
289 /********************* Don't add any message structs after this line ******************************/
290 
291 /* Helpers */
nvlinkInitInbandMsgHdr(nvlink_inband_msg_header_t * pMsgHdr,NvU16 type,NvU32 len,NvU64 requestId)292 static NV_INLINE void nvlinkInitInbandMsgHdr
293 (
294     nvlink_inband_msg_header_t *pMsgHdr,
295     NvU16                       type,
296     NvU32                       len,
297     NvU64                       requestId
298 )
299 {
300     NvU8 i;
301 
302     pMsgHdr->requestId = requestId;
303     pMsgHdr->magicId = NVLINK_INBAND_MSG_MAGIC_ID_FM;
304     pMsgHdr->type = type;
305     pMsgHdr->length = len;
306     pMsgHdr->status = NV_OK;
307 
308     for (i = 0; i < sizeof(pMsgHdr->reserved); i++)
309         pMsgHdr->reserved[i] = 0;
310 }
311 
312 #endif
313