1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef NVLINK_INBAND_MSG_HDR_H
25 #define NVLINK_INBAND_MSG_HDR_H
26 
27 /*
28  * Messages do not have individual versioning; instead, a strict ABI is maintained. When a change
29  * to an existing message is required, do not modify the corresponding message structure: add a
30  * completely new message type (like INBAND_MSG_TYPE_XXX_V1, INBAND_MSG_TYPE_XXX_V2) together with
31  * a corresponding message definition structure. Do not modify existing structs in any way.
32  *
33  * Messages may contain fields which are debug only and must be used for logging purpose. Such
34  * fields shouldn't be trusted.
35  *
36  * - Avoid use of enums or bitfields. Always use fixed types.
37  * - Avoid conditional fields in the structs.
38  * - Avoid nested and complex structs. Keep them simple and flat for ease of encoding and decoding.
39  * - Avoid embedded pointers. Flexible arrays at the end of the struct are allowed.
40  * - Always use the packed struct to typecast inband messages (see the #pragma pack(push, 1) below).
41  * - Always have reserved flags or fields to allow future extension, given the stable ABI conditions.
42  */
43 
44 /* Align to byte boundaries */
45 #pragma pack(push, 1)
46 
47 #include "nvtypes.h"
48 #include "nvmisc.h"
49 #include "nvCpuUuid.h"
50 #include "nvstatus.h"
51 #include "nvstatuscodes.h"
52 
53 #define NVLINK_INBAND_MAX_MSG_SIZE     5120 /* Upper bound on an in-band message (NOTE(review): presumably bytes, header included - confirm) */
54 #define NVLINK_INBAND_MSG_MAGIC_ID_FM  0xadbc /* Value nvlinkInitInbandMsgHdr() stores in nvlink_inband_msg_header_t.magicId */
55 
56 /* Nvlink Inband message types (values for nvlink_inband_msg_header_t.type) */
57 #define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REQ             0
58 #define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP             1
59 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ         2
60 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP         3
61 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ       4
62 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ_V2      5
63 #define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_UPDATE_REQ      6
64 #define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_REQ      7
65 #define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_RSP      NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP /* Alias: same value as the probe response type */
66 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_REQ  8
67 #define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_RSP  NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP /* Alias: same value as the MC team setup response type */
68 #define NVLINK_INBAND_MSG_TYPE_MAX                       9 /* One more than the largest type value assigned above */
69 
70 /* Nvlink Inband message packet header */
71 typedef struct
72 {
73     NvU16     magicId;           /* Identifier to represent in-band msg; nvlinkInitInbandMsgHdr() sets it to NVLINK_INBAND_MSG_MAGIC_ID_FM */
74     NvU64     requestId;         /* Unique id of a request; the matching response carries the same id */
75     NV_STATUS status;            /* High level status of the message/request; nvlinkInitInbandMsgHdr() sets it to NV_OK */
76     NvU16     type;              /* Type of encoded message. One of NVLINK_INBAND_MSG_TYPE_xxx */
77     NvU32     length;            /* Length of encoded message (NOTE(review): unit and whether the header is counted are not stated here) */
78     NvU8      reserved[8];       /* For future use. Must be initialized to zero */
79 } nvlink_inband_msg_header_t;
80 
81 #define NVLINK_INBAND_GPU_PROBE_CAPS_SRIOV_ENABLED NVBIT(0) /* Bit flags for the gpuCapMask field of the probe request structs */
82 #define NVLINK_INBAND_GPU_PROBE_CAPS_PROBE_UPDATE  NVBIT(1)
83 
84 /* Add more caps as needed in the future */
85 
86 #define NVLINK_INBAND_BW_MODE_FULL     0 /* Values for the bwMode field of the probe request structs */
87 #define NVLINK_INBAND_BW_MODE_OFF      1
88 #define NVLINK_INBAND_BW_MODE_MIN      2
89 #define NVLINK_INBAND_BW_MODE_HALF     3
90 #define NVLINK_INBAND_BW_MODE_3QUARTER 4
91 
92 typedef struct
93 {
94     NvU64  pciInfo;              /* Encoded as Domain(63:32):Bus(15:8):Device(7:0). (debug only) */
95     NvU8   moduleId;             /* GPIO based physical/module ID of the GPU. (debug only) */
96     NvUuid gpuUuid;              /* UUID of the GPU. (debug only) */
97     NvU64  discoveredLinkMask;   /* GPU's discovered NVLink mask info. (debug only) */
98     NvU64  enabledLinkMask;      /* GPU's currently enabled NvLink mask info. (debug only) */
99 
100     NvU32  gpuCapMask;           /* GPU capabilities, bitwise OR of NVLINK_INBAND_GPU_PROBE_CAPS_xxx */
101     NvU8   bwMode;               /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE_xxx */
102     NvU8   reserved[31];         /* For future use. Must be initialized to zero */
103 } nvlink_inband_gpu_probe_req_t;
104 
105 typedef struct
106 {
107     nvlink_inband_msg_header_t           msgHdr;   /* Common in-band packet header */
108     nvlink_inband_gpu_probe_req_t        probeReq; /* GPU probe request body, placed right after the header */
109 } nvlink_inband_gpu_probe_req_msg_t;
110 
111 #define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1   NVBIT64(0) /* 64-bit FM capability flags; presumably the bits of the probe response's fmCaps field - confirm */
112 #define NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1 NVBIT64(1)
113 #define NVLINK_INBAND_FM_CAPS_BW_MODE_MIN        NVBIT64(2)
114 #define NVLINK_INBAND_FM_CAPS_BW_MODE_HALF       NVBIT64(3)
115 #define NVLINK_INBAND_FM_CAPS_BW_MODE_3QUARTER   NVBIT64(4)
116 #define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V2   NVBIT64(5)
117 
118 #define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW 1:0 /* hi:lo bit range, not an expression; presumably bits 1:0 of fabricHealthMask, read through field-extraction macros - confirm */
119 #define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED 0 /* Values of the DEGRADED_BW field */
120 #define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE          1
121 #define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE         2
122 
123 typedef struct
124 {
125     NvU64  gpuHandle;             /* Unique handle assigned by initialization entity for this GPU */
126     NvU32  gfId;                  /* GFID which supports NVLink */
127     NvU64  fmCaps;                /* Capabilities of FM, i.e. which features FM supports (see NVLINK_INBAND_FM_CAPS_xxx) */
128     NvUuid clusterUuid;           /* Cluster UUID to which this node belongs */
129     NvU16  fabricPartitionId;     /* Partition ID if the GPU belongs to a fabric partition */
130     NvU64  gpaAddress;            /* GPA starting address for the GPU */
131     NvU64  gpaAddressRange;       /* GPU GPA address range */
132     NvU64  flaAddress;            /* FLA starting address for the GPU */
133     NvU64  flaAddressRange;       /* GPU FLA address range */
134     NvU32  linkMaskToBeReduced;   /* Bit mask of unused NVLink ports for P2P */
135     NvU32  cliqueId;              /* Fabric Clique Id */
136     NvU32  fabricHealthMask;      /* Fabric health parameters (see NVLINK_INBAND_FABRIC_HEALTH_MASK_xxx) */
137     NvU8   reserved[20];          /* For future use. Must be initialized to zero */
138 } nvlink_inband_gpu_probe_rsp_t;
139 
140 typedef struct
141 {
142     nvlink_inband_msg_header_t           msgHdr;   /* Common in-band packet header */
143     nvlink_inband_gpu_probe_rsp_t        probeRsp; /* GPU probe response body, placed right after the header */
144 } nvlink_inband_gpu_probe_rsp_msg_t;
145 
146 typedef struct
147 {
148     NvU64  gpuHandle;             /* Unique handle assigned by initialization entity for this GPU */
149     NvU32  cliqueId;              /* Fabric Clique Id */
150     NvU32  fabricHealthMask;      /* Fabric health parameters (see NVLINK_INBAND_FABRIC_HEALTH_MASK_xxx) */
151     NvU8   reserved[32];          /* For future use. Must be initialized to zero */
152 } nvlink_inband_gpu_probe_update_req_t;
153 
154 typedef struct
155 {
156     nvlink_inband_msg_header_t               msgHdr;      /* Common in-band packet header */
157     nvlink_inband_gpu_probe_update_req_t     probeUpdate; /* Probe update request body, placed right after the header */
158 } nvlink_inband_gpu_probe_update_req_msg_t;
159 
160 typedef struct
161 {
162     NvU64 mcAllocSize;           /* Multicast allocation size requested */
163     NvU32 flags;                 /* For future use. Must be initialized to zero */
164     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
165     NvU16 numGpuHandles;         /* Number of GPUs in this team */
166     NvU64 gpuHandles[];          /* Flexible array of probed handles (presumably numGpuHandles entries); must remain the last field */
167 } nvlink_inband_mc_team_setup_req_t;
168 
169 typedef struct
170 {
171     nvlink_inband_msg_header_t           msgHdr;         /* Common in-band packet header */
172     nvlink_inband_mc_team_setup_req_t    mcTeamSetupReq; /* Request body; ends in a flexible array, so it must stay the last member */
173 } nvlink_inband_mc_team_setup_req_msg_t;
174 
175 typedef struct
176 {
177     NvU64 mcAllocSize;           /* Multicast allocation size requested */
178     NvU32 flags;                 /* For future use. Must be initialized to zero */
179     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
180     NvU16 numGpuHandles;         /* Number of GPUs in this team */
181     NvU16 numKeys;               /* Number of keys (a.k.a. request IDs) used by FM to send responses */
182     NvU64 gpuHandlesAndKeys[];   /* Flexible array of probed handles followed by keys; must remain the last field */
183 
184     /*
185      * The array is grouped and ordered as: <allGpuHandlesOfNodeA, allGpuHandlesOfNodeB,...
186      * keyForNodeA, keyForNodeB>. The first group of gpuHandles belongs to the exporter node,
187      * and is followed by the groups of the importer nodes.
188      *
189      * Test case: If the exporter and importer nodes are the same, then the message will
190      * have multiple keys belonging to the same node as: <allGpuHandlesOfNodeA,...
191      * key1ForNodeA, key2ForNodeA>. Even though all gpuHandles belong to the same node, the
192      * first key should be considered as coming from the exporter node and the rest from the
193      * importer nodes.
194      */
195 } nvlink_inband_mc_team_setup_req_v2_t;
196 
197 typedef struct
198 {
199     nvlink_inband_msg_header_t           msgHdr;         /* Common in-band packet header */
200     nvlink_inband_mc_team_setup_req_v2_t mcTeamSetupReq; /* V2 request body; ends in a flexible array, so it must stay the last member */
201 } nvlink_inband_mc_team_setup_req_v2_msg_t;
202 
203 typedef struct
204 {
205     NvU64 mcTeamHandle;          /* Unique handle assigned for this Multicast team. */
206                                  /* Should be zero if the response is sent to the importer nodes */
207     NvU32 flags;                 /* For future use. Must be initialized to zero */
208     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
209     NvU64 mcAddressBase;         /* FLA starting address assigned for the Multicast slot */
210     NvU64 mcAddressSize;         /* Should be the same as the mcAllocSize of the request */
211 } nvlink_inband_mc_team_setup_rsp_t;
212 
213 typedef struct
214 {
215     nvlink_inband_msg_header_t           msgHdr;         /* Common in-band packet header */
216     nvlink_inband_mc_team_setup_rsp_t    mcTeamSetupRsp; /* MC team setup response body, placed right after the header */
217 } nvlink_inband_mc_team_setup_rsp_msg_t;
218 
219 typedef struct
220 {
221     NvU64 mcTeamHandle;          /* Unique handle assigned for the Multicast team (NOTE(review): presumably the handle from the setup response - confirm) */
222     NvU32 flags;                 /* For future use. Must be initialized to zero */
223     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
224 } nvlink_inband_mc_team_release_req_t;
225 
226 typedef struct
227 {
228     nvlink_inband_msg_header_t           msgHdr;           /* Common in-band packet header */
229     nvlink_inband_mc_team_release_req_t  mcTeamReleaseReq; /* MC team release request body, placed right after the header */
230 } nvlink_inband_mc_team_release_req_msg_t;
231 
232 typedef struct
233 {
234     /* Fields to be replayed */
235     NvU64  gpuHandle;            /* Unique handle that was provided by FM pre-migration. */
236 
237     /* Other fields from the request (same fields, in the same order, as nvlink_inband_gpu_probe_req_t) */
238     NvU64  pciInfo;              /* Encoded as Domain(63:32):Bus(15:8):Device(7:0). (debug only) */
239     NvU8   moduleId;             /* GPIO based physical/module ID of the GPU. (debug only) */
240     NvUuid gpuUuid;              /* UUID of the GPU. (debug only) */
241     NvU64  discoveredLinkMask;   /* GPU's discovered NVLink mask info. (debug only) */
242     NvU64  enabledLinkMask;      /* GPU's currently enabled NvLink mask info. (debug only) */
243 
244     NvU32  gpuCapMask;           /* GPU capabilities, bitwise OR of NVLINK_INBAND_GPU_PROBE_CAPS_xxx */
245     NvU8   bwMode;               /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE_xxx */
246     NvU8   reserved[31];         /* For future use. Must be initialized to zero */
247 } nvlink_inband_gpu_probe_replay_req_t;
248 
249 typedef struct
250 {
251     nvlink_inband_msg_header_t           msgHdr;         /* Common in-band packet header */
252     nvlink_inband_gpu_probe_replay_req_t probeReplayReq; /* Probe replay request body, placed right after the header */
253 } nvlink_inband_gpu_probe_replay_req_msg_t;
254 
255 typedef nvlink_inband_gpu_probe_rsp_t nvlink_inband_gpu_probe_replay_rsp_t; /* Replay response body is the probe response body */
256 typedef nvlink_inband_gpu_probe_rsp_msg_t nvlink_inband_gpu_probe_replay_rsp_msg_t; /* Replay response message is the probe response message */
257 
258 typedef struct
259 {
260     /* Fields to be replayed */
261     NvU64 mcTeamHandle;          /* Unique handle assigned for this Multicast team */
262     NvU64 mcAddressBase;         /* FLA starting address assigned for the Multicast slot */
263     NvU64 mcAddressSize;         /* Size of FLA assigned to the Multicast slot */
264 
265     /* Other fields from the request (same fields, in the same order, as nvlink_inband_mc_team_setup_req_v2_t) */
266     NvU64 mcAllocSize;           /* Multicast allocation size requested */
267     NvU32 flags;                 /* For future use. Must be initialized to zero */
268     NvU8  reserved[8];           /* For future use. Must be initialized to zero */
269     NvU16 numGpuHandles;         /* Number of GPUs in this team */
270     NvU16 numKeys;               /* Number of keys (a.k.a. request IDs) used by FM to send responses */
271     NvU64 gpuHandlesAndKeys[];   /* Flexible array of probed handles and keys; must remain the last field (NOTE(review): ordering presumably as in the V2 setup request - confirm) */
272 } nvlink_inband_mc_team_setup_replay_req_t;
273 
274 
275 typedef struct
276 {
277     nvlink_inband_msg_header_t               msgHdr;               /* Common in-band packet header */
278     nvlink_inband_mc_team_setup_replay_req_t mcTeamSetupReplayReq; /* Replay request body; ends in a flexible array, so it must stay the last member */
279 } nvlink_inband_mc_team_setup_replay_req_msg_t;
280 
281 typedef nvlink_inband_mc_team_setup_rsp_t nvlink_inband_mc_team_setup_replay_rsp_t; /* Replay response body is the MC team setup response body */
282 typedef nvlink_inband_mc_team_setup_rsp_msg_t nvlink_inband_mc_team_setup_replay_rsp_msg_t; /* Replay response message is the MC team setup response message */
283 
284 #pragma pack(pop)
285 
286 /********************* Don't add any message structs after this line ******************************/
287 
288 /* Helpers */
289 /* Fill *pMsgHdr: FM magic id, NV_OK status, the caller's type/len/requestId, and zeroed reserved bytes. */
290 static NV_INLINE void nvlinkInitInbandMsgHdr(nvlink_inband_msg_header_t *pMsgHdr,
291                                              NvU16                       type,
292                                              NvU32                       len,
293                                              NvU64                       requestId)
294 {
295     NvU8 idx = 0;
296 
297     /* Constant fields first, then the values supplied by the caller. */
298     pMsgHdr->magicId   = NVLINK_INBAND_MSG_MAGIC_ID_FM;
299     pMsgHdr->status    = NV_OK;
300     pMsgHdr->type      = type;
301     pMsgHdr->length    = len;
302     pMsgHdr->requestId = requestId;
303 
304     /* The reserved bytes must be initialized to zero. */
305     while (idx < sizeof(pMsgHdr->reserved))
306         pMsgHdr->reserved[idx++] = 0;
307 }
308 
309 #endif
310