/*
 * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVLINK_INBAND_MSG_HDR_H
#define NVLINK_INBAND_MSG_HDR_H

/*
 * NVLink in-band message definitions shared between GPU driver (RM) and the
 * Fabric Manager (FM).  This header describes a wire format, so its layout is
 * a frozen ABI:
 *
 * Messages do not have individual versioning, instead a strict ABI is maintained. When a change is
 * required on existing message, instead of modifying corresponding message structure, a completely
 * new message type (like INBAND_MSG_TYPE_XXX_V1, INBAND_MSG_TYPE_XXX_V2) and corresponding message
 * definition structure needs to be added. Do not modify existing structs in any way.
 *
 * Messages may contain fields which are debug only and must be used for logging purpose. Such
 * fields shouldn't be trusted.
 *
 * - Avoid use of enums or bitfields. Always use fixed types.
 * - Avoid conditional fields in the structs.
 * - Avoid nested and complex structs. Keep them simple and flat for ease of encoding and decoding.
 * - Avoid embedded pointers. Flexible arrays at the end of the struct are allowed.
 * - Always use the packed struct to typecast inband messages; see the packing
 *   pragma below, which guarantees an identical byte layout on both ends.
 * - Always have reserved flags or fields to CYA given the stable ABI conditions.
 */

/* Align to byte boundaries: no compiler-inserted padding anywhere in this header */
#pragma pack(push, 1)

#include "nvtypes.h"
#include "nvmisc.h"
#include "nvCpuUuid.h"
#include "nvstatus.h"
#include "nvstatuscodes.h"

#define NVLINK_INBAND_MAX_MSG_SIZE        5120
#define NVLINK_INBAND_MSG_MAGIC_ID_FM     0xadbc

/* Nvlink Inband messages types */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REQ             0
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP             1
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ         2
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP         3
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_RELEASE_REQ       4
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REQ_V2      5
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_UPDATE_REQ      6
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_REQ      7
/* Replay responses reuse the original response types (same payload layout) */
#define NVLINK_INBAND_MSG_TYPE_GPU_PROBE_REPLAY_RSP      NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_REQ  8
#define NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_REPLAY_RSP  NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP
#define NVLINK_INBAND_MSG_TYPE_MAX                       9

/* Nvlink Inband message packet header; prefixes every message defined below */
typedef struct
{
    NvU16     magicId;      /* Identifier to represent in-band msg, e.g. NVLINK_INBAND_MSG_MAGIC_ID_FM */
    NvU64     requestId;    /* Unique Id for a request and response will carry same id */
    NV_STATUS status;       /* High level status of the message/request */
    NvU16     type;         /* Type of encoded message. One of NVLINK_INBAND_MSG_TYPE_xxx */
    NvU32     length;       /* Length of encoded message */
    NvU8      reserved[8];  /* For future use. Must be initialized to zero */
} nvlink_inband_msg_header_t;

/* GPU capability bits advertised in nvlink_inband_gpu_probe_req_t::gpuCapMask */
#define NVLINK_INBAND_GPU_PROBE_CAPS_SRIOV_ENABLED  NVBIT(0)
#define NVLINK_INBAND_GPU_PROBE_CAPS_PROBE_UPDATE   NVBIT(1)

/* Add more caps as needed in the future */

/* NVLink bandwidth modes carried in the probe request's bwMode field */
#define NVLINK_INBAND_BW_MODE_FULL      0
#define NVLINK_INBAND_BW_MODE_OFF       1
#define NVLINK_INBAND_BW_MODE_MIN       2
#define NVLINK_INBAND_BW_MODE_HALF      3
#define NVLINK_INBAND_BW_MODE_3QUARTER  4

/* GPU -> FM: announce this GPU and its NVLink capabilities */
typedef struct
{
    NvU64  pciInfo;             /* Encoded as Domain(63:32):Bus(15:8):Device(7:0). (debug only) */
    NvU8   moduleId;            /* GPIO based physical/module ID of the GPU. (debug only) */
    NvUuid gpuUuid;             /* UUID of the GPU. (debug only) */
    NvU64  discoveredLinkMask;  /* GPU's discovered NVLink mask info. (debug only) */
    NvU64  enabledLinkMask;     /* GPU's currently enabled NvLink mask info. (debug only) */

    NvU32  gpuCapMask;          /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
    NvU8   bwMode;              /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
    NvU8   reserved[31];        /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_req_t;

typedef struct
{
    nvlink_inband_msg_header_t    msgHdr;
    nvlink_inband_gpu_probe_req_t probeReq;
} nvlink_inband_gpu_probe_req_msg_t;

/* FM capability bits reported in nvlink_inband_gpu_probe_rsp_t::fmCaps */
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V1    NVBIT64(0)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_RELEASE_V1  NVBIT64(1)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_MIN         NVBIT64(2)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_HALF        NVBIT64(3)
#define NVLINK_INBAND_FM_CAPS_BW_MODE_3QUARTER    NVBIT64(4)
#define NVLINK_INBAND_FM_CAPS_MC_TEAM_SETUP_V2    NVBIT64(5)

/* Bit-range (hi:lo) and values of the degraded-BW field inside fabricHealthMask */
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW                1:0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_NOT_SUPPORTED  0
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_TRUE           1
#define NVLINK_INBAND_FABRIC_HEALTH_MASK_DEGRADED_BW_FALSE          2

/* FM -> GPU: probe response carrying the fabric addressing assigned to the GPU */
typedef struct
{
    NvU64  gpuHandle;            /* Unique handle assigned by initialization entity for this GPU */
    NvU32  gfId;                 /* GFID which supports NVLink */
    NvU64  fmCaps;               /* Capability of FM e.g. what features FM support. */
    NvUuid clusterUuid;          /* Cluster UUID to which this node belongs */
    NvU16  fabricPartitionId;    /* Partition ID if the GPU belongs to a fabric partition */
    NvU64  gpaAddress;           /* GPA starting address for the GPU */
    NvU64  gpaAddressRange;      /* GPU GPA address range */
    NvU64  flaAddress;           /* FLA starting address for the GPU */
    NvU64  flaAddressRange;      /* GPU FLA address range */
    NvU32  linkMaskToBeReduced;  /* bit mask of unused NVLink ports for P2P */
    NvU32  cliqueId;             /* Fabric Clique Id */
    NvU32  fabricHealthMask;     /* Mask containing bits indicating various fabric health parameters */
    NvU8   reserved[20];         /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_rsp_t;

typedef struct
{
    nvlink_inband_msg_header_t    msgHdr;
    nvlink_inband_gpu_probe_rsp_t probeRsp;
} nvlink_inband_gpu_probe_rsp_msg_t;

/* FM -> GPU: unsolicited update of fabric state for an already-probed GPU */
typedef struct
{
    NvU64 gpuHandle;         /* Unique handle assigned by initialization entity for this GPU */
    NvU32 cliqueId;          /* Fabric Clique Id */
    NvU32 fabricHealthMask;  /* Mask containing bits indicating various fabric health parameters */
    NvU8  reserved[32];      /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_update_req_t;

typedef struct
{
    nvlink_inband_msg_header_t           msgHdr;
    nvlink_inband_gpu_probe_update_req_t probeUpdate;
} nvlink_inband_gpu_probe_update_req_msg_t;

/* GPU -> FM: request creation of a multicast team (V1) */
typedef struct
{
    NvU64 mcAllocSize;     /* Multicast allocation size requested */
    NvU32 flags;           /* For future use. Must be initialized to zero */
    NvU8  reserved[8];     /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;   /* Number of GPUs in this team */
    NvU64 gpuHandles[];    /* Array of probed handles, should be last */
} nvlink_inband_mc_team_setup_req_t;

typedef struct
{
    nvlink_inband_msg_header_t        msgHdr;
    nvlink_inband_mc_team_setup_req_t mcTeamSetupReq;
} nvlink_inband_mc_team_setup_req_msg_t;

/* GPU -> FM: multicast team setup (V2) — adds per-node response keys */
typedef struct
{
    NvU64 mcAllocSize;          /* Multicast allocation size requested */
    NvU32 flags;                /* For future use. Must be initialized to zero */
    NvU8  reserved[8];          /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;        /* Number of GPUs in this team */
    NvU16 numKeys;              /* Number of keys (a.k.a request ID) used by FM to send response */
    NvU64 gpuHandlesAndKeys[];  /* Array of probed handles and keys, should be last */

    /*
     * The array will be grouped and ordered as: <allGpuHandlesOfNodeA, allGpuHandlesOfNodeB,...
     * keyForNodeA, keyForNodeB>. The first group of gpuHandles will belong to the exporter node,
     * which will be followed by the importer nodes.
     *
     * Test case: If the exporter and importer nodes are same, then the message will
     * have multiple keys belonging to the same node as: <allGpuHandlesOfNodeA,...
     * key1ForNodeA, key2ForNodeA>. Even though all gpuHandles belong to the same node, the
     * first key should be considered from the exporter node and the rest from the importer
     * nodes.
     */
} nvlink_inband_mc_team_setup_req_v2_t;

typedef struct
{
    nvlink_inband_msg_header_t           msgHdr;
    nvlink_inband_mc_team_setup_req_v2_t mcTeamSetupReq;
} nvlink_inband_mc_team_setup_req_v2_msg_t;

/* FM -> GPU: multicast team setup response with the assigned FLA window */
typedef struct
{
    NvU64 mcTeamHandle;   /* Unique handle assigned for this Multicast team */
                          /* Should be zero if the response is sent to the importer nodes */
    NvU32 flags;          /* For future use. Must be initialized to zero */
    NvU8  reserved[8];    /* For future use. Must be initialized to zero */
    NvU64 mcAddressBase;  /* FLA starting address assigned for the Multicast slot */
    NvU64 mcAddressSize;  /* Should be same as mcAllocSize */
} nvlink_inband_mc_team_setup_rsp_t;

typedef struct
{
    nvlink_inband_msg_header_t        msgHdr;
    nvlink_inband_mc_team_setup_rsp_t mcTeamSetupRsp;
} nvlink_inband_mc_team_setup_rsp_msg_t;

/* GPU -> FM: release a previously set up multicast team */
typedef struct
{
    NvU64 mcTeamHandle;  /* Unique handle assigned for the Multicast team */
    NvU32 flags;         /* For future use. Must be initialized to zero */
    NvU8  reserved[8];   /* For future use. Must be initialized to zero */
} nvlink_inband_mc_team_release_req_t;

typedef struct
{
    nvlink_inband_msg_header_t          msgHdr;
    nvlink_inband_mc_team_release_req_t mcTeamReleaseReq;
} nvlink_inband_mc_team_release_req_msg_t;

/* GPU -> FM: re-send probe state after e.g. migration, replaying the prior handle */
typedef struct
{
    /* Fields to be replayed */
    NvU64  gpuHandle;           /* Unique handle that was provided by FM pre-migration. */

    /* Other fields from the request */
    NvU64  pciInfo;             /* Encoded as Domain(63:32):Bus(15:8):Device(7:0). (debug only) */
    NvU8   moduleId;            /* GPIO based physical/module ID of the GPU. (debug only) */
    NvUuid gpuUuid;             /* UUID of the GPU. (debug only) */
    NvU64  discoveredLinkMask;  /* GPU's discovered NVLink mask info. (debug only) */
    NvU64  enabledLinkMask;     /* GPU's currently enabled NvLink mask info. (debug only) */

    NvU32  gpuCapMask;          /* GPU capabilities, one of NVLINK_INBAND_GPU_PROBE_CAPS */
    NvU8   bwMode;              /* NVLink bandwidth mode, one of NVLINK_INBAND_BW_MODE */
    NvU8   reserved[31];        /* For future use. Must be initialized to zero */
} nvlink_inband_gpu_probe_replay_req_t;

typedef struct
{
    nvlink_inband_msg_header_t           msgHdr;
    nvlink_inband_gpu_probe_replay_req_t probeReplayReq;
} nvlink_inband_gpu_probe_replay_req_msg_t;

/* Replay responses are layout-identical to the original probe responses */
typedef nvlink_inband_gpu_probe_rsp_t     nvlink_inband_gpu_probe_replay_rsp_t;
typedef nvlink_inband_gpu_probe_rsp_msg_t nvlink_inband_gpu_probe_replay_rsp_msg_t;

/* GPU -> FM: replay a multicast team setup, carrying the previously assigned results */
typedef struct
{
    /* Fields to be replayed */
    NvU64 mcTeamHandle;         /* Unique handle assigned for this Multicast team */
    NvU64 mcAddressBase;        /* FLA starting address assigned for the Multicast slot */
    NvU64 mcAddressSize;        /* Size of FLA assigned to the Multicast slot */

    /* Other fields from the request */
    NvU64 mcAllocSize;          /* Multicast allocation size requested */
    NvU32 flags;                /* For future use. Must be initialized to zero */
    NvU8  reserved[8];          /* For future use. Must be initialized to zero */
    NvU16 numGpuHandles;        /* Number of GPUs in this team */
    NvU16 numKeys;              /* Number of keys (a.k.a request ID) used by FM to send response */
    NvU64 gpuHandlesAndKeys[];  /* Array of probed handles and keys, should be last */
} nvlink_inband_mc_team_setup_replay_req_t;


typedef struct
{
    nvlink_inband_msg_header_t               msgHdr;
    nvlink_inband_mc_team_setup_replay_req_t mcTeamSetupReplayReq;
} nvlink_inband_mc_team_setup_replay_req_msg_t;

/* Replay responses are layout-identical to the original setup responses */
typedef nvlink_inband_mc_team_setup_rsp_t     nvlink_inband_mc_team_setup_replay_rsp_t;
typedef nvlink_inband_mc_team_setup_rsp_msg_t nvlink_inband_mc_team_setup_replay_rsp_msg_t;

#pragma pack(pop)

/********************* Don't add any message structs after this line ******************************/

/* Helpers */

/*
 * Initialize an in-band message header in place.
 *
 * Fills in the FM magic id, the caller-supplied type, payload length and
 * request id, sets status to NV_OK, and zeroes the reserved bytes (required
 * by the ABI rules above).  `len` is the length of the encoded payload that
 * follows the header.
 */
static NV_INLINE void nvlinkInitInbandMsgHdr
(
    nvlink_inband_msg_header_t *pMsgHdr,
    NvU16                       type,
    NvU32                       len,
    NvU64                       requestId
)
{
    NvU8 i;

    pMsgHdr->requestId = requestId;
    pMsgHdr->magicId   = NVLINK_INBAND_MSG_MAGIC_ID_FM;
    pMsgHdr->type      = type;
    pMsgHdr->length    = len;
    pMsgHdr->status    = NV_OK;

    /* Byte loop (not memset) keeps this header free of libc dependencies */
    for (i = 0; i < sizeof(pMsgHdr->reserved); i++)
        pMsgHdr->reserved[i] = 0;
}

#endif