/*
 * This file contains definitions used in OFED defined user/kernel
 * interfaces. These are imported from the OFED header <linux/rds.h>. Oracle
 * elects to have and use the contents of <linux/rds.h> under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */
/*
 * Copyright (c) 2008 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#ifndef _SYS_RDS_H
#define	_SYS_RDS_H

#ifdef __cplusplus
extern "C" {
#endif

#if !(defined(__SVR4) && defined(__sun))
#include <linux/types.h>
#else
#include <sys/types.h>
#include <sys/socket.h>
#endif

/*
 * These sparse annotated types shouldn't be in any user
 * visible header file. We should clean this up rather
 * than kludging around them.
 *
 * On non-Solaris userland builds the __beNN names are mapped onto the
 * u_intNN_t names; on Solaris the u_intNN_t names are mapped onto the
 * <sys/types.h> uintNN_t types instead.
 */
#if !(defined(__SVR4) && defined(__sun))
#ifndef __KERNEL__
#define	__be16	u_int16_t
#define	__be32	u_int32_t
#define	__be64	u_int64_t
#endif
#else
#define	u_int8_t	uint8_t
#define	u_int16_t	uint16_t
#define	u_int32_t	uint32_t
#define	u_int64_t	uint64_t
#endif

#define	RDS_IB_ABI_VERSION	0x301

#define	AF_RDS	AF_INET_OFFLOAD
#define	PF_RDS	AF_INET_OFFLOAD
#define	SOL_RDS	272

/*
 * setsockopt/getsockopt for SOL_RDS
 */
#define	RDS_CANCEL_SENT_TO	1
#define	RDS_GET_MR		2
#define	RDS_FREE_MR		3
/* deprecated: RDS_BARRIER 4 */
#define	RDS_RECVERR		5
#define	RDS_CONG_MONITOR	6
#define	RDS_GET_MR_FOR_DEST	7


/*
 * Control message types for SOL_RDS.
 *
 * RDS_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rds_rdma_args.
 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDS_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rds_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_CONG_UPDATE
 *	Carries a congestion update to the application;
 *	see "Congestion monitoring" below.
 */
#define	RDS_CMSG_RDMA_ARGS	1
#define	RDS_CMSG_RDMA_DEST	2
#define	RDS_CMSG_RDMA_MAP	3
#define	RDS_CMSG_RDMA_STATUS	4
#define	RDS_CMSG_CONG_UPDATE	5

/*
 * Info request identifiers. RDS_INFO_FIRST and RDS_INFO_LAST bracket the
 * range of values defined here.
 */
#define	RDS_INFO_FIRST			10000
#define	RDS_INFO_COUNTERS		10000
#define	RDS_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define	RDS_INFO_SEND_MESSAGES		10003
#define	RDS_INFO_RETRANS_MESSAGES	10004
#define	RDS_INFO_RECV_MESSAGES		10005
#define	RDS_INFO_SOCKETS		10006
#define	RDS_INFO_TCP_SOCKETS		10007
#define	RDS_INFO_IB_CONNECTIONS		10008
#define	RDS_INFO_CONNECTION_STATS	10009
#define	RDS_INFO_IWARP_CONNECTIONS	10010
#define	RDS_INFO_LAST			10010

#if defined(__SVR4) && defined(__sun)
/*
 * Solaris-only argument block.
 * NOTE(review): lenp/datap presumably carry user addresses of a length
 * word and a data buffer as 64-bit integers - confirm against the caller.
 */
struct rds_info_arg {
	uint64_t	lenp;
	uint64_t	datap;
};
#endif

/*
 * The rds_info_* records below are each declared twice. The normal
 * variant is packed (both "#pragma pack(1)" and the packed attribute are
 * applied) so that no padding is inserted between members. The
 * __lock_lint variant declares the same members without any packing
 * directives.
 * NOTE(review): presumably the lock_lint tool cannot parse the packing
 * directives - confirm.
 */

/* A counter: a 32-byte name and its 64-bit value. */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_counter {
	u_int8_t	name[32];
	u_int64_t	value;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_counter {
	u_int8_t	name[32];
	u_int64_t	value;
};
#endif

/* Bit values for rds_info_connection.flags (presumably - TODO confirm). */
#define	RDS_INFO_CONNECTION_FLAG_SENDING	0x01
#define	RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define	RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04

/* Size in bytes of the rds_info_connection.transport name field. */
#define	TRANSNAMSIZ	16

/* Per-connection record: sequence numbers, addresses, transport, flags. */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_connection {
	u_int64_t	next_tx_seq;
	u_int64_t	next_rx_seq;
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int8_t	transport[TRANSNAMSIZ];	/* null term ascii */
	u_int8_t	flags;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_connection {
	u_int64_t	next_tx_seq;
	u_int64_t	next_rx_seq;
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int8_t	transport[TRANSNAMSIZ];	/* null term ascii */
	u_int8_t	flags;
};
#endif

/*
 * Per-flow record: addresses, byte count and ports.
 * RDS_INFO_FLOWS (10002) is marked deprecated above.
 */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_flow {
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int32_t	bytes;
	u_int16_t	lport;
	u_int16_t	fport;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_flow {
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int32_t	bytes;
	u_int16_t	lport;
	u_int16_t	fport;
};
#endif

/* Bit values for rds_info_message.flags (presumably - TODO confirm). */
#define	RDS_INFO_MESSAGE_FLAG_ACK	0x01
#define	RDS_INFO_MESSAGE_FLAG_FAST_ACK	0x02

/* Per-message record: sequence number, length, addresses, ports, flags. */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_message {
	u_int64_t	seq;
	u_int32_t	len;
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int16_t	lport;
	u_int16_t	fport;
	u_int8_t	flags;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_message {
	u_int64_t	seq;
	u_int32_t	len;
	u_int32_t	laddr;
	u_int32_t	faddr;
	u_int16_t	lport;
	u_int16_t	fport;
	u_int8_t	flags;
};
#endif

/* Per-socket record: buffer sizes, bound/connected endpoints, inum. */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_socket {
	u_int32_t	sndbuf;
	u_int32_t	bound_addr;
	u_int32_t	connected_addr;
	u_int16_t	bound_port;
	u_int16_t	connected_port;
	u_int32_t	rcvbuf;
	u_int64_t	inum;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_socket {
	u_int32_t	sndbuf;
	u_int32_t	bound_addr;
	u_int32_t	connected_addr;
	u_int16_t	bound_port;
	u_int16_t	connected_port;
	u_int32_t	rcvbuf;
	u_int64_t	inum;
};
#endif

/*
 * Per-TCP-socket record: local/peer endpoints, remaining header and data
 * counts, and the last sent/expected/seen sequence values.
 *
 * The __lock_lint variant is a plain struct like every other __lock_lint
 * variant in this file; it must not carry the packed attribute or any
 * extra closing brace.
 */
#ifndef __lock_lint
#pragma pack(1)
struct rds_info_tcp_socket {
	u_int32_t	local_addr;
	u_int16_t	local_port;
	u_int32_t	peer_addr;
	u_int16_t	peer_port;
	u_int64_t	hdr_rem;
	u_int64_t	data_rem;
	u_int32_t	last_sent_nxt;
	u_int32_t	last_expected_una;
	u_int32_t	last_seen_una;
} __attribute__((packed));
#pragma pack()
#else
struct rds_info_tcp_socket {
	u_int32_t	local_addr;
	u_int16_t	local_port;
	u_int32_t	peer_addr;
	u_int16_t	peer_port;
	u_int64_t	hdr_rem;
	u_int64_t	data_rem;
	u_int32_t	last_sent_nxt;
	u_int32_t	last_expected_una;
	u_int32_t	last_seen_una;
};
#endif

/* Length in bytes of the GID fields in rds_info_rdma_connection. */
#define	RDS_IB_GID_LEN	16

/*
 * RDMA connection record: addresses, GIDs and resource limits.
 * Unlike the records above, this struct is not packed.
 */
struct rds_info_rdma_connection {
	u_int32_t	src_addr;
	u_int32_t	dst_addr;
	uint8_t		src_gid[RDS_IB_GID_LEN];
	uint8_t		dst_gid[RDS_IB_GID_LEN];

	uint32_t	max_send_wr;
	uint32_t	max_recv_wr;
	uint32_t	max_send_sge;
	uint32_t	rdma_mr_max;
	uint32_t	rdma_mr_size;
};

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the list bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDS_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 *
 * The macro argument is parenthesized before the cast so that an
 * expression argument (e.g. "base + off") is converted as a whole; the
 * resulting bit index is therefore always in 0..63 and the shift in
 * RDS_CONG_MONITOR_MASK is always well defined.
 */
#define	RDS_CONG_MONITOR_SIZE	64
#define	RDS_CONG_MONITOR_BIT(port)	\
	(((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
#define	RDS_CONG_MONITOR_MASK(port)	(1ULL << RDS_CONG_MONITOR_BIT(port))

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef u_int64_t	rds_rdma_cookie_t;

/* One memory range: a start address and a length in bytes. */
struct rds_iovec {
	u_int64_t	addr;
	u_int64_t	bytes;
};

/*
 * Argument for the GET_MR setsockopt and the cmsg_data of
 * RDS_CMSG_RDMA_MAP.
 * NOTE(review): cookie_addr is presumably the user address where the
 * resulting rds_rdma_cookie_t is stored - confirm.
 */
struct rds_get_mr_args {
	struct rds_iovec	vec;
	u_int64_t		cookie_addr;
	uint64_t		flags;
};

/*
 * Same members as rds_get_mr_args, preceded by a destination address.
 * NOTE(review): presumably the argument for RDS_GET_MR_FOR_DEST - confirm.
 */
struct rds_get_mr_for_dest_args {
	struct sockaddr_storage	dest_addr;
	struct rds_iovec	vec;
	u_int64_t		cookie_addr;
	uint64_t		flags;
};


/*
 * A cookie plus flags.
 * NOTE(review): presumably the argument for RDS_FREE_MR - confirm.
 */
struct rds_free_mr_args {
	rds_rdma_cookie_t	cookie;
	u_int64_t		flags;
};

/* The cmsg_data of RDS_CMSG_RDMA_ARGS. */
struct rds_rdma_args {
	rds_rdma_cookie_t	cookie;
	struct rds_iovec	remote_vec;
	u_int64_t		local_vec_addr;
	u_int64_t		nr_local;
	u_int64_t		flags;
	u_int64_t		user_token;
};

/*
 * A user token paired with a status.
 * NOTE(review): status presumably takes one of the RDS_RDMA_* status
 * values defined below - confirm.
 */
struct rds_rdma_notify {
	u_int64_t	user_token;
	int32_t		status;
};

#define	RDS_RDMA_SUCCESS	0
#define	RDS_RDMA_REMOTE_ERROR	1
#define	RDS_RDMA_CANCELED	2
#define	RDS_RDMA_DROPPED	3
#define	RDS_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs
 */
#define	RDS_RDMA_READWRITE	0x0001
#define	RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
#define	RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define	RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define	RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define	RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */

#ifdef __cplusplus
}
#endif

#endif /* _SYS_RDS_H */