1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #ifndef _RDSV3_RDSV3_H 26 #define _RDSV3_RDSV3_H 27 28 /* 29 * The name of this file is rds.h in ofed. 
30 */ 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #include <sys/sunndi.h> 37 #include <netinet/in.h> 38 #include <sys/synch.h> 39 #include <sys/stropts.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 #include <inet/ip.h> 43 #include <sys/avl.h> 44 #include <sys/param.h> 45 #include <sys/rds.h> 46 47 #include <sys/ib/ibtl/ibti.h> 48 #include <sys/ib/clients/of/rdma/ib_verbs.h> 49 #include <sys/ib/clients/of/rdma/ib_addr.h> 50 #include <sys/ib/clients/of/rdma/rdma_cm.h> 51 #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 52 #include <sys/ib/clients/rdsv3/info.h> 53 54 #define NIPQUAD(addr) \ 55 (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 56 (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 57 (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 58 (unsigned char)(ntohl(addr) & 0xFF) 59 60 /* 61 * RDS Network protocol version 62 */ 63 #define RDS_PROTOCOL_3_0 0x0300 64 #define RDS_PROTOCOL_3_1 0x0301 65 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 66 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 67 #define RDS_PROTOCOL_MINOR(v) ((v) & 255) 68 #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 69 70 /* 71 * XXX randomly chosen, but at least seems to be unused: 72 * # 18464-18768 Unassigned 73 * We should do better. We want a reserved port to discourage unpriv'ed 74 * userspace from listening. 75 * 76 * port 18633 was the version that had ack frames on the wire. 
77 */ 78 #define RDSV3_PORT 18634 79 80 #include <sys/ib/clients/rdsv3/info.h> 81 82 /* 83 * RDS trace facilities 84 */ 85 enum { 86 RDSV3_BIND = 0, 87 RDSV3_CONG, 88 RDSV3_CONNECTION, 89 RDSV3_RDMA, 90 RDSV3_PAGE, 91 RDSV3_SEND, 92 RDSV3_RECV, 93 RDSV3_THREADS, 94 RDSV3_INFO, 95 RDSV3_MESSAGE, 96 RDSV3_IB, 97 RDSV3_IB_CM, 98 RDSV3_IB_RDMA, 99 RDSV3_IB_RING, 100 RDSV3_IB_RECV, 101 RDSV3_IB_SEND, 102 RDSV3_TCP, 103 RDSV3_TCP_CONNECT, 104 RDSV3_TCP_LISTEN, 105 RDSV3_TCP_RECV, 106 RDSV3_TCP_SEND 107 }; 108 109 enum { 110 RDSV3_ALWAYS = 0, 111 RDSV3_MINIMAL, 112 RDSV3_LOW, 113 RDSV3_MEDIUM, 114 RDSV3_HIGH, 115 RDSV3_VERBOSE 116 }; 117 118 /* 119 * This is the sad making. Some kernels have a bug in the per_cpu() api which 120 * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 121 * in the module is not cacheline-aligned. As much as we'd like to tell users 122 * with older kernels to stuff it, that's not reasonable. We'll roll our own 123 * until this doesn't have to build against older kernels. 
124 */ 125 #define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 126 #define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 127 #define rdsv3_per_cpu(var, cpu) var[cpu] 128 129 static inline ulong_t 130 ceil(ulong_t x, ulong_t y) 131 { 132 return ((x + y - 1) / y); 133 } 134 135 #define RDSV3_FRAG_SHIFT 12 136 #define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 137 138 #define RDSV3_CONG_MAP_BYTES (65536 / 8) 139 #define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 140 #define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 141 #define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 142 143 struct rdsv3_cong_map { 144 struct avl_node m_rb_node; 145 uint32_be_t m_addr; 146 rdsv3_wait_queue_t m_waitq; 147 struct list m_conn_list; 148 unsigned long m_page_addrs[RDSV3_CONG_MAP_PAGES]; 149 }; 150 151 152 /* 153 * This is how we will track the connection state: 154 * A connection is always in one of the following 155 * states. Updates to the state are atomic and imply 156 * a memory barrier. 
 */
/*
 * Connection state machine values, stored in rdsv3_connection.c_state.
 * Read with atomic_get() and advanced with atomic_cmpxchg(); see
 * rdsv3_conn_state() / rdsv3_conn_transition() below.
 */
enum {
	RDSV3_CONN_DOWN = 0,
	RDSV3_CONN_CONNECTING,
	RDSV3_CONN_DISCONNECTING,
	RDSV3_CONN_UP,
	RDSV3_CONN_ERROR,
};

/* Bits for c_flags */
#define	RDSV3_LL_SEND_FULL	0
#define	RDSV3_RECONNECT_PENDING	1

/*
 * Per-peer RDS connection state, created per (local, foreign) address
 * pair by rdsv3_conn_create() and keyed in a lookup tree via c_hash_node.
 */
struct rdsv3_connection {
	struct avl_node		c_hash_node;	/* lookup-tree linkage */
	uint32_be_t		c_laddr;	/* local IPv4 addr (net order) */
	uint32_be_t		c_faddr;	/* peer IPv4 addr (net order) */
	unsigned int		c_loopback:1;	/* connection to ourselves? */
	/* NOTE(review): presumably the passive side of a loopback pair — confirm in conn.c */
	struct rdsv3_connection	*c_passive;

	struct rdsv3_cong_map	*c_lcong;	/* local congestion map */
	struct rdsv3_cong_map	*c_fcong;	/* peer congestion map */

	struct mutex		c_send_lock;	/* protect send ring */
	/* Transmit-in-progress cursor used by rdsv3_send_xmit(). */
	struct rdsv3_message	*c_xmit_rm;
	unsigned long		c_xmit_sg;
	unsigned int		c_xmit_hdr_off;
	unsigned int		c_xmit_data_off;
	unsigned int		c_xmit_rdma_sent;

	kmutex_t		c_lock;		/* protect msg queues */
	uint64_t		c_next_tx_seq;	/* next h_sequence to assign */
	struct list		c_send_queue;	/* not yet transmitted */
	struct list		c_retrans;	/* sent, awaiting ack */

	uint64_t		c_next_rx_seq;	/* next expected rx sequence */

	struct rdsv3_transport	*c_trans;	/* bound transport ops */
	void			*c_transport_data; /* transport-private state */

	atomic_t		c_state;	/* RDSV3_CONN_* above */
	unsigned long		c_flags;	/* RDSV3_LL_SEND_FULL etc. */
	unsigned long		c_reconnect_jiffies; /* reconnect backoff */
	struct rdsv3_delayed_work_s	c_send_w;	/* rdsv3_send_worker */
	struct rdsv3_delayed_work_s	c_recv_w;	/* rdsv3_recv_worker */
	struct rdsv3_delayed_work_s	c_conn_w;	/* rdsv3_connect_worker */
	struct rdsv3_work_s	c_down_w;	/* rdsv3_shutdown_worker */
	struct mutex		c_cm_lock;	/* protect conn state & cm */

	/* Congestion-map transmit bookkeeping. */
	struct list_node	c_map_item;
	unsigned long		c_map_queued;
	unsigned long		c_map_offset;
	unsigned long		c_map_bytes;

	/* Ack pacing thresholds (see rdsv3_sysctl_max_unacked_*). */
	unsigned int		c_unacked_packets;
	unsigned int		c_unacked_bytes;

	/* Protocol version */
	unsigned int		c_version;
};

/* Wire-header h_flags bits. */
#define	RDSV3_FLAG_CONG_BITMAP	0x01
#define	RDSV3_FLAG_ACK_REQUIRED	0x02
#define	RDSV3_FLAG_RETRANSMITTED	0x04
/* Largest credit count advertisable in the 8-bit h_credit field. */
#define	RDSV3_MAX_ADV_CREDIT	255

/*
 * Maximum space available for extension headers.
 */
#define	RDSV3_HEADER_EXT_SPACE	16

/*
 * The RDS wire header, carried in front of every message.  Multi-byte
 * fields are big-endian (the *_be_t types).
 */
struct rdsv3_header {
	uint64_be_t	h_sequence;	/* per-connection tx sequence */
	uint64_be_t	h_ack;		/* highest sequence acked */
	uint32_be_t	h_len;		/* payload length */
	uint16_be_t	h_sport;	/* source RDS port */
	uint16_be_t	h_dport;	/* destination RDS port */
	uint8_t		h_flags;	/* RDSV3_FLAG_* */
	uint8_t		h_credit;	/* flow-control credits advertised */
	uint8_t		h_padding[4];
	uint16_be_t	h_csum;		/* see rdsv3_message_make_checksum() */

	uint8_t		h_exthdr[RDSV3_HEADER_EXT_SPACE];
};

/* Reserved - indicates end of extensions */
#define	RDSV3_EXTHDR_NONE	0

/*
 * This extension header is included in the very
 * first message that is sent on a new connection,
 * and identifies the protocol level. This will help
 * rolling updates if a future change requires breaking
 * the protocol.
 */
#define	RDSV3_EXTHDR_VERSION	1
struct rdsv3_ext_header_version {
	uint32_be_t	h_version;
};

/*
 * This extension header is included in the RDS message
 * chasing an RDMA operation.
 */
#define	RDSV3_EXTHDR_RDMA	2
struct rdsv3_ext_header_rdma {
	uint32_be_t	h_rdma_rkey;
};

/*
 * This extension header tells the peer about the
 * destination <R_Key,offset> of the requested RDMA
 * operation.
 */
#define	RDSV3_EXTHDR_RDMA_DEST	3
struct rdsv3_ext_header_rdma_dest {
	uint32_be_t	h_rdma_rkey;
	uint32_be_t	h_rdma_offset;
};

#define	__RDSV3_EXTHDR_MAX	16 /* for now */

/*
 * An inbound message as handed up from a transport's receive path
 * (rdsv3_recv_incoming()); refcounted via i_refcount.
 */
struct rdsv3_incoming {
	atomic_t		i_refcount;	/* rdsv3_inc_addref/put */
	struct list_node	i_item;		/* socket rs_recv_queue link */
	struct rdsv3_connection	*i_conn;	/* connection it arrived on */
	struct rdsv3_header	i_hdr;		/* copy of the wire header */
	unsigned long		i_rx_jiffies;	/* arrival timestamp */
	uint32_be_t		i_saddr;	/* sender address (net order) */

	rdsv3_rdma_cookie_t	i_rdma_cookie;	/* RDMA dest, if any */
};

/*
 * m_sock_item and m_conn_item are on lists that are serialized under
 * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
 * the message will not be put back on the retransmit list after being sent.
 * messages that are canceled while being sent rely on this.
 *
 * m_inc is used by loopback so that it can pass an incoming message straight
 * back up into the rx path.  It embeds a wire header which is also used by
 * the send path, which is kind of awkward.
 *
 * m_sock_item indicates the message's presence on a socket's send or receive
 * queue.  m_rs will point to that socket.
 *
 * m_daddr is used by cancellation to prune messages to a given destination.
 *
 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
 * nesting.  As paths iterate over messages on a sock, or conn, they must
 * also lock the conn, or sock, to remove the message from those lists too.
 * Testing the flag to determine if the message is still on the lists lets
 * us avoid testing the list_head directly.  That means each path can use
 * the message's list_head to keep it on a local list while juggling locks
 * without confusing the other path.
 *
 * m_ack_seq is an optional field set by transports who need a different
 * sequence number range to invalidate.  They can use this in a callback
 * that they pass to rdsv3_send_drop_acked() to see if each message has been
 * acked.  The HAS_ACK_SEQ flag can be used to detect messages which haven't
 * had ack_seq set yet.
 */
/* Bit numbers for rdsv3_message.m_flags; see the block comment above. */
#define	RDSV3_MSG_ON_SOCK	1
#define	RDSV3_MSG_ON_CONN	2
#define	RDSV3_MSG_HAS_ACK_SEQ	3
#define	RDSV3_MSG_ACK_REQUIRED	4
#define	RDSV3_MSG_RETRANSMITTED	5
#define	RDSV3_MSG_MAPPED	6
#define	RDSV3_MSG_PAGEVEC	7

/*
 * An outbound RDS message.  Refcounted via m_refcount
 * (rdsv3_message_addref/put); allocated by rdsv3_message_alloc() with
 * m_sg sized to nents (m_sg[1] is a pre-C99 variable-length tail).
 */
struct rdsv3_message {
	atomic_t		m_refcount;
	struct list_node	m_sock_item;	/* socket send/recv queue */
	struct list_node	m_conn_item;	/* conn send/retrans queue */
	struct rdsv3_incoming	m_inc;		/* embedded inc for loopback */
	uint64_t		m_ack_seq;	/* transport ack seq, optional */
	uint32_be_t		m_daddr;	/* destination (net order) */
	unsigned long		m_flags;	/* RDSV3_MSG_* bits */

	/*
	 * Never access m_rs without holding m_rs_lock.
	 * Lock nesting is
	 *  rm->m_rs_lock
	 *   -> rs->rs_lock
	 */
	kmutex_t		m_rs_lock;
	struct rdsv3_sock	*m_rs;		/* owning socket, if queued */
	struct rdsv3_rdma_op	*m_rdma_op;	/* attached RDMA op, if any */
	rdsv3_rdma_cookie_t	m_rdma_cookie;
	struct rdsv3_mr		*m_rdma_mr;
	unsigned int		m_nents;	/* entries in m_sg */
	unsigned int		m_count;
	struct rdsv3_scatterlist	m_sg[1];	/* payload scatterlist */
};

/*
 * The RDS notifier is used (optionally) to tell the application about
 * completed RDMA operations. Rather than keeping the whole rds message
 * around on the queue, we allocate a small notifier that is put on the
 * socket's notifier_list. Notifications are delivered to the application
 * through control messages.
 */
struct rdsv3_notifier {
	list_node_t	n_list;		/* socket rs_notify_queue linkage */
	uint64_t	n_user_token;	/* token supplied by the app */
	int		n_status;	/* completion status */
};

/*
 * struct rdsv3_transport - transport specific behavioural hooks
 *
 * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send
 *        part of a message.  The caller serializes on the send_sem so this
 *        doesn't need to be reentrant for a given conn.  The header must be
 *        sent before the data payload.  .xmit must be prepared to send a
 *        message with no data payload.  .xmit should return the number of
 *        bytes that were sent down the connection, including header bytes.
 *        Returning 0 tells the caller that it doesn't need to perform any
 *        additional work now.  This is usually the case when the transport has
 *        filled the sending queue for its connection and will handle
 *        triggering the rds thread to continue the send when space becomes
 *        available.  Returning -EAGAIN tells the caller to retry the send
 *        immediately.  Returning -ENOMEM tells the caller to retry the send at
 *        some point in the future.
 *
 * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
 *                 it returns the connection can not call rdsv3_recv_incoming().
 *                 This will only be called once after conn_connect returns
 *                 non-zero success.  The caller serializes this with
 *                 the send and connecting paths (xmit_* and conn_*).  The
 *                 transport is responsible for other serialization, including
 *                 rdsv3_recv_incoming().  This is called in process context but
 *                 should try hard not to block.
 *
 * @xmit_cong_map: This asks the transport to send the local bitmap down the
 *                 given connection.  XXX get a better story about the bitmap
 *                 flag and header.
 */

struct rdsv3_transport {
	struct list_node	t_item;		/* registered-transport list */
	char			*t_name;
	unsigned int		t_prefer_loopback:1;

	/* Is addr one of ours?  Used by rdsv3_trans_get_preferred(). */
	int (*laddr_check)(uint32_be_t addr);
	int (*conn_alloc)(struct rdsv3_connection *conn, int gfp);
	void (*conn_free)(void *data);
	int (*conn_connect)(struct rdsv3_connection *conn);
	void (*conn_shutdown)(struct rdsv3_connection *conn);
	void (*xmit_prepare)(struct rdsv3_connection *conn);
	void (*xmit_complete)(struct rdsv3_connection *conn);
	int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm,
	    unsigned int hdr_off, unsigned int sg, unsigned int off);
	int (*xmit_cong_map)(struct rdsv3_connection *conn,
	    struct rdsv3_cong_map *map, unsigned long offset);
	int (*xmit_rdma)(struct rdsv3_connection *conn,
	    struct rdsv3_rdma_op *op);
	int (*recv)(struct rdsv3_connection *conn);
	int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio,
	    size_t size);
	void (*inc_purge)(struct rdsv3_incoming *inc);
	void (*inc_free)(struct rdsv3_incoming *inc);

	/* RDMA connection-manager callbacks. */
	int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
	    struct rdma_cm_event *event);
	int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
	void (*cm_connect_complete)(struct rdsv3_connection *conn,
	    struct rdma_cm_event *event);

	unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter,
	    unsigned int avail);
	void (*exit)(void);
	/* Memory-registration hooks; trans_private is get_mr()'s return. */
	void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg,
	    struct rdsv3_sock *rs, uint32_t *key_ret);
	void (*sync_mr)(void *trans_private, int direction);
	void (*free_mr)(void *trans_private, int invalidate);
	void (*flush_mrs)(void);
};

/*
 * Per-socket RDS state, reached from the rsock through sk_protinfo
 * (see rdsv3_sk_to_rs()).
 */
struct rdsv3_sock {
	struct rsock		*rs_sk;		/* back-pointer to the rsock */

	uint64_t		rs_user_addr;
	uint64_t		rs_user_bytes;

	/*
	 * bound_addr used for both incoming and outgoing, no INADDR_ANY
	 * support.
	 */
	struct avl_node		rs_bound_node;	/* bound-socket tree linkage */
	uint32_be_t		rs_bound_addr;
	uint32_be_t		rs_conn_addr;
	uint16_be_t		rs_bound_port;
	uint16_be_t		rs_conn_port;

	/*
	 * This is only used to communicate the transport between bind and
	 * initiating connections. All other trans use is referenced through
	 * the connection.
	 */
	struct rdsv3_transport	*rs_transport;

	/*
	 * rdsv3_sendmsg caches the conn it used the last time around.
	 * This helps avoid costly lookups.
	 */
	struct rdsv3_connection	*rs_conn;
	kmutex_t		rs_conn_lock;

	/* flag indicating we were congested or not */
	int			rs_congested;

	/* rs_lock protects all these adjacent members before the newline */
	kmutex_t		rs_lock;
	struct list		rs_send_queue;
	uint32_t		rs_snd_bytes;
	int			rs_rcv_bytes;
	/* currently used for failed RDMAs */
	struct list		rs_notify_queue;

	/*
	 * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
	 * to decide whether the application should be woken up.
	 * If not set, we use rs_cong_track to find out whether a cong map
	 * update arrived.
	 */
	uint64_t		rs_cong_mask;
	uint64_t		rs_cong_notify;
	struct list_node	rs_cong_list;
	unsigned long		rs_cong_track;

	/*
	 * rs_recv_lock protects the receive queue, and is
	 * used to serialize with rdsv3_release.
 */
	krwlock_t		rs_recv_lock;
	struct list		rs_recv_queue;

	/* just for stats reporting */
	struct list_node	rs_item;

	/* these have their own lock */
	kmutex_t		rs_rdma_lock;
	struct avl_tree		rs_rdma_keys;

	/* Socket options - in case there will be more */
	unsigned char		rs_recverr,
				rs_cong_monitor;

	cred_t			*rs_cred;	/* opener's credentials */
	zoneid_t		rs_zoneid;	/* opener's zone */
};

/* Map an rsock to its RDS socket state (stashed in sk_protinfo). */
static inline struct rdsv3_sock *
rdsv3_sk_to_rs(const struct rsock *sk)
{
	return ((struct rdsv3_sock *)sk->sk_protinfo);
}

/* Inverse of rdsv3_sk_to_rs(): recover the rsock from the RDS socket. */
static inline struct rsock *
rdsv3_rs_to_sk(const struct rdsv3_sock *rs)
{
	return ((struct rsock *)rs->rs_sk);
}

/*
 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
 * to account for overhead. We don't account for overhead, we just apply
 * the number of payload bytes to the specified value.
 */
static inline int
rdsv3_sk_sndbuf(struct rdsv3_sock *rs)
{
	/* XXX */
	return (rdsv3_rs_to_sk(rs)->sk_sndbuf);
}

static inline int
rdsv3_sk_rcvbuf(struct rdsv3_sock *rs)
{
	/* XXX */
	return (rdsv3_rs_to_sk(rs)->sk_rcvbuf);
}

/*
 * Per-CPU protocol counters, bumped via the rdsv3_stats_inc()/
 * rdsv3_stats_add() macros below and reported by rdsv3_stats_info_copy().
 */
struct rdsv3_statistics {
	uint64_t	s_conn_reset;
	uint64_t	s_recv_drop_bad_checksum;
	uint64_t	s_recv_drop_old_seq;
	uint64_t	s_recv_drop_no_sock;
	uint64_t	s_recv_drop_dead_sock;
	uint64_t	s_recv_deliver_raced;
	uint64_t	s_recv_delivered;
	uint64_t	s_recv_queued;
	uint64_t	s_recv_immediate_retry;
	uint64_t	s_recv_delayed_retry;
	uint64_t	s_recv_ack_required;
	uint64_t	s_recv_rdma_bytes;
	uint64_t	s_recv_ping;
	uint64_t	s_send_queue_empty;
	uint64_t	s_send_queue_full;
	uint64_t	s_send_sem_contention;
	uint64_t	s_send_sem_queue_raced;
	uint64_t	s_send_immediate_retry;
	uint64_t	s_send_delayed_retry;
	uint64_t	s_send_drop_acked;
	uint64_t	s_send_ack_required;
	uint64_t	s_send_queued;
	uint64_t	s_send_rdma;
	uint64_t
	    s_send_rdma_bytes;
	uint64_t	s_send_pong;
	uint64_t	s_page_remainder_hit;
	uint64_t	s_page_remainder_miss;
	uint64_t	s_copy_to_user;
	uint64_t	s_copy_from_user;
	uint64_t	s_cong_update_queued;
	uint64_t	s_cong_update_received;
	uint64_t	s_cong_send_error;
	uint64_t	s_cong_send_blocked;
};

/* af_rds.c */
void rdsv3_sock_addref(struct rdsv3_sock *rs);
void rdsv3_sock_put(struct rdsv3_sock *rs);
void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs);
void __rdsv3_wake_sk_sleep(struct rsock *sk);

extern rdsv3_wait_queue_t rdsv3_poll_waitq;

/* bind.c */
int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t len, cred_t *cr);
void rdsv3_remove_bound(struct rdsv3_sock *rs);
struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port);

/* conn.c */
int rdsv3_conn_init(void);
void rdsv3_conn_exit(void);
struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr,
    struct rdsv3_transport *trans, int gfp);
struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr,
    uint32_be_t faddr,
    struct rdsv3_transport *trans, int gfp);
void rdsv3_conn_destroy(struct rdsv3_connection *conn);
void rdsv3_conn_reset(struct rdsv3_connection *conn);
void rdsv3_conn_drop(struct rdsv3_connection *conn);
void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len,
    struct rdsv3_info_iterator *iter,
    struct rdsv3_info_lengths *lens,
    int (*visitor)(struct rdsv3_connection *, void *),
    size_t item_len);

/*
 * Atomically move c_state from "old" to "new".
 * Returns non-zero iff the transition happened.
 */
static inline int
rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new)
{
	return (atomic_cmpxchg(&conn->c_state, old, new) == old);
}

/* Current RDSV3_CONN_* state of the connection. */
static inline int
rdsv3_conn_state(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state));
}

/* Non-zero iff the connection is fully established. */
static inline int
rdsv3_conn_up(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state) == RDSV3_CONN_UP);
}

/* Non-zero iff a connection attempt is in flight. */
static inline int
rdsv3_conn_connecting(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING);
}

/* recv.c */
void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn,
    uint32_be_t saddr);
void rdsv3_inc_addref(struct rdsv3_incoming *inc);
void rdsv3_inc_put(struct rdsv3_incoming *inc);
void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr,
    uint32_be_t daddr,
    struct rdsv3_incoming *inc, int gfp);
int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio,
    struct msghdr *msg, size_t size, int msg_flags);
void rdsv3_clear_recv_queue(struct rdsv3_sock *rs);
int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg);
void rdsv3_inc_info_copy(struct rdsv3_incoming *inc,
    struct rdsv3_info_iterator *iter,
    uint32_be_t saddr, uint32_be_t daddr, int flip);

/* page.c */
int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat,
    unsigned long bytes, int gfp);

/* send.c */
int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg,
    size_t payload_len);
void rdsv3_send_reset(struct rdsv3_connection *conn);
int rdsv3_send_xmit(struct rdsv3_connection *conn);
struct sockaddr_in;
void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest);
/* Callback type for rdsv3_send_drop_acked(): has rm been acked by "ack"? */
typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack);
void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack,
    is_acked_func is_acked);
int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq);
void rdsv3_send_remove_from_sock(struct list *messages, int status);
int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport);
struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *,
    struct rdsv3_rdma_op *);

/* rdma.c */
void
rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 670 671 /* cong.c */ 672 void rdsv3_cong_init(void); 673 int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 674 void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 675 void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 676 void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port); 677 void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 678 int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 679 struct rdsv3_sock *rs); 680 void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 681 void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 682 int rdsv3_cong_updated_since(unsigned long *recent); 683 void rdsv3_cong_add_socket(struct rdsv3_sock *); 684 void rdsv3_cong_remove_socket(struct rdsv3_sock *); 685 void rdsv3_cong_exit(void); 686 struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 687 688 /* stats.c */ 689 RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 690 #define rdsv3_stats_inc_which(which, member) do { \ 691 rdsv3_per_cpu(which, get_cpu()).member++; \ 692 put_cpu(); \ 693 } while (0) 694 #define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 695 #define rdsv3_stats_add_which(which, member, count) do { \ 696 rdsv3_per_cpu(which, get_cpu()).member += count; \ 697 put_cpu(); \ 698 } while (0) 699 #define rdsv3_stats_add(member, count) \ 700 rdsv3_stats_add_which(rdsv3_stats, member, count) 701 int rdsv3_stats_init(void); 702 void rdsv3_stats_exit(void); 703 void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 704 uint64_t *values, char **names, size_t nr); 705 706 707 /* sysctl.c */ 708 int rdsv3_sysctl_init(void); 709 void rdsv3_sysctl_exit(void); 710 extern unsigned long rdsv3_sysctl_sndbuf_min; 711 extern unsigned long rdsv3_sysctl_sndbuf_default; 712 extern unsigned long rdsv3_sysctl_sndbuf_max; 713 extern unsigned long 
rdsv3_sysctl_reconnect_min_jiffies; 714 extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 715 extern unsigned int rdsv3_sysctl_max_unacked_packets; 716 extern unsigned int rdsv3_sysctl_max_unacked_bytes; 717 extern unsigned int rdsv3_sysctl_ping_enable; 718 extern unsigned long rdsv3_sysctl_trace_flags; 719 extern unsigned int rdsv3_sysctl_trace_level; 720 721 /* threads.c */ 722 int rdsv3_threads_init(); 723 void rdsv3_threads_exit(void); 724 extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 725 void rdsv3_connect_worker(struct rdsv3_work_s *); 726 void rdsv3_shutdown_worker(struct rdsv3_work_s *); 727 void rdsv3_send_worker(struct rdsv3_work_s *); 728 void rdsv3_recv_worker(struct rdsv3_work_s *); 729 void rdsv3_connect_complete(struct rdsv3_connection *conn); 730 731 /* transport.c */ 732 int rdsv3_trans_register(struct rdsv3_transport *trans); 733 void rdsv3_trans_unregister(struct rdsv3_transport *trans); 734 struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 735 unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 736 unsigned int avail); 737 void rdsv3_trans_exit(void); 738 739 /* message.c */ 740 struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 741 struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 742 size_t total_len); 743 struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 744 unsigned int total_len); 745 void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 746 uint16_be_t dport, uint64_t seq); 747 int rdsv3_message_add_extension(struct rdsv3_header *hdr, 748 unsigned int type, const void *data, unsigned int len); 749 int rdsv3_message_next_extension(struct rdsv3_header *hdr, 750 unsigned int *pos, void *buf, unsigned int *buflen); 751 int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 752 unsigned int version); 753 int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 754 unsigned int 
    *version);
int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr,
    uint32_t r_key, uint32_t offset);
int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
    uio_t *uio, size_t size);
void rdsv3_message_inc_purge(struct rdsv3_incoming *inc);
void rdsv3_message_inc_free(struct rdsv3_incoming *inc);
void rdsv3_message_addref(struct rdsv3_message *rm);
void rdsv3_message_put(struct rdsv3_message *rm);
void rdsv3_message_wait(struct rdsv3_message *rm);
void rdsv3_message_unmapped(struct rdsv3_message *rm);

/*
 * Compute h_csum over the whole wire header, with h_csum itself zeroed
 * first so the stored checksum covers a well-defined value.
 * NOTE(review): sizeof (*hdr) >> 2 passes the length in 32-bit words,
 * matching the usual ip_fast_csum() convention — confirm against the
 * rdsv3_ip_fast_csum() definition in rdsv3_impl.h.
 */
static inline void
rdsv3_message_make_checksum(struct rdsv3_header *hdr)
{
	hdr->h_csum = 0;
	hdr->h_csum =
	    rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2);
}

/*
 * Accept a header whose checksum field is zero (unchecksummed) or whose
 * full-header checksum folds to zero; returns non-zero on success.
 */
static inline int
rdsv3_message_verify_checksum(const struct rdsv3_header *hdr)
{
	return (!hdr->h_csum ||
	    rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0);
}

/* rdsv3_sc.c */
extern boolean_t rdsv3_if_lookup_by_name(char *if_name);
extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr);

#ifdef __cplusplus
}
#endif

#endif	/* _RDSV3_RDSV3_H */