1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #ifndef _RDSV3_RDSV3_H
26 #define	_RDSV3_RDSV3_H
27 
28 /*
 * The name of this file is rds.h in OFED.
30  */
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 #include <sys/sunndi.h>
37 #include <netinet/in.h>
38 #include <sys/synch.h>
39 #include <sys/stropts.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 #include <inet/ip.h>
43 #include <sys/avl.h>
44 #include <sys/param.h>
45 #include <sys/rds.h>
46 
47 #include <sys/ib/ibtl/ibti.h>
48 #include <sys/ib/clients/of/rdma/ib_verbs.h>
49 #include <sys/ib/clients/of/rdma/ib_addr.h>
50 #include <sys/ib/clients/of/rdma/rdma_cm.h>
51 #include <sys/ib/clients/rdsv3/rdsv3_impl.h>
52 #include <sys/ib/clients/rdsv3/info.h>
53 
54 #define	NIPQUAD(addr)					\
55 	(unsigned char)((ntohl(addr) >> 24) & 0xFF),	\
56 	(unsigned char)((ntohl(addr) >> 16) & 0xFF),	\
57 	(unsigned char)((ntohl(addr) >>  8) & 0xFF),	\
58 	(unsigned char)(ntohl(addr) & 0xFF)
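
/*
 * NIPQUAD expands to the four octets of an IPv4 address in host byte
 * order, most significant first, for use with a "%u.%u.%u.%u" format
 * string.  Illustrative use only (addr stands for any uint32_be_t
 * address):
 *
 *	cmn_err(CE_NOTE, "peer %u.%u.%u.%u", NIPQUAD(addr));
 */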
59 
60 /*
61  * RDS Network protocol version
62  */
63 #define	RDS_PROTOCOL_3_0	0x0300
64 #define	RDS_PROTOCOL_3_1	0x0301
65 #define	RDS_PROTOCOL_VERSION	RDS_PROTOCOL_3_1
66 #define	RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
67 #define	RDS_PROTOCOL_MINOR(v)	((v) & 255)
68 #define	RDS_PROTOCOL(maj, min)	(((maj) << 8) | min)
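
/*
 * Illustrative only: RDS_PROTOCOL(3, 1) yields 0x0301, and the MAJOR/MINOR
 * macros recover the two halves, e.g.
 *
 *	ASSERT(RDS_PROTOCOL(3, 1) == RDS_PROTOCOL_3_1);
 *	ASSERT(RDS_PROTOCOL_MAJOR(RDS_PROTOCOL_VERSION) == 3);
 *	ASSERT(RDS_PROTOCOL_MINOR(RDS_PROTOCOL_VERSION) == 1);
 */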
69 
70 /*
71  * XXX randomly chosen, but at least seems to be unused:
72  * #               18464-18768 Unassigned
73  * We should do better.  We want a reserved port to discourage unpriv'ed
74  * userspace from listening.
75  *
76  * port 18633 was the version that had ack frames on the wire.
77  */
78 #define	RDSV3_PORT	18634
79 
82 /*
83  * RDS trace facilities
84  */
85 enum {
86 	RDSV3_BIND = 0,
87 	RDSV3_CONG,
88 	RDSV3_CONNECTION,
89 	RDSV3_RDMA,
90 	RDSV3_PAGE,
91 	RDSV3_SEND,
92 	RDSV3_RECV,
93 	RDSV3_THREADS,
94 	RDSV3_INFO,
95 	RDSV3_MESSAGE,
96 	RDSV3_IB,
97 	RDSV3_IB_CM,
98 	RDSV3_IB_RDMA,
99 	RDSV3_IB_RING,
100 	RDSV3_IB_RECV,
101 	RDSV3_IB_SEND,
102 	RDSV3_TCP,
103 	RDSV3_TCP_CONNECT,
104 	RDSV3_TCP_LISTEN,
105 	RDSV3_TCP_RECV,
106 	RDSV3_TCP_SEND
107 };
108 
109 enum {
110 	RDSV3_ALWAYS = 0,
111 	RDSV3_MINIMAL,
112 	RDSV3_LOW,
113 	RDSV3_MEDIUM,
114 	RDSV3_HIGH,
115 	RDSV3_VERBOSE
116 };
117 
118 /*
119  * This is the sad making.  Some kernels have a bug in the per_cpu() api which
120  * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section
121  * in the module is not cacheline-aligned.  As much as we'd like to tell users
122  * with older kernels to stuff it, that's not reasonable.  We'll roll our own
123  * until this doesn't have to build against older kernels.
124  */
125 #define	RDSV3_DEFINE_PER_CPU(type, var)  type var[NR_CPUS]
126 #define	RDSV3_DECLARE_PER_CPU(type, var)  extern type var[NR_CPUS]
127 #define	rdsv3_per_cpu(var, cpu)  var[cpu]
128 
/*
 * Integer ceiling division: the smallest q such that q * y >= x
 * (for example, ceil(10, 4) == 3).
 */
static inline ulong_t
ceil(ulong_t x, ulong_t y)
{
	return ((x + y - 1) / y);
}
134 
135 #define	RDSV3_FRAG_SHIFT	12
136 #define	RDSV3_FRAG_SIZE	((unsigned int)(1 << RDSV3_FRAG_SHIFT))
137 
138 #define	RDSV3_CONG_MAP_BYTES	(65536 / 8)
139 #define	RDSV3_CONG_MAP_LONGS	(RDSV3_CONG_MAP_BYTES / sizeof (unsigned long))
140 #define	RDSV3_CONG_MAP_PAGES	(RDSV3_CONG_MAP_BYTES / PAGE_SIZE)
141 #define	RDSV3_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)
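
/*
 * The congestion map carries one bit per 16-bit port, so it is always
 * 65536 / 8 = 8192 bytes.  As an illustration, with a 4K PAGE_SIZE that
 * is RDSV3_CONG_MAP_PAGES == 2 pages, each covering
 * RDSV3_CONG_MAP_PAGE_BITS == 32768 ports.
 */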
142 
143 struct rdsv3_cong_map {
144 	struct avl_node		m_rb_node;
145 	uint32_be_t		m_addr;
146 	rdsv3_wait_queue_t	m_waitq;
147 	struct list		m_conn_list;
148 	unsigned long		m_page_addrs[RDSV3_CONG_MAP_PAGES];
149 };
150 
151 
152 /*
153  * This is how we will track the connection state:
154  * A connection is always in one of the following
155  * states. Updates to the state are atomic and imply
156  * a memory barrier.
157  */
158 enum {
159 	RDSV3_CONN_DOWN = 0,
160 	RDSV3_CONN_CONNECTING,
161 	RDSV3_CONN_DISCONNECTING,
162 	RDSV3_CONN_UP,
	RDSV3_CONN_ERROR
164 };
165 
166 /* Bits for c_flags */
167 #define	RDSV3_LL_SEND_FULL	0
168 #define	RDSV3_RECONNECT_PENDING	1
169 
170 struct rdsv3_connection {
171 	struct avl_node		c_hash_node;
172 	uint32_be_t		c_laddr;
173 	uint32_be_t		c_faddr;
174 	unsigned int		c_loopback:1;
175 	struct rdsv3_connection	*c_passive;
176 
177 	struct rdsv3_cong_map	*c_lcong;
178 	struct rdsv3_cong_map	*c_fcong;
179 
180 	struct mutex		c_send_lock;    /* protect send ring */
181 	struct rdsv3_message	*c_xmit_rm;
182 	unsigned long		c_xmit_sg;
183 	unsigned int		c_xmit_hdr_off;
184 	unsigned int		c_xmit_data_off;
185 	unsigned int		c_xmit_rdma_sent;
186 
187 	kmutex_t		c_lock;		/* protect msg queues */
188 	uint64_t		c_next_tx_seq;
189 	struct list		c_send_queue;
190 	struct list		c_retrans;
191 
192 	uint64_t		c_next_rx_seq;
193 
194 	struct rdsv3_transport	*c_trans;
195 	void			*c_transport_data;
196 
197 	atomic_t		c_state;
198 	unsigned long		c_flags;
199 	unsigned long		c_reconnect_jiffies;
200 	struct rdsv3_delayed_work_s	c_send_w;
201 	struct rdsv3_delayed_work_s	c_recv_w;
202 	struct rdsv3_delayed_work_s	c_conn_w;
203 	struct rdsv3_work_s	c_down_w;
204 	struct mutex		c_cm_lock;	/* protect conn state & cm */
205 
206 	struct list_node	c_map_item;
207 	unsigned long		c_map_queued;
208 	unsigned long		c_map_offset;
209 	unsigned long		c_map_bytes;
210 
211 	unsigned int		c_unacked_packets;
212 	unsigned int		c_unacked_bytes;
213 
214 	/* Protocol version */
215 	unsigned int		c_version;
216 };
217 
218 #define	RDSV3_FLAG_CONG_BITMAP		0x01
219 #define	RDSV3_FLAG_ACK_REQUIRED		0x02
220 #define	RDSV3_FLAG_RETRANSMITTED	0x04
221 #define	RDSV3_MAX_ADV_CREDIT		255
222 
223 /*
224  * Maximum space available for extension headers.
225  */
226 #define	RDSV3_HEADER_EXT_SPACE    16
227 
228 struct rdsv3_header {
229 	uint64_be_t	h_sequence;
230 	uint64_be_t	h_ack;
231 	uint32_be_t	h_len;
232 	uint16_be_t	h_sport;
233 	uint16_be_t	h_dport;
234 	uint8_t		h_flags;
235 	uint8_t		h_credit;
236 	uint8_t		h_padding[4];
237 	uint16_be_t	h_csum;
238 
239 	uint8_t		h_exthdr[RDSV3_HEADER_EXT_SPACE];
240 };
241 
242 /* Reserved - indicates end of extensions */
243 #define	RDSV3_EXTHDR_NONE		0
244 
245 /*
246  * This extension header is included in the very
247  * first message that is sent on a new connection,
248  * and identifies the protocol level. This will help
249  * rolling updates if a future change requires breaking
250  * the protocol.
251  */
252 #define	RDSV3_EXTHDR_VERSION	1
253 struct rdsv3_ext_header_version {
254 	uint32_be_t	h_version;
255 };
256 
257 /*
258  * This extension header is included in the RDS message
259  * chasing an RDMA operation.
260  */
261 #define	RDSV3_EXTHDR_RDMA		2
262 struct rdsv3_ext_header_rdma {
263 	uint32_be_t	h_rdma_rkey;
264 };
265 
266 /*
267  * This extension header tells the peer about the
268  * destination <R_Key,offset> of the requested RDMA
269  * operation.
270  */
271 #define	RDSV3_EXTHDR_RDMA_DEST    3
272 struct rdsv3_ext_header_rdma_dest {
273 	uint32_be_t		h_rdma_rkey;
274 	uint32_be_t		h_rdma_offset;
275 };
276 
277 #define	__RDSV3_EXTHDR_MAX	16 /* for now */
278 
279 struct rdsv3_incoming {
280 	atomic_t		i_refcount;
281 	struct list_node	i_item;
282 	struct rdsv3_connection	*i_conn;
283 	struct rdsv3_header	i_hdr;
284 	unsigned long		i_rx_jiffies;
285 	uint32_be_t		i_saddr;
286 
287 	rdsv3_rdma_cookie_t	i_rdma_cookie;
288 };
289 
290 /*
291  * m_sock_item and m_conn_item are on lists that are serialized under
292  * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
293  * the message will not be put back on the retransmit list after being sent.
 * Messages that are canceled while being sent rely on this.
295  *
296  * m_inc is used by loopback so that it can pass an incoming message straight
297  * back up into the rx path.  It embeds a wire header which is also used by
298  * the send path, which is kind of awkward.
299  *
300  * m_sock_item indicates the message's presence on a socket's send or receive
301  * queue.  m_rs will point to that socket.
302  *
303  * m_daddr is used by cancellation to prune messages to a given destination.
304  *
 * The RDSV3_MSG_ON_SOCK and RDSV3_MSG_ON_CONN flags are used to avoid lock
306  * nesting.  As paths iterate over messages on a sock, or conn, they must
307  * also lock the conn, or sock, to remove the message from those lists too.
308  * Testing the flag to determine if the message is still on the lists lets
309  * us avoid testing the list_head directly.  That means each path can use
310  * the message's list_head to keep it on a local list while juggling locks
311  * without confusing the other path.
312  *
313  * m_ack_seq is an optional field set by transports who need a different
314  * sequence number range to invalidate.  They can use this in a callback
315  * that they pass to rdsv3_send_drop_acked() to see if each message has been
316  * acked.  The HAS_ACK_SEQ flag can be used to detect messages which haven't
317  * had ack_seq set yet.
318  */
319 #define	RDSV3_MSG_ON_SOCK		1
320 #define	RDSV3_MSG_ON_CONN		2
321 #define	RDSV3_MSG_HAS_ACK_SEQ		3
322 #define	RDSV3_MSG_ACK_REQUIRED		4
323 #define	RDSV3_MSG_RETRANSMITTED		5
324 #define	RDSV3_MSG_MAPPED		6
325 #define	RDSV3_MSG_PAGEVEC		7
326 
327 struct rdsv3_message {
328 	atomic_t		m_refcount;
329 	struct list_node	m_sock_item;
330 	struct list_node	m_conn_item;
331 	struct rdsv3_incoming	m_inc;
332 	uint64_t		m_ack_seq;
333 	uint32_be_t		m_daddr;
334 	unsigned long		m_flags;
335 
336 	/*
337 	 * Never access m_rs without holding m_rs_lock.
338 	 * Lock nesting is
339 	 *  rm->m_rs_lock
340 	 *   -> rs->rs_lock
341 	 */
342 	kmutex_t		m_rs_lock;
343 	struct rdsv3_sock	*m_rs;
344 	struct rdsv3_rdma_op	*m_rdma_op;
345 	rdsv3_rdma_cookie_t	m_rdma_cookie;
346 	struct rdsv3_mr		*m_rdma_mr;
347 	unsigned int		m_nents;
348 	unsigned int		m_count;
349 	struct rdsv3_scatterlist	m_sg[1];
350 };
351 
352 /*
353  * The RDS notifier is used (optionally) to tell the application about
354  * completed RDMA operations. Rather than keeping the whole rds message
355  * around on the queue, we allocate a small notifier that is put on the
356  * socket's notifier_list. Notifications are delivered to the application
357  * through control messages.
358  */
359 struct rdsv3_notifier {
360 	list_node_t	n_list;
361 	uint64_t	n_user_token;
362 	int		n_status;
363 };
364 
365 /*
366  * struct rdsv3_transport -  transport specific behavioural hooks
367  *
368  * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send
369  *	  part of a message.  The caller serializes on the send_sem so this
370  *	  doesn't need to be reentrant for a given conn.  The header must be
371  *	  sent before the data payload.  .xmit must be prepared to send a
372  *	  message with no data payload.  .xmit should return the number of
373  *	  bytes that were sent down the connection, including header bytes.
374  *	  Returning 0 tells the caller that it doesn't need to perform any
375  *	  additional work now.  This is usually the case when the transport has
376  *	  filled the sending queue for its connection and will handle
377  *	  triggering the rds thread to continue the send when space becomes
378  *	  available.  Returning -EAGAIN tells the caller to retry the send
379  *	  immediately.  Returning -ENOMEM tells the caller to retry the send at
380  *	  some point in the future.
381  *
382  * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
383  *		   it returns the connection can not call rdsv3_recv_incoming().
384  *		   This will only be called once after conn_connect returns
385  *		   non-zero success and will The caller serializes this with
386  *		   the send and connecting paths (xmit_* and conn_*).  The
387  *		   transport is responsible for other serialization, including
388  *		   rdsv3_recv_incoming().  This is called in process context but
389  *		   should try hard not to block.
390  *
391  * @xmit_cong_map: This asks the transport to send the local bitmap down the
392  *		   given connection.  XXX get a better story about the bitmap
393  *		   flag and header.
394  */
395 
396 struct rdsv3_transport {
397 	struct list_node	t_item;
398 	char			*t_name;
399 	unsigned int		t_prefer_loopback:1;
400 
401 	int (*laddr_check)(uint32_be_t addr);
402 	int (*conn_alloc)(struct rdsv3_connection *conn, int gfp);
403 	void (*conn_free)(void *data);
404 	int (*conn_connect)(struct rdsv3_connection *conn);
405 	void (*conn_shutdown)(struct rdsv3_connection *conn);
406 	void (*xmit_prepare)(struct rdsv3_connection *conn);
407 	void (*xmit_complete)(struct rdsv3_connection *conn);
408 	int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm,
409 	    unsigned int hdr_off, unsigned int sg, unsigned int off);
410 	int (*xmit_cong_map)(struct rdsv3_connection *conn,
411 	    struct rdsv3_cong_map *map, unsigned long offset);
412 	int (*xmit_rdma)(struct rdsv3_connection *conn,
413 	    struct rdsv3_rdma_op *op);
414 	int (*recv)(struct rdsv3_connection *conn);
415 	int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio,
416 	    size_t size);
417 	void (*inc_purge)(struct rdsv3_incoming *inc);
418 	void (*inc_free)(struct rdsv3_incoming *inc);
419 
420 	int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
421 	    struct rdma_cm_event *event);
422 	int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
423 	void (*cm_connect_complete)(struct rdsv3_connection *conn,
424 	    struct rdma_cm_event *event);
425 
426 	unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter,
427 	    unsigned int avail);
428 	void (*exit)(void);
429 	void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg,
430 	    struct rdsv3_sock *rs, uint32_t *key_ret);
431 	void (*sync_mr)(void *trans_private, int direction);
432 	void (*free_mr)(void *trans_private, int invalidate);
433 	void (*flush_mrs)(void);
434 };
435 
436 struct rdsv3_sock {
437 	struct rsock		*rs_sk;
438 
439 	uint64_t		rs_user_addr;
440 	uint64_t		rs_user_bytes;
441 
442 	/*
443 	 * bound_addr used for both incoming and outgoing, no INADDR_ANY
444 	 * support.
445 	 */
446 	struct avl_node		rs_bound_node;
447 	uint32_be_t		rs_bound_addr;
448 	uint32_be_t		rs_conn_addr;
449 	uint16_be_t		rs_bound_port;
450 	uint16_be_t		rs_conn_port;
451 
452 	/*
453 	 * This is only used to communicate the transport between bind and
454 	 * initiating connections. All other trans use is referenced through
455 	 * the connection.
456 	 */
457 	struct rdsv3_transport	*rs_transport;
458 
459 	/*
460 	 * rdsv3_sendmsg caches the conn it used the last time around.
461 	 * This helps avoid costly lookups.
462 	 */
463 	struct rdsv3_connection	*rs_conn;
464 	kmutex_t 		rs_conn_lock;
465 
	/* flag indicating whether we were congested */
467 	int			rs_congested;
468 
	/* rs_lock protects the adjacent members below, up to the blank line */
470 	kmutex_t		rs_lock;
471 	struct list		rs_send_queue;
472 	uint32_t		rs_snd_bytes;
473 	int			rs_rcv_bytes;
474 	/* currently used for failed RDMAs */
475 	struct list		rs_notify_queue;
476 
477 	/*
478 	 * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
479 	 * to decide whether the application should be woken up.
480 	 * If not set, we use rs_cong_track to find out whether a cong map
481 	 * update arrived.
482 	 */
483 	uint64_t		rs_cong_mask;
484 	uint64_t		rs_cong_notify;
485 	struct list_node	rs_cong_list;
486 	unsigned long		rs_cong_track;
487 
488 	/*
489 	 * rs_recv_lock protects the receive queue, and is
490 	 * used to serialize with rdsv3_release.
491 	 */
492 	krwlock_t		rs_recv_lock;
493 	struct list		rs_recv_queue;
494 
495 	/* just for stats reporting */
496 	struct list_node	rs_item;
497 
498 	/* these have their own lock */
499 	kmutex_t		rs_rdma_lock;
500 	struct avl_tree		rs_rdma_keys;
501 
502 	/* Socket options - in case there will be more */
503 	unsigned char		rs_recverr,
504 				rs_cong_monitor;
505 
506 	cred_t			*rs_cred;
507 	zoneid_t		rs_zoneid;
508 };
509 
510 static inline struct rdsv3_sock *
511 rdsv3_sk_to_rs(const struct rsock *sk)
512 {
513 	return ((struct rdsv3_sock *)sk->sk_protinfo);
514 }
515 
516 static inline struct rsock *
517 rdsv3_rs_to_sk(const struct rdsv3_sock *rs)
518 {
519 	return ((struct rsock *)rs->rs_sk);
520 }
521 
522 /*
523  * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
524  * to account for overhead.  We don't account for overhead, we just apply
525  * the number of payload bytes to the specified value.
526  */
527 static inline int
528 rdsv3_sk_sndbuf(struct rdsv3_sock *rs)
529 {
530 	/* XXX */
531 	return (rdsv3_rs_to_sk(rs)->sk_sndbuf);
532 }
533 
534 static inline int
535 rdsv3_sk_rcvbuf(struct rdsv3_sock *rs)
536 {
537 	/* XXX */
538 	return (rdsv3_rs_to_sk(rs)->sk_rcvbuf);
539 }
540 
541 struct rdsv3_statistics {
542 	uint64_t	s_conn_reset;
543 	uint64_t	s_recv_drop_bad_checksum;
544 	uint64_t	s_recv_drop_old_seq;
545 	uint64_t	s_recv_drop_no_sock;
546 	uint64_t	s_recv_drop_dead_sock;
547 	uint64_t	s_recv_deliver_raced;
548 	uint64_t	s_recv_delivered;
549 	uint64_t	s_recv_queued;
550 	uint64_t	s_recv_immediate_retry;
551 	uint64_t	s_recv_delayed_retry;
552 	uint64_t	s_recv_ack_required;
553 	uint64_t	s_recv_rdma_bytes;
554 	uint64_t	s_recv_ping;
555 	uint64_t	s_send_queue_empty;
556 	uint64_t	s_send_queue_full;
557 	uint64_t	s_send_sem_contention;
558 	uint64_t	s_send_sem_queue_raced;
559 	uint64_t	s_send_immediate_retry;
560 	uint64_t	s_send_delayed_retry;
561 	uint64_t	s_send_drop_acked;
562 	uint64_t	s_send_ack_required;
563 	uint64_t	s_send_queued;
564 	uint64_t	s_send_rdma;
565 	uint64_t	s_send_rdma_bytes;
566 	uint64_t	s_send_pong;
567 	uint64_t	s_page_remainder_hit;
568 	uint64_t	s_page_remainder_miss;
569 	uint64_t	s_copy_to_user;
570 	uint64_t	s_copy_from_user;
571 	uint64_t	s_cong_update_queued;
572 	uint64_t	s_cong_update_received;
573 	uint64_t	s_cong_send_error;
574 	uint64_t	s_cong_send_blocked;
575 };
576 
577 /* af_rds.c */
578 void rdsv3_sock_addref(struct rdsv3_sock *rs);
579 void rdsv3_sock_put(struct rdsv3_sock *rs);
580 void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs);
581 void __rdsv3_wake_sk_sleep(struct rsock *sk);
582 
583 extern rdsv3_wait_queue_t rdsv3_poll_waitq;
584 
585 /* bind.c */
586 int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
587     socklen_t len, cred_t *cr);
588 void rdsv3_remove_bound(struct rdsv3_sock *rs);
589 struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port);
590 
591 /* conn.c */
592 int rdsv3_conn_init(void);
593 void rdsv3_conn_exit(void);
594 struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr,
595     struct rdsv3_transport *trans, int gfp);
596 struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr,
597     uint32_be_t faddr,
598     struct rdsv3_transport *trans, int gfp);
599 void rdsv3_conn_destroy(struct rdsv3_connection *conn);
600 void rdsv3_conn_reset(struct rdsv3_connection *conn);
601 void rdsv3_conn_drop(struct rdsv3_connection *conn);
602 void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len,
603     struct rdsv3_info_iterator *iter,
604     struct rdsv3_info_lengths *lens,
605     int (*visitor)(struct rdsv3_connection *, void *),
606     size_t item_len);
607 
608 static inline int
609 rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new)
610 {
611 	return (atomic_cmpxchg(&conn->c_state, old, new) == old);
612 }
613 
614 static inline int
615 rdsv3_conn_state(struct rdsv3_connection *conn)
616 {
617 	return (atomic_get(&conn->c_state));
618 }
619 
620 static inline int
621 rdsv3_conn_up(struct rdsv3_connection *conn)
622 {
623 	return (atomic_get(&conn->c_state) == RDSV3_CONN_UP);
624 }
625 
626 static inline int
627 rdsv3_conn_connecting(struct rdsv3_connection *conn)
628 {
629 	return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING);
630 }
631 
632 /* recv.c */
633 void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn,
634     uint32_be_t saddr);
635 void rdsv3_inc_addref(struct rdsv3_incoming *inc);
636 void rdsv3_inc_put(struct rdsv3_incoming *inc);
637 void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr,
638     uint32_be_t daddr,
639     struct rdsv3_incoming *inc, int gfp);
640 int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio,
641     struct msghdr *msg, size_t size, int msg_flags);
642 void rdsv3_clear_recv_queue(struct rdsv3_sock *rs);
643 int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg);
644 void rdsv3_inc_info_copy(struct rdsv3_incoming *inc,
645     struct rdsv3_info_iterator *iter,
646     uint32_be_t saddr, uint32_be_t daddr, int flip);
647 
648 /* page.c */
649 int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat,
650     unsigned long bytes, int gfp);
651 
652 /* send.c */
653 int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg,
654     size_t payload_len);
655 void rdsv3_send_reset(struct rdsv3_connection *conn);
656 int rdsv3_send_xmit(struct rdsv3_connection *conn);
657 struct sockaddr_in;
658 void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest);
659 typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack);
660 void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack,
661     is_acked_func is_acked);
662 int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq);
663 void rdsv3_send_remove_from_sock(struct list *messages, int status);
664 int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport);
665 struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *,
666     struct rdsv3_rdma_op *);
667 
668 /* rdma.c */
669 void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force);
670 
671 /* cong.c */
672 void rdsv3_cong_init(void);
673 int rdsv3_cong_get_maps(struct rdsv3_connection *conn);
674 void rdsv3_cong_add_conn(struct rdsv3_connection *conn);
675 void rdsv3_cong_remove_conn(struct rdsv3_connection *conn);
676 void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port);
677 void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port);
678 int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock,
679     struct rdsv3_sock *rs);
680 void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map);
681 void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t);
682 int rdsv3_cong_updated_since(unsigned long *recent);
683 void rdsv3_cong_add_socket(struct rdsv3_sock *);
684 void rdsv3_cong_remove_socket(struct rdsv3_sock *);
685 void rdsv3_cong_exit(void);
686 struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn);
687 
688 /* stats.c */
689 RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats);
690 #define	rdsv3_stats_inc_which(which, member) do {		\
691 	rdsv3_per_cpu(which, get_cpu()).member++;		\
692 	put_cpu();					\
693 } while (0)
694 #define	rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member)
695 #define	rdsv3_stats_add_which(which, member, count) do {		\
696 	rdsv3_per_cpu(which, get_cpu()).member += count;	\
697 	put_cpu();					\
698 } while (0)
699 #define	rdsv3_stats_add(member, count)	\
700 	rdsv3_stats_add_which(rdsv3_stats, member, count)
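
/*
 * Illustrative only (len is a placeholder variable): typical counter
 * updates look like
 *
 *	rdsv3_stats_inc(s_recv_queued);
 *	rdsv3_stats_add(s_recv_rdma_bytes, len);
 */
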
701 int rdsv3_stats_init(void);
702 void rdsv3_stats_exit(void);
703 void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter,
704     uint64_t *values, char **names, size_t nr);
705 
706 
707 /* sysctl.c */
708 int rdsv3_sysctl_init(void);
709 void rdsv3_sysctl_exit(void);
710 extern unsigned long rdsv3_sysctl_sndbuf_min;
711 extern unsigned long rdsv3_sysctl_sndbuf_default;
712 extern unsigned long rdsv3_sysctl_sndbuf_max;
713 extern unsigned long rdsv3_sysctl_reconnect_min_jiffies;
714 extern unsigned long rdsv3_sysctl_reconnect_max_jiffies;
715 extern unsigned int  rdsv3_sysctl_max_unacked_packets;
716 extern unsigned int  rdsv3_sysctl_max_unacked_bytes;
717 extern unsigned int  rdsv3_sysctl_ping_enable;
718 extern unsigned long rdsv3_sysctl_trace_flags;
719 extern unsigned int  rdsv3_sysctl_trace_level;
720 
721 /* threads.c */
int rdsv3_threads_init(void);
723 void rdsv3_threads_exit(void);
724 extern struct rdsv3_workqueue_struct_s *rdsv3_wq;
725 void rdsv3_connect_worker(struct rdsv3_work_s *);
726 void rdsv3_shutdown_worker(struct rdsv3_work_s *);
727 void rdsv3_send_worker(struct rdsv3_work_s *);
728 void rdsv3_recv_worker(struct rdsv3_work_s *);
729 void rdsv3_connect_complete(struct rdsv3_connection *conn);
730 
731 /* transport.c */
732 int rdsv3_trans_register(struct rdsv3_transport *trans);
733 void rdsv3_trans_unregister(struct rdsv3_transport *trans);
734 struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr);
735 unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter,
736     unsigned int avail);
737 void rdsv3_trans_exit(void);
738 
739 /* message.c */
740 struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp);
741 struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop,
742     size_t total_len);
743 struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs,
744     unsigned int total_len);
745 void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
746     uint16_be_t dport, uint64_t seq);
747 int rdsv3_message_add_extension(struct rdsv3_header *hdr,
748     unsigned int type, const void *data, unsigned int len);
749 int rdsv3_message_next_extension(struct rdsv3_header *hdr,
750     unsigned int *pos, void *buf, unsigned int *buflen);
751 int rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
752     unsigned int version);
753 int rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
754     unsigned int *version);
755 int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr,
756     uint32_t r_key, uint32_t offset);
757 int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
758     uio_t *uio, size_t size);
759 void rdsv3_message_inc_purge(struct rdsv3_incoming *inc);
760 void rdsv3_message_inc_free(struct rdsv3_incoming *inc);
761 void rdsv3_message_addref(struct rdsv3_message *rm);
762 void rdsv3_message_put(struct rdsv3_message *rm);
763 void rdsv3_message_wait(struct rdsv3_message *rm);
764 void rdsv3_message_unmapped(struct rdsv3_message *rm);
765 
766 static inline void
767 rdsv3_message_make_checksum(struct rdsv3_header *hdr)
768 {
769 	hdr->h_csum = 0;
770 	hdr->h_csum =
771 	    rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2);
772 }
773 
774 static inline int
775 rdsv3_message_verify_checksum(const struct rdsv3_header *hdr)
776 {
777 	return (!hdr->h_csum ||
778 	    rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0);
779 }
780 
781 /* rdsv3_sc.c */
782 extern boolean_t rdsv3_if_lookup_by_name(char *if_name);
783 extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
784 extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr);
785 
786 #ifdef	__cplusplus
787 }
788 #endif
789 
790 #endif /* _RDSV3_RDSV3_H */
791