xref: /freebsd/sys/dev/cxgbe/iw_cxgbe/cm.c (revision 38069501)
1 /*
2  * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *	  copyright notice, this list of conditions and the following
16  *	  disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *	  copyright notice, this list of conditions and the following
20  *	  disclaimer in the documentation and/or other materials
21  *	  provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 
37 #ifdef TCP_OFFLOAD
38 #include <sys/types.h>
39 #include <sys/malloc.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 #include <sys/sockio.h>
43 #include <sys/taskqueue.h>
44 #include <netinet/in.h>
45 #include <net/route.h>
46 
47 #include <netinet/in_systm.h>
48 #include <netinet/in_pcb.h>
49 #include <netinet/ip.h>
50 #include <netinet/in_fib.h>
51 #include <netinet/ip_var.h>
52 #include <netinet/tcp_var.h>
53 #include <netinet/tcp.h>
54 #include <netinet/tcpip.h>
55 
56 #include <netinet/toecore.h>
57 
58 struct sge_iq;
59 struct rss_header;
60 struct cpl_set_tcb_rpl;
61 #include <linux/types.h>
62 #include "offload.h"
63 #include "tom/t4_tom.h"
64 
65 #define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
66 
67 #include "iw_cxgbe.h"
68 #include <linux/module.h>
69 #include <linux/workqueue.h>
70 #include <linux/notifier.h>
71 #include <linux/inetdevice.h>
72 #include <linux/if_vlan.h>
73 #include <net/netevent.h>
74 
75 static spinlock_t req_lock;
76 static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
77 static struct work_struct c4iw_task;
78 static struct workqueue_struct *c4iw_taskq;
79 static LIST_HEAD(err_cqe_list);
80 static spinlock_t err_cqe_lock;
81 
82 static void process_req(struct work_struct *ctx);
83 static void start_ep_timer(struct c4iw_ep *ep);
84 static int stop_ep_timer(struct c4iw_ep *ep);
85 static int set_tcpinfo(struct c4iw_ep *ep);
86 static void process_timeout(struct c4iw_ep *ep);
87 static void process_err_cqes(void);
88 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
89 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
90 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
91 static void *alloc_ep(int size, gfp_t flags);
92 static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
93 		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
94 static void close_socket(struct socket *so);
95 static int send_mpa_req(struct c4iw_ep *ep);
96 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
97 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
98 static void close_complete_upcall(struct c4iw_ep *ep, int status);
99 static int send_abort(struct c4iw_ep *ep);
100 static void peer_close_upcall(struct c4iw_ep *ep);
101 static void peer_abort_upcall(struct c4iw_ep *ep);
102 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
103 static int connect_request_upcall(struct c4iw_ep *ep);
104 static void established_upcall(struct c4iw_ep *ep);
105 static int process_mpa_reply(struct c4iw_ep *ep);
106 static int process_mpa_request(struct c4iw_ep *ep);
107 static void process_peer_close(struct c4iw_ep *ep);
108 static void process_conn_error(struct c4iw_ep *ep);
109 static void process_close_complete(struct c4iw_ep *ep);
110 static void ep_timeout(unsigned long arg);
111 static void setiwsockopt(struct socket *so);
112 static void init_iwarp_socket(struct socket *so, void *arg);
113 static void uninit_iwarp_socket(struct socket *so);
114 static void process_data(struct c4iw_ep *ep);
115 static void process_connected(struct c4iw_ep *ep);
116 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
117 static void process_socket_event(struct c4iw_ep *ep);
118 static void release_ep_resources(struct c4iw_ep *ep);
119 static int process_terminate(struct c4iw_ep *ep);
120 static int terminate(struct sge_iq *iq, const struct rss_header *rss,
121     struct mbuf *m);
122 static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
123 #define START_EP_TIMER(ep) \
124     do { \
125 	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
126 		__func__, __LINE__, (ep)); \
127 	    start_ep_timer(ep); \
128     } while (0)
129 
130 #define STOP_EP_TIMER(ep) \
131     ({ \
132 	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
133 		__func__, __LINE__, (ep)); \
134 	    stop_ep_timer(ep); \
135     })
136 
137 #ifdef KTR
138 static char *states[] = {
139 	"idle",
140 	"listen",
141 	"connecting",
142 	"mpa_wait_req",
143 	"mpa_req_sent",
144 	"mpa_req_rcvd",
145 	"mpa_rep_sent",
146 	"fpdu_mode",
147 	"aborting",
148 	"closing",
149 	"moribund",
150 	"dead",
151 	NULL,
152 };
153 #endif
154 
155 
156 static void deref_cm_id(struct c4iw_ep_common *epc)
157 {
158       epc->cm_id->rem_ref(epc->cm_id);
159       epc->cm_id = NULL;
160       set_bit(CM_ID_DEREFED, &epc->history);
161 }
162 
163 static void ref_cm_id(struct c4iw_ep_common *epc)
164 {
165       set_bit(CM_ID_REFED, &epc->history);
166       epc->cm_id->add_ref(epc->cm_id);
167 }
168 
169 static void deref_qp(struct c4iw_ep *ep)
170 {
171 	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
172 	clear_bit(QP_REFERENCED, &ep->com.flags);
173 	set_bit(QP_DEREFED, &ep->com.history);
174 }
175 
176 static void ref_qp(struct c4iw_ep *ep)
177 {
178 	set_bit(QP_REFERENCED, &ep->com.flags);
179 	set_bit(QP_REFED, &ep->com.history);
180 	c4iw_qp_add_ref(&ep->com.qp->ibqp);
181 }
182 
183 static void process_timeout(struct c4iw_ep *ep)
184 {
185 	struct c4iw_qp_attributes attrs;
186 	int abort = 1;
187 
188 	mutex_lock(&ep->com.mutex);
189 	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
190 			ep, ep->hwtid, ep->com.state);
191 	set_bit(TIMEDOUT, &ep->com.history);
192 	switch (ep->com.state) {
193 	case MPA_REQ_SENT:
194 		connect_reply_upcall(ep, -ETIMEDOUT);
195 		break;
196 	case MPA_REQ_WAIT:
197 	case MPA_REQ_RCVD:
198 	case MPA_REP_SENT:
199 	case FPDU_MODE:
200 		break;
201 	case CLOSING:
202 	case MORIBUND:
203 		if (ep->com.cm_id && ep->com.qp) {
204 			attrs.next_state = C4IW_QP_STATE_ERROR;
205 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
206 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
207 		}
208 		close_complete_upcall(ep, -ETIMEDOUT);
209 		break;
210 	case ABORTING:
211 	case DEAD:
212 		/*
213 		 * These states are expected if the ep timed out at the same
214 		 * time as another thread was calling stop_ep_timer().
215 		 * So we silently do nothing for these states.
216 		 */
217 		abort = 0;
218 		break;
219 	default:
220 		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
221 				, __func__, ep, ep->hwtid, ep->com.state);
222 		abort = 0;
223 	}
224 	mutex_unlock(&ep->com.mutex);
225 	if (abort)
226 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
227 	c4iw_put_ep(&ep->com);
228 	return;
229 }
230 
231 struct cqe_list_entry {
232 	struct list_head entry;
233 	struct c4iw_dev *rhp;
234 	struct t4_cqe err_cqe;
235 };
236 
237 static void
238 process_err_cqes(void)
239 {
240 	unsigned long flag;
241 	struct cqe_list_entry *cle;
242 
243 	spin_lock_irqsave(&err_cqe_lock, flag);
244 	while (!list_empty(&err_cqe_list)) {
245 		struct list_head *tmp;
246 		tmp = err_cqe_list.next;
247 		list_del(tmp);
248 		tmp->next = tmp->prev = NULL;
249 		spin_unlock_irqrestore(&err_cqe_lock, flag);
250 		cle = list_entry(tmp, struct cqe_list_entry, entry);
251 		c4iw_ev_dispatch(cle->rhp, &cle->err_cqe);
252 		free(cle, M_CXGBE);
253 		spin_lock_irqsave(&err_cqe_lock, flag);
254 	}
255 	spin_unlock_irqrestore(&err_cqe_lock, flag);
256 
257 	return;
258 }
259 
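/*
 * Worker for c4iw_taskq: drain any deferred error CQEs, then walk req_list
 * and dispatch the recorded TERM/TIMEOUT/SOCKET events for each endpoint,
 * dropping the reference that was taken when the endpoint was queued.
 */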
260 static void
261 process_req(struct work_struct *ctx)
262 {
263 	struct c4iw_ep_common *epc;
264 	unsigned long flag;
265 	int ep_events;
266 
267 	process_err_cqes();
268 	spin_lock_irqsave(&req_lock, flag);
269 	while (!TAILQ_EMPTY(&req_list)) {
270 		epc = TAILQ_FIRST(&req_list);
271 		TAILQ_REMOVE(&req_list, epc, entry);
272 		epc->entry.tqe_prev = NULL;
273 		ep_events = epc->ep_events;
274 		epc->ep_events = 0;
275 		spin_unlock_irqrestore(&req_lock, flag);
276 		CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__,
277 		    epc->so, epc, ep_events);
278 		if (ep_events & C4IW_EVENT_TERM)
279 			process_terminate((struct c4iw_ep *)epc);
280 		if (ep_events & C4IW_EVENT_TIMEOUT)
281 			process_timeout((struct c4iw_ep *)epc);
282 		if (ep_events & C4IW_EVENT_SOCKET)
283 			process_socket_event((struct c4iw_ep *)epc);
284 		c4iw_put_ep(epc);
285 		process_err_cqes();
286 		spin_lock_irqsave(&req_lock, flag);
287 	}
288 	spin_unlock_irqrestore(&req_lock, flag);
289 }
290 
291 /*
292  * XXX: doesn't belong here in the iWARP driver.
293  * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
294  *      set.  Is this a valid assumption for active open?
295  */
296 static int
297 set_tcpinfo(struct c4iw_ep *ep)
298 {
299 	struct socket *so = ep->com.so;
300 	struct inpcb *inp = sotoinpcb(so);
301 	struct tcpcb *tp;
302 	struct toepcb *toep;
303 	int rc = 0;
304 
305 	INP_WLOCK(inp);
306 	tp = intotcpcb(inp);
307 	if ((tp->t_flags & TF_TOE) == 0) {
308 		rc = EINVAL;
309 		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
310 		    __func__, so, ep);
311 		goto done;
312 	}
313 	toep = TOEPCB(so);
314 
315 	ep->hwtid = toep->tid;
316 	ep->snd_seq = tp->snd_nxt;
317 	ep->rcv_seq = tp->rcv_nxt;
318 	ep->emss = max(tp->t_maxseg, 128);
319 done:
320 	INP_WUNLOCK(inp);
321 	return (rc);
322 
323 }
324 
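/*
 * Resolve the route to the peer with a FIB lookup on the default FIB.
 * On success the next hop is returned (referenced) in *pnh4.
 */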
325 static int
326 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
327 		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
328 {
329 	struct in_addr addr;
330 	int err;
331 
332 	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
333 	    peer_ip, ntohs(local_port), ntohs(peer_port));
334 
335 	addr.s_addr = peer_ip;
336 	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
337 
338 	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
339 	return err;
340 }
341 
342 static void
343 close_socket(struct socket *so)
344 {
345 
346 	uninit_iwarp_socket(so);
347 	sodisconnect(so);
348 }
349 
350 static void
351 process_peer_close(struct c4iw_ep *ep)
352 {
353 	struct c4iw_qp_attributes attrs;
354 	int disconnect = 1;
355 	int release = 0;
356 
357 	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
358 	    ep->com.so, states[ep->com.state]);
359 
360 	mutex_lock(&ep->com.mutex);
361 	switch (ep->com.state) {
362 
363 		case MPA_REQ_WAIT:
364 			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
365 			    __func__, ep);
366 			__state_set(&ep->com, CLOSING);
367 			break;
368 
369 		case MPA_REQ_SENT:
370 			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
371 			    __func__, ep);
372 			__state_set(&ep->com, DEAD);
373 			connect_reply_upcall(ep, -ECONNABORTED);
374 
375 			disconnect = 0;
376 			STOP_EP_TIMER(ep);
377 			close_socket(ep->com.so);
378 			deref_cm_id(&ep->com);
379 			release = 1;
380 			break;
381 
382 		case MPA_REQ_RCVD:
383 
384 			/*
385 			 * Mark this endpoint CLOSING, but keep the
386 			 * reference on it until the ULP accepts or
387 			 * rejects the CR.
388 			 */
389 			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
390 			    __func__, ep);
391 			__state_set(&ep->com, CLOSING);
392 			c4iw_get_ep(&ep->com);
393 			break;
394 
395 		case MPA_REP_SENT:
396 			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
397 			    __func__, ep);
398 			__state_set(&ep->com, CLOSING);
399 			break;
400 
401 		case FPDU_MODE:
402 			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
403 			    __func__, ep);
404 			START_EP_TIMER(ep);
405 			__state_set(&ep->com, CLOSING);
406 			attrs.next_state = C4IW_QP_STATE_CLOSING;
407 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
408 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
409 			peer_close_upcall(ep);
410 			break;
411 
412 		case ABORTING:
413 			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
414 			    __func__, ep);
415 			disconnect = 0;
416 			break;
417 
418 		case CLOSING:
419 			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
420 			    __func__, ep);
421 			__state_set(&ep->com, MORIBUND);
422 			disconnect = 0;
423 			break;
424 
425 		case MORIBUND:
426 			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
427 			    ep);
428 			STOP_EP_TIMER(ep);
429 			if (ep->com.cm_id && ep->com.qp) {
430 				attrs.next_state = C4IW_QP_STATE_IDLE;
431 				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
432 						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
433 			}
434 			close_socket(ep->com.so);
435 			close_complete_upcall(ep, 0);
436 			__state_set(&ep->com, DEAD);
437 			release = 1;
438 			disconnect = 0;
439 			break;
440 
441 		case DEAD:
442 			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
443 			    __func__, ep);
444 			disconnect = 0;
445 			break;
446 
447 		default:
448 			panic("%s: ep %p state %d", __func__, ep,
449 			    ep->com.state);
450 			break;
451 	}
452 
453 	mutex_unlock(&ep->com.mutex);
454 
455 	if (disconnect) {
456 
457 		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
458 		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
459 	}
460 	if (release) {
461 
462 		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
463 		c4iw_put_ep(&ep->com);
464 	}
465 	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
466 	return;
467 }
468 
469 static void
470 process_conn_error(struct c4iw_ep *ep)
471 {
472 	struct c4iw_qp_attributes attrs;
473 	int ret;
474 	int state;
475 
476 	mutex_lock(&ep->com.mutex);
477 	state = ep->com.state;
478 	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
479 	    __func__, ep, ep->com.so, ep->com.so->so_error,
480 	    states[ep->com.state]);
481 
482 	switch (state) {
483 
484 		case MPA_REQ_WAIT:
485 			STOP_EP_TIMER(ep);
486 			break;
487 
488 		case MPA_REQ_SENT:
489 			STOP_EP_TIMER(ep);
490 			connect_reply_upcall(ep, -ECONNRESET);
491 			break;
492 
493 		case MPA_REP_SENT:
494 			ep->com.rpl_err = ECONNRESET;
495 			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
496 			break;
497 
498 		case MPA_REQ_RCVD:
499 
500 			/*
501 			 * We're gonna mark this puppy DEAD, but keep
502 			 * the reference on it until the ULP accepts or
503 			 * rejects the CR.
504 			 */
505 			c4iw_get_ep(&ep->com);
506 			break;
507 
508 		case MORIBUND:
509 		case CLOSING:
510 			STOP_EP_TIMER(ep);
511 			/*FALLTHROUGH*/
512 		case FPDU_MODE:
513 
514 			if (ep->com.cm_id && ep->com.qp) {
515 
516 				attrs.next_state = C4IW_QP_STATE_ERROR;
517 				ret = c4iw_modify_qp(ep->com.qp->rhp,
518 					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
519 					&attrs, 1);
520 				if (ret)
521 					log(LOG_ERR,
522 							"%s - qp <- error failed!\n",
523 							__func__);
524 			}
525 			peer_abort_upcall(ep);
526 			break;
527 
528 		case ABORTING:
529 			break;
530 
531 		case DEAD:
532 			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
533 			    __func__, ep->com.so->so_error);
534 			mutex_unlock(&ep->com.mutex);
535 			return;
536 
537 		default:
538 			panic("%s: ep %p state %d", __func__, ep, state);
539 			break;
540 	}
541 
542 	if (state != ABORTING) {
543 		close_socket(ep->com.so);
544 		__state_set(&ep->com, DEAD);
545 		c4iw_put_ep(&ep->com);
546 	}
547 	mutex_unlock(&ep->com.mutex);
548 	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
549 	return;
550 }
551 
552 static void
553 process_close_complete(struct c4iw_ep *ep)
554 {
555 	struct c4iw_qp_attributes attrs;
556 	int release = 0;
557 
558 	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
559 	    ep->com.so, states[ep->com.state]);
560 
561 	/* The cm_id may be null if we failed to connect */
562 	mutex_lock(&ep->com.mutex);
563 	set_bit(CLOSE_CON_RPL, &ep->com.history);
564 
565 	switch (ep->com.state) {
566 
567 		case CLOSING:
568 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
569 			    __func__, ep);
570 			__state_set(&ep->com, MORIBUND);
571 			break;
572 
573 		case MORIBUND:
574 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
575 			    ep);
576 			STOP_EP_TIMER(ep);
577 
578 			if ((ep->com.cm_id) && (ep->com.qp)) {
579 
580 				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
581 				    __func__, ep);
582 				attrs.next_state = C4IW_QP_STATE_IDLE;
583 				c4iw_modify_qp(ep->com.dev,
584 						ep->com.qp,
585 						C4IW_QP_ATTR_NEXT_STATE,
586 						&attrs, 1);
587 			}
588 
589 			close_socket(ep->com.so);
590 			close_complete_upcall(ep, 0);
591 			__state_set(&ep->com, DEAD);
592 			release = 1;
593 			break;
594 
595 		case ABORTING:
596 			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
597 			break;
598 
599 		case DEAD:
600 			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
601 			break;
602 		default:
603 			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
604 					__func__, ep);
605 			panic("%s:pcc6 %p unknown ep state", __func__, ep);
606 			break;
607 	}
608 	mutex_unlock(&ep->com.mutex);
609 
610 	if (release) {
611 
612 		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
613 		c4iw_put_ep(&ep->com);
614 	}
615 	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
616 	return;
617 }
618 
619 static void
620 setiwsockopt(struct socket *so)
621 {
622 	int rc;
623 	struct sockopt sopt;
624 	int on = 1;
625 
626 	sopt.sopt_dir = SOPT_SET;
627 	sopt.sopt_level = IPPROTO_TCP;
628 	sopt.sopt_name = TCP_NODELAY;
629 	sopt.sopt_val = (caddr_t)&on;
630 	sopt.sopt_valsize = sizeof on;
631 	sopt.sopt_td = NULL;
632 	rc = sosetopt(so, &sopt);
633 	if (rc) {
634 		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
635 		    __func__, so, rc);
636 	}
637 }
638 
639 static void
640 init_iwarp_socket(struct socket *so, void *arg)
641 {
642 
643 	SOCKBUF_LOCK(&so->so_rcv);
644 	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
645 	so->so_state |= SS_NBIO;
646 	SOCKBUF_UNLOCK(&so->so_rcv);
647 }
648 
649 static void
650 uninit_iwarp_socket(struct socket *so)
651 {
652 
653 	SOCKBUF_LOCK(&so->so_rcv);
654 	soupcall_clear(so, SO_RCV);
655 	SOCKBUF_UNLOCK(&so->so_rcv);
656 }
657 
658 static void
659 process_data(struct c4iw_ep *ep)
660 {
661 	struct sockaddr_in *local, *remote;
662 	int disconnect = 0;
663 
664 	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
665 	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
666 
667 	switch (state_read(&ep->com)) {
668 	case MPA_REQ_SENT:
669 		disconnect = process_mpa_reply(ep);
670 		break;
671 	case MPA_REQ_WAIT:
672 		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
673 		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
674 		ep->com.local_addr = *local;
675 		ep->com.remote_addr = *remote;
676 		free(local, M_SONAME);
677 		free(remote, M_SONAME);
678 		disconnect = process_mpa_request(ep);
679 		break;
680 	default:
681 		if (sbused(&ep->com.so->so_rcv))
682 			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
683 			    "state %d, so %p, so_state 0x%x, sbused %u\n",
684 			    __func__, ep, state_read(&ep->com), ep->com.so,
685 			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
686 		break;
687 	}
688 	if (disconnect)
689 		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
690 
691 }
692 
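/*
 * A pending active-open connect has completed: if the socket connected
 * cleanly, send the MPA request; otherwise report the socket error through
 * the connect reply upcall.  On any failure the socket is closed and the
 * endpoint is marked DEAD.
 */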
693 static void
694 process_connected(struct c4iw_ep *ep)
695 {
696 	struct socket *so = ep->com.so;
697 
698 	if ((so->so_state & SS_ISCONNECTED) && !so->so_error) {
699 		if (send_mpa_req(ep))
700 			goto err;
701 	} else {
702 		connect_reply_upcall(ep, -so->so_error);
703 		goto err;
704 	}
705 	return;
706 err:
707 	close_socket(so);
708 	state_set(&ep->com, DEAD);
709 	c4iw_put_ep(&ep->com);
710 	return;
711 }
712 
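/*
 * Handle a new child connection accepted on a listening endpoint: allocate a
 * child ep, copy the local and peer addresses, hook up the iWARP socket
 * upcall, start the MPA timer, and process any MPA request that may already
 * be queued on the socket.
 */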
713 void
714 process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
715 {
716 	struct c4iw_ep *child_ep;
717 	struct sockaddr_in *local;
718 	struct sockaddr_in *remote;
719 	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
720 	int ret = 0;
721 
722 	MPASS(child_so != NULL);
723 
724 	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
725 
726 	CTR5(KTR_IW_CXGBE,
727 	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
728 	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
729 
730 	in_getsockaddr(child_so, (struct sockaddr **)&local);
731 	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
732 
733 	child_ep->com.local_addr = *local;
734 	child_ep->com.remote_addr = *remote;
735 	child_ep->com.dev = parent_ep->com.dev;
736 	child_ep->com.so = child_so;
737 	child_ep->com.cm_id = NULL;
738 	child_ep->com.thread = parent_ep->com.thread;
739 	child_ep->parent_ep = parent_ep;
740 
741 	free(local, M_SONAME);
742 	free(remote, M_SONAME);
743 
744 	setiwsockopt(child_so);
745 	init_iwarp_socket(child_so, &child_ep->com);
746 	c4iw_get_ep(&parent_ep->com);
747 	init_timer(&child_ep->timer);
748 	state_set(&child_ep->com, MPA_REQ_WAIT);
749 	START_EP_TIMER(child_ep);
750 
751 	/* maybe the request has already been queued up on the socket... */
752 	ret = process_mpa_request(child_ep);
753 	if (ret == 2)
754 		/* ABORT */
755 		c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL);
756 	else if (ret == 1)
757 		/* CLOSE */
758 		c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL);
759 
760 	return;
761 }
762 
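/*
 * Record the new event(s) on the endpoint and, if it is not already queued,
 * take a reference and put it on req_list for the c4iw_task worker.
 */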
763 static int
764 add_ep_to_req_list(struct c4iw_ep *ep, int new_ep_event)
765 {
766 	unsigned long flag;
767 
768 	spin_lock_irqsave(&req_lock, flag);
769 	if (ep && ep->com.so) {
770 		ep->com.ep_events |= new_ep_event;
771 		if (!ep->com.entry.tqe_prev) {
772 			c4iw_get_ep(&ep->com);
773 			TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
774 			queue_work(c4iw_taskq, &c4iw_task);
775 		}
776 	}
777 	spin_unlock_irqrestore(&req_lock, flag);
778 
779 	return (0);
780 }
781 
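/*
 * Socket receive upcall.  Called from the socket layer (typically with the
 * receive buffer locked), so it only queues the endpoint for the taskqueue
 * instead of handling the event inline.
 */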
782 static int
783 c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
784 {
785 	struct c4iw_ep *ep = arg;
786 
787 	CTR6(KTR_IW_CXGBE,
788 	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
789 	    __func__, so, so->so_state, ep, states[ep->com.state],
790 	    ep->com.entry.tqe_prev);
791 
792 	MPASS(ep->com.so == so);
793 	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
794 
795 	return (SU_OK);
796 }
797 
798 
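/*
 * CPL handler for RDMA TERMINATE: look up the toepcb by tid, recover the
 * owning socket and endpoint from the receive upcall argument, and queue a
 * TERM event for the taskqueue.
 */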
799 static int
800 terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
801 {
802 	struct adapter *sc = iq->adapter;
803 	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
804 	unsigned int tid = GET_TID(cpl);
805 	struct toepcb *toep = lookup_tid(sc, tid);
806 	struct socket *so;
807 	struct c4iw_ep *ep;
808 
809 	INP_WLOCK(toep->inp);
810 	so = inp_inpcbtosocket(toep->inp);
811 	ep = so->so_rcv.sb_upcallarg;
812 	INP_WUNLOCK(toep->inp);
813 
814 	CTR3(KTR_IW_CXGBE, "%s: so %p, ep %p", __func__, so, ep);
815 	add_ep_to_req_list(ep, C4IW_EVENT_TERM);
816 
817 	return 0;
818 }
819 
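/*
 * Dispatch a queued socket event based on the endpoint state and socket
 * flags: connect completion, connection error, peer close, full close, or
 * inbound streaming data.
 */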
820 static void
821 process_socket_event(struct c4iw_ep *ep)
822 {
823 	int state = state_read(&ep->com);
824 	struct socket *so = ep->com.so;
825 
826 	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
827 	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
828 	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
829 
830 	if (state == CONNECTING) {
831 		process_connected(ep);
832 		return;
833 	}
834 
835 	if (state == LISTEN) {
836 		/* socket listening events are handled at IWCM */
837 		CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
838 			    ep->com.state, ep);
839 		BUG();
840 		return;
841 	}
842 
843 	/* connection error */
844 	if (so->so_error) {
845 		process_conn_error(ep);
846 		return;
847 	}
848 
849 	/* peer close */
850 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
851 		process_peer_close(ep);
852 		/*
853 		 * Check whether a socket disconnect event is pending before
854 		 * returning.  Fall through to process it if so.
855 		 */
856 		if (!(so->so_state & SS_ISDISCONNECTED))
857 			return;
858 	}
859 
860 	/* close complete */
861 	if (so->so_state & SS_ISDISCONNECTED) {
862 		process_close_complete(ep);
863 		return;
864 	}
865 
866 	/* rx data */
867 	process_data(ep);
868 }
869 
870 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
871 
872 static int dack_mode = 0;
873 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
874 		"Delayed ack mode (default = 0)");
875 
876 int c4iw_max_read_depth = 8;
877 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
878 		"Per-connection max ORD/IRD (default = 8)");
879 
880 static int enable_tcp_timestamps;
881 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
882 		"Enable tcp timestamps (default = 0)");
883 
884 static int enable_tcp_sack;
885 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
886 		"Enable tcp SACK (default = 0)");
887 
888 static int enable_tcp_window_scaling = 1;
889 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
890 		"Enable tcp window scaling (default = 1)");
891 
892 int c4iw_debug = 0;
893 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
894 		"Enable debug logging (default = 0)");
895 
896 static int peer2peer = 1;
897 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
898 		"Support peer2peer ULPs (default = 1)");
899 
900 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
901 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
902 		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
903 
904 static int ep_timeout_secs = 60;
905 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
906 		"CM Endpoint operation timeout in seconds (default = 60)");
907 
908 static int mpa_rev = 1;
909 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
910 		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
911 
912 static int markers_enabled;
913 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
914 		"Enable MPA MARKERS (default(0) = disabled)");
915 
916 static int crc_enabled = 1;
917 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
918 		"Enable MPA CRC (default(1) = enabled)");
919 
920 static int rcv_win = 256 * 1024;
921 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
922 		"TCP receive window in bytes (default = 256KB)");
923 
924 static int snd_win = 128 * 1024;
925 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
926 		"TCP send window in bytes (default = 128KB)");
927 
928 static void
929 start_ep_timer(struct c4iw_ep *ep)
930 {
931 
932 	if (timer_pending(&ep->timer)) {
933 		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
934 		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
935 		    ep);
936 		return;
937 	}
938 	clear_bit(TIMEOUT, &ep->com.flags);
939 	c4iw_get_ep(&ep->com);
940 	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
941 	ep->timer.data = (unsigned long)ep;
942 	ep->timer.function = ep_timeout;
943 	add_timer(&ep->timer);
944 }
945 
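/*
 * Stop the endpoint timer.  Returns 0 if this call stopped a pending timer
 * (and dropped the reference taken by start_ep_timer), or 1 if the timer had
 * already fired or been stopped.
 */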
946 static int
947 stop_ep_timer(struct c4iw_ep *ep)
948 {
949 
950 	del_timer_sync(&ep->timer);
951 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
952 		c4iw_put_ep(&ep->com);
953 		return 0;
954 	}
955 	return 1;
956 }
957 
958 static enum c4iw_ep_state
959 state_read(struct c4iw_ep_common *epc)
960 {
961 	enum c4iw_ep_state state;
962 
963 	mutex_lock(&epc->mutex);
964 	state = epc->state;
965 	mutex_unlock(&epc->mutex);
966 
967 	return (state);
968 }
969 
970 static void
971 __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
972 {
973 
974 	epc->state = new;
975 }
976 
977 static void
978 state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
979 {
980 
981 	mutex_lock(&epc->mutex);
982 	__state_set(epc, new);
983 	mutex_unlock(&epc->mutex);
984 }
985 
986 static void *
987 alloc_ep(int size, gfp_t gfp)
988 {
989 	struct c4iw_ep_common *epc;
990 
991 	epc = kzalloc(size, gfp);
992 	if (epc == NULL)
993 		return (NULL);
994 
995 	kref_init(&epc->kref);
996 	mutex_init(&epc->mutex);
997 	c4iw_init_wr_wait(&epc->wr_wait);
998 
999 	return (epc);
1000 }
1001 
1002 void _c4iw_free_ep(struct kref *kref)
1003 {
1004 	struct c4iw_ep *ep;
1005 	struct c4iw_ep_common *epc;
1006 
1007 	ep = container_of(kref, struct c4iw_ep, com.kref);
1008 	epc = &ep->com;
1009 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1010 	    __func__, epc));
1011 	if (test_bit(QP_REFERENCED, &ep->com.flags))
1012 		deref_qp(ep);
1013 	CTR4(KTR_IW_CXGBE, "%s: ep %p, history 0x%lx, flags 0x%lx",
1014 	    __func__, ep, epc->history, epc->flags);
1015 	kfree(ep);
1016 }
1017 
1018 static void release_ep_resources(struct c4iw_ep *ep)
1019 {
1020 	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1021 	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1022 	c4iw_put_ep(&ep->com);
1023 	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1024 }
1025 
1026 static int
1027 send_mpa_req(struct c4iw_ep *ep)
1028 {
1029 	int mpalen;
1030 	struct mpa_message *mpa;
1031 	struct mpa_v2_conn_params mpa_v2_params;
1032 	struct mbuf *m;
1033 	char mpa_rev_to_use = mpa_rev;
1034 	int err = 0;
1035 
1036 	if (ep->retry_with_mpa_v1)
1037 		mpa_rev_to_use = 1;
1038 	mpalen = sizeof(*mpa) + ep->plen;
1039 	if (mpa_rev_to_use == 2)
1040 		mpalen += sizeof(struct mpa_v2_conn_params);
1041 
1042 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1043 	if (mpa == NULL) {
1044 		err = -ENOMEM;
1045 		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1046 				__func__, ep, err);
1047 		goto err;
1048 	}
1049 
1050 	memset(mpa, 0, mpalen);
1051 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1052 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1053 		(markers_enabled ? MPA_MARKERS : 0) |
1054 		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1055 	mpa->private_data_size = htons(ep->plen);
1056 	mpa->revision = mpa_rev_to_use;
1057 
1058 	if (mpa_rev_to_use == 1) {
1059 		ep->tried_with_mpa_v1 = 1;
1060 		ep->retry_with_mpa_v1 = 0;
1061 	}
1062 
1063 	if (mpa_rev_to_use == 2) {
1064 		mpa->private_data_size +=
1065 			htons(sizeof(struct mpa_v2_conn_params));
1066 		mpa_v2_params.ird = htons((u16)ep->ird);
1067 		mpa_v2_params.ord = htons((u16)ep->ord);
1068 
1069 		if (peer2peer) {
1070 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1071 
1072 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1073 				mpa_v2_params.ord |=
1074 				    htons(MPA_V2_RDMA_WRITE_RTR);
1075 			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1076 				mpa_v2_params.ord |=
1077 					htons(MPA_V2_RDMA_READ_RTR);
1078 			}
1079 		}
1080 		memcpy(mpa->private_data, &mpa_v2_params,
1081 			sizeof(struct mpa_v2_conn_params));
1082 
1083 		if (ep->plen) {
1084 
1085 			memcpy(mpa->private_data +
1086 				sizeof(struct mpa_v2_conn_params),
1087 				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1088 		}
1089 	} else {
1090 
1091 		if (ep->plen)
1092 			memcpy(mpa->private_data,
1093 					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1094 		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1095 	}
1096 
1097 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1098 	if (m == NULL) {
1099 		err = -ENOMEM;
1100 		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1101 				__func__, ep, err);
1102 		free(mpa, M_CXGBE);
1103 		goto err;
1104 	}
1105 	m_copyback(m, 0, mpalen, (void *)mpa);
1106 	free(mpa, M_CXGBE);
1107 
1108 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1109 			ep->com.thread);
1110 	if (err) {
1111 		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1112 				__func__, ep, err);
1113 		goto err;
1114 	}
1115 
1116 	START_EP_TIMER(ep);
1117 	state_set(&ep->com, MPA_REQ_SENT);
1118 	ep->mpa_attr.initiator = 1;
1119 	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1120 	return 0;
1121 err:
1122 	connect_reply_upcall(ep, err);
1123 	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1124 	return err;
1125 }
1126 
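/*
 * Build and send an MPA reply with the MPA_REJECT flag set, carrying any
 * private data supplied by the ULP.
 */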
1127 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1128 {
1129 	int mpalen;
1130 	struct mpa_message *mpa;
1131 	struct mpa_v2_conn_params mpa_v2_params;
1132 	struct mbuf *m;
1133 	int err;
1134 
1135 	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1136 	    ep->plen);
1137 
1138 	mpalen = sizeof(*mpa) + plen;
1139 
1140 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1141 
1142 		mpalen += sizeof(struct mpa_v2_conn_params);
1143 		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1144 		    ep->mpa_attr.version, mpalen);
1145 	}
1146 
1147 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1148 	if (mpa == NULL)
1149 		return (-ENOMEM);
1150 
1151 	memset(mpa, 0, mpalen);
1152 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1153 	mpa->flags = MPA_REJECT;
1154 	mpa->revision = mpa_rev;
1155 	mpa->private_data_size = htons(plen);
1156 
1157 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1158 
1159 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1160 		mpa->private_data_size +=
1161 			htons(sizeof(struct mpa_v2_conn_params));
1162 		mpa_v2_params.ird = htons(((u16)ep->ird) |
1163 				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1164 				 0));
1165 		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1166 					(p2p_type ==
1167 					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1168 					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1169 					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1170 					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1171 		memcpy(mpa->private_data, &mpa_v2_params,
1172 				sizeof(struct mpa_v2_conn_params));
1173 
1174 		if (ep->plen)
1175 			memcpy(mpa->private_data +
1176 					sizeof(struct mpa_v2_conn_params), pdata, plen);
1177 		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1178 		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1179 	} else
1180 		if (plen)
1181 			memcpy(mpa->private_data, pdata, plen);
1182 
1183 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1184 	if (m == NULL) {
1185 		free(mpa, M_CXGBE);
1186 		return (-ENOMEM);
1187 	}
1188 	m_copyback(m, 0, mpalen, (void *)mpa);
1189 	free(mpa, M_CXGBE);
1190 
1191 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1192 	if (!err)
1193 		ep->snd_seq += mpalen;
1194 	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1195 	return err;
1196 }
1197 
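/*
 * Build and send the MPA reply that accepts the connection, mirroring the
 * negotiated MPA version and IRD/ORD values, and move the endpoint to
 * MPA_REP_SENT.
 */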
1198 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1199 {
1200 	int mpalen;
1201 	struct mpa_message *mpa;
1202 	struct mbuf *m;
1203 	struct mpa_v2_conn_params mpa_v2_params;
1204 	int err;
1205 
1206 	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1207 
1208 	mpalen = sizeof(*mpa) + plen;
1209 
1210 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1211 
1212 		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1213 		    ep->mpa_attr.version);
1214 		mpalen += sizeof(struct mpa_v2_conn_params);
1215 	}
1216 
1217 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1218 	if (mpa == NULL)
1219 		return (-ENOMEM);
1220 
1221 	memset(mpa, 0, sizeof(*mpa));
1222 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1223 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1224 		(markers_enabled ? MPA_MARKERS : 0);
1225 	mpa->revision = ep->mpa_attr.version;
1226 	mpa->private_data_size = htons(plen);
1227 
1228 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1229 
1230 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1231 		mpa->private_data_size +=
1232 			htons(sizeof(struct mpa_v2_conn_params));
1233 		mpa_v2_params.ird = htons((u16)ep->ird);
1234 		mpa_v2_params.ord = htons((u16)ep->ord);
1235 		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1236 		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1237 
1238 		if (peer2peer && (ep->mpa_attr.p2p_type !=
1239 			FW_RI_INIT_P2PTYPE_DISABLED)) {
1240 
1241 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1242 
1243 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1244 
1245 				mpa_v2_params.ord |=
1246 					htons(MPA_V2_RDMA_WRITE_RTR);
1247 				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1248 				    __func__, ep, p2p_type, mpa_v2_params.ird,
1249 				    mpa_v2_params.ord);
1250 			}
1251 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1252 
1253 				mpa_v2_params.ord |=
1254 					htons(MPA_V2_RDMA_READ_RTR);
1255 				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1256 				    __func__, ep, p2p_type, mpa_v2_params.ird,
1257 				    mpa_v2_params.ord);
1258 			}
1259 		}
1260 
1261 		memcpy(mpa->private_data, &mpa_v2_params,
1262 			sizeof(struct mpa_v2_conn_params));
1263 
1264 		if (ep->plen)
1265 			memcpy(mpa->private_data +
1266 				sizeof(struct mpa_v2_conn_params), pdata, plen);
1267 	} else
1268 		if (plen)
1269 			memcpy(mpa->private_data, pdata, plen);
1270 
1271 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1272 	if (m == NULL) {
1273 		free(mpa, M_CXGBE);
1274 		return (-ENOMEM);
1275 	}
1276 	m_copyback(m, 0, mpalen, (void *)mpa);
1277 	free(mpa, M_CXGBE);
1278 
1279 
1280 	state_set(&ep->com, MPA_REP_SENT);
1281 	ep->snd_seq += mpalen;
1282 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1283 			ep->com.thread);
1284 	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1285 	return err;
1286 }
1287 
1288 
1289 
1290 static void close_complete_upcall(struct c4iw_ep *ep, int status)
1291 {
1292 	struct iw_cm_event event;
1293 
1294 	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1295 	memset(&event, 0, sizeof(event));
1296 	event.event = IW_CM_EVENT_CLOSE;
1297 	event.status = status;
1298 
1299 	if (ep->com.cm_id) {
1300 
1301 		CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep);
1302 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1303 		deref_cm_id(&ep->com);
1304 		set_bit(CLOSE_UPCALL, &ep->com.history);
1305 	}
1306 	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1307 }
1308 
1309 static int
1310 send_abort(struct c4iw_ep *ep)
1311 {
1312 	struct socket *so = ep->com.so;
1313 	struct sockopt sopt;
1314 	int rc;
1315 	struct linger l;
1316 
1317 	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
1318 	    states[ep->com.state], ep->hwtid);
1319 
1320 	l.l_onoff = 1;
1321 	l.l_linger = 0;
1322 
1323 	/* linger_time of 0 forces RST to be sent */
1324 	sopt.sopt_dir = SOPT_SET;
1325 	sopt.sopt_level = SOL_SOCKET;
1326 	sopt.sopt_name = SO_LINGER;
1327 	sopt.sopt_val = (caddr_t)&l;
1328 	sopt.sopt_valsize = sizeof l;
1329 	sopt.sopt_td = NULL;
1330 	rc = sosetopt(so, &sopt);
1331 	if (rc != 0) {
1332 		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
1333 		    __func__, so, rc);
1334 	}
1335 
1336 	uninit_iwarp_socket(so);
1337 	sodisconnect(so);
1338 	set_bit(ABORT_CONN, &ep->com.history);
1339 
1340 	/*
1341 	 * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
1342 	 * request it has sent. But the current TOE driver is not propagating
1343 	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a
1344 	 * workaround, drop the 'ep' reference (taken before sending the ABORT
1345 	 * request) here instead of in the abort_rpl() handler of iw_cxgbe.
1346 	 */
1347 	c4iw_put_ep(&ep->com);
1348 
1349 	return (0);
1350 }
1351 
1352 static void peer_close_upcall(struct c4iw_ep *ep)
1353 {
1354 	struct iw_cm_event event;
1355 
1356 	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1357 	memset(&event, 0, sizeof(event));
1358 	event.event = IW_CM_EVENT_DISCONNECT;
1359 
1360 	if (ep->com.cm_id) {
1361 
1362 		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1363 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1364 		set_bit(DISCONN_UPCALL, &ep->com.history);
1365 	}
1366 	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1367 }
1368 
1369 static void peer_abort_upcall(struct c4iw_ep *ep)
1370 {
1371 	struct iw_cm_event event;
1372 
1373 	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1374 	memset(&event, 0, sizeof(event));
1375 	event.event = IW_CM_EVENT_CLOSE;
1376 	event.status = -ECONNRESET;
1377 
1378 	if (ep->com.cm_id) {
1379 
1380 		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1381 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1382 		deref_cm_id(&ep->com);
1383 		set_bit(ABORT_UPCALL, &ep->com.history);
1384 	}
1385 	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1386 }
1387 
1388 static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1389 {
1390 	struct iw_cm_event event;
1391 
1392 	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
1393 	memset(&event, 0, sizeof(event));
1394 	event.event = IW_CM_EVENT_CONNECT_REPLY;
1395 	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
1396 					-ECONNRESET : status;
1397 	event.local_addr = ep->com.local_addr;
1398 	event.remote_addr = ep->com.remote_addr;
1399 
1400 	if ((status == 0) || (status == -ECONNREFUSED)) {
1401 
1402 		if (!ep->tried_with_mpa_v1) {
1403 
1404 			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
1405 			/* this means MPA_v2 is used */
1406 			event.private_data_len = ep->plen -
1407 				sizeof(struct mpa_v2_conn_params);
1408 			event.private_data = ep->mpa_pkt +
1409 				sizeof(struct mpa_message) +
1410 				sizeof(struct mpa_v2_conn_params);
1411 		} else {
1412 
1413 			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
1414 			/* this means MPA_v1 is used */
1415 			event.private_data_len = ep->plen;
1416 			event.private_data = ep->mpa_pkt +
1417 				sizeof(struct mpa_message);
1418 		}
1419 	}
1420 
1421 	if (ep->com.cm_id) {
1422 
1423 		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
1424 		set_bit(CONN_RPL_UPCALL, &ep->com.history);
1425 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1426 	}
1427 
1428 	if(status == -ECONNABORTED) {
1429 
1430 		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
1431 		return;
1432 	}
1433 
1434 	if (status < 0) {
1435 
1436 		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
1437 		deref_cm_id(&ep->com);
1438 	}
1439 
1440 	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
1441 }
1442 
1443 static int connect_request_upcall(struct c4iw_ep *ep)
1444 {
1445 	struct iw_cm_event event;
1446 	int ret;
1447 
1448 	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1449 	    ep->tried_with_mpa_v1);
1450 
1451 	memset(&event, 0, sizeof(event));
1452 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1453 	event.local_addr = ep->com.local_addr;
1454 	event.remote_addr = ep->com.remote_addr;
1455 	event.provider_data = ep;
1456 	event.so = ep->com.so;
1457 
1458 	if (!ep->tried_with_mpa_v1) {
1459 		/* this means MPA_v2 is used */
1460 		event.ord = ep->ord;
1461 		event.ird = ep->ird;
1462 		event.private_data_len = ep->plen -
1463 			sizeof(struct mpa_v2_conn_params);
1464 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1465 			sizeof(struct mpa_v2_conn_params);
1466 	} else {
1467 
1468 		/* this means MPA_v1 is used. Send max supported */
1469 		event.ord = c4iw_max_read_depth;
1470 		event.ird = c4iw_max_read_depth;
1471 		event.private_data_len = ep->plen;
1472 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1473 	}
1474 
1475 	c4iw_get_ep(&ep->com);
1476 	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1477 	    &event);
1478 	if(ret)
1479 		c4iw_put_ep(&ep->com);
1480 
1481 	set_bit(CONNREQ_UPCALL, &ep->com.history);
1482 	c4iw_put_ep(&ep->parent_ep->com);
1483 	return ret;
1484 }
1485 
1486 static void established_upcall(struct c4iw_ep *ep)
1487 {
1488 	struct iw_cm_event event;
1489 
1490 	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1491 	memset(&event, 0, sizeof(event));
1492 	event.event = IW_CM_EVENT_ESTABLISHED;
1493 	event.ird = ep->ird;
1494 	event.ord = ep->ord;
1495 
1496 	if (ep->com.cm_id) {
1497 
1498 		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1499 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1500 		set_bit(ESTAB_UPCALL, &ep->com.history);
1501 	}
1502 	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1503 }
1504 
1505 
1506 /*
1507  * process_mpa_reply - process streaming mode MPA reply
1508  *
1509  * Returns:
1510  *
1511  * 0 upon success indicating a connect request was delivered to the ULP
1512  * or the mpa request is incomplete but valid so far.
1513  *
1514  * 1 if a failure requires the caller to close the connection.
1515  *
1516  * 2 if a failure requires the caller to abort the connection.
1517  */
1518 static int process_mpa_reply(struct c4iw_ep *ep)
1519 {
1520 	struct mpa_message *mpa;
1521 	struct mpa_v2_conn_params *mpa_v2_params;
1522 	u16 plen;
1523 	u16 resp_ird, resp_ord;
1524 	u8 rtr_mismatch = 0, insuff_ird = 0;
1525 	struct c4iw_qp_attributes attrs;
1526 	enum c4iw_qp_attr_mask mask;
1527 	int err;
1528 	struct mbuf *top, *m;
1529 	int flags = MSG_DONTWAIT;
1530 	struct uio uio;
1531 	int disconnect = 0;
1532 
1533 	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1534 
1535 	/*
1536 	 * Stop mpa timer.  If it expired, then
1537 	 * we ignore the MPA reply.  process_timeout()
1538 	 * will abort the connection.
1539 	 */
1540 	if (STOP_EP_TIMER(ep))
1541 		return 0;
1542 
1543 	uio.uio_resid = 1000000;
1544 	uio.uio_td = ep->com.thread;
1545 	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1546 
1547 	if (err) {
1548 
1549 		if (err == EWOULDBLOCK) {
1550 
1551 			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1552 			START_EP_TIMER(ep);
1553 			return 0;
1554 		}
1555 		err = -err;
1556 		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1557 		goto err;
1558 	}
1559 
1560 	if (ep->com.so->so_rcv.sb_mb) {
1561 
1562 		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1563 		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1564 		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1565 	}
1566 
1567 	m = top;
1568 
1569 	do {
1570 
1571 		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1572 		/*
1573 		 * If we get more than the supported amount of private data
1574 		 * then we must fail this connection.
1575 		 */
1576 		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1577 
1578 			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1579 			    ep->mpa_pkt_len + m->m_len);
1580 			err = (-EINVAL);
1581 			goto err_stop_timer;
1582 		}
1583 
1584 		/*
1585 		 * copy the new data into our accumulation buffer.
1586 		 */
1587 		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1588 		ep->mpa_pkt_len += m->m_len;
1589 		if (!m->m_next)
1590 			m = m->m_nextpkt;
1591 		else
1592 			m = m->m_next;
1593 	} while (m);
1594 
1595 	m_freem(top);
1596 	/*
1597 	 * if we don't even have the mpa message, then bail.
1598 	 */
1599 	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1600 		return 0;
1601 	}
1602 	mpa = (struct mpa_message *) ep->mpa_pkt;
1603 
1604 	/* Validate MPA header. */
1605 	if (mpa->revision > mpa_rev) {
1606 
1607 		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1608 		    mpa->revision, mpa_rev);
1609 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1610 				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1611 		err = -EPROTO;
1612 		goto err_stop_timer;
1613 	}
1614 
1615 	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1616 
1617 		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1618 		err = -EPROTO;
1619 		goto err_stop_timer;
1620 	}
1621 
1622 	plen = ntohs(mpa->private_data_size);
1623 
1624 	/*
1625 	 * Fail if there's too much private data.
1626 	 */
1627 	if (plen > MPA_MAX_PRIVATE_DATA) {
1628 
1629 		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1630 		err = -EPROTO;
1631 		goto err_stop_timer;
1632 	}
1633 
1634 	/*
1635 	 * Fail if we received more data than plen accounts for.
1636 	 */
1637 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1638 
1639 		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1640 		STOP_EP_TIMER(ep);
1641 		err = -EPROTO;
1642 		goto err_stop_timer;
1643 	}
1644 
1645 	ep->plen = (u8) plen;
1646 
1647 	/*
1648 	 * If we don't have all the pdata yet, then bail.
1649 	 * We'll continue processing when more data arrives.
1650 	 */
1651 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1652 
1653 		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1654 		return 0;
1655 	}
1656 
1657 	if (mpa->flags & MPA_REJECT) {
1658 
1659 		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1660 		err = -ECONNREFUSED;
1661 		goto err_stop_timer;
1662 	}
1663 
1664 	/*
1665 	 * If we get here we have accumulated the entire mpa
1666 	 * start reply message including private data. And
1667 	 * the MPA header is valid.
1668 	 */
1669 	state_set(&ep->com, FPDU_MODE);
1670 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1671 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1672 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1673 	ep->mpa_attr.version = mpa->revision;
1674 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1675 
1676 	if (mpa->revision == 2) {
1677 
1678 		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1679 		ep->mpa_attr.enhanced_rdma_conn =
1680 			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1681 
1682 		if (ep->mpa_attr.enhanced_rdma_conn) {
1683 
1684 			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1685 			mpa_v2_params = (struct mpa_v2_conn_params *)
1686 				(ep->mpa_pkt + sizeof(*mpa));
1687 			resp_ird = ntohs(mpa_v2_params->ird) &
1688 				MPA_V2_IRD_ORD_MASK;
1689 			resp_ord = ntohs(mpa_v2_params->ord) &
1690 				MPA_V2_IRD_ORD_MASK;
1691 
1692 			/*
1693 			 * This is a double-check. Ideally, the checks below
1694 			 * should not be required since ird/ord negotiation has
1695 			 * already been handled in c4iw_accept_cr.
1696 			 */
1697 			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
1698 
1699 				CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep);
1700 				err = -ENOMEM;
1701 				ep->ird = resp_ord;
1702 				ep->ord = resp_ird;
1703 				insuff_ird = 1;
1704 			}
1705 
1706 			if (ntohs(mpa_v2_params->ird) &
1707 				MPA_V2_PEER2PEER_MODEL) {
1708 
1709 				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
1710 				if (ntohs(mpa_v2_params->ord) &
1711 					MPA_V2_RDMA_WRITE_RTR) {
1712 
1713 					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
1714 					ep->mpa_attr.p2p_type =
1715 						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1716 				}
1717 				else if (ntohs(mpa_v2_params->ord) &
1718 					MPA_V2_RDMA_READ_RTR) {
1719 
1720 					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
1721 					ep->mpa_attr.p2p_type =
1722 						FW_RI_INIT_P2PTYPE_READ_REQ;
1723 				}
1724 			}
1725 		}
1726 	} else {
1727 
1728 		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
1729 
1730 		if (mpa->revision == 1) {
1731 
1732 			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
1733 
1734 			if (peer2peer) {
1735 
1736 				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
1737 				ep->mpa_attr.p2p_type = p2p_type;
1738 			}
1739 		}
1740 	}
1741 
1742 	if (set_tcpinfo(ep)) {
1743 
1744 		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
1745 		printf("%s set_tcpinfo error\n", __func__);
1746 		err = -ECONNRESET;
1747 		goto err;
1748 	}
1749 
1750 	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
1751 	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
1752 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1753 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1754 	    ep->mpa_attr.p2p_type);
1755 
1756 	/*
1757 	 * If responder's RTR does not match with that of initiator, assign
1758 	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1759 	 * generated when moving QP to RTS state.
1760 	 * A TERM message will be sent after QP has moved to RTS state
1761 	 */
1762 	if ((ep->mpa_attr.version == 2) && peer2peer &&
1763 		(ep->mpa_attr.p2p_type != p2p_type)) {
1764 
1765 		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
1766 		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1767 		rtr_mismatch = 1;
1768 	}
1769 
1770 
1771 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
1772 	attrs.mpa_attr = ep->mpa_attr;
1773 	attrs.max_ird = ep->ird;
1774 	attrs.max_ord = ep->ord;
1775 	attrs.llp_stream_handle = ep;
1776 	attrs.next_state = C4IW_QP_STATE_RTS;
1777 
1778 	mask = C4IW_QP_ATTR_NEXT_STATE |
1779 		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1780 		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1781 
1782 	/* bind QP and TID with INIT_WR */
1783 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
1784 
1785 	if (err) {
1786 
1787 		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
1788 		goto err;
1789 	}
1790 
1791 	/*
1792 	 * If responder's RTR requirement did not match with what initiator
1793 	 * supports, generate TERM message
1794 	 */
1795 	if (rtr_mismatch) {
1796 
1797 		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
1798 		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1799 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1800 		attrs.ecode = MPA_NOMATCH_RTR;
1801 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1802 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1803 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1804 		err = -ENOMEM;
1805 		disconnect = 1;
1806 		goto out;
1807 	}
1808 
1809 	/*
1810 	 * Generate a TERM if the initiator's IRD is not sufficient for the
1811 	 * responder-provided ORD. Currently we behave the same way even when
1812 	 * the responder-provided IRD is insufficient for the initiator's
1813 	 * ORD.
1814 	 */
1815 	if (insuff_ird) {
1816 
1817 		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
1818 		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1819 				__func__);
1820 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1821 		attrs.ecode = MPA_INSUFF_IRD;
1822 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1823 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1824 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1825 		err = -ENOMEM;
1826 		disconnect = 1;
1827 		goto out;
1828 	}
1829 	goto out;
1830 err_stop_timer:
1831 	STOP_EP_TIMER(ep);
1832 err:
1833 	disconnect = 2;
1834 out:
1835 	connect_reply_upcall(ep, err);
1836 	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
1837 	return disconnect;
1838 }
1839 
1840 /*
1841  * process_mpa_request - process streaming mode MPA request
1842  *
1843  * Returns:
1844  *
1845  * 0 upon success indicating a connect request was delivered to the ULP
1846  * or the mpa request is incomplete but valid so far.
1847  *
1848  * 1 if a failure requires the caller to close the connection.
1849  *
1850  * 2 if a failure requires the caller to abort the connection.
1851  */
1852 static int
1853 process_mpa_request(struct c4iw_ep *ep)
1854 {
1855 	struct mpa_message *mpa;
1856 	u16 plen;
1857 	int flags = MSG_DONTWAIT;
1858 	int rc;
1859 	struct iovec iov;
1860 	struct uio uio;
1861 	enum c4iw_ep_state state = state_read(&ep->com);
1862 
1863 	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
1864 
1865 	if (state != MPA_REQ_WAIT)
1866 		return 0;
1867 
1868 	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
1869 	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1870 	uio.uio_iov = &iov;
1871 	uio.uio_iovcnt = 1;
1872 	uio.uio_offset = 0;
1873 	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1874 	uio.uio_segflg = UIO_SYSSPACE;
1875 	uio.uio_rw = UIO_READ;
1876 	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
1877 
1878 	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
1879 	if (rc == EAGAIN)
1880 		return 0;
1881 	else if (rc)
1882 		goto err_stop_timer;
1883 
1884 	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
1885 	    __func__, ep->com.so));
1886 	ep->mpa_pkt_len += uio.uio_offset;
1887 
1888 	/*
1889 	 * If we get more than the supported amount of private data then we must
1890 	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
1891 	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
1892 	 * byte is filled by the soreceive above.
1893 	 */
1894 
1895 	/* Don't even have the MPA message.  Wait for more data to arrive. */
1896 	if (ep->mpa_pkt_len < sizeof(*mpa))
1897 		return 0;
1898 	mpa = (struct mpa_message *) ep->mpa_pkt;
1899 
1900 	/*
1901 	 * Validate MPA Header.
1902 	 */
1903 	if (mpa->revision > mpa_rev) {
1904 		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
1905 		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
1906 		goto err_stop_timer;
1907 	}
1908 
1909 	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1910 		goto err_stop_timer;
1911 
1912 	/*
1913 	 * Fail if there's too much private data.
1914 	 */
1915 	plen = ntohs(mpa->private_data_size);
1916 	if (plen > MPA_MAX_PRIVATE_DATA)
1917 		goto err_stop_timer;
1918 
1919 	/*
1920 	 * Fail if the packet is larger than the MPA header plus plen.
1921 	 */
1922 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1923 		goto err_stop_timer;
1924 
1925 	ep->plen = (u8) plen;
1926 
1927 	/*
1928 	 * If we don't have all the pdata yet, then bail.
1929 	 */
1930 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1931 		return 0;
1932 
1933 	/*
1934 	 * If we get here we have accumulated the entire MPA
1935 	 * request message including private data.
1936 	 */
1937 	ep->mpa_attr.initiator = 0;
1938 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1939 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1940 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1941 	ep->mpa_attr.version = mpa->revision;
1942 	if (mpa->revision == 1)
1943 		ep->tried_with_mpa_v1 = 1;
1944 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1945 
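	/*
	 * For MPA v2 enhanced connections a struct mpa_v2_conn_params
	 * follows the MPA header, carrying the peer's IRD/ORD and the
	 * requested RTR (p2p) mode bits; decode it so the p2p type can be
	 * matched against what we support.
	 */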
1946 	if (mpa->revision == 2) {
1947 		ep->mpa_attr.enhanced_rdma_conn =
1948 		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1949 		if (ep->mpa_attr.enhanced_rdma_conn) {
1950 			struct mpa_v2_conn_params *mpa_v2_params;
1951 			u16 ird, ord;
1952 
1953 			mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
1954 			ird = ntohs(mpa_v2_params->ird);
1955 			ord = ntohs(mpa_v2_params->ord);
1956 
1957 			ep->ird = ird & MPA_V2_IRD_ORD_MASK;
1958 			ep->ord = ord & MPA_V2_IRD_ORD_MASK;
1959 			if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
1960 				if (ord & MPA_V2_RDMA_WRITE_RTR) {
1961 					ep->mpa_attr.p2p_type =
1962 					    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1963 				} else if (ord & MPA_V2_RDMA_READ_RTR) {
1964 					ep->mpa_attr.p2p_type =
1965 					    FW_RI_INIT_P2PTYPE_READ_REQ;
1966 				}
1967 			}
1968 		}
1969 	} else if (mpa->revision == 1 && peer2peer)
1970 		ep->mpa_attr.p2p_type = p2p_type;
1971 
1972 	if (set_tcpinfo(ep))
1973 		goto err_stop_timer;
1974 
1975 	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
1976 	    "xmit_marker_enabled = %d, version = %d", __func__,
1977 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1978 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1979 
1980 	state_set(&ep->com, MPA_REQ_RCVD);
1981 	STOP_EP_TIMER(ep);
1982 
1983 	/* drive upcall */
1984 	mutex_lock(&ep->parent_ep->com.mutex);
1985 	if (ep->parent_ep->com.state != DEAD) {
1986 		if (connect_request_upcall(ep))
1987 			goto err_unlock_parent;
1988 	} else
1989 		goto err_unlock_parent;
1990 	mutex_unlock(&ep->parent_ep->com.mutex);
1991 	return 0;
1992 
1993 err_unlock_parent:
1994 	mutex_unlock(&ep->parent_ep->com.mutex);
1995 	goto err_out;
1996 err_stop_timer:
1997 	STOP_EP_TIMER(ep);
1998 err_out:
1999 	return 2;
2000 }
2001 
2002 /*
2003  * Reject a connect request on behalf of the ULP: send an MPA reply
2004  * marked as rejected (or abort outright if MPA is disabled) and then
2005  * disconnect.
2006  */
2007 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2008 {
2009 	int err;
2010 	struct c4iw_ep *ep = to_ep(cm_id);
2011 	int abort = 0;
2012 	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2013 
2014 	if ((state_read(&ep->com) == DEAD) ||
2015 			(state_read(&ep->com) != MPA_REQ_RCVD)) {
2016 
2017 		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2018 		c4iw_put_ep(&ep->com);
2019 		return -ECONNRESET;
2020 	}
2021 	set_bit(ULP_REJECT, &ep->com.history);
2022 
2023 	if (mpa_rev == 0) {
2024 
2025 		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2026 		abort = 1;
2027 	}
2028 	else {
2029 
2030 		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2031 		abort = send_mpa_reject(ep, pdata, pdata_len);
2032 	}
2033 	stop_ep_timer(ep);
2034 	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2035 	c4iw_put_ep(&ep->com);
2036 	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2037 	return 0;
2038 }
2039 
2040 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2041 {
2042 	int err;
2043 	struct c4iw_qp_attributes attrs;
2044 	enum c4iw_qp_attr_mask mask;
2045 	struct c4iw_ep *ep = to_ep(cm_id);
2046 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2047 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2048 	int abort = 0;
2049 
2050 	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2051 
2052 	if (state_read(&ep->com) == DEAD) {
2053 
2054 		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2055 		err = -ECONNRESET;
2056 		goto err_out;
2057 	}
2058 
2059 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2060 	BUG_ON(!qp);
2061 
2062 	set_bit(ULP_ACCEPT, &ep->com.history);
2063 
2064 	if ((conn_param->ord > c4iw_max_read_depth) ||
2065 		(conn_param->ird > c4iw_max_read_depth)) {
2066 
2067 		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2068 		err = -EINVAL;
2069 		goto err_abort;
2070 	}
2071 
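	/*
	 * For an MPA v2 enhanced connection the ORD requested by the ULP
	 * must fit within the IRD the peer advertised, and the requested
	 * IRD must not exceed the peer's ORD (with a special case when the
	 * peer's ORD is zero).
	 */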
2072 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2073 
2074 		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2075 
2076 		if (conn_param->ord > ep->ird) {
2077 
2078 			CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep);
2079 			ep->ird = conn_param->ird;
2080 			ep->ord = conn_param->ord;
2081 			send_mpa_reject(ep, conn_param->private_data,
2082 					conn_param->private_data_len);
2083 			err = -ENOMEM;
2084 			goto err_abort;
2085 		}
2086 
2087 		if (conn_param->ird > ep->ord) {
2088 
2089 			CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep);
2090 
2091 			if (!ep->ord) {
2092 
2093 				CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep);
2094 				conn_param->ird = 1;
2095 			}
2096 			else {
2097 				CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
2098 				err = -ENOMEM;
2099 				goto err_abort;
2100 			}
2101 		}
2102 
2103 	}
2104 	ep->ird = conn_param->ird;
2105 	ep->ord = conn_param->ord;
2106 
2107 	if (ep->mpa_attr.version != 2) {
2108 
2109 		CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep);
2110 
2111 		if (peer2peer && ep->ird == 0) {
2112 
2113 			CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep);
2114 			ep->ird = 1;
2115 		}
2116 	}
2117 
2118 
2119 	ep->com.cm_id = cm_id;
2120 	ref_cm_id(&ep->com);
2121 	ep->com.qp = qp;
2122 	ref_qp(ep);
2123 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2124 
2125 	/* bind QP to EP and move to RTS */
2126 	attrs.mpa_attr = ep->mpa_attr;
2127 	attrs.max_ird = ep->ird;
2128 	attrs.max_ord = ep->ord;
2129 	attrs.llp_stream_handle = ep;
2130 	attrs.next_state = C4IW_QP_STATE_RTS;
2131 
2132 	/* bind QP and TID with INIT_WR */
2133 	mask = C4IW_QP_ATTR_NEXT_STATE |
2134 		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2135 		C4IW_QP_ATTR_MPA_ATTR |
2136 		C4IW_QP_ATTR_MAX_IRD |
2137 		C4IW_QP_ATTR_MAX_ORD;
2138 
2139 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2140 
2141 	if (err) {
2142 
2143 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
2144 		goto err_deref_cm_id;
2145 	}
2146 	err = send_mpa_reply(ep, conn_param->private_data,
2147 			conn_param->private_data_len);
2148 
2149 	if (err) {
2150 
2151 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
2152 		goto err_deref_cm_id;
2153 	}
2154 
2155 	state_set(&ep->com, FPDU_MODE);
2156 	established_upcall(ep);
2157 	c4iw_put_ep(&ep->com);
2158 	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2159 	return 0;
2160 err_deref_cm_id:
2161 	deref_cm_id(&ep->com);
2162 err_abort:
2163 	abort = 1;
2164 err_out:
2165 	if (abort)
2166 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2167 	c4iw_put_ep(&ep->com);
2168 	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2169 	return err;
2170 }
2171 
2174 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2175 {
2176 	int err = 0;
2177 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2178 	struct c4iw_ep *ep = NULL;
2179 	struct nhop4_extended nh4;
2180 
2181 	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2182 
2183 	if ((conn_param->ord > c4iw_max_read_depth) ||
2184 		(conn_param->ird > c4iw_max_read_depth)) {
2185 
2186 		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2187 		err = -EINVAL;
2188 		goto out;
2189 	}
2190 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2191 	init_timer(&ep->timer);
2192 	ep->plen = conn_param->private_data_len;
2193 
2194 	if (ep->plen) {
2195 
2196 		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2197 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2198 				conn_param->private_data, ep->plen);
2199 	}
2200 	ep->ird = conn_param->ird;
2201 	ep->ord = conn_param->ord;
2202 
2203 	if (peer2peer && ep->ord == 0) {
2204 
2205 		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2206 		ep->ord = 1;
2207 	}
2208 
2209 	ep->com.dev = dev;
2210 	ep->com.cm_id = cm_id;
2211 	ref_cm_id(&ep->com);
2212 	ep->com.qp = get_qhp(dev, conn_param->qpn);
2213 
2214 	if (!ep->com.qp) {
2215 
2216 		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2217 		err = -EINVAL;
2218 		goto fail2;
2219 	}
2220 	ref_qp(ep);
2221 	ep->com.thread = curthread;
2222 	ep->com.so = cm_id->so;
2223 
2224 	/* find a route */
2225 	err = find_route(
2226 		cm_id->local_addr.sin_addr.s_addr,
2227 		cm_id->remote_addr.sin_addr.s_addr,
2228 		cm_id->local_addr.sin_port,
2229 		cm_id->remote_addr.sin_port, 0, &nh4);
2230 
2231 	if (err) {
2232 
2233 		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2234 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2235 		err = -EHOSTUNREACH;
2236 		goto fail2;
2237 	}
2238 
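	/* The egress interface must be TOE capable with an active TOE driver. */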
2239 	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE) ||
2240 	    TOEDEV(nh4.nh_ifp) == NULL) {
2241 		err = -ENOPROTOOPT;
2242 		goto fail3;
2243 	}
2244 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
2245 
2246 	setiwsockopt(cm_id->so);
2247 	state_set(&ep->com, CONNECTING);
2248 	ep->tos = 0;
2249 	ep->com.local_addr = cm_id->local_addr;
2250 	ep->com.remote_addr = cm_id->remote_addr;
2251 	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2252 		ep->com.thread);
2253 
2254 	if (!err) {
2255 		init_iwarp_socket(cm_id->so, &ep->com);
2256 		goto out;
2257 	} else {
2258 		goto fail2;
2259 	}
2260 
2261 fail3:
2262 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
2263 fail2:
2264 	deref_cm_id(&ep->com);
2265 	c4iw_put_ep(&ep->com);
2266 	ep = NULL;	/* CTR shouldn't display already-freed ep. */
2267 out:
2268 	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
2269 	return err;
2270 }
2271 
2272 /*
2273  * iwcm->create_listen_ep.  Returns -errno on failure.
2274  */
2275 int
2276 c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
2277 {
2278 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2279 	struct c4iw_listen_ep *ep;
2280 	struct socket *so = cm_id->so;
2281 
2282 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2283 	ep->com.cm_id = cm_id;
2284 	ref_cm_id(&ep->com);
2285 	ep->com.dev = dev;
2286 	ep->backlog = backlog;
2287 	ep->com.local_addr = cm_id->local_addr;
2288 	ep->com.thread = curthread;
2289 	state_set(&ep->com, LISTEN);
2290 	ep->com.so = so;
2291 
2292 	cm_id->provider_data = ep;
2293 	return (0);
2294 }
2295 
2296 void
2297 c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
2298 {
2299 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
2300 
2301 	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
2302 	    cm_id->so, states[ep->com.state]);
2303 
2304 	state_set(&ep->com, DEAD);
2305 	deref_cm_id(&ep->com);
2306 	c4iw_put_ep(&ep->com);
2307 
2308 	return;
2309 }
2310 
2311 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2312 {
2313 	int ret = 0;
2314 	int close = 0;
2315 	int fatal = 0;
2316 	struct c4iw_rdev *rdev;
2317 
2318 	mutex_lock(&ep->com.mutex);
2319 
2320 	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2321 
2322 	rdev = &ep->com.dev->rdev;
2323 
2324 	if (c4iw_fatal_error(rdev)) {
2325 
2326 		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2327 		fatal = 1;
2328 		close_complete_upcall(ep, -ECONNRESET);
2329 		ep->com.state = DEAD;
2330 	}
2331 	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2332 	    states[ep->com.state]);
2333 
2334 	switch (ep->com.state) {
2335 
2336 		case MPA_REQ_WAIT:
2337 		case MPA_REQ_SENT:
2338 		case MPA_REQ_RCVD:
2339 		case MPA_REP_SENT:
2340 		case FPDU_MODE:
2341 			close = 1;
2342 			if (abrupt)
2343 				ep->com.state = ABORTING;
2344 			else {
2345 				ep->com.state = CLOSING;
2346 				START_EP_TIMER(ep);
2347 			}
2348 			set_bit(CLOSE_SENT, &ep->com.flags);
2349 			break;
2350 
2351 		case CLOSING:
2352 
2353 			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2354 
2355 				close = 1;
2356 				if (abrupt) {
2357 					STOP_EP_TIMER(ep);
2358 					ep->com.state = ABORTING;
2359 				} else
2360 					ep->com.state = MORIBUND;
2361 			}
2362 			break;
2363 
2364 		case MORIBUND:
2365 		case ABORTING:
2366 		case DEAD:
2367 			CTR3(KTR_IW_CXGBE,
2368 			    "%s ignoring disconnect ep %p state %u", __func__,
2369 			    ep, ep->com.state);
2370 			break;
2371 
2372 		default:
2373 			BUG();
2374 			break;
2375 	}
2376 
2377 	mutex_unlock(&ep->com.mutex);
2378 
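	/*
	 * The upcalls and socket work happen outside the ep mutex: an
	 * abrupt disconnect sends an ABORT, a graceful one shuts the
	 * socket down and lets the normal close path finish.
	 */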
2379 	if (close) {
2380 
2381 		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2382 
2383 		if (abrupt) {
2384 
2385 			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2386 			set_bit(EP_DISC_ABORT, &ep->com.history);
2387 			close_complete_upcall(ep, -ECONNRESET);
2388 			ret = send_abort(ep);
2389 			if (ret)
2390 				fatal = 1;
2391 		} else {
2392 
2393 			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2394 			set_bit(EP_DISC_CLOSE, &ep->com.history);
2395 
2396 			if (!ep->parent_ep)
2397 				__state_set(&ep->com, MORIBUND);
2398 			sodisconnect(ep->com.so);
2399 		}
2400 
2401 	}
2402 
2403 	if (fatal) {
2404 		set_bit(EP_DISC_FAIL, &ep->com.history);
2405 		if (!abrupt) {
2406 			STOP_EP_TIMER(ep);
2407 			close_complete_upcall(ep, -EIO);
2408 		}
2409 		if (ep->com.qp) {
2410 			struct c4iw_qp_attributes attrs;
2411 
2412 			attrs.next_state = C4IW_QP_STATE_ERROR;
2413 			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2414 						C4IW_QP_ATTR_NEXT_STATE,
2415 						&attrs, 1);
2416 			if (ret) {
2417 				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2418 				printf("%s - qp <- error failed!\n", __func__);
2419 			}
2420 		}
2421 		release_ep_resources(ep);
2422 		ep->com.state = DEAD;
2423 		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2424 	}
2425 	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2426 	return ret;
2427 }
2428 
2429 #ifdef C4IW_EP_REDIRECT
2430 int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2431 		struct l2t_entry *l2t)
2432 {
2433 	struct c4iw_ep *ep = ctx;
2434 
2435 	if (ep->dst != old)
2436 		return 0;
2437 
2438 	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2439 			l2t);
2440 	dst_hold(new);
2441 	cxgb4_l2t_release(ep->l2t);
2442 	ep->l2t = l2t;
2443 	dst_release(old);
2444 	ep->dst = new;
2445 	return 1;
2446 }
2447 #endif
2448 
2451 static void ep_timeout(unsigned long arg)
2452 {
2453 	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2454 
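	/*
	 * Runs in timer (callout) context; defer the real timeout handling
	 * to the taskqueue by putting the ep on the request list.
	 */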
2455 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2456 
2457 		/*
2458 		 * Only insert if it is not already on the list.
2459 		 */
2460 		if (!(ep->com.ep_events & C4IW_EVENT_TIMEOUT)) {
2461 			CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2462 			add_ep_to_req_list(ep, C4IW_EVENT_TIMEOUT);
2463 		}
2464 	}
2465 }
2466 
2467 static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2468 {
2469 	uint64_t val = be64toh(*rpl);
2470 	int ret;
2471 	struct c4iw_wr_wait *wr_waitp;
2472 
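	/*
	 * The FW6 WR reply carries the completion status in bits 8..15 of
	 * the first (big-endian) word and the submitter's c4iw_wr_wait
	 * cookie in the second word; wake the waiter with the (negated)
	 * status.
	 */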
2473 	ret = (int)((val >> 8) & 0xff);
2474 	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2475 	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2476 	if (wr_waitp)
2477 		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2478 
2479 	return (0);
2480 }
2481 
2482 static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2483 {
2484 	struct cqe_list_entry *cle;
2485 	unsigned long flag;
2486 
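	/*
	 * Error CQEs arrive in interrupt context; queue them on
	 * err_cqe_list and let the taskqueue deliver them to the
	 * affected CQ/QP.
	 */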
2487 	cle = malloc(sizeof(*cle), M_CXGBE, M_NOWAIT);
	if (cle == NULL)
		return (0);	/* XXX: error CQE is dropped if allocation fails */
2488 	cle->rhp = sc->iwarp_softc;
2489 	cle->err_cqe = *(const struct t4_cqe *)(&rpl[0]);
2490 
2491 	spin_lock_irqsave(&err_cqe_lock, flag);
2492 	list_add_tail(&cle->entry, &err_cqe_list);
2493 	queue_work(c4iw_taskq, &c4iw_task);
2494 	spin_unlock_irqrestore(&err_cqe_lock, flag);
2495 
2496 	return (0);
2497 }
2498 
2499 static int
2500 process_terminate(struct c4iw_ep *ep)
2501 {
2502 	struct c4iw_qp_attributes attrs;
2503 
2504 	CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);
2505 
2506 	if (ep && ep->com.qp) {
2507 
2508 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n",
2509 				ep->hwtid, ep->com.qp->wq.sq.qid);
2510 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2511 		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2512 				1);
2513 	} else
2514 		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n",
2515 								ep ? ep->hwtid : 0);
2516 	CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);
2517 
2518 	return 0;
2519 }
2520 
2521 int __init c4iw_cm_init(void)
2522 {
2523 
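	/*
	 * Register the CPL, FW6 and async-notification handlers this
	 * module needs, then set up the deferred-work infrastructure
	 * (request list, error CQE list and the iw_cxgbe taskqueue).
	 */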
2524 	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2525 	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2526 	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2527 	t4_register_an_handler(c4iw_ev_handler);
2528 
2529 	TAILQ_INIT(&req_list);
2530 	spin_lock_init(&req_lock);
2531 	INIT_LIST_HEAD(&err_cqe_list);
2532 	spin_lock_init(&err_cqe_lock);
2533 
2534 	INIT_WORK(&c4iw_task, process_req);
2535 
2536 	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2537 	if (!c4iw_taskq)
2538 		return -ENOMEM;
2539 
2540 	return 0;
2541 }
2542 
2543 void __exit c4iw_cm_term(void)
2544 {
2545 	WARN_ON(!TAILQ_EMPTY(&req_list));
2546 	WARN_ON(!list_empty(&err_cqe_list));
2547 	flush_workqueue(c4iw_taskq);
2548 	destroy_workqueue(c4iw_taskq);
2549 
2550 	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2551 	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2552 	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2553 	t4_register_an_handler(NULL);
2554 }
2555 #endif
2556