1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
5  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
6  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
7  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
9  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
10  *
11  * This software is available to you under a choice of one of two
12  * licenses.  You may choose to be licensed under the terms of the GNU
13  * General Public License (GPL) Version 2, available from the file
14  * COPYING in the main directory of this source tree, or the
15  * OpenIB.org BSD license below:
16  *
17  *     Redistribution and use in source and binary forms, with or
18  *     without modification, are permitted provided that the following
19  *     conditions are met:
20  *
21  *      - Redistributions of source code must retain the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer.
24  *
25  *      - Redistributions in binary form must reproduce the above
26  *        copyright notice, this list of conditions and the following
27  *        disclaimer in the documentation and/or other materials
28  *        provided with the distribution.
29  *
30  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37  * SOFTWARE.
38  */
39 
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <linux/dma-mapping.h>
44 #include <linux/err.h>
45 #include <linux/idr.h>
46 #include <linux/interrupt.h>
47 #include <linux/rbtree.h>
48 #include <linux/sched.h>
49 #include <linux/spinlock.h>
50 #include <linux/workqueue.h>
51 #include <linux/completion.h>
52 #include <linux/slab.h>
53 #include <linux/module.h>
54 #include <linux/wait.h>
55 
56 #include <rdma/iw_cm.h>
57 #include <rdma/ib_addr.h>
58 #include <rdma/iw_portmap.h>
59 
60 #include "iwcm.h"
61 
62 MODULE_AUTHOR("Tom Tucker");
63 MODULE_DESCRIPTION("iWARP CM");
64 MODULE_LICENSE("Dual BSD/GPL");
65 
66 static struct workqueue_struct *iwcm_wq;
67 struct iwcm_work {
68 	struct work_struct work;
69 	struct iwcm_id_private *cm_id;
70 	struct list_head list;
71 	struct iw_cm_event event;
72 	struct list_head free_list;
73 };
74 
75 static unsigned int default_backlog = 256;
76 
77 /*
78  * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
80  *	LISTENING IDS: 	Get enough elements preallocated to handle the
81  *			listen backlog.
82  *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
83  *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
84  *
85  * Allocating them in connect and listen avoids having to deal
86  * with allocation failures on the event upcall from the provider (which
87  * is called in the interrupt context).
88  *
89  * One exception is when creating the cm_id for incoming connection requests.
90  * There are two cases:
91  * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
92  *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It is
 *    up to the provider to reject the connection request.
95  * 2) in the connection request workqueue handler, cm_conn_req_handler().
96  *    If work elements cannot be allocated for the new connect request cm_id,
97  *    then IWCM will call the provider reject method.  This is ok since
98  *    cm_conn_req_handler() runs in the workqueue thread context.
99  */
100 
101 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
102 {
103 	struct iwcm_work *work;
104 
105 	if (list_empty(&cm_id_priv->work_free_list))
106 		return NULL;
107 	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
108 			  free_list);
109 	list_del_init(&work->free_list);
110 	return work;
111 }
112 
113 static void put_work(struct iwcm_work *work)
114 {
115 	list_add(&work->free_list, &work->cm_id->work_free_list);
116 }
117 
118 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
119 {
120 	struct list_head *e, *tmp;
121 
122 	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
123 		kfree(list_entry(e, struct iwcm_work, free_list));
124 }
125 
126 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
127 {
128 	struct iwcm_work *work;
129 
130 	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
131 	while (count--) {
132 		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
133 		if (!work) {
134 			dealloc_work_entries(cm_id_priv);
135 			return -ENOMEM;
136 		}
137 		work->cm_id = cm_id_priv;
138 		INIT_LIST_HEAD(&work->list);
139 		put_work(work);
140 	}
141 	return 0;
142 }
143 
144 /*
145  * Save private data from incoming connection requests to
 * iw_cm_event, so the low-level driver doesn't have to. Adjust
 * the event pointer to point to the local copy.
148  */
149 static int copy_private_data(struct iw_cm_event *event)
150 {
151 	void *p;
152 
153 	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
154 	if (!p)
155 		return -ENOMEM;
156 	event->private_data = p;
157 	return 0;
158 }
159 
160 static void free_cm_id(struct iwcm_id_private *cm_id_priv)
161 {
162 	dealloc_work_entries(cm_id_priv);
163 	kfree(cm_id_priv);
164 }
165 
166 /*
167  * Release a reference on cm_id. If the last reference is being
168  * released, free the cm_id and return 1.
169  */
170 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
171 {
	BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
173 	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
174 		BUG_ON(!list_empty(&cm_id_priv->work_list));
175 		free_cm_id(cm_id_priv);
176 		return 1;
177 	}
178 
179 	return 0;
180 }
181 
182 static void add_ref(struct iw_cm_id *cm_id)
183 {
184 	struct iwcm_id_private *cm_id_priv;
185 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
186 	atomic_inc(&cm_id_priv->refcount);
187 }
188 
189 static void rem_ref(struct iw_cm_id *cm_id)
190 {
191 	struct iwcm_id_private *cm_id_priv;
192 
193 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
194 
195 	(void)iwcm_deref_id(cm_id_priv);
196 }
197 
198 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
199 
200 struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
201 				 iw_cm_handler cm_handler,
202 				 void *context)
203 {
204 	struct iwcm_id_private *cm_id_priv;
205 
206 	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
207 	if (!cm_id_priv)
208 		return ERR_PTR(-ENOMEM);
209 
210 	cm_id_priv->state = IW_CM_STATE_IDLE;
211 	cm_id_priv->id.device = device;
212 	cm_id_priv->id.cm_handler = cm_handler;
213 	cm_id_priv->id.context = context;
214 	cm_id_priv->id.event_handler = cm_event_handler;
215 	cm_id_priv->id.add_ref = add_ref;
216 	cm_id_priv->id.rem_ref = rem_ref;
217 	spin_lock_init(&cm_id_priv->lock);
218 	atomic_set(&cm_id_priv->refcount, 1);
219 	init_waitqueue_head(&cm_id_priv->connect_wait);
220 	init_completion(&cm_id_priv->destroy_comp);
221 	INIT_LIST_HEAD(&cm_id_priv->work_list);
222 	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
223 
224 	return &cm_id_priv->id;
225 }
226 EXPORT_SYMBOL(iw_create_cm_id);
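
/*
 * Usage sketch (illustrative only, not part of this file): how a kernel
 * consumer might create and later destroy a cm_id.  The handler and
 * context names below are hypothetical.
 *
 *	static int my_cm_handler(struct iw_cm_id *id, struct iw_cm_event *ev)
 *	{
 *		// Inspect ev->event / ev->status; return 0, or non-zero to
 *		// have the IWCM tear the cm_id down.
 *		return 0;
 *	}
 *
 *	struct iw_cm_id *id = iw_create_cm_id(ibdev, my_cm_handler, my_ctx);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	...
 *	iw_destroy_cm_id(id);
 */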
227 
228 
229 static int iwcm_modify_qp_err(struct ib_qp *qp)
230 {
231 	struct ib_qp_attr qp_attr;
232 
233 	if (!qp)
234 		return -EINVAL;
235 
236 	qp_attr.qp_state = IB_QPS_ERR;
237 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
238 }
239 
240 /*
241  * This is really the RDMAC CLOSING state. It is most similar to the
242  * IB SQD QP state.
243  */
244 static int iwcm_modify_qp_sqd(struct ib_qp *qp)
245 {
246 	struct ib_qp_attr qp_attr;
247 
248 	BUG_ON(qp == NULL);
249 	qp_attr.qp_state = IB_QPS_SQD;
250 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
251 }
252 
253 /*
254  * CM_ID <-- CLOSING
255  *
256  * Block if a passive or active connection is currently being processed. Then
257  * process the event as follows:
258  * - If we are ESTABLISHED, move to CLOSING and modify the QP state
259  *   based on the abrupt flag
260  * - If the connection is already in the CLOSING or IDLE state, the peer is
261  *   disconnecting concurrently with us and we've already seen the
262  *   DISCONNECT event -- ignore the request and return 0
263  * - Disconnect on a listening endpoint returns -EINVAL
264  */
265 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
266 {
267 	struct iwcm_id_private *cm_id_priv;
268 	unsigned long flags;
269 	int ret = 0;
270 	struct ib_qp *qp = NULL;
271 
272 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
273 	/* Wait if we're currently in a connect or accept downcall */
274 	wait_event(cm_id_priv->connect_wait,
275 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
276 
277 	spin_lock_irqsave(&cm_id_priv->lock, flags);
278 	switch (cm_id_priv->state) {
279 	case IW_CM_STATE_ESTABLISHED:
280 		cm_id_priv->state = IW_CM_STATE_CLOSING;
281 
		/* QP could be NULL for a user-mode client */
283 		if (cm_id_priv->qp)
284 			qp = cm_id_priv->qp;
285 		else
286 			ret = -EINVAL;
287 		break;
288 	case IW_CM_STATE_LISTEN:
289 		ret = -EINVAL;
290 		break;
291 	case IW_CM_STATE_CLOSING:
292 		/* remote peer closed first */
293 	case IW_CM_STATE_IDLE:
294 		/* accept or connect returned !0 */
295 		break;
296 	case IW_CM_STATE_CONN_RECV:
297 		/*
298 		 * App called disconnect before/without calling accept after
		 * the connect_request event was delivered.
300 		 */
301 		break;
302 	case IW_CM_STATE_CONN_SENT:
303 		/* Can only get here if wait above fails */
304 	default:
305 		BUG();
306 	}
307 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
308 
309 	if (qp) {
310 		if (abrupt)
311 			(void) iwcm_modify_qp_err(qp);
312 		else
313 			(void) iwcm_modify_qp_sqd(qp);
314 
315 		/*
		 * If both sides are disconnecting, the QP could
		 * already be in the ERR or SQD state.
318 		 */
319 		ret = 0;
320 	}
321 
322 	return ret;
323 }
324 EXPORT_SYMBOL(iw_cm_disconnect);
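
/*
 * Usage sketch (illustrative only, not part of this file): typical
 * teardown order for a connected cm_id.  The 'graceful' flag is
 * hypothetical.
 *
 *	// abrupt != 0 moves the QP to ERR; abrupt == 0 moves it to SQD
 *	// (the RDMAC CLOSING state) for an orderly close.
 *	iw_cm_disconnect(id, graceful ? 0 : 1);
 *	// Wait for the CLOSE event in the cm_handler, then release the id:
 *	iw_destroy_cm_id(id);
 */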
325 
326 /*
327  * CM_ID <-- DESTROYING
328  *
329  * Clean up all resources associated with the connection and release
330  * the initial reference taken by iw_create_cm_id.
331  */
332 static void destroy_cm_id(struct iw_cm_id *cm_id)
333 {
334 	struct iwcm_id_private *cm_id_priv;
335 	unsigned long flags;
336 
337 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
338 	/*
339 	 * Wait if we're currently in a connect or accept downcall. A
340 	 * listening endpoint should never block here.
341 	 */
342 	wait_event(cm_id_priv->connect_wait,
343 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
344 
345 	/*
346 	 * Since we're deleting the cm_id, drop any events that
347 	 * might arrive before the last dereference.
348 	 */
349 	set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
350 
351 	spin_lock_irqsave(&cm_id_priv->lock, flags);
352 	switch (cm_id_priv->state) {
353 	case IW_CM_STATE_LISTEN:
354 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
355 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
356 		/* destroy the listening endpoint */
357 		cm_id->device->iwcm->destroy_listen(cm_id);
358 		spin_lock_irqsave(&cm_id_priv->lock, flags);
359 		break;
360 	case IW_CM_STATE_ESTABLISHED:
361 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
362 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
363 		/* Abrupt close of the connection */
364 		(void)iwcm_modify_qp_err(cm_id_priv->qp);
365 		spin_lock_irqsave(&cm_id_priv->lock, flags);
366 		break;
367 	case IW_CM_STATE_IDLE:
368 	case IW_CM_STATE_CLOSING:
369 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
370 		break;
371 	case IW_CM_STATE_CONN_RECV:
372 		/*
373 		 * App called destroy before/without calling accept after
		 * receiving the connection request event notification, or
		 * returned non-zero from the event callback function.
		 * In either case, we must tell the provider to reject.
377 		 */
378 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
379 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
380 		cm_id->device->iwcm->reject(cm_id, NULL, 0);
381 		spin_lock_irqsave(&cm_id_priv->lock, flags);
382 		break;
383 	case IW_CM_STATE_CONN_SENT:
384 	case IW_CM_STATE_DESTROYING:
385 	default:
386 		BUG();
387 		break;
388 	}
389 	if (cm_id_priv->qp) {
390 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
391 		cm_id_priv->qp = NULL;
392 	}
393 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
394 
395 	(void)iwcm_deref_id(cm_id_priv);
396 }
397 
398 /*
399  * This function is only called by the application thread and cannot
 * be called by the event thread. It drops the initial reference taken
 * by iw_create_cm_id; the cm_id object is freed once the last
 * reference has been released.
403  */
404 void iw_destroy_cm_id(struct iw_cm_id *cm_id)
405 {
406 	struct iwcm_id_private *cm_id_priv;
407 
408 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
409 	destroy_cm_id(cm_id);
410 }
411 EXPORT_SYMBOL(iw_destroy_cm_id);
412 
413 /**
 * iw_cm_map - Set up the mapped address fields of a cm_id
 * @cm_id: connection manager pointer
 * @active: Indicates the active side when true
 *
 * On this platform no port mapping is performed (the upstream Linux
 * implementation uses the iWARP port mapper here): the mapped
 * addresses are simply copies of the local and remote addresses, and
 * the function always returns 0.
423  */
424 static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
425 {
426 	cm_id->m_local_addr = cm_id->local_addr;
427 	cm_id->m_remote_addr = cm_id->remote_addr;
428 
429 	return 0;
430 }
431 
432 /*
433  * CM_ID <-- LISTEN
434  *
435  * Start listening for connect requests. Generates one CONNECT_REQUEST
436  * event for each inbound connect request.
437  */
438 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
439 {
440 	struct iwcm_id_private *cm_id_priv;
441 	unsigned long flags;
442 	int ret;
443 
444 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
445 
446 	if (!backlog)
447 		backlog = default_backlog;
448 
449 	ret = alloc_work_entries(cm_id_priv, backlog);
450 	if (ret)
451 		return ret;
452 
453 	spin_lock_irqsave(&cm_id_priv->lock, flags);
454 	switch (cm_id_priv->state) {
455 	case IW_CM_STATE_IDLE:
456 		cm_id_priv->state = IW_CM_STATE_LISTEN;
457 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
458 		ret = iw_cm_map(cm_id, false);
459 		if (!ret)
460 			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
461 		if (ret)
462 			cm_id_priv->state = IW_CM_STATE_IDLE;
463 		spin_lock_irqsave(&cm_id_priv->lock, flags);
464 		break;
465 	default:
466 		ret = -EINVAL;
467 	}
468 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
469 
470 	return ret;
471 }
472 EXPORT_SYMBOL(iw_cm_listen);
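
/*
 * Usage sketch (illustrative only, not part of this file): passive-side
 * setup.  The caller fills in local_addr before listening; the names and
 * port are hypothetical and error handling is abbreviated.
 *
 *	struct iw_cm_id *lid = iw_create_cm_id(ibdev, my_cm_handler, my_ctx);
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&lid->local_addr;
 *
 *	sin->sin_family = AF_INET;
 *	sin->sin_port = htons(MY_PORT);
 *	sin->sin_addr.s_addr = htonl(INADDR_ANY);
 *	if (iw_cm_listen(lid, 0))		// 0 selects default_backlog
 *		iw_destroy_cm_id(lid);
 *	// CONNECT_REQUEST events now arrive via my_cm_handler().
 */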
473 
474 /*
475  * CM_ID <-- IDLE
476  *
477  * Rejects an inbound connection request. No events are generated.
478  */
479 int iw_cm_reject(struct iw_cm_id *cm_id,
480 		 const void *private_data,
481 		 u8 private_data_len)
482 {
483 	struct iwcm_id_private *cm_id_priv;
484 	unsigned long flags;
485 	int ret;
486 
487 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
488 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
489 
490 	spin_lock_irqsave(&cm_id_priv->lock, flags);
491 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
492 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
493 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
494 		wake_up_all(&cm_id_priv->connect_wait);
495 		return -EINVAL;
496 	}
497 	cm_id_priv->state = IW_CM_STATE_IDLE;
498 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
499 
500 	ret = cm_id->device->iwcm->reject(cm_id, private_data,
501 					  private_data_len);
502 
503 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
504 	wake_up_all(&cm_id_priv->connect_wait);
505 
506 	return ret;
507 }
508 EXPORT_SYMBOL(iw_cm_reject);
509 
510 /*
511  * CM_ID <-- ESTABLISHED
512  *
513  * Accepts an inbound connection request and generates an ESTABLISHED
514  * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
515  * until the ESTABLISHED event is received from the provider.
516  */
517 int iw_cm_accept(struct iw_cm_id *cm_id,
518 		 struct iw_cm_conn_param *iw_param)
519 {
520 	struct iwcm_id_private *cm_id_priv;
521 	struct ib_qp *qp;
522 	unsigned long flags;
523 	int ret;
524 
525 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
526 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
527 
528 	spin_lock_irqsave(&cm_id_priv->lock, flags);
529 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
530 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
531 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
532 		wake_up_all(&cm_id_priv->connect_wait);
533 		return -EINVAL;
534 	}
535 	/* Get the ib_qp given the QPN */
536 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
537 	if (!qp) {
538 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
539 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
540 		wake_up_all(&cm_id_priv->connect_wait);
541 		return -EINVAL;
542 	}
543 	cm_id->device->iwcm->add_ref(qp);
544 	cm_id_priv->qp = qp;
545 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
546 
547 	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
548 	if (ret) {
549 		/* An error on accept precludes provider events */
550 		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
551 		cm_id_priv->state = IW_CM_STATE_IDLE;
552 		spin_lock_irqsave(&cm_id_priv->lock, flags);
553 		if (cm_id_priv->qp) {
554 			cm_id->device->iwcm->rem_ref(qp);
555 			cm_id_priv->qp = NULL;
556 		}
557 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
558 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
559 		wake_up_all(&cm_id_priv->connect_wait);
560 	}
561 
562 	return ret;
563 }
564 EXPORT_SYMBOL(iw_cm_accept);
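
/*
 * Usage sketch (illustrative only, not part of this file): accepting from
 * within the cm_handler when a CONNECT_REQUEST event arrives on the child
 * cm_id.  The QP and private-data names are hypothetical.
 *
 *	case IW_CM_EVENT_CONNECT_REQUEST: {
 *		struct iw_cm_conn_param param = {
 *			.qpn		  = my_qp->qp_num,
 *			.ord		  = 1,
 *			.ird		  = 1,
 *			.private_data	  = pdata,
 *			.private_data_len = pdata_len,
 *		};
 *		// 'id' is the new child cm_id passed to the handler.
 *		return iw_cm_accept(id, &param);
 *	}
 */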
565 
566 /*
567  * Active Side: CM_ID <-- CONN_SENT
568  *
569  * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
571  * CONNECT_REPLY event is received from the provider.
572  */
573 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
574 {
575 	struct iwcm_id_private *cm_id_priv;
576 	int ret;
577 	unsigned long flags;
578 	struct ib_qp *qp;
579 
580 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
581 
582 	ret = alloc_work_entries(cm_id_priv, 4);
583 	if (ret)
584 		return ret;
585 
586 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
587 	spin_lock_irqsave(&cm_id_priv->lock, flags);
588 
589 	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
590 		ret = -EINVAL;
591 		goto err;
592 	}
593 
594 	/* Get the ib_qp given the QPN */
595 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
596 	if (!qp) {
597 		ret = -EINVAL;
598 		goto err;
599 	}
600 	cm_id->device->iwcm->add_ref(qp);
601 	cm_id_priv->qp = qp;
602 	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
603 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
604 
605 	ret = iw_cm_map(cm_id, true);
606 	if (!ret)
607 		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
608 	if (!ret)
609 		return 0;	/* success */
610 
611 	spin_lock_irqsave(&cm_id_priv->lock, flags);
612 	if (cm_id_priv->qp) {
613 		cm_id->device->iwcm->rem_ref(qp);
614 		cm_id_priv->qp = NULL;
615 	}
616 	cm_id_priv->state = IW_CM_STATE_IDLE;
617 err:
618 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
619 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
620 	wake_up_all(&cm_id_priv->connect_wait);
621 	return ret;
622 }
623 EXPORT_SYMBOL(iw_cm_connect);
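
/*
 * Usage sketch (illustrative only, not part of this file): active-side
 * connect.  The caller fills in the address fields and supplies its QP
 * number; names are hypothetical and error handling is abbreviated.
 *
 *	struct iw_cm_id *id = iw_create_cm_id(ibdev, my_cm_handler, my_ctx);
 *	struct iw_cm_conn_param param = {
 *		.qpn = my_qp->qp_num,
 *		.ord = 1,
 *		.ird = 1,
 *	};
 *
 *	// ... fill in id->local_addr and id->remote_addr here ...
 *	if (iw_cm_connect(id, &param))
 *		iw_destroy_cm_id(id);
 *	// Otherwise the outcome arrives as a CONNECT_REPLY event.
 */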
624 
625 /*
626  * Passive Side: new CM_ID <-- CONN_RECV
627  *
628  * Handles an inbound connect request. The function creates a new
629  * iw_cm_id to represent the new connection and inherits the client
630  * callback function and other attributes from the listening parent.
631  *
632  * The work item contains a pointer to the listen_cm_id and the event. The
633  * listen_cm_id contains the client cm_handler, context and
 * device.  These are copied to the new cm_id when it is created.  The event
 * contains the new four-tuple.
636  *
637  * An error on the child should not affect the parent, so this
638  * function does not return a value.
639  */
640 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
641 				struct iw_cm_event *iw_event)
642 {
643 	unsigned long flags;
644 	struct iw_cm_id *cm_id;
645 	struct iwcm_id_private *cm_id_priv;
646 	int ret;
647 
648 	/*
649 	 * The provider should never generate a connection request
650 	 * event with a bad status.
651 	 */
652 	BUG_ON(iw_event->status);
653 
654 	cm_id = iw_create_cm_id(listen_id_priv->id.device,
655 				listen_id_priv->id.cm_handler,
656 				listen_id_priv->id.context);
657 	/* If the cm_id could not be created, ignore the request */
658 	if (IS_ERR(cm_id))
659 		goto out;
660 
661 	cm_id->provider_data = iw_event->provider_data;
662 	cm_id->m_local_addr = iw_event->local_addr;
663 	cm_id->m_remote_addr = iw_event->remote_addr;
664 	cm_id->local_addr = listen_id_priv->id.local_addr;
665 	cm_id->remote_addr = iw_event->remote_addr;
666 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
667 	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
668 
669 	/*
670 	 * We could be destroying the listening id. If so, ignore this
671 	 * upcall.
672 	 */
673 	spin_lock_irqsave(&listen_id_priv->lock, flags);
674 	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
675 		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
676 		iw_cm_reject(cm_id, NULL, 0);
677 		iw_destroy_cm_id(cm_id);
678 		goto out;
679 	}
680 	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
681 
682 	ret = alloc_work_entries(cm_id_priv, 3);
683 	if (ret) {
684 		iw_cm_reject(cm_id, NULL, 0);
685 		iw_destroy_cm_id(cm_id);
686 		goto out;
687 	}
688 
689 	/* Call the client CM handler */
690 	ret = cm_id->cm_handler(cm_id, iw_event);
691 	if (ret) {
692 		iw_cm_reject(cm_id, NULL, 0);
693 		iw_destroy_cm_id(cm_id);
694 	}
695 
696 out:
697 	if (iw_event->private_data_len)
698 		kfree(iw_event->private_data);
699 }
700 
701 /*
702  * Passive Side: CM_ID <-- ESTABLISHED
703  *
704  * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
706  * FPDU mode.
707  *
708  * This event can only be received in the CONN_RECV state. If the
709  * remote peer closed, the ESTABLISHED event would be received followed
710  * by the CLOSE event. If the app closes, it will block until we wake
711  * it up after processing this event.
712  */
713 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
714 			       struct iw_cm_event *iw_event)
715 {
716 	unsigned long flags;
717 	int ret;
718 
719 	spin_lock_irqsave(&cm_id_priv->lock, flags);
720 
721 	/*
722 	 * We clear the CONNECT_WAIT bit here to allow the callback
723 	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
724 	 * from a callback handler is not allowed.
725 	 */
726 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
727 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
728 	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
729 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
730 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
731 	wake_up_all(&cm_id_priv->connect_wait);
732 
733 	return ret;
734 }
735 
736 /*
737  * Active Side: CM_ID <-- ESTABLISHED
738  *
 * The app has called connect and is waiting for the established event so
 * it can post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
742  */
743 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
744 			       struct iw_cm_event *iw_event)
745 {
746 	unsigned long flags;
747 	int ret;
748 
749 	spin_lock_irqsave(&cm_id_priv->lock, flags);
750 	/*
751 	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread.
753 	 */
754 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
755 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
756 	if (iw_event->status == 0) {
757 		cm_id_priv->id.m_local_addr = iw_event->local_addr;
758 		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
759 		iw_event->local_addr = cm_id_priv->id.local_addr;
760 		iw_event->remote_addr = cm_id_priv->id.remote_addr;
761 		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
762 	} else {
763 		/* REJECTED or RESET */
764 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
765 		cm_id_priv->qp = NULL;
766 		cm_id_priv->state = IW_CM_STATE_IDLE;
767 	}
768 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
769 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
770 
771 	if (iw_event->private_data_len)
772 		kfree(iw_event->private_data);
773 
774 	/* Wake up waiters on connect complete */
775 	wake_up_all(&cm_id_priv->connect_wait);
776 
777 	return ret;
778 }
779 
780 /*
781  * CM_ID <-- CLOSING
782  *
783  * If in the ESTABLISHED state, move to CLOSING.
784  */
785 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
786 				  struct iw_cm_event *iw_event)
787 {
788 	unsigned long flags;
789 
790 	spin_lock_irqsave(&cm_id_priv->lock, flags);
791 	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
792 		cm_id_priv->state = IW_CM_STATE_CLOSING;
793 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
794 }
795 
796 /*
797  * CM_ID <-- IDLE
798  *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
802  *
803  * If in some other state, the cm_id was destroyed asynchronously.
804  * This is the last reference that will result in waking up
805  * the app thread blocked in iw_destroy_cm_id.
806  */
807 static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
808 				  struct iw_cm_event *iw_event)
809 {
810 	unsigned long flags;
811 	int ret = 0;
812 	spin_lock_irqsave(&cm_id_priv->lock, flags);
813 
814 	if (cm_id_priv->qp) {
815 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
816 		cm_id_priv->qp = NULL;
817 	}
818 	switch (cm_id_priv->state) {
819 	case IW_CM_STATE_ESTABLISHED:
820 	case IW_CM_STATE_CLOSING:
821 		cm_id_priv->state = IW_CM_STATE_IDLE;
822 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
823 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
824 		spin_lock_irqsave(&cm_id_priv->lock, flags);
825 		break;
826 	case IW_CM_STATE_DESTROYING:
827 		break;
828 	default:
829 		BUG();
830 	}
831 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
832 
833 	return ret;
834 }
835 
836 static int process_event(struct iwcm_id_private *cm_id_priv,
837 			 struct iw_cm_event *iw_event)
838 {
839 	int ret = 0;
840 
841 	switch (iw_event->event) {
842 	case IW_CM_EVENT_CONNECT_REQUEST:
843 		cm_conn_req_handler(cm_id_priv, iw_event);
844 		break;
845 	case IW_CM_EVENT_CONNECT_REPLY:
846 		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
847 		break;
848 	case IW_CM_EVENT_ESTABLISHED:
849 		ret = cm_conn_est_handler(cm_id_priv, iw_event);
850 		break;
851 	case IW_CM_EVENT_DISCONNECT:
852 		cm_disconnect_handler(cm_id_priv, iw_event);
853 		break;
854 	case IW_CM_EVENT_CLOSE:
855 		ret = cm_close_handler(cm_id_priv, iw_event);
856 		break;
857 	default:
858 		BUG();
859 	}
860 
861 	return ret;
862 }
863 
864 /*
 * Process events on the work_list for the cm_id. If the callback
 * function returns non-zero for an event, destroy_cm_id() is called
 * here to tear the cm_id down synchronously; otherwise the cm_id is
 * freed once its last reference is dropped, e.g. by the app thread
 * blocked in iw_destroy_cm_id. Each work element processed here also
 * drops the reference taken when the event was queued.
872  */
873 static void cm_work_handler(struct work_struct *_work)
874 {
875 	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
876 	struct iw_cm_event levent;
877 	struct iwcm_id_private *cm_id_priv = work->cm_id;
878 	unsigned long flags;
879 	int empty;
880 	int ret = 0;
881 
882 	spin_lock_irqsave(&cm_id_priv->lock, flags);
883 	empty = list_empty(&cm_id_priv->work_list);
884 	while (!empty) {
885 		work = list_entry(cm_id_priv->work_list.next,
886 				  struct iwcm_work, list);
887 		list_del_init(&work->list);
888 		empty = list_empty(&cm_id_priv->work_list);
889 		levent = work->event;
890 		put_work(work);
891 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
892 
893 		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
894 			ret = process_event(cm_id_priv, &levent);
895 			if (ret)
896 				destroy_cm_id(&cm_id_priv->id);
897 		} else
898 			pr_debug("dropping event %d\n", levent.event);
899 		if (iwcm_deref_id(cm_id_priv))
900 			return;
901 		if (empty)
902 			return;
903 		spin_lock_irqsave(&cm_id_priv->lock, flags);
904 	}
905 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
906 }
907 
908 /*
 * This function is called in interrupt context. Schedule events on
910  * the iwcm_wq thread to allow callback functions to downcall into
911  * the CM and/or block.  Events are queued to a per-CM_ID
912  * work_list. If this is the first event on the work_list, the work
913  * element is also queued on the iwcm_wq thread.
914  *
915  * Each event holds a reference on the cm_id. Until the last posted
916  * event has been delivered and processed, the cm_id cannot be
917  * deleted.
918  *
919  * Returns:
920  * 	      0	- the event was handled.
921  *	-ENOMEM	- the event was not handled due to lack of resources.
922  */
923 static int cm_event_handler(struct iw_cm_id *cm_id,
924 			     struct iw_cm_event *iw_event)
925 {
926 	struct iwcm_work *work;
927 	struct iwcm_id_private *cm_id_priv;
928 	unsigned long flags;
929 	int ret = 0;
930 
931 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
932 
933 	spin_lock_irqsave(&cm_id_priv->lock, flags);
934 	work = get_work(cm_id_priv);
935 	if (!work) {
936 		ret = -ENOMEM;
937 		goto out;
938 	}
939 
940 	INIT_WORK(&work->work, cm_work_handler);
941 	work->cm_id = cm_id_priv;
942 	work->event = *iw_event;
943 
944 	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
945 	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
946 	    work->event.private_data_len) {
947 		ret = copy_private_data(&work->event);
948 		if (ret) {
949 			put_work(work);
950 			goto out;
951 		}
952 	}
953 
954 	atomic_inc(&cm_id_priv->refcount);
955 	if (list_empty(&cm_id_priv->work_list)) {
956 		list_add_tail(&work->list, &cm_id_priv->work_list);
957 		queue_work(iwcm_wq, &work->work);
958 	} else
959 		list_add_tail(&work->list, &cm_id_priv->work_list);
960 out:
961 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
962 	return ret;
963 }
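
/*
 * Provider-side sketch (illustrative only, not part of this file): how a
 * low-level driver typically hands an event to the IWCM from its interrupt
 * path.  The event contents and names are hypothetical.
 *
 *	struct iw_cm_event event;
 *
 *	memset(&event, 0, sizeof(event));
 *	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 *	event.status = 0;
 *	event.local_addr = listen_cm_id->local_addr;
 *	event.remote_addr = peer_addr;
 *	event.provider_data = my_conn_ctx;
 *
 *	// listen_cm_id->event_handler points at cm_event_handler() above.
 *	if (listen_cm_id->event_handler(listen_cm_id, &event) == -ENOMEM) {
 *		// The pre-allocated work elements (listen backlog) are
 *		// exhausted; the provider must reject the request itself.
 *	}
 */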
964 
965 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
966 				  struct ib_qp_attr *qp_attr,
967 				  int *qp_attr_mask)
968 {
969 	unsigned long flags;
970 	int ret;
971 
972 	spin_lock_irqsave(&cm_id_priv->lock, flags);
973 	switch (cm_id_priv->state) {
974 	case IW_CM_STATE_IDLE:
975 	case IW_CM_STATE_CONN_SENT:
976 	case IW_CM_STATE_CONN_RECV:
977 	case IW_CM_STATE_ESTABLISHED:
978 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
979 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
980 					   IB_ACCESS_REMOTE_READ;
981 		ret = 0;
982 		break;
983 	default:
984 		ret = -EINVAL;
985 		break;
986 	}
987 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
988 	return ret;
989 }
990 
991 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
992 				  struct ib_qp_attr *qp_attr,
993 				  int *qp_attr_mask)
994 {
995 	unsigned long flags;
996 	int ret;
997 
998 	spin_lock_irqsave(&cm_id_priv->lock, flags);
999 	switch (cm_id_priv->state) {
1000 	case IW_CM_STATE_IDLE:
1001 	case IW_CM_STATE_CONN_SENT:
1002 	case IW_CM_STATE_CONN_RECV:
1003 	case IW_CM_STATE_ESTABLISHED:
1004 		*qp_attr_mask = 0;
1005 		ret = 0;
1006 		break;
1007 	default:
1008 		ret = -EINVAL;
1009 		break;
1010 	}
1011 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1012 	return ret;
1013 }
1014 
1015 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
1016 		       struct ib_qp_attr *qp_attr,
1017 		       int *qp_attr_mask)
1018 {
1019 	struct iwcm_id_private *cm_id_priv;
1020 	int ret;
1021 
1022 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
1023 	switch (qp_attr->qp_state) {
1024 	case IB_QPS_INIT:
1025 	case IB_QPS_RTR:
1026 		ret = iwcm_init_qp_init_attr(cm_id_priv,
1027 					     qp_attr, qp_attr_mask);
1028 		break;
1029 	case IB_QPS_RTS:
1030 		ret = iwcm_init_qp_rts_attr(cm_id_priv,
1031 					    qp_attr, qp_attr_mask);
1032 		break;
1033 	default:
1034 		ret = -EINVAL;
1035 		break;
1036 	}
1037 	return ret;
1038 }
1039 EXPORT_SYMBOL(iw_cm_init_qp_attr);
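
/*
 * Usage sketch (illustrative only, not part of this file): how a caller
 * (for example the RDMA CM acting on behalf of a ULP) might use
 * iw_cm_init_qp_attr() when transitioning a QP.  Names are hypothetical
 * and error handling is abbreviated.
 *
 *	struct ib_qp_attr attr;
 *	int mask;
 *
 *	attr.qp_state = IB_QPS_INIT;
 *	if (iw_cm_init_qp_attr(id, &attr, &mask) == 0)
 *		ib_modify_qp(my_qp, &attr, mask);
 */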
1040 
1041 static int __init iw_cm_init(void)
1042 {
1043 	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
1044 	if (!iwcm_wq)
1045 		return -ENOMEM;
1046 
1047 	return 0;
1048 }
1049 
1050 static void __exit iw_cm_cleanup(void)
1051 {
1052 	destroy_workqueue(iwcm_wq);
1053 }
1054 
1055 module_init_order(iw_cm_init, SI_ORDER_FIRST);
1056 module_exit_order(iw_cm_cleanup, SI_ORDER_FIRST);
1057