1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
5  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
6  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
7  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
9  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
10  *
11  * This software is available to you under a choice of one of two
12  * licenses.  You may choose to be licensed under the terms of the GNU
13  * General Public License (GPL) Version 2, available from the file
14  * COPYING in the main directory of this source tree, or the
15  * OpenIB.org BSD license below:
16  *
17  *     Redistribution and use in source and binary forms, with or
18  *     without modification, are permitted provided that the following
19  *     conditions are met:
20  *
21  *      - Redistributions of source code must retain the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer.
24  *
25  *      - Redistributions in binary form must reproduce the above
26  *        copyright notice, this list of conditions and the following
27  *        disclaimer in the documentation and/or other materials
28  *        provided with the distribution.
29  *
30  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37  * SOFTWARE.
38  *
39  * $FreeBSD$
40  */
41 
42 #include <linux/dma-mapping.h>
43 #include <linux/err.h>
44 #include <linux/idr.h>
45 #include <linux/interrupt.h>
46 #include <linux/rbtree.h>
47 #include <linux/sched.h>
48 #include <linux/spinlock.h>
49 #include <linux/workqueue.h>
50 #include <linux/completion.h>
51 #include <linux/slab.h>
52 #include <linux/module.h>
53 #include <linux/wait.h>
54 
55 #include <rdma/iw_cm.h>
56 #include <rdma/ib_addr.h>
57 #include <rdma/iw_portmap.h>
58 
59 #include "iwcm.h"
60 
61 MODULE_AUTHOR("Tom Tucker");
62 MODULE_DESCRIPTION("iWARP CM");
63 MODULE_LICENSE("Dual BSD/GPL");
64 
65 static struct workqueue_struct *iwcm_wq;
66 struct iwcm_work {
67 	struct work_struct work;
68 	struct iwcm_id_private *cm_id;
69 	struct list_head list;
70 	struct iw_cm_event event;
71 	struct list_head free_list;
72 };
73 
74 static unsigned int default_backlog = 256;
75 
76 /*
77  * The following services provide a mechanism for pre-allocating iwcm_work
78  * elements.  The design pre-allocates them  based on the cm_id type:
79  *	LISTENING IDS: 	Get enough elements preallocated to handle the
80  *			listen backlog.
81  *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
82  *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
83  *
84  * Allocating them in connect and listen avoids having to deal
85  * with allocation failures on the event upcall from the provider (which
86  * is called in the interrupt context).
87  *
88  * One exception is when creating the cm_id for incoming connection requests.
89  * There are two cases:
90  * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
91  *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
93  *    to the provider to reject the connection request.
94  * 2) in the connection request workqueue handler, cm_conn_req_handler().
95  *    If work elements cannot be allocated for the new connect request cm_id,
96  *    then IWCM will call the provider reject method.  This is ok since
97  *    cm_conn_req_handler() runs in the workqueue thread context.
98  */
99 
100 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
101 {
102 	struct iwcm_work *work;
103 
104 	if (list_empty(&cm_id_priv->work_free_list))
105 		return NULL;
106 	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
107 			  free_list);
108 	list_del_init(&work->free_list);
109 	return work;
110 }
111 
112 static void put_work(struct iwcm_work *work)
113 {
114 	list_add(&work->free_list, &work->cm_id->work_free_list);
115 }
116 
117 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
118 {
119 	struct list_head *e, *tmp;
120 
121 	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
122 		kfree(list_entry(e, struct iwcm_work, free_list));
123 }
124 
125 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
126 {
127 	struct iwcm_work *work;
128 
129 	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
130 	while (count--) {
131 		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
132 		if (!work) {
133 			dealloc_work_entries(cm_id_priv);
134 			return -ENOMEM;
135 		}
136 		work->cm_id = cm_id_priv;
137 		INIT_LIST_HEAD(&work->list);
138 		put_work(work);
139 	}
140 	return 0;
141 }
142 
143 /*
144  * Save private data from incoming connection requests to
145  * iw_cm_event, so the low level driver doesn't have to. Adjust
146  * the event ptr to point to the local copy.
147  */
148 static int copy_private_data(struct iw_cm_event *event)
149 {
150 	void *p;
151 
152 	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
153 	if (!p)
154 		return -ENOMEM;
155 	event->private_data = p;
156 	return 0;
157 }
158 
/* Release the cm_id's parked work elements, then the cm_id itself. */
static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	/* The free list lives inside cm_id_priv, so drain it first. */
	dealloc_work_entries(cm_id_priv);
	kfree(cm_id_priv);
}
164 
165 /*
166  * Release a reference on cm_id. If the last reference is being
167  * released, free the cm_id and return 1.
168  */
169 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
170 {
171 	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
172 	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
173 		BUG_ON(!list_empty(&cm_id_priv->work_list));
174 		free_cm_id(cm_id_priv);
175 		return 1;
176 	}
177 
178 	return 0;
179 }
180 
181 static void add_ref(struct iw_cm_id *cm_id)
182 {
183 	struct iwcm_id_private *cm_id_priv;
184 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
185 	atomic_inc(&cm_id_priv->refcount);
186 }
187 
188 static void rem_ref(struct iw_cm_id *cm_id)
189 {
190 	struct iwcm_id_private *cm_id_priv;
191 
192 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
193 
194 	(void)iwcm_deref_id(cm_id_priv);
195 }
196 
197 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
198 
199 struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
200 				 iw_cm_handler cm_handler,
201 				 void *context)
202 {
203 	struct iwcm_id_private *cm_id_priv;
204 
205 	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
206 	if (!cm_id_priv)
207 		return ERR_PTR(-ENOMEM);
208 
209 	cm_id_priv->state = IW_CM_STATE_IDLE;
210 	cm_id_priv->id.device = device;
211 	cm_id_priv->id.cm_handler = cm_handler;
212 	cm_id_priv->id.context = context;
213 	cm_id_priv->id.event_handler = cm_event_handler;
214 	cm_id_priv->id.add_ref = add_ref;
215 	cm_id_priv->id.rem_ref = rem_ref;
216 	spin_lock_init(&cm_id_priv->lock);
217 	atomic_set(&cm_id_priv->refcount, 1);
218 	init_waitqueue_head(&cm_id_priv->connect_wait);
219 	init_completion(&cm_id_priv->destroy_comp);
220 	INIT_LIST_HEAD(&cm_id_priv->work_list);
221 	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
222 
223 	return &cm_id_priv->id;
224 }
225 EXPORT_SYMBOL(iw_create_cm_id);
226 
227 
228 static int iwcm_modify_qp_err(struct ib_qp *qp)
229 {
230 	struct ib_qp_attr qp_attr;
231 
232 	if (!qp)
233 		return -EINVAL;
234 
235 	qp_attr.qp_state = IB_QPS_ERR;
236 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
237 }
238 
239 /*
240  * This is really the RDMAC CLOSING state. It is most similar to the
241  * IB SQD QP state.
242  */
243 static int iwcm_modify_qp_sqd(struct ib_qp *qp)
244 {
245 	struct ib_qp_attr qp_attr;
246 
247 	BUG_ON(qp == NULL);
248 	qp_attr.qp_state = IB_QPS_SQD;
249 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
250 }
251 
252 /*
253  * CM_ID <-- CLOSING
254  *
255  * Block if a passive or active connection is currently being processed. Then
256  * process the event as follows:
257  * - If we are ESTABLISHED, move to CLOSING and modify the QP state
258  *   based on the abrupt flag
259  * - If the connection is already in the CLOSING or IDLE state, the peer is
260  *   disconnecting concurrently with us and we've already seen the
261  *   DISCONNECT event -- ignore the request and return 0
262  * - Disconnect on a listening endpoint returns -EINVAL
263  */
264 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
265 {
266 	struct iwcm_id_private *cm_id_priv;
267 	unsigned long flags;
268 	int ret = 0;
269 	struct ib_qp *qp = NULL;
270 
271 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
272 	/* Wait if we're currently in a connect or accept downcall */
273 	wait_event(cm_id_priv->connect_wait,
274 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
275 
276 	spin_lock_irqsave(&cm_id_priv->lock, flags);
277 	switch (cm_id_priv->state) {
278 	case IW_CM_STATE_ESTABLISHED:
279 		cm_id_priv->state = IW_CM_STATE_CLOSING;
280 
281 		/* QP could be <nul> for user-mode client */
282 		if (cm_id_priv->qp)
283 			qp = cm_id_priv->qp;
284 		else
285 			ret = -EINVAL;
286 		break;
287 	case IW_CM_STATE_LISTEN:
288 		ret = -EINVAL;
289 		break;
290 	case IW_CM_STATE_CLOSING:
291 		/* remote peer closed first */
292 	case IW_CM_STATE_IDLE:
293 		/* accept or connect returned !0 */
294 		break;
295 	case IW_CM_STATE_CONN_RECV:
296 		/*
297 		 * App called disconnect before/without calling accept after
298 		 * connect_request event delivered.
299 		 */
300 		break;
301 	case IW_CM_STATE_CONN_SENT:
302 		/* Can only get here if wait above fails */
303 	default:
304 		BUG();
305 	}
306 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
307 
308 	if (qp) {
309 		if (abrupt)
310 			ret = iwcm_modify_qp_err(qp);
311 		else
312 			ret = iwcm_modify_qp_sqd(qp);
313 
314 		/*
315 		 * If both sides are disconnecting the QP could
316 		 * already be in ERR or SQD states
317 		 */
318 		ret = 0;
319 	}
320 
321 	return ret;
322 }
323 EXPORT_SYMBOL(iw_cm_disconnect);
324 
325 /*
326  * CM_ID <-- DESTROYING
327  *
328  * Clean up all resources associated with the connection and release
329  * the initial reference taken by iw_create_cm_id.
330  */
331 static void destroy_cm_id(struct iw_cm_id *cm_id)
332 {
333 	struct iwcm_id_private *cm_id_priv;
334 	unsigned long flags;
335 
336 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
337 	/*
338 	 * Wait if we're currently in a connect or accept downcall. A
339 	 * listening endpoint should never block here.
340 	 */
341 	wait_event(cm_id_priv->connect_wait,
342 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
343 
344 	/*
345 	 * Since we're deleting the cm_id, drop any events that
346 	 * might arrive before the last dereference.
347 	 */
348 	set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
349 
350 	spin_lock_irqsave(&cm_id_priv->lock, flags);
351 	switch (cm_id_priv->state) {
352 	case IW_CM_STATE_LISTEN:
353 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
354 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
355 		/* destroy the listening endpoint */
356 		cm_id->device->iwcm->destroy_listen(cm_id);
357 		spin_lock_irqsave(&cm_id_priv->lock, flags);
358 		break;
359 	case IW_CM_STATE_ESTABLISHED:
360 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
361 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
362 		/* Abrupt close of the connection */
363 		(void)iwcm_modify_qp_err(cm_id_priv->qp);
364 		spin_lock_irqsave(&cm_id_priv->lock, flags);
365 		break;
366 	case IW_CM_STATE_IDLE:
367 	case IW_CM_STATE_CLOSING:
368 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
369 		break;
370 	case IW_CM_STATE_CONN_RECV:
371 		/*
372 		 * App called destroy before/without calling accept after
373 		 * receiving connection request event notification or
374 		 * returned non zero from the event callback function.
375 		 * In either case, must tell the provider to reject.
376 		 */
377 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
378 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
379 		cm_id->device->iwcm->reject(cm_id, NULL, 0);
380 		spin_lock_irqsave(&cm_id_priv->lock, flags);
381 		break;
382 	case IW_CM_STATE_CONN_SENT:
383 	case IW_CM_STATE_DESTROYING:
384 	default:
385 		BUG();
386 		break;
387 	}
388 	if (cm_id_priv->qp) {
389 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
390 		cm_id_priv->qp = NULL;
391 	}
392 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
393 
394 	(void)iwcm_deref_id(cm_id_priv);
395 }
396 
/*
 * Exported entry point for destroying a cm_id; a thin wrapper around
 * destroy_cm_id().
 *
 * destroy_cm_id() may sleep until a connect/accept downcall in progress
 * has completed, then tears the cm_id down and drops one reference.
 * The cm_id memory is freed by whichever path drops the last reference;
 * this function does not itself wait for outstanding references.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	destroy_cm_id(cm_id);
}
410 EXPORT_SYMBOL(iw_destroy_cm_id);
411 
412 /**
413  * iw_cm_map - Use portmapper to map the ports
414  * @cm_id: connection manager pointer
415  * @active: Indicates the active side when true
416  * returns nonzero for error only if iwpm_create_mapinfo() fails
417  *
418  * Tries to add a mapping for a port using the Portmapper. If
419  * successful in mapping the IP/Port it will check the remote
420  * mapped IP address for a wildcard IP address and replace the
421  * zero IP address with the remote_addr.
422  */
423 static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
424 {
425 	cm_id->m_local_addr = cm_id->local_addr;
426 	cm_id->m_remote_addr = cm_id->remote_addr;
427 
428 	return 0;
429 }
430 
431 /*
432  * CM_ID <-- LISTEN
433  *
434  * Start listening for connect requests. Generates one CONNECT_REQUEST
435  * event for each inbound connect request.
436  */
437 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
438 {
439 	struct iwcm_id_private *cm_id_priv;
440 	unsigned long flags;
441 	int ret;
442 
443 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
444 
445 	if (!backlog)
446 		backlog = default_backlog;
447 
448 	ret = alloc_work_entries(cm_id_priv, backlog);
449 	if (ret)
450 		return ret;
451 
452 	spin_lock_irqsave(&cm_id_priv->lock, flags);
453 	switch (cm_id_priv->state) {
454 	case IW_CM_STATE_IDLE:
455 		cm_id_priv->state = IW_CM_STATE_LISTEN;
456 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
457 		ret = iw_cm_map(cm_id, false);
458 		if (!ret)
459 			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
460 		if (ret)
461 			cm_id_priv->state = IW_CM_STATE_IDLE;
462 		spin_lock_irqsave(&cm_id_priv->lock, flags);
463 		break;
464 	default:
465 		ret = -EINVAL;
466 	}
467 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
468 
469 	return ret;
470 }
471 EXPORT_SYMBOL(iw_cm_listen);
472 
473 /*
474  * CM_ID <-- IDLE
475  *
476  * Rejects an inbound connection request. No events are generated.
477  */
478 int iw_cm_reject(struct iw_cm_id *cm_id,
479 		 const void *private_data,
480 		 u8 private_data_len)
481 {
482 	struct iwcm_id_private *cm_id_priv;
483 	unsigned long flags;
484 	int ret;
485 
486 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
487 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
488 
489 	spin_lock_irqsave(&cm_id_priv->lock, flags);
490 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
491 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
492 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
493 		wake_up_all(&cm_id_priv->connect_wait);
494 		return -EINVAL;
495 	}
496 	cm_id_priv->state = IW_CM_STATE_IDLE;
497 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
498 
499 	ret = cm_id->device->iwcm->reject(cm_id, private_data,
500 					  private_data_len);
501 
502 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
503 	wake_up_all(&cm_id_priv->connect_wait);
504 
505 	return ret;
506 }
507 EXPORT_SYMBOL(iw_cm_reject);
508 
509 /*
510  * CM_ID <-- ESTABLISHED
511  *
512  * Accepts an inbound connection request and generates an ESTABLISHED
513  * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
514  * until the ESTABLISHED event is received from the provider.
515  */
516 int iw_cm_accept(struct iw_cm_id *cm_id,
517 		 struct iw_cm_conn_param *iw_param)
518 {
519 	struct iwcm_id_private *cm_id_priv;
520 	struct ib_qp *qp;
521 	unsigned long flags;
522 	int ret;
523 
524 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
525 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
526 
527 	spin_lock_irqsave(&cm_id_priv->lock, flags);
528 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
529 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
530 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
531 		wake_up_all(&cm_id_priv->connect_wait);
532 		return -EINVAL;
533 	}
534 	/* Get the ib_qp given the QPN */
535 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
536 	if (!qp) {
537 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
538 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
539 		wake_up_all(&cm_id_priv->connect_wait);
540 		return -EINVAL;
541 	}
542 	cm_id->device->iwcm->add_ref(qp);
543 	cm_id_priv->qp = qp;
544 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
545 
546 	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
547 	if (ret) {
548 		/* An error on accept precludes provider events */
549 		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
550 		cm_id_priv->state = IW_CM_STATE_IDLE;
551 		spin_lock_irqsave(&cm_id_priv->lock, flags);
552 		if (cm_id_priv->qp) {
553 			cm_id->device->iwcm->rem_ref(qp);
554 			cm_id_priv->qp = NULL;
555 		}
556 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
557 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
558 		wake_up_all(&cm_id_priv->connect_wait);
559 	}
560 
561 	return ret;
562 }
563 EXPORT_SYMBOL(iw_cm_accept);
564 
565 /*
566  * Active Side: CM_ID <-- CONN_SENT
567  *
568  * If successful, results in the generation of a CONNECT_REPLY
569  * event. iw_cm_disconnect and iw_cm_destroy will block until the
570  * CONNECT_REPLY event is received from the provider.
571  */
572 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
573 {
574 	struct iwcm_id_private *cm_id_priv;
575 	int ret;
576 	unsigned long flags;
577 	struct ib_qp *qp;
578 
579 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
580 
581 	ret = alloc_work_entries(cm_id_priv, 4);
582 	if (ret)
583 		return ret;
584 
585 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
586 	spin_lock_irqsave(&cm_id_priv->lock, flags);
587 
588 	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
589 		ret = -EINVAL;
590 		goto err;
591 	}
592 
593 	/* Get the ib_qp given the QPN */
594 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
595 	if (!qp) {
596 		ret = -EINVAL;
597 		goto err;
598 	}
599 	cm_id->device->iwcm->add_ref(qp);
600 	cm_id_priv->qp = qp;
601 	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
602 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
603 
604 	ret = iw_cm_map(cm_id, true);
605 	if (!ret)
606 		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
607 	if (!ret)
608 		return 0;	/* success */
609 
610 	spin_lock_irqsave(&cm_id_priv->lock, flags);
611 	if (cm_id_priv->qp) {
612 		cm_id->device->iwcm->rem_ref(qp);
613 		cm_id_priv->qp = NULL;
614 	}
615 	cm_id_priv->state = IW_CM_STATE_IDLE;
616 err:
617 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
618 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
619 	wake_up_all(&cm_id_priv->connect_wait);
620 	return ret;
621 }
622 EXPORT_SYMBOL(iw_cm_connect);
623 
624 /*
625  * Passive Side: new CM_ID <-- CONN_RECV
626  *
627  * Handles an inbound connect request. The function creates a new
628  * iw_cm_id to represent the new connection and inherits the client
629  * callback function and other attributes from the listening parent.
630  *
631  * The work item contains a pointer to the listen_cm_id and the event. The
632  * listen_cm_id contains the client cm_handler, context and
633  * device. These are copied when the device is cloned. The event
634  * contains the new four tuple.
635  *
636  * An error on the child should not affect the parent, so this
637  * function does not return a value.
638  */
639 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
640 				struct iw_cm_event *iw_event)
641 {
642 	unsigned long flags;
643 	struct iw_cm_id *cm_id;
644 	struct iwcm_id_private *cm_id_priv;
645 	int ret;
646 
647 	/*
648 	 * The provider should never generate a connection request
649 	 * event with a bad status.
650 	 */
651 	BUG_ON(iw_event->status);
652 
653 	cm_id = iw_create_cm_id(listen_id_priv->id.device,
654 				listen_id_priv->id.cm_handler,
655 				listen_id_priv->id.context);
656 	/* If the cm_id could not be created, ignore the request */
657 	if (IS_ERR(cm_id))
658 		goto out;
659 
660 	cm_id->provider_data = iw_event->provider_data;
661 	cm_id->m_local_addr = iw_event->local_addr;
662 	cm_id->m_remote_addr = iw_event->remote_addr;
663 	cm_id->local_addr = listen_id_priv->id.local_addr;
664 	cm_id->remote_addr = iw_event->remote_addr;
665 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
666 	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
667 
668 	/*
669 	 * We could be destroying the listening id. If so, ignore this
670 	 * upcall.
671 	 */
672 	spin_lock_irqsave(&listen_id_priv->lock, flags);
673 	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
674 		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
675 		iw_cm_reject(cm_id, NULL, 0);
676 		iw_destroy_cm_id(cm_id);
677 		goto out;
678 	}
679 	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
680 
681 	ret = alloc_work_entries(cm_id_priv, 3);
682 	if (ret) {
683 		iw_cm_reject(cm_id, NULL, 0);
684 		iw_destroy_cm_id(cm_id);
685 		goto out;
686 	}
687 
688 	/* Call the client CM handler */
689 	ret = cm_id->cm_handler(cm_id, iw_event);
690 	if (ret) {
691 		iw_cm_reject(cm_id, NULL, 0);
692 		iw_destroy_cm_id(cm_id);
693 	}
694 
695 out:
696 	if (iw_event->private_data_len)
697 		kfree(iw_event->private_data);
698 }
699 
700 /*
701  * Passive Side: CM_ID <-- ESTABLISHED
702  *
703  * The provider generated an ESTABLISHED event which means that
704  * the MPA negotion has completed successfully and we are now in MPA
705  * FPDU mode.
706  *
707  * This event can only be received in the CONN_RECV state. If the
708  * remote peer closed, the ESTABLISHED event would be received followed
709  * by the CLOSE event. If the app closes, it will block until we wake
710  * it up after processing this event.
711  */
712 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
713 			       struct iw_cm_event *iw_event)
714 {
715 	unsigned long flags;
716 	int ret;
717 
718 	spin_lock_irqsave(&cm_id_priv->lock, flags);
719 
720 	/*
721 	 * We clear the CONNECT_WAIT bit here to allow the callback
722 	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
723 	 * from a callback handler is not allowed.
724 	 */
725 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
726 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
727 	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
728 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
729 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
730 	wake_up_all(&cm_id_priv->connect_wait);
731 
732 	return ret;
733 }
734 
735 /*
736  * Active Side: CM_ID <-- ESTABLISHED
737  *
738  * The app has called connect and is waiting for the established event to
739  * post it's requests to the server. This event will wake up anyone
740  * blocked in iw_cm_disconnect or iw_destroy_id.
741  */
742 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
743 			       struct iw_cm_event *iw_event)
744 {
745 	unsigned long flags;
746 	int ret;
747 
748 	spin_lock_irqsave(&cm_id_priv->lock, flags);
749 	/*
750 	 * Clear the connect wait bit so a callback function calling
751 	 * iw_cm_disconnect will not wait and deadlock this thread
752 	 */
753 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
754 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
755 	if (iw_event->status == 0) {
756 		cm_id_priv->id.m_local_addr = iw_event->local_addr;
757 		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
758 		iw_event->local_addr = cm_id_priv->id.local_addr;
759 		iw_event->remote_addr = cm_id_priv->id.remote_addr;
760 		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
761 	} else {
762 		/* REJECTED or RESET */
763 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
764 		cm_id_priv->qp = NULL;
765 		cm_id_priv->state = IW_CM_STATE_IDLE;
766 	}
767 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
768 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
769 
770 	if (iw_event->private_data_len)
771 		kfree(iw_event->private_data);
772 
773 	/* Wake up waiters on connect complete */
774 	wake_up_all(&cm_id_priv->connect_wait);
775 
776 	return ret;
777 }
778 
779 /*
780  * CM_ID <-- CLOSING
781  *
782  * If in the ESTABLISHED state, move to CLOSING.
783  */
784 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
785 				  struct iw_cm_event *iw_event)
786 {
787 	unsigned long flags;
788 
789 	spin_lock_irqsave(&cm_id_priv->lock, flags);
790 	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
791 		cm_id_priv->state = IW_CM_STATE_CLOSING;
792 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
793 }
794 
795 /*
796  * CM_ID <-- IDLE
797  *
798  * If in the ESTBLISHED or CLOSING states, the QP will have have been
799  * moved by the provider to the ERR state. Disassociate the CM_ID from
800  * the QP,  move to IDLE, and remove the 'connected' reference.
801  *
802  * If in some other state, the cm_id was destroyed asynchronously.
803  * This is the last reference that will result in waking up
804  * the app thread blocked in iw_destroy_cm_id.
805  */
806 static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
807 				  struct iw_cm_event *iw_event)
808 {
809 	unsigned long flags;
810 	int ret = 0;
811 	spin_lock_irqsave(&cm_id_priv->lock, flags);
812 
813 	if (cm_id_priv->qp) {
814 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
815 		cm_id_priv->qp = NULL;
816 	}
817 	switch (cm_id_priv->state) {
818 	case IW_CM_STATE_ESTABLISHED:
819 	case IW_CM_STATE_CLOSING:
820 		cm_id_priv->state = IW_CM_STATE_IDLE;
821 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
822 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
823 		spin_lock_irqsave(&cm_id_priv->lock, flags);
824 		break;
825 	case IW_CM_STATE_DESTROYING:
826 		break;
827 	default:
828 		BUG();
829 	}
830 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
831 
832 	return ret;
833 }
834 
835 static int process_event(struct iwcm_id_private *cm_id_priv,
836 			 struct iw_cm_event *iw_event)
837 {
838 	int ret = 0;
839 
840 	switch (iw_event->event) {
841 	case IW_CM_EVENT_CONNECT_REQUEST:
842 		cm_conn_req_handler(cm_id_priv, iw_event);
843 		break;
844 	case IW_CM_EVENT_CONNECT_REPLY:
845 		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
846 		break;
847 	case IW_CM_EVENT_ESTABLISHED:
848 		ret = cm_conn_est_handler(cm_id_priv, iw_event);
849 		break;
850 	case IW_CM_EVENT_DISCONNECT:
851 		cm_disconnect_handler(cm_id_priv, iw_event);
852 		break;
853 	case IW_CM_EVENT_CLOSE:
854 		ret = cm_close_handler(cm_id_priv, iw_event);
855 		break;
856 	default:
857 		BUG();
858 	}
859 
860 	return ret;
861 }
862 
863 /*
864  * Process events on the work_list for the cm_id. If the callback
865  * function requests that the cm_id be deleted, a flag is set in the
866  * cm_id flags to indicate that when the last reference is
867  * removed, the cm_id is to be destroyed. This is necessary to
868  * distinguish between an object that will be destroyed by the app
869  * thread asleep on the destroy_comp list vs. an object destroyed
870  * here synchronously when the last reference is removed.
871  */
872 static void cm_work_handler(struct work_struct *_work)
873 {
874 	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
875 	struct iw_cm_event levent;
876 	struct iwcm_id_private *cm_id_priv = work->cm_id;
877 	unsigned long flags;
878 	int empty;
879 	int ret = 0;
880 
881 	spin_lock_irqsave(&cm_id_priv->lock, flags);
882 	empty = list_empty(&cm_id_priv->work_list);
883 	while (!empty) {
884 		work = list_entry(cm_id_priv->work_list.next,
885 				  struct iwcm_work, list);
886 		list_del_init(&work->list);
887 		empty = list_empty(&cm_id_priv->work_list);
888 		levent = work->event;
889 		put_work(work);
890 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
891 
892 		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
893 			ret = process_event(cm_id_priv, &levent);
894 			if (ret)
895 				destroy_cm_id(&cm_id_priv->id);
896 		} else
897 			pr_debug("dropping event %d\n", levent.event);
898 		if (iwcm_deref_id(cm_id_priv))
899 			return;
900 		if (empty)
901 			return;
902 		spin_lock_irqsave(&cm_id_priv->lock, flags);
903 	}
904 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
905 }
906 
907 /*
908  * This function is called on interrupt context. Schedule events on
909  * the iwcm_wq thread to allow callback functions to downcall into
910  * the CM and/or block.  Events are queued to a per-CM_ID
911  * work_list. If this is the first event on the work_list, the work
912  * element is also queued on the iwcm_wq thread.
913  *
914  * Each event holds a reference on the cm_id. Until the last posted
915  * event has been delivered and processed, the cm_id cannot be
916  * deleted.
917  *
918  * Returns:
919  * 	      0	- the event was handled.
920  *	-ENOMEM	- the event was not handled due to lack of resources.
921  */
922 static int cm_event_handler(struct iw_cm_id *cm_id,
923 			     struct iw_cm_event *iw_event)
924 {
925 	struct iwcm_work *work;
926 	struct iwcm_id_private *cm_id_priv;
927 	unsigned long flags;
928 	int ret = 0;
929 
930 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
931 
932 	spin_lock_irqsave(&cm_id_priv->lock, flags);
933 	work = get_work(cm_id_priv);
934 	if (!work) {
935 		ret = -ENOMEM;
936 		goto out;
937 	}
938 
939 	INIT_WORK(&work->work, cm_work_handler);
940 	work->cm_id = cm_id_priv;
941 	work->event = *iw_event;
942 
943 	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
944 	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
945 	    work->event.private_data_len) {
946 		ret = copy_private_data(&work->event);
947 		if (ret) {
948 			put_work(work);
949 			goto out;
950 		}
951 	}
952 
953 	atomic_inc(&cm_id_priv->refcount);
954 	if (list_empty(&cm_id_priv->work_list)) {
955 		list_add_tail(&work->list, &cm_id_priv->work_list);
956 		queue_work(iwcm_wq, &work->work);
957 	} else
958 		list_add_tail(&work->list, &cm_id_priv->work_list);
959 out:
960 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
961 	return ret;
962 }
963 
964 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
965 				  struct ib_qp_attr *qp_attr,
966 				  int *qp_attr_mask)
967 {
968 	unsigned long flags;
969 	int ret;
970 
971 	spin_lock_irqsave(&cm_id_priv->lock, flags);
972 	switch (cm_id_priv->state) {
973 	case IW_CM_STATE_IDLE:
974 	case IW_CM_STATE_CONN_SENT:
975 	case IW_CM_STATE_CONN_RECV:
976 	case IW_CM_STATE_ESTABLISHED:
977 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
978 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
979 					   IB_ACCESS_REMOTE_READ;
980 		ret = 0;
981 		break;
982 	default:
983 		ret = -EINVAL;
984 		break;
985 	}
986 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
987 	return ret;
988 }
989 
990 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
991 				  struct ib_qp_attr *qp_attr,
992 				  int *qp_attr_mask)
993 {
994 	unsigned long flags;
995 	int ret;
996 
997 	spin_lock_irqsave(&cm_id_priv->lock, flags);
998 	switch (cm_id_priv->state) {
999 	case IW_CM_STATE_IDLE:
1000 	case IW_CM_STATE_CONN_SENT:
1001 	case IW_CM_STATE_CONN_RECV:
1002 	case IW_CM_STATE_ESTABLISHED:
1003 		*qp_attr_mask = 0;
1004 		ret = 0;
1005 		break;
1006 	default:
1007 		ret = -EINVAL;
1008 		break;
1009 	}
1010 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1011 	return ret;
1012 }
1013 
1014 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
1015 		       struct ib_qp_attr *qp_attr,
1016 		       int *qp_attr_mask)
1017 {
1018 	struct iwcm_id_private *cm_id_priv;
1019 	int ret;
1020 
1021 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
1022 	switch (qp_attr->qp_state) {
1023 	case IB_QPS_INIT:
1024 	case IB_QPS_RTR:
1025 		ret = iwcm_init_qp_init_attr(cm_id_priv,
1026 					     qp_attr, qp_attr_mask);
1027 		break;
1028 	case IB_QPS_RTS:
1029 		ret = iwcm_init_qp_rts_attr(cm_id_priv,
1030 					    qp_attr, qp_attr_mask);
1031 		break;
1032 	default:
1033 		ret = -EINVAL;
1034 		break;
1035 	}
1036 	return ret;
1037 }
1038 EXPORT_SYMBOL(iw_cm_init_qp_attr);
1039 
1040 static int __init iw_cm_init(void)
1041 {
1042 	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
1043 	if (!iwcm_wq)
1044 		return -ENOMEM;
1045 
1046 	return 0;
1047 }
1048 
1049 static void __exit iw_cm_cleanup(void)
1050 {
1051 	destroy_workqueue(iwcm_wq);
1052 }
1053 
1054 module_init(iw_cm_init);
1055 module_exit(iw_cm_cleanup);
1056