xref: /freebsd/sys/ofed/drivers/infiniband/core/ib_cm.c (revision e3aa18ad)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
5  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
6  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
7  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <linux/completion.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/device.h>
44 #include <linux/module.h>
45 #include <linux/err.h>
46 #include <linux/idr.h>
47 #include <linux/interrupt.h>
48 #include <linux/random.h>
49 #include <linux/rbtree.h>
50 #include <linux/spinlock.h>
51 #include <linux/slab.h>
52 #include <linux/sysfs.h>
53 #include <linux/workqueue.h>
54 #include <linux/kdev_t.h>
55 #include <linux/etherdevice.h>
56 
57 #include <asm/atomic-long.h>
58 
59 #include <rdma/ib_addr.h>
60 #include <rdma/ib_cache.h>
61 #include <rdma/ib_cm.h>
62 #include "cm_msgs.h"
63 #include "core_priv.h"
64 
65 MODULE_AUTHOR("Sean Hefty");
66 MODULE_DESCRIPTION("InfiniBand CM");
67 MODULE_LICENSE("Dual BSD/GPL");
68 
69 static const char * const ibcm_rej_reason_strs[] = {
70 	[IB_CM_REJ_NO_QP]			= "no QP",
71 	[IB_CM_REJ_NO_EEC]			= "no EEC",
72 	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
73 	[IB_CM_REJ_TIMEOUT]			= "timeout",
74 	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
75 	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm ID",
76 	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
77 	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service ID",
78 	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
79 	[IB_CM_REJ_STALE_CONN]			= "stale conn",
80 	[IB_CM_REJ_RDC_NOT_EXIST]		= "RDC not exist",
81 	[IB_CM_REJ_INVALID_GID]			= "invalid GID",
82 	[IB_CM_REJ_INVALID_LID]			= "invalid LID",
83 	[IB_CM_REJ_INVALID_SL]			= "invalid SL",
84 	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
85 	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
86 	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
87 	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt GID",
88 	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt LID",
89 	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt SL",
90 	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
91 	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
92 	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
93 	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port CM redirect",
94 	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
95 	[IB_CM_REJ_INVALID_MTU]			= "invalid MTU",
96 	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
97 	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
98 	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid RNR retry",
99 	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm ID",
100 	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
101 	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
102 	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
103 };
104 
105 const char *__attribute_const__ ibcm_reject_msg(int reason)
106 {
107 	size_t index = reason;
108 
109 	if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
110 	    ibcm_rej_reason_strs[index])
111 		return ibcm_rej_reason_strs[index];
112 	else
113 		return "unrecognized reason";
114 }
115 EXPORT_SYMBOL(ibcm_reject_msg);
116 
117 static void cm_add_one(struct ib_device *device);
118 static void cm_remove_one(struct ib_device *device, void *client_data);
119 
120 static struct ib_client cm_client = {
121 	.name   = "cm",
122 	.add    = cm_add_one,
123 	.remove = cm_remove_one
124 };
125 
126 static struct ib_cm {
127 	spinlock_t lock;
128 	struct list_head device_list;
129 	rwlock_t device_lock;
130 	struct rb_root listen_service_table;
131 	u64 listen_service_id;
132 	/* struct rb_root peer_service_table; todo: fix peer to peer */
133 	struct rb_root remote_qp_table;
134 	struct rb_root remote_id_table;
135 	struct rb_root remote_sidr_table;
136 	struct idr local_id_table;
137 	__be32 random_id_operand;
138 	struct list_head timewait_list;
139 	struct workqueue_struct *wq;
140 	/* Sync on cm change port state */
141 	spinlock_t state_lock;
142 } cm;
143 
/*
 * Counter indexes, ordered by attribute ID.  CM_ATTR_COUNT sizes the
 * per-group counter array.
 */
enum {
	CM_REQ_COUNTER = 0,
	CM_MRA_COUNTER,
	CM_REJ_COUNTER,
	CM_REP_COUNTER,
	CM_RTU_COUNTER,
	CM_DREQ_COUNTER,
	CM_DREP_COUNTER,
	CM_SIDR_REQ_COUNTER,
	CM_SIDR_REP_COUNTER,
	CM_LAP_COUNTER,
	CM_APR_COUNTER,
	CM_ATTR_COUNT,
	/* NOTE(review): presumably the attribute ID of index 0 - confirm. */
	CM_ATTR_ID_OFFSET = 0x0010,
};
160 
/*
 * Counter groups kept per port; CM_COUNTER_GROUPS is the number of
 * groups and sizes the arrays indexed by these values.
 */
enum {
	CM_XMIT = 0,
	CM_XMIT_RETRIES,
	CM_RECV,
	CM_RECV_DUPLICATES,
	CM_COUNTER_GROUPS
};
168 
169 static char const counter_group_names[CM_COUNTER_GROUPS]
170 				     [sizeof("cm_rx_duplicates")] = {
171 	"cm_tx_msgs", "cm_tx_retries",
172 	"cm_rx_msgs", "cm_rx_duplicates"
173 };
174 
175 struct cm_counter_group {
176 	struct kobject obj;
177 	atomic_long_t counter[CM_ATTR_COUNT];
178 };
179 
180 struct cm_counter_attribute {
181 	struct attribute attr;
182 	int index;
183 };
184 
185 #define CM_COUNTER_ATTR(_name, _index) \
186 struct cm_counter_attribute cm_##_name##_counter_attr = { \
187 	.attr = { .name = __stringify(_name), .mode = 0444 }, \
188 	.index = _index \
189 }
190 
191 static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
192 static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
193 static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
194 static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
195 static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
196 static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
197 static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
198 static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
199 static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
200 static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
201 static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
202 
203 static struct attribute *cm_counter_default_attrs[] = {
204 	&cm_req_counter_attr.attr,
205 	&cm_mra_counter_attr.attr,
206 	&cm_rej_counter_attr.attr,
207 	&cm_rep_counter_attr.attr,
208 	&cm_rtu_counter_attr.attr,
209 	&cm_dreq_counter_attr.attr,
210 	&cm_drep_counter_attr.attr,
211 	&cm_sidr_req_counter_attr.attr,
212 	&cm_sidr_rep_counter_attr.attr,
213 	&cm_lap_counter_attr.attr,
214 	&cm_apr_counter_attr.attr,
215 	NULL
216 };
217 
218 struct cm_port {
219 	struct cm_device *cm_dev;
220 	struct ib_mad_agent *mad_agent;
221 	struct kobject port_obj;
222 	u8 port_num;
223 	struct list_head cm_priv_prim_list;
224 	struct list_head cm_priv_altr_list;
225 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
226 };
227 
228 struct cm_device {
229 	struct list_head list;
230 	struct ib_device *ib_device;
231 	u8 ack_delay;
232 	int going_down;
233 	struct cm_port *port[0];
234 };
235 
236 struct cm_av {
237 	struct cm_port *port;
238 	union ib_gid dgid;
239 	struct ib_ah_attr ah_attr;
240 	u16 pkey_index;
241 	u8 timeout;
242 };
243 
244 struct cm_work {
245 	struct delayed_work work;
246 	struct list_head list;
247 	struct cm_port *port;
248 	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
249 	__be32 local_id;			/* Established / timewait */
250 	__be32 remote_id;
251 	struct ib_cm_event cm_event;
252 	struct ib_sa_path_rec path[0];
253 };
254 
255 struct cm_timewait_info {
256 	struct cm_work work;			/* Must be first. */
257 	struct list_head list;
258 	struct rb_node remote_qp_node;
259 	struct rb_node remote_id_node;
260 	__be64 remote_ca_guid;
261 	__be32 remote_qpn;
262 	u8 inserted_remote_qp;
263 	u8 inserted_remote_id;
264 };
265 
266 struct cm_id_private {
267 	struct ib_cm_id	id;
268 
269 	struct rb_node service_node;
270 	struct rb_node sidr_id_node;
271 	spinlock_t lock;	/* Do not acquire inside cm.lock */
272 	struct completion comp;
273 	atomic_t refcount;
274 	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
275 	 * Protected by the cm.lock spinlock. */
276 	int listen_sharecount;
277 
278 	struct ib_mad_send_buf *msg;
279 	struct cm_timewait_info *timewait_info;
280 	/* todo: use alternate port on send failure */
281 	struct cm_av av;
282 	struct cm_av alt_av;
283 
284 	void *private_data;
285 	__be64 tid;
286 	__be32 local_qpn;
287 	__be32 remote_qpn;
288 	enum ib_qp_type qp_type;
289 	__be32 sq_psn;
290 	__be32 rq_psn;
291 	int timeout_ms;
292 	enum ib_mtu path_mtu;
293 	__be16 pkey;
294 	u8 private_data_len;
295 	u8 max_cm_retries;
296 	u8 peer_to_peer;
297 	u8 responder_resources;
298 	u8 initiator_depth;
299 	u8 retry_count;
300 	u8 rnr_retry_count;
301 	u8 service_timeout;
302 	u8 target_ack_delay;
303 
304 	struct list_head prim_list;
305 	struct list_head altr_list;
306 	/* Indicates that the send port mad is registered and av is set */
307 	int prim_send_port_not_ready;
308 	int altr_send_port_not_ready;
309 
310 	struct list_head work_list;
311 	atomic_t work_count;
312 };
313 
314 static void cm_work_handler(struct work_struct *work);
315 
316 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
317 {
318 	if (atomic_dec_and_test(&cm_id_priv->refcount))
319 		complete(&cm_id_priv->comp);
320 }
321 
322 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
323 			struct ib_mad_send_buf **msg)
324 {
325 	struct ib_mad_agent *mad_agent;
326 	struct ib_mad_send_buf *m;
327 	struct ib_ah *ah;
328 	struct cm_av *av;
329 	unsigned long flags, flags2;
330 	int ret = 0;
331 
332 	/* don't let the port to be released till the agent is down */
333 	spin_lock_irqsave(&cm.state_lock, flags2);
334 	spin_lock_irqsave(&cm.lock, flags);
335 	if (!cm_id_priv->prim_send_port_not_ready)
336 		av = &cm_id_priv->av;
337 	else if (!cm_id_priv->altr_send_port_not_ready &&
338 		 (cm_id_priv->alt_av.port))
339 		av = &cm_id_priv->alt_av;
340 	else {
341 		pr_info("%s: not valid CM id\n", __func__);
342 		ret = -ENODEV;
343 		spin_unlock_irqrestore(&cm.lock, flags);
344 		goto out;
345 	}
346 	spin_unlock_irqrestore(&cm.lock, flags);
347 	/* Make sure the port haven't released the mad yet */
348 	mad_agent = cm_id_priv->av.port->mad_agent;
349 	if (!mad_agent) {
350 		pr_info("%s: not a valid MAD agent\n", __func__);
351 		ret = -ENODEV;
352 		goto out;
353 	}
354 	ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
355 	if (IS_ERR(ah)) {
356 		ret = PTR_ERR(ah);
357 		goto out;
358 	}
359 
360 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
361 			       av->pkey_index,
362 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
363 			       GFP_ATOMIC,
364 			       IB_MGMT_BASE_VERSION);
365 	if (IS_ERR(m)) {
366 		ib_destroy_ah(ah, 0);
367 		ret = PTR_ERR(m);
368 		goto out;
369 	}
370 
371 	/* Timeout set by caller if response is expected. */
372 	m->ah = ah;
373 	m->retries = cm_id_priv->max_cm_retries;
374 
375 	atomic_inc(&cm_id_priv->refcount);
376 	m->context[0] = cm_id_priv;
377 	*msg = m;
378 
379 out:
380 	spin_unlock_irqrestore(&cm.state_lock, flags2);
381 	return ret;
382 }
383 
384 static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
385 							   struct ib_mad_recv_wc *mad_recv_wc)
386 {
387 	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
388 				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
389 				  GFP_ATOMIC,
390 				  IB_MGMT_BASE_VERSION);
391 }
392 
393 static int cm_create_response_msg_ah(struct cm_port *port,
394 				     struct ib_mad_recv_wc *mad_recv_wc,
395 				     struct ib_mad_send_buf *msg)
396 {
397 	struct ib_ah *ah;
398 
399 	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
400 				  mad_recv_wc->recv_buf.grh, port->port_num);
401 	if (IS_ERR(ah))
402 		return PTR_ERR(ah);
403 
404 	msg->ah = ah;
405 	return 0;
406 }
407 
408 static void cm_free_msg(struct ib_mad_send_buf *msg)
409 {
410 	if (msg->ah)
411 		ib_destroy_ah(msg->ah, 0);
412 	if (msg->context[0])
413 		cm_deref_id(msg->context[0]);
414 	ib_free_send_mad(msg);
415 }
416 
/*
 * Allocate a response MAD for a received MAD and attach an address
 * handle derived from the receive completion.
 *
 * Returns 0 and stores the buffer in *@msg on success.  On failure the
 * error from either step is returned, any buffer already allocated is
 * released and *@msg is left untouched.
 */
static int cm_alloc_response_msg(struct cm_port *port,
				 struct ib_mad_recv_wc *mad_recv_wc,
				 struct ib_mad_send_buf **msg)
{
	struct ib_mad_send_buf *reply;
	int err;

	reply = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	err = cm_create_response_msg_ah(port, mad_recv_wc, reply);
	if (err != 0) {
		cm_free_msg(reply);
		return err;
	}

	*msg = reply;
	return 0;
}
437 
438 static void * cm_copy_private_data(const void *private_data,
439 				   u8 private_data_len)
440 {
441 	void *data;
442 
443 	if (!private_data || !private_data_len)
444 		return NULL;
445 
446 	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
447 	if (!data)
448 		return ERR_PTR(-ENOMEM);
449 
450 	return data;
451 }
452 
453 static void cm_set_private_data(struct cm_id_private *cm_id_priv,
454 				 void *private_data, u8 private_data_len)
455 {
456 	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
457 		kfree(cm_id_priv->private_data);
458 
459 	cm_id_priv->private_data = private_data;
460 	cm_id_priv->private_data_len = private_data_len;
461 }
462 
463 static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
464 			      struct ib_grh *grh, struct cm_av *av)
465 {
466 	struct ib_ah_attr new_ah_attr;
467 	int ret;
468 
469 	av->port = port;
470 	av->pkey_index = wc->pkey_index;
471 
472 	/*
473 	 * av->ah_attr might be initialized based on past wc during incoming
474 	 * connect request or while sending out connect request. So initialize
475 	 * a new ah_attr on stack. If initialization fails, old ah_attr is
476 	 * used for sending any responses. If initialization is successful,
477 	 * than new ah_attr is used by overwriting old one.
478 	 */
479 	ret = ib_init_ah_from_wc(port->cm_dev->ib_device,
480 				 port->port_num, wc,
481 				 grh, &new_ah_attr);
482 	if (ret)
483 		return ret;
484 
485 	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
486 	return 0;
487 }
488 
489 static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
490 				   struct ib_grh *grh, struct cm_av *av)
491 {
492 	av->port = port;
493 	av->pkey_index = wc->pkey_index;
494 	return ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
495 				  grh, &av->ah_attr);
496 }
497 
498 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
499 			      struct cm_id_private *cm_id_priv)
500 {
501 	struct cm_device *cm_dev;
502 	struct cm_port *port = NULL;
503 	unsigned long flags;
504 	int ret;
505 	u8 p;
506 	struct ifnet *ndev = ib_get_ndev_from_path(path);
507 
508 	read_lock_irqsave(&cm.device_lock, flags);
509 	list_for_each_entry(cm_dev, &cm.device_list, list) {
510 		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
511 					path->gid_type, ndev, &p, NULL)) {
512 			port = cm_dev->port[p-1];
513 			break;
514 		}
515 	}
516 	read_unlock_irqrestore(&cm.device_lock, flags);
517 
518 	if (ndev)
519 		dev_put(ndev);
520 
521 	if (!port)
522 		return -EINVAL;
523 
524 	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
525 				  be16_to_cpu(path->pkey), &av->pkey_index);
526 	if (ret)
527 		return ret;
528 
529 	av->port = port;
530 	ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num,
531 				   path, &av->ah_attr);
532 	if (ret)
533 		return ret;
534 
535 	av->timeout = path->packet_life_time + 1;
536 
537 	spin_lock_irqsave(&cm.lock, flags);
538 	if (&cm_id_priv->av == av)
539 		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
540 	else if (&cm_id_priv->alt_av == av)
541 		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
542 	else
543 		ret = -EINVAL;
544 
545 	spin_unlock_irqrestore(&cm.lock, flags);
546 
547 	return ret;
548 }
549 
550 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
551 {
552 	unsigned long flags;
553 	int id;
554 
555 	idr_preload(GFP_KERNEL);
556 	spin_lock_irqsave(&cm.lock, flags);
557 
558 	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
559 
560 	spin_unlock_irqrestore(&cm.lock, flags);
561 	idr_preload_end();
562 
563 	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
564 	return id < 0 ? id : 0;
565 }
566 
567 static void cm_free_id(__be32 local_id)
568 {
569 	spin_lock_irq(&cm.lock);
570 	idr_remove(&cm.local_id_table,
571 		   (__force int) (local_id ^ cm.random_id_operand));
572 	spin_unlock_irq(&cm.lock);
573 }
574 
575 static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
576 {
577 	struct cm_id_private *cm_id_priv;
578 
579 	cm_id_priv = idr_find(&cm.local_id_table,
580 			      (__force int) (local_id ^ cm.random_id_operand));
581 	if (cm_id_priv) {
582 		if (cm_id_priv->id.remote_id == remote_id)
583 			atomic_inc(&cm_id_priv->refcount);
584 		else
585 			cm_id_priv = NULL;
586 	}
587 
588 	return cm_id_priv;
589 }
590 
591 static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
592 {
593 	struct cm_id_private *cm_id_priv;
594 
595 	spin_lock_irq(&cm.lock);
596 	cm_id_priv = cm_get_id(local_id, remote_id);
597 	spin_unlock_irq(&cm.lock);
598 
599 	return cm_id_priv;
600 }
601 
602 /*
603  * Trivial helpers to strip endian annotation and compare; the
604  * endianness doesn't actually matter since we just need a stable
605  * order for the RB tree.
606  */
607 static int be32_lt(__be32 a, __be32 b)
608 {
609 	return (__force u32) a < (__force u32) b;
610 }
611 
612 static int be32_gt(__be32 a, __be32 b)
613 {
614 	return (__force u32) a > (__force u32) b;
615 }
616 
617 static int be64_lt(__be64 a, __be64 b)
618 {
619 	return (__force u64) a < (__force u64) b;
620 }
621 
622 static int be64_gt(__be64 a, __be64 b)
623 {
624 	return (__force u64) a > (__force u64) b;
625 }
626 
627 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
628 {
629 	struct rb_node **link = &cm.listen_service_table.rb_node;
630 	struct rb_node *parent = NULL;
631 	struct cm_id_private *cur_cm_id_priv;
632 	__be64 service_id = cm_id_priv->id.service_id;
633 	__be64 service_mask = cm_id_priv->id.service_mask;
634 
635 	while (*link) {
636 		parent = *link;
637 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
638 					  service_node);
639 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
640 		    (service_mask & cur_cm_id_priv->id.service_id) &&
641 		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
642 			return cur_cm_id_priv;
643 
644 		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
645 			link = &(*link)->rb_left;
646 		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
647 			link = &(*link)->rb_right;
648 		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
649 			link = &(*link)->rb_left;
650 		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
651 			link = &(*link)->rb_right;
652 		else
653 			link = &(*link)->rb_right;
654 	}
655 	rb_link_node(&cm_id_priv->service_node, parent, link);
656 	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
657 	return NULL;
658 }
659 
660 static struct cm_id_private * cm_find_listen(struct ib_device *device,
661 					     __be64 service_id)
662 {
663 	struct rb_node *node = cm.listen_service_table.rb_node;
664 	struct cm_id_private *cm_id_priv;
665 
666 	while (node) {
667 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
668 		if ((cm_id_priv->id.service_mask & service_id) ==
669 		     cm_id_priv->id.service_id &&
670 		    (cm_id_priv->id.device == device))
671 			return cm_id_priv;
672 
673 		if (device < cm_id_priv->id.device)
674 			node = node->rb_left;
675 		else if (device > cm_id_priv->id.device)
676 			node = node->rb_right;
677 		else if (be64_lt(service_id, cm_id_priv->id.service_id))
678 			node = node->rb_left;
679 		else if (be64_gt(service_id, cm_id_priv->id.service_id))
680 			node = node->rb_right;
681 		else
682 			node = node->rb_right;
683 	}
684 	return NULL;
685 }
686 
687 static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
688 						     *timewait_info)
689 {
690 	struct rb_node **link = &cm.remote_id_table.rb_node;
691 	struct rb_node *parent = NULL;
692 	struct cm_timewait_info *cur_timewait_info;
693 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
694 	__be32 remote_id = timewait_info->work.remote_id;
695 
696 	while (*link) {
697 		parent = *link;
698 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
699 					     remote_id_node);
700 		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
701 			link = &(*link)->rb_left;
702 		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
703 			link = &(*link)->rb_right;
704 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
705 			link = &(*link)->rb_left;
706 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
707 			link = &(*link)->rb_right;
708 		else
709 			return cur_timewait_info;
710 	}
711 	timewait_info->inserted_remote_id = 1;
712 	rb_link_node(&timewait_info->remote_id_node, parent, link);
713 	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
714 	return NULL;
715 }
716 
717 static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
718 						   __be32 remote_id)
719 {
720 	struct rb_node *node = cm.remote_id_table.rb_node;
721 	struct cm_timewait_info *timewait_info;
722 
723 	while (node) {
724 		timewait_info = rb_entry(node, struct cm_timewait_info,
725 					 remote_id_node);
726 		if (be32_lt(remote_id, timewait_info->work.remote_id))
727 			node = node->rb_left;
728 		else if (be32_gt(remote_id, timewait_info->work.remote_id))
729 			node = node->rb_right;
730 		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
731 			node = node->rb_left;
732 		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
733 			node = node->rb_right;
734 		else
735 			return timewait_info;
736 	}
737 	return NULL;
738 }
739 
740 static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
741 						      *timewait_info)
742 {
743 	struct rb_node **link = &cm.remote_qp_table.rb_node;
744 	struct rb_node *parent = NULL;
745 	struct cm_timewait_info *cur_timewait_info;
746 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
747 	__be32 remote_qpn = timewait_info->remote_qpn;
748 
749 	while (*link) {
750 		parent = *link;
751 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
752 					     remote_qp_node);
753 		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
754 			link = &(*link)->rb_left;
755 		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
756 			link = &(*link)->rb_right;
757 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
758 			link = &(*link)->rb_left;
759 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
760 			link = &(*link)->rb_right;
761 		else
762 			return cur_timewait_info;
763 	}
764 	timewait_info->inserted_remote_qp = 1;
765 	rb_link_node(&timewait_info->remote_qp_node, parent, link);
766 	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
767 	return NULL;
768 }
769 
770 static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
771 						    *cm_id_priv)
772 {
773 	struct rb_node **link = &cm.remote_sidr_table.rb_node;
774 	struct rb_node *parent = NULL;
775 	struct cm_id_private *cur_cm_id_priv;
776 	union ib_gid *port_gid = &cm_id_priv->av.dgid;
777 	__be32 remote_id = cm_id_priv->id.remote_id;
778 
779 	while (*link) {
780 		parent = *link;
781 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
782 					  sidr_id_node);
783 		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
784 			link = &(*link)->rb_left;
785 		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
786 			link = &(*link)->rb_right;
787 		else {
788 			int cmp;
789 			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
790 				     sizeof *port_gid);
791 			if (cmp < 0)
792 				link = &(*link)->rb_left;
793 			else if (cmp > 0)
794 				link = &(*link)->rb_right;
795 			else
796 				return cur_cm_id_priv;
797 		}
798 	}
799 	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
800 	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
801 	return NULL;
802 }
803 
804 static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
805 			       enum ib_cm_sidr_status status)
806 {
807 	struct ib_cm_sidr_rep_param param;
808 
809 	memset(&param, 0, sizeof param);
810 	param.status = status;
811 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
812 }
813 
814 struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
815 				 ib_cm_handler cm_handler,
816 				 void *context)
817 {
818 	struct cm_id_private *cm_id_priv;
819 	int ret;
820 
821 	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
822 	if (!cm_id_priv)
823 		return ERR_PTR(-ENOMEM);
824 
825 	cm_id_priv->id.state = IB_CM_IDLE;
826 	cm_id_priv->id.device = device;
827 	cm_id_priv->id.cm_handler = cm_handler;
828 	cm_id_priv->id.context = context;
829 	cm_id_priv->id.remote_cm_qpn = 1;
830 	ret = cm_alloc_id(cm_id_priv);
831 	if (ret)
832 		goto error;
833 
834 	spin_lock_init(&cm_id_priv->lock);
835 	init_completion(&cm_id_priv->comp);
836 	INIT_LIST_HEAD(&cm_id_priv->work_list);
837 	INIT_LIST_HEAD(&cm_id_priv->prim_list);
838 	INIT_LIST_HEAD(&cm_id_priv->altr_list);
839 	atomic_set(&cm_id_priv->work_count, -1);
840 	atomic_set(&cm_id_priv->refcount, 1);
841 	return &cm_id_priv->id;
842 
843 error:
844 	kfree(cm_id_priv);
845 	return ERR_PTR(-ENOMEM);
846 }
847 EXPORT_SYMBOL(ib_create_cm_id);
848 
849 static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
850 {
851 	struct cm_work *work;
852 
853 	if (list_empty(&cm_id_priv->work_list))
854 		return NULL;
855 
856 	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
857 	list_del(&work->list);
858 	return work;
859 }
860 
861 static void cm_free_work(struct cm_work *work)
862 {
863 	if (work->mad_recv_wc)
864 		ib_free_recv_mad(work->mad_recv_wc);
865 	kfree(work);
866 }
867 
/*
 * Approximate conversion to milliseconds of an IBA-encoded time, i.e.
 * 4.096us x 2^iba_time, computed as 2^(iba_time - 8).
 *
 * Encodings of 8 or less yield 1 ms.  The exponent is capped at 30 so
 * the shift never reaches the width of int (which would be undefined
 * behaviour) and the result stays positive; every encoding up to 38
 * keeps its previous value.
 */
static inline int cm_convert_to_ms(int iba_time)
{
	if (iba_time <= 8)
		return 1;
	if (iba_time >= 38)
		return 1 << 30;

	return 1 << (iba_time - 8);
}
873 
874 /*
875  * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
876  * Because of how ack_timeout is stored, adding one doubles the timeout.
877  * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
878  * increment it (round up) only if the other is within 50%.
879  */
880 static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
881 {
882 	int ack_timeout = packet_life_time + 1;
883 
884 	if (ack_timeout >= ca_ack_delay)
885 		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
886 	else
887 		ack_timeout = ca_ack_delay +
888 			      (ack_timeout >= (ca_ack_delay - 1));
889 
890 	return min(31, ack_timeout);
891 }
892 
893 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
894 {
895 	if (timewait_info->inserted_remote_id) {
896 		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
897 		timewait_info->inserted_remote_id = 0;
898 	}
899 
900 	if (timewait_info->inserted_remote_qp) {
901 		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
902 		timewait_info->inserted_remote_qp = 0;
903 	}
904 }
905 
906 static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
907 {
908 	struct cm_timewait_info *timewait_info;
909 
910 	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
911 	if (!timewait_info)
912 		return ERR_PTR(-ENOMEM);
913 
914 	timewait_info->work.local_id = local_id;
915 	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
916 	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
917 	return timewait_info;
918 }
919 
920 static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
921 {
922 	int wait_time;
923 	unsigned long flags;
924 	struct cm_device *cm_dev;
925 
926 	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
927 	if (!cm_dev)
928 		return;
929 
930 	spin_lock_irqsave(&cm.lock, flags);
931 	cm_cleanup_timewait(cm_id_priv->timewait_info);
932 	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
933 	spin_unlock_irqrestore(&cm.lock, flags);
934 
935 	/*
936 	 * The cm_id could be destroyed by the user before we exit timewait.
937 	 * To protect against this, we search for the cm_id after exiting
938 	 * timewait before notifying the user that we've exited timewait.
939 	 */
940 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
941 	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
942 
943 	/* Check if the device started its remove_one */
944 	spin_lock_irqsave(&cm.lock, flags);
945 	if (!cm_dev->going_down)
946 		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
947 				   msecs_to_jiffies(wait_time));
948 	spin_unlock_irqrestore(&cm.lock, flags);
949 
950 	cm_id_priv->timewait_info = NULL;
951 }
952 
953 static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
954 {
955 	unsigned long flags;
956 
957 	cm_id_priv->id.state = IB_CM_IDLE;
958 	if (cm_id_priv->timewait_info) {
959 		spin_lock_irqsave(&cm.lock, flags);
960 		cm_cleanup_timewait(cm_id_priv->timewait_info);
961 		spin_unlock_irqrestore(&cm.lock, flags);
962 		kfree(cm_id_priv->timewait_info);
963 		cm_id_priv->timewait_info = NULL;
964 	}
965 }
966 
967 static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
968 {
969 	struct cm_id_private *cm_id_priv;
970 	struct cm_work *work;
971 
972 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
973 retest:
974 	spin_lock_irq(&cm_id_priv->lock);
975 	switch (cm_id->state) {
976 	case IB_CM_LISTEN:
977 		spin_unlock_irq(&cm_id_priv->lock);
978 
979 		spin_lock_irq(&cm.lock);
980 		if (--cm_id_priv->listen_sharecount > 0) {
981 			/* The id is still shared. */
982 			cm_deref_id(cm_id_priv);
983 			spin_unlock_irq(&cm.lock);
984 			return;
985 		}
986 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
987 		spin_unlock_irq(&cm.lock);
988 		break;
989 	case IB_CM_SIDR_REQ_SENT:
990 		cm_id->state = IB_CM_IDLE;
991 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
992 		spin_unlock_irq(&cm_id_priv->lock);
993 		break;
994 	case IB_CM_SIDR_REQ_RCVD:
995 		spin_unlock_irq(&cm_id_priv->lock);
996 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
997 		spin_lock_irq(&cm.lock);
998 		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
999 			rb_erase(&cm_id_priv->sidr_id_node,
1000 				 &cm.remote_sidr_table);
1001 		spin_unlock_irq(&cm.lock);
1002 		break;
1003 	case IB_CM_REQ_SENT:
1004 	case IB_CM_MRA_REQ_RCVD:
1005 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1006 		spin_unlock_irq(&cm_id_priv->lock);
1007 		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
1008 			       &cm_id_priv->id.device->node_guid,
1009 			       sizeof cm_id_priv->id.device->node_guid,
1010 			       NULL, 0);
1011 		break;
1012 	case IB_CM_REQ_RCVD:
1013 		if (err == -ENOMEM) {
1014 			/* Do not reject to allow future retries. */
1015 			cm_reset_to_idle(cm_id_priv);
1016 			spin_unlock_irq(&cm_id_priv->lock);
1017 		} else {
1018 			spin_unlock_irq(&cm_id_priv->lock);
1019 			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1020 				       NULL, 0, NULL, 0);
1021 		}
1022 		break;
1023 	case IB_CM_REP_SENT:
1024 	case IB_CM_MRA_REP_RCVD:
1025 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1026 		/* Fall through */
1027 	case IB_CM_MRA_REQ_SENT:
1028 	case IB_CM_REP_RCVD:
1029 	case IB_CM_MRA_REP_SENT:
1030 		spin_unlock_irq(&cm_id_priv->lock);
1031 		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1032 			       NULL, 0, NULL, 0);
1033 		break;
1034 	case IB_CM_ESTABLISHED:
1035 		spin_unlock_irq(&cm_id_priv->lock);
1036 		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
1037 			break;
1038 		ib_send_cm_dreq(cm_id, NULL, 0);
1039 		goto retest;
1040 	case IB_CM_DREQ_SENT:
1041 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1042 		cm_enter_timewait(cm_id_priv);
1043 		spin_unlock_irq(&cm_id_priv->lock);
1044 		break;
1045 	case IB_CM_DREQ_RCVD:
1046 		spin_unlock_irq(&cm_id_priv->lock);
1047 		ib_send_cm_drep(cm_id, NULL, 0);
1048 		break;
1049 	default:
1050 		spin_unlock_irq(&cm_id_priv->lock);
1051 		break;
1052 	}
1053 
1054 	spin_lock_irq(&cm.lock);
1055 	if (!list_empty(&cm_id_priv->altr_list) &&
1056 	    (!cm_id_priv->altr_send_port_not_ready))
1057 		list_del(&cm_id_priv->altr_list);
1058 	if (!list_empty(&cm_id_priv->prim_list) &&
1059 	    (!cm_id_priv->prim_send_port_not_ready))
1060 		list_del(&cm_id_priv->prim_list);
1061 	spin_unlock_irq(&cm.lock);
1062 
1063 	cm_free_id(cm_id->local_id);
1064 	cm_deref_id(cm_id_priv);
1065 	wait_for_completion(&cm_id_priv->comp);
1066 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
1067 		cm_free_work(work);
1068 	kfree(cm_id_priv->private_data);
1069 	kfree(cm_id_priv);
1070 }
1071 
/*
 * Exported destructor for a connection identifier.  Forwards to
 * cm_destroy_id() with an error code of 0.
 */
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);
1077 
1078 /**
1079  * __ib_cm_listen - Initiates listening on the specified service ID for
1080  *   connection and service ID resolution requests.
1081  * @cm_id: Connection identifier associated with the listen request.
1082  * @service_id: Service identifier matched against incoming connection
1083  *   and service ID resolution requests.  The service ID should be specified
1084  *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1085  *   assign a service ID to the caller.
1086  * @service_mask: Mask applied to service ID used to listen across a
1087  *   range of service IDs.  If set to 0, the service ID is matched
1088  *   exactly.  This parameter is ignored if %service_id is set to
1089  *   IB_CM_ASSIGN_SERVICE_ID.
1090  */
1091 static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
1092 			  __be64 service_mask)
1093 {
1094 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
1095 	int ret = 0;
1096 
1097 	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
1098 	service_id &= service_mask;
1099 	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
1100 	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
1101 		return -EINVAL;
1102 
1103 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1104 	if (cm_id->state != IB_CM_IDLE)
1105 		return -EINVAL;
1106 
1107 	cm_id->state = IB_CM_LISTEN;
1108 	++cm_id_priv->listen_sharecount;
1109 
1110 	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
1111 		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
1112 		cm_id->service_mask = ~cpu_to_be64(0);
1113 	} else {
1114 		cm_id->service_id = service_id;
1115 		cm_id->service_mask = service_mask;
1116 	}
1117 	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
1118 
1119 	if (cur_cm_id_priv) {
1120 		cm_id->state = IB_CM_IDLE;
1121 		--cm_id_priv->listen_sharecount;
1122 		ret = -EBUSY;
1123 	}
1124 	return ret;
1125 }
1126 
1127 int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
1128 {
1129 	unsigned long flags;
1130 	int ret;
1131 
1132 	spin_lock_irqsave(&cm.lock, flags);
1133 	ret = __ib_cm_listen(cm_id, service_id, service_mask);
1134 	spin_unlock_irqrestore(&cm.lock, flags);
1135 
1136 	return ret;
1137 }
1138 EXPORT_SYMBOL(ib_cm_listen);
1139 
1140 /**
1141  * Create a new listening ib_cm_id and listen on the given service ID.
1142  *
1143  * If there's an existing ID listening on that same device and service ID,
1144  * return it.
1145  *
1146  * @device: Device associated with the cm_id.  All related communication will
1147  * be associated with the specified device.
1148  * @cm_handler: Callback invoked to notify the user of CM events.
1149  * @service_id: Service identifier matched against incoming connection
1150  *   and service ID resolution requests.  The service ID should be specified
1151  *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1152  *   assign a service ID to the caller.
1153  *
1154  * Callers should call ib_destroy_cm_id when done with the listener ID.
1155  */
1156 struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
1157 				     ib_cm_handler cm_handler,
1158 				     __be64 service_id)
1159 {
1160 	struct cm_id_private *cm_id_priv;
1161 	struct ib_cm_id *cm_id;
1162 	unsigned long flags;
1163 	int err = 0;
1164 
1165 	/* Create an ID in advance, since the creation may sleep */
1166 	cm_id = ib_create_cm_id(device, cm_handler, NULL);
1167 	if (IS_ERR(cm_id))
1168 		return cm_id;
1169 
1170 	spin_lock_irqsave(&cm.lock, flags);
1171 
1172 	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
1173 		goto new_id;
1174 
1175 	/* Find an existing ID */
1176 	cm_id_priv = cm_find_listen(device, service_id);
1177 	if (cm_id_priv) {
1178 		if (cm_id->cm_handler != cm_handler || cm_id->context) {
1179 			/* Sharing an ib_cm_id with different handlers is not
1180 			 * supported */
1181 			spin_unlock_irqrestore(&cm.lock, flags);
1182 			ib_destroy_cm_id(cm_id);
1183 			return ERR_PTR(-EINVAL);
1184 		}
1185 		atomic_inc(&cm_id_priv->refcount);
1186 		++cm_id_priv->listen_sharecount;
1187 		spin_unlock_irqrestore(&cm.lock, flags);
1188 
1189 		ib_destroy_cm_id(cm_id);
1190 		cm_id = &cm_id_priv->id;
1191 		return cm_id;
1192 	}
1193 
1194 new_id:
1195 	/* Use newly created ID */
1196 	err = __ib_cm_listen(cm_id, service_id, 0);
1197 
1198 	spin_unlock_irqrestore(&cm.lock, flags);
1199 
1200 	if (err) {
1201 		ib_destroy_cm_id(cm_id);
1202 		return ERR_PTR(err);
1203 	}
1204 	return cm_id;
1205 }
1206 EXPORT_SYMBOL(ib_cm_insert_listen);
1207 
1208 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1209 {
1210 	u64 hi_tid, low_tid;
1211 
1212 	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1213 	low_tid  = (u64)cm_id_priv->id.local_id;
1214 	return cpu_to_be64(hi_tid | low_tid);
1215 }
1216 
1217 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1218 			      __be16 attr_id, __be64 tid)
1219 {
1220 	hdr->base_version  = IB_MGMT_BASE_VERSION;
1221 	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
1222 	hdr->class_version = IB_CM_CLASS_VERSION;
1223 	hdr->method	   = IB_MGMT_METHOD_SEND;
1224 	hdr->attr_id	   = attr_id;
1225 	hdr->tid	   = tid;
1226 }
1227 
1228 static void cm_format_req(struct cm_req_msg *req_msg,
1229 			  struct cm_id_private *cm_id_priv,
1230 			  struct ib_cm_req_param *param)
1231 {
1232 	struct ib_sa_path_rec *pri_path = param->primary_path;
1233 	struct ib_sa_path_rec *alt_path = param->alternate_path;
1234 
1235 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1236 			  cm_form_tid(cm_id_priv));
1237 
1238 	req_msg->local_comm_id = cm_id_priv->id.local_id;
1239 	req_msg->service_id = param->service_id;
1240 	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1241 	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1242 	cm_req_set_init_depth(req_msg, param->initiator_depth);
1243 	cm_req_set_remote_resp_timeout(req_msg,
1244 				       param->remote_cm_response_timeout);
1245 	cm_req_set_qp_type(req_msg, param->qp_type);
1246 	cm_req_set_flow_ctrl(req_msg, param->flow_control);
1247 	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1248 	cm_req_set_local_resp_timeout(req_msg,
1249 				      param->local_cm_response_timeout);
1250 	req_msg->pkey = param->primary_path->pkey;
1251 	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1252 	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1253 
1254 	if (param->qp_type != IB_QPT_XRC_INI) {
1255 		cm_req_set_resp_res(req_msg, param->responder_resources);
1256 		cm_req_set_retry_count(req_msg, param->retry_count);
1257 		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1258 		cm_req_set_srq(req_msg, param->srq);
1259 	}
1260 
1261 	if (pri_path->hop_limit <= 1) {
1262 		req_msg->primary_local_lid = pri_path->slid;
1263 		req_msg->primary_remote_lid = pri_path->dlid;
1264 	} else {
1265 		/* Work-around until there's a way to obtain remote LID info */
1266 		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1267 		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1268 	}
1269 	req_msg->primary_local_gid = pri_path->sgid;
1270 	req_msg->primary_remote_gid = pri_path->dgid;
1271 	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1272 	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1273 	req_msg->primary_traffic_class = pri_path->traffic_class;
1274 	req_msg->primary_hop_limit = pri_path->hop_limit;
1275 	cm_req_set_primary_sl(req_msg, pri_path->sl);
1276 	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1277 	cm_req_set_primary_local_ack_timeout(req_msg,
1278 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1279 			       pri_path->packet_life_time));
1280 
1281 	if (alt_path) {
1282 		if (alt_path->hop_limit <= 1) {
1283 			req_msg->alt_local_lid = alt_path->slid;
1284 			req_msg->alt_remote_lid = alt_path->dlid;
1285 		} else {
1286 			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1287 			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1288 		}
1289 		req_msg->alt_local_gid = alt_path->sgid;
1290 		req_msg->alt_remote_gid = alt_path->dgid;
1291 		cm_req_set_alt_flow_label(req_msg,
1292 					  alt_path->flow_label);
1293 		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1294 		req_msg->alt_traffic_class = alt_path->traffic_class;
1295 		req_msg->alt_hop_limit = alt_path->hop_limit;
1296 		cm_req_set_alt_sl(req_msg, alt_path->sl);
1297 		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1298 		cm_req_set_alt_local_ack_timeout(req_msg,
1299 			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1300 				       alt_path->packet_life_time));
1301 	}
1302 
1303 	if (param->private_data && param->private_data_len)
1304 		memcpy(req_msg->private_data, param->private_data,
1305 		       param->private_data_len);
1306 }
1307 
1308 static int cm_validate_req_param(struct ib_cm_req_param *param)
1309 {
1310 	/* peer-to-peer not supported */
1311 	if (param->peer_to_peer)
1312 		return -EINVAL;
1313 
1314 	if (!param->primary_path)
1315 		return -EINVAL;
1316 
1317 	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1318 	    param->qp_type != IB_QPT_XRC_INI)
1319 		return -EINVAL;
1320 
1321 	if (param->private_data &&
1322 	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1323 		return -EINVAL;
1324 
1325 	if (param->alternate_path &&
1326 	    (param->alternate_path->pkey != param->primary_path->pkey ||
1327 	     param->alternate_path->mtu != param->primary_path->mtu))
1328 		return -EINVAL;
1329 
1330 	return 0;
1331 }
1332 
1333 int ib_send_cm_req(struct ib_cm_id *cm_id,
1334 		   struct ib_cm_req_param *param)
1335 {
1336 	struct cm_id_private *cm_id_priv;
1337 	struct cm_req_msg *req_msg;
1338 	unsigned long flags;
1339 	int ret;
1340 
1341 	ret = cm_validate_req_param(param);
1342 	if (ret)
1343 		return ret;
1344 
1345 	/* Verify that we're not in timewait. */
1346 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1347 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1348 	if (cm_id->state != IB_CM_IDLE) {
1349 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1350 		ret = -EINVAL;
1351 		goto out;
1352 	}
1353 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1354 
1355 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1356 							    id.local_id);
1357 	if (IS_ERR(cm_id_priv->timewait_info)) {
1358 		ret = PTR_ERR(cm_id_priv->timewait_info);
1359 		goto out;
1360 	}
1361 
1362 	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
1363 				 cm_id_priv);
1364 	if (ret)
1365 		goto error1;
1366 	if (param->alternate_path) {
1367 		ret = cm_init_av_by_path(param->alternate_path,
1368 					 &cm_id_priv->alt_av, cm_id_priv);
1369 		if (ret)
1370 			goto error1;
1371 	}
1372 	cm_id->service_id = param->service_id;
1373 	cm_id->service_mask = ~cpu_to_be64(0);
1374 	cm_id_priv->timeout_ms = cm_convert_to_ms(
1375 				    param->primary_path->packet_life_time) * 2 +
1376 				 cm_convert_to_ms(
1377 				    param->remote_cm_response_timeout);
1378 	cm_id_priv->max_cm_retries = param->max_cm_retries;
1379 	cm_id_priv->initiator_depth = param->initiator_depth;
1380 	cm_id_priv->responder_resources = param->responder_resources;
1381 	cm_id_priv->retry_count = param->retry_count;
1382 	cm_id_priv->path_mtu = param->primary_path->mtu;
1383 	cm_id_priv->pkey = param->primary_path->pkey;
1384 	cm_id_priv->qp_type = param->qp_type;
1385 
1386 	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1387 	if (ret)
1388 		goto error1;
1389 
1390 	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1391 	cm_format_req(req_msg, cm_id_priv, param);
1392 	cm_id_priv->tid = req_msg->hdr.tid;
1393 	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1394 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1395 
1396 	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1397 	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1398 
1399 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1400 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1401 	if (ret) {
1402 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1403 		goto error2;
1404 	}
1405 	BUG_ON(cm_id->state != IB_CM_IDLE);
1406 	cm_id->state = IB_CM_REQ_SENT;
1407 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1408 	return 0;
1409 
1410 error2:	cm_free_msg(cm_id_priv->msg);
1411 error1:	kfree(cm_id_priv->timewait_info);
1412 out:	return ret;
1413 }
1414 EXPORT_SYMBOL(ib_send_cm_req);
1415 
1416 static int cm_issue_rej(struct cm_port *port,
1417 			struct ib_mad_recv_wc *mad_recv_wc,
1418 			enum ib_cm_rej_reason reason,
1419 			enum cm_msg_response msg_rejected,
1420 			void *ari, u8 ari_length)
1421 {
1422 	struct ib_mad_send_buf *msg = NULL;
1423 	struct cm_rej_msg *rej_msg, *rcv_msg;
1424 	int ret;
1425 
1426 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1427 	if (ret)
1428 		return ret;
1429 
1430 	/* We just need common CM header information.  Cast to any message. */
1431 	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1432 	rej_msg = (struct cm_rej_msg *) msg->mad;
1433 
1434 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1435 	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1436 	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1437 	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1438 	rej_msg->reason = cpu_to_be16(reason);
1439 
1440 	if (ari && ari_length) {
1441 		cm_rej_set_reject_info_len(rej_msg, ari_length);
1442 		memcpy(rej_msg->ari, ari, ari_length);
1443 	}
1444 
1445 	ret = ib_post_send_mad(msg, NULL);
1446 	if (ret)
1447 		cm_free_msg(msg);
1448 
1449 	return ret;
1450 }
1451 
1452 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1453 					    struct ib_sa_path_rec *primary_path,
1454 					    struct ib_sa_path_rec *alt_path)
1455 {
1456 	memset(primary_path, 0, sizeof *primary_path);
1457 	primary_path->dgid = req_msg->primary_local_gid;
1458 	primary_path->sgid = req_msg->primary_remote_gid;
1459 	primary_path->dlid = req_msg->primary_local_lid;
1460 	primary_path->slid = req_msg->primary_remote_lid;
1461 	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1462 	primary_path->hop_limit = req_msg->primary_hop_limit;
1463 	primary_path->traffic_class = req_msg->primary_traffic_class;
1464 	primary_path->reversible = 1;
1465 	primary_path->pkey = req_msg->pkey;
1466 	primary_path->sl = cm_req_get_primary_sl(req_msg);
1467 	primary_path->mtu_selector = IB_SA_EQ;
1468 	primary_path->mtu = cm_req_get_path_mtu(req_msg);
1469 	primary_path->rate_selector = IB_SA_EQ;
1470 	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1471 	primary_path->packet_life_time_selector = IB_SA_EQ;
1472 	primary_path->packet_life_time =
1473 		cm_req_get_primary_local_ack_timeout(req_msg);
1474 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1475 	primary_path->service_id = req_msg->service_id;
1476 
1477 	if (req_msg->alt_local_lid) {
1478 		memset(alt_path, 0, sizeof *alt_path);
1479 		alt_path->dgid = req_msg->alt_local_gid;
1480 		alt_path->sgid = req_msg->alt_remote_gid;
1481 		alt_path->dlid = req_msg->alt_local_lid;
1482 		alt_path->slid = req_msg->alt_remote_lid;
1483 		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1484 		alt_path->hop_limit = req_msg->alt_hop_limit;
1485 		alt_path->traffic_class = req_msg->alt_traffic_class;
1486 		alt_path->reversible = 1;
1487 		alt_path->pkey = req_msg->pkey;
1488 		alt_path->sl = cm_req_get_alt_sl(req_msg);
1489 		alt_path->mtu_selector = IB_SA_EQ;
1490 		alt_path->mtu = cm_req_get_path_mtu(req_msg);
1491 		alt_path->rate_selector = IB_SA_EQ;
1492 		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1493 		alt_path->packet_life_time_selector = IB_SA_EQ;
1494 		alt_path->packet_life_time =
1495 			cm_req_get_alt_local_ack_timeout(req_msg);
1496 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1497 		alt_path->service_id = req_msg->service_id;
1498 	}
1499 }
1500 
1501 static u16 cm_get_bth_pkey(struct cm_work *work)
1502 {
1503 	struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1504 	u8 port_num = work->port->port_num;
1505 	u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1506 	u16 pkey;
1507 	int ret;
1508 
1509 	ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1510 	if (ret) {
1511 		dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1512 				     port_num, pkey_index, ret);
1513 		return 0;
1514 	}
1515 
1516 	return pkey;
1517 }
1518 
1519 static void cm_format_req_event(struct cm_work *work,
1520 				struct cm_id_private *cm_id_priv,
1521 				struct ib_cm_id *listen_id)
1522 {
1523 	struct cm_req_msg *req_msg;
1524 	struct ib_cm_req_event_param *param;
1525 
1526 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1527 	param = &work->cm_event.param.req_rcvd;
1528 	param->listen_id = listen_id;
1529 	param->bth_pkey = cm_get_bth_pkey(work);
1530 	param->port = cm_id_priv->av.port->port_num;
1531 	param->primary_path = &work->path[0];
1532 	if (req_msg->alt_local_lid)
1533 		param->alternate_path = &work->path[1];
1534 	else
1535 		param->alternate_path = NULL;
1536 	param->remote_ca_guid = req_msg->local_ca_guid;
1537 	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1538 	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1539 	param->qp_type = cm_req_get_qp_type(req_msg);
1540 	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1541 	param->responder_resources = cm_req_get_init_depth(req_msg);
1542 	param->initiator_depth = cm_req_get_resp_res(req_msg);
1543 	param->local_cm_response_timeout =
1544 					cm_req_get_remote_resp_timeout(req_msg);
1545 	param->flow_control = cm_req_get_flow_ctrl(req_msg);
1546 	param->remote_cm_response_timeout =
1547 					cm_req_get_local_resp_timeout(req_msg);
1548 	param->retry_count = cm_req_get_retry_count(req_msg);
1549 	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1550 	param->srq = cm_req_get_srq(req_msg);
1551 	param->ppath_sgid_index = cm_id_priv->av.ah_attr.grh.sgid_index;
1552 	work->cm_event.private_data = &req_msg->private_data;
1553 }
1554 
1555 static void cm_process_work(struct cm_id_private *cm_id_priv,
1556 			    struct cm_work *work)
1557 {
1558 	int ret;
1559 
1560 	/* We will typically only have the current event to report. */
1561 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1562 	cm_free_work(work);
1563 
1564 	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1565 		spin_lock_irq(&cm_id_priv->lock);
1566 		work = cm_dequeue_work(cm_id_priv);
1567 		spin_unlock_irq(&cm_id_priv->lock);
1568 		BUG_ON(!work);
1569 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1570 						&work->cm_event);
1571 		cm_free_work(work);
1572 	}
1573 	cm_deref_id(cm_id_priv);
1574 	if (ret)
1575 		cm_destroy_id(&cm_id_priv->id, ret);
1576 }
1577 
1578 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1579 			  struct cm_id_private *cm_id_priv,
1580 			  enum cm_msg_response msg_mraed, u8 service_timeout,
1581 			  const void *private_data, u8 private_data_len)
1582 {
1583 	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1584 	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1585 	mra_msg->local_comm_id = cm_id_priv->id.local_id;
1586 	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1587 	cm_mra_set_service_timeout(mra_msg, service_timeout);
1588 
1589 	if (private_data && private_data_len)
1590 		memcpy(mra_msg->private_data, private_data, private_data_len);
1591 }
1592 
1593 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1594 			  struct cm_id_private *cm_id_priv,
1595 			  enum ib_cm_rej_reason reason,
1596 			  void *ari,
1597 			  u8 ari_length,
1598 			  const void *private_data,
1599 			  u8 private_data_len)
1600 {
1601 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1602 	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1603 
1604 	switch(cm_id_priv->id.state) {
1605 	case IB_CM_REQ_RCVD:
1606 		rej_msg->local_comm_id = 0;
1607 		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1608 		break;
1609 	case IB_CM_MRA_REQ_SENT:
1610 		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1611 		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1612 		break;
1613 	case IB_CM_REP_RCVD:
1614 	case IB_CM_MRA_REP_SENT:
1615 		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1616 		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1617 		break;
1618 	default:
1619 		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1620 		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1621 		break;
1622 	}
1623 
1624 	rej_msg->reason = cpu_to_be16(reason);
1625 	if (ari && ari_length) {
1626 		cm_rej_set_reject_info_len(rej_msg, ari_length);
1627 		memcpy(rej_msg->ari, ari, ari_length);
1628 	}
1629 
1630 	if (private_data && private_data_len)
1631 		memcpy(rej_msg->private_data, private_data, private_data_len);
1632 }
1633 
1634 static void cm_dup_req_handler(struct cm_work *work,
1635 			       struct cm_id_private *cm_id_priv)
1636 {
1637 	struct ib_mad_send_buf *msg = NULL;
1638 	int ret;
1639 
1640 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1641 			counter[CM_REQ_COUNTER]);
1642 
1643 	/* Quick state check to discard duplicate REQs. */
1644 	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1645 		return;
1646 
1647 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1648 	if (ret)
1649 		return;
1650 
1651 	spin_lock_irq(&cm_id_priv->lock);
1652 	switch (cm_id_priv->id.state) {
1653 	case IB_CM_MRA_REQ_SENT:
1654 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1655 			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1656 			      cm_id_priv->private_data,
1657 			      cm_id_priv->private_data_len);
1658 		break;
1659 	case IB_CM_TIMEWAIT:
1660 		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1661 			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1662 		break;
1663 	default:
1664 		goto unlock;
1665 	}
1666 	spin_unlock_irq(&cm_id_priv->lock);
1667 
1668 	ret = ib_post_send_mad(msg, NULL);
1669 	if (ret)
1670 		goto free;
1671 	return;
1672 
1673 unlock:	spin_unlock_irq(&cm_id_priv->lock);
1674 free:	cm_free_msg(msg);
1675 }
1676 
1677 static struct cm_id_private * cm_match_req(struct cm_work *work,
1678 					   struct cm_id_private *cm_id_priv)
1679 {
1680 	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1681 	struct cm_timewait_info *timewait_info;
1682 	struct cm_req_msg *req_msg;
1683 	struct ib_cm_id *cm_id;
1684 
1685 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1686 
1687 	/* Check for possible duplicate REQ. */
1688 	spin_lock_irq(&cm.lock);
1689 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1690 	if (timewait_info) {
1691 		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1692 					   timewait_info->work.remote_id);
1693 		spin_unlock_irq(&cm.lock);
1694 		if (cur_cm_id_priv) {
1695 			cm_dup_req_handler(work, cur_cm_id_priv);
1696 			cm_deref_id(cur_cm_id_priv);
1697 		}
1698 		return NULL;
1699 	}
1700 
1701 	/* Check for stale connections. */
1702 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1703 	if (timewait_info) {
1704 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1705 		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1706 					   timewait_info->work.remote_id);
1707 
1708 		spin_unlock_irq(&cm.lock);
1709 		cm_issue_rej(work->port, work->mad_recv_wc,
1710 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1711 			     NULL, 0);
1712 		if (cur_cm_id_priv) {
1713 			cm_id = &cur_cm_id_priv->id;
1714 			ib_send_cm_dreq(cm_id, NULL, 0);
1715 			cm_deref_id(cur_cm_id_priv);
1716 		}
1717 		return NULL;
1718 	}
1719 
1720 	/* Find matching listen request. */
1721 	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1722 					   req_msg->service_id);
1723 	if (!listen_cm_id_priv) {
1724 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1725 		spin_unlock_irq(&cm.lock);
1726 		cm_issue_rej(work->port, work->mad_recv_wc,
1727 			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1728 			     NULL, 0);
1729 		goto out;
1730 	}
1731 	atomic_inc(&listen_cm_id_priv->refcount);
1732 	atomic_inc(&cm_id_priv->refcount);
1733 	cm_id_priv->id.state = IB_CM_REQ_RCVD;
1734 	atomic_inc(&cm_id_priv->work_count);
1735 	spin_unlock_irq(&cm.lock);
1736 out:
1737 	return listen_cm_id_priv;
1738 }
1739 
1740 /*
1741  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1742  * we need to override the LID/SL data in the REQ with the LID information
1743  * in the work completion.
1744  */
1745 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1746 {
1747 	if (!cm_req_get_primary_subnet_local(req_msg)) {
1748 		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1749 			req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1750 			cm_req_set_primary_sl(req_msg, wc->sl);
1751 		}
1752 
1753 		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1754 			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1755 	}
1756 
1757 	if (!cm_req_get_alt_subnet_local(req_msg)) {
1758 		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1759 			req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1760 			cm_req_set_alt_sl(req_msg, wc->sl);
1761 		}
1762 
1763 		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1764 			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1765 	}
1766 }
1767 
1768 static int cm_req_handler(struct cm_work *work)
1769 {
1770 	struct ib_cm_id *cm_id;
1771 	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1772 	struct cm_req_msg *req_msg;
1773 	union ib_gid gid;
1774 	struct ib_gid_attr gid_attr;
1775 	int ret;
1776 
1777 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1778 
1779 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1780 	if (IS_ERR(cm_id))
1781 		return PTR_ERR(cm_id);
1782 
1783 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1784 	cm_id_priv->id.remote_id = req_msg->local_comm_id;
1785 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1786 				      work->mad_recv_wc->recv_buf.grh,
1787 				      &cm_id_priv->av);
1788 	if (ret)
1789 		goto destroy;
1790 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1791 							    id.local_id);
1792 	if (IS_ERR(cm_id_priv->timewait_info)) {
1793 		ret = PTR_ERR(cm_id_priv->timewait_info);
1794 		goto destroy;
1795 	}
1796 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1797 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1798 	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1799 
1800 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1801 	if (!listen_cm_id_priv) {
1802 		ret = -EINVAL;
1803 		goto free_timeinfo;
1804 	}
1805 
1806 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1807 	cm_id_priv->id.context = listen_cm_id_priv->id.context;
1808 	cm_id_priv->id.service_id = req_msg->service_id;
1809 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1810 
1811 	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1812 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1813 
1814 	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1815 	work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
1816 	ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
1817 				work->port->port_num,
1818 				cm_id_priv->av.ah_attr.grh.sgid_index,
1819 				&gid, &gid_attr);
1820 	if (!ret) {
1821 		if (gid_attr.ndev) {
1822 			work->path[0].ifindex = gid_attr.ndev->if_index;
1823 			work->path[0].net = dev_net(gid_attr.ndev);
1824 			dev_put(gid_attr.ndev);
1825 		}
1826 		work->path[0].gid_type = gid_attr.gid_type;
1827 		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1828 					 cm_id_priv);
1829 	}
1830 	if (ret) {
1831 		int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
1832 					    work->port->port_num, 0,
1833 					    &work->path[0].sgid,
1834 					    &gid_attr);
1835 		if (!err && gid_attr.ndev) {
1836 			work->path[0].ifindex = gid_attr.ndev->if_index;
1837 			work->path[0].net = dev_net(gid_attr.ndev);
1838 			dev_put(gid_attr.ndev);
1839 		}
1840 		work->path[0].gid_type = gid_attr.gid_type;
1841 		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1842 			       &work->path[0].sgid, sizeof work->path[0].sgid,
1843 			       NULL, 0);
1844 		goto rejected;
1845 	}
1846 	if (req_msg->alt_local_lid) {
1847 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
1848 					 cm_id_priv);
1849 		if (ret) {
1850 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1851 				       &work->path[0].sgid,
1852 				       sizeof work->path[0].sgid, NULL, 0);
1853 			goto rejected;
1854 		}
1855 	}
1856 	cm_id_priv->tid = req_msg->hdr.tid;
1857 	cm_id_priv->timeout_ms = cm_convert_to_ms(
1858 					cm_req_get_local_resp_timeout(req_msg));
1859 	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1860 	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1861 	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1862 	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1863 	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1864 	cm_id_priv->pkey = req_msg->pkey;
1865 	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1866 	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1867 	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1868 	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1869 
1870 	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1871 	cm_process_work(cm_id_priv, work);
1872 	cm_deref_id(listen_cm_id_priv);
1873 	return 0;
1874 
1875 rejected:
1876 	atomic_dec(&cm_id_priv->refcount);
1877 	cm_deref_id(listen_cm_id_priv);
1878 free_timeinfo:
1879 	kfree(cm_id_priv->timewait_info);
1880 destroy:
1881 	ib_destroy_cm_id(cm_id);
1882 	return ret;
1883 }
1884 
1885 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1886 			  struct cm_id_private *cm_id_priv,
1887 			  struct ib_cm_rep_param *param)
1888 {
1889 	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1890 	rep_msg->local_comm_id = cm_id_priv->id.local_id;
1891 	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1892 	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1893 	rep_msg->resp_resources = param->responder_resources;
1894 	cm_rep_set_target_ack_delay(rep_msg,
1895 				    cm_id_priv->av.port->cm_dev->ack_delay);
1896 	cm_rep_set_failover(rep_msg, param->failover_accepted);
1897 	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1898 	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1899 
1900 	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1901 		rep_msg->initiator_depth = param->initiator_depth;
1902 		cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1903 		cm_rep_set_srq(rep_msg, param->srq);
1904 		cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1905 	} else {
1906 		cm_rep_set_srq(rep_msg, 1);
1907 		cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1908 	}
1909 
1910 	if (param->private_data && param->private_data_len)
1911 		memcpy(rep_msg->private_data, param->private_data,
1912 		       param->private_data_len);
1913 }
1914 
1915 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1916 		   struct ib_cm_rep_param *param)
1917 {
1918 	struct cm_id_private *cm_id_priv;
1919 	struct ib_mad_send_buf *msg;
1920 	struct cm_rep_msg *rep_msg;
1921 	unsigned long flags;
1922 	int ret;
1923 
1924 	if (param->private_data &&
1925 	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1926 		return -EINVAL;
1927 
1928 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1929 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1930 	if (cm_id->state != IB_CM_REQ_RCVD &&
1931 	    cm_id->state != IB_CM_MRA_REQ_SENT) {
1932 		ret = -EINVAL;
1933 		goto out;
1934 	}
1935 
1936 	ret = cm_alloc_msg(cm_id_priv, &msg);
1937 	if (ret)
1938 		goto out;
1939 
1940 	rep_msg = (struct cm_rep_msg *) msg->mad;
1941 	cm_format_rep(rep_msg, cm_id_priv, param);
1942 	msg->timeout_ms = cm_id_priv->timeout_ms;
1943 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1944 
1945 	ret = ib_post_send_mad(msg, NULL);
1946 	if (ret) {
1947 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1948 		cm_free_msg(msg);
1949 		return ret;
1950 	}
1951 
1952 	cm_id->state = IB_CM_REP_SENT;
1953 	cm_id_priv->msg = msg;
1954 	cm_id_priv->initiator_depth = param->initiator_depth;
1955 	cm_id_priv->responder_resources = param->responder_resources;
1956 	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1957 	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1958 
1959 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1960 	return ret;
1961 }
1962 EXPORT_SYMBOL(ib_send_cm_rep);
1963 
1964 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1965 			  struct cm_id_private *cm_id_priv,
1966 			  const void *private_data,
1967 			  u8 private_data_len)
1968 {
1969 	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1970 	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1971 	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1972 
1973 	if (private_data && private_data_len)
1974 		memcpy(rtu_msg->private_data, private_data, private_data_len);
1975 }
1976 
1977 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1978 		   const void *private_data,
1979 		   u8 private_data_len)
1980 {
1981 	struct cm_id_private *cm_id_priv;
1982 	struct ib_mad_send_buf *msg;
1983 	unsigned long flags;
1984 	void *data;
1985 	int ret;
1986 
1987 	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1988 		return -EINVAL;
1989 
1990 	data = cm_copy_private_data(private_data, private_data_len);
1991 	if (IS_ERR(data))
1992 		return PTR_ERR(data);
1993 
1994 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1995 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1996 	if (cm_id->state != IB_CM_REP_RCVD &&
1997 	    cm_id->state != IB_CM_MRA_REP_SENT) {
1998 		ret = -EINVAL;
1999 		goto error;
2000 	}
2001 
2002 	ret = cm_alloc_msg(cm_id_priv, &msg);
2003 	if (ret)
2004 		goto error;
2005 
2006 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2007 		      private_data, private_data_len);
2008 
2009 	ret = ib_post_send_mad(msg, NULL);
2010 	if (ret) {
2011 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2012 		cm_free_msg(msg);
2013 		kfree(data);
2014 		return ret;
2015 	}
2016 
2017 	cm_id->state = IB_CM_ESTABLISHED;
2018 	cm_set_private_data(cm_id_priv, data, private_data_len);
2019 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2020 	return 0;
2021 
2022 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2023 	kfree(data);
2024 	return ret;
2025 }
2026 EXPORT_SYMBOL(ib_send_cm_rtu);
2027 
2028 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2029 {
2030 	struct cm_rep_msg *rep_msg;
2031 	struct ib_cm_rep_event_param *param;
2032 
2033 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2034 	param = &work->cm_event.param.rep_rcvd;
2035 	param->remote_ca_guid = rep_msg->local_ca_guid;
2036 	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
2037 	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2038 	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
2039 	param->responder_resources = rep_msg->initiator_depth;
2040 	param->initiator_depth = rep_msg->resp_resources;
2041 	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2042 	param->failover_accepted = cm_rep_get_failover(rep_msg);
2043 	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
2044 	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2045 	param->srq = cm_rep_get_srq(rep_msg);
2046 	work->cm_event.private_data = &rep_msg->private_data;
2047 }
2048 
2049 static void cm_dup_rep_handler(struct cm_work *work)
2050 {
2051 	struct cm_id_private *cm_id_priv;
2052 	struct cm_rep_msg *rep_msg;
2053 	struct ib_mad_send_buf *msg = NULL;
2054 	int ret;
2055 
2056 	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2057 	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
2058 				   rep_msg->local_comm_id);
2059 	if (!cm_id_priv)
2060 		return;
2061 
2062 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2063 			counter[CM_REP_COUNTER]);
2064 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2065 	if (ret)
2066 		goto deref;
2067 
2068 	spin_lock_irq(&cm_id_priv->lock);
2069 	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2070 		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2071 			      cm_id_priv->private_data,
2072 			      cm_id_priv->private_data_len);
2073 	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2074 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2075 			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2076 			      cm_id_priv->private_data,
2077 			      cm_id_priv->private_data_len);
2078 	else
2079 		goto unlock;
2080 	spin_unlock_irq(&cm_id_priv->lock);
2081 
2082 	ret = ib_post_send_mad(msg, NULL);
2083 	if (ret)
2084 		goto free;
2085 	goto deref;
2086 
2087 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2088 free:	cm_free_msg(msg);
2089 deref:	cm_deref_id(cm_id_priv);
2090 }
2091 
2092 static int cm_rep_handler(struct cm_work *work)
2093 {
2094 	struct cm_id_private *cm_id_priv;
2095 	struct cm_rep_msg *rep_msg;
2096 	int ret;
2097 	struct cm_id_private *cur_cm_id_priv;
2098 	struct ib_cm_id *cm_id;
2099 	struct cm_timewait_info *timewait_info;
2100 
2101 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2102 	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2103 	if (!cm_id_priv) {
2104 		cm_dup_rep_handler(work);
2105 		return -EINVAL;
2106 	}
2107 
2108 	cm_format_rep_event(work, cm_id_priv->qp_type);
2109 
2110 	spin_lock_irq(&cm_id_priv->lock);
2111 	switch (cm_id_priv->id.state) {
2112 	case IB_CM_REQ_SENT:
2113 	case IB_CM_MRA_REQ_RCVD:
2114 		break;
2115 	default:
2116 		spin_unlock_irq(&cm_id_priv->lock);
2117 		ret = -EINVAL;
2118 		goto error;
2119 	}
2120 
2121 	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2122 	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2123 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2124 
2125 	spin_lock(&cm.lock);
2126 	/* Check for duplicate REP. */
2127 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2128 		spin_unlock(&cm.lock);
2129 		spin_unlock_irq(&cm_id_priv->lock);
2130 		ret = -EINVAL;
2131 		goto error;
2132 	}
2133 	/* Check for a stale connection. */
2134 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2135 	if (timewait_info) {
2136 		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2137 			 &cm.remote_id_table);
2138 		cm_id_priv->timewait_info->inserted_remote_id = 0;
2139 		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
2140 					   timewait_info->work.remote_id);
2141 
2142 		spin_unlock(&cm.lock);
2143 		spin_unlock_irq(&cm_id_priv->lock);
2144 		cm_issue_rej(work->port, work->mad_recv_wc,
2145 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2146 			     NULL, 0);
2147 		ret = -EINVAL;
2148 		if (cur_cm_id_priv) {
2149 			cm_id = &cur_cm_id_priv->id;
2150 			ib_send_cm_dreq(cm_id, NULL, 0);
2151 			cm_deref_id(cur_cm_id_priv);
2152 		}
2153 
2154 		goto error;
2155 	}
2156 	spin_unlock(&cm.lock);
2157 
2158 	cm_id_priv->id.state = IB_CM_REP_RCVD;
2159 	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2160 	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2161 	cm_id_priv->initiator_depth = rep_msg->resp_resources;
2162 	cm_id_priv->responder_resources = rep_msg->initiator_depth;
2163 	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2164 	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2165 	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2166 	cm_id_priv->av.timeout =
2167 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2168 				       cm_id_priv->av.timeout - 1);
2169 	cm_id_priv->alt_av.timeout =
2170 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2171 				       cm_id_priv->alt_av.timeout - 1);
2172 
2173 	/* todo: handle peer_to_peer */
2174 
2175 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2176 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2177 	if (!ret)
2178 		list_add_tail(&work->list, &cm_id_priv->work_list);
2179 	spin_unlock_irq(&cm_id_priv->lock);
2180 
2181 	if (ret)
2182 		cm_process_work(cm_id_priv, work);
2183 	else
2184 		cm_deref_id(cm_id_priv);
2185 	return 0;
2186 
2187 error:
2188 	cm_deref_id(cm_id_priv);
2189 	return ret;
2190 }
2191 
2192 static int cm_establish_handler(struct cm_work *work)
2193 {
2194 	struct cm_id_private *cm_id_priv;
2195 	int ret;
2196 
2197 	/* See comment in cm_establish about lookup. */
2198 	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2199 	if (!cm_id_priv)
2200 		return -EINVAL;
2201 
2202 	spin_lock_irq(&cm_id_priv->lock);
2203 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2204 		spin_unlock_irq(&cm_id_priv->lock);
2205 		goto out;
2206 	}
2207 
2208 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2209 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2210 	if (!ret)
2211 		list_add_tail(&work->list, &cm_id_priv->work_list);
2212 	spin_unlock_irq(&cm_id_priv->lock);
2213 
2214 	if (ret)
2215 		cm_process_work(cm_id_priv, work);
2216 	else
2217 		cm_deref_id(cm_id_priv);
2218 	return 0;
2219 out:
2220 	cm_deref_id(cm_id_priv);
2221 	return -EINVAL;
2222 }
2223 
2224 static int cm_rtu_handler(struct cm_work *work)
2225 {
2226 	struct cm_id_private *cm_id_priv;
2227 	struct cm_rtu_msg *rtu_msg;
2228 	int ret;
2229 
2230 	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2231 	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2232 				   rtu_msg->local_comm_id);
2233 	if (!cm_id_priv)
2234 		return -EINVAL;
2235 
2236 	work->cm_event.private_data = &rtu_msg->private_data;
2237 
2238 	spin_lock_irq(&cm_id_priv->lock);
2239 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2240 	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2241 		spin_unlock_irq(&cm_id_priv->lock);
2242 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2243 				counter[CM_RTU_COUNTER]);
2244 		goto out;
2245 	}
2246 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
2247 
2248 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2249 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2250 	if (!ret)
2251 		list_add_tail(&work->list, &cm_id_priv->work_list);
2252 	spin_unlock_irq(&cm_id_priv->lock);
2253 
2254 	if (ret)
2255 		cm_process_work(cm_id_priv, work);
2256 	else
2257 		cm_deref_id(cm_id_priv);
2258 	return 0;
2259 out:
2260 	cm_deref_id(cm_id_priv);
2261 	return -EINVAL;
2262 }
2263 
2264 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2265 			  struct cm_id_private *cm_id_priv,
2266 			  const void *private_data,
2267 			  u8 private_data_len)
2268 {
2269 	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2270 			  cm_form_tid(cm_id_priv));
2271 	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2272 	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2273 	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2274 
2275 	if (private_data && private_data_len)
2276 		memcpy(dreq_msg->private_data, private_data, private_data_len);
2277 }
2278 
2279 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2280 		    const void *private_data,
2281 		    u8 private_data_len)
2282 {
2283 	struct cm_id_private *cm_id_priv;
2284 	struct ib_mad_send_buf *msg;
2285 	unsigned long flags;
2286 	int ret;
2287 
2288 	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2289 		return -EINVAL;
2290 
2291 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2292 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2293 	if (cm_id->state != IB_CM_ESTABLISHED) {
2294 		ret = -EINVAL;
2295 		goto out;
2296 	}
2297 
2298 	if (cm_id->lap_state == IB_CM_LAP_SENT ||
2299 	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2300 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2301 
2302 	ret = cm_alloc_msg(cm_id_priv, &msg);
2303 	if (ret) {
2304 		cm_enter_timewait(cm_id_priv);
2305 		goto out;
2306 	}
2307 
2308 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2309 		       private_data, private_data_len);
2310 	msg->timeout_ms = cm_id_priv->timeout_ms;
2311 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2312 
2313 	ret = ib_post_send_mad(msg, NULL);
2314 	if (ret) {
2315 		cm_enter_timewait(cm_id_priv);
2316 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2317 		cm_free_msg(msg);
2318 		return ret;
2319 	}
2320 
2321 	cm_id->state = IB_CM_DREQ_SENT;
2322 	cm_id_priv->msg = msg;
2323 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2324 	return ret;
2325 }
2326 EXPORT_SYMBOL(ib_send_cm_dreq);
2327 
2328 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2329 			  struct cm_id_private *cm_id_priv,
2330 			  const void *private_data,
2331 			  u8 private_data_len)
2332 {
2333 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2334 	drep_msg->local_comm_id = cm_id_priv->id.local_id;
2335 	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2336 
2337 	if (private_data && private_data_len)
2338 		memcpy(drep_msg->private_data, private_data, private_data_len);
2339 }
2340 
2341 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2342 		    const void *private_data,
2343 		    u8 private_data_len)
2344 {
2345 	struct cm_id_private *cm_id_priv;
2346 	struct ib_mad_send_buf *msg;
2347 	unsigned long flags;
2348 	void *data;
2349 	int ret;
2350 
2351 	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2352 		return -EINVAL;
2353 
2354 	data = cm_copy_private_data(private_data, private_data_len);
2355 	if (IS_ERR(data))
2356 		return PTR_ERR(data);
2357 
2358 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2359 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2360 	if (cm_id->state != IB_CM_DREQ_RCVD) {
2361 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2362 		kfree(data);
2363 		return -EINVAL;
2364 	}
2365 
2366 	cm_set_private_data(cm_id_priv, data, private_data_len);
2367 	cm_enter_timewait(cm_id_priv);
2368 
2369 	ret = cm_alloc_msg(cm_id_priv, &msg);
2370 	if (ret)
2371 		goto out;
2372 
2373 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2374 		       private_data, private_data_len);
2375 
2376 	ret = ib_post_send_mad(msg, NULL);
2377 	if (ret) {
2378 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2379 		cm_free_msg(msg);
2380 		return ret;
2381 	}
2382 
2383 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2384 	return ret;
2385 }
2386 EXPORT_SYMBOL(ib_send_cm_drep);
2387 
2388 static int cm_issue_drep(struct cm_port *port,
2389 			 struct ib_mad_recv_wc *mad_recv_wc)
2390 {
2391 	struct ib_mad_send_buf *msg = NULL;
2392 	struct cm_dreq_msg *dreq_msg;
2393 	struct cm_drep_msg *drep_msg;
2394 	int ret;
2395 
2396 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2397 	if (ret)
2398 		return ret;
2399 
2400 	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2401 	drep_msg = (struct cm_drep_msg *) msg->mad;
2402 
2403 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2404 	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2405 	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2406 
2407 	ret = ib_post_send_mad(msg, NULL);
2408 	if (ret)
2409 		cm_free_msg(msg);
2410 
2411 	return ret;
2412 }
2413 
2414 static int cm_dreq_handler(struct cm_work *work)
2415 {
2416 	struct cm_id_private *cm_id_priv;
2417 	struct cm_dreq_msg *dreq_msg;
2418 	struct ib_mad_send_buf *msg = NULL;
2419 	int ret;
2420 
2421 	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2422 	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2423 				   dreq_msg->local_comm_id);
2424 	if (!cm_id_priv) {
2425 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2426 				counter[CM_DREQ_COUNTER]);
2427 		cm_issue_drep(work->port, work->mad_recv_wc);
2428 		return -EINVAL;
2429 	}
2430 
2431 	work->cm_event.private_data = &dreq_msg->private_data;
2432 
2433 	spin_lock_irq(&cm_id_priv->lock);
2434 	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2435 		goto unlock;
2436 
2437 	switch (cm_id_priv->id.state) {
2438 	case IB_CM_REP_SENT:
2439 	case IB_CM_DREQ_SENT:
2440 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2441 		break;
2442 	case IB_CM_ESTABLISHED:
2443 		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2444 		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2445 			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2446 		break;
2447 	case IB_CM_MRA_REP_RCVD:
2448 		break;
2449 	case IB_CM_TIMEWAIT:
2450 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2451 				counter[CM_DREQ_COUNTER]);
2452 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2453 		if (IS_ERR(msg))
2454 			goto unlock;
2455 
2456 		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2457 			       cm_id_priv->private_data,
2458 			       cm_id_priv->private_data_len);
2459 		spin_unlock_irq(&cm_id_priv->lock);
2460 
2461 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2462 		    ib_post_send_mad(msg, NULL))
2463 			cm_free_msg(msg);
2464 		goto deref;
2465 	case IB_CM_DREQ_RCVD:
2466 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2467 				counter[CM_DREQ_COUNTER]);
2468 		goto unlock;
2469 	default:
2470 		goto unlock;
2471 	}
2472 	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2473 	cm_id_priv->tid = dreq_msg->hdr.tid;
2474 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2475 	if (!ret)
2476 		list_add_tail(&work->list, &cm_id_priv->work_list);
2477 	spin_unlock_irq(&cm_id_priv->lock);
2478 
2479 	if (ret)
2480 		cm_process_work(cm_id_priv, work);
2481 	else
2482 		cm_deref_id(cm_id_priv);
2483 	return 0;
2484 
2485 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2486 deref:	cm_deref_id(cm_id_priv);
2487 	return -EINVAL;
2488 }
2489 
2490 static int cm_drep_handler(struct cm_work *work)
2491 {
2492 	struct cm_id_private *cm_id_priv;
2493 	struct cm_drep_msg *drep_msg;
2494 	int ret;
2495 
2496 	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2497 	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2498 				   drep_msg->local_comm_id);
2499 	if (!cm_id_priv)
2500 		return -EINVAL;
2501 
2502 	work->cm_event.private_data = &drep_msg->private_data;
2503 
2504 	spin_lock_irq(&cm_id_priv->lock);
2505 	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2506 	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2507 		spin_unlock_irq(&cm_id_priv->lock);
2508 		goto out;
2509 	}
2510 	cm_enter_timewait(cm_id_priv);
2511 
2512 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2513 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2514 	if (!ret)
2515 		list_add_tail(&work->list, &cm_id_priv->work_list);
2516 	spin_unlock_irq(&cm_id_priv->lock);
2517 
2518 	if (ret)
2519 		cm_process_work(cm_id_priv, work);
2520 	else
2521 		cm_deref_id(cm_id_priv);
2522 	return 0;
2523 out:
2524 	cm_deref_id(cm_id_priv);
2525 	return -EINVAL;
2526 }
2527 
2528 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2529 		   enum ib_cm_rej_reason reason,
2530 		   void *ari,
2531 		   u8 ari_length,
2532 		   const void *private_data,
2533 		   u8 private_data_len)
2534 {
2535 	struct cm_id_private *cm_id_priv;
2536 	struct ib_mad_send_buf *msg;
2537 	unsigned long flags;
2538 	int ret;
2539 
2540 	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2541 	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2542 		return -EINVAL;
2543 
2544 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2545 
2546 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2547 	switch (cm_id->state) {
2548 	case IB_CM_REQ_SENT:
2549 	case IB_CM_MRA_REQ_RCVD:
2550 	case IB_CM_REQ_RCVD:
2551 	case IB_CM_MRA_REQ_SENT:
2552 	case IB_CM_REP_RCVD:
2553 	case IB_CM_MRA_REP_SENT:
2554 		ret = cm_alloc_msg(cm_id_priv, &msg);
2555 		if (!ret)
2556 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2557 				      cm_id_priv, reason, ari, ari_length,
2558 				      private_data, private_data_len);
2559 
2560 		cm_reset_to_idle(cm_id_priv);
2561 		break;
2562 	case IB_CM_REP_SENT:
2563 	case IB_CM_MRA_REP_RCVD:
2564 		ret = cm_alloc_msg(cm_id_priv, &msg);
2565 		if (!ret)
2566 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2567 				      cm_id_priv, reason, ari, ari_length,
2568 				      private_data, private_data_len);
2569 
2570 		cm_enter_timewait(cm_id_priv);
2571 		break;
2572 	default:
2573 		ret = -EINVAL;
2574 		goto out;
2575 	}
2576 
2577 	if (ret)
2578 		goto out;
2579 
2580 	ret = ib_post_send_mad(msg, NULL);
2581 	if (ret)
2582 		cm_free_msg(msg);
2583 
2584 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2585 	return ret;
2586 }
2587 EXPORT_SYMBOL(ib_send_cm_rej);
2588 
2589 static void cm_format_rej_event(struct cm_work *work)
2590 {
2591 	struct cm_rej_msg *rej_msg;
2592 	struct ib_cm_rej_event_param *param;
2593 
2594 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2595 	param = &work->cm_event.param.rej_rcvd;
2596 	param->ari = rej_msg->ari;
2597 	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2598 	param->reason = __be16_to_cpu(rej_msg->reason);
2599 	work->cm_event.private_data = &rej_msg->private_data;
2600 }
2601 
2602 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2603 {
2604 	struct cm_timewait_info *timewait_info;
2605 	struct cm_id_private *cm_id_priv;
2606 	__be32 remote_id;
2607 
2608 	remote_id = rej_msg->local_comm_id;
2609 
2610 	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2611 		spin_lock_irq(&cm.lock);
2612 		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2613 						  remote_id);
2614 		if (!timewait_info) {
2615 			spin_unlock_irq(&cm.lock);
2616 			return NULL;
2617 		}
2618 		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2619 				      (timewait_info->work.local_id ^
2620 				       cm.random_id_operand));
2621 		if (cm_id_priv) {
2622 			if (cm_id_priv->id.remote_id == remote_id)
2623 				atomic_inc(&cm_id_priv->refcount);
2624 			else
2625 				cm_id_priv = NULL;
2626 		}
2627 		spin_unlock_irq(&cm.lock);
2628 	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2629 		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2630 	else
2631 		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2632 
2633 	return cm_id_priv;
2634 }
2635 
2636 static int cm_rej_handler(struct cm_work *work)
2637 {
2638 	struct cm_id_private *cm_id_priv;
2639 	struct cm_rej_msg *rej_msg;
2640 	int ret;
2641 
2642 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2643 	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2644 	if (!cm_id_priv)
2645 		return -EINVAL;
2646 
2647 	cm_format_rej_event(work);
2648 
2649 	spin_lock_irq(&cm_id_priv->lock);
2650 	switch (cm_id_priv->id.state) {
2651 	case IB_CM_REQ_SENT:
2652 	case IB_CM_MRA_REQ_RCVD:
2653 	case IB_CM_REP_SENT:
2654 	case IB_CM_MRA_REP_RCVD:
2655 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2656 		/* fall through */
2657 	case IB_CM_REQ_RCVD:
2658 	case IB_CM_MRA_REQ_SENT:
2659 		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2660 			cm_enter_timewait(cm_id_priv);
2661 		else
2662 			cm_reset_to_idle(cm_id_priv);
2663 		break;
2664 	case IB_CM_DREQ_SENT:
2665 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2666 		/* fall through */
2667 	case IB_CM_REP_RCVD:
2668 	case IB_CM_MRA_REP_SENT:
2669 		cm_enter_timewait(cm_id_priv);
2670 		break;
2671 	case IB_CM_ESTABLISHED:
2672 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2673 		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2674 			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2675 				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2676 					      cm_id_priv->msg);
2677 			cm_enter_timewait(cm_id_priv);
2678 			break;
2679 		}
2680 		/* fall through */
2681 	default:
2682 		spin_unlock_irq(&cm_id_priv->lock);
2683 		ret = -EINVAL;
2684 		goto out;
2685 	}
2686 
2687 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2688 	if (!ret)
2689 		list_add_tail(&work->list, &cm_id_priv->work_list);
2690 	spin_unlock_irq(&cm_id_priv->lock);
2691 
2692 	if (ret)
2693 		cm_process_work(cm_id_priv, work);
2694 	else
2695 		cm_deref_id(cm_id_priv);
2696 	return 0;
2697 out:
2698 	cm_deref_id(cm_id_priv);
2699 	return -EINVAL;
2700 }
2701 
2702 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2703 		   u8 service_timeout,
2704 		   const void *private_data,
2705 		   u8 private_data_len)
2706 {
2707 	struct cm_id_private *cm_id_priv;
2708 	struct ib_mad_send_buf *msg;
2709 	enum ib_cm_state cm_state;
2710 	enum ib_cm_lap_state lap_state;
2711 	enum cm_msg_response msg_response;
2712 	void *data;
2713 	unsigned long flags;
2714 	int ret;
2715 
2716 	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2717 		return -EINVAL;
2718 
2719 	data = cm_copy_private_data(private_data, private_data_len);
2720 	if (IS_ERR(data))
2721 		return PTR_ERR(data);
2722 
2723 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2724 
2725 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2726 	switch(cm_id_priv->id.state) {
2727 	case IB_CM_REQ_RCVD:
2728 		cm_state = IB_CM_MRA_REQ_SENT;
2729 		lap_state = cm_id->lap_state;
2730 		msg_response = CM_MSG_RESPONSE_REQ;
2731 		break;
2732 	case IB_CM_REP_RCVD:
2733 		cm_state = IB_CM_MRA_REP_SENT;
2734 		lap_state = cm_id->lap_state;
2735 		msg_response = CM_MSG_RESPONSE_REP;
2736 		break;
2737 	case IB_CM_ESTABLISHED:
2738 		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2739 			cm_state = cm_id->state;
2740 			lap_state = IB_CM_MRA_LAP_SENT;
2741 			msg_response = CM_MSG_RESPONSE_OTHER;
2742 			break;
2743 		}
2744 	default:
2745 		ret = -EINVAL;
2746 		goto error1;
2747 	}
2748 
2749 	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2750 		ret = cm_alloc_msg(cm_id_priv, &msg);
2751 		if (ret)
2752 			goto error1;
2753 
2754 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2755 			      msg_response, service_timeout,
2756 			      private_data, private_data_len);
2757 		ret = ib_post_send_mad(msg, NULL);
2758 		if (ret)
2759 			goto error2;
2760 	}
2761 
2762 	cm_id->state = cm_state;
2763 	cm_id->lap_state = lap_state;
2764 	cm_id_priv->service_timeout = service_timeout;
2765 	cm_set_private_data(cm_id_priv, data, private_data_len);
2766 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2767 	return 0;
2768 
2769 error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2770 	kfree(data);
2771 	return ret;
2772 
2773 error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2774 	kfree(data);
2775 	cm_free_msg(msg);
2776 	return ret;
2777 }
2778 EXPORT_SYMBOL(ib_send_cm_mra);
2779 
2780 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2781 {
2782 	switch (cm_mra_get_msg_mraed(mra_msg)) {
2783 	case CM_MSG_RESPONSE_REQ:
2784 		return cm_acquire_id(mra_msg->remote_comm_id, 0);
2785 	case CM_MSG_RESPONSE_REP:
2786 	case CM_MSG_RESPONSE_OTHER:
2787 		return cm_acquire_id(mra_msg->remote_comm_id,
2788 				     mra_msg->local_comm_id);
2789 	default:
2790 		return NULL;
2791 	}
2792 }
2793 
2794 static int cm_mra_handler(struct cm_work *work)
2795 {
2796 	struct cm_id_private *cm_id_priv;
2797 	struct cm_mra_msg *mra_msg;
2798 	int timeout, ret;
2799 
2800 	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2801 	cm_id_priv = cm_acquire_mraed_id(mra_msg);
2802 	if (!cm_id_priv)
2803 		return -EINVAL;
2804 
2805 	work->cm_event.private_data = &mra_msg->private_data;
2806 	work->cm_event.param.mra_rcvd.service_timeout =
2807 					cm_mra_get_service_timeout(mra_msg);
2808 	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2809 		  cm_convert_to_ms(cm_id_priv->av.timeout);
2810 
2811 	spin_lock_irq(&cm_id_priv->lock);
2812 	switch (cm_id_priv->id.state) {
2813 	case IB_CM_REQ_SENT:
2814 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2815 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2816 				  cm_id_priv->msg, timeout))
2817 			goto out;
2818 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2819 		break;
2820 	case IB_CM_REP_SENT:
2821 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2822 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2823 				  cm_id_priv->msg, timeout))
2824 			goto out;
2825 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2826 		break;
2827 	case IB_CM_ESTABLISHED:
2828 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2829 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2830 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2831 				  cm_id_priv->msg, timeout)) {
2832 			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2833 				atomic_long_inc(&work->port->
2834 						counter_group[CM_RECV_DUPLICATES].
2835 						counter[CM_MRA_COUNTER]);
2836 			goto out;
2837 		}
2838 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2839 		break;
2840 	case IB_CM_MRA_REQ_RCVD:
2841 	case IB_CM_MRA_REP_RCVD:
2842 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2843 				counter[CM_MRA_COUNTER]);
2844 		/* fall through */
2845 	default:
2846 		goto out;
2847 	}
2848 
2849 	cm_id_priv->msg->context[1] = (void *) (unsigned long)
2850 				      cm_id_priv->id.state;
2851 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2852 	if (!ret)
2853 		list_add_tail(&work->list, &cm_id_priv->work_list);
2854 	spin_unlock_irq(&cm_id_priv->lock);
2855 
2856 	if (ret)
2857 		cm_process_work(cm_id_priv, work);
2858 	else
2859 		cm_deref_id(cm_id_priv);
2860 	return 0;
2861 out:
2862 	spin_unlock_irq(&cm_id_priv->lock);
2863 	cm_deref_id(cm_id_priv);
2864 	return -EINVAL;
2865 }
2866 
2867 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2868 			  struct cm_id_private *cm_id_priv,
2869 			  struct ib_sa_path_rec *alternate_path,
2870 			  const void *private_data,
2871 			  u8 private_data_len)
2872 {
2873 	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2874 			  cm_form_tid(cm_id_priv));
2875 	lap_msg->local_comm_id = cm_id_priv->id.local_id;
2876 	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2877 	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2878 	/* todo: need remote CM response timeout */
2879 	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2880 	lap_msg->alt_local_lid = alternate_path->slid;
2881 	lap_msg->alt_remote_lid = alternate_path->dlid;
2882 	lap_msg->alt_local_gid = alternate_path->sgid;
2883 	lap_msg->alt_remote_gid = alternate_path->dgid;
2884 	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2885 	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2886 	lap_msg->alt_hop_limit = alternate_path->hop_limit;
2887 	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2888 	cm_lap_set_sl(lap_msg, alternate_path->sl);
2889 	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2890 	cm_lap_set_local_ack_timeout(lap_msg,
2891 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2892 			       alternate_path->packet_life_time));
2893 
2894 	if (private_data && private_data_len)
2895 		memcpy(lap_msg->private_data, private_data, private_data_len);
2896 }
2897 
2898 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2899 		   struct ib_sa_path_rec *alternate_path,
2900 		   const void *private_data,
2901 		   u8 private_data_len)
2902 {
2903 	struct cm_id_private *cm_id_priv;
2904 	struct ib_mad_send_buf *msg;
2905 	unsigned long flags;
2906 	int ret;
2907 
2908 	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2909 		return -EINVAL;
2910 
2911 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2912 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2913 	if (cm_id->state != IB_CM_ESTABLISHED ||
2914 	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2915 	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
2916 		ret = -EINVAL;
2917 		goto out;
2918 	}
2919 
2920 	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
2921 				 cm_id_priv);
2922 	if (ret)
2923 		goto out;
2924 	cm_id_priv->alt_av.timeout =
2925 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2926 				       cm_id_priv->alt_av.timeout - 1);
2927 
2928 	ret = cm_alloc_msg(cm_id_priv, &msg);
2929 	if (ret)
2930 		goto out;
2931 
2932 	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2933 		      alternate_path, private_data, private_data_len);
2934 	msg->timeout_ms = cm_id_priv->timeout_ms;
2935 	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2936 
2937 	ret = ib_post_send_mad(msg, NULL);
2938 	if (ret) {
2939 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2940 		cm_free_msg(msg);
2941 		return ret;
2942 	}
2943 
2944 	cm_id->lap_state = IB_CM_LAP_SENT;
2945 	cm_id_priv->msg = msg;
2946 
2947 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2948 	return ret;
2949 }
2950 EXPORT_SYMBOL(ib_send_cm_lap);
2951 
2952 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2953 				    struct ib_sa_path_rec *path,
2954 				    struct cm_lap_msg *lap_msg)
2955 {
2956 	memset(path, 0, sizeof *path);
2957 	path->dgid = lap_msg->alt_local_gid;
2958 	path->sgid = lap_msg->alt_remote_gid;
2959 	path->dlid = lap_msg->alt_local_lid;
2960 	path->slid = lap_msg->alt_remote_lid;
2961 	path->flow_label = cm_lap_get_flow_label(lap_msg);
2962 	path->hop_limit = lap_msg->alt_hop_limit;
2963 	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2964 	path->reversible = 1;
2965 	path->pkey = cm_id_priv->pkey;
2966 	path->sl = cm_lap_get_sl(lap_msg);
2967 	path->mtu_selector = IB_SA_EQ;
2968 	path->mtu = cm_id_priv->path_mtu;
2969 	path->rate_selector = IB_SA_EQ;
2970 	path->rate = cm_lap_get_packet_rate(lap_msg);
2971 	path->packet_life_time_selector = IB_SA_EQ;
2972 	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2973 	path->packet_life_time -= (path->packet_life_time > 0);
2974 }
2975 
2976 static int cm_lap_handler(struct cm_work *work)
2977 {
2978 	struct cm_id_private *cm_id_priv;
2979 	struct cm_lap_msg *lap_msg;
2980 	struct ib_cm_lap_event_param *param;
2981 	struct ib_mad_send_buf *msg = NULL;
2982 	int ret;
2983 
2984 	/* Currently Alternate path messages are not supported for
2985 	 * RoCE link layer.
2986 	 */
2987 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
2988 			       work->port->port_num))
2989 		return -EINVAL;
2990 
2991 	/* todo: verify LAP request and send reject APR if invalid. */
2992 	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2993 	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2994 				   lap_msg->local_comm_id);
2995 	if (!cm_id_priv)
2996 		return -EINVAL;
2997 
2998 	param = &work->cm_event.param.lap_rcvd;
2999 	param->alternate_path = &work->path[0];
3000 	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3001 	work->cm_event.private_data = &lap_msg->private_data;
3002 
3003 	spin_lock_irq(&cm_id_priv->lock);
3004 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3005 		goto unlock;
3006 
3007 	switch (cm_id_priv->id.lap_state) {
3008 	case IB_CM_LAP_UNINIT:
3009 	case IB_CM_LAP_IDLE:
3010 		break;
3011 	case IB_CM_MRA_LAP_SENT:
3012 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3013 				counter[CM_LAP_COUNTER]);
3014 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3015 		if (IS_ERR(msg))
3016 			goto unlock;
3017 
3018 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3019 			      CM_MSG_RESPONSE_OTHER,
3020 			      cm_id_priv->service_timeout,
3021 			      cm_id_priv->private_data,
3022 			      cm_id_priv->private_data_len);
3023 		spin_unlock_irq(&cm_id_priv->lock);
3024 
3025 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3026 		    ib_post_send_mad(msg, NULL))
3027 			cm_free_msg(msg);
3028 		goto deref;
3029 	case IB_CM_LAP_RCVD:
3030 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3031 				counter[CM_LAP_COUNTER]);
3032 		goto unlock;
3033 	default:
3034 		goto unlock;
3035 	}
3036 
3037 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3038 	cm_id_priv->tid = lap_msg->hdr.tid;
3039 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3040 				      work->mad_recv_wc->recv_buf.grh,
3041 				      &cm_id_priv->av);
3042 	if (ret)
3043 		goto unlock;
3044 	ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
3045 				 cm_id_priv);
3046 	if (ret)
3047 		goto unlock;
3048 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3049 	if (!ret)
3050 		list_add_tail(&work->list, &cm_id_priv->work_list);
3051 	spin_unlock_irq(&cm_id_priv->lock);
3052 
3053 	if (ret)
3054 		cm_process_work(cm_id_priv, work);
3055 	else
3056 		cm_deref_id(cm_id_priv);
3057 	return 0;
3058 
3059 unlock:	spin_unlock_irq(&cm_id_priv->lock);
3060 deref:	cm_deref_id(cm_id_priv);
3061 	return -EINVAL;
3062 }
3063 
3064 static void cm_format_apr(struct cm_apr_msg *apr_msg,
3065 			  struct cm_id_private *cm_id_priv,
3066 			  enum ib_cm_apr_status status,
3067 			  void *info,
3068 			  u8 info_length,
3069 			  const void *private_data,
3070 			  u8 private_data_len)
3071 {
3072 	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
3073 	apr_msg->local_comm_id = cm_id_priv->id.local_id;
3074 	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
3075 	apr_msg->ap_status = (u8) status;
3076 
3077 	if (info && info_length) {
3078 		apr_msg->info_length = info_length;
3079 		memcpy(apr_msg->info, info, info_length);
3080 	}
3081 
3082 	if (private_data && private_data_len)
3083 		memcpy(apr_msg->private_data, private_data, private_data_len);
3084 }
3085 
3086 int ib_send_cm_apr(struct ib_cm_id *cm_id,
3087 		   enum ib_cm_apr_status status,
3088 		   void *info,
3089 		   u8 info_length,
3090 		   const void *private_data,
3091 		   u8 private_data_len)
3092 {
3093 	struct cm_id_private *cm_id_priv;
3094 	struct ib_mad_send_buf *msg;
3095 	unsigned long flags;
3096 	int ret;
3097 
3098 	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
3099 	    (info && info_length > IB_CM_APR_INFO_LENGTH))
3100 		return -EINVAL;
3101 
3102 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3103 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3104 	if (cm_id->state != IB_CM_ESTABLISHED ||
3105 	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
3106 	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
3107 		ret = -EINVAL;
3108 		goto out;
3109 	}
3110 
3111 	ret = cm_alloc_msg(cm_id_priv, &msg);
3112 	if (ret)
3113 		goto out;
3114 
3115 	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
3116 		      info, info_length, private_data, private_data_len);
3117 	ret = ib_post_send_mad(msg, NULL);
3118 	if (ret) {
3119 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3120 		cm_free_msg(msg);
3121 		return ret;
3122 	}
3123 
3124 	cm_id->lap_state = IB_CM_LAP_IDLE;
3125 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3126 	return ret;
3127 }
3128 EXPORT_SYMBOL(ib_send_cm_apr);
3129 
3130 static int cm_apr_handler(struct cm_work *work)
3131 {
3132 	struct cm_id_private *cm_id_priv;
3133 	struct cm_apr_msg *apr_msg;
3134 	int ret;
3135 
3136 	/* Currently Alternate path messages are not supported for
3137 	 * RoCE link layer.
3138 	 */
3139 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3140 			       work->port->port_num))
3141 		return -EINVAL;
3142 
3143 	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3144 	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
3145 				   apr_msg->local_comm_id);
3146 	if (!cm_id_priv)
3147 		return -EINVAL; /* Unmatched reply. */
3148 
3149 	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
3150 	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
3151 	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
3152 	work->cm_event.private_data = &apr_msg->private_data;
3153 
3154 	spin_lock_irq(&cm_id_priv->lock);
3155 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3156 	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3157 	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3158 		spin_unlock_irq(&cm_id_priv->lock);
3159 		goto out;
3160 	}
3161 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3162 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3163 	cm_id_priv->msg = NULL;
3164 
3165 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3166 	if (!ret)
3167 		list_add_tail(&work->list, &cm_id_priv->work_list);
3168 	spin_unlock_irq(&cm_id_priv->lock);
3169 
3170 	if (ret)
3171 		cm_process_work(cm_id_priv, work);
3172 	else
3173 		cm_deref_id(cm_id_priv);
3174 	return 0;
3175 out:
3176 	cm_deref_id(cm_id_priv);
3177 	return -EINVAL;
3178 }
3179 
3180 static int cm_timewait_handler(struct cm_work *work)
3181 {
3182 	struct cm_timewait_info *timewait_info;
3183 	struct cm_id_private *cm_id_priv;
3184 	int ret;
3185 
3186 	timewait_info = (struct cm_timewait_info *)work;
3187 	spin_lock_irq(&cm.lock);
3188 	list_del(&timewait_info->list);
3189 	spin_unlock_irq(&cm.lock);
3190 
3191 	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3192 				   timewait_info->work.remote_id);
3193 	if (!cm_id_priv)
3194 		return -EINVAL;
3195 
3196 	spin_lock_irq(&cm_id_priv->lock);
3197 	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3198 	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3199 		spin_unlock_irq(&cm_id_priv->lock);
3200 		goto out;
3201 	}
3202 	cm_id_priv->id.state = IB_CM_IDLE;
3203 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3204 	if (!ret)
3205 		list_add_tail(&work->list, &cm_id_priv->work_list);
3206 	spin_unlock_irq(&cm_id_priv->lock);
3207 
3208 	if (ret)
3209 		cm_process_work(cm_id_priv, work);
3210 	else
3211 		cm_deref_id(cm_id_priv);
3212 	return 0;
3213 out:
3214 	cm_deref_id(cm_id_priv);
3215 	return -EINVAL;
3216 }
3217 
3218 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3219 			       struct cm_id_private *cm_id_priv,
3220 			       struct ib_cm_sidr_req_param *param)
3221 {
3222 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3223 			  cm_form_tid(cm_id_priv));
3224 	sidr_req_msg->request_id = cm_id_priv->id.local_id;
3225 	sidr_req_msg->pkey = param->path->pkey;
3226 	sidr_req_msg->service_id = param->service_id;
3227 
3228 	if (param->private_data && param->private_data_len)
3229 		memcpy(sidr_req_msg->private_data, param->private_data,
3230 		       param->private_data_len);
3231 }
3232 
3233 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3234 			struct ib_cm_sidr_req_param *param)
3235 {
3236 	struct cm_id_private *cm_id_priv;
3237 	struct ib_mad_send_buf *msg;
3238 	unsigned long flags;
3239 	int ret;
3240 
3241 	if (!param->path || (param->private_data &&
3242 	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3243 		return -EINVAL;
3244 
3245 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3246 	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
3247 	if (ret)
3248 		goto out;
3249 
3250 	cm_id->service_id = param->service_id;
3251 	cm_id->service_mask = ~cpu_to_be64(0);
3252 	cm_id_priv->timeout_ms = param->timeout_ms;
3253 	cm_id_priv->max_cm_retries = param->max_cm_retries;
3254 	ret = cm_alloc_msg(cm_id_priv, &msg);
3255 	if (ret)
3256 		goto out;
3257 
3258 	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3259 			   param);
3260 	msg->timeout_ms = cm_id_priv->timeout_ms;
3261 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3262 
3263 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3264 	if (cm_id->state == IB_CM_IDLE)
3265 		ret = ib_post_send_mad(msg, NULL);
3266 	else
3267 		ret = -EINVAL;
3268 
3269 	if (ret) {
3270 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3271 		cm_free_msg(msg);
3272 		goto out;
3273 	}
3274 	cm_id->state = IB_CM_SIDR_REQ_SENT;
3275 	cm_id_priv->msg = msg;
3276 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3277 out:
3278 	return ret;
3279 }
3280 EXPORT_SYMBOL(ib_send_cm_sidr_req);
3281 
3282 static void cm_format_sidr_req_event(struct cm_work *work,
3283 				     const struct cm_id_private *rx_cm_id,
3284 				     struct ib_cm_id *listen_id)
3285 {
3286 	struct cm_sidr_req_msg *sidr_req_msg;
3287 	struct ib_cm_sidr_req_event_param *param;
3288 
3289 	sidr_req_msg = (struct cm_sidr_req_msg *)
3290 				work->mad_recv_wc->recv_buf.mad;
3291 	param = &work->cm_event.param.sidr_req_rcvd;
3292 	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3293 	param->listen_id = listen_id;
3294 	param->service_id = sidr_req_msg->service_id;
3295 	param->bth_pkey = cm_get_bth_pkey(work);
3296 	param->port = work->port->port_num;
3297 	param->sgid_index = rx_cm_id->av.ah_attr.grh.sgid_index;
3298 	work->cm_event.private_data = &sidr_req_msg->private_data;
3299 }
3300 
3301 static int cm_sidr_req_handler(struct cm_work *work)
3302 {
3303 	struct ib_cm_id *cm_id;
3304 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3305 	struct cm_sidr_req_msg *sidr_req_msg;
3306 	struct ib_wc *wc;
3307 	int ret;
3308 
3309 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3310 	if (IS_ERR(cm_id))
3311 		return PTR_ERR(cm_id);
3312 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3313 
3314 	/* Record SGID/SLID and request ID for lookup. */
3315 	sidr_req_msg = (struct cm_sidr_req_msg *)
3316 				work->mad_recv_wc->recv_buf.mad;
3317 	wc = work->mad_recv_wc->wc;
3318 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3319 	cm_id_priv->av.dgid.global.interface_id = 0;
3320 	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3321 				 work->mad_recv_wc->recv_buf.grh,
3322 				 &cm_id_priv->av);
3323 	if (ret)
3324 		goto out;
3325 	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3326 	cm_id_priv->tid = sidr_req_msg->hdr.tid;
3327 	atomic_inc(&cm_id_priv->work_count);
3328 
3329 	spin_lock_irq(&cm.lock);
3330 	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3331 	if (cur_cm_id_priv) {
3332 		spin_unlock_irq(&cm.lock);
3333 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3334 				counter[CM_SIDR_REQ_COUNTER]);
3335 		goto out; /* Duplicate message. */
3336 	}
3337 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3338 	cur_cm_id_priv = cm_find_listen(cm_id->device,
3339 					sidr_req_msg->service_id);
3340 	if (!cur_cm_id_priv) {
3341 		spin_unlock_irq(&cm.lock);
3342 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3343 		goto out; /* No match. */
3344 	}
3345 	atomic_inc(&cur_cm_id_priv->refcount);
3346 	atomic_inc(&cm_id_priv->refcount);
3347 	spin_unlock_irq(&cm.lock);
3348 
3349 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3350 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
3351 	cm_id_priv->id.service_id = sidr_req_msg->service_id;
3352 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3353 
3354 	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3355 	cm_process_work(cm_id_priv, work);
3356 	cm_deref_id(cur_cm_id_priv);
3357 	return 0;
3358 out:
3359 	ib_destroy_cm_id(&cm_id_priv->id);
3360 	return -EINVAL;
3361 }
3362 
3363 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3364 			       struct cm_id_private *cm_id_priv,
3365 			       struct ib_cm_sidr_rep_param *param)
3366 {
3367 	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3368 			  cm_id_priv->tid);
3369 	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3370 	sidr_rep_msg->status = param->status;
3371 	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3372 	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3373 	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3374 
3375 	if (param->info && param->info_length)
3376 		memcpy(sidr_rep_msg->info, param->info, param->info_length);
3377 
3378 	if (param->private_data && param->private_data_len)
3379 		memcpy(sidr_rep_msg->private_data, param->private_data,
3380 		       param->private_data_len);
3381 }
3382 
3383 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3384 			struct ib_cm_sidr_rep_param *param)
3385 {
3386 	struct cm_id_private *cm_id_priv;
3387 	struct ib_mad_send_buf *msg;
3388 	unsigned long flags;
3389 	int ret;
3390 
3391 	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3392 	    (param->private_data &&
3393 	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3394 		return -EINVAL;
3395 
3396 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3397 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3398 	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3399 		ret = -EINVAL;
3400 		goto error;
3401 	}
3402 
3403 	ret = cm_alloc_msg(cm_id_priv, &msg);
3404 	if (ret)
3405 		goto error;
3406 
3407 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3408 			   param);
3409 	ret = ib_post_send_mad(msg, NULL);
3410 	if (ret) {
3411 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3412 		cm_free_msg(msg);
3413 		return ret;
3414 	}
3415 	cm_id->state = IB_CM_IDLE;
3416 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3417 
3418 	spin_lock_irqsave(&cm.lock, flags);
3419 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3420 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3421 		RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3422 	}
3423 	spin_unlock_irqrestore(&cm.lock, flags);
3424 	return 0;
3425 
3426 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3427 	return ret;
3428 }
3429 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3430 
3431 static void cm_format_sidr_rep_event(struct cm_work *work)
3432 {
3433 	struct cm_sidr_rep_msg *sidr_rep_msg;
3434 	struct ib_cm_sidr_rep_event_param *param;
3435 
3436 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3437 				work->mad_recv_wc->recv_buf.mad;
3438 	param = &work->cm_event.param.sidr_rep_rcvd;
3439 	param->status = sidr_rep_msg->status;
3440 	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3441 	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3442 	param->info = &sidr_rep_msg->info;
3443 	param->info_len = sidr_rep_msg->info_length;
3444 	work->cm_event.private_data = &sidr_rep_msg->private_data;
3445 }
3446 
3447 static int cm_sidr_rep_handler(struct cm_work *work)
3448 {
3449 	struct cm_sidr_rep_msg *sidr_rep_msg;
3450 	struct cm_id_private *cm_id_priv;
3451 
3452 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3453 				work->mad_recv_wc->recv_buf.mad;
3454 	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3455 	if (!cm_id_priv)
3456 		return -EINVAL; /* Unmatched reply. */
3457 
3458 	spin_lock_irq(&cm_id_priv->lock);
3459 	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3460 		spin_unlock_irq(&cm_id_priv->lock);
3461 		goto out;
3462 	}
3463 	cm_id_priv->id.state = IB_CM_IDLE;
3464 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3465 	spin_unlock_irq(&cm_id_priv->lock);
3466 
3467 	cm_format_sidr_rep_event(work);
3468 	cm_process_work(cm_id_priv, work);
3469 	return 0;
3470 out:
3471 	cm_deref_id(cm_id_priv);
3472 	return -EINVAL;
3473 }
3474 
3475 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3476 				  enum ib_wc_status wc_status)
3477 {
3478 	struct cm_id_private *cm_id_priv;
3479 	struct ib_cm_event cm_event;
3480 	enum ib_cm_state state;
3481 	int ret;
3482 
3483 	memset(&cm_event, 0, sizeof cm_event);
3484 	cm_id_priv = msg->context[0];
3485 
3486 	/* Discard old sends or ones without a response. */
3487 	spin_lock_irq(&cm_id_priv->lock);
3488 	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3489 	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3490 		goto discard;
3491 
3492 	switch (state) {
3493 	case IB_CM_REQ_SENT:
3494 	case IB_CM_MRA_REQ_RCVD:
3495 		cm_reset_to_idle(cm_id_priv);
3496 		cm_event.event = IB_CM_REQ_ERROR;
3497 		break;
3498 	case IB_CM_REP_SENT:
3499 	case IB_CM_MRA_REP_RCVD:
3500 		cm_reset_to_idle(cm_id_priv);
3501 		cm_event.event = IB_CM_REP_ERROR;
3502 		break;
3503 	case IB_CM_DREQ_SENT:
3504 		cm_enter_timewait(cm_id_priv);
3505 		cm_event.event = IB_CM_DREQ_ERROR;
3506 		break;
3507 	case IB_CM_SIDR_REQ_SENT:
3508 		cm_id_priv->id.state = IB_CM_IDLE;
3509 		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3510 		break;
3511 	default:
3512 		goto discard;
3513 	}
3514 	spin_unlock_irq(&cm_id_priv->lock);
3515 	cm_event.param.send_status = wc_status;
3516 
3517 	/* No other events can occur on the cm_id at this point. */
3518 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3519 	cm_free_msg(msg);
3520 	if (ret)
3521 		ib_destroy_cm_id(&cm_id_priv->id);
3522 	return;
3523 discard:
3524 	spin_unlock_irq(&cm_id_priv->lock);
3525 	cm_free_msg(msg);
3526 }
3527 
3528 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3529 			    struct ib_mad_send_wc *mad_send_wc)
3530 {
3531 	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3532 	struct cm_port *port;
3533 	u16 attr_index;
3534 
3535 	port = mad_agent->context;
3536 	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3537 				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3538 
3539 	/*
3540 	 * If the send was in response to a received message (context[0] is not
3541 	 * set to a cm_id), and is not a REJ, then it is a send that was
3542 	 * manually retried.
3543 	 */
3544 	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3545 		msg->retries = 1;
3546 
3547 	atomic_long_add(1 + msg->retries,
3548 			&port->counter_group[CM_XMIT].counter[attr_index]);
3549 	if (msg->retries)
3550 		atomic_long_add(msg->retries,
3551 				&port->counter_group[CM_XMIT_RETRIES].
3552 				counter[attr_index]);
3553 
3554 	switch (mad_send_wc->status) {
3555 	case IB_WC_SUCCESS:
3556 	case IB_WC_WR_FLUSH_ERR:
3557 		cm_free_msg(msg);
3558 		break;
3559 	default:
3560 		if (msg->context[0] && msg->context[1])
3561 			cm_process_send_error(msg, mad_send_wc->status);
3562 		else
3563 			cm_free_msg(msg);
3564 		break;
3565 	}
3566 }
3567 
3568 static void cm_work_handler(struct work_struct *_work)
3569 {
3570 	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3571 	int ret;
3572 
3573 	switch (work->cm_event.event) {
3574 	case IB_CM_REQ_RECEIVED:
3575 		ret = cm_req_handler(work);
3576 		break;
3577 	case IB_CM_MRA_RECEIVED:
3578 		ret = cm_mra_handler(work);
3579 		break;
3580 	case IB_CM_REJ_RECEIVED:
3581 		ret = cm_rej_handler(work);
3582 		break;
3583 	case IB_CM_REP_RECEIVED:
3584 		ret = cm_rep_handler(work);
3585 		break;
3586 	case IB_CM_RTU_RECEIVED:
3587 		ret = cm_rtu_handler(work);
3588 		break;
3589 	case IB_CM_USER_ESTABLISHED:
3590 		ret = cm_establish_handler(work);
3591 		break;
3592 	case IB_CM_DREQ_RECEIVED:
3593 		ret = cm_dreq_handler(work);
3594 		break;
3595 	case IB_CM_DREP_RECEIVED:
3596 		ret = cm_drep_handler(work);
3597 		break;
3598 	case IB_CM_SIDR_REQ_RECEIVED:
3599 		ret = cm_sidr_req_handler(work);
3600 		break;
3601 	case IB_CM_SIDR_REP_RECEIVED:
3602 		ret = cm_sidr_rep_handler(work);
3603 		break;
3604 	case IB_CM_LAP_RECEIVED:
3605 		ret = cm_lap_handler(work);
3606 		break;
3607 	case IB_CM_APR_RECEIVED:
3608 		ret = cm_apr_handler(work);
3609 		break;
3610 	case IB_CM_TIMEWAIT_EXIT:
3611 		ret = cm_timewait_handler(work);
3612 		break;
3613 	default:
3614 		ret = -EINVAL;
3615 		break;
3616 	}
3617 	if (ret)
3618 		cm_free_work(work);
3619 }
3620 
3621 static int cm_establish(struct ib_cm_id *cm_id)
3622 {
3623 	struct cm_id_private *cm_id_priv;
3624 	struct cm_work *work;
3625 	unsigned long flags;
3626 	int ret = 0;
3627 	struct cm_device *cm_dev;
3628 
3629 	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3630 	if (!cm_dev)
3631 		return -ENODEV;
3632 
3633 	work = kmalloc(sizeof *work, GFP_ATOMIC);
3634 	if (!work)
3635 		return -ENOMEM;
3636 
3637 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3638 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3639 	switch (cm_id->state)
3640 	{
3641 	case IB_CM_REP_SENT:
3642 	case IB_CM_MRA_REP_RCVD:
3643 		cm_id->state = IB_CM_ESTABLISHED;
3644 		break;
3645 	case IB_CM_ESTABLISHED:
3646 		ret = -EISCONN;
3647 		break;
3648 	default:
3649 		ret = -EINVAL;
3650 		break;
3651 	}
3652 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3653 
3654 	if (ret) {
3655 		kfree(work);
3656 		goto out;
3657 	}
3658 
3659 	/*
3660 	 * The CM worker thread may try to destroy the cm_id before it
3661 	 * can execute this work item.  To prevent potential deadlock,
3662 	 * we need to find the cm_id once we're in the context of the
3663 	 * worker thread, rather than holding a reference on it.
3664 	 */
3665 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3666 	work->local_id = cm_id->local_id;
3667 	work->remote_id = cm_id->remote_id;
3668 	work->mad_recv_wc = NULL;
3669 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3670 
3671 	/* Check if the device started its remove_one */
3672 	spin_lock_irqsave(&cm.lock, flags);
3673 	if (!cm_dev->going_down) {
3674 		queue_delayed_work(cm.wq, &work->work, 0);
3675 	} else {
3676 		kfree(work);
3677 		ret = -ENODEV;
3678 	}
3679 	spin_unlock_irqrestore(&cm.lock, flags);
3680 
3681 out:
3682 	return ret;
3683 }
3684 
3685 static int cm_migrate(struct ib_cm_id *cm_id)
3686 {
3687 	struct cm_id_private *cm_id_priv;
3688 	struct cm_av tmp_av;
3689 	unsigned long flags;
3690 	int tmp_send_port_not_ready;
3691 	int ret = 0;
3692 
3693 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3694 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3695 	if (cm_id->state == IB_CM_ESTABLISHED &&
3696 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3697 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3698 		cm_id->lap_state = IB_CM_LAP_IDLE;
3699 		/* Swap address vector */
3700 		tmp_av = cm_id_priv->av;
3701 		cm_id_priv->av = cm_id_priv->alt_av;
3702 		cm_id_priv->alt_av = tmp_av;
3703 		/* Swap port send ready state */
3704 		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3705 		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3706 		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3707 	} else
3708 		ret = -EINVAL;
3709 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3710 
3711 	return ret;
3712 }
3713 
3714 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3715 {
3716 	int ret;
3717 
3718 	switch (event) {
3719 	case IB_EVENT_COMM_EST:
3720 		ret = cm_establish(cm_id);
3721 		break;
3722 	case IB_EVENT_PATH_MIG:
3723 		ret = cm_migrate(cm_id);
3724 		break;
3725 	default:
3726 		ret = -EINVAL;
3727 	}
3728 	return ret;
3729 }
3730 EXPORT_SYMBOL(ib_cm_notify);
3731 
3732 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3733 			    struct ib_mad_send_buf *send_buf,
3734 			    struct ib_mad_recv_wc *mad_recv_wc)
3735 {
3736 	struct cm_port *port = mad_agent->context;
3737 	struct cm_work *work;
3738 	enum ib_cm_event_type event;
3739 	u16 attr_id;
3740 	int paths = 0;
3741 	int going_down = 0;
3742 
3743 	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3744 	case CM_REQ_ATTR_ID:
3745 		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3746 						    alt_local_lid != 0);
3747 		event = IB_CM_REQ_RECEIVED;
3748 		break;
3749 	case CM_MRA_ATTR_ID:
3750 		event = IB_CM_MRA_RECEIVED;
3751 		break;
3752 	case CM_REJ_ATTR_ID:
3753 		event = IB_CM_REJ_RECEIVED;
3754 		break;
3755 	case CM_REP_ATTR_ID:
3756 		event = IB_CM_REP_RECEIVED;
3757 		break;
3758 	case CM_RTU_ATTR_ID:
3759 		event = IB_CM_RTU_RECEIVED;
3760 		break;
3761 	case CM_DREQ_ATTR_ID:
3762 		event = IB_CM_DREQ_RECEIVED;
3763 		break;
3764 	case CM_DREP_ATTR_ID:
3765 		event = IB_CM_DREP_RECEIVED;
3766 		break;
3767 	case CM_SIDR_REQ_ATTR_ID:
3768 		event = IB_CM_SIDR_REQ_RECEIVED;
3769 		break;
3770 	case CM_SIDR_REP_ATTR_ID:
3771 		event = IB_CM_SIDR_REP_RECEIVED;
3772 		break;
3773 	case CM_LAP_ATTR_ID:
3774 		paths = 1;
3775 		event = IB_CM_LAP_RECEIVED;
3776 		break;
3777 	case CM_APR_ATTR_ID:
3778 		event = IB_CM_APR_RECEIVED;
3779 		break;
3780 	default:
3781 		ib_free_recv_mad(mad_recv_wc);
3782 		return;
3783 	}
3784 
3785 	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3786 	atomic_long_inc(&port->counter_group[CM_RECV].
3787 			counter[attr_id - CM_ATTR_ID_OFFSET]);
3788 
3789 	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3790 		       GFP_KERNEL);
3791 	if (!work) {
3792 		ib_free_recv_mad(mad_recv_wc);
3793 		return;
3794 	}
3795 
3796 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3797 	work->cm_event.event = event;
3798 	work->mad_recv_wc = mad_recv_wc;
3799 	work->port = port;
3800 
3801 	/* Check if the device started its remove_one */
3802 	spin_lock_irq(&cm.lock);
3803 	if (!port->cm_dev->going_down)
3804 		queue_delayed_work(cm.wq, &work->work, 0);
3805 	else
3806 		going_down = 1;
3807 	spin_unlock_irq(&cm.lock);
3808 
3809 	if (going_down) {
3810 		kfree(work);
3811 		ib_free_recv_mad(mad_recv_wc);
3812 	}
3813 }
3814 
3815 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3816 				struct ib_qp_attr *qp_attr,
3817 				int *qp_attr_mask)
3818 {
3819 	unsigned long flags;
3820 	int ret;
3821 
3822 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3823 	switch (cm_id_priv->id.state) {
3824 	case IB_CM_REQ_SENT:
3825 	case IB_CM_MRA_REQ_RCVD:
3826 	case IB_CM_REQ_RCVD:
3827 	case IB_CM_MRA_REQ_SENT:
3828 	case IB_CM_REP_RCVD:
3829 	case IB_CM_MRA_REP_SENT:
3830 	case IB_CM_REP_SENT:
3831 	case IB_CM_MRA_REP_RCVD:
3832 	case IB_CM_ESTABLISHED:
3833 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3834 				IB_QP_PKEY_INDEX | IB_QP_PORT;
3835 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3836 		if (cm_id_priv->responder_resources)
3837 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3838 						    IB_ACCESS_REMOTE_ATOMIC;
3839 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3840 		qp_attr->port_num = cm_id_priv->av.port->port_num;
3841 		ret = 0;
3842 		break;
3843 	default:
3844 		ret = -EINVAL;
3845 		break;
3846 	}
3847 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3848 	return ret;
3849 }
3850 
3851 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3852 			       struct ib_qp_attr *qp_attr,
3853 			       int *qp_attr_mask)
3854 {
3855 	unsigned long flags;
3856 	int ret;
3857 
3858 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3859 	switch (cm_id_priv->id.state) {
3860 	case IB_CM_REQ_RCVD:
3861 	case IB_CM_MRA_REQ_SENT:
3862 	case IB_CM_REP_RCVD:
3863 	case IB_CM_MRA_REP_SENT:
3864 	case IB_CM_REP_SENT:
3865 	case IB_CM_MRA_REP_RCVD:
3866 	case IB_CM_ESTABLISHED:
3867 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3868 				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3869 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3870 		qp_attr->path_mtu = cm_id_priv->path_mtu;
3871 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3872 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3873 		if (cm_id_priv->qp_type == IB_QPT_RC ||
3874 		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3875 			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3876 					 IB_QP_MIN_RNR_TIMER;
3877 			qp_attr->max_dest_rd_atomic =
3878 					cm_id_priv->responder_resources;
3879 			qp_attr->min_rnr_timer = 0;
3880 		}
3881 		if (cm_id_priv->alt_av.ah_attr.dlid) {
3882 			*qp_attr_mask |= IB_QP_ALT_PATH;
3883 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3884 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3885 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3886 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3887 		}
3888 		ret = 0;
3889 		break;
3890 	default:
3891 		ret = -EINVAL;
3892 		break;
3893 	}
3894 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3895 	return ret;
3896 }
3897 
3898 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3899 			       struct ib_qp_attr *qp_attr,
3900 			       int *qp_attr_mask)
3901 {
3902 	unsigned long flags;
3903 	int ret;
3904 
3905 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3906 	switch (cm_id_priv->id.state) {
3907 	/* Allow transition to RTS before sending REP */
3908 	case IB_CM_REQ_RCVD:
3909 	case IB_CM_MRA_REQ_SENT:
3910 
3911 	case IB_CM_REP_RCVD:
3912 	case IB_CM_MRA_REP_SENT:
3913 	case IB_CM_REP_SENT:
3914 	case IB_CM_MRA_REP_RCVD:
3915 	case IB_CM_ESTABLISHED:
3916 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3917 			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3918 			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3919 			switch (cm_id_priv->qp_type) {
3920 			case IB_QPT_RC:
3921 			case IB_QPT_XRC_INI:
3922 				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3923 						 IB_QP_MAX_QP_RD_ATOMIC;
3924 				qp_attr->retry_cnt = cm_id_priv->retry_count;
3925 				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3926 				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3927 				/* fall through */
3928 			case IB_QPT_XRC_TGT:
3929 				*qp_attr_mask |= IB_QP_TIMEOUT;
3930 				qp_attr->timeout = cm_id_priv->av.timeout;
3931 				break;
3932 			default:
3933 				break;
3934 			}
3935 			if (cm_id_priv->alt_av.ah_attr.dlid) {
3936 				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3937 				qp_attr->path_mig_state = IB_MIG_REARM;
3938 			}
3939 		} else {
3940 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3941 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3942 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3943 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3944 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3945 			qp_attr->path_mig_state = IB_MIG_REARM;
3946 		}
3947 		ret = 0;
3948 		break;
3949 	default:
3950 		ret = -EINVAL;
3951 		break;
3952 	}
3953 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3954 	return ret;
3955 }
3956 
3957 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3958 		       struct ib_qp_attr *qp_attr,
3959 		       int *qp_attr_mask)
3960 {
3961 	struct cm_id_private *cm_id_priv;
3962 	int ret;
3963 
3964 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3965 	switch (qp_attr->qp_state) {
3966 	case IB_QPS_INIT:
3967 		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3968 		break;
3969 	case IB_QPS_RTR:
3970 		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3971 		break;
3972 	case IB_QPS_RTS:
3973 		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3974 		break;
3975 	default:
3976 		ret = -EINVAL;
3977 		break;
3978 	}
3979 	return ret;
3980 }
3981 EXPORT_SYMBOL(ib_cm_init_qp_attr);
3982 
3983 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3984 			       char *buf)
3985 {
3986 	struct cm_counter_group *group;
3987 	struct cm_counter_attribute *cm_attr;
3988 
3989 	group = container_of(obj, struct cm_counter_group, obj);
3990 	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3991 
3992 	return sprintf(buf, "%ld\n",
3993 		       atomic_long_read(&group->counter[cm_attr->index]));
3994 }
3995 
3996 static const struct sysfs_ops cm_counter_ops = {
3997 	.show = cm_show_counter
3998 };
3999 
4000 static struct kobj_type cm_counter_obj_type = {
4001 	.sysfs_ops = &cm_counter_ops,
4002 	.default_attrs = cm_counter_default_attrs
4003 };
4004 
4005 static char *cm_devnode(struct device *dev, umode_t *mode)
4006 {
4007 	if (mode)
4008 		*mode = 0666;
4009 	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
4010 }
4011 
4012 struct class cm_class = {
4013 	.owner   = THIS_MODULE,
4014 	.name    = "infiniband_cm",
4015 	.devnode = cm_devnode,
4016 };
4017 EXPORT_SYMBOL(cm_class);
4018 
4019 static int cm_create_port_fs(struct cm_port *port)
4020 {
4021 	int i, ret;
4022 
4023 	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
4024 		ret = ib_port_register_module_stat(port->cm_dev->ib_device,
4025 						   port->port_num,
4026 						   &port->counter_group[i].obj,
4027 						   &cm_counter_obj_type,
4028 						   counter_group_names[i]);
4029 		if (ret)
4030 			goto error;
4031 	}
4032 
4033 	return 0;
4034 
4035 error:
4036 	while (i--)
4037 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4038 	return ret;
4039 
4040 }
4041 
4042 static void cm_remove_port_fs(struct cm_port *port)
4043 {
4044 	int i;
4045 
4046 	for (i = 0; i < CM_COUNTER_GROUPS; i++)
4047 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4048 
4049 }
4050 
4051 static void cm_add_one(struct ib_device *ib_device)
4052 {
4053 	struct cm_device *cm_dev;
4054 	struct cm_port *port;
4055 	struct ib_mad_reg_req reg_req = {
4056 		.mgmt_class = IB_MGMT_CLASS_CM,
4057 		.mgmt_class_version = IB_CM_CLASS_VERSION,
4058 	};
4059 	struct ib_port_modify port_modify = {
4060 		.set_port_cap_mask = IB_PORT_CM_SUP
4061 	};
4062 	unsigned long flags;
4063 	int ret;
4064 	int count = 0;
4065 	u8 i;
4066 
4067 	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
4068 			 ib_device->phys_port_cnt, GFP_KERNEL);
4069 	if (!cm_dev)
4070 		return;
4071 
4072 	cm_dev->ib_device = ib_device;
4073 	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
4074 	cm_dev->going_down = 0;
4075 
4076 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
4077 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4078 		if (!rdma_cap_ib_cm(ib_device, i))
4079 			continue;
4080 
4081 		port = kzalloc(sizeof *port, GFP_KERNEL);
4082 		if (!port)
4083 			goto error1;
4084 
4085 		cm_dev->port[i-1] = port;
4086 		port->cm_dev = cm_dev;
4087 		port->port_num = i;
4088 
4089 		INIT_LIST_HEAD(&port->cm_priv_prim_list);
4090 		INIT_LIST_HEAD(&port->cm_priv_altr_list);
4091 
4092 		ret = cm_create_port_fs(port);
4093 		if (ret)
4094 			goto error1;
4095 
4096 		port->mad_agent = ib_register_mad_agent(ib_device, i,
4097 							IB_QPT_GSI,
4098 							&reg_req,
4099 							0,
4100 							cm_send_handler,
4101 							cm_recv_handler,
4102 							port,
4103 							0);
4104 		if (IS_ERR(port->mad_agent))
4105 			goto error2;
4106 
4107 		ret = ib_modify_port(ib_device, i, 0, &port_modify);
4108 		if (ret)
4109 			goto error3;
4110 
4111 		count++;
4112 	}
4113 
4114 	if (!count)
4115 		goto free;
4116 
4117 	ib_set_client_data(ib_device, &cm_client, cm_dev);
4118 
4119 	write_lock_irqsave(&cm.device_lock, flags);
4120 	list_add_tail(&cm_dev->list, &cm.device_list);
4121 	write_unlock_irqrestore(&cm.device_lock, flags);
4122 	return;
4123 
4124 error3:
4125 	ib_unregister_mad_agent(port->mad_agent);
4126 error2:
4127 	cm_remove_port_fs(port);
4128 error1:
4129 	port_modify.set_port_cap_mask = 0;
4130 	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4131 	kfree(port);
4132 	while (--i) {
4133 		if (!rdma_cap_ib_cm(ib_device, i))
4134 			continue;
4135 
4136 		port = cm_dev->port[i-1];
4137 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4138 		ib_unregister_mad_agent(port->mad_agent);
4139 		cm_remove_port_fs(port);
4140 		kfree(port);
4141 	}
4142 free:
4143 	kfree(cm_dev);
4144 }
4145 
4146 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4147 {
4148 	struct cm_device *cm_dev = client_data;
4149 	struct cm_port *port;
4150 	struct cm_id_private *cm_id_priv;
4151 	struct ib_mad_agent *cur_mad_agent;
4152 	struct ib_port_modify port_modify = {
4153 		.clr_port_cap_mask = IB_PORT_CM_SUP
4154 	};
4155 	unsigned long flags;
4156 	int i;
4157 
4158 	if (!cm_dev)
4159 		return;
4160 
4161 	write_lock_irqsave(&cm.device_lock, flags);
4162 	list_del(&cm_dev->list);
4163 	write_unlock_irqrestore(&cm.device_lock, flags);
4164 
4165 	spin_lock_irq(&cm.lock);
4166 	cm_dev->going_down = 1;
4167 	spin_unlock_irq(&cm.lock);
4168 
4169 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4170 		if (!rdma_cap_ib_cm(ib_device, i))
4171 			continue;
4172 
4173 		port = cm_dev->port[i-1];
4174 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4175 		/* Mark all the cm_id's as not valid */
4176 		spin_lock_irq(&cm.lock);
4177 		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4178 			cm_id_priv->altr_send_port_not_ready = 1;
4179 		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4180 			cm_id_priv->prim_send_port_not_ready = 1;
4181 		spin_unlock_irq(&cm.lock);
4182 		/*
4183 		 * We flush the queue here after the going_down set, this
4184 		 * verify that no new works will be queued in the recv handler,
4185 		 * after that we can call the unregister_mad_agent
4186 		 */
4187 		flush_workqueue(cm.wq);
4188 		spin_lock_irq(&cm.state_lock);
4189 		cur_mad_agent = port->mad_agent;
4190 		port->mad_agent = NULL;
4191 		spin_unlock_irq(&cm.state_lock);
4192 		ib_unregister_mad_agent(cur_mad_agent);
4193 		cm_remove_port_fs(port);
4194 		kfree(port);
4195 	}
4196 
4197 	kfree(cm_dev);
4198 }
4199 
4200 static int __init ib_cm_init(void)
4201 {
4202 	int ret;
4203 
4204 	memset(&cm, 0, sizeof cm);
4205 	INIT_LIST_HEAD(&cm.device_list);
4206 	rwlock_init(&cm.device_lock);
4207 	spin_lock_init(&cm.lock);
4208 	spin_lock_init(&cm.state_lock);
4209 	cm.listen_service_table = RB_ROOT;
4210 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4211 	cm.remote_id_table = RB_ROOT;
4212 	cm.remote_qp_table = RB_ROOT;
4213 	cm.remote_sidr_table = RB_ROOT;
4214 	idr_init(&cm.local_id_table);
4215 	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4216 	INIT_LIST_HEAD(&cm.timewait_list);
4217 
4218 	ret = class_register(&cm_class);
4219 	if (ret) {
4220 		ret = -ENOMEM;
4221 		goto error1;
4222 	}
4223 
4224 	cm.wq = create_workqueue("ib_cm");
4225 	if (!cm.wq) {
4226 		ret = -ENOMEM;
4227 		goto error2;
4228 	}
4229 
4230 	ret = ib_register_client(&cm_client);
4231 	if (ret)
4232 		goto error3;
4233 
4234 	return 0;
4235 error3:
4236 	destroy_workqueue(cm.wq);
4237 error2:
4238 	class_unregister(&cm_class);
4239 error1:
4240 	idr_destroy(&cm.local_id_table);
4241 	return ret;
4242 }
4243 
4244 static void __exit ib_cm_cleanup(void)
4245 {
4246 	struct cm_timewait_info *timewait_info, *tmp;
4247 
4248 	spin_lock_irq(&cm.lock);
4249 	list_for_each_entry(timewait_info, &cm.timewait_list, list)
4250 		cancel_delayed_work(&timewait_info->work.work);
4251 	spin_unlock_irq(&cm.lock);
4252 
4253 	ib_unregister_client(&cm_client);
4254 	destroy_workqueue(cm.wq);
4255 
4256 	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4257 		cancel_delayed_work_sync(&timewait_info->work.work);
4258 		list_del(&timewait_info->list);
4259 		kfree(timewait_info);
4260 	}
4261 
4262 	class_unregister(&cm_class);
4263 	idr_destroy(&cm.local_id_table);
4264 }
4265 
4266 module_init_order(ib_cm_init, SI_ORDER_SECOND);
4267 module_exit_order(ib_cm_cleanup, SI_ORDER_SECOND);
4268 
4269