1 /*
2  * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <byteswap.h>
30 #include <errno.h>
31 #include <assert.h>
32 #include <ipxe/infiniband.h>
33 #include <ipxe/ib_mi.h>
34 #include <ipxe/ib_pathrec.h>
35 #include <ipxe/ib_cm.h>
36 
37 /**
38  * @file
39  *
40  * Infiniband communication management
41  *
42  */
43 
44 /** List of connections */
45 static LIST_HEAD ( ib_cm_conns );
46 
47 /**
48  * Find connection by local communication ID
49  *
50  * @v local_id		Local communication ID
51  * @ret conn		Connection, or NULL
52  */
ib_cm_find(uint32_t local_id)53 static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
54 	struct ib_connection *conn;
55 
56 	list_for_each_entry ( conn, &ib_cm_conns, list ) {
57 		if ( conn->local_id == local_id )
58 			return conn;
59 	}
60 	return NULL;
61 }
62 
63 /**
64  * Send "ready to use" response
65  *
66  * @v ibdev		Infiniband device
67  * @v mi		Management interface
68  * @v tid		Transaction identifier
69  * @v av		Address vector
70  * @v local_id		Local communication ID
71  * @v remote_id		Remote communication ID
72  * @ret rc		Return status code
73  */
ib_cm_send_rtu(struct ib_device * ibdev,struct ib_mad_interface * mi,struct ib_mad_tid * tid,struct ib_address_vector * av,uint32_t local_id,uint32_t remote_id)74 static int ib_cm_send_rtu ( struct ib_device *ibdev,
75 			    struct ib_mad_interface *mi,
76 			    struct ib_mad_tid *tid,
77 			    struct ib_address_vector *av,
78 			    uint32_t local_id, uint32_t remote_id ) {
79 	union ib_mad mad;
80 	struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
81 	int rc;
82 
83 	/* Construct "ready to use" response */
84 	memset ( &mad, 0, sizeof ( mad ) );
85 	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
86 	mad.hdr.class_version = IB_CM_CLASS_VERSION;
87 	mad.hdr.method = IB_MGMT_METHOD_SEND;
88 	memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
89 	mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
90 	rtu->local_id = htonl ( local_id );
91 	rtu->remote_id = htonl ( remote_id );
92 	if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
93 		DBGC ( local_id, "CM %08x could not send RTU: %s\n",
94 		       local_id, strerror ( rc ) );
95 		return rc;
96 	}
97 
98 	return 0;
99 }
100 
101 /**
102  * Handle duplicate connection replies
103  *
104  * @v ibdev		Infiniband device
105  * @v mi		Management interface
106  * @v mad		Received MAD
107  * @v av		Source address vector
 * (No return value; a failure to send the response is ignored)
109  *
110  * If a "ready to use" MAD is lost, the peer may resend the connection
111  * reply.  We have to respond to these with duplicate "ready to use"
112  * MADs, otherwise the peer may time out and drop the connection.
113  */
ib_cm_recv_rep(struct ib_device * ibdev,struct ib_mad_interface * mi,union ib_mad * mad,struct ib_address_vector * av)114 static void ib_cm_recv_rep ( struct ib_device *ibdev,
115 			     struct ib_mad_interface *mi,
116 			     union ib_mad *mad,
117 			     struct ib_address_vector *av ) {
118 	struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
119 	struct ib_connection *conn;
120 	uint32_t local_id = ntohl ( rep->remote_id );
121 	int rc;
122 
123 	/* Identify connection */
124 	conn = ib_cm_find ( local_id );
125 	if ( conn ) {
126 		/* Try to send "ready to use" reply */
127 		if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
128 					     conn->local_id,
129 					     conn->remote_id ) ) != 0 ) {
130 			/* Ignore errors; the remote end will retry */
131 		}
132 	} else {
133 		DBGC ( local_id, "CM %08x unexpected REP\n", local_id );
134 	}
135 }
136 
137 /**
138  * Send reply to disconnection request
139  *
140  * @v ibdev		Infiniband device
141  * @v mi		Management interface
142  * @v tid		Transaction identifier
143  * @v av		Address vector
144  * @v local_id		Local communication ID
145  * @v remote_id		Remote communication ID
146  * @ret rc		Return status code
147  */
ib_cm_send_drep(struct ib_device * ibdev,struct ib_mad_interface * mi,struct ib_mad_tid * tid,struct ib_address_vector * av,uint32_t local_id,uint32_t remote_id)148 static int ib_cm_send_drep ( struct ib_device *ibdev,
149 			     struct ib_mad_interface *mi,
150 			     struct ib_mad_tid *tid,
151 			     struct ib_address_vector *av,
152 			     uint32_t local_id, uint32_t remote_id ) {
153 	union ib_mad mad;
154 	struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
155 	int rc;
156 
157 	/* Construct reply to disconnection request */
158 	memset ( &mad, 0, sizeof ( mad ) );
159 	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
160 	mad.hdr.class_version = IB_CM_CLASS_VERSION;
161 	mad.hdr.method = IB_MGMT_METHOD_SEND;
162 	memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
163 	mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
164 	drep->local_id = htonl ( local_id );
165 	drep->remote_id = htonl ( remote_id );
166 	if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
167 		DBGC ( local_id, "CM %08x could not send DREP: %s\n",
168 		       local_id, strerror ( rc ) );
169 		return rc;
170 	}
171 
172 	return 0;
173 }
174 
175 /**
176  * Handle disconnection requests
177  *
178  * @v ibdev		Infiniband device
179  * @v mi		Management interface
180  * @v mad		Received MAD
181  * @v av		Source address vector
 * (No return value; a failure to send the reply is ignored)
183  */
ib_cm_recv_dreq(struct ib_device * ibdev,struct ib_mad_interface * mi,union ib_mad * mad,struct ib_address_vector * av)184 static void ib_cm_recv_dreq ( struct ib_device *ibdev,
185 			      struct ib_mad_interface *mi,
186 			      union ib_mad *mad,
187 			      struct ib_address_vector *av ) {
188 	struct ib_cm_disconnect_request *dreq =
189 		&mad->cm.cm_data.disconnect_request;
190 	struct ib_connection *conn;
191 	uint32_t local_id = ntohl ( dreq->remote_id );
192 	uint32_t remote_id = ntohl ( dreq->local_id );
193 	int rc;
194 
195 	/* Identify connection */
196 	conn = ib_cm_find ( local_id );
197 	if ( conn ) {
198 		/* Notify upper layer */
199 		conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
200 				    &dreq->private_data,
201 				    sizeof ( dreq->private_data ) );
202 	} else {
203 		DBGC ( local_id, "CM %08x unexpected DREQ\n", local_id );
204 	}
205 
206 	/* Send reply */
207 	if ( ( rc = ib_cm_send_drep ( ibdev, mi, &mad->hdr.tid, av, local_id,
208 				      remote_id ) ) != 0 ) {
209 		/* Ignore errors; the remote end will retry */
210 	}
211 };
212 
213 /** Communication management agents */
214 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
215 	{
216 		.mgmt_class = IB_MGMT_CLASS_CM,
217 		.class_version = IB_CM_CLASS_VERSION,
218 		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
219 		.handle = ib_cm_recv_rep,
220 	},
221 	{
222 		.mgmt_class = IB_MGMT_CLASS_CM,
223 		.class_version = IB_CM_CLASS_VERSION,
224 		.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
225 		.handle = ib_cm_recv_dreq,
226 	},
227 };
228 
229 /**
230  * Convert connection rejection reason to return status code
231  *
232  * @v reason		Rejection reason (in network byte order)
233  * @ret rc		Return status code
234  */
ib_cm_rejection_reason_to_rc(uint16_t reason)235 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
236 	switch ( reason ) {
237 	case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
238 		return -ENODEV;
239 	case htons ( IB_CM_REJECT_STALE_CONN ) :
240 		return -EALREADY;
241 	case htons ( IB_CM_REJECT_CONSUMER ) :
242 		return -ENOTTY;
243 	default:
244 		return -EPERM;
245 	}
246 }
247 
248 /**
249  * Handle connection request transaction completion
250  *
251  * @v ibdev		Infiniband device
252  * @v mi		Management interface
253  * @v madx		Management transaction
254  * @v rc		Status code
255  * @v mad		Received MAD (or NULL on error)
256  * @v av		Source address vector (or NULL on error)
257  */
ib_cm_req_complete(struct ib_device * ibdev,struct ib_mad_interface * mi,struct ib_mad_transaction * madx,int rc,union ib_mad * mad,struct ib_address_vector * av)258 static void ib_cm_req_complete ( struct ib_device *ibdev,
259 				 struct ib_mad_interface *mi,
260 				 struct ib_mad_transaction *madx,
261 				 int rc, union ib_mad *mad,
262 				 struct ib_address_vector *av ) {
263 	struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
264 	struct ib_queue_pair *qp = conn->qp;
265 	struct ib_cm_common *common = &mad->cm.cm_data.common;
266 	struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
267 	struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
268 	uint32_t local_id = conn->local_id;
269 	void *private_data = NULL;
270 	size_t private_data_len = 0;
271 
272 	/* Report failures */
273 	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
274 		rc = -EIO;
275 	if ( rc != 0 ) {
276 		DBGC ( local_id, "CM %08x connection request failed: %s\n",
277 		       local_id, strerror ( rc ) );
278 		goto out;
279 	}
280 
281 	/* Record remote communication ID */
282 	conn->remote_id = ntohl ( common->local_id );
283 
284 	/* Handle response */
285 	switch ( mad->hdr.attr_id ) {
286 
287 	case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
288 		/* Extract fields */
289 		qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
290 		qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
291 		private_data = &rep->private_data;
292 		private_data_len = sizeof ( rep->private_data );
293 		DBGC ( local_id, "CM %08x connected to QPN %#lx PSN %#x\n",
294 		       local_id, qp->av.qpn, qp->send.psn );
295 
296 		/* Modify queue pair */
297 		if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
298 			DBGC ( local_id, "CM %08x could not modify queue "
299 			       "pair: %s\n", local_id, strerror ( rc ) );
300 			goto out;
301 		}
302 
303 		/* Send "ready to use" reply */
304 		if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
305 					     conn->local_id,
306 					     conn->remote_id ) ) != 0 ) {
307 			/* Treat as non-fatal */
308 			rc = 0;
309 		}
310 		break;
311 
312 	case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
313 		/* Extract fields */
314 		DBGC ( local_id, "CM %08x connection rejected (reason %d)\n",
315 		       local_id, ntohs ( rej->reason ) );
316 		/* Private data is valid only for a Consumer Reject */
317 		if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
318 			private_data = &rej->private_data;
319 			private_data_len = sizeof ( rej->private_data );
320 		}
321 		rc = ib_cm_rejection_reason_to_rc ( rej->reason );
322 		break;
323 
324 	default:
325 		DBGC ( local_id, "CM %08x unexpected response (attribute "
326 		       "%04x)\n", local_id, ntohs ( mad->hdr.attr_id ) );
327 		rc = -ENOTSUP;
328 		break;
329 	}
330 
331  out:
332 	/* Destroy the completed transaction */
333 	ib_destroy_madx ( ibdev, ibdev->gsi, madx );
334 	conn->madx = NULL;
335 
336 	/* Hand off to the upper completion handler */
337 	conn->op->changed ( ibdev, qp, conn, rc, private_data,
338 			    private_data_len );
339 }
340 
341 /** Connection request operations */
342 static struct ib_mad_transaction_operations ib_cm_req_op = {
343 	.complete = ib_cm_req_complete,
344 };
345 
346 /**
347  * Handle connection path transaction completion
348  *
349  * @v ibdev		Infiniband device
350  * @v path		Path
351  * @v rc		Status code
352  * @v av		Address vector, or NULL on error
353  */
ib_cm_path_complete(struct ib_device * ibdev,struct ib_path * path,int rc,struct ib_address_vector * av)354 static void ib_cm_path_complete ( struct ib_device *ibdev,
355 				  struct ib_path *path, int rc,
356 				  struct ib_address_vector *av ) {
357 	struct ib_connection *conn = ib_path_get_ownerdata ( path );
358 	struct ib_queue_pair *qp = conn->qp;
359 	union ib_mad mad;
360 	struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
361 	uint32_t local_id = conn->local_id;
362 	size_t private_data_len;
363 
364 	/* Report failures */
365 	if ( rc != 0 ) {
366 		DBGC ( local_id, "CM %08x path lookup failed: %s\n",
367 		       local_id, strerror ( rc ) );
368 		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
369 		goto out;
370 	}
371 
372 	/* Update queue pair peer path */
373 	memcpy ( &qp->av, av, sizeof ( qp->av ) );
374 
375 	/* Construct connection request */
376 	memset ( &mad, 0, sizeof ( mad ) );
377 	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
378 	mad.hdr.class_version = IB_CM_CLASS_VERSION;
379 	mad.hdr.method = IB_MGMT_METHOD_SEND;
380 	mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
381 	req->local_id = htonl ( conn->local_id );
382 	memcpy ( &req->service_id, &conn->service_id,
383 		 sizeof ( req->service_id ) );
384 	memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
385 	req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
386 	req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
387 	req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
388 		htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
389 			( 0 << 0 ) );
390 	req->starting_psn__local_timeout__retry_count =
391 		htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
392 			( 0x07 << 0 ) );
393 	req->pkey = htons ( ibdev->pkey );
394 	req->payload_mtu__rdc_exists__rnr_retry =
395 		( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
396 	req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
397 	req->primary.local_lid = htons ( ibdev->lid );
398 	req->primary.remote_lid = htons ( conn->qp->av.lid );
399 	memcpy ( &req->primary.local_gid, &ibdev->gid,
400 		 sizeof ( req->primary.local_gid ) );
401 	memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
402 		 sizeof ( req->primary.remote_gid ) );
403 	req->primary.flow_label__rate =
404 		htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
405 	req->primary.hop_limit = 0;
406 	req->primary.sl__subnet_local =
407 		( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
408 	req->primary.local_ack_timeout = ( 0x13 << 3 );
409 	private_data_len = conn->private_data_len;
410 	if ( private_data_len > sizeof ( req->private_data ) )
411 		private_data_len = sizeof ( req->private_data );
412 	memcpy ( &req->private_data, &conn->private_data, private_data_len );
413 
414 	/* Create connection request */
415 	av->qpn = IB_QPN_GSI;
416 	av->qkey = IB_QKEY_GSI;
417 	conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
418 				      &ib_cm_req_op );
419 	if ( ! conn->madx ) {
420 		DBGC ( local_id, "CM %08x could not create connection "
421 		       "request\n", local_id );
422 		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
423 		goto out;
424 	}
425 	ib_madx_set_ownerdata ( conn->madx, conn );
426 
427  out:
428 	/* Destroy the completed transaction */
429 	ib_destroy_path ( ibdev, path );
430 	conn->path = NULL;
431 }
432 
433 /** Connection path operations */
434 static struct ib_path_operations ib_cm_path_op = {
435 	.complete = ib_cm_path_complete,
436 };
437 
438 /**
439  * Create connection to remote QP
440  *
441  * @v ibdev		Infiniband device
442  * @v qp		Queue pair
443  * @v dgid		Target GID
444  * @v service_id	Target service ID
445  * @v private_data	Connection request private data
446  * @v private_data_len	Length of connection request private data
447  * @v op		Connection operations
448  * @ret conn		Connection
449  */
450 struct ib_connection *
ib_create_conn(struct ib_device * ibdev,struct ib_queue_pair * qp,union ib_gid * dgid,union ib_guid * service_id,void * private_data,size_t private_data_len,struct ib_connection_operations * op)451 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
452 		 union ib_gid *dgid, union ib_guid *service_id,
453 		 void *private_data, size_t private_data_len,
454 		 struct ib_connection_operations *op ) {
455 	struct ib_connection *conn;
456 	uint32_t local_id;
457 
458 	/* Allocate and initialise request */
459 	conn = zalloc ( sizeof ( *conn ) + private_data_len );
460 	if ( ! conn )
461 		goto err_alloc_conn;
462 	conn->ibdev = ibdev;
463 	conn->qp = qp;
464 	memset ( &qp->av, 0, sizeof ( qp->av ) );
465 	qp->av.gid_present = 1;
466 	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
467 	conn->local_id = local_id = random();
468 	memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
469 	conn->op = op;
470 	conn->private_data_len = private_data_len;
471 	memcpy ( &conn->private_data, private_data, private_data_len );
472 
473 	/* Create path */
474 	conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
475 	if ( ! conn->path )
476 		goto err_create_path;
477 	ib_path_set_ownerdata ( conn->path, conn );
478 
479 	/* Add to list of connections */
480 	list_add ( &conn->list, &ib_cm_conns );
481 
482 	DBGC ( local_id, "CM %08x created for IBDEV %s QPN %#lx\n",
483 	       local_id, ibdev->name, qp->qpn );
484 	DBGC ( local_id, "CM %08x connecting to " IB_GID_FMT " "
485 	       IB_GUID_FMT "\n", local_id, IB_GID_ARGS ( dgid ),
486 	       IB_GUID_ARGS ( service_id ) );
487 
488 	return conn;
489 
490 	ib_destroy_path ( ibdev, conn->path );
491  err_create_path:
492 	free ( conn );
493  err_alloc_conn:
494 	return NULL;
495 }
496 
497 /**
498  * Destroy connection to remote QP
499  *
500  * @v ibdev		Infiniband device
501  * @v qp		Queue pair
502  * @v conn		Connection
503  */
void ib_destroy_conn ( struct ib_device *ibdev,
		       struct ib_queue_pair *qp __unused,
		       struct ib_connection *conn ) {

	/* Remove from list of connections */
	list_del ( &conn->list );

	/* Destroy the connection request transaction, if one exists */
	if ( conn->madx != NULL )
		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );

	/* Destroy the path lookup, if one exists */
	if ( conn->path != NULL )
		ib_destroy_path ( ibdev, conn->path );

	/* Free the connection itself */
	free ( conn );
}
515