1 /*
2 * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19 FILE_LICENCE ( GPL2_OR_LATER );
20
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <byteswap.h>
25 #include <errno.h>
26 #include <assert.h>
27 #include <gpxe/infiniband.h>
28 #include <gpxe/ib_mi.h>
29 #include <gpxe/ib_pathrec.h>
30 #include <gpxe/ib_cm.h>
31
32 /**
33 * @file
34 *
35 * Infiniband communication management
36 *
37 */
38
/** List of all connections created by ib_create_conn() */
static LIST_HEAD ( ib_cm_conns );
41
42 /**
43 * Send "ready to use" response
44 *
45 * @v ibdev Infiniband device
46 * @v mi Management interface
47 * @v conn Connection
48 * @v av Address vector
49 * @ret rc Return status code
50 */
ib_cm_send_rtu(struct ib_device * ibdev,struct ib_mad_interface * mi,struct ib_connection * conn,struct ib_address_vector * av)51 static int ib_cm_send_rtu ( struct ib_device *ibdev,
52 struct ib_mad_interface *mi,
53 struct ib_connection *conn,
54 struct ib_address_vector *av ) {
55 union ib_mad mad;
56 struct ib_cm_ready_to_use *ready =
57 &mad.cm.cm_data.ready_to_use;
58 int rc;
59
60 /* Construct "ready to use" response */
61 memset ( &mad, 0, sizeof ( mad ) );
62 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
63 mad.hdr.class_version = IB_CM_CLASS_VERSION;
64 mad.hdr.method = IB_MGMT_METHOD_SEND;
65 mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
66 ready->local_id = htonl ( conn->local_id );
67 ready->remote_id = htonl ( conn->remote_id );
68 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
69 DBGC ( conn, "CM %p could not send RTU: %s\n",
70 conn, strerror ( rc ) );
71 return rc;
72 }
73
74 return 0;
75 }
76
77 /**
78 * Handle duplicate connection replies
79 *
80 * @v ibdev Infiniband device
81 * @v mi Management interface
82 * @v mad Received MAD
83 * @v av Source address vector
84 * @ret rc Return status code
85 *
86 * If a "ready to use" MAD is lost, the peer may resend the connection
87 * reply. We have to respond to these with duplicate "ready to use"
88 * MADs, otherwise the peer may time out and drop the connection.
89 */
ib_cm_connect_rep(struct ib_device * ibdev,struct ib_mad_interface * mi,union ib_mad * mad,struct ib_address_vector * av)90 static void ib_cm_connect_rep ( struct ib_device *ibdev,
91 struct ib_mad_interface *mi,
92 union ib_mad *mad,
93 struct ib_address_vector *av ) {
94 struct ib_cm_connect_reply *connect_rep =
95 &mad->cm.cm_data.connect_reply;
96 struct ib_connection *conn;
97 int rc;
98
99 /* Identify connection */
100 list_for_each_entry ( conn, &ib_cm_conns, list ) {
101 if ( ntohl ( connect_rep->remote_id ) != conn->local_id )
102 continue;
103 /* Try to send "ready to use" reply */
104 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
105 /* Ignore errors */
106 return;
107 }
108 return;
109 }
110
111 DBG ( "CM unidentified connection %08x\n",
112 ntohl ( connect_rep->remote_id ) );
113 }
114
115 /** Communication management agents */
116 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
117 {
118 .mgmt_class = IB_MGMT_CLASS_CM,
119 .class_version = IB_CM_CLASS_VERSION,
120 .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
121 .handle = ib_cm_connect_rep,
122 },
123 };
124
125 /**
126 * Convert connection rejection reason to return status code
127 *
128 * @v reason Rejection reason (in network byte order)
129 * @ret rc Return status code
130 */
ib_cm_rejection_reason_to_rc(uint16_t reason)131 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
132 switch ( reason ) {
133 case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
134 return -ENODEV;
135 case htons ( IB_CM_REJECT_STALE_CONN ) :
136 return -EALREADY;
137 case htons ( IB_CM_REJECT_CONSUMER ) :
138 return -ENOTTY;
139 default:
140 return -EPERM;
141 }
142 }
143
144 /**
145 * Handle connection request transaction completion
146 *
147 * @v ibdev Infiniband device
148 * @v mi Management interface
149 * @v madx Management transaction
150 * @v rc Status code
151 * @v mad Received MAD (or NULL on error)
152 * @v av Source address vector (or NULL on error)
153 */
ib_cm_req_complete(struct ib_device * ibdev,struct ib_mad_interface * mi,struct ib_mad_transaction * madx,int rc,union ib_mad * mad,struct ib_address_vector * av)154 static void ib_cm_req_complete ( struct ib_device *ibdev,
155 struct ib_mad_interface *mi,
156 struct ib_mad_transaction *madx,
157 int rc, union ib_mad *mad,
158 struct ib_address_vector *av ) {
159 struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
160 struct ib_queue_pair *qp = conn->qp;
161 struct ib_cm_common *common = &mad->cm.cm_data.common;
162 struct ib_cm_connect_reply *connect_rep =
163 &mad->cm.cm_data.connect_reply;
164 struct ib_cm_connect_reject *connect_rej =
165 &mad->cm.cm_data.connect_reject;
166 void *private_data = NULL;
167 size_t private_data_len = 0;
168
169 /* Report failures */
170 if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
171 rc = -EIO;
172 if ( rc != 0 ) {
173 DBGC ( conn, "CM %p connection request failed: %s\n",
174 conn, strerror ( rc ) );
175 goto out;
176 }
177
178 /* Record remote communication ID */
179 conn->remote_id = ntohl ( common->local_id );
180
181 /* Handle response */
182 switch ( mad->hdr.attr_id ) {
183
184 case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
185 /* Extract fields */
186 qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 );
187 qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 );
188 private_data = &connect_rep->private_data;
189 private_data_len = sizeof ( connect_rep->private_data );
190 DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
191 conn, qp->av.qpn, qp->send.psn );
192
193 /* Modify queue pair */
194 if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
195 DBGC ( conn, "CM %p could not modify queue pair: %s\n",
196 conn, strerror ( rc ) );
197 goto out;
198 }
199
200 /* Send "ready to use" reply */
201 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
202 /* Treat as non-fatal */
203 rc = 0;
204 }
205 break;
206
207 case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
208 /* Extract fields */
209 DBGC ( conn, "CM %p connection rejected (reason %d)\n",
210 conn, ntohs ( connect_rej->reason ) );
211 /* Private data is valid only for a Consumer Reject */
212 if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
213 private_data = &connect_rej->private_data;
214 private_data_len = sizeof (connect_rej->private_data);
215 }
216 rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason );
217 break;
218
219 default:
220 DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
221 conn, ntohs ( mad->hdr.attr_id ) );
222 rc = -ENOTSUP;
223 break;
224 }
225
226 out:
227 /* Destroy the completed transaction */
228 ib_destroy_madx ( ibdev, ibdev->gsi, madx );
229 conn->madx = NULL;
230
231 /* Hand off to the upper completion handler */
232 conn->op->changed ( ibdev, qp, conn, rc, private_data,
233 private_data_len );
234 }
235
236 /** Connection request operations */
237 static struct ib_mad_transaction_operations ib_cm_req_op = {
238 .complete = ib_cm_req_complete,
239 };
240
241 /**
242 * Handle connection path transaction completion
243 *
244 * @v ibdev Infiniband device
245 * @v path Path
246 * @v rc Status code
247 * @v av Address vector, or NULL on error
248 */
ib_cm_path_complete(struct ib_device * ibdev,struct ib_path * path,int rc,struct ib_address_vector * av)249 static void ib_cm_path_complete ( struct ib_device *ibdev,
250 struct ib_path *path, int rc,
251 struct ib_address_vector *av ) {
252 struct ib_connection *conn = ib_path_get_ownerdata ( path );
253 struct ib_queue_pair *qp = conn->qp;
254 union ib_mad mad;
255 struct ib_cm_connect_request *connect_req =
256 &mad.cm.cm_data.connect_request;
257 size_t private_data_len;
258
259 /* Report failures */
260 if ( rc != 0 ) {
261 DBGC ( conn, "CM %p path lookup failed: %s\n",
262 conn, strerror ( rc ) );
263 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
264 goto out;
265 }
266
267 /* Update queue pair peer path */
268 memcpy ( &qp->av, av, sizeof ( qp->av ) );
269
270 /* Construct connection request */
271 memset ( &mad, 0, sizeof ( mad ) );
272 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
273 mad.hdr.class_version = IB_CM_CLASS_VERSION;
274 mad.hdr.method = IB_MGMT_METHOD_SEND;
275 mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
276 connect_req->local_id = htonl ( conn->local_id );
277 memcpy ( &connect_req->service_id, &conn->service_id,
278 sizeof ( connect_req->service_id ) );
279 ib_get_hca_info ( ibdev, &connect_req->local_ca );
280 connect_req->local_qpn__responder_resources =
281 htonl ( ( qp->qpn << 8 ) | 1 );
282 connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
283 connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
284 htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
285 ( 0 << 0 ) );
286 connect_req->starting_psn__local_timeout__retry_count =
287 htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
288 ( 0x07 << 0 ) );
289 connect_req->pkey = htons ( ibdev->pkey );
290 connect_req->payload_mtu__rdc_exists__rnr_retry =
291 ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
292 connect_req->max_cm_retries__srq =
293 ( ( 0x0f << 4 ) | ( 0 << 3 ) );
294 connect_req->primary.local_lid = htons ( ibdev->lid );
295 connect_req->primary.remote_lid = htons ( conn->qp->av.lid );
296 memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
297 sizeof ( connect_req->primary.local_gid ) );
298 memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid,
299 sizeof ( connect_req->primary.remote_gid ) );
300 connect_req->primary.flow_label__rate =
301 htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
302 connect_req->primary.hop_limit = 0;
303 connect_req->primary.sl__subnet_local =
304 ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
305 connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
306 private_data_len = conn->private_data_len;
307 if ( private_data_len > sizeof ( connect_req->private_data ) )
308 private_data_len = sizeof ( connect_req->private_data );
309 memcpy ( &connect_req->private_data, &conn->private_data,
310 private_data_len );
311
312 /* Create connection request */
313 av->qpn = IB_QPN_GSI;
314 av->qkey = IB_QKEY_GSI;
315 conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
316 &ib_cm_req_op );
317 if ( ! conn->madx ) {
318 DBGC ( conn, "CM %p could not create connection request\n",
319 conn );
320 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
321 goto out;
322 }
323 ib_madx_set_ownerdata ( conn->madx, conn );
324
325 out:
326 /* Destroy the completed transaction */
327 ib_destroy_path ( ibdev, path );
328 conn->path = NULL;
329 }
330
331 /** Connection path operations */
332 static struct ib_path_operations ib_cm_path_op = {
333 .complete = ib_cm_path_complete,
334 };
335
336 /**
337 * Create connection to remote QP
338 *
339 * @v ibdev Infiniband device
340 * @v qp Queue pair
341 * @v dgid Target GID
342 * @v service_id Target service ID
343 * @v private_data Connection request private data
344 * @v private_data_len Length of connection request private data
345 * @v op Connection operations
346 * @ret conn Connection
347 */
348 struct ib_connection *
ib_create_conn(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_gid * dgid,struct ib_gid_half * service_id,void * private_data,size_t private_data_len,struct ib_connection_operations * op)349 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
350 struct ib_gid *dgid, struct ib_gid_half *service_id,
351 void *private_data, size_t private_data_len,
352 struct ib_connection_operations *op ) {
353 struct ib_connection *conn;
354
355 /* Allocate and initialise request */
356 conn = zalloc ( sizeof ( *conn ) + private_data_len );
357 if ( ! conn )
358 goto err_alloc_conn;
359 conn->ibdev = ibdev;
360 conn->qp = qp;
361 memset ( &qp->av, 0, sizeof ( qp->av ) );
362 qp->av.gid_present = 1;
363 memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
364 conn->local_id = random();
365 memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
366 conn->op = op;
367 conn->private_data_len = private_data_len;
368 memcpy ( &conn->private_data, private_data, private_data_len );
369
370 /* Create path */
371 conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
372 if ( ! conn->path )
373 goto err_create_path;
374 ib_path_set_ownerdata ( conn->path, conn );
375
376 /* Add to list of connections */
377 list_add ( &conn->list, &ib_cm_conns );
378
379 DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
380 conn, ibdev, qp->qpn );
381 DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
382 conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ),
383 ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ),
384 ntohl ( service_id->u.dwords[0] ),
385 ntohl ( service_id->u.dwords[1] ) );
386
387 return conn;
388
389 ib_destroy_path ( ibdev, conn->path );
390 err_create_path:
391 free ( conn );
392 err_alloc_conn:
393 return NULL;
394 }
395
396 /**
397 * Destroy connection to remote QP
398 *
399 * @v ibdev Infiniband device
400 * @v qp Queue pair
401 * @v conn Connection
402 */
ib_destroy_conn(struct ib_device * ibdev,struct ib_queue_pair * qp __unused,struct ib_connection * conn)403 void ib_destroy_conn ( struct ib_device *ibdev,
404 struct ib_queue_pair *qp __unused,
405 struct ib_connection *conn ) {
406
407 list_del ( &conn->list );
408 if ( conn->madx )
409 ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
410 if ( conn->path )
411 ib_destroy_path ( ibdev, conn->path );
412 free ( conn );
413 }
414