1 /*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <byteswap.h>
32 #include <errno.h>
33 #include <assert.h>
34 #include <ipxe/list.h>
35 #include <ipxe/errortab.h>
36 #include <ipxe/if_arp.h>
37 #include <ipxe/netdevice.h>
38 #include <ipxe/iobuf.h>
39 #include <ipxe/process.h>
40 #include <ipxe/profile.h>
41 #include <ipxe/infiniband.h>
42 #include <ipxe/ib_mi.h>
43 #include <ipxe/ib_sma.h>
44
45 /** @file
46 *
47 * Infiniband protocol
48 *
49 */
50
51 /** List of Infiniband devices */
52 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
53
54 /** List of open Infiniband devices, in reverse order of opening */
55 static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
56
57 /** Infiniband device index */
58 static unsigned int ibdev_index = 0;
59
60 /** Post send work queue entry profiler */
61 static struct profiler ib_post_send_profiler __profiler =
62 { .name = "ib.post_send" };
63
64 /** Post receive work queue entry profiler */
65 static struct profiler ib_post_recv_profiler __profiler =
66 { .name = "ib.post_recv" };
67
68 /* Disambiguate the various possible EINPROGRESSes */
69 #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
70 #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
71 ( EINFO_EINPROGRESS, 0x01, "Initialising" )
72 #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
73 #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
74 ( EINFO_EINPROGRESS, 0x02, "Armed" )
75
76 /** Human-readable message for the link statuses */
77 struct errortab infiniband_errors[] __errortab = {
78 __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
79 __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
80 };
81
82 /***************************************************************************
83 *
84 * Completion queues
85 *
86 ***************************************************************************
87 */
88
89 /**
90 * Create completion queue
91 *
92 * @v ibdev Infiniband device
93 * @v num_cqes Number of completion queue entries
94 * @v op Completion queue operations
95 * @v new_cq New completion queue to fill in
96 * @ret rc Return status code
97 */
ib_create_cq(struct ib_device * ibdev,unsigned int num_cqes,struct ib_completion_queue_operations * op,struct ib_completion_queue ** new_cq)98 int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
99 struct ib_completion_queue_operations *op,
100 struct ib_completion_queue **new_cq ) {
101 struct ib_completion_queue *cq;
102 int rc;
103
104 DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
105
106 /* Allocate and initialise data structure */
107 cq = zalloc ( sizeof ( *cq ) );
108 if ( ! cq ) {
109 rc = -ENOMEM;
110 goto err_alloc_cq;
111 }
112 cq->ibdev = ibdev;
113 list_add_tail ( &cq->list, &ibdev->cqs );
114 cq->num_cqes = num_cqes;
115 INIT_LIST_HEAD ( &cq->work_queues );
116 cq->op = op;
117
118 /* Perform device-specific initialisation and get CQN */
119 if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
120 DBGC ( ibdev, "IBDEV %s could not initialise completion "
121 "queue: %s\n", ibdev->name, strerror ( rc ) );
122 goto err_dev_create_cq;
123 }
124
125 DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
126 "with CQN %#lx\n", ibdev->name, num_cqes, cq,
127 ib_cq_get_drvdata ( cq ), cq->cqn );
128 *new_cq = cq;
129 return 0;
130
131 ibdev->op->destroy_cq ( ibdev, cq );
132 err_dev_create_cq:
133 list_del ( &cq->list );
134 free ( cq );
135 err_alloc_cq:
136 return rc;
137 }
138
139 /**
140 * Destroy completion queue
141 *
142 * @v ibdev Infiniband device
143 * @v cq Completion queue
144 */
ib_destroy_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)145 void ib_destroy_cq ( struct ib_device *ibdev,
146 struct ib_completion_queue *cq ) {
147 DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
148 ibdev->name, cq->cqn );
149 assert ( list_empty ( &cq->work_queues ) );
150 ibdev->op->destroy_cq ( ibdev, cq );
151 list_del ( &cq->list );
152 free ( cq );
153 }
154
155 /**
156 * Poll completion queue
157 *
158 * @v ibdev Infiniband device
159 * @v cq Completion queue
160 */
ib_poll_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)161 void ib_poll_cq ( struct ib_device *ibdev,
162 struct ib_completion_queue *cq ) {
163 struct ib_work_queue *wq;
164
165 /* Poll completion queue */
166 ibdev->op->poll_cq ( ibdev, cq );
167
168 /* Refill receive work queues */
169 list_for_each_entry ( wq, &cq->work_queues, list ) {
170 if ( ! wq->is_send )
171 ib_refill_recv ( ibdev, wq->qp );
172 }
173 }
174
175 /***************************************************************************
176 *
177 * Work queues
178 *
179 ***************************************************************************
180 */
181
182 /**
183 * Create queue pair
184 *
185 * @v ibdev Infiniband device
186 * @v type Queue pair type
187 * @v num_send_wqes Number of send work queue entries
188 * @v send_cq Send completion queue
189 * @v num_recv_wqes Number of receive work queue entries
190 * @v recv_cq Receive completion queue
191 * @v op Queue pair operations
192 * @v name Queue pair name
193 * @v new_qp New queue pair to fill in
194 * @ret rc Return status code
195 *
196 * The queue pair will be left in the INIT state; you must call
197 * ib_modify_qp() before it is ready to use for sending and receiving.
198 */
ib_create_qp(struct ib_device * ibdev,enum ib_queue_pair_type type,unsigned int num_send_wqes,struct ib_completion_queue * send_cq,unsigned int num_recv_wqes,struct ib_completion_queue * recv_cq,struct ib_queue_pair_operations * op,const char * name,struct ib_queue_pair ** new_qp)199 int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
200 unsigned int num_send_wqes,
201 struct ib_completion_queue *send_cq,
202 unsigned int num_recv_wqes,
203 struct ib_completion_queue *recv_cq,
204 struct ib_queue_pair_operations *op, const char *name,
205 struct ib_queue_pair **new_qp ) {
206 struct ib_queue_pair *qp;
207 size_t total_size;
208 int rc;
209
210 DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
211
212 /* Allocate and initialise data structure */
213 total_size = ( sizeof ( *qp ) +
214 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
215 ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
216 qp = zalloc ( total_size );
217 if ( ! qp ) {
218 rc = -ENOMEM;
219 goto err_alloc_qp;
220 }
221 qp->ibdev = ibdev;
222 list_add_tail ( &qp->list, &ibdev->qps );
223 qp->type = type;
224 qp->send.qp = qp;
225 qp->send.is_send = 1;
226 qp->send.cq = send_cq;
227 list_add_tail ( &qp->send.list, &send_cq->work_queues );
228 qp->send.psn = ( random() & 0xffffffUL );
229 qp->send.num_wqes = num_send_wqes;
230 qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
231 qp->recv.qp = qp;
232 qp->recv.cq = recv_cq;
233 list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
234 qp->recv.psn = ( random() & 0xffffffUL );
235 qp->recv.num_wqes = num_recv_wqes;
236 qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
237 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
238 INIT_LIST_HEAD ( &qp->mgids );
239 qp->op = op;
240 qp->name = name;
241
242 /* Perform device-specific initialisation and get QPN */
243 if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
244 DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
245 "%s\n", ibdev->name, strerror ( rc ) );
246 goto err_dev_create_qp;
247 }
248 DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
249 ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
250 DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
251 ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
252 qp->recv.iobufs );
253 DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
254 ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
255 ( ( ( void * ) qp ) + total_size ) );
256
257 /* Calculate externally-visible QPN */
258 switch ( type ) {
259 case IB_QPT_SMI:
260 qp->ext_qpn = IB_QPN_SMI;
261 break;
262 case IB_QPT_GSI:
263 qp->ext_qpn = IB_QPN_GSI;
264 break;
265 default:
266 qp->ext_qpn = qp->qpn;
267 break;
268 }
269 if ( qp->ext_qpn != qp->qpn ) {
270 DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
271 ibdev->name, qp->qpn, qp->ext_qpn );
272 }
273
274 *new_qp = qp;
275 return 0;
276
277 ibdev->op->destroy_qp ( ibdev, qp );
278 err_dev_create_qp:
279 list_del ( &qp->send.list );
280 list_del ( &qp->recv.list );
281 list_del ( &qp->list );
282 free ( qp );
283 err_alloc_qp:
284 return rc;
285 }
286
287 /**
288 * Modify queue pair
289 *
290 * @v ibdev Infiniband device
291 * @v qp Queue pair
292 * @ret rc Return status code
293 */
ib_modify_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)294 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
295 int rc;
296
297 DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
298
299 if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
300 DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
301 ibdev->name, qp->qpn, strerror ( rc ) );
302 return rc;
303 }
304
305 return 0;
306 }
307
308 /**
309 * Destroy queue pair
310 *
311 * @v ibdev Infiniband device
312 * @v qp Queue pair
313 */
ib_destroy_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)314 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
315 struct io_buffer *iobuf;
316 unsigned int i;
317
318 DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
319 ibdev->name, qp->qpn );
320
321 assert ( list_empty ( &qp->mgids ) );
322
323 /* Perform device-specific destruction */
324 ibdev->op->destroy_qp ( ibdev, qp );
325
326 /* Complete any remaining I/O buffers with errors */
327 for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
328 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
329 ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
330 }
331 for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
332 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
333 ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
334 -ECANCELED );
335 }
336 }
337
338 /* Remove work queues from completion queue */
339 list_del ( &qp->send.list );
340 list_del ( &qp->recv.list );
341
342 /* Free QP */
343 list_del ( &qp->list );
344 free ( qp );
345 }
346
347 /**
348 * Find queue pair by QPN
349 *
350 * @v ibdev Infiniband device
351 * @v qpn Queue pair number
352 * @ret qp Queue pair, or NULL
353 */
ib_find_qp_qpn(struct ib_device * ibdev,unsigned long qpn)354 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
355 unsigned long qpn ) {
356 struct ib_queue_pair *qp;
357
358 list_for_each_entry ( qp, &ibdev->qps, list ) {
359 if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
360 return qp;
361 }
362 return NULL;
363 }
364
365 /**
366 * Find queue pair by multicast GID
367 *
368 * @v ibdev Infiniband device
369 * @v gid Multicast GID
370 * @ret qp Queue pair, or NULL
371 */
ib_find_qp_mgid(struct ib_device * ibdev,union ib_gid * gid)372 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
373 union ib_gid *gid ) {
374 struct ib_queue_pair *qp;
375 struct ib_multicast_gid *mgid;
376
377 list_for_each_entry ( qp, &ibdev->qps, list ) {
378 list_for_each_entry ( mgid, &qp->mgids, list ) {
379 if ( memcmp ( &mgid->gid, gid,
380 sizeof ( mgid->gid ) ) == 0 ) {
381 return qp;
382 }
383 }
384 }
385 return NULL;
386 }
387
388 /**
389 * Find work queue belonging to completion queue
390 *
391 * @v cq Completion queue
392 * @v qpn Queue pair number
393 * @v is_send Find send work queue (rather than receive)
394 * @ret wq Work queue, or NULL if not found
395 */
ib_find_wq(struct ib_completion_queue * cq,unsigned long qpn,int is_send)396 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
397 unsigned long qpn, int is_send ) {
398 struct ib_work_queue *wq;
399
400 list_for_each_entry ( wq, &cq->work_queues, list ) {
401 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
402 return wq;
403 }
404 return NULL;
405 }
406
407 /**
408 * Post send work queue entry
409 *
410 * @v ibdev Infiniband device
411 * @v qp Queue pair
412 * @v dest Destination address vector
413 * @v iobuf I/O buffer
414 * @ret rc Return status code
415 */
ib_post_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * dest,struct io_buffer * iobuf)416 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
417 struct ib_address_vector *dest,
418 struct io_buffer *iobuf ) {
419 struct ib_address_vector dest_copy;
420 int rc;
421
422 /* Start profiling */
423 profile_start ( &ib_post_send_profiler );
424
425 /* Check queue fill level */
426 if ( qp->send.fill >= qp->send.num_wqes ) {
427 DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
428 ibdev->name, qp->qpn );
429 return -ENOBUFS;
430 }
431
432 /* Use default address vector if none specified */
433 if ( ! dest )
434 dest = &qp->av;
435
436 /* Make modifiable copy of address vector */
437 memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
438 dest = &dest_copy;
439
440 /* Fill in optional parameters in address vector */
441 if ( ! dest->qkey )
442 dest->qkey = qp->qkey;
443 if ( ! dest->rate )
444 dest->rate = IB_RATE_2_5;
445
446 /* Post to hardware */
447 if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
448 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
449 "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
450 return rc;
451 }
452
453 /* Increase fill level */
454 qp->send.fill++;
455
456 /* Stop profiling */
457 profile_stop ( &ib_post_send_profiler );
458
459 return 0;
460 }
461
462 /**
463 * Post receive work queue entry
464 *
465 * @v ibdev Infiniband device
466 * @v qp Queue pair
467 * @v iobuf I/O buffer
468 * @ret rc Return status code
469 */
ib_post_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf)470 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
471 struct io_buffer *iobuf ) {
472 int rc;
473
474 /* Start profiling */
475 profile_start ( &ib_post_recv_profiler );
476
477 /* Check packet length */
478 if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
479 DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
480 ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
481 return -EINVAL;
482 }
483
484 /* Check queue fill level */
485 if ( qp->recv.fill >= qp->recv.num_wqes ) {
486 DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
487 ibdev->name, qp->qpn );
488 return -ENOBUFS;
489 }
490
491 /* Post to hardware */
492 if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
493 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
494 "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
495 return rc;
496 }
497
498 /* Increase fill level */
499 qp->recv.fill++;
500
501 /* Stop profiling */
502 profile_stop ( &ib_post_recv_profiler );
503
504 return 0;
505 }
506
507 /**
508 * Complete send work queue entry
509 *
510 * @v ibdev Infiniband device
511 * @v qp Queue pair
512 * @v iobuf I/O buffer
513 * @v rc Completion status code
514 */
ib_complete_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf,int rc)515 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
516 struct io_buffer *iobuf, int rc ) {
517
518 if ( qp->send.cq->op->complete_send ) {
519 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
520 } else {
521 free_iob ( iobuf );
522 }
523 qp->send.fill--;
524 }
525
526 /**
527 * Complete receive work queue entry
528 *
529 * @v ibdev Infiniband device
530 * @v qp Queue pair
531 * @v dest Destination address vector, or NULL
532 * @v source Source address vector, or NULL
533 * @v iobuf I/O buffer
534 * @v rc Completion status code
535 */
ib_complete_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * dest,struct ib_address_vector * source,struct io_buffer * iobuf,int rc)536 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
537 struct ib_address_vector *dest,
538 struct ib_address_vector *source,
539 struct io_buffer *iobuf, int rc ) {
540
541 if ( qp->recv.cq->op->complete_recv ) {
542 qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
543 iobuf, rc );
544 } else {
545 free_iob ( iobuf );
546 }
547 qp->recv.fill--;
548 }
549
550 /**
551 * Refill receive work queue
552 *
553 * @v ibdev Infiniband device
554 * @v qp Queue pair
555 */
ib_refill_recv(struct ib_device * ibdev,struct ib_queue_pair * qp)556 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
557 struct io_buffer *iobuf;
558 int rc;
559
560 /* Keep filling while unfilled entries remain */
561 while ( qp->recv.fill < qp->recv.num_wqes ) {
562
563 /* Allocate I/O buffer */
564 iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
565 if ( ! iobuf ) {
566 /* Non-fatal; we will refill on next attempt */
567 return;
568 }
569
570 /* Post I/O buffer */
571 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
572 DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
573 ibdev->name, strerror ( rc ) );
574 free_iob ( iobuf );
575 /* Give up */
576 return;
577 }
578 }
579 }
580
581 /***************************************************************************
582 *
583 * Link control
584 *
585 ***************************************************************************
586 */
587
588 /**
589 * Get link state
590 *
591 * @v ibdev Infiniband device
592 * @ret rc Link status code
593 */
ib_link_rc(struct ib_device * ibdev)594 int ib_link_rc ( struct ib_device *ibdev ) {
595 switch ( ibdev->port_state ) {
596 case IB_PORT_STATE_DOWN: return -ENOTCONN;
597 case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
598 case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
599 case IB_PORT_STATE_ACTIVE: return 0;
600 default: return -EINVAL;
601 }
602 }
603
604 /**
605 * Textual representation of Infiniband link state
606 *
607 * @v ibdev Infiniband device
608 * @ret link_text Link state text
609 */
ib_link_state_text(struct ib_device * ibdev)610 static const char * ib_link_state_text ( struct ib_device *ibdev ) {
611 switch ( ibdev->port_state ) {
612 case IB_PORT_STATE_DOWN: return "DOWN";
613 case IB_PORT_STATE_INIT: return "INIT";
614 case IB_PORT_STATE_ARMED: return "ARMED";
615 case IB_PORT_STATE_ACTIVE: return "ACTIVE";
616 default: return "UNKNOWN";
617 }
618 }
619
620 /**
621 * Notify drivers of Infiniband device or link state change
622 *
623 * @v ibdev Infiniband device
624 */
ib_notify(struct ib_device * ibdev)625 static void ib_notify ( struct ib_device *ibdev ) {
626 struct ib_driver *driver;
627
628 for_each_table_entry ( driver, IB_DRIVERS )
629 driver->notify ( ibdev );
630 }
631
632 /**
633 * Notify of Infiniband link state change
634 *
635 * @v ibdev Infiniband device
636 */
ib_link_state_changed(struct ib_device * ibdev)637 void ib_link_state_changed ( struct ib_device *ibdev ) {
638
639 DBGC ( ibdev, "IBDEV %s link state is %s\n",
640 ibdev->name, ib_link_state_text ( ibdev ) );
641
642 /* Notify drivers of link state change */
643 ib_notify ( ibdev );
644 }
645
646 /**
647 * Open port
648 *
649 * @v ibdev Infiniband device
650 * @ret rc Return status code
651 */
ib_open(struct ib_device * ibdev)652 int ib_open ( struct ib_device *ibdev ) {
653 int rc;
654
655 /* Increment device open request counter */
656 if ( ibdev->open_count++ > 0 ) {
657 /* Device was already open; do nothing */
658 return 0;
659 }
660
661 /* Open device */
662 if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
663 DBGC ( ibdev, "IBDEV %s could not open: %s\n",
664 ibdev->name, strerror ( rc ) );
665 goto err_open;
666 }
667
668 /* Create subnet management interface */
669 if ( ( rc = ib_create_mi ( ibdev, IB_QPT_SMI, &ibdev->smi ) ) != 0 ) {
670 DBGC ( ibdev, "IBDEV %s could not create SMI: %s\n",
671 ibdev->name, strerror ( rc ) );
672 goto err_create_smi;
673 }
674
675 /* Create subnet management agent */
676 if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
677 DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
678 ibdev->name, strerror ( rc ) );
679 goto err_create_sma;
680 }
681
682 /* Create general services interface */
683 if ( ( rc = ib_create_mi ( ibdev, IB_QPT_GSI, &ibdev->gsi ) ) != 0 ) {
684 DBGC ( ibdev, "IBDEV %s could not create GSI: %s\n",
685 ibdev->name, strerror ( rc ) );
686 goto err_create_gsi;
687 }
688
689 /* Add to head of open devices list */
690 list_add ( &ibdev->open_list, &open_ib_devices );
691
692 /* Notify drivers of device state change */
693 ib_notify ( ibdev );
694
695 assert ( ibdev->open_count == 1 );
696 return 0;
697
698 ib_destroy_mi ( ibdev, ibdev->gsi );
699 err_create_gsi:
700 ib_destroy_sma ( ibdev, ibdev->smi );
701 err_create_sma:
702 ib_destroy_mi ( ibdev, ibdev->smi );
703 err_create_smi:
704 ibdev->op->close ( ibdev );
705 err_open:
706 assert ( ibdev->open_count == 1 );
707 ibdev->open_count = 0;
708 return rc;
709 }
710
711 /**
712 * Close port
713 *
714 * @v ibdev Infiniband device
715 */
ib_close(struct ib_device * ibdev)716 void ib_close ( struct ib_device *ibdev ) {
717
718 /* Decrement device open request counter */
719 ibdev->open_count--;
720
721 /* Close device if this was the last remaining requested opening */
722 if ( ibdev->open_count == 0 ) {
723 ib_notify ( ibdev );
724 list_del ( &ibdev->open_list );
725 ib_destroy_mi ( ibdev, ibdev->gsi );
726 ib_destroy_sma ( ibdev, ibdev->smi );
727 ib_destroy_mi ( ibdev, ibdev->smi );
728 ibdev->op->close ( ibdev );
729 ibdev->port_state = IB_PORT_STATE_DOWN;
730 }
731 }
732
733 /***************************************************************************
734 *
735 * Multicast
736 *
737 ***************************************************************************
738 */
739
740 /**
741 * Attach to multicast group
742 *
743 * @v ibdev Infiniband device
744 * @v qp Queue pair
745 * @v gid Multicast GID
746 * @ret rc Return status code
747 *
748 * Note that this function handles only the local device's attachment
749 * to the multicast GID; it does not issue the relevant MADs to join
750 * the multicast group on the subnet.
751 */
ib_mcast_attach(struct ib_device * ibdev,struct ib_queue_pair * qp,union ib_gid * gid)752 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
753 union ib_gid *gid ) {
754 struct ib_multicast_gid *mgid;
755 int rc;
756
757 /* Sanity check */
758 assert ( qp != NULL );
759
760 /* Add to software multicast GID list */
761 mgid = zalloc ( sizeof ( *mgid ) );
762 if ( ! mgid ) {
763 rc = -ENOMEM;
764 goto err_alloc_mgid;
765 }
766 memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
767 list_add_tail ( &mgid->list, &qp->mgids );
768
769 /* Add to hardware multicast GID list */
770 if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
771 goto err_dev_mcast_attach;
772
773 return 0;
774
775 err_dev_mcast_attach:
776 list_del ( &mgid->list );
777 free ( mgid );
778 err_alloc_mgid:
779 return rc;
780 }
781
782 /**
783 * Detach from multicast group
784 *
785 * @v ibdev Infiniband device
786 * @v qp Queue pair
787 * @v gid Multicast GID
788 */
ib_mcast_detach(struct ib_device * ibdev,struct ib_queue_pair * qp,union ib_gid * gid)789 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
790 union ib_gid *gid ) {
791 struct ib_multicast_gid *mgid;
792
793 /* Sanity check */
794 assert ( qp != NULL );
795
796 /* Remove from hardware multicast GID list */
797 ibdev->op->mcast_detach ( ibdev, qp, gid );
798
799 /* Remove from software multicast GID list */
800 list_for_each_entry ( mgid, &qp->mgids, list ) {
801 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
802 list_del ( &mgid->list );
803 free ( mgid );
804 break;
805 }
806 }
807 }
808
809 /***************************************************************************
810 *
811 * Miscellaneous
812 *
813 ***************************************************************************
814 */
815
816 /**
817 * Count Infiniband HCA ports
818 *
819 * @v ibdev Infiniband device
820 * @ret num_ports Number of ports
821 */
ib_count_ports(struct ib_device * ibdev)822 int ib_count_ports ( struct ib_device *ibdev ) {
823 struct ib_device *tmp;
824 int num_ports = 0;
825
826 /* Search for IB devices with the same physical device to
827 * identify port count.
828 */
829 for_each_ibdev ( tmp ) {
830 if ( tmp->dev == ibdev->dev )
831 num_ports++;
832 }
833 return num_ports;
834 }
835
836 /**
837 * Set port information
838 *
839 * @v ibdev Infiniband device
840 * @v mad Set port information MAD
841 */
ib_set_port_info(struct ib_device * ibdev,union ib_mad * mad)842 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
843 int rc;
844
845 /* Adapters with embedded SMAs do not need to support this method */
846 if ( ! ibdev->op->set_port_info ) {
847 DBGC ( ibdev, "IBDEV %s does not support setting port "
848 "information\n", ibdev->name );
849 return -ENOTSUP;
850 }
851
852 if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
853 DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
854 ibdev->name, strerror ( rc ) );
855 return rc;
856 }
857
858 return 0;
859 };
860
861 /**
862 * Set partition key table
863 *
864 * @v ibdev Infiniband device
865 * @v mad Set partition key table MAD
866 */
ib_set_pkey_table(struct ib_device * ibdev,union ib_mad * mad)867 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
868 int rc;
869
870 /* Adapters with embedded SMAs do not need to support this method */
871 if ( ! ibdev->op->set_pkey_table ) {
872 DBGC ( ibdev, "IBDEV %s does not support setting partition "
873 "key table\n", ibdev->name );
874 return -ENOTSUP;
875 }
876
877 if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
878 DBGC ( ibdev, "IBDEV %s could not set partition key table: "
879 "%s\n", ibdev->name, strerror ( rc ) );
880 return rc;
881 }
882
883 return 0;
884 };
885
886 /***************************************************************************
887 *
888 * Event queues
889 *
890 ***************************************************************************
891 */
892
893 /**
894 * Poll event queue
895 *
896 * @v ibdev Infiniband device
897 */
ib_poll_eq(struct ib_device * ibdev)898 void ib_poll_eq ( struct ib_device *ibdev ) {
899 struct ib_completion_queue *cq;
900
901 /* Poll device's event queue */
902 ibdev->op->poll_eq ( ibdev );
903
904 /* Poll all completion queues */
905 list_for_each_entry ( cq, &ibdev->cqs, list )
906 ib_poll_cq ( ibdev, cq );
907 }
908
909 /**
910 * Single-step the Infiniband event queue
911 *
912 * @v process Infiniband event queue process
913 */
ib_step(struct process * process __unused)914 static void ib_step ( struct process *process __unused ) {
915 struct ib_device *ibdev;
916
917 list_for_each_entry ( ibdev, &open_ib_devices, open_list )
918 ib_poll_eq ( ibdev );
919 }
920
921 /** Infiniband event queue process */
922 PERMANENT_PROCESS ( ib_process, ib_step );
923
924 /***************************************************************************
925 *
926 * Infiniband device creation/destruction
927 *
928 ***************************************************************************
929 */
930
931 /**
932 * Allocate Infiniband device
933 *
934 * @v priv_size Size of driver private data area
935 * @ret ibdev Infiniband device, or NULL
936 */
alloc_ibdev(size_t priv_size)937 struct ib_device * alloc_ibdev ( size_t priv_size ) {
938 struct ib_device *ibdev;
939 void *drv_priv;
940 size_t total_len;
941
942 total_len = ( sizeof ( *ibdev ) + priv_size );
943 ibdev = zalloc ( total_len );
944 if ( ibdev ) {
945 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
946 ib_set_drvdata ( ibdev, drv_priv );
947 INIT_LIST_HEAD ( &ibdev->list );
948 INIT_LIST_HEAD ( &ibdev->open_list );
949 INIT_LIST_HEAD ( &ibdev->cqs );
950 INIT_LIST_HEAD ( &ibdev->qps );
951 ibdev->port_state = IB_PORT_STATE_DOWN;
952 ibdev->lid = IB_LID_NONE;
953 ibdev->pkey = IB_PKEY_DEFAULT;
954 }
955 return ibdev;
956 }
957
958 /**
959 * Register Infiniband device
960 *
961 * @v ibdev Infiniband device
962 * @ret rc Return status code
963 */
register_ibdev(struct ib_device * ibdev)964 int register_ibdev ( struct ib_device *ibdev ) {
965 struct ib_driver *driver;
966 int rc;
967
968 /* Record device index and create device name */
969 if ( ibdev->name[0] == '\0' ) {
970 snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
971 ibdev_index );
972 }
973 ibdev->index = ++ibdev_index;
974
975 /* Add to device list */
976 ibdev_get ( ibdev );
977 list_add_tail ( &ibdev->list, &ib_devices );
978 DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
979 ibdev->dev->name );
980
981 /* Probe device */
982 for_each_table_entry ( driver, IB_DRIVERS ) {
983 if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
984 DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
985 ibdev->name, driver->name, strerror ( rc ) );
986 goto err_probe;
987 }
988 }
989
990 return 0;
991
992 err_probe:
993 for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
994 driver->remove ( ibdev );
995 list_del ( &ibdev->list );
996 ibdev_put ( ibdev );
997 return rc;
998 }
999
1000 /**
1001 * Unregister Infiniband device
1002 *
1003 * @v ibdev Infiniband device
1004 */
unregister_ibdev(struct ib_device * ibdev)1005 void unregister_ibdev ( struct ib_device *ibdev ) {
1006 struct ib_driver *driver;
1007
1008 /* Remove device */
1009 for_each_table_entry_reverse ( driver, IB_DRIVERS )
1010 driver->remove ( ibdev );
1011
1012 /* Remove from device list */
1013 list_del ( &ibdev->list );
1014 ibdev_put ( ibdev );
1015 DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
1016
1017 /* Reset device index if no devices remain */
1018 if ( list_empty ( &ib_devices ) )
1019 ibdev_index = 0;
1020 }
1021
1022 /**
1023 * Find Infiniband device by GID
1024 *
1025 * @v gid GID
1026 * @ret ibdev Infiniband device, or NULL
1027 */
find_ibdev(union ib_gid * gid)1028 struct ib_device * find_ibdev ( union ib_gid *gid ) {
1029 struct ib_device *ibdev;
1030
1031 for_each_ibdev ( ibdev ) {
1032 if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
1033 return ibdev;
1034 }
1035 return NULL;
1036 }
1037
1038 /**
1039 * Get most recently opened Infiniband device
1040 *
1041 * @ret ibdev Most recently opened Infiniband device, or NULL
1042 */
last_opened_ibdev(void)1043 struct ib_device * last_opened_ibdev ( void ) {
1044 struct ib_device *ibdev;
1045
1046 ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
1047 open_list );
1048 if ( ! ibdev )
1049 return NULL;
1050
1051 assert ( ibdev->open_count != 0 );
1052 return ibdev;
1053 }
1054
1055 /* Drag in objects via register_ibdev() */
1056 REQUIRING_SYMBOL ( register_ibdev );
1057
1058 /* Drag in Infiniband configuration */
1059 REQUIRE_OBJECT ( config_infiniband );
1060