1 /*
2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <byteswap.h>
32 #include <errno.h>
33 #include <assert.h>
34 #include <ipxe/list.h>
35 #include <ipxe/errortab.h>
36 #include <ipxe/if_arp.h>
37 #include <ipxe/netdevice.h>
38 #include <ipxe/iobuf.h>
39 #include <ipxe/process.h>
40 #include <ipxe/profile.h>
41 #include <ipxe/infiniband.h>
42 #include <ipxe/ib_mi.h>
43 #include <ipxe/ib_sma.h>
44 
45 /** @file
46  *
47  * Infiniband protocol
48  *
49  */
50 
51 /** List of Infiniband devices */
52 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
53 
54 /** List of open Infiniband devices, in reverse order of opening */
55 static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
56 
57 /** Infiniband device index */
58 static unsigned int ibdev_index = 0;
59 
60 /** Post send work queue entry profiler */
61 static struct profiler ib_post_send_profiler __profiler =
62 	{ .name = "ib.post_send" };
63 
64 /** Post receive work queue entry profiler */
65 static struct profiler ib_post_recv_profiler __profiler =
66 	{ .name = "ib.post_recv" };
67 
68 /* Disambiguate the various possible EINPROGRESSes */
69 #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
70 #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
71 	( EINFO_EINPROGRESS, 0x01, "Initialising" )
72 #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
73 #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
74 	( EINFO_EINPROGRESS, 0x02, "Armed" )
75 
76 /** Human-readable message for the link statuses */
77 struct errortab infiniband_errors[] __errortab = {
78 	__einfo_errortab ( EINFO_EINPROGRESS_INIT ),
79 	__einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
80 };
81 
82 /***************************************************************************
83  *
84  * Completion queues
85  *
86  ***************************************************************************
87  */
88 
89 /**
90  * Create completion queue
91  *
92  * @v ibdev		Infiniband device
93  * @v num_cqes		Number of completion queue entries
94  * @v op		Completion queue operations
95  * @v new_cq		New completion queue to fill in
96  * @ret rc		Return status code
97  */
ib_create_cq(struct ib_device * ibdev,unsigned int num_cqes,struct ib_completion_queue_operations * op,struct ib_completion_queue ** new_cq)98 int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
99 		   struct ib_completion_queue_operations *op,
100 		   struct ib_completion_queue **new_cq ) {
101 	struct ib_completion_queue *cq;
102 	int rc;
103 
104 	DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
105 
106 	/* Allocate and initialise data structure */
107 	cq = zalloc ( sizeof ( *cq ) );
108 	if ( ! cq ) {
109 		rc = -ENOMEM;
110 		goto err_alloc_cq;
111 	}
112 	cq->ibdev = ibdev;
113 	list_add_tail ( &cq->list, &ibdev->cqs );
114 	cq->num_cqes = num_cqes;
115 	INIT_LIST_HEAD ( &cq->work_queues );
116 	cq->op = op;
117 
118 	/* Perform device-specific initialisation and get CQN */
119 	if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
120 		DBGC ( ibdev, "IBDEV %s could not initialise completion "
121 		       "queue: %s\n", ibdev->name, strerror ( rc ) );
122 		goto err_dev_create_cq;
123 	}
124 
125 	DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
126 	       "with CQN %#lx\n", ibdev->name, num_cqes, cq,
127 	       ib_cq_get_drvdata ( cq ), cq->cqn );
128 	*new_cq = cq;
129 	return 0;
130 
131 	ibdev->op->destroy_cq ( ibdev, cq );
132  err_dev_create_cq:
133 	list_del ( &cq->list );
134 	free ( cq );
135  err_alloc_cq:
136 	return rc;
137 }
138 
139 /**
140  * Destroy completion queue
141  *
142  * @v ibdev		Infiniband device
143  * @v cq		Completion queue
144  */
ib_destroy_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)145 void ib_destroy_cq ( struct ib_device *ibdev,
146 		     struct ib_completion_queue *cq ) {
147 	DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
148 	       ibdev->name, cq->cqn );
149 	assert ( list_empty ( &cq->work_queues ) );
150 	ibdev->op->destroy_cq ( ibdev, cq );
151 	list_del ( &cq->list );
152 	free ( cq );
153 }
154 
155 /**
156  * Poll completion queue
157  *
158  * @v ibdev		Infiniband device
159  * @v cq		Completion queue
160  */
ib_poll_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)161 void ib_poll_cq ( struct ib_device *ibdev,
162 		  struct ib_completion_queue *cq ) {
163 	struct ib_work_queue *wq;
164 
165 	/* Poll completion queue */
166 	ibdev->op->poll_cq ( ibdev, cq );
167 
168 	/* Refill receive work queues */
169 	list_for_each_entry ( wq, &cq->work_queues, list ) {
170 		if ( ! wq->is_send )
171 			ib_refill_recv ( ibdev, wq->qp );
172 	}
173 }
174 
175 /***************************************************************************
176  *
177  * Work queues
178  *
179  ***************************************************************************
180  */
181 
182 /**
183  * Create queue pair
184  *
185  * @v ibdev		Infiniband device
186  * @v type		Queue pair type
187  * @v num_send_wqes	Number of send work queue entries
188  * @v send_cq		Send completion queue
189  * @v num_recv_wqes	Number of receive work queue entries
190  * @v recv_cq		Receive completion queue
191  * @v op		Queue pair operations
192  * @v name		Queue pair name
193  * @v new_qp		New queue pair to fill in
194  * @ret rc		Return status code
195  *
196  * The queue pair will be left in the INIT state; you must call
197  * ib_modify_qp() before it is ready to use for sending and receiving.
198  */
ib_create_qp(struct ib_device * ibdev,enum ib_queue_pair_type type,unsigned int num_send_wqes,struct ib_completion_queue * send_cq,unsigned int num_recv_wqes,struct ib_completion_queue * recv_cq,struct ib_queue_pair_operations * op,const char * name,struct ib_queue_pair ** new_qp)199 int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
200 		   unsigned int num_send_wqes,
201 		   struct ib_completion_queue *send_cq,
202 		   unsigned int num_recv_wqes,
203 		   struct ib_completion_queue *recv_cq,
204 		   struct ib_queue_pair_operations *op, const char *name,
205 		   struct ib_queue_pair **new_qp ) {
206 	struct ib_queue_pair *qp;
207 	size_t total_size;
208 	int rc;
209 
210 	DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
211 
212 	/* Allocate and initialise data structure */
213 	total_size = ( sizeof ( *qp ) +
214 		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
215 		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
216 	qp = zalloc ( total_size );
217 	if ( ! qp ) {
218 		rc = -ENOMEM;
219 		goto err_alloc_qp;
220 	}
221 	qp->ibdev = ibdev;
222 	list_add_tail ( &qp->list, &ibdev->qps );
223 	qp->type = type;
224 	qp->send.qp = qp;
225 	qp->send.is_send = 1;
226 	qp->send.cq = send_cq;
227 	list_add_tail ( &qp->send.list, &send_cq->work_queues );
228 	qp->send.psn = ( random() & 0xffffffUL );
229 	qp->send.num_wqes = num_send_wqes;
230 	qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
231 	qp->recv.qp = qp;
232 	qp->recv.cq = recv_cq;
233 	list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
234 	qp->recv.psn = ( random() & 0xffffffUL );
235 	qp->recv.num_wqes = num_recv_wqes;
236 	qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
237 			    ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
238 	INIT_LIST_HEAD ( &qp->mgids );
239 	qp->op = op;
240 	qp->name = name;
241 
242 	/* Perform device-specific initialisation and get QPN */
243 	if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
244 		DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
245 		       "%s\n", ibdev->name, strerror ( rc ) );
246 		goto err_dev_create_qp;
247 	}
248 	DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
249 	       ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
250 	DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
251 	       ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
252 	       qp->recv.iobufs );
253 	DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
254 	       ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
255 	       ( ( ( void * ) qp ) + total_size ) );
256 
257 	/* Calculate externally-visible QPN */
258 	switch ( type ) {
259 	case IB_QPT_SMI:
260 		qp->ext_qpn = IB_QPN_SMI;
261 		break;
262 	case IB_QPT_GSI:
263 		qp->ext_qpn = IB_QPN_GSI;
264 		break;
265 	default:
266 		qp->ext_qpn = qp->qpn;
267 		break;
268 	}
269 	if ( qp->ext_qpn != qp->qpn ) {
270 		DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
271 		       ibdev->name, qp->qpn, qp->ext_qpn );
272 	}
273 
274 	*new_qp = qp;
275 	return 0;
276 
277 	ibdev->op->destroy_qp ( ibdev, qp );
278  err_dev_create_qp:
279 	list_del ( &qp->send.list );
280 	list_del ( &qp->recv.list );
281 	list_del ( &qp->list );
282 	free ( qp );
283  err_alloc_qp:
284 	return rc;
285 }
286 
287 /**
288  * Modify queue pair
289  *
290  * @v ibdev		Infiniband device
291  * @v qp		Queue pair
292  * @ret rc		Return status code
293  */
ib_modify_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)294 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
295 	int rc;
296 
297 	DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
298 
299 	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
300 		DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
301 		       ibdev->name, qp->qpn, strerror ( rc ) );
302 		return rc;
303 	}
304 
305 	return 0;
306 }
307 
308 /**
309  * Destroy queue pair
310  *
311  * @v ibdev		Infiniband device
312  * @v qp		Queue pair
313  */
ib_destroy_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)314 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
315 	struct io_buffer *iobuf;
316 	unsigned int i;
317 
318 	DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
319 	       ibdev->name, qp->qpn );
320 
321 	assert ( list_empty ( &qp->mgids ) );
322 
323 	/* Perform device-specific destruction */
324 	ibdev->op->destroy_qp ( ibdev, qp );
325 
326 	/* Complete any remaining I/O buffers with errors */
327 	for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
328 		if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
329 			ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
330 	}
331 	for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
332 		if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
333 			ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
334 					   -ECANCELED );
335 		}
336 	}
337 
338 	/* Remove work queues from completion queue */
339 	list_del ( &qp->send.list );
340 	list_del ( &qp->recv.list );
341 
342 	/* Free QP */
343 	list_del ( &qp->list );
344 	free ( qp );
345 }
346 
347 /**
348  * Find queue pair by QPN
349  *
350  * @v ibdev		Infiniband device
351  * @v qpn		Queue pair number
352  * @ret qp		Queue pair, or NULL
353  */
ib_find_qp_qpn(struct ib_device * ibdev,unsigned long qpn)354 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
355 					unsigned long qpn ) {
356 	struct ib_queue_pair *qp;
357 
358 	list_for_each_entry ( qp, &ibdev->qps, list ) {
359 		if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
360 			return qp;
361 	}
362 	return NULL;
363 }
364 
365 /**
366  * Find queue pair by multicast GID
367  *
368  * @v ibdev		Infiniband device
369  * @v gid		Multicast GID
370  * @ret qp		Queue pair, or NULL
371  */
ib_find_qp_mgid(struct ib_device * ibdev,union ib_gid * gid)372 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
373 					 union ib_gid *gid ) {
374 	struct ib_queue_pair *qp;
375 	struct ib_multicast_gid *mgid;
376 
377 	list_for_each_entry ( qp, &ibdev->qps, list ) {
378 		list_for_each_entry ( mgid, &qp->mgids, list ) {
379 			if ( memcmp ( &mgid->gid, gid,
380 				      sizeof ( mgid->gid ) ) == 0 ) {
381 				return qp;
382 			}
383 		}
384 	}
385 	return NULL;
386 }
387 
388 /**
389  * Find work queue belonging to completion queue
390  *
391  * @v cq		Completion queue
392  * @v qpn		Queue pair number
393  * @v is_send		Find send work queue (rather than receive)
394  * @ret wq		Work queue, or NULL if not found
395  */
ib_find_wq(struct ib_completion_queue * cq,unsigned long qpn,int is_send)396 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
397 				    unsigned long qpn, int is_send ) {
398 	struct ib_work_queue *wq;
399 
400 	list_for_each_entry ( wq, &cq->work_queues, list ) {
401 		if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
402 			return wq;
403 	}
404 	return NULL;
405 }
406 
407 /**
408  * Post send work queue entry
409  *
410  * @v ibdev		Infiniband device
411  * @v qp		Queue pair
412  * @v dest		Destination address vector
413  * @v iobuf		I/O buffer
414  * @ret rc		Return status code
415  */
ib_post_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * dest,struct io_buffer * iobuf)416 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
417 		   struct ib_address_vector *dest,
418 		   struct io_buffer *iobuf ) {
419 	struct ib_address_vector dest_copy;
420 	int rc;
421 
422 	/* Start profiling */
423 	profile_start ( &ib_post_send_profiler );
424 
425 	/* Check queue fill level */
426 	if ( qp->send.fill >= qp->send.num_wqes ) {
427 		DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
428 		       ibdev->name, qp->qpn );
429 		return -ENOBUFS;
430 	}
431 
432 	/* Use default address vector if none specified */
433 	if ( ! dest )
434 		dest = &qp->av;
435 
436 	/* Make modifiable copy of address vector */
437 	memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
438 	dest = &dest_copy;
439 
440 	/* Fill in optional parameters in address vector */
441 	if ( ! dest->qkey )
442 		dest->qkey = qp->qkey;
443 	if ( ! dest->rate )
444 		dest->rate = IB_RATE_2_5;
445 
446 	/* Post to hardware */
447 	if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
448 		DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
449 		       "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
450 		return rc;
451 	}
452 
453 	/* Increase fill level */
454 	qp->send.fill++;
455 
456 	/* Stop profiling */
457 	profile_stop ( &ib_post_send_profiler );
458 
459 	return 0;
460 }
461 
462 /**
463  * Post receive work queue entry
464  *
465  * @v ibdev		Infiniband device
466  * @v qp		Queue pair
467  * @v iobuf		I/O buffer
468  * @ret rc		Return status code
469  */
ib_post_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf)470 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
471 		   struct io_buffer *iobuf ) {
472 	int rc;
473 
474 	/* Start profiling */
475 	profile_start ( &ib_post_recv_profiler );
476 
477 	/* Check packet length */
478 	if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
479 		DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
480 		       ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
481 		return -EINVAL;
482 	}
483 
484 	/* Check queue fill level */
485 	if ( qp->recv.fill >= qp->recv.num_wqes ) {
486 		DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
487 		       ibdev->name, qp->qpn );
488 		return -ENOBUFS;
489 	}
490 
491 	/* Post to hardware */
492 	if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
493 		DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
494 		       "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
495 		return rc;
496 	}
497 
498 	/* Increase fill level */
499 	qp->recv.fill++;
500 
501 	/* Stop profiling */
502 	profile_stop ( &ib_post_recv_profiler );
503 
504 	return 0;
505 }
506 
507 /**
508  * Complete send work queue entry
509  *
510  * @v ibdev		Infiniband device
511  * @v qp		Queue pair
512  * @v iobuf		I/O buffer
513  * @v rc		Completion status code
514  */
ib_complete_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf,int rc)515 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
516 			struct io_buffer *iobuf, int rc ) {
517 
518 	if ( qp->send.cq->op->complete_send ) {
519 		qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
520 	} else {
521 		free_iob ( iobuf );
522 	}
523 	qp->send.fill--;
524 }
525 
526 /**
527  * Complete receive work queue entry
528  *
529  * @v ibdev		Infiniband device
530  * @v qp		Queue pair
531  * @v dest		Destination address vector, or NULL
532  * @v source		Source address vector, or NULL
533  * @v iobuf		I/O buffer
534  * @v rc		Completion status code
535  */
ib_complete_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * dest,struct ib_address_vector * source,struct io_buffer * iobuf,int rc)536 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
537 			struct ib_address_vector *dest,
538 			struct ib_address_vector *source,
539 			struct io_buffer *iobuf, int rc ) {
540 
541 	if ( qp->recv.cq->op->complete_recv ) {
542 		qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
543 						 iobuf, rc );
544 	} else {
545 		free_iob ( iobuf );
546 	}
547 	qp->recv.fill--;
548 }
549 
550 /**
551  * Refill receive work queue
552  *
553  * @v ibdev		Infiniband device
554  * @v qp		Queue pair
555  */
ib_refill_recv(struct ib_device * ibdev,struct ib_queue_pair * qp)556 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
557 	struct io_buffer *iobuf;
558 	int rc;
559 
560 	/* Keep filling while unfilled entries remain */
561 	while ( qp->recv.fill < qp->recv.num_wqes ) {
562 
563 		/* Allocate I/O buffer */
564 		iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
565 		if ( ! iobuf ) {
566 			/* Non-fatal; we will refill on next attempt */
567 			return;
568 		}
569 
570 		/* Post I/O buffer */
571 		if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
572 			DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
573 			       ibdev->name, strerror ( rc ) );
574 			free_iob ( iobuf );
575 			/* Give up */
576 			return;
577 		}
578 	}
579 }
580 
581 /***************************************************************************
582  *
583  * Link control
584  *
585  ***************************************************************************
586  */
587 
588 /**
589  * Get link state
590  *
591  * @v ibdev		Infiniband device
592  * @ret rc		Link status code
593  */
ib_link_rc(struct ib_device * ibdev)594 int ib_link_rc ( struct ib_device *ibdev ) {
595 	switch ( ibdev->port_state ) {
596 	case IB_PORT_STATE_DOWN:	return -ENOTCONN;
597 	case IB_PORT_STATE_INIT:	return -EINPROGRESS_INIT;
598 	case IB_PORT_STATE_ARMED:	return -EINPROGRESS_ARMED;
599 	case IB_PORT_STATE_ACTIVE:	return 0;
600 	default:			return -EINVAL;
601 	}
602 }
603 
604 /**
605  * Textual representation of Infiniband link state
606  *
607  * @v ibdev		Infiniband device
608  * @ret link_text	Link state text
609  */
ib_link_state_text(struct ib_device * ibdev)610 static const char * ib_link_state_text ( struct ib_device *ibdev ) {
611 	switch ( ibdev->port_state ) {
612 	case IB_PORT_STATE_DOWN:	return "DOWN";
613 	case IB_PORT_STATE_INIT:	return "INIT";
614 	case IB_PORT_STATE_ARMED:	return "ARMED";
615 	case IB_PORT_STATE_ACTIVE:	return "ACTIVE";
616 	default:			return "UNKNOWN";
617 	}
618 }
619 
620 /**
621  * Notify drivers of Infiniband device or link state change
622  *
623  * @v ibdev		Infiniband device
624  */
ib_notify(struct ib_device * ibdev)625 static void ib_notify ( struct ib_device *ibdev ) {
626 	struct ib_driver *driver;
627 
628 	for_each_table_entry ( driver, IB_DRIVERS )
629 		driver->notify ( ibdev );
630 }
631 
632 /**
633  * Notify of Infiniband link state change
634  *
635  * @v ibdev		Infiniband device
636  */
ib_link_state_changed(struct ib_device * ibdev)637 void ib_link_state_changed ( struct ib_device *ibdev ) {
638 
639 	DBGC ( ibdev, "IBDEV %s link state is %s\n",
640 	       ibdev->name, ib_link_state_text ( ibdev ) );
641 
642 	/* Notify drivers of link state change */
643 	ib_notify ( ibdev );
644 }
645 
646 /**
647  * Open port
648  *
649  * @v ibdev		Infiniband device
650  * @ret rc		Return status code
651  */
ib_open(struct ib_device * ibdev)652 int ib_open ( struct ib_device *ibdev ) {
653 	int rc;
654 
655 	/* Increment device open request counter */
656 	if ( ibdev->open_count++ > 0 ) {
657 		/* Device was already open; do nothing */
658 		return 0;
659 	}
660 
661 	/* Open device */
662 	if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
663 		DBGC ( ibdev, "IBDEV %s could not open: %s\n",
664 		       ibdev->name, strerror ( rc ) );
665 		goto err_open;
666 	}
667 
668 	/* Create subnet management interface */
669 	if ( ( rc = ib_create_mi ( ibdev, IB_QPT_SMI, &ibdev->smi ) ) != 0 ) {
670 		DBGC ( ibdev, "IBDEV %s could not create SMI: %s\n",
671 		       ibdev->name, strerror ( rc ) );
672 		goto err_create_smi;
673 	}
674 
675 	/* Create subnet management agent */
676 	if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
677 		DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
678 		       ibdev->name, strerror ( rc ) );
679 		goto err_create_sma;
680 	}
681 
682 	/* Create general services interface */
683 	if ( ( rc = ib_create_mi ( ibdev, IB_QPT_GSI, &ibdev->gsi ) ) != 0 ) {
684 		DBGC ( ibdev, "IBDEV %s could not create GSI: %s\n",
685 		       ibdev->name, strerror ( rc ) );
686 		goto err_create_gsi;
687 	}
688 
689 	/* Add to head of open devices list */
690 	list_add ( &ibdev->open_list, &open_ib_devices );
691 
692 	/* Notify drivers of device state change */
693 	ib_notify ( ibdev );
694 
695 	assert ( ibdev->open_count == 1 );
696 	return 0;
697 
698 	ib_destroy_mi ( ibdev, ibdev->gsi );
699  err_create_gsi:
700 	ib_destroy_sma ( ibdev, ibdev->smi );
701  err_create_sma:
702 	ib_destroy_mi ( ibdev, ibdev->smi );
703  err_create_smi:
704 	ibdev->op->close ( ibdev );
705  err_open:
706 	assert ( ibdev->open_count == 1 );
707 	ibdev->open_count = 0;
708 	return rc;
709 }
710 
711 /**
712  * Close port
713  *
714  * @v ibdev		Infiniband device
715  */
ib_close(struct ib_device * ibdev)716 void ib_close ( struct ib_device *ibdev ) {
717 
718 	/* Decrement device open request counter */
719 	ibdev->open_count--;
720 
721 	/* Close device if this was the last remaining requested opening */
722 	if ( ibdev->open_count == 0 ) {
723 		ib_notify ( ibdev );
724 		list_del ( &ibdev->open_list );
725 		ib_destroy_mi ( ibdev, ibdev->gsi );
726 		ib_destroy_sma ( ibdev, ibdev->smi );
727 		ib_destroy_mi ( ibdev, ibdev->smi );
728 		ibdev->op->close ( ibdev );
729 		ibdev->port_state = IB_PORT_STATE_DOWN;
730 	}
731 }
732 
733 /***************************************************************************
734  *
735  * Multicast
736  *
737  ***************************************************************************
738  */
739 
740 /**
741  * Attach to multicast group
742  *
743  * @v ibdev		Infiniband device
744  * @v qp		Queue pair
745  * @v gid		Multicast GID
746  * @ret rc		Return status code
747  *
748  * Note that this function handles only the local device's attachment
749  * to the multicast GID; it does not issue the relevant MADs to join
750  * the multicast group on the subnet.
751  */
ib_mcast_attach(struct ib_device * ibdev,struct ib_queue_pair * qp,union ib_gid * gid)752 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
753 		      union ib_gid *gid ) {
754 	struct ib_multicast_gid *mgid;
755 	int rc;
756 
757 	/* Sanity check */
758 	assert ( qp != NULL );
759 
760 	/* Add to software multicast GID list */
761 	mgid = zalloc ( sizeof ( *mgid ) );
762 	if ( ! mgid ) {
763 		rc = -ENOMEM;
764 		goto err_alloc_mgid;
765 	}
766 	memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
767 	list_add_tail ( &mgid->list, &qp->mgids );
768 
769 	/* Add to hardware multicast GID list */
770 	if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
771 		goto err_dev_mcast_attach;
772 
773 	return 0;
774 
775  err_dev_mcast_attach:
776 	list_del ( &mgid->list );
777 	free ( mgid );
778  err_alloc_mgid:
779 	return rc;
780 }
781 
782 /**
783  * Detach from multicast group
784  *
785  * @v ibdev		Infiniband device
786  * @v qp		Queue pair
787  * @v gid		Multicast GID
788  */
ib_mcast_detach(struct ib_device * ibdev,struct ib_queue_pair * qp,union ib_gid * gid)789 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
790 		       union ib_gid *gid ) {
791 	struct ib_multicast_gid *mgid;
792 
793 	/* Sanity check */
794 	assert ( qp != NULL );
795 
796 	/* Remove from hardware multicast GID list */
797 	ibdev->op->mcast_detach ( ibdev, qp, gid );
798 
799 	/* Remove from software multicast GID list */
800 	list_for_each_entry ( mgid, &qp->mgids, list ) {
801 		if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
802 			list_del ( &mgid->list );
803 			free ( mgid );
804 			break;
805 		}
806 	}
807 }
808 
809 /***************************************************************************
810  *
811  * Miscellaneous
812  *
813  ***************************************************************************
814  */
815 
816 /**
817  * Count Infiniband HCA ports
818  *
819  * @v ibdev		Infiniband device
820  * @ret num_ports	Number of ports
821  */
ib_count_ports(struct ib_device * ibdev)822 int ib_count_ports ( struct ib_device *ibdev ) {
823 	struct ib_device *tmp;
824 	int num_ports = 0;
825 
826 	/* Search for IB devices with the same physical device to
827 	 * identify port count.
828 	 */
829 	for_each_ibdev ( tmp ) {
830 		if ( tmp->dev == ibdev->dev )
831 			num_ports++;
832 	}
833 	return num_ports;
834 }
835 
836 /**
837  * Set port information
838  *
839  * @v ibdev		Infiniband device
840  * @v mad		Set port information MAD
841  */
ib_set_port_info(struct ib_device * ibdev,union ib_mad * mad)842 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
843 	int rc;
844 
845 	/* Adapters with embedded SMAs do not need to support this method */
846 	if ( ! ibdev->op->set_port_info ) {
847 		DBGC ( ibdev, "IBDEV %s does not support setting port "
848 		       "information\n", ibdev->name );
849 		return -ENOTSUP;
850 	}
851 
852 	if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
853 		DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
854 		       ibdev->name, strerror ( rc ) );
855 		return rc;
856 	}
857 
858 	return 0;
859 };
860 
861 /**
862  * Set partition key table
863  *
864  * @v ibdev		Infiniband device
865  * @v mad		Set partition key table MAD
866  */
ib_set_pkey_table(struct ib_device * ibdev,union ib_mad * mad)867 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
868 	int rc;
869 
870 	/* Adapters with embedded SMAs do not need to support this method */
871 	if ( ! ibdev->op->set_pkey_table ) {
872 		DBGC ( ibdev, "IBDEV %s does not support setting partition "
873 		       "key table\n", ibdev->name );
874 		return -ENOTSUP;
875 	}
876 
877 	if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
878 		DBGC ( ibdev, "IBDEV %s could not set partition key table: "
879 		       "%s\n", ibdev->name, strerror ( rc ) );
880 		return rc;
881 	}
882 
883 	return 0;
884 };
885 
886 /***************************************************************************
887  *
888  * Event queues
889  *
890  ***************************************************************************
891  */
892 
893 /**
894  * Poll event queue
895  *
896  * @v ibdev		Infiniband device
897  */
ib_poll_eq(struct ib_device * ibdev)898 void ib_poll_eq ( struct ib_device *ibdev ) {
899 	struct ib_completion_queue *cq;
900 
901 	/* Poll device's event queue */
902 	ibdev->op->poll_eq ( ibdev );
903 
904 	/* Poll all completion queues */
905 	list_for_each_entry ( cq, &ibdev->cqs, list )
906 		ib_poll_cq ( ibdev, cq );
907 }
908 
909 /**
910  * Single-step the Infiniband event queue
911  *
912  * @v process		Infiniband event queue process
913  */
ib_step(struct process * process __unused)914 static void ib_step ( struct process *process __unused ) {
915 	struct ib_device *ibdev;
916 
917 	list_for_each_entry ( ibdev, &open_ib_devices, open_list )
918 		ib_poll_eq ( ibdev );
919 }
920 
921 /** Infiniband event queue process */
922 PERMANENT_PROCESS ( ib_process, ib_step );
923 
924 /***************************************************************************
925  *
926  * Infiniband device creation/destruction
927  *
928  ***************************************************************************
929  */
930 
931 /**
932  * Allocate Infiniband device
933  *
934  * @v priv_size		Size of driver private data area
935  * @ret ibdev		Infiniband device, or NULL
936  */
alloc_ibdev(size_t priv_size)937 struct ib_device * alloc_ibdev ( size_t priv_size ) {
938 	struct ib_device *ibdev;
939 	void *drv_priv;
940 	size_t total_len;
941 
942 	total_len = ( sizeof ( *ibdev ) + priv_size );
943 	ibdev = zalloc ( total_len );
944 	if ( ibdev ) {
945 		drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
946 		ib_set_drvdata ( ibdev, drv_priv );
947 		INIT_LIST_HEAD ( &ibdev->list );
948 		INIT_LIST_HEAD ( &ibdev->open_list );
949 		INIT_LIST_HEAD ( &ibdev->cqs );
950 		INIT_LIST_HEAD ( &ibdev->qps );
951 		ibdev->port_state = IB_PORT_STATE_DOWN;
952 		ibdev->lid = IB_LID_NONE;
953 		ibdev->pkey = IB_PKEY_DEFAULT;
954 	}
955 	return ibdev;
956 }
957 
958 /**
959  * Register Infiniband device
960  *
961  * @v ibdev		Infiniband device
962  * @ret rc		Return status code
963  */
register_ibdev(struct ib_device * ibdev)964 int register_ibdev ( struct ib_device *ibdev ) {
965 	struct ib_driver *driver;
966 	int rc;
967 
968 	/* Record device index and create device name */
969 	if ( ibdev->name[0] == '\0' ) {
970 		snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
971 			   ibdev_index );
972 	}
973 	ibdev->index = ++ibdev_index;
974 
975 	/* Add to device list */
976 	ibdev_get ( ibdev );
977 	list_add_tail ( &ibdev->list, &ib_devices );
978 	DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
979 	       ibdev->dev->name );
980 
981 	/* Probe device */
982 	for_each_table_entry ( driver, IB_DRIVERS ) {
983 		if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
984 			DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
985 			       ibdev->name, driver->name, strerror ( rc ) );
986 			goto err_probe;
987 		}
988 	}
989 
990 	return 0;
991 
992  err_probe:
993 	for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
994 		driver->remove ( ibdev );
995 	list_del ( &ibdev->list );
996 	ibdev_put ( ibdev );
997 	return rc;
998 }
999 
1000 /**
1001  * Unregister Infiniband device
1002  *
1003  * @v ibdev		Infiniband device
1004  */
unregister_ibdev(struct ib_device * ibdev)1005 void unregister_ibdev ( struct ib_device *ibdev ) {
1006 	struct ib_driver *driver;
1007 
1008 	/* Remove device */
1009 	for_each_table_entry_reverse ( driver, IB_DRIVERS )
1010 		driver->remove ( ibdev );
1011 
1012 	/* Remove from device list */
1013 	list_del ( &ibdev->list );
1014 	ibdev_put ( ibdev );
1015 	DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
1016 
1017 	/* Reset device index if no devices remain */
1018 	if ( list_empty ( &ib_devices ) )
1019 		ibdev_index = 0;
1020 }
1021 
1022 /**
1023  * Find Infiniband device by GID
1024  *
1025  * @v gid		GID
1026  * @ret ibdev		Infiniband device, or NULL
1027  */
find_ibdev(union ib_gid * gid)1028 struct ib_device * find_ibdev ( union ib_gid *gid ) {
1029 	struct ib_device *ibdev;
1030 
1031 	for_each_ibdev ( ibdev ) {
1032 		if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
1033 			return ibdev;
1034 	}
1035 	return NULL;
1036 }
1037 
1038 /**
1039  * Get most recently opened Infiniband device
1040  *
1041  * @ret ibdev		Most recently opened Infiniband device, or NULL
1042  */
last_opened_ibdev(void)1043 struct ib_device * last_opened_ibdev ( void ) {
1044 	struct ib_device *ibdev;
1045 
1046 	ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
1047 				   open_list );
1048 	if ( ! ibdev )
1049 		return NULL;
1050 
1051 	assert ( ibdev->open_count != 0 );
1052 	return ibdev;
1053 }
1054 
1055 /* Drag in objects via register_ibdev() */
1056 REQUIRING_SYMBOL ( register_ibdev );
1057 
1058 /* Drag in Infiniband configuration */
1059 REQUIRE_OBJECT ( config_infiniband );
1060