1 #ifndef _IPXE_INFINIBAND_H
2 #define _IPXE_INFINIBAND_H
3
4 /** @file
5 *
6 * Infiniband protocol
7 *
8 */
9
10 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11
12 #include <stdint.h>
13 #include <ipxe/refcnt.h>
14 #include <ipxe/device.h>
15 #include <ipxe/tables.h>
16 #include <ipxe/ib_packet.h>
17 #include <ipxe/ib_mad.h>
18 #include <ipxe/if_ether.h>
19
/** Subnet management interface QPN */
#define IB_QPN_SMI 0

/** Subnet management interface queue key */
#define IB_QKEY_SMI 0

/** General service interface QPN */
#define IB_QPN_GSI 1

/** General service interface queue key */
#define IB_QKEY_GSI 0x80010000UL

/** Broadcast QPN */
#define IB_QPN_BROADCAST 0xffffffUL

/** QPN mask */
#define IB_QPN_MASK 0xffffffUL

/** Default Infiniband partition key */
#define IB_PKEY_DEFAULT 0xffff

/** Infiniband partition key full membership flag */
#define IB_PKEY_FULL 0x8000

/**
 * Maximum payload size
 *
 * This is currently hard-coded in various places (drivers, subnet
 * management agent, etc.) to 2048.
 */
#define IB_MAX_PAYLOAD_SIZE 2048

/* Forward declarations for types referenced by pointer below */
struct ib_device;
struct ib_queue_pair;
struct ib_address_vector;
struct ib_completion_queue;
struct ib_mad_interface;
57
/**
 * Infiniband transmission rates
 *
 * The enumerator values are assigned explicitly and are not ordered
 * by the speed suggested by the names (e.g. IB_RATE_30 has a lower
 * value than IB_RATE_5), so they must not be compared numerically to
 * rank speeds.
 * NOTE(review): presumably these match an externally defined
 * encoding; confirm before renumbering.
 */
enum ib_rate {
	IB_RATE_2_5 = 2,
	IB_RATE_10 = 3,
	IB_RATE_30 = 4,
	IB_RATE_5 = 5,
	IB_RATE_20 = 6,
	IB_RATE_40 = 7,
	IB_RATE_60 = 8,
	IB_RATE_80 = 9,
	IB_RATE_120 = 10,
};
70
71 /** An Infiniband Address Vector */
72 struct ib_address_vector {
73 /** Queue Pair Number */
74 unsigned long qpn;
75 /** Queue key
76 *
77 * Not specified for received packets.
78 */
79 unsigned long qkey;
80 /** Local ID */
81 unsigned int lid;
82 /** Rate
83 *
84 * Not specified for received packets.
85 */
86 enum ib_rate rate;
87 /** Service level */
88 unsigned int sl;
89 /** GID is present */
90 unsigned int gid_present;
91 /** GID, if present */
92 union ib_gid gid;
93 /** VLAN is present */
94 unsigned int vlan_present;
95 /** VLAN, if present */
96 unsigned int vlan;
97 };
98
99 /** An Infiniband Work Queue */
100 struct ib_work_queue {
101 /** Containing queue pair */
102 struct ib_queue_pair *qp;
103 /** "Is a send queue" flag */
104 int is_send;
105 /** Associated completion queue */
106 struct ib_completion_queue *cq;
107 /** List of work queues on this completion queue */
108 struct list_head list;
109 /** Packet sequence number */
110 uint32_t psn;
111 /** Number of work queue entries */
112 unsigned int num_wqes;
113 /** Number of occupied work queue entries */
114 unsigned int fill;
115 /** Next work queue entry index
116 *
117 * This is the index of the next entry to be filled (i.e. the
118 * first empty entry). This value is not bounded by num_wqes;
119 * users must logical-AND with (num_wqes-1) to generate an
120 * array index.
121 */
122 unsigned long next_idx;
123 /** I/O buffers assigned to work queue */
124 struct io_buffer **iobufs;
125 /** Driver private data */
126 void *drv_priv;
127 };
128
129 /** An Infiniband multicast GID */
130 struct ib_multicast_gid {
131 /** List of multicast GIDs on this QP */
132 struct list_head list;
133 /** Multicast GID */
134 union ib_gid gid;
135 };
136
/**
 * An Infiniband queue pair type
 *
 * Enumerators take the default sequential values starting at zero.
 */
enum ib_queue_pair_type {
	IB_QPT_SMI,
	IB_QPT_GSI,
	IB_QPT_UD,
	IB_QPT_RC,
	IB_QPT_ETH,
};
145
/** Infiniband queue pair operations */
struct ib_queue_pair_operations {
	/** Allocate receive I/O buffer
	 *
	 * @v len		Maximum receive length
	 * @ret iobuf		I/O buffer (or NULL if out of memory)
	 */
	struct io_buffer * ( * alloc_iob ) ( size_t len );
};
155
156 /** An Infiniband Queue Pair */
157 struct ib_queue_pair {
158 /** Containing Infiniband device */
159 struct ib_device *ibdev;
160 /** List of queue pairs on this Infiniband device */
161 struct list_head list;
162 /** Queue pair name */
163 const char *name;
164 /** Queue pair number */
165 unsigned long qpn;
166 /** Externally-visible queue pair number
167 *
168 * This may differ from the real queue pair number (e.g. when
169 * the HCA cannot use the management QPNs 0 and 1 as hardware
170 * QPNs and needs to remap them).
171 */
172 unsigned long ext_qpn;
173 /** Queue pair type */
174 enum ib_queue_pair_type type;
175 /** Queue key */
176 unsigned long qkey;
177 /** Send queue */
178 struct ib_work_queue send;
179 /** Receive queue */
180 struct ib_work_queue recv;
181 /** List of multicast GIDs */
182 struct list_head mgids;
183 /** Address vector */
184 struct ib_address_vector av;
185 /** Queue pair operations */
186 struct ib_queue_pair_operations *op;
187 /** Driver private data */
188 void *drv_priv;
189 /** Queue owner private data */
190 void *owner_priv;
191 };
192
/** Infiniband completion queue operations */
struct ib_completion_queue_operations {
	/**
	 * Complete Send WQE
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v iobuf		I/O buffer
	 * @v rc		Completion status code
	 */
	void ( * complete_send ) ( struct ib_device *ibdev,
				   struct ib_queue_pair *qp,
				   struct io_buffer *iobuf, int rc );
	/**
	 * Complete Receive WQE
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v dest		Destination address vector, or NULL
	 * @v source		Source address vector, or NULL
	 * @v iobuf		I/O buffer
	 * @v rc		Completion status code
	 */
	void ( * complete_recv ) ( struct ib_device *ibdev,
				   struct ib_queue_pair *qp,
				   struct ib_address_vector *dest,
				   struct ib_address_vector *source,
				   struct io_buffer *iobuf, int rc );
};
222
223 /** An Infiniband Completion Queue */
224 struct ib_completion_queue {
225 /** Containing Infiniband device */
226 struct ib_device *ibdev;
227 /** List of completion queues on this Infiniband device */
228 struct list_head list;
229 /** Completion queue number */
230 unsigned long cqn;
231 /** Number of completion queue entries */
232 unsigned int num_cqes;
233 /** Next completion queue entry index
234 *
235 * This is the index of the next entry to be filled (i.e. the
236 * first empty entry). This value is not bounded by num_wqes;
237 * users must logical-AND with (num_wqes-1) to generate an
238 * array index.
239 */
240 unsigned long next_idx;
241 /** List of work queues completing to this queue */
242 struct list_head work_queues;
243 /** Completion queue operations */
244 struct ib_completion_queue_operations *op;
245 /** Driver private data */
246 void *drv_priv;
247 };
248
/**
 * Infiniband device operations
 *
 * These represent a subset of the Infiniband Verbs.
 */
struct ib_device_operations {
	/** Create completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 * @ret rc		Return status code
	 */
	int ( * create_cq ) ( struct ib_device *ibdev,
			      struct ib_completion_queue *cq );
	/** Destroy completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 */
	void ( * destroy_cq ) ( struct ib_device *ibdev,
				struct ib_completion_queue *cq );
	/** Create queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @ret rc		Return status code
	 */
	int ( * create_qp ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp );
	/** Modify queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @ret rc		Return status code
	 */
	int ( * modify_qp ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp );
	/** Destroy queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 */
	void ( * destroy_qp ) ( struct ib_device *ibdev,
				struct ib_queue_pair *qp );
	/** Post send work queue entry
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v dest		Destination address vector
	 * @v iobuf		I/O buffer
	 * @ret rc		Return status code
	 *
	 * If this method returns success, the I/O buffer remains
	 * owned by the queue pair.  If this method returns failure,
	 * the I/O buffer is immediately released; the failure is
	 * interpreted as "failure to enqueue buffer".
	 */
	int ( * post_send ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp,
			      struct ib_address_vector *dest,
			      struct io_buffer *iobuf );
	/** Post receive work queue entry
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v iobuf		I/O buffer
	 * @ret rc		Return status code
	 *
	 * If this method returns success, the I/O buffer remains
	 * owned by the queue pair.  If this method returns failure,
	 * the I/O buffer is immediately released; the failure is
	 * interpreted as "failure to enqueue buffer".
	 */
	int ( * post_recv ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp,
			      struct io_buffer *iobuf );
	/** Poll completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 *
	 * The relevant completion handler (specified at completion
	 * queue creation time) takes ownership of the I/O buffer.
	 */
	void ( * poll_cq ) ( struct ib_device *ibdev,
			     struct ib_completion_queue *cq );
	/**
	 * Poll event queue
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * poll_eq ) ( struct ib_device *ibdev );
	/**
	 * Open port
	 *
	 * @v ibdev		Infiniband device
	 * @ret rc		Return status code
	 */
	int ( * open ) ( struct ib_device *ibdev );
	/**
	 * Close port
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * close ) ( struct ib_device *ibdev );
	/** Attach to multicast group
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v gid		Multicast GID
	 * @ret rc		Return status code
	 */
	int ( * mcast_attach ) ( struct ib_device *ibdev,
				 struct ib_queue_pair *qp,
				 union ib_gid *gid );
	/** Detach from multicast group
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v gid		Multicast GID
	 */
	void ( * mcast_detach ) ( struct ib_device *ibdev,
				  struct ib_queue_pair *qp,
				  union ib_gid *gid );
	/** Set port information
	 *
	 * @v ibdev		Infiniband device
	 * @v mad		Set port information MAD
	 * @ret rc		Return status code
	 *
	 * This method is required only by adapters that do not have
	 * an embedded SMA.
	 */
	int ( * set_port_info ) ( struct ib_device *ibdev, union ib_mad *mad );
	/** Set partition key table
	 *
	 * @v ibdev		Infiniband device
	 * @v mad		Set partition key table MAD
	 * @ret rc		Return status code
	 *
	 * This method is required only by adapters that do not have
	 * an embedded SMA.
	 */
	int ( * set_pkey_table ) ( struct ib_device *ibdev,
				   union ib_mad *mad );
};
393
394 /** Maximum length of an Infiniband device name */
395 #define IBDEV_NAME_LEN 8
396
397 /** An Infiniband device */
398 struct ib_device {
399 /** Reference counter */
400 struct refcnt refcnt;
401 /** List of Infiniband devices */
402 struct list_head list;
403 /** List of open Infiniband devices */
404 struct list_head open_list;
405 /** Index of this Infiniband device */
406 unsigned int index;
407 /** Name of this Infiniband device */
408 char name[IBDEV_NAME_LEN];
409 /** Underlying device */
410 struct device *dev;
411 /** List of completion queues */
412 struct list_head cqs;
413 /** List of queue pairs */
414 struct list_head qps;
415 /** Infiniband operations */
416 struct ib_device_operations *op;
417 /** Port number */
418 unsigned int port;
419 /** Total ports on device */
420 unsigned int ports;
421 /** Port open request counter */
422 unsigned int open_count;
423
424 /** Port state */
425 uint8_t port_state;
426 /** Link width supported */
427 uint8_t link_width_supported;
428 /** Link width enabled */
429 uint8_t link_width_enabled;
430 /** Link width active */
431 uint8_t link_width_active;
432 /** Link speed supported */
433 uint8_t link_speed_supported;
434 /** Link speed enabled */
435 uint8_t link_speed_enabled;
436 /** Link speed active */
437 uint8_t link_speed_active;
438 /** Node GUID */
439 union ib_guid node_guid;
440 /** Port GID (comprising GID prefix and port GUID) */
441 union ib_gid gid;
442 /** Port LID */
443 uint16_t lid;
444 /** Subnet manager LID */
445 uint16_t sm_lid;
446 /** Subnet manager SL */
447 uint8_t sm_sl;
448 /** Partition key */
449 uint16_t pkey;
450
451 /** RDMA key
452 *
453 * This is a single key allowing unrestricted access to
454 * memory.
455 */
456 uint32_t rdma_key;
457
458 /** Subnet management interface */
459 struct ib_mad_interface *smi;
460 /** General services interface */
461 struct ib_mad_interface *gsi;
462
463 /** IPoIB LEMAC (if non-default) */
464 uint8_t lemac[ETH_ALEN];
465
466 /** Driver private data */
467 void *drv_priv;
468 };
469
/** An Infiniband upper-layer driver */
struct ib_driver {
	/** Name */
	const char *name;
	/** Probe device
	 *
	 * @v ibdev		Infiniband device
	 * @ret rc		Return status code
	 */
	int ( * probe ) ( struct ib_device *ibdev );
	/** Notify of device or link state change
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * notify ) ( struct ib_device *ibdev );
	/** Remove device
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * remove ) ( struct ib_device *ibdev );
};
491
/** Infiniband driver table */
#define IB_DRIVERS __table ( struct ib_driver, "ib_drivers" )

/** Declare an Infiniband driver */
#define __ib_driver __table_entry ( IB_DRIVERS, 01 )

extern int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
			  struct ib_completion_queue_operations *op,
			  struct ib_completion_queue **new_cq );
extern void ib_destroy_cq ( struct ib_device *ibdev,
			    struct ib_completion_queue *cq );
extern void ib_poll_cq ( struct ib_device *ibdev,
			 struct ib_completion_queue *cq );
extern int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
			  unsigned int num_send_wqes,
			  struct ib_completion_queue *send_cq,
			  unsigned int num_recv_wqes,
			  struct ib_completion_queue *recv_cq,
			  struct ib_queue_pair_operations *op,
			  const char *name, struct ib_queue_pair **new_qp );
extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp );
extern void ib_destroy_qp ( struct ib_device *ibdev,
			    struct ib_queue_pair *qp );
extern struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
					       unsigned long qpn );
extern struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
						union ib_gid *gid );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
					   unsigned long qpn, int is_send );
extern int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			  struct ib_address_vector *dest,
			  struct io_buffer *iobuf );
extern int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			  struct io_buffer *iobuf );
extern void ib_complete_send ( struct ib_device *ibdev,
			       struct ib_queue_pair *qp,
			       struct io_buffer *iobuf, int rc );
extern void ib_complete_recv ( struct ib_device *ibdev,
			       struct ib_queue_pair *qp,
			       struct ib_address_vector *dest,
			       struct ib_address_vector *source,
			       struct io_buffer *iobuf, int rc );
extern void ib_refill_recv ( struct ib_device *ibdev,
			     struct ib_queue_pair *qp );
extern int ib_open ( struct ib_device *ibdev );
extern void ib_close ( struct ib_device *ibdev );
extern int ib_link_rc ( struct ib_device *ibdev );
extern int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			     union ib_gid *gid );
extern void ib_mcast_detach ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp, union ib_gid *gid );
extern int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad );
extern int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad );
extern struct ib_device * alloc_ibdev ( size_t priv_size );
extern int register_ibdev ( struct ib_device *ibdev );
extern void unregister_ibdev ( struct ib_device *ibdev );
extern struct ib_device * find_ibdev ( union ib_gid *gid );
extern struct ib_device * last_opened_ibdev ( void );
extern void ib_link_state_changed ( struct ib_device *ibdev );
extern void ib_poll_eq ( struct ib_device *ibdev );
extern struct list_head ib_devices;

/** Iterate over all Infiniband devices */
#define for_each_ibdev( ibdev ) \
	list_for_each_entry ( (ibdev), &ib_devices, list )
557
558 /**
559 * Check link state of Infiniband device
560 *
561 * @v ibdev Infiniband device
562 * @ret link_up Link is up
563 */
564 static inline __always_inline int
ib_link_ok(struct ib_device * ibdev)565 ib_link_ok ( struct ib_device *ibdev ) {
566 return ( ibdev->port_state == IB_PORT_STATE_ACTIVE );
567 }
568
569 /**
570 * Check whether or not Infiniband device is open
571 *
572 * @v ibdev Infiniband device
573 * @v is_open Infiniband device is open
574 */
575 static inline __attribute__ (( always_inline )) int
ib_is_open(struct ib_device * ibdev)576 ib_is_open ( struct ib_device *ibdev ) {
577 return ( ibdev->open_count > 0 );
578 }
579
580 /**
581 * Get reference to Infiniband device
582 *
583 * @v ibdev Infiniband device
584 * @ret ibdev Infiniband device
585 */
586 static inline __always_inline struct ib_device *
ibdev_get(struct ib_device * ibdev)587 ibdev_get ( struct ib_device *ibdev ) {
588 ref_get ( &ibdev->refcnt );
589 return ibdev;
590 }
591
592 /**
593 * Drop reference to Infiniband device
594 *
595 * @v ibdev Infiniband device
596 */
597 static inline __always_inline void
ibdev_put(struct ib_device * ibdev)598 ibdev_put ( struct ib_device *ibdev ) {
599 ref_put ( &ibdev->refcnt );
600 }
601
602 /**
603 * Set Infiniband work queue driver-private data
604 *
605 * @v wq Work queue
606 * @v priv Private data
607 */
608 static inline __always_inline void
ib_wq_set_drvdata(struct ib_work_queue * wq,void * priv)609 ib_wq_set_drvdata ( struct ib_work_queue *wq, void *priv ) {
610 wq->drv_priv = priv;
611 }
612
613 /**
614 * Get Infiniband work queue driver-private data
615 *
616 * @v wq Work queue
617 * @ret priv Private data
618 */
619 static inline __always_inline void *
ib_wq_get_drvdata(struct ib_work_queue * wq)620 ib_wq_get_drvdata ( struct ib_work_queue *wq ) {
621 return wq->drv_priv;
622 }
623
624 /**
625 * Set Infiniband queue pair driver-private data
626 *
627 * @v qp Queue pair
628 * @v priv Private data
629 */
630 static inline __always_inline void
ib_qp_set_drvdata(struct ib_queue_pair * qp,void * priv)631 ib_qp_set_drvdata ( struct ib_queue_pair *qp, void *priv ) {
632 qp->drv_priv = priv;
633 }
634
635 /**
636 * Get Infiniband queue pair driver-private data
637 *
638 * @v qp Queue pair
639 * @ret priv Private data
640 */
641 static inline __always_inline void *
ib_qp_get_drvdata(struct ib_queue_pair * qp)642 ib_qp_get_drvdata ( struct ib_queue_pair *qp ) {
643 return qp->drv_priv;
644 }
645
646 /**
647 * Set Infiniband queue pair owner-private data
648 *
649 * @v qp Queue pair
650 * @v priv Private data
651 */
652 static inline __always_inline void
ib_qp_set_ownerdata(struct ib_queue_pair * qp,void * priv)653 ib_qp_set_ownerdata ( struct ib_queue_pair *qp, void *priv ) {
654 qp->owner_priv = priv;
655 }
656
657 /**
658 * Get Infiniband queue pair owner-private data
659 *
660 * @v qp Queue pair
661 * @ret priv Private data
662 */
663 static inline __always_inline void *
ib_qp_get_ownerdata(struct ib_queue_pair * qp)664 ib_qp_get_ownerdata ( struct ib_queue_pair *qp ) {
665 return qp->owner_priv;
666 }
667
668 /**
669 * Set Infiniband completion queue driver-private data
670 *
671 * @v cq Completion queue
672 * @v priv Private data
673 */
674 static inline __always_inline void
ib_cq_set_drvdata(struct ib_completion_queue * cq,void * priv)675 ib_cq_set_drvdata ( struct ib_completion_queue *cq, void *priv ) {
676 cq->drv_priv = priv;
677 }
678
679 /**
680 * Get Infiniband completion queue driver-private data
681 *
682 * @v cq Completion queue
683 * @ret priv Private data
684 */
685 static inline __always_inline void *
ib_cq_get_drvdata(struct ib_completion_queue * cq)686 ib_cq_get_drvdata ( struct ib_completion_queue *cq ) {
687 return cq->drv_priv;
688 }
689
690 /**
691 * Set Infiniband device driver-private data
692 *
693 * @v ibdev Infiniband device
694 * @v priv Private data
695 */
696 static inline __always_inline void
ib_set_drvdata(struct ib_device * ibdev,void * priv)697 ib_set_drvdata ( struct ib_device *ibdev, void *priv ) {
698 ibdev->drv_priv = priv;
699 }
700
701 /**
702 * Get Infiniband device driver-private data
703 *
704 * @v ibdev Infiniband device
705 * @ret priv Private data
706 */
707 static inline __always_inline void *
ib_get_drvdata(struct ib_device * ibdev)708 ib_get_drvdata ( struct ib_device *ibdev ) {
709 return ibdev->drv_priv;
710 }
711
712 #endif /* _IPXE_INFINIBAND_H */
713