1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
3 *
4 * See file LICENSE for terms.
5 */
6
7 #ifndef UCT_RC_MLX5_COMMON_H
8 #define UCT_RC_MLX5_COMMON_H
9
10 #include <uct/ib/base/ib_device.h>
11 #include <uct/ib/rc/base/rc_iface.h>
12 #include <uct/ib/rc/base/rc_ep.h>
13 #include <uct/ib/mlx5/ib_mlx5.h>
14
15
16 /*
17 * HW tag matching
18 */
19 #if IBV_HW_TM
20 # define UCT_RC_RNDV_HDR_LEN sizeof(struct ibv_rvh)
21 #else
22 # define UCT_RC_RNDV_HDR_LEN 0
23 #endif
24
25 #if IBV_HW_TM
26 # if HAVE_INFINIBAND_TM_TYPES_H
27 # include <infiniband/tm_types.h>
28 # else
29 # define ibv_tmh ibv_exp_tmh
30 # define ibv_rvh ibv_exp_tmh_rvh
31 # define IBV_TM_CAP_RC IBV_EXP_TM_CAP_RC
32 # define IBV_TMH_EAGER IBV_EXP_TMH_EAGER
33 # define IBV_TMH_RNDV IBV_EXP_TMH_RNDV
34 # define IBV_TMH_FIN IBV_EXP_TMH_FIN
35 # define IBV_TMH_NO_TAG IBV_EXP_TMH_NO_TAG
36 # endif
37 # define IBV_DEVICE_TM_CAPS(_dev, _field) ((_dev)->dev_attr.tm_caps._field)
38 #else
39 # define IBV_TM_CAP_RC 0
40 # define IBV_DEVICE_TM_CAPS(_dev, _field) 0
41 #endif
42
43 #if HAVE_STRUCT_IBV_TM_CAPS_FLAGS
44 # define IBV_DEVICE_TM_FLAGS(_dev) IBV_DEVICE_TM_CAPS(_dev, flags)
45 #else
46 # define IBV_DEVICE_TM_FLAGS(_dev) IBV_DEVICE_TM_CAPS(_dev, capability_flags)
47 #endif
48
49 #define IBV_DEVICE_MAX_UNEXP_COUNT UCS_BIT(14)
50
51 #if HAVE_DECL_IBV_EXP_CREATE_SRQ
52 # define ibv_srq_init_attr_ex ibv_exp_create_srq_attr
53 #endif
54
/* Send opcode encoding: the low byte is the raw HW opcode, the bits above it
 * are flags selecting special handling paths */
#define UCT_RC_MLX5_OPCODE_FLAG_RAW  0x100
#define UCT_RC_MLX5_OPCODE_FLAG_TM   0x200
#define UCT_RC_MLX5_OPCODE_MASK      0xff

/* Message which is both the first fragment and has no more fragments, i.e.
 * fits entirely in a single fragment */
#define UCT_RC_MLX5_SINGLE_FRAG_MSG(_flags) \
    (((_flags) & UCT_CB_PARAM_FLAG_FIRST) && !((_flags) & UCT_CB_PARAM_FLAG_MORE))

/* Validate AM id and am_zcopy header/payload sizes.
 * NOTE: expands to multiple statements; each check may return an error from
 * the calling function. */
#define UCT_RC_MLX5_CHECK_AM_ZCOPY(_id, _header_length, _length, _seg_size, _av_size) \
    UCT_CHECK_AM_ID(_id); \
    UCT_RC_CHECK_ZCOPY_DATA(_header_length, _length, _seg_size) \
    UCT_CHECK_LENGTH(sizeof(uct_rc_mlx5_hdr_t) + _header_length, 0, \
                     UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(_av_size), "am_zcopy header");


/* Validate AM id and am_short length against the inline WQE capacity */
#define UCT_RC_MLX5_CHECK_AM_SHORT(_id, _length, _av_size) \
    UCT_CHECK_AM_ID(_id); \
    UCT_CHECK_LENGTH(sizeof(uct_rc_mlx5_am_short_hdr_t) + _length, 0, \
                     UCT_IB_MLX5_AM_MAX_SHORT(_av_size), "am_short");


/* there is no need to do a special check for length == 0 because in that
 * case wqe size is valid: inl + raddr + dgram + ctrl fit in 2 WQ BB
 */
#define UCT_RC_MLX5_CHECK_PUT_SHORT(_length, _av_size) \
    UCT_CHECK_LENGTH(_length, 0, UCT_IB_MLX5_PUT_MAX_SHORT(_av_size), "put_short")

/* Atomic operations supported without fetch semantics */
#define UCT_RC_MLX5_ATOMIC_OPS (UCS_BIT(UCT_ATOMIC_OP_ADD) | \
                                UCS_BIT(UCT_ATOMIC_OP_AND) | \
                                UCS_BIT(UCT_ATOMIC_OP_OR)  | \
                                UCS_BIT(UCT_ATOMIC_OP_XOR))

/* Atomic operations supported with fetch semantics (adds SWAP) */
#define UCT_RC_MLX5_ATOMIC_FOPS (UCT_RC_MLX5_ATOMIC_OPS | UCS_BIT(UCT_ATOMIC_OP_SWAP))

/* Verify the atomic opcode is present in _flags and the operand is 32/64 bit.
 * NOTE: may return an error from the calling function. */
#define UCT_RC_MLX5_CHECK_ATOMIC_OPS(_op, _size, _flags) \
    if (ucs_unlikely(!(UCS_BIT(_op) & (_flags)))) { \
        ucs_assertv(0, "incorrect opcode for atomic: %d", _op); \
        return UCS_ERR_UNSUPPORTED; \
    } else { \
        ucs_assert((_size == sizeof(uint64_t)) || (_size == sizeof(uint32_t))); \
    }

/* Convert a host-order value to big-endian according to the operand size */
#define UCT_RC_MLX5_TO_BE(_val, _size) \
    ((_size) == sizeof(uint64_t) ? htobe64(_val) : htobe32(_val))

/* Declare the send-op completion handler for _bits-bit little-endian atomic
 * responses (defined in the common .c file) */
#define UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(_bits) \
    void \
    uct_rc_mlx5_common_atomic##_bits##_le_handler(uct_rc_iface_send_op_t *op, \
                                                  const void *resp);

UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(32)
UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(64)
105
/* Interface statistics: inline receive counters */
enum {
    UCT_RC_MLX5_IFACE_STAT_RX_INL_32,
    UCT_RC_MLX5_IFACE_STAT_RX_INL_64,
    UCT_RC_MLX5_IFACE_STAT_LAST
};

/* Opcodes of tag-matching list operations posted on the command QP */
enum {
    UCT_RC_MLX5_TM_OPCODE_NOP    = 0x00,
    UCT_RC_MLX5_TM_OPCODE_APPEND = 0x01,
    UCT_RC_MLX5_TM_OPCODE_REMOVE = 0x02
};

/* TODO: Remove/replace this enum when mlx5dv.h is included */
enum {
    UCT_RC_MLX5_OPCODE_TAG_MATCHING          = 0x28,
    UCT_RC_MLX5_CQE_APP_TAG_MATCHING         = 1,

    /* last packet flag for multi-packet RQs */
    UCT_RC_MLX5_MP_RQ_LAST_MSG_FIELD         = 0x40000000,

    /* byte count mask for multi-packet RQs */
    UCT_RC_MLX5_MP_RQ_BYTE_CNT_FIELD_MASK    = 0x0000FFFF,

    /* number-of-strides field mask (bits 16..29) for multi-packet RQs */
    UCT_RC_MLX5_MP_RQ_NUM_STRIDES_FIELD_MASK = 0x3FFF0000,

    /* filler cqe indicator */
    UCT_RC_MLX5_MP_RQ_FILLER_CQE             = UCS_BIT(31),

    /* tag segment flags */
    UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT           = (1 << 6),
    UCT_RC_MLX5_SRQ_FLAG_TM_CQE_REQ          = (1 << 7),

    /* tag CQE codes */
    UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED       = 0x1,
    UCT_RC_MLX5_CQE_APP_OP_TM_EXPECTED       = 0x2,
    UCT_RC_MLX5_CQE_APP_OP_TM_UNEXPECTED     = 0x3,
    UCT_RC_MLX5_CQE_APP_OP_TM_NO_TAG         = 0x4,
    UCT_RC_MLX5_CQE_APP_OP_TM_APPEND         = 0x5,
    UCT_RC_MLX5_CQE_APP_OP_TM_REMOVE         = 0x6,
    UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED_MSG   = 0xA
};

/* Flags controlling the CQ polling path */
enum {
    UCT_RC_MLX5_POLL_FLAG_TM      = UCS_BIT(0),
    UCT_RC_MLX5_POLL_FLAG_HAS_EP  = UCS_BIT(1),
    UCT_RC_MLX5_POLL_FLAG_TAG_CQE = UCS_BIT(2)
};
153
154
/* Maximal number of IOV data segments which fit into one send WQE,
 * given the address-vector size */
#define UCT_RC_MLX5_RMA_MAX_IOV(_av_size) \
    ((UCT_IB_MLX5_MAX_SEND_WQE_SIZE - ((_av_size) + \
      sizeof(struct mlx5_wqe_raddr_seg) + sizeof(struct mlx5_wqe_ctrl_seg))) / \
     sizeof(struct mlx5_wqe_data_seg))


#if IBV_HW_TM
/* Maximal number of IOV entries for tag eager zcopy; the TMH consumes part of
 * the inline space */
#  define UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(_av_size) \
       (UCT_IB_MLX5_AM_MAX_SHORT(_av_size + sizeof(struct ibv_tmh))/ \
        sizeof(struct mlx5_wqe_data_seg))
#else
#  define UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(_av_size) 0
#endif /* IBV_HW_TM */


/* CQE carries immediate data (send-with-immediate response opcode) */
#define UCT_RC_MLX5_TM_CQE_WITH_IMM(_cqe64) \
    (((_cqe64)->op_own >> 4) == MLX5_CQE_RESP_SEND_IMM)


/* SW rendezvous request: immediate CQE whose immediate data is zero */
#define UCT_RC_MLX5_TM_IS_SW_RNDV(_cqe64, _imm_data) \
    (ucs_unlikely(UCT_RC_MLX5_TM_CQE_WITH_IMM(_cqe64) && !(_imm_data)))


/* Fail when the tag-matching free list is exhausted.
 * NOTE: expands to a statement which may return from the caller. */
#define UCT_RC_MLX5_CHECK_TAG(_mlx5_common_iface) \
    if (ucs_unlikely((_mlx5_common_iface)->tm.head->next == NULL)) { \
        return UCS_ERR_EXCEEDS_LIMIT; \
    }
182
183
/* AM header with a TMH opcode byte in front, so active messages can share a
 * QP with HW tag-matching traffic (see uct_rc_mlx5_am_hdr_fill) */
typedef struct uct_rc_mlx5_hdr {
    uint8_t           tmh_opcode; /* TMH.opcode */
    uct_rc_hdr_t      rc_hdr;
} UCS_S_PACKED uct_rc_mlx5_hdr_t;

/*
 * Short active message header (active message header is always 64 bit).
 */
typedef struct uct_rc_mlx5_am_short_hdr {
    uct_rc_mlx5_hdr_t rc_hdr;
    uint64_t          am_hdr;
} UCS_S_PACKED uct_rc_mlx5_am_short_hdr_t;


/* WQE segment for tag-matching list operations.
 * TODO: Remove this struct when mlx5dv.h is included! */
typedef struct uct_rc_mlx5_wqe_tm_seg {
    uint8_t                       opcode;  /* presumably UCT_RC_MLX5_TM_OPCODE_* */
    uint8_t                       flags;   /* presumably UCT_RC_MLX5_SRQ_FLAG_TM_* */
    uint16_t                      index;
    uint8_t                       rsvd0[2];
    uint16_t                      sw_cnt;
    uint8_t                       rsvd1[8];
    uint64_t                      append_tag;
    uint64_t                      append_mask;
} uct_rc_mlx5_wqe_tm_seg_t;


/* Tag matching list entry */
typedef struct uct_rc_mlx5_tag_entry {
    struct uct_rc_mlx5_tag_entry *next;
    uct_tag_context_t            *ctx;     /* the corresponding UCT context */
    unsigned                     num_cqes; /* how many CQEs is expected for this entry */
} uct_rc_mlx5_tag_entry_t;


/* Pending operation on the command QP */
typedef struct uct_rc_mlx5_srq_op {
    uct_rc_mlx5_tag_entry_t      *tag;
} uct_rc_mlx5_srq_op_t;


/* Command QP work-queue. All tag matching list operations are posted on it. */
typedef struct uct_rc_mlx5_cmd_wq {
    uct_ib_mlx5_txwq_t           super;
    uct_rc_mlx5_srq_op_t         *ops;     /* array of operations on command QP */
    int                          ops_head; /* points to the next operation to be completed */
    int                          ops_tail; /* points to the last added operation */
    int                          ops_mask; /* mask which bounds head and tail by
                                              ops array size */
} uct_rc_mlx5_cmd_wq_t;
234
235
/* Message context used with multi-packet XRQ */
typedef struct uct_rc_mlx5_mp_context {
    /* Storage for a per-message user-defined context. Must be passed unchanged
     * to the user in uct_tag_unexp_eager_cb_t. */
    void                         *context;

    /* Tag is saved when first fragment (with TMH) arrives and then passed to
     * the eager unexpected callback for subsequent fragments. */
    uct_tag_t                    tag;

    /* With MP XRQ immediate value is delivered with the last fragment, while
     * TMH is present in the first fragment only. Need to save app_context
     * from TMH in this field and construct immediate data for unexpected
     * eager callback when the last message fragment arrives. */
    uint32_t                     app_ctx;

    /* Used when local EP can be found by sender QP number (rc_mlx5 tl).
     * When 0, it means that tag eager unexpected multi-fragmented message
     * is being processed (not all fragments are delivered to the user via
     * uct_tag_unexp_eager_cb_t callback yet). Otherwise, any incoming tag
     * eager message should be either a single fragment message or the first
     * fragment of multi-fragmented message. */
    uint8_t                      free;
} uct_rc_mlx5_mp_context_t;


/* Key identifying a remote sender for MP message-context lookup:
 * GUID plus QP number (see uct_rc_mlx5_mp_hash_gid below) */
typedef struct uct_rc_mlx5_mp_hash_key {
    uint64_t                     guid;
    uint32_t                     qp_num;
} uct_rc_mlx5_mp_hash_key_t;
266
267
268 static UCS_F_ALWAYS_INLINE int
uct_rc_mlx5_mp_hash_equal(uct_rc_mlx5_mp_hash_key_t key1,uct_rc_mlx5_mp_hash_key_t key2)269 uct_rc_mlx5_mp_hash_equal(uct_rc_mlx5_mp_hash_key_t key1,
270 uct_rc_mlx5_mp_hash_key_t key2)
271 {
272 return (key1.qp_num == key2.qp_num) && (key1.guid == key2.guid);
273 }
274
275
276 static UCS_F_ALWAYS_INLINE khint32_t
uct_rc_mlx5_mp_hash_func(uct_rc_mlx5_mp_hash_key_t key)277 uct_rc_mlx5_mp_hash_func(uct_rc_mlx5_mp_hash_key_t key)
278 {
279 return kh_int64_hash_func(key.guid ^ key.qp_num);
280 }
281
282
/* MP message contexts hashed by a plain 64-bit key (LID-based, per the name) */
KHASH_MAP_INIT_INT64(uct_rc_mlx5_mp_hash_lid, uct_rc_mlx5_mp_context_t);


/* MP message contexts hashed by {GUID, QP number} (GID-based path) */
KHASH_INIT(uct_rc_mlx5_mp_hash_gid, uct_rc_mlx5_mp_hash_key_t,
           uct_rc_mlx5_mp_context_t, 1, uct_rc_mlx5_mp_hash_func,
           uct_rc_mlx5_mp_hash_equal);
289
290
#if IBV_HW_TM
/* Get a TX descriptor from the mempool, write an eager TMH at the start of
 * its payload and pack user data right after it with _pack_cb; _length is set
 * to the packed payload size (TMH not included) */
#  define UCT_RC_MLX5_IFACE_GET_TM_BCOPY_DESC(_iface, _mp, _desc, _tag, _app_ctx, \
                                              _pack_cb, _arg, _length) \
       { \
           void *hdr; \
           UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \
           (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \
           hdr = (_desc) + 1; \
           uct_rc_mlx5_fill_tmh(hdr, _tag, _app_ctx, IBV_TMH_EAGER); \
           hdr = UCS_PTR_BYTE_OFFSET(hdr, sizeof(struct ibv_tmh)); \
           _length = _pack_cb(hdr, _arg); \
       }
#endif

/* Tag-matching statistics counters */
enum {
    UCT_RC_MLX5_STAT_TAG_RX_EXP,
    UCT_RC_MLX5_STAT_TAG_RX_EAGER_UNEXP,
    UCT_RC_MLX5_STAT_TAG_RX_RNDV_UNEXP,
    UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_EXP,
    UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_UNEXP,
    UCT_RC_MLX5_STAT_TAG_RX_RNDV_FIN,
    UCT_RC_MLX5_STAT_TAG_LIST_ADD,
    UCT_RC_MLX5_STAT_TAG_LIST_DEL,
    UCT_RC_MLX5_STAT_TAG_LIST_SYNC,
    UCT_RC_MLX5_STAT_TAG_LAST
};

/* Private data piggy-backed in the reserved field of a TMH
 * (see uct_rc_mlx5_fill_tmh_priv_data) */
typedef struct uct_rc_mlx5_tmh_priv_data {
    uint8_t           length;
    uint16_t          data;
} UCS_S_PACKED uct_rc_mlx5_tmh_priv_data_t;
322
/* Release callback for receive descriptors handed to the user */
void uct_rc_mlx5_release_desc(uct_recv_desc_t *self, void *desc);

/* Receive-descriptor release context; 'offset' is the distance from the
 * user-visible data back to the descriptor start - TODO confirm direction
 * against uct_rc_mlx5_release_desc() implementation */
typedef struct uct_rc_mlx5_release_desc {
    uct_recv_desc_t  super;
    unsigned         offset;
} uct_rc_mlx5_release_desc_t;


/* Transport-private per-tag-context data, accessed via uct_rc_mlx5_ctx_priv() */
typedef struct uct_rc_mlx5_ctx_priv {
    uint64_t                     tag;
    void                         *buffer;
    uint32_t                     app_ctx;
    uint32_t                     length;
    uint32_t                     tag_handle;
} uct_rc_mlx5_ctx_priv_t;

#if HAVE_IBV_DM
/* Device-memory (DM) allocation shared as per-worker data */
typedef struct uct_mlx5_dm_data {
    uct_worker_tl_data_t super;
    ucs_mpool_t          mp;
    struct ibv_mr        *mr;           /* MR registered over the DM region */
    struct ibv_dm        *dm;
    void                 *start_va;
    size_t               seg_len;
    unsigned             seg_count;
    unsigned             seg_attached;
    uct_ib_device_t      *device;
} uct_mlx5_dm_data_t;

/* Staging buffer for headers copied into device memory; sized to hold either
 * a short-AM header or a TMH (two 64-bit words) */
typedef union uct_rc_mlx5_dm_copy_data {
    uct_rc_mlx5_am_short_hdr_t am_hdr;
    struct ibv_tmh             tm_hdr;
    char                       bytes[sizeof(uint64_t) * 2];
} UCS_S_PACKED uct_rc_mlx5_dm_copy_data_t;
#endif

/* Set of tag buffer addresses, keyed by pointer value */
#define uct_rc_mlx5_tag_addr_hash(_ptr)  kh_int64_hash_func((uintptr_t)(_ptr))
KHASH_INIT(uct_rc_mlx5_tag_addrs, void*, char, 0, uct_rc_mlx5_tag_addr_hash,
           kh_int64_hash_equal)
362
/* State shared by the RC and DC mlx5 transports */
typedef struct uct_rc_mlx5_iface_common {
    uct_rc_iface_t                   super;
    struct {
        ucs_mpool_t                  atomic_desc_mp;
        uct_ib_mlx5_mmio_mode_t      mmio_mode;
        uint16_t                     bb_max;   /* limit number of outstanding WQE BBs */
    } tx;
    struct {
        uct_ib_mlx5_srq_t            srq;
        void                         *pref_ptr;
    } rx;
    uct_ib_mlx5_cq_t                 cq[UCT_IB_DIR_NUM];
    /* HW tag-matching state */
    struct {
        uct_rc_mlx5_cmd_wq_t         cmd_wq;
        uct_rc_mlx5_tag_entry_t      *head;   /* free-list head (see UCT_RC_MLX5_CHECK_TAG) */
        uct_rc_mlx5_tag_entry_t      *tail;
        uct_rc_mlx5_tag_entry_t      *list;
        ucs_mpool_t                  *bcopy_mp;
        khash_t(uct_rc_mlx5_tag_addrs) tag_addrs;

        /* outstanding rendezvous completions, indexed by operation id
         * (see uct_rc_mlx5_tag_get_op_id / uct_rc_mlx5_handle_rndv_fin) */
        ucs_ptr_array_t              rndv_comps;
        size_t                       max_bcopy;
        size_t                       max_zcopy;
        unsigned                     num_tags;
        unsigned                     num_outstanding;
        unsigned                     max_rndv_data;
        uint16_t                     unexpected_cnt;
        uint16_t                     cmd_qp_len; /* set by uct_rc_mlx5_iface_tm_set_cmd_qp_len */
        uint8_t                      enabled;    /* see UCT_RC_MLX5_TM_ENABLED */
        /* multi-packet XRQ state; active when num_strides > 1
         * (see UCT_RC_MLX5_MP_ENABLED) */
        struct {
            uint8_t                  num_strides;
            ucs_mpool_t              tx_mp;
            uct_rc_mlx5_mp_context_t last_frag_ctx;
            khash_t(uct_rc_mlx5_mp_hash_lid) hash_lid;
            khash_t(uct_rc_mlx5_mp_hash_gid) hash_gid;
        } mp;
        struct {
            void                     *arg; /* User defined arg */
            uct_tag_unexp_eager_cb_t cb;   /* Callback for unexpected eager messages */
        } eager_unexp;

        struct {
            void                     *arg; /* User defined arg */
            uct_tag_unexp_rndv_cb_t  cb;   /* Callback for unexpected rndv messages */
        } rndv_unexp;
        uct_rc_mlx5_release_desc_t   eager_desc;
        uct_rc_mlx5_release_desc_t   rndv_desc;
        uct_rc_mlx5_release_desc_t   am_desc;
        UCS_STATS_NODE_DECLARE(stats)
    } tm;
#if HAVE_IBV_DM
    /* device-memory fast path for short sends */
    struct {
        uct_mlx5_dm_data_t           *dm;
        size_t                       seg_len; /* cached value to avoid double-pointer access */
        ucs_status_t                 (*am_short)(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                                 const void *payload, unsigned length);
        ucs_status_t                 (*tag_short)(uct_ep_h tl_ep, uct_tag_t tag,
                                                  const void *data, size_t length);
    } dm;
#endif
#if HAVE_DECL_MLX5DV_DEVX_SUBSCRIBE_DEVX_EVENT
    struct mlx5dv_devx_event_channel *event_channel;
#endif
    struct {
        uint8_t                      atomic_fence_flag;
        ucs_ternary_value_t          cyclic_srq_enable;
    } config;
    UCS_STATS_NODE_DECLARE(stats)
} uct_rc_mlx5_iface_common_t;
432
433 /**
434 * Common RC/DC mlx5 interface configuration
435 */
436 typedef struct uct_rc_mlx5_iface_common_config {
437 uct_ib_mlx5_iface_config_t super;
438 unsigned tx_max_bb;
439 struct {
440 int enable;
441 unsigned list_size;
442 size_t seg_size;
443 ucs_ternary_value_t mp_enable;
444 size_t mp_num_strides;
445 } tm;
446 unsigned exp_backoff;
447 ucs_ternary_value_t cyclic_srq_enable;
448 } uct_rc_mlx5_iface_common_config_t;
449
450
451 UCS_CLASS_DECLARE(uct_rc_mlx5_iface_common_t,
452 uct_rc_iface_ops_t*,
453 uct_md_h, uct_worker_h,
454 const uct_iface_params_t*,
455 uct_rc_iface_common_config_t*,
456 uct_rc_mlx5_iface_common_config_t*,
457 uct_ib_iface_init_attr_t*);
458
459
/* Update a tag-matching statistics counter (UCT_RC_MLX5_STAT_TAG_*) */
#define UCT_RC_MLX5_TM_STAT(_iface, _op) \
    UCS_STATS_UPDATE_COUNTER((_iface)->tm.stats, UCT_RC_MLX5_STAT_TAG_##_op, 1)

/* Whether HW tag-matching is enabled on the interface */
#define UCT_RC_MLX5_TM_ENABLED(_iface)  (_iface)->tm.enabled

/* Whether multi-packet XRQ is enabled (more than one stride per segment) */
#define UCT_RC_MLX5_MP_ENABLED(_iface)  ((_iface)->tm.mp.num_strides > 1)

/* TMH can carry 2 bytes of data in its reserved field */
#define UCT_RC_MLX5_TMH_PRIV_LEN ucs_field_sizeof(uct_rc_mlx5_tmh_priv_data_t, \
                                                  data)
470
/* Validate tag rendezvous parameters: at most one IOV entry, user header fits
 * into the rendezvous header buffer, and header + RTS fit into the inline
 * space.
 * Fix: the iovcnt check must use the macro parameter _iovcnt - the previous
 * code referenced a caller-scope variable named 'iovcnt' and only compiled
 * when the caller happened to use that exact name.
 * NOTE: UCT_CHECK_PARAM_PTR may return an error pointer from the caller. */
#define UCT_RC_MLX5_CHECK_RNDV_PARAMS(_iovcnt, _header_len, _tm_len, \
                                      _max_inline, _max_rndv_hdr) \
    { \
        UCT_CHECK_PARAM_PTR(_iovcnt <= 1ul, "Wrong iovcnt %lu", _iovcnt); \
        UCT_CHECK_PARAM_PTR(_header_len <= _max_rndv_hdr, \
                            "Invalid header len %u", _header_len); \
        UCT_CHECK_PARAM_PTR((_header_len + _tm_len) <= _max_inline, \
                            "Invalid RTS length %u", \
                            _header_len + _tm_len); \
    }
481
/* Select the opcode variant according to the presence of immediate data:
 * zero immediate means "no immediate" - the plain _op is used and outputs are
 * zeroed; otherwise the _imm_suffix opcode variant is selected and the 64-bit
 * immediate is split into IB immediate data and app context. */
#define UCT_RC_MLX5_FILL_TM_IMM(_imm_data, _app_ctx, _ib_imm, _res_op, \
                                _op, _imm_suffix) \
    if (_imm_data == 0) { \
        _res_op  = _op; \
        _app_ctx = 0; \
        _ib_imm  = 0; \
    } else { \
        _res_op = UCS_PP_TOKENPASTE(_op, _imm_suffix); \
        uct_rc_mlx5_tag_imm_data_pack(&(_ib_imm), &(_app_ctx), _imm_data); \
    }
492
/* Get a TX descriptor and write an eager TMH at the start of its payload;
 * _hdr is advanced past the TMH to where user data should be packed.
 * Fix: use the portable IBV_TMH_EAGER name (aliased to IBV_EXP_TMH_EAGER only
 * when the legacy experimental headers are in use); the previous direct use
 * of IBV_EXP_TMH_EAGER does not compile against upstream
 * infiniband/tm_types.h. Pointer advance uses UCS_PTR_BYTE_OFFSET instead of
 * void-pointer arithmetic, consistent with
 * UCT_RC_MLX5_IFACE_GET_TM_BCOPY_DESC. */
#define UCT_RC_MLX5_GET_TX_TM_DESC(_iface, _mp, _desc, _tag, _app_ctx, _hdr) \
    { \
        UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \
        _hdr = _desc + 1; \
        uct_rc_mlx5_fill_tmh(_hdr, _tag, _app_ctx, IBV_TMH_EAGER); \
        _hdr = UCS_PTR_BYTE_OFFSET(_hdr, sizeof(struct ibv_tmh)); \
    }

/* Get a TX descriptor with an eager TMH and pack the payload with _pack_cb;
 * _length is set to the packed payload size (TMH not included) */
#define UCT_RC_MLX5_GET_TM_BCOPY_DESC(_iface, _mp, _desc, _tag, _app_ctx, \
                                      _pack_cb, _arg, _length) \
    { \
        void *hdr; \
        UCT_RC_MLX5_GET_TX_TM_DESC(_iface, _mp, _desc, _tag, _app_ctx, hdr) \
        (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \
        _length = _pack_cb(hdr, _arg); \
    }
509
510 #if IBV_HW_TM
511 void uct_rc_mlx5_handle_unexp_rndv(uct_rc_mlx5_iface_common_t *iface,
512 struct ibv_tmh *tmh, uct_tag_t tag,
513 struct mlx5_cqe64 *cqe, unsigned flags,
514 unsigned byte_len);
515
516
/* Fill a tag-matching header with the given opcode (IBV_TMH_*), application
 * context and tag */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_fill_tmh(struct ibv_tmh *tmh, uct_tag_t tag,
                     uint32_t app_ctx, unsigned op)
{
    tmh->opcode  = op;
    tmh->app_ctx = app_ctx;
    tmh->tag     = tag;
}
525
/* Fill a rendezvous header (remote address, rkey and length); all fields are
 * converted to network byte order */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_fill_rvh(struct ibv_rvh *rvh, const void *vaddr,
                     uint32_t rkey, uint32_t len)
{
    rvh->va   = htobe64((uint64_t)vaddr);
    rvh->rkey = htonl(rkey);
    rvh->len  = htonl(len);
}
534
/* Associate a user completion with a new rendezvous operation id; the id is
 * resolved back to the completion in uct_rc_mlx5_handle_rndv_fin() */
static UCS_F_ALWAYS_INLINE unsigned
uct_rc_mlx5_tag_get_op_id(uct_rc_mlx5_iface_common_t *iface, uct_completion_t *comp)
{
    return ucs_ptr_array_insert(&iface->tm.rndv_comps, comp);
}
540
541
/* Stash the first (hdr_len - max_rndv_priv_data) bytes of 'hdr' into the TMH
 * reserved field when the header does not fit into the regular private-data
 * area. Returns the number of bytes stashed (0 if everything fits). */
static UCS_F_ALWAYS_INLINE unsigned
uct_rc_mlx5_fill_tmh_priv_data(struct ibv_tmh *tmh, const void *hdr,
                               unsigned hdr_len, unsigned max_rndv_priv_data)
{
    uct_rc_mlx5_tmh_priv_data_t *priv = (uct_rc_mlx5_tmh_priv_data_t*)tmh->reserved;

    /* If header length is bigger than max_rndv_priv_data size, need to add the
     * rest to the TMH reserved field. */
    if (hdr_len > max_rndv_priv_data) {
        priv->length = hdr_len - max_rndv_priv_data;
        ucs_assert(priv->length <= UCT_RC_MLX5_TMH_PRIV_LEN);
        memcpy(&priv->data, (char*)hdr, priv->length);
    } else {
        priv->length = 0;
    }

    return priv->length;
}
560 #endif
561
562 static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_tag_imm_data_pack(uint32_t * ib_imm,uint32_t * app_ctx,uint64_t imm_val)563 uct_rc_mlx5_tag_imm_data_pack(uint32_t *ib_imm, uint32_t *app_ctx,
564 uint64_t imm_val)
565 {
566 *ib_imm = (uint32_t)(imm_val & 0xFFFFFFFF);
567 *app_ctx = (uint32_t)(imm_val >> 32);
568 }
569
570 static UCS_F_ALWAYS_INLINE uint64_t
uct_rc_mlx5_tag_imm_data_unpack(uint32_t ib_imm,uint32_t app_ctx,int is_imm)571 uct_rc_mlx5_tag_imm_data_unpack(uint32_t ib_imm, uint32_t app_ctx, int is_imm)
572 {
573 return is_imm ? (((uint64_t)app_ctx << 32) | ib_imm) : 0ul;
574 }
575
/* Access the transport-private area of a UCT tag context */
static UCS_F_ALWAYS_INLINE uct_rc_mlx5_ctx_priv_t*
uct_rc_mlx5_ctx_priv(uct_tag_context_t *ctx)
{
    return (uct_rc_mlx5_ctx_priv_t*)ctx->priv;
}
581
/* Complete a rendezvous operation: app_ctx carries the operation id allocated
 * by uct_rc_mlx5_tag_get_op_id(); invoke the stored completion and release
 * the id */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_handle_rndv_fin(uct_rc_mlx5_iface_common_t *iface, uint32_t app_ctx)
{
    void *rndv_comp = NULL;
    int found;

    /* a FIN for an unknown operation id indicates a protocol error */
    found = ucs_ptr_array_lookup(&iface->tm.rndv_comps, app_ctx, rndv_comp);
    ucs_assert_always(found > 0);
    uct_invoke_completion((uct_completion_t*)rndv_comp, UCS_OK);
    ucs_ptr_array_remove(&iface->tm.rndv_comps, app_ctx);
}
593
594 extern ucs_config_field_t uct_rc_mlx5_common_config_table[];
595
596 unsigned uct_rc_mlx5_iface_srq_post_recv(uct_rc_mlx5_iface_common_t *iface);
597
598 void uct_rc_mlx5_iface_common_prepost_recvs(uct_rc_mlx5_iface_common_t *iface);
599
600 ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface,
601 uct_rc_iface_t *rc_iface,
602 const uct_rc_iface_config_t *config,
603 const uct_ib_mlx5_iface_config_t *mlx5_config);
604
605 void uct_rc_mlx5_iface_common_cleanup(uct_rc_mlx5_iface_common_t *iface);
606
607 ucs_status_t uct_rc_mlx5_iface_common_dm_init(uct_rc_mlx5_iface_common_t *iface,
608 uct_rc_iface_t *rc_iface,
609 const uct_ib_mlx5_iface_config_t *mlx5_config);
610
611 void uct_rc_mlx5_iface_common_dm_cleanup(uct_rc_mlx5_iface_common_t *iface);
612
613 void uct_rc_mlx5_iface_common_query(uct_ib_iface_t *ib_iface,
614 uct_iface_attr_t *iface_attr,
615 size_t max_inline, size_t max_tag_eager_iov);
616
617 void uct_rc_mlx5_iface_common_update_cqs_ci(uct_rc_mlx5_iface_common_t *iface,
618 uct_ib_iface_t *ib_iface);
619
620 void uct_rc_mlx5_iface_common_sync_cqs_ci(uct_rc_mlx5_iface_common_t *iface,
621 uct_ib_iface_t *ib_iface);
622
623 int uct_rc_mlx5_iface_commom_clean(uct_ib_mlx5_cq_t *mlx5_cq,
624 uct_ib_mlx5_srq_t *srq, uint32_t qpn);
625
/* Size the command QP so every tag can have its ADD and DEL outstanding,
 * plus room for SYNC operations */
static UCS_F_MAYBE_UNUSED void
uct_rc_mlx5_iface_tm_set_cmd_qp_len(uct_rc_mlx5_iface_common_t *iface)
{
    /* 2 ops for each tag (ADD + DEL) and extra ops for SYNC. */
    iface->tm.cmd_qp_len = (2 * iface->tm.num_tags) + 2;
}
632
633 #if IBV_HW_TM
634 void uct_rc_mlx5_init_rx_tm_common(uct_rc_mlx5_iface_common_t *iface,
635 const uct_rc_iface_common_config_t *config,
636 unsigned rndv_hdr_len);
637
638 ucs_status_t uct_rc_mlx5_init_rx_tm(uct_rc_mlx5_iface_common_t *iface,
639 const uct_rc_iface_common_config_t *config,
640 struct ibv_srq_init_attr_ex *srq_init_attr,
641 unsigned rndv_hdr_len);
642 #else
/* Stub: HW tag-matching support is not compiled in */
static UCS_F_MAYBE_UNUSED ucs_status_t
uct_rc_mlx5_init_rx_tm(uct_rc_mlx5_iface_common_t *iface,
                       const uct_rc_iface_common_config_t *config,
                       struct ibv_srq_init_attr_ex *srq_init_attr,
                       unsigned rndv_hdr_len)
{
    return UCS_ERR_UNSUPPORTED;
}
651 #endif
652
653 #if IBV_HW_TM && HAVE_DEVX
654 ucs_status_t uct_rc_mlx5_devx_init_rx_tm(uct_rc_mlx5_iface_common_t *iface,
655 const uct_rc_iface_common_config_t *config,
656 int dc, unsigned rndv_hdr_len);
657 #else
/* Stub: HW tag-matching and/or DevX support is not compiled in */
static UCS_F_MAYBE_UNUSED ucs_status_t
uct_rc_mlx5_devx_init_rx_tm(uct_rc_mlx5_iface_common_t *iface,
                            const uct_rc_iface_common_config_t *config,
                            int dc, unsigned rndv_hdr_len)
{
    return UCS_ERR_UNSUPPORTED;
}
665 #endif
666
667 #if HAVE_DEVX
668 ucs_status_t uct_rc_mlx5_devx_init_rx(uct_rc_mlx5_iface_common_t *iface,
669 const uct_rc_iface_common_config_t *config);
670
671 void uct_rc_mlx5_devx_cleanup_srq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_srq_t *srq);
672 #else
/* Stub: DevX support is not compiled in */
static UCS_F_MAYBE_UNUSED ucs_status_t
uct_rc_mlx5_devx_init_rx(uct_rc_mlx5_iface_common_t *iface,
                         const uct_rc_iface_common_config_t *config)
{
    return UCS_ERR_UNSUPPORTED;
}

/* Stub: must never be reached when DevX support is disabled */
static UCS_F_MAYBE_UNUSED void
uct_rc_mlx5_devx_cleanup_srq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_srq_t *srq)
{
    ucs_bug("DevX SRQ cleanup has to be done only if DevX support is enabled");
}
685 #endif
686
687 void uct_rc_mlx5_tag_cleanup(uct_rc_mlx5_iface_common_t *iface);
688
689 ucs_status_t uct_rc_mlx5_iface_common_tag_init(uct_rc_mlx5_iface_common_t *iface);
690
691 void uct_rc_mlx5_iface_common_tag_cleanup(uct_rc_mlx5_iface_common_t *iface);
692
693 ucs_status_t uct_rc_mlx5_ep_tag_rndv_cancel(uct_ep_h tl_ep, void *op);
694
695 void uct_rc_mlx5_common_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type,
696 void *data, size_t length, size_t valid_length,
697 char *buffer, size_t max);
698
/* Initialize an AM header; with HW tag-matching the TMH opcode byte is set to
 * NO_TAG so the message bypasses tag matching */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_am_hdr_fill(uct_rc_mlx5_hdr_t *rch, uint8_t id)
{
#if IBV_HW_TM
    rch->tmh_opcode   = IBV_TMH_NO_TAG;
#endif
    rch->rc_hdr.am_id = id;
}
707
708 #if HAVE_DEVX
709 ucs_status_t
710 uct_rc_mlx5_iface_common_devx_connect_qp(uct_rc_mlx5_iface_common_t *iface,
711 uct_ib_mlx5_qp_t *qp,
712 uint32_t dest_qp_num,
713 struct ibv_ah_attr *ah_attr,
714 enum ibv_mtu path_mtu);
715
716 #else
/* Stub: DevX support is not compiled in */
static UCS_F_MAYBE_UNUSED ucs_status_t
uct_rc_mlx5_iface_common_devx_connect_qp(uct_rc_mlx5_iface_common_t *iface,
                                         uct_ib_mlx5_qp_t *qp,
                                         uint32_t dest_qp_num,
                                         struct ibv_ah_attr *ah_attr,
                                         enum ibv_mtu path_mtu)
{
    return UCS_ERR_UNSUPPORTED;
}
726 #endif
727
728 ucs_status_t uct_rc_mlx5_devx_iface_init_events(uct_rc_mlx5_iface_common_t *iface);
729
730 void uct_rc_mlx5_devx_iface_free_events(uct_rc_mlx5_iface_common_t *iface);
731
732 ucs_status_t uct_rc_mlx5_devx_iface_subscribe_event(uct_rc_mlx5_iface_common_t *iface,
733 uct_ib_mlx5_qp_t *qp,
734 unsigned event_num,
735 enum ibv_event_type event_type,
736 unsigned event_data);
737
738 void uct_rc_mlx5_iface_fill_attr(uct_rc_mlx5_iface_common_t *iface,
739 uct_ib_mlx5_qp_attr_t *qp_attr,
740 unsigned max_send_wr,
741 uct_ib_mlx5_srq_t *srq);
742
743 ucs_status_t
744 uct_rc_mlx5_common_iface_init_rx(uct_rc_mlx5_iface_common_t *iface,
745 const uct_rc_iface_common_config_t *rc_config);
746
747 void uct_rc_mlx5_destroy_srq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_srq_t *srq);
748
749 #endif
750