/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2016.  ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#include "ib_mlx5.h"


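/**
 * Return a pointer to the CQE at the given index. The index is wrapped to the
 * CQ length (a power of two), so the caller may pass a monotonically growing
 * consumer index.
 */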
static UCS_F_ALWAYS_INLINE UCS_F_NON_NULL struct mlx5_cqe64*
uct_ib_mlx5_get_cqe(uct_ib_mlx5_cq_t *cq, unsigned cqe_index)
{
    return UCS_PTR_BYTE_OFFSET(cq->cq_buf, ((cqe_index & (cq->cq_length - 1)) <<
                                            cq->cqe_size_log));
}

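/* Return nonzero if the CQE is still owned by hardware (not yet valid for
 * software). The ownership bit is compared against the wrap parity of the
 * consumer index, which toggles on every pass over the CQ. */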
static UCS_F_ALWAYS_INLINE int
uct_ib_mlx5_cqe_is_hw_owned(uint8_t op_own, unsigned cqe_index, unsigned mask)
{
    return (op_own & MLX5_CQE_OWNER_MASK) == !(cqe_index & mask);
}

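/* Return the stride index field of the CQE. If the installed ibverbs headers
 * do not expose ib_stride_index, read it from its fixed offset in the CQE. */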
static UCS_F_ALWAYS_INLINE int
uct_ib_mlx5_cqe_stride_index(struct mlx5_cqe64* cqe)
{
#ifdef HAVE_STRUCT_MLX5_CQE64_IB_STRIDE_INDEX
    return ntohs(cqe->ib_stride_index);
#else
    uint16_t *stride = (uint16_t*)&cqe->rsvd20[2];
    return ntohs(*stride);
#endif
}

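/* Size in bytes of one SRQ WQE segment (next-segment header plus num_sge data
 * segments), rounded up to the next power of two. */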
static UCS_F_ALWAYS_INLINE int uct_ib_mlx5_srq_stride(int num_sge)
{
    int stride;

    stride = sizeof(struct mlx5_wqe_srq_next_seg) +
             (num_sge * sizeof(struct mlx5_wqe_data_seg));

    return ucs_roundup_pow2(stride);
}

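/* Number of SRQ work requests needed to cover rxq_len receive buffers with
 * num_sge scatter entries each, bounded below by the minimal XRQ post size. */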
static UCS_F_ALWAYS_INLINE int
uct_ib_mlx5_srq_max_wrs(int rxq_len, int num_sge)
{
    return ucs_max(rxq_len / num_sge, UCT_IB_MLX5_XRQ_MIN_UWQ_POST);
}

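/* Return nonzero if the receive CQE carries GRH information, either inline in
 * the CQE or in the data buffer. */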
static UCS_F_ALWAYS_INLINE int
uct_ib_mlx5_cqe_is_grh_present(struct mlx5_cqe64* cqe)
{
    return cqe->flags_rqpn & htonl(UCT_IB_MLX5_CQE_FLAG_L3_IN_DATA |
                                   UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE);
}

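/* Return a pointer to the GRH scattered directly before the CQE. Valid only
 * when the CQE flags indicate the GRH is located in the CQE itself. */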
static UCS_F_ALWAYS_INLINE void*
uct_ib_mlx5_gid_from_cqe(struct mlx5_cqe64* cqe)
{
    ucs_assert(uct_ib_mlx5_cqe_is_grh_present(cqe) ==
               htonl(UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE)); /* GRH is in CQE */
    return UCS_PTR_BYTE_OFFSET(cqe, -UCT_IB_GRH_LEN);
}

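/**
 * Poll the next completion from the CQ.
 *
 * Returns the CQE and advances the consumer index if a valid completion is
 * available. Returns NULL if the next CQE is still owned by hardware, or if
 * it reports an error, in which case the error CQE is handed to
 * uct_ib_mlx5_check_completion().
 */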
static UCS_F_ALWAYS_INLINE struct mlx5_cqe64*
uct_ib_mlx5_poll_cq(uct_ib_iface_t *iface, uct_ib_mlx5_cq_t *cq)
{
    struct mlx5_cqe64 *cqe;
    unsigned cqe_index;
    uint8_t op_own;

    cqe_index = cq->cq_ci;
    cqe       = uct_ib_mlx5_get_cqe(cq, cqe_index);
    op_own    = cqe->op_own;

    if (ucs_unlikely(uct_ib_mlx5_cqe_is_hw_owned(op_own, cqe_index, cq->cq_length))) {
        return NULL;
    } else if (ucs_unlikely(op_own & UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK)) {
        UCS_STATIC_ASSERT(MLX5_CQE_INVALID & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4));
        ucs_assert((op_own >> 4) != MLX5_CQE_INVALID);
        uct_ib_mlx5_check_completion(iface, cq, cqe);
        return NULL; /* No CQE */
    }

    cq->cq_ci = cqe_index + 1;
    return cqe;
}


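/* Record the hardware consumer index (used for queue validation when
 * assertions are enabled) and return the number of send WQE building blocks
 * (BBs) currently available for posting. */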
static UCS_F_ALWAYS_INLINE uint16_t
uct_ib_mlx5_txwq_update_bb(uct_ib_mlx5_txwq_t *wq, uint16_t hw_ci)
{
#if UCS_ENABLE_ASSERT
    wq->hw_ci = hw_ci;
#endif
    return wq->bb_max - (wq->prev_sw_pi - hw_ci);
}


/* check that work queue has enough space for the new work request */
static inline void
uct_ib_mlx5_txwq_validate(uct_ib_mlx5_txwq_t *wq, uint16_t num_bb)
{

#if UCS_ENABLE_ASSERT
    uint16_t wqe_s, wqe_e;
    uint16_t hw_ci, sw_pi;
    uint16_t wqe_cnt;
    int is_ok = 1;

    if (wq->hw_ci == 0xFFFF) {
        return;
    }

    wqe_cnt = UCS_PTR_BYTE_DIFF(wq->qstart, wq->qend) / MLX5_SEND_WQE_BB;
    if (wqe_cnt < wq->bb_max) {
        ucs_fatal("wqe count (%u) < bb_max (%u)", wqe_cnt, wq->bb_max);
    }

    wqe_s = UCS_PTR_BYTE_DIFF(wq->qstart, wq->curr) / MLX5_SEND_WQE_BB;
    wqe_e = (wqe_s + num_bb) % wqe_cnt;

    sw_pi = wq->prev_sw_pi % wqe_cnt;
    hw_ci = wq->hw_ci % wqe_cnt;

    if (hw_ci <= sw_pi) {
        if (hw_ci <= wqe_s && wqe_s <= sw_pi) {
            is_ok = 0;
        }
        if (hw_ci <= wqe_e && wqe_e <= sw_pi) {
            is_ok = 0;
        }
    } else {
        if (!(sw_pi < wqe_s && wqe_s < hw_ci)) {
            is_ok = 0;
        }
        if (!(sw_pi < wqe_e && wqe_e < hw_ci)) {
            is_ok = 0;
        }
    }
    if (!is_ok) {
        ucs_fatal("tx wq overrun: hw_ci: %u sw_pi: %u cur: %u-%u num_bb: %u wqe_cnt: %u",
                  hw_ci, sw_pi, wqe_s, wqe_e, num_bb, wqe_cnt);
    }
#endif
}


/**
 * Copy data to inline segment, taking into account QP wrap-around.
 *
 * @param dest    Inline data in the WQE to copy to.
 * @param src     Data to copy.
 * @param length  Data length.
 * @param wq      Send work queue, used to wrap around at the queue end.
 */
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_inline_copy(void *restrict dest, const void *restrict src,
                        unsigned length, uct_ib_mlx5_txwq_t *wq)
{
    ptrdiff_t n;

    ucs_assert(dest != NULL);
    ucs_assert((src != NULL) || (length == 0));

    if (UCS_PTR_BYTE_OFFSET(dest, length) <= wq->qend) {
        /* cppcheck-suppress nullPointer */
        memcpy(dest, src, length);
    } else {
        n = UCS_PTR_BYTE_DIFF(dest, wq->qend);
        memcpy(dest, src, n);
        memcpy(wq->qstart, UCS_PTR_BYTE_OFFSET(src, n), length - n);
    }
}


/* wrapping of 'seg' should not happen */
static UCS_F_ALWAYS_INLINE void*
uct_ib_mlx5_txwq_wrap_none(uct_ib_mlx5_txwq_t *txwq, void *seg)
{
    ucs_assertv(((unsigned long)seg % UCT_IB_MLX5_WQE_SEG_SIZE) == 0, "seg=%p", seg);
    ucs_assertv(seg >= txwq->qstart, "seg=%p qstart=%p", seg, txwq->qstart);
    ucs_assertv(seg <  txwq->qend,   "seg=%p qend=%p",   seg, txwq->qend);
    return seg;
}


/* wrapping of 'seg' could happen, but only on exact 'qend' boundary */
static UCS_F_ALWAYS_INLINE void *
uct_ib_mlx5_txwq_wrap_exact(uct_ib_mlx5_txwq_t *txwq, void *seg)
{
    ucs_assert(seg <= txwq->qend);
    if (ucs_unlikely(seg == txwq->qend)) {
        seg = txwq->qstart;
    }
    return uct_ib_mlx5_txwq_wrap_none(txwq, seg);
}


/* wrapping of 'seg' could happen, even past 'qend' boundary */
static UCS_F_ALWAYS_INLINE void *
uct_ib_mlx5_txwq_wrap_any(uct_ib_mlx5_txwq_t *txwq, void *seg)
{
    if (ucs_unlikely(seg >= txwq->qend)) {
        seg = UCS_PTR_BYTE_OFFSET(seg, -UCS_PTR_BYTE_DIFF(txwq->qstart,
                                                          txwq->qend));
    }
    return uct_ib_mlx5_txwq_wrap_none(txwq, seg);
}


/* Wrapping of 'data' could happen, even past 'qend' boundary.
 * Do not check for alignment. */
static UCS_F_ALWAYS_INLINE void *
uct_ib_mlx5_txwq_wrap_data(uct_ib_mlx5_txwq_t *txwq, void *data)
{
    if (ucs_unlikely(data >= txwq->qend)) {
        data = UCS_PTR_BYTE_OFFSET(data, -UCS_PTR_BYTE_DIFF(txwq->qstart,
                                                            txwq->qend));
    }
    return data;
}


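/* Fill an RDMA remote-address segment with the remote address and rkey,
 * converting both to network byte order. */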
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_ep_set_rdma_seg(struct mlx5_wqe_raddr_seg *raddr, uint64_t rdma_raddr,
                            uct_rkey_t rdma_rkey)
{
#if defined(__SSE4_2__)
    *(__m128i*)raddr = _mm_shuffle_epi8(
                _mm_set_epi64x(rdma_rkey, rdma_raddr),
                _mm_set_epi8(0, 0, 0, 0,            /* reserved */
                             8, 9, 10, 11,          /* rkey */
                             0, 1, 2, 3, 4, 5, 6, 7 /* rdma_raddr */
                             ));
#elif defined(__ARM_NEON)
    uint8x16_t table =  {7,  6,  5, 4, 3, 2, 1, 0, /* rdma_raddr */
                         11, 10, 9, 8,             /* rkey */
                         16, 16, 16, 16};          /* reserved (set 0) */
    uint64x2_t data = {rdma_raddr, rdma_rkey};
    *(uint8x16_t *)raddr = vqtbl1q_u8((uint8x16_t)data, table);
#else
    raddr->raddr = htobe64(rdma_raddr);
    raddr->rkey  = htonl(rdma_rkey);
#endif
}


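/**
 * Fill a datagram segment from the given address vector. Sets the qkey (UD) or
 * DC key (DCI), the base address vector fields, and, when an extended AV with
 * GRH is supplied, the GRH fields as well.
 */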
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_set_dgram_seg(struct mlx5_wqe_datagram_seg *seg,
                          uct_ib_mlx5_base_av_t *av, struct mlx5_grh_av *grh_av,
                          int qp_type)
{
    if (qp_type == IBV_QPT_UD) {
        mlx5_av_base(&seg->av)->key.qkey.qkey  = htonl(UCT_IB_KEY);
#if HAVE_TL_DC
    } else if (qp_type == UCT_IB_QPT_DCI) {
        mlx5_av_base(&seg->av)->key.dc_key     = htobe64(UCT_IB_KEY);
#endif
    }
    ucs_assert(av != NULL);
    /* cppcheck-suppress ctunullpointer */
    mlx5_av_base(&seg->av)->dqp_dct            = av->dqp_dct;
    mlx5_av_base(&seg->av)->stat_rate_sl       = av->stat_rate_sl;
    mlx5_av_base(&seg->av)->fl_mlid            = av->fl_mlid;
    mlx5_av_base(&seg->av)->rlid               = av->rlid;

    if (grh_av) {
        ucs_assert(av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV);
#if HAVE_STRUCT_MLX5_GRH_AV_RMAC
        memcpy(mlx5_av_grh(&seg->av)->rmac, grh_av->rmac,
               sizeof(mlx5_av_grh(&seg->av)->rmac));
#endif
        mlx5_av_grh(&seg->av)->tclass      = grh_av->tclass;
        mlx5_av_grh(&seg->av)->hop_limit   = grh_av->hop_limit;
        mlx5_av_grh(&seg->av)->grh_gid_fl  = grh_av->grh_gid_fl;
        memcpy(mlx5_av_grh(&seg->av)->rgid, grh_av->rgid,
               sizeof(mlx5_av_grh(&seg->av)->rgid));
    } else if (av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV) {
        mlx5_av_grh(&seg->av)->grh_gid_fl  = 0;
    }
}


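/**
 * Fill a WQE control segment. The data-segment count is derived from wqe_size,
 * and the fields are written in the hardware (big-endian) layout, using a
 * single 16-byte shuffle when SSE4.2 or NEON is available.
 */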
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg* ctrl, uint16_t pi,
                         uint8_t opcode, uint8_t opmod, uint32_t qp_num,
                         uint8_t fm_ce_se, unsigned wqe_size)
{
    uint8_t ds;

    ucs_assert(((unsigned long)ctrl % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
    ds = ucs_div_round_up(wqe_size, UCT_IB_MLX5_WQE_SEG_SIZE);
#if defined(__SSE4_2__)
    *(__m128i *) ctrl = _mm_shuffle_epi8(
                    _mm_set_epi32(qp_num, ds, pi,
                                  (opcode << 16) | (opmod << 8) | fm_ce_se), /* OR of constants */
                    _mm_set_epi8(0, 0, 0, 0, /* immediate */
                                 0,          /* signal/fence_mode */
                                 0, 0,       /* reserved */
                                 0,          /* signature */
                                 8,          /* data size */
                                 12, 13, 14, /* QP num */
                                 2,          /* opcode */
                                 4, 5,       /* sw_pi in BE */
                                 1           /* opmod */
                                 ));
#elif defined(__ARM_NEON)
    uint8x16_t table = {1,               /* opmod */
                        5,  4,           /* sw_pi in BE */
                        2,               /* opcode */
                        14, 13, 12,      /* QP num */
                        8,               /* data size */
                        16,              /* signature (set 0) */
                        16, 16,          /* reserved (set 0) */
                        0,               /* signal/fence_mode */
                        16, 16, 16, 16}; /* immediate (set 0) */
    uint32x4_t data = {(opcode << 16) | (opmod << 8) | (uint32_t)fm_ce_se,
                       pi, ds, qp_num};
    *(uint8x16_t *)ctrl = vqtbl1q_u8((uint8x16_t)data, table);
#else
    ctrl->opmod_idx_opcode = (opcode << 24) | (htons(pi) << 8) | opmod;
    ctrl->qpn_ds           = htonl((qp_num << 8) | ds);
    ctrl->fm_ce_se         = fm_ce_se;
#endif
}


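/* Same as uct_ib_mlx5_set_ctrl_seg(), but also writes the immediate data field
 * of the control segment. */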
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_set_ctrl_seg_with_imm(struct mlx5_wqe_ctrl_seg* ctrl, uint16_t pi,
                                  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
                                  uint8_t fm_ce_se, unsigned wqe_size, uint32_t imm)
{
    uint8_t ds;

    ucs_assert(((unsigned long)ctrl % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
    ds = ucs_div_round_up(wqe_size, UCT_IB_MLX5_WQE_SEG_SIZE);
#if defined(__SSE4_2__)
    *(__m128i *) ctrl = _mm_shuffle_epi8(
                    _mm_set_epi32(qp_num, imm, (ds << 16) | pi,
                                  (opcode << 16) | (opmod << 8) | fm_ce_se), /* OR of constants */
                    _mm_set_epi8(11, 10, 9, 8, /* immediate */
                                 0,            /* signal/fence_mode */
                                 0, 0,         /* reserved */
                                 0,            /* signature */
                                 6,            /* data size */
                                 12, 13, 14,   /* QP num */
                                 2,            /* opcode */
                                 4, 5,         /* sw_pi in BE */
                                 1             /* opmod */
                                 ));
#elif defined(__ARM_NEON)
    uint8x16_t table = {1,               /* opmod */
                        5,  4,           /* sw_pi in BE */
                        2,               /* opcode */
                        14, 13, 12,      /* QP num */
                        6,               /* data size */
                        16,              /* signature (set 0) */
                        16, 16,          /* reserved (set 0) */
                        0,               /* signal/fence_mode */
                        8, 9, 10, 11};   /* immediate */
    uint32x4_t data = {(opcode << 16) | (opmod << 8) | (uint32_t)fm_ce_se,
                       (ds << 16) | pi, imm, qp_num};
    *(uint8x16_t *)ctrl = vqtbl1q_u8((uint8x16_t)data, table);
#else
    ctrl->opmod_idx_opcode = (opcode << 24) | (htons(pi) << 8) | opmod;
    ctrl->qpn_ds           = htonl((qp_num << 8) | ds);
    ctrl->fm_ce_se         = fm_ce_se;
    ctrl->imm              = imm;
#endif
}


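/* Fill a data (scatter/gather) segment with the buffer address, length and
 * local key, in network byte order. */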
static UCS_F_ALWAYS_INLINE void
uct_ib_mlx5_set_data_seg(struct mlx5_wqe_data_seg *dptr,
                         const void *address,
                         unsigned length, uint32_t lkey)
{
    ucs_assert(((unsigned long)dptr % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
    dptr->byte_count = htonl(length);
    dptr->lkey       = htonl(lkey);
    dptr->addr       = htobe64((uintptr_t)address);
}


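/**
 * Fill a run of data segments from an IOV array, skipping zero-length entries
 * and wrapping around the work queue end as needed. Returns the total size in
 * bytes added to the WQE.
 */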
static UCS_F_ALWAYS_INLINE
size_t uct_ib_mlx5_set_data_seg_iov(uct_ib_mlx5_txwq_t *txwq,
                                    struct mlx5_wqe_data_seg *dptr,
                                    const uct_iov_t *iov, size_t iovcnt)
{
    size_t wqe_size = 0;
    size_t iov_it;

    for (iov_it = 0; iov_it < iovcnt; ++iov_it) {
        if (!iov[iov_it].length) { /* Skip zero-length iov entries */
            continue;
        }
        ucs_assert(iov[iov_it].memh != UCT_MEM_HANDLE_NULL);

        /* place data into the buffer */
        dptr = uct_ib_mlx5_txwq_wrap_any(txwq, dptr);
        uct_ib_mlx5_set_data_seg(dptr, iov[iov_it].buffer,
                                 uct_iov_get_length(iov + iov_it),
                                 uct_ib_memh_get_lkey(iov[iov_it].memh));
        wqe_size += sizeof(*dptr);
        ++dptr;
    }

    return wqe_size;
}


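/* Copy a single building block (MLX5_SEND_WQE_BB bytes) from src to dst,
 * using the widest word copy the architecture supports. */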
static UCS_F_ALWAYS_INLINE void uct_ib_mlx5_bf_copy_bb(void * restrict dst,
                                                       void * restrict src)
{
#if defined(__SSE4_2__)
    UCS_WORD_COPY(__m128i, dst, __m128i, src, MLX5_SEND_WQE_BB);
#elif defined(__ARM_NEON)
    UCS_WORD_COPY(int16x8_t, dst, int16x8_t, src, MLX5_SEND_WQE_BB);
#else /* NO SIMD support */
    UCS_WORD_COPY(uint64_t, dst, uint64_t, src, MLX5_SEND_WQE_BB);
#endif
}

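/* Copy num_bb building blocks from the work queue to the BlueFlame register,
 * wrapping src around the queue end. Returns the source position after the
 * last copied block. */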
static UCS_F_ALWAYS_INLINE
void *uct_ib_mlx5_bf_copy(void *dst, void *src, uint16_t num_bb,
                          const uct_ib_mlx5_txwq_t *wq)
{
    uint16_t n;

    for (n = 0; n < num_bb; ++n) {
        uct_ib_mlx5_bf_copy_bb(dst, src);
        dst = UCS_PTR_BYTE_OFFSET(dst, MLX5_SEND_WQE_BB);
        src = UCS_PTR_BYTE_OFFSET(src, MLX5_SEND_WQE_BB);
        if (ucs_unlikely(src == wq->qend)) {
            src = wq->qstart;
        }
    }
    return src;
}

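/**
 * Post a work request to the send queue: write the doorbell record, ring the
 * doorbell according to the MMIO mode (BlueFlame or DB), and advance the queue
 * pointers. Returns the number of building blocks consumed by the *previous*
 * WQE (see the comment near the queue-pointer update below).
 */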
static UCS_F_ALWAYS_INLINE uint16_t
uct_ib_mlx5_post_send(uct_ib_mlx5_txwq_t *wq,
                      struct mlx5_wqe_ctrl_seg *ctrl, unsigned wqe_size)
{
    uint16_t sw_pi, num_bb, res_count;
    void *src, *dst;

    ucs_assert(((unsigned long)ctrl % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
    num_bb  = ucs_div_round_up(wqe_size, MLX5_SEND_WQE_BB);
    sw_pi   = wq->sw_pi;

    uct_ib_mlx5_txwq_validate(wq, num_bb);
    /* TODO Put a memory store fence here too, to prevent WC being flushed after DBrec */
    ucs_memory_cpu_store_fence();

    /* Write doorbell record */
    *wq->dbrec = htonl(sw_pi += num_bb);

    /* Make sure that the doorbell record is written before ringing the doorbell */
    ucs_memory_bus_store_fence();

    /* Set up copy pointers */
    dst = wq->reg->addr.ptr;
    src = ctrl;

    ucs_assert(wqe_size <= UCT_IB_MLX5_BF_REG_SIZE);
    ucs_assert(num_bb <= UCT_IB_MLX5_MAX_BB);
    if (ucs_likely(wq->reg->mode == UCT_IB_MLX5_MMIO_MODE_BF_POST)) {
        src = uct_ib_mlx5_bf_copy(dst, src, num_bb, wq);
        ucs_memory_bus_cacheline_wc_flush();
    } else if (wq->reg->mode == UCT_IB_MLX5_MMIO_MODE_BF_POST_MT) {
        src = uct_ib_mlx5_bf_copy(dst, src, num_bb, wq);
        /* Make sure that HW observes WC writes in order, in case of multiple
         * threads which use the same BF register in a serialized way
         */
        ucs_memory_cpu_wc_fence();
    } else {
        ucs_assert(wq->reg->mode == UCT_IB_MLX5_MMIO_MODE_DB);
        *(volatile uint64_t*)dst = *(volatile uint64_t*)src;
        ucs_memory_bus_store_fence();
        src = UCS_PTR_BYTE_OFFSET(src, num_bb * MLX5_SEND_WQE_BB);
        src = uct_ib_mlx5_txwq_wrap_any(wq, src);
    }

    /* We don't want the compiler to reorder instructions and hurt latency */
    ucs_compiler_fence();

    /*
     * Advance the queue pointer.
     * We return the number of BBs the *previous* WQE consumed, since CQEs
     * report the index of the first BB rather than the last. We have reserved
     * QP space for at least UCT_IB_MLX5_MAX_BB to accommodate this.
     */
    ucs_assert(ctrl == wq->curr);
    res_count       = wq->sw_pi - wq->prev_sw_pi;
    wq->curr        = src;
    wq->prev_sw_pi += res_count;
    ucs_assert(wq->prev_sw_pi == wq->sw_pi);
    wq->sw_pi       = sw_pi;

    /* Flip BF register */
    wq->reg->addr.uint ^= UCT_IB_MLX5_BF_REG_SIZE;
    return res_count;
}


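/* Return a pointer to the SRQ WQE segment at the given index (wrapped to the
 * SRQ size). */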
static inline uct_ib_mlx5_srq_seg_t *
uct_ib_mlx5_srq_get_wqe(uct_ib_mlx5_srq_t *srq, uint16_t wqe_index)
{
    return UCS_PTR_BYTE_OFFSET(srq->buf, (wqe_index & srq->mask) * srq->stride);
}

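/* Fill common QP creation attributes (PD and, when supported, the resource
 * domain) after acquiring the interface resource domain. */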
static ucs_status_t UCS_F_MAYBE_UNUSED
uct_ib_mlx5_iface_fill_attr(uct_ib_iface_t *iface,
                            uct_ib_mlx5_qp_t *qp,
                            uct_ib_mlx5_qp_attr_t *attr)
{
    ucs_status_t status;

    status = uct_ib_mlx5_iface_get_res_domain(iface, qp);
    if (status != UCS_OK) {
        return status;
    }

#if HAVE_DECL_IBV_EXP_CREATE_QP
    attr->super.ibv.comp_mask       = IBV_EXP_QP_INIT_ATTR_PD;
    attr->super.ibv.pd              = uct_ib_iface_md(iface)->pd;
#elif HAVE_DECL_IBV_CREATE_QP_EX
    attr->super.ibv.comp_mask       = IBV_QP_INIT_ATTR_PD;
    if (qp->verbs.rd->pd != NULL) {
        attr->super.ibv.pd          = qp->verbs.rd->pd;
    } else {
        attr->super.ibv.pd          = uct_ib_iface_md(iface)->pd;
    }
#endif

#ifdef HAVE_IBV_EXP_RES_DOMAIN
    attr->super.ibv.comp_mask      |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN;
    attr->super.ibv.res_domain      = qp->verbs.rd->ibv_domain;
#endif

    return UCS_OK;
}

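/* Destroy a QP created either through verbs or through DEVX, according to its
 * object type. */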
static void UCS_F_MAYBE_UNUSED
uct_ib_mlx5_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp)
{
    switch (qp->type) {
    case UCT_IB_MLX5_OBJ_TYPE_VERBS:
        uct_ib_destroy_qp(qp->verbs.qp);
        break;
    case UCT_IB_MLX5_OBJ_TYPE_DEVX:
        uct_ib_mlx5_devx_destroy_qp(md, qp);
        break;
    case UCT_IB_MLX5_OBJ_TYPE_LAST:
        break;
    }
}