/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2014.  ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#ifndef UCT_RC_EP_H
#define UCT_RC_EP_H

#include "rc_iface.h"

#include <uct/api/uct.h>
#include <ucs/debug/debug.h>


#define RC_UNSIGNALED_INF UINT16_MAX

enum {
    UCT_RC_FC_STAT_NO_CRED,
    UCT_RC_FC_STAT_TX_GRANT,
    UCT_RC_FC_STAT_TX_PURE_GRANT,
    UCT_RC_FC_STAT_TX_SOFT_REQ,
    UCT_RC_FC_STAT_TX_HARD_REQ,
    UCT_RC_FC_STAT_RX_GRANT,
    UCT_RC_FC_STAT_RX_PURE_GRANT,
    UCT_RC_FC_STAT_RX_SOFT_REQ,
    UCT_RC_FC_STAT_RX_HARD_REQ,
    UCT_RC_FC_STAT_FC_WND,
    UCT_RC_FC_STAT_LAST
};

enum {
    UCT_RC_TXQP_STAT_QP_FULL,
    UCT_RC_TXQP_STAT_SIGNAL,
    UCT_RC_TXQP_STAT_LAST
};

/*
 * Auxiliary AM ID bits used by the FC protocol.
 */
enum {
    /* Soft Credit Request: indicates that the peer should piggy-back a
     * credit grant on an AM going in the opposite direction (if any).
     * Can be bundled with UCT_RC_EP_FC_FLAG_GRANT. */
    UCT_RC_EP_FC_FLAG_SOFT_REQ  = UCS_BIT(UCT_AM_ID_BITS),

    /* Hard Credit Request: indicates that the window is close to exhaustion.
     * The peer must send a separate AM with a credit grant as soon as it
     * receives an AM with this bit set. Can be bundled with
     * UCT_RC_EP_FC_FLAG_GRANT. */
    UCT_RC_EP_FC_FLAG_HARD_REQ  = UCS_BIT((UCT_AM_ID_BITS) + 1),

    /* Credit Grant: the endpoint should update its FC window as soon as it
     * receives an AM with this bit set. Can be bundled with either the soft
     * or the hard request bit. */
    UCT_RC_EP_FC_FLAG_GRANT     = UCS_BIT((UCT_AM_ID_BITS) + 2),

    /* Special FC AM with Credit Grant: an empty message that only indicates
     * a credit grant. Cannot be bundled with any other FC flag (it consumes
     * all 3 FC bits). */
    UCT_RC_EP_FC_PURE_GRANT     = (UCT_RC_EP_FC_FLAG_HARD_REQ |
                                   UCT_RC_EP_FC_FLAG_SOFT_REQ |
                                   UCT_RC_EP_FC_FLAG_GRANT)
};
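
/*
 * Resulting layout of the 8-bit AM ID field once the FC bits are applied
 * (illustrative sketch, assuming UCT_AM_ID_BITS == 5 as in uct/api/uct_def.h):
 *
 *     bit:     7       6          5          4 ... 0
 *           +-------+----------+----------+---------+
 *           | GRANT | HARD_REQ | SOFT_REQ |  AM ID  |
 *           +-------+----------+----------+---------+
 *
 * All three bits set at once are reserved for UCT_RC_EP_FC_PURE_GRANT.
 */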

/*
 * FC protocol header mask
 */
#define UCT_RC_EP_FC_MASK UCT_RC_EP_FC_PURE_GRANT

/*
 * Macro generating the names of the AMO completion handler functions.
 */
#define UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(_num_bits, _is_be) \
    uct_rc_ep_atomic_handler_##_num_bits##_be##_is_be
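
/* For example, UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(32, 1) expands to the
 * identifier uct_rc_ep_atomic_handler_32_be1. */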

/*
 * Check for send resources
 */
#define UCT_RC_CHECK_CQE_RET(_iface, _ep, _ret) \
    /* tx_moderation == 0 for TLs which don't support it */ \
    if (ucs_unlikely((_iface)->tx.cq_available <= \
        (signed)(_iface)->config.tx_moderation)) { \
        if (uct_rc_ep_check_cqe(_iface, _ep) != UCS_OK) { \
            return _ret; \
        } \
    }

#define UCT_RC_CHECK_TXQP_RET(_iface, _ep, _ret) \
    if (uct_rc_txqp_available(&(_ep)->txqp) <= 0) { \
        UCS_STATS_UPDATE_COUNTER((_ep)->txqp.stats, UCT_RC_TXQP_STAT_QP_FULL, 1); \
        UCS_STATS_UPDATE_COUNTER((_ep)->super.stats, UCT_EP_STAT_NO_RES, 1); \
        return _ret; \
    }

#define UCT_RC_CHECK_NUM_RDMA_READ(_iface) \
    if (ucs_unlikely((_iface)->tx.reads_available <= 0)) { \
        UCS_STATS_UPDATE_COUNTER((_iface)->stats, \
                                 UCT_RC_IFACE_STAT_NO_READS, 1); \
        return UCS_ERR_NO_RESOURCE; \
    }

#define UCT_RC_RDMA_READ_POSTED(_iface, _length) \
    { \
        ucs_assert((_iface)->tx.reads_available > 0); \
        (_iface)->tx.reads_available -= (_length); \
    }
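
/*
 * The two macros above are used as a pair around posting an RDMA_READ
 * (illustrative sketch; "post the read WQE" stands for transport-specific
 * code):
 *
 *     UCT_RC_CHECK_NUM_RDMA_READ(iface);       // bail out with NO_RESOURCE
 *     ... post the read WQE ...
 *     UCT_RC_RDMA_READ_POSTED(iface, length);  // charge 'length' against
 *                                              // tx.reads_available
 */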

#define UCT_RC_CHECK_RES(_iface, _ep) \
    UCT_RC_CHECK_CQE_RET(_iface, _ep, UCS_ERR_NO_RESOURCE) \
    UCT_RC_CHECK_TXQP_RET(_iface, _ep, UCS_ERR_NO_RESOURCE)
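
/*
 * Typical use in a transport send path (illustrative sketch; the function
 * and its body are hypothetical):
 *
 *     ucs_status_t example_ep_put_short(uct_rc_iface_t *iface, uct_rc_ep_t *ep)
 *     {
 *         UCT_RC_CHECK_RES(iface, ep); // may return UCS_ERR_NO_RESOURCE
 *         ... post the work request ...
 *         return UCS_OK;
 *     }
 */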

/**
 * RMA and AMO operations are not allowed if there are no RDMA_READ credits.
 * Otherwise, the operation ordering which the fence operation relies on
 * could be broken.
 */
#define UCT_RC_CHECK_RMA_RES(_iface, _ep) \
    UCT_RC_CHECK_RES(_iface, _ep) \
    UCT_RC_CHECK_NUM_RDMA_READ(_iface)

/*
 * Check for FC credits and add FC protocol bits (if any)
 */
#define UCT_RC_CHECK_FC_WND(_fc, _stats) \
    if ((_fc)->fc_wnd <= 0) { \
        UCS_STATS_UPDATE_COUNTER((_fc)->stats, UCT_RC_FC_STAT_NO_CRED, 1); \
        UCS_STATS_UPDATE_COUNTER(_stats, UCT_EP_STAT_NO_RES, 1); \
        return UCS_ERR_NO_RESOURCE; \
    }


#define UCT_RC_UPDATE_FC_WND(_iface, _fc) \
    { \
        /* For performance reasons, prefer to update fc_wnd unconditionally */ \
        (_fc)->fc_wnd--; \
        \
        if ((_iface)->config.fc_enabled) { \
            UCS_STATS_SET_COUNTER((_fc)->stats, UCT_RC_FC_STAT_FC_WND, \
                                  (_fc)->fc_wnd); \
        } \
    }

#define UCT_RC_CHECK_FC(_iface, _ep, _am_id) \
    { \
        if (ucs_unlikely((_ep)->fc.fc_wnd <= (_iface)->config.fc_soft_thresh)) { \
            if ((_iface)->config.fc_enabled) { \
                UCT_RC_CHECK_FC_WND(&(_ep)->fc, (_ep)->super.stats); \
                (_am_id) |= uct_rc_fc_req_moderation(&(_ep)->fc, _iface); \
            } else { \
                /* Set fc_wnd to max, to send as much as possible without checks */ \
                (_ep)->fc.fc_wnd = INT16_MAX; \
            } \
        } \
        (_am_id) |= uct_rc_fc_get_fc_hdr((_ep)->fc.flags); /* take grant bit */ \
    }

#define UCT_RC_UPDATE_FC(_iface, _ep, _fc_hdr) \
    { \
        if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_GRANT) { \
            UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_GRANT, 1); \
        } \
        if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_SOFT_REQ) { \
            UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_SOFT_REQ, 1); \
        } else if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_HARD_REQ) { \
            UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_HARD_REQ, 1); \
        } \
        \
        (_ep)->fc.flags = 0; \
        \
        UCT_RC_UPDATE_FC_WND(_iface, &(_ep)->fc) \
    }
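
/*
 * Illustrative sketch of how these macros combine in an AM send path
 * (hypothetical code; 'id' is the AM ID byte that goes on the wire):
 *
 *     uint8_t id = am_id;
 *     UCT_RC_CHECK_RES(iface, ep);     // CQ and QP send resources
 *     UCT_RC_CHECK_FC(iface, ep, id);  // may fail, may add FC request/grant bits
 *     ... post the AM with header 'id' ...
 *     UCT_RC_UPDATE_FC(iface, ep, id); // inspects the FC bits in 'id' and
 *                                      // charges the send against fc_wnd
 */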


/* Common TX queue-pair state for all RC and DC transports */
struct uct_rc_txqp {
    ucs_queue_head_t    outstanding;
    /* The value RC_UNSIGNALED_INF forces a signaled send in the moderation
     * logic when CQ credits are close to zero (less than tx_moderation) */
    uint16_t            unsignaled;
    /* Unsignaled value saved before it was set to RC_UNSIGNALED_INF, so that
     * the correct number of CQ credits can be returned on TX completion */
    uint16_t            unsignaled_store;
    /* If unsignaled was accumulated into unsignaled_store several times,
     * return the credits only when this counter drops to 0, because the
     * exact value cannot be returned on each signaled completion */
    uint16_t            unsignaled_store_count;
    int16_t             available;
    UCS_STATS_NODE_DECLARE(stats)
};

typedef struct uct_rc_fc {
    /* No more than fc_wnd active messages can be sent without acknowledgment */
    int16_t             fc_wnd;
    /* Currently used only by the FC protocol (the 3 highest bits) */
    uint8_t             flags;
    UCS_STATS_NODE_DECLARE(stats)
} uct_rc_fc_t;

struct uct_rc_ep {
    uct_base_ep_t       super;
    uct_rc_txqp_t       txqp;
    ucs_list_link_t     list;
    ucs_arbiter_group_t arb_group;
    uct_rc_fc_t         fc;
    uint16_t            atomic_mr_offset;
    uint8_t             path_index;
};

UCS_CLASS_DECLARE(uct_rc_ep_t, uct_rc_iface_t*, uint32_t, const uct_ep_params_t*);


void uct_rc_ep_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type,
                           void *data, size_t length, size_t valid_length,
                           char *buffer, size_t max);

void uct_rc_ep_get_bcopy_handler(uct_rc_iface_send_op_t *op, const void *resp);

void uct_rc_ep_get_bcopy_handler_no_completion(uct_rc_iface_send_op_t *op,
                                               const void *resp);

void uct_rc_ep_get_zcopy_completion_handler(uct_rc_iface_send_op_t *op,
                                            const void *resp);

void uct_rc_ep_send_op_completion_handler(uct_rc_iface_send_op_t *op,
                                          const void *resp);

void uct_rc_ep_flush_op_completion_handler(uct_rc_iface_send_op_t *op,
                                           const void *resp);

ucs_status_t uct_rc_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n,
                                   unsigned flags);

void uct_rc_ep_pending_purge(uct_ep_h ep, uct_pending_purge_callback_t cb,
                             void *arg);

ucs_arbiter_cb_result_t uct_rc_ep_process_pending(ucs_arbiter_t *arbiter,
                                                  ucs_arbiter_group_t *group,
                                                  ucs_arbiter_elem_t *elem,
                                                  void *arg);

ucs_status_t uct_rc_fc_init(uct_rc_fc_t *fc, int16_t winsize
                            UCS_STATS_ARG(ucs_stats_node_t* stats_parent));
void uct_rc_fc_cleanup(uct_rc_fc_t *fc);

ucs_status_t uct_rc_ep_fc_grant(uct_pending_req_t *self);

void uct_rc_txqp_purge_outstanding(uct_rc_txqp_t *txqp, ucs_status_t status,
                                   int is_log);

ucs_status_t uct_rc_ep_flush(uct_rc_ep_t *ep, int16_t max_available,
                             unsigned flags);

ucs_status_t uct_rc_ep_check_cqe(uct_rc_iface_t *iface, uct_rc_ep_t *ep);

void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(32, 0)(uct_rc_iface_send_op_t *op,
                                                   const void *resp);
void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(32, 1)(uct_rc_iface_send_op_t *op,
                                                   const void *resp);
void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(64, 0)(uct_rc_iface_send_op_t *op,
                                                   const void *resp);
void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(64, 1)(uct_rc_iface_send_op_t *op,
                                                   const void *resp);

ucs_status_t uct_rc_txqp_init(uct_rc_txqp_t *txqp, uct_rc_iface_t *iface,
                              uint32_t qp_num
                              UCS_STATS_ARG(ucs_stats_node_t* stats_parent));
void uct_rc_txqp_cleanup(uct_rc_txqp_t *txqp);

static inline int16_t uct_rc_txqp_available(uct_rc_txqp_t *txqp)
{
    return txqp->available;
}

static inline void uct_rc_txqp_available_add(uct_rc_txqp_t *txqp, int16_t val)
{
    txqp->available += val;
}

static inline void uct_rc_txqp_available_set(uct_rc_txqp_t *txqp, int16_t val)
{
    txqp->available = val;
}

static inline uint16_t uct_rc_txqp_unsignaled(uct_rc_txqp_t *txqp)
{
    return txqp->unsignaled;
}

static UCS_F_ALWAYS_INLINE
int uct_rc_fc_has_resources(uct_rc_iface_t *iface, uct_rc_fc_t *fc)
{
    /* When FC is disabled, fc_wnd may still become 0 because it is
     * decremented unconditionally (for performance reasons) */
    return (fc->fc_wnd > 0) || !iface->config.fc_enabled;
}

static UCS_F_ALWAYS_INLINE int uct_rc_ep_has_tx_resources(uct_rc_ep_t *ep)
{
    uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_rc_iface_t);

    return (ep->txqp.available > 0) && uct_rc_fc_has_resources(iface, &ep->fc);
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_add_send_op(uct_rc_txqp_t *txqp, uct_rc_iface_send_op_t *op)
{
    /* NOTE: We insert the descriptor with the sequence number after the post,
     * because when polling completions, we get the number of completions
     * (rather than a zero-based completion index).
     */
    ucs_assert(op != NULL);
    ucs_assertv(!(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_INUSE), "op=%p", op);
    op->flags |= UCT_RC_IFACE_SEND_OP_FLAG_INUSE;
    ucs_queue_push(&txqp->outstanding, &op->queue);
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_add_send_op_sn(uct_rc_txqp_t *txqp, uct_rc_iface_send_op_t *op, uint16_t sn)
{
    ucs_trace_poll("txqp %p add send op %p sn %d handler %s", txqp, op, sn,
                   ucs_debug_get_symbol_name((void*)op->handler));
    op->sn = sn;
    uct_rc_txqp_add_send_op(txqp, op);
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_add_send_comp(uct_rc_iface_t *iface, uct_rc_txqp_t *txqp,
                          uct_rc_send_handler_t handler, uct_completion_t *comp,
                          uint16_t sn, uint16_t flags, size_t length)
{
    uct_rc_iface_send_op_t *op;

    if (comp == NULL) {
        return;
    }

    op            = uct_rc_iface_get_send_op(iface);
    op->handler   = handler;
    op->user_comp = comp;
    op->flags    |= flags;
    op->length    = length;
    uct_rc_txqp_add_send_op_sn(txqp, op, sn);
}

static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_txqp_add_flush_comp(uct_rc_iface_t *iface, uct_base_ep_t *ep,
                           uct_rc_txqp_t *txqp, uct_completion_t *comp,
                           uint16_t sn)
{
    uct_rc_iface_send_op_t *op;

    if (comp != NULL) {
        op = (uct_rc_iface_send_op_t*)ucs_mpool_get(&iface->tx.flush_mp);
        if (ucs_unlikely(op == NULL)) {
            ucs_error("Failed to allocate flush completion");
            return UCS_ERR_NO_MEMORY;
        }

        op->flags     = 0;
        op->user_comp = comp;
        uct_rc_txqp_add_send_op_sn(txqp, op, sn);
        VALGRIND_MAKE_MEM_DEFINED(op, sizeof(*op)); /* handler set by mpool init */
    }
    UCT_TL_EP_STAT_FLUSH_WAIT(ep);

    return UCS_INPROGRESS;
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_completion_op(uct_rc_iface_send_op_t *op, const void *resp)
{
    ucs_trace_poll("complete op %p sn %d handler %s", op, op->sn,
                   ucs_debug_get_symbol_name((void*)op->handler));
    ucs_assert(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_INUSE);
    op->flags &= ~(UCT_RC_IFACE_SEND_OP_FLAG_INUSE |
                   UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY);
    op->handler(op, resp);
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_completion_desc(uct_rc_txqp_t *txqp, uint16_t sn)
{
    uct_rc_iface_send_op_t *op;

    ucs_trace_poll("txqp %p complete ops up to sn %d", txqp, sn);
    ucs_queue_for_each_extract(op, &txqp->outstanding, queue,
                               UCS_CIRCULAR_COMPARE16(op->sn, <=, sn)) {
        uct_rc_txqp_completion_op(op, ucs_derived_of(op, uct_rc_iface_send_desc_t) + 1);
    }
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_completion_inl_resp(uct_rc_txqp_t *txqp, const void *resp, uint16_t sn)
{
    uct_rc_iface_send_op_t *op;

    ucs_trace_poll("txqp %p complete ops up to sn %d", txqp, sn);
    ucs_queue_for_each_extract(op, &txqp->outstanding, queue,
                               UCS_CIRCULAR_COMPARE16(op->sn, <=, sn)) {
        ucs_assert(!(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY));
        uct_rc_txqp_completion_op(op, resp);
    }
}

static UCS_F_ALWAYS_INLINE uint8_t
uct_rc_iface_tx_moderation(uct_rc_iface_t *iface, uct_rc_txqp_t *txqp, uint8_t flag)
{
    return (txqp->unsignaled >= iface->config.tx_moderation) ? flag : 0;
}

static UCS_F_ALWAYS_INLINE void
uct_rc_txqp_posted(uct_rc_txqp_t *txqp, uct_rc_iface_t *iface, uint16_t res_count,
                   int signaled)
{
    if (signaled) {
        ucs_assert(uct_rc_iface_have_tx_cqe_avail(iface));
        txqp->unsignaled = 0;
        UCS_STATS_UPDATE_COUNTER(txqp->stats, UCT_RC_TXQP_STAT_SIGNAL, 1);
    } else {
        ucs_assert(txqp->unsignaled != RC_UNSIGNALED_INF);
        ++txqp->unsignaled;
    }

    /* Reserve CQ credits for every posted operation, in case it completes
     * with an error */
    iface->tx.cq_available -= res_count;
    txqp->available -= res_count;
}
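
/* For example, with tx_moderation == 8 (hypothetical value), the first 8
 * posts go unsignaled; uct_rc_iface_tx_moderation() then returns 'flag' so
 * the 9th post is signaled, and uct_rc_txqp_posted() resets 'unsignaled'
 * back to 0. */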

static UCS_F_ALWAYS_INLINE uint8_t
uct_rc_fc_get_fc_hdr(uint8_t id)
{
    return id & UCT_RC_EP_FC_MASK;
}

static UCS_F_ALWAYS_INLINE uint8_t
uct_rc_fc_req_moderation(uct_rc_fc_t *fc, uct_rc_iface_t *iface)
{
    return (fc->fc_wnd == iface->config.fc_hard_thresh) ?
            UCT_RC_EP_FC_FLAG_HARD_REQ :
           (fc->fc_wnd == iface->config.fc_soft_thresh) ?
            UCT_RC_EP_FC_FLAG_SOFT_REQ : 0;
}
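
/* For example, with fc_soft_thresh == 16 and fc_hard_thresh == 4 (hypothetical
 * values), the SOFT_REQ bit is added exactly when fc_wnd drops to 16 and the
 * HARD_REQ bit exactly when it drops to 4; no request bits are added for any
 * other window value. */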

static UCS_F_ALWAYS_INLINE int
uct_rc_ep_fm(uct_rc_iface_t *iface, uct_ib_fence_info_t* fi, int flag)
{
    int fence;

    /* A call to iface_fence increments the beat, so if the endpoint beat is
     * not in sync with the iface beat, the endpoint has not yet posted any
     * WQE with the fence flag */
    fence          = (fi->fence_beat != iface->tx.fi.fence_beat) ? flag : 0;
    fi->fence_beat = iface->tx.fi.fence_beat;
    return fence;
}
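
/*
 * Illustrative timeline (beat values are hypothetical): after a fence request
 * bumps iface->tx.fi.fence_beat from 3 to 4, the first uct_rc_ep_fm() call on
 * a given endpoint sees fi->fence_beat == 3 != 4, returns 'flag' (so the next
 * WQE is posted with the fence flag) and re-syncs fi->fence_beat to 4; later
 * calls on that endpoint return 0 until the beat changes again.
 */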

static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_ep_fence(uct_ep_h tl_ep, uct_ib_fence_info_t* fi, int fence)
{
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);

    /* If fence is requested and enabled by the configuration, schedule a
     * fence for the next RDMA operation */
    if (fence && (iface->config.fence_mode != UCT_RC_FENCE_MODE_NONE)) {
        fi->fence_beat = iface->tx.fi.fence_beat - 1;
    }

    UCT_TL_EP_STAT_FENCE(ucs_derived_of(tl_ep, uct_base_ep_t));
    return UCS_OK;
}

static UCS_F_ALWAYS_INLINE void
uct_rc_ep_fence_put(uct_rc_iface_t *iface, uct_ib_fence_info_t *fi,
                    uct_rkey_t *rkey, uint64_t *addr, uint16_t offset)
{
    if (uct_rc_ep_fm(iface, fi, 1)) {
        *rkey = uct_ib_resolve_atomic_rkey(*rkey, offset, addr);
    } else {
        *rkey = uct_ib_md_direct_rkey(*rkey);
    }
}

#endif