1 /**
2 * @file uct.h
3 * @date 2014-2020
4 * @copyright NVIDIA Corporation. All rights reserved.
5 * @copyright Mellanox Technologies Ltd. All rights reserved.
 * @copyright Oak Ridge National Laboratory. All rights reserved.
 * @copyright Advanced Micro Devices, Inc. All rights reserved.
8 * @brief Unified Communication Transport
9 */
10
11 #ifndef UCT_H_
12 #define UCT_H_
13
14 #include <uct/api/uct_def.h>
15 #include <uct/api/tl.h>
16 #include <uct/api/version.h>
17 #include <ucs/async/async_fwd.h>
18 #include <ucs/datastruct/callbackq.h>
19 #include <ucs/datastruct/linear_func.h>
20 #include <ucs/memory/memory_type.h>
21 #include <ucs/type/status.h>
22 #include <ucs/type/thread_mode.h>
23 #include <ucs/type/cpu_set.h>
24 #include <ucs/stats/stats_fwd.h>
25 #include <ucs/sys/compiler_def.h>
26 #include <ucs/sys/topo.h>
27
28 #include <sys/socket.h>
29 #include <stdio.h>
30 #include <sched.h>
31
32 BEGIN_C_DECLS
33
34 /** @file uct.h */
35
36 /**
37 * @defgroup UCT_API Unified Communication Transport (UCT) API
38 * @{
39 * This section describes UCT API.
40 * @}
41 */
42
43 /**
44 * @defgroup UCT_RESOURCE UCT Communication Resource
45 * @ingroup UCT_API
46 * @{
47 * This section describes a concept of the Communication Resource and routines
48 * associated with the concept.
49 * @}
50 */
51
52 /**
53 * @defgroup UCT_CONTEXT UCT Communication Context
54 * @ingroup UCT_API
55 * @{
56 *
57 * UCT context abstracts all the resources required for network communication.
 * It is designed to either share or isolate resources among the multiple
 * programming models used by an application.
60 *
61 * This section provides a detailed description of this concept and
62 * routines associated with it.
63 *
64 * @}
65 */
66
67 /**
68 * @defgroup UCT_MD UCT Memory Domain
69 * @ingroup UCT_API
70 * @{
71 * The Memory Domain abstracts resources required for network communication,
72 * which typically includes memory, transport mechanisms, compute and
73 * network resources. It is an isolation mechanism that can be employed
74 * by the applications for isolating resources between multiple programming models.
 * The attributes of the Memory Domain are defined by the structure @ref uct_md_attr.
76 * The communication and memory operations are defined in the context of Memory Domain.
77 *
78 * @}
79 */
80
81 /**
82 * @defgroup UCT_AM UCT Active messages
83 * @ingroup UCT_API
84 * @{
85 * Defines active message functions.
86 * @}
87 */
88
89 /**
90 * @defgroup UCT_RMA UCT Remote memory access operations
91 * @ingroup UCT_API
92 * @{
93 * Defines remote memory access operations.
94 * @}
95 */
96
97 /**
98 * @defgroup UCT_AMO UCT Atomic operations
99 * @ingroup UCT_API
100 * @{
101 * Defines atomic operations.
102 * @}
103 */
104
105 /**
106 * @defgroup UCT_TAG UCT Tag matching operations
107 * @ingroup UCT_API
108 * @{
109 * Defines tag matching operations.
110 * @}
111 */
112
113 /**
114 * @defgroup UCT_CLIENT_SERVER UCT client-server operations
115 * @ingroup UCT_API
116 * @{
117 * Defines client-server operations.
118 * The client-server API allows the connection establishment between an active
119 * side - a client, and its peer - the passive side - a server.
120 * The connection can be established through a UCT transport that supports
121 * listening and connecting via IP address and port (listening can also be on INADDR_ANY).
122 *
123 * The following is a general overview of the operations on the server side:
124 *
125 * Connecting:
126 * @ref uct_cm_open
127 * Open a connection manager.
128 * @ref uct_listener_create
129 * Create a listener on the CM and start listening on a given IP,port / INADDR_ANY.
130 * @ref uct_cm_listener_conn_request_callback_t
131 * This callback is invoked by the UCT transport to handle an incoming connection
132 * request from a client.
133 * Accept or reject the client's connection request.
134 * @ref uct_ep_create
135 * Connect to the client by creating an endpoint if the request is accepted.
136 * The server creates a new endpoint for every connection request that it accepts.
137 * @ref uct_cm_ep_priv_data_pack_callback_t
138 * This callback is invoked by the UCT transport to fill auxiliary data in
139 * the connection acknowledgement or reject notification back to the client.
140 * Send the client a connection acknowledgement or reject notification.
141 * Wait for an acknowledgment from the client, indicating that it is connected.
142 * @ref uct_cm_ep_server_conn_notify_callback_t
143 * This callback is invoked by the UCT transport to handle the connection
144 * notification from the client.
145 *
146 * Disconnecting:
147 * @ref uct_ep_disconnect
148 * Disconnect the server's endpoint from the client.
149 * Can be called when initiating a disconnect or when receiving a disconnect
150 * notification from the remote side.
151 * @ref uct_ep_disconnect_cb_t
152 * This callback is invoked by the UCT transport when the client side calls
153 * uct_ep_disconnect as well.
154 * @ref uct_ep_destroy
155 * Destroy the endpoint connected to the remote peer.
156 * If this function is called before the endpoint was disconnected, the
157 * @ref uct_ep_disconnect_cb_t will not be invoked.
158 *
159 * Destroying the server's resources:
160 * @ref uct_listener_destroy
161 * Destroy the listener object.
162 * @ref uct_cm_close
163 * Close the connection manager.
164 *
165 * The following is a general overview of the operations on the client side:
166 *
167 * Connecting:
168 * @ref uct_cm_open
169 * Open a connection manager.
170 * @ref uct_ep_create
171 * Create an endpoint for establishing a connection to the server.
172 * @ref uct_cm_ep_priv_data_pack_callback_t
173 * This callback is invoked by the UCT transport to fill the user's private data
174 * in the connection request to be sent to the server. This connection request
175 * should be created by the transport.
176 * Send the connection request to the server.
177 * Wait for an acknowledgment from the server, indicating that it is connected.
178 * @ref uct_cm_ep_client_connect_callback_t
179 * This callback is invoked by the UCT transport to handle a connection response
180 * from the server.
181 * After invoking this callback, the UCT transport will finalize the client's
182 * connection to the server.
183 * @ref uct_cm_client_ep_conn_notify
184 * After the client's connection establishment is completed, the client
185 * should call this function in which it sends a notification message to
186 * the server stating that it (the client) is connected.
187 * The notification message that is sent depends on the transport's
188 * implementation.
189 *
190 * Disconnecting:
191 * @ref uct_ep_disconnect
192 * Disconnect the client's endpoint from the server.
193 * Can be called when initiating a disconnect or when receiving a disconnect
194 * notification from the remote side.
195 * @ref uct_ep_disconnect_cb_t
196 * This callback is invoked by the UCT transport when the server side calls
197 * uct_ep_disconnect as well.
198 * @ref uct_ep_destroy
199 * Destroy the endpoint connected to the remote peer.
200 *
201 * Destroying the client's resources:
202 * @ref uct_cm_close
203 * Close the connection manager.
204 *
205 * @}
206 */
207
208 /**
209 * @ingroup UCT_RESOURCE
210 * @brief Memory domain resource descriptor.
211 *
212 * This structure describes a memory domain resource.
213 */
214 typedef struct uct_md_resource_desc {
215 char md_name[UCT_MD_NAME_MAX]; /**< Memory domain name */
216 } uct_md_resource_desc_t;
217
218
219 /**
220 * @ingroup UCT_RESOURCE
221 * @brief UCT component attributes field mask
222 *
223 * The enumeration allows specifying which fields in @ref uct_component_attr_t
224 * are present. It is used for backward compatibility support.
225 */
226 enum uct_component_attr_field {
227 UCT_COMPONENT_ATTR_FIELD_NAME = UCS_BIT(0), /**< Component name */
228 UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT = UCS_BIT(1), /**< MD resource count */
229 UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES = UCS_BIT(2), /**< MD resources array */
230 UCT_COMPONENT_ATTR_FIELD_FLAGS = UCS_BIT(3) /**< Capability flags */
231 };
232
233
234 /**
235 * @ingroup UCT_RESOURCE
236 * @brief UCT component attributes
237 *
238 * This structure defines the attributes for UCT component. It is used for
239 * @ref uct_component_query
240 */
241 typedef struct uct_component_attr {
242 /**
243 * Mask of valid fields in this structure, using bits from
244 * @ref uct_component_attr_field.
245 * Fields not specified in this mask will be ignored.
246 * Provides ABI compatibility with respect to adding new fields.
247 */
248 uint64_t field_mask;
249
250 /** Component name */
251 char name[UCT_COMPONENT_NAME_MAX];
252
253 /** Number of memory-domain resources */
254 unsigned md_resource_count;
255
256 /**
257 * Array of memory domain resources. When used, it should be initialized
258 * prior to calling @ref uct_component_query with a pointer to an array,
259 * which is large enough to hold all memory domain resource entries. After
260 * the call, this array will be filled with information about existing
261 * memory domain resources.
262 * In order to allocate this array, you can call @ref uct_component_query
263 * twice: The first time would only obtain the amount of entries required,
264 * by specifying @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT in
265 * field_mask. Then the array could be allocated with the returned number of
266 * entries, and passed to a second call to @ref uct_component_query, this
267 * time setting field_mask to @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES.
268 */
269 uct_md_resource_desc_t *md_resources;
270
271 /**
272 * Flags as defined by UCT_COMPONENT_FLAG_xx.
273 */
274 uint64_t flags;
275 } uct_component_attr_t;
276
277
278 /**
279 * @ingroup UCT_RESOURCE
280 * @brief Capability flags of @ref uct_component_h.
281 *
282 * The enumeration defines bit mask of @ref uct_component_h capabilities in
283 * @ref uct_component_attr_t::flags which is set by @ref uct_component_query.
284 */
285 enum {
286 /**
287 * If set, the component supports @ref uct_cm_h functionality.
288 * See @ref uct_cm_open for details.
289 */
290 UCT_COMPONENT_FLAG_CM = UCS_BIT(0)
291 };
292
293
/**
 * @ingroup UCT_RESOURCE
 * @brief UCX device types.
 *
 * Enumerators are numbered consecutively starting from zero, with
 * UCT_DEVICE_TYPE_LAST placed after the last device type.
 */
typedef enum {
    /** Network devices */
    UCT_DEVICE_TYPE_NET = 0,

    /** Shared memory devices */
    UCT_DEVICE_TYPE_SHM,

    /** Acceleration devices */
    UCT_DEVICE_TYPE_ACC,

    /** Loop-back device */
    UCT_DEVICE_TYPE_SELF,

    UCT_DEVICE_TYPE_LAST
} uct_device_type_t;
305
306
307 /**
308 * @ingroup UCT_RESOURCE
309 * @brief Communication resource descriptor.
310 *
311 * Resource descriptor is an object representing the network resource.
312 * Resource descriptor could represent a stand-alone communication resource
313 * such as an HCA port, network interface, or multiple resources such as
314 * multiple network interfaces or communication ports. It could also represent
315 * virtual communication resources that are defined over a single physical
316 * network interface.
317 */
318 typedef struct uct_tl_resource_desc {
319 char tl_name[UCT_TL_NAME_MAX]; /**< Transport name */
320 char dev_name[UCT_DEVICE_NAME_MAX]; /**< Hardware device name */
321 uct_device_type_t dev_type; /**< The device represented by this resource
322 (e.g. UCT_DEVICE_TYPE_NET for a network interface) */
323 ucs_sys_device_t sys_device; /**< The identifier associated with the device
324 bus_id as captured in ucs_sys_bus_id_t struct */
325 } uct_tl_resource_desc_t;
326
/* printf-style format for a transport resource: two strings separated by '/'.
 * Meant to be paired with UCT_TL_RESOURCE_DESC_ARG. */
#define UCT_TL_RESOURCE_DESC_FMT \
    "%s/%s"

/* Expands to the two arguments consumed by UCT_TL_RESOURCE_DESC_FMT: the
 * tl_name and dev_name members reached through the pointer _rsc. The
 * expansion is deliberately not wrapped in parentheses, because it has to
 * produce two separate function arguments. _rsc is evaluated twice. */
#define UCT_TL_RESOURCE_DESC_ARG(_rsc) \
    (_rsc)->tl_name, (_rsc)->dev_name
329
330
/**
 * @brief Atomic operation requested for uct_ep_atomic32_post,
 *        uct_ep_atomic64_post, uct_ep_atomic32_fetch and
 *        uct_ep_atomic64_fetch.
 *
 * Selects which atomic memory operation the uct_ep_atomic family of functions
 * should perform. Enumerators are numbered consecutively starting from zero,
 * with UCT_ATOMIC_OP_LAST placed after the last operation.
 */
typedef enum uct_atomic_op {
    /** Atomic add */
    UCT_ATOMIC_OP_ADD = 0,

    /** Atomic and */
    UCT_ATOMIC_OP_AND,

    /** Atomic or */
    UCT_ATOMIC_OP_OR,

    /** Atomic xor */
    UCT_ATOMIC_OP_XOR,

    /** Atomic swap */
    UCT_ATOMIC_OP_SWAP,

    /** Atomic compare-and-swap */
    UCT_ATOMIC_OP_CSWAP,

    UCT_ATOMIC_OP_LAST
} uct_atomic_op_t;
347
348
/**
 * @defgroup UCT_RESOURCE_IFACE_CAP UCT interface operations and capabilities
 * @ingroup UCT_RESOURCE
 *
 * @brief List of capabilities supported by UCX API
 *
 * The definitions below give the full list of operations and capabilities
 * exposed by the UCX API. Each macro is a single bit; not every bit position
 * is assigned.
 * @{
 */

/* Active message capabilities */

/** Short active message */
#define UCT_IFACE_FLAG_AM_SHORT               UCS_BIT(0)
/** Buffered active message */
#define UCT_IFACE_FLAG_AM_BCOPY               UCS_BIT(1)
/** Zero-copy active message */
#define UCT_IFACE_FLAG_AM_ZCOPY               UCS_BIT(2)

/** Pending operations */
#define UCT_IFACE_FLAG_PENDING                UCS_BIT(3)

/* PUT capabilities */

/** Short put */
#define UCT_IFACE_FLAG_PUT_SHORT              UCS_BIT(4)
/** Buffered put */
#define UCT_IFACE_FLAG_PUT_BCOPY              UCS_BIT(5)
/** Zero-copy put */
#define UCT_IFACE_FLAG_PUT_ZCOPY              UCS_BIT(6)

/* GET capabilities */

/** Short get */
#define UCT_IFACE_FLAG_GET_SHORT              UCS_BIT(8)
/** Buffered get */
#define UCT_IFACE_FLAG_GET_BCOPY              UCS_BIT(9)
/** Zero-copy get */
#define UCT_IFACE_FLAG_GET_ZCOPY              UCS_BIT(10)

/* Atomic operations domain */

/** Atomic communications are consistent with respect to CPU operations. */
#define UCT_IFACE_FLAG_ATOMIC_CPU             UCS_BIT(30)
/** Atomic communications are consistent only with respect to other atomics
    on the same device. */
#define UCT_IFACE_FLAG_ATOMIC_DEVICE          UCS_BIT(31)

/* Error handling capabilities */

/** Invalid buffer for short operation */
#define UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF    UCS_BIT(32)
/** Invalid buffer for buffered operation */
#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF    UCS_BIT(33)
/** Invalid buffer for zero copy operation */
#define UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF    UCS_BIT(34)
/** Invalid AM id on remote */
#define UCT_IFACE_FLAG_ERRHANDLE_AM_ID        UCS_BIT(35)
/** Remote memory access */
#define UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM   UCS_BIT(36)
/** Invalid length for buffered operation */
#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_LEN    UCS_BIT(37)
/** Remote peer failures/outage */
#define UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE UCS_BIT(38)

/** Endpoint check */
#define UCT_IFACE_FLAG_EP_CHECK               UCS_BIT(39)

/* Connection establishment */

/** Supports connecting to interface */
#define UCT_IFACE_FLAG_CONNECT_TO_IFACE       UCS_BIT(40)
/** Supports connecting to specific endpoint */
#define UCT_IFACE_FLAG_CONNECT_TO_EP          UCS_BIT(41)
/** Supports connecting to sockaddr */
#define UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR    UCS_BIT(42)

/* Special transport flags */

/** Active messages may be received with duplicates. This happens if the
    transport does not keep enough information to detect retransmissions. */
#define UCT_IFACE_FLAG_AM_DUP                 UCS_BIT(43)

/* Callback invocation */

/** Interface supports setting a callback which is invoked only from the
    calling context of uct_worker_progress() */
#define UCT_IFACE_FLAG_CB_SYNC                UCS_BIT(44)
/** Interface supports setting a callback which will be invoked within a
    reasonable amount of time if uct_worker_progress() is not being called.
    The callback can be invoked from any progress context, and it may also be
    invoked when uct_worker_progress() is called. */
#define UCT_IFACE_FLAG_CB_ASYNC               UCS_BIT(45)

/* Tag matching operations */

/** Hardware tag matching short eager support */
#define UCT_IFACE_FLAG_TAG_EAGER_SHORT        UCS_BIT(50)
/** Hardware tag matching bcopy eager support */
#define UCT_IFACE_FLAG_TAG_EAGER_BCOPY        UCS_BIT(51)
/** Hardware tag matching zcopy eager support */
#define UCT_IFACE_FLAG_TAG_EAGER_ZCOPY        UCS_BIT(52)
/** Hardware tag matching rendezvous zcopy support */
#define UCT_IFACE_FLAG_TAG_RNDV_ZCOPY         UCS_BIT(53)

/**
 * @}
 */
423
424
/**
 * @defgroup UCT_RESOURCE_IFACE_EVENT_CAP UCT interface for asynchronous event capabilities
 * @ingroup UCT_RESOURCE
 *
 * @brief List of capabilities supported by UCT iface event API
 *
 * The definitions below give the full list of operations and capabilities
 * supported by UCT iface event.
 * @{
 */

/* Event types */

/** Event notification of send completion is supported */
#define UCT_IFACE_FLAG_EVENT_SEND_COMP UCS_BIT(0)
/** Event notification of tag and active message receive is supported */
#define UCT_IFACE_FLAG_EVENT_RECV      UCS_BIT(1)
/** Event notification of signaled tag and active message is supported */
#define UCT_IFACE_FLAG_EVENT_RECV_SIG  UCS_BIT(2)

/* Event notification mechanisms */

/** Event notification through File Descriptor is supported */
#define UCT_IFACE_FLAG_EVENT_FD        UCS_BIT(3)
/** Event notification through asynchronous callback invocation is
    supported */
#define UCT_IFACE_FLAG_EVENT_ASYNC_CB  UCS_BIT(4)

/**
 * @}
 */
450
451
/**
 * @ingroup UCT_CONTEXT
 * @brief Methods of memory allocation.
 *
 * Enumerators are numbered consecutively starting from zero.
 * UCT_ALLOC_METHOD_DEFAULT shares its value with UCT_ALLOC_METHOD_LAST.
 */
typedef enum {
    /** Allocate from OS using libc allocator with Transparent Huge Pages
        enabled */
    UCT_ALLOC_METHOD_THP = 0,

    /** Allocate using memory domain */
    UCT_ALLOC_METHOD_MD,

    /** Allocate from heap using libc allocator */
    UCT_ALLOC_METHOD_HEAP,

    /** Allocate from OS using mmap() syscall */
    UCT_ALLOC_METHOD_MMAP,

    /** Allocate huge pages */
    UCT_ALLOC_METHOD_HUGE,

    UCT_ALLOC_METHOD_LAST,

    /** Use default method */
    UCT_ALLOC_METHOD_DEFAULT = UCT_ALLOC_METHOD_LAST
} uct_alloc_method_t;
466
467
468 /**
469 * @ingroup UCT_RESOURCE
470 * @brief Asynchronous event types.
471 *
472 * @note The UCT_EVENT_RECV and UCT_EVENT_RECV_SIG event types are used to
473 * indicate receive-side completions for both tag matching and active
474 * messages. If the interface supports signaled receives
475 * (@ref UCT_IFACE_FLAG_EVENT_RECV_SIG), then for the messages sent with
476 * UCT_SEND_FLAG_SIGNALED flag, UCT_EVENT_RECV_SIG should be triggered
477 * on the receiver. Otherwise, UCT_EVENT_RECV should be triggered.
478 */
479 enum uct_iface_event_types {
480 UCT_EVENT_SEND_COMP = UCS_BIT(0), /**< Send completion event */
481 UCT_EVENT_RECV = UCS_BIT(1), /**< Tag or active message received */
482 UCT_EVENT_RECV_SIG = UCS_BIT(2) /**< Signaled tag or active message
483 received */
484 };
485
486
487 /**
488 * @ingroup UCT_RESOURCE
489 * @brief Flush modifiers.
490 */
491 enum uct_flush_flags {
492 UCT_FLUSH_FLAG_LOCAL = 0, /**< Guarantees that the data
493 transfer is completed but the
494 target buffer may not be
495 updated yet.*/
496 UCT_FLUSH_FLAG_CANCEL = UCS_BIT(0) /**< The library will make a best
497 effort attempt to cancel all
498 uncompleted operations.
499 However, there is a chance that
500 some operations will not be
501 canceled in which case the user
502 will need to handle their
503 completions through
504 the relevant callbacks.
505 After @ref uct_ep_flush
506 with this flag is completed,
507 the endpoint will be set to
508 error state, and it becomes
509 unusable for send operations
510 and should be destroyed. */
511 };
512
513
514 /**
515 * @ingroup UCT_RESOURCE
516 * @brief UCT progress types
517 */
518 enum uct_progress_types {
519 UCT_PROGRESS_SEND = UCS_BIT(0), /**< Progress send operations */
520 UCT_PROGRESS_RECV = UCS_BIT(1), /**< Progress receive operations */
521 UCT_PROGRESS_THREAD_SAFE = UCS_BIT(7) /**< Enable/disable progress while
522 another thread may be calling
523 @ref ucp_worker_progress(). */
524 };
525
526
527 /**
528 * @ingroup UCT_AM
529 * @brief Flags for active message send operation.
530 */
531 enum uct_msg_flags {
532 UCT_SEND_FLAG_SIGNALED = UCS_BIT(0) /**< Trigger @ref UCT_EVENT_RECV_SIG
533 event on remote side. Make best
534 effort attempt to avoid triggering
535 @ref UCT_EVENT_RECV event.
536 Ignored if not supported by interface. */
537 };
538
539
540 /**
541 * @ingroup UCT_RESOURCE
542 * @brief Callback flags.
543 *
544 * List of flags for a callback.
545 */
546 enum uct_cb_flags {
547 UCT_CB_FLAG_RESERVED = UCS_BIT(1), /**< Reserved for future use. */
548 UCT_CB_FLAG_ASYNC = UCS_BIT(2) /**< Callback is allowed to be called
549 from any thread in the process, and
550 therefore should be thread-safe. For
551 example, it may be called from a
552 transport async progress thread. To
553 guarantee async invocation, the
554 interface must have the @ref
555 UCT_IFACE_FLAG_CB_ASYNC flag set. If
556 async callback is requested on an
557 interface which only supports sync
558 callback (i.e., only the @ref
559 UCT_IFACE_FLAG_CB_SYNC flag is set),
560 the callback will be invoked only
561 from the context that called @ref
562 uct_iface_progress). */
563 };
564
565
566 /**
567 * @ingroup UCT_RESOURCE
568 * @brief Mode in which to open the interface.
569 */
570 enum uct_iface_open_mode {
571 /** Interface is opened on a specific device */
572 UCT_IFACE_OPEN_MODE_DEVICE = UCS_BIT(0),
573
574 /** Interface is opened on a specific address on the server side. This mode
575 will be deprecated in the near future for a better API. */
576 UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER = UCS_BIT(1),
577
578 /** Interface is opened on a specific address on the client side This mode
579 will be deprecated in the near future for a better API. */
580 UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT = UCS_BIT(2)
581 };
582
583
584 /**
585 * @ingroup UCT_RESOURCE
586 * @brief UCT interface created by @ref uct_iface_open parameters field mask.
587 *
588 * The enumeration allows specifying which fields in @ref uct_iface_params_t are
589 * present, for backward compatibility support.
590 */
591 enum uct_iface_params_field {
592 /** Enables @ref uct_iface_params_t::cpu_mask */
593 UCT_IFACE_PARAM_FIELD_CPU_MASK = UCS_BIT(0),
594
595 /** Enables @ref uct_iface_params_t::open_mode */
596 UCT_IFACE_PARAM_FIELD_OPEN_MODE = UCS_BIT(1),
597
598 /** Enables @ref uct_iface_params_t_mode_device
599 * "uct_iface_params_t::mode::device" */
600 UCT_IFACE_PARAM_FIELD_DEVICE = UCS_BIT(2),
601
602 /** Enables @ref uct_iface_params_t_mode_sockaddr
603 * "uct_iface_params_t::mode::sockaddr" */
604 UCT_IFACE_PARAM_FIELD_SOCKADDR = UCS_BIT(3),
605
606 /** Enables @ref uct_iface_params_t::stats_root */
607 UCT_IFACE_PARAM_FIELD_STATS_ROOT = UCS_BIT(4),
608
609 /** Enables @ref uct_iface_params_t::rx_headroom */
610 UCT_IFACE_PARAM_FIELD_RX_HEADROOM = UCS_BIT(5),
611
612 /** Enables @ref uct_iface_params_t::err_handler_arg */
613 UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG = UCS_BIT(6),
614
615 /** Enables @ref uct_iface_params_t::err_handler */
616 UCT_IFACE_PARAM_FIELD_ERR_HANDLER = UCS_BIT(7),
617
618 /** Enables @ref uct_iface_params_t::err_handler_flags */
619 UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS = UCS_BIT(8),
620
621 /** Enables @ref uct_iface_params_t::eager_arg */
622 UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG = UCS_BIT(9),
623
624 /** Enables @ref uct_iface_params_t::eager_cb */
625 UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB = UCS_BIT(10),
626
627 /** Enables @ref uct_iface_params_t::rndv_arg */
628 UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG = UCS_BIT(11),
629
630 /** Enables @ref uct_iface_params_t::rndv_cb */
631 UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB = UCS_BIT(12),
632
633 /** Enables @ref uct_iface_params_t::async_event_arg */
634 UCT_IFACE_PARAM_FIELD_ASYNC_EVENT_ARG = UCS_BIT(13),
635
636 /** Enables @ref uct_iface_params_t::async_event_cb */
637 UCT_IFACE_PARAM_FIELD_ASYNC_EVENT_CB = UCS_BIT(14)
638 };
639
/**
 * @ingroup UCT_MD
 * @brief Kind of accessibility check applied to a socket address.
 */
typedef enum {
    /** Check if local address exists. Address should belong to a local
        network interface */
    UCT_SOCKADDR_ACC_LOCAL = 0,

    /** Check if remote address can be reached. Address is routable from one
        of the local network interfaces */
    UCT_SOCKADDR_ACC_REMOTE
} uct_sockaddr_accessibility_t;
652
653
654 /**
655 * @ingroup UCT_MD
656 * @brief Memory domain capability flags.
657 */
658 enum {
659 UCT_MD_FLAG_ALLOC = UCS_BIT(0), /**< MD supports memory allocation */
660 UCT_MD_FLAG_REG = UCS_BIT(1), /**< MD supports memory registration */
661 UCT_MD_FLAG_NEED_MEMH = UCS_BIT(2), /**< The transport needs a valid local
662 memory handle for zero-copy operations */
663 UCT_MD_FLAG_NEED_RKEY = UCS_BIT(3), /**< The transport needs a valid
664 remote memory key for remote memory
665 operations */
666 UCT_MD_FLAG_ADVISE = UCS_BIT(4), /**< MD supports memory advice */
667 UCT_MD_FLAG_FIXED = UCS_BIT(5), /**< MD supports memory allocation with
668 fixed address */
669 UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to
670 remote memory via a pointer that
671 is returned by @ref uct_rkey_ptr */
672 UCT_MD_FLAG_SOCKADDR = UCS_BIT(7) /**< MD support for client-server
673 connection establishment via
674 sockaddr */
675 };
676
677 /**
678 * @ingroup UCT_MD
679 * @brief Memory allocation/registration flags.
680 */
681 enum uct_md_mem_flags {
682 UCT_MD_MEM_FLAG_NONBLOCK = UCS_BIT(0), /**< Hint to perform non-blocking
683 allocation/registration: page
684 mapping may be deferred until
685 it is accessed by the CPU or a
686 transport. */
687 UCT_MD_MEM_FLAG_FIXED = UCS_BIT(1), /**< Place the mapping at exactly
688 defined address */
689 UCT_MD_MEM_FLAG_LOCK = UCS_BIT(2), /**< Registered memory should be
690 locked. May incur extra cost for
691 registration, but memory access
692 is usually faster. */
693 UCT_MD_MEM_FLAG_HIDE_ERRORS = UCS_BIT(3), /**< Hide errors on memory registration.
694 In some cases registration failure
695 is not an error (e. g. for merged
696 memory regions). */
697
698 /* memory access flags */
699 UCT_MD_MEM_ACCESS_REMOTE_PUT = UCS_BIT(5), /**< enable remote put access */
700 UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), /**< enable remote get access */
701 UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), /**< enable remote atomic access */
702
703 /** enable local and remote access for all operations */
704 UCT_MD_MEM_ACCESS_ALL = (UCT_MD_MEM_ACCESS_REMOTE_PUT|
705 UCT_MD_MEM_ACCESS_REMOTE_GET|
706 UCT_MD_MEM_ACCESS_REMOTE_ATOMIC),
707
708 /** enable local and remote access for put and get operations */
709 UCT_MD_MEM_ACCESS_RMA = (UCT_MD_MEM_ACCESS_REMOTE_PUT|
710 UCT_MD_MEM_ACCESS_REMOTE_GET)
711 };
712
713
/**
 * @ingroup UCT_MD
 * @brief List of UCT memory use advice.
 */
typedef enum {
    /** No special treatment */
    UCT_MADV_NORMAL = 0,

    /** Can be used on the memory mapped with @ref UCT_MD_MEM_FLAG_NONBLOCK to
        speed up memory mapping and to avoid page faults when the memory is
        accessed for the first time. */
    UCT_MADV_WILLNEED
} uct_mem_advice_t;
725
726
727 /**
728 * @ingroup UCT_CLIENT_SERVER
729 * @brief UCT connection manager attributes field mask.
730 *
731 * The enumeration allows specifying which fields in @ref uct_cm_attr_t are
732 * present, for backward compatibility support.
733 */
734 enum uct_cm_attr_field {
735 /** Enables @ref uct_cm_attr::max_conn_priv */
736 UCT_CM_ATTR_FIELD_MAX_CONN_PRIV = UCS_BIT(0)
737 };
738
739
740 /**
741 * @ingroup UCT_CLIENT_SERVER
742 * @brief UCT listener attributes field mask.
743 *
744 * The enumeration allows specifying which fields in @ref uct_listener_attr_t are
745 * present, for backward compatibility support.
746 */
747 enum uct_listener_attr_field {
748 /** Enables @ref uct_listener_attr::sockaddr */
749 UCT_LISTENER_ATTR_FIELD_SOCKADDR = UCS_BIT(0)
750 };
751
752
753 /**
754 * @ingroup UCT_CLIENT_SERVER
755 * @brief UCT listener created by @ref uct_listener_create parameters field mask.
756 *
757 * The enumeration allows specifying which fields in @ref uct_listener_params_t
758 * are present, for backward compatibility support.
759 */
760 enum uct_listener_params_field {
761 /** Enables @ref uct_listener_params::backlog */
762 UCT_LISTENER_PARAM_FIELD_BACKLOG = UCS_BIT(0),
763
764 /** Enables @ref uct_listener_params::conn_request_cb */
765 UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB = UCS_BIT(1),
766
767 /** Enables @ref uct_listener_params::user_data */
768 UCT_LISTENER_PARAM_FIELD_USER_DATA = UCS_BIT(2)
769 };
770
771
772 /**
773 * @ingroup UCT_RESOURCE
774 * @brief UCT endpoint created by @ref uct_ep_create parameters field mask.
775 *
776 * The enumeration allows specifying which fields in @ref uct_ep_params_t are
777 * present, for backward compatibility support.
778 */
779 enum uct_ep_params_field {
780 /** Enables @ref uct_ep_params::iface */
781 UCT_EP_PARAM_FIELD_IFACE = UCS_BIT(0),
782
783 /** Enables @ref uct_ep_params::user_data */
784 UCT_EP_PARAM_FIELD_USER_DATA = UCS_BIT(1),
785
786 /** Enables @ref uct_ep_params::dev_addr */
787 UCT_EP_PARAM_FIELD_DEV_ADDR = UCS_BIT(2),
788
789 /** Enables @ref uct_ep_params::iface_addr */
790 UCT_EP_PARAM_FIELD_IFACE_ADDR = UCS_BIT(3),
791
792 /** Enables @ref uct_ep_params::sockaddr */
793 UCT_EP_PARAM_FIELD_SOCKADDR = UCS_BIT(4),
794
795 /** Enables @ref uct_ep_params::sockaddr_cb_flags */
796 UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS = UCS_BIT(5),
797
798 /** Enables @ref uct_ep_params::sockaddr_pack_cb */
799 UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB = UCS_BIT(6),
800
801 /** Enables @ref uct_ep_params::cm */
802 UCT_EP_PARAM_FIELD_CM = UCS_BIT(7),
803
804 /** Enables @ref uct_ep_params::conn_request */
805 UCT_EP_PARAM_FIELD_CONN_REQUEST = UCS_BIT(8),
806
807 /** Enables @ref uct_ep_params::sockaddr_cb_client */
808 UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB_CLIENT = UCS_BIT(9),
809
810 /** Enables @ref uct_ep_params::sockaddr_cb_server */
811 UCT_EP_PARAM_FIELD_SOCKADDR_NOTIFY_CB_SERVER = UCS_BIT(10),
812
813 /** Enables @ref uct_ep_params::disconnect_cb */
814 UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB = UCS_BIT(11),
815
816 /** Enables @ref uct_ep_params::path_index */
817 UCT_EP_PARAM_FIELD_PATH_INDEX = UCS_BIT(12)
818 };
819
820
/**
 * @ingroup UCT_RESOURCE
 * @brief Process Per Node (PPN) bandwidth specification:
 *        f(ppn) = dedicated + shared / ppn
 *
 * The two members are the coefficients of the function above, which serves as
 * the basis for estimating the bandwidth of various UCT operations. The
 * estimate can be used to select the best performing combination of UCT
 * operations.
 */
typedef struct uct_ppn_bandwidth {
    /** Dedicated bandwidth, bytes/second */
    double dedicated;

    /** Shared bandwidth, bytes/second */
    double shared;
} uct_ppn_bandwidth_t;
833
834
/**
 * @ingroup UCT_RESOURCE
 * @brief Interface attributes: capabilities and limitations.
 *
 * The @a cap member groups the per-operation size limits (PUT, GET, AM and
 * TAG), the atomic operation flags and the interface capability flags. The
 * remaining members describe address sizes and the expected performance of
 * the interface.
 */
struct uct_iface_attr {
    struct {
        struct {
            size_t           max_short;  /**< Maximal size for put_short */
            size_t           max_bcopy;  /**< Maximal size for put_bcopy */
            size_t           min_zcopy;  /**< Minimal size for put_zcopy (total
                                              of @ref uct_iov_t::length of the
                                              @a iov parameter) */
            size_t           max_zcopy;  /**< Maximal size for put_zcopy (total
                                              of @ref uct_iov_t::length of the
                                              @a iov parameter) */
            size_t           opt_zcopy_align; /**< Optimal alignment for zero-copy
                                                   buffer address */
            size_t           align_mtu;       /**< MTU used for alignment */
            size_t           max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref ::uct_ep_put_zcopy
                                              @anchor uct_iface_attr_cap_put_max_iov */
        } put;                           /**< Attributes for PUT operations */

        struct {
            size_t           max_short;  /**< Maximal size for get_short */
            size_t           max_bcopy;  /**< Maximal size for get_bcopy */
            size_t           min_zcopy;  /**< Minimal size for get_zcopy (total
                                              of @ref uct_iov_t::length of the
                                              @a iov parameter) */
            size_t           max_zcopy;  /**< Maximal size for get_zcopy (total
                                              of @ref uct_iov_t::length of the
                                              @a iov parameter) */
            size_t           opt_zcopy_align; /**< Optimal alignment for zero-copy
                                                   buffer address */
            size_t           align_mtu;       /**< MTU used for alignment */
            size_t           max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref uct_ep_get_zcopy
                                              @anchor uct_iface_attr_cap_get_max_iov */
        } get;                           /**< Attributes for GET operations */

        struct {
            size_t           max_short;  /**< Total max. size (incl. the header) */
            size_t           max_bcopy;  /**< Total max. size (incl. the header) */
            size_t           min_zcopy;  /**< Minimal size for am_zcopy (incl. the
                                              header and total of @ref uct_iov_t::length
                                              of the @a iov parameter) */
            size_t           max_zcopy;  /**< Total max. size (incl. the header
                                              and total of @ref uct_iov_t::length
                                              of the @a iov parameter) */
            size_t           opt_zcopy_align; /**< Optimal alignment for zero-copy
                                                   buffer address */
            size_t           align_mtu;       /**< MTU used for alignment */
            size_t           max_hdr;    /**< Max. header size for zcopy */
            size_t           max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref ::uct_ep_am_zcopy
                                              @anchor uct_iface_attr_cap_am_max_iov */
        } am;                            /**< Attributes for AM operations */

        struct {
            struct {
                size_t       min_recv;   /**< Minimal allowed length of posted receive buffer */
                size_t       max_zcopy;  /**< Maximal allowed data length in
                                              @ref uct_iface_tag_recv_zcopy */
                size_t       max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref uct_iface_tag_recv_zcopy
                                              @anchor uct_iface_attr_cap_tag_recv_iov */
                size_t       max_outstanding; /**< Maximal number of simultaneous
                                                   receive operations */
            } recv;                      /**< Attributes related to tag receive
                                              operations */

            struct {
                size_t       max_short;  /**< Maximal allowed data length in
                                              @ref uct_ep_tag_eager_short */
                size_t       max_bcopy;  /**< Maximal allowed data length in
                                              @ref uct_ep_tag_eager_bcopy */
                size_t       max_zcopy;  /**< Maximal allowed data length in
                                              @ref uct_ep_tag_eager_zcopy */
                size_t       max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref uct_ep_tag_eager_zcopy */
            } eager;                     /**< Attributes related to eager protocol */

            struct {
                size_t       max_zcopy;  /**< Maximal allowed data length in
                                              @ref uct_ep_tag_rndv_zcopy */
                size_t       max_hdr;    /**< Maximal allowed header length in
                                              @ref uct_ep_tag_rndv_zcopy and
                                              @ref uct_ep_tag_rndv_request */
                size_t       max_iov;    /**< Maximal @a iovcnt parameter in
                                              @ref uct_ep_tag_rndv_zcopy */
            } rndv;                      /**< Attributes related to rendezvous protocol */
        } tag;                           /**< Attributes for TAG operations */

        struct {
            uint64_t         op_flags;   /**< Attributes for atomic-post operations */
            uint64_t         fop_flags;  /**< Attributes for atomic-fetch operations */
        } atomic32, atomic64;            /**< Attributes for atomic operations */

        uint64_t             flags;      /**< Flags from @ref UCT_RESOURCE_IFACE_CAP */
        uint64_t             event_flags;/**< Flags from @ref UCT_RESOURCE_IFACE_EVENT_CAP */
    } cap;                               /**< Interface capabilities */

    size_t                   device_addr_len;/**< Size of device address */
    size_t                   iface_addr_len; /**< Size of interface address */
    size_t                   ep_addr_len;    /**< Size of endpoint address */
    size_t                   max_conn_priv;  /**< Maximal size of the iface's
                                                  private data used for
                                                  connection establishment
                                                  with sockaddr */
    struct sockaddr_storage  listen_sockaddr; /**< Sockaddr on which this iface
                                                   is listening. */
    /*
     * The following fields define expected performance of the communication
     * interface, this would usually be a combination of device and system
     * characteristics and determined at run time.
     */
    double                   overhead;     /**< Message overhead, seconds */
    uct_ppn_bandwidth_t      bandwidth;    /**< Bandwidth model */
    ucs_linear_func_t        latency;      /**< Latency as function of number of
                                                active endpoints */
    uint8_t                  priority;     /**< Priority of device */
    size_t                   max_num_eps;  /**< Maximum number of endpoints */
    unsigned                 dev_num_paths;/**< How many network paths can be
                                                utilized on the device used by
                                                this interface for optimal
                                                performance. Endpoints that connect
                                                to the same remote address but use
                                                different paths can potentially
                                                achieve higher total bandwidth
                                                compared to using only a single
                                                endpoint. */
};
965
966
/**
 * @ingroup UCT_RESOURCE
 * @brief Parameters used for interface creation.
 *
 * This structure should be allocated by the user and should be passed to
 * @ref uct_iface_open. The user has to set @a field_mask and initialize every
 * field that it marks as valid; fields not specified in the mask are ignored.
 */
struct uct_iface_params {
    /** Mask of valid fields in this structure, using bits from
     *  @ref uct_iface_params_field. Fields not specified in this mask will be
     *  ignored. */
    uint64_t                                     field_mask;
    /** Mask of CPUs to use for resources */
    ucs_cpu_set_t                                cpu_mask;
    /** Interface open mode bitmap. @ref uct_iface_open_mode */
    uint64_t                                     open_mode;
    /** Mode-specific parameters */
    union {
        /** @anchor uct_iface_params_t_mode_device
         *  The fields in this structure (tl_name and dev_name) need to be set only when
         *  the @ref UCT_IFACE_OPEN_MODE_DEVICE bit is set in @ref
         *  uct_iface_params_t.open_mode. This will make @ref uct_iface_open
         *  open the interface on the specified device.
         */
        struct {
            const char                           *tl_name;  /**< Transport name */
            const char                           *dev_name; /**< Device Name */
        } device;
        /** @anchor uct_iface_params_t_mode_sockaddr
         *  These callbacks and address are only relevant for client-server
         *  connection establishment with sockaddr and are needed on the server side.
         *  The callbacks and address need to be set when the @ref
         *  UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER bit is set in @ref
         *  uct_iface_params_t.open_mode. This will make @ref uct_iface_open
         *  open the interface on the specified address as a server. */
        struct {
            /** Address on which the interface is opened as a server */
            ucs_sock_addr_t                      listen_sockaddr;
            /** Argument for connection request callback */
            void                                 *conn_request_arg;
            /** Callback for an incoming connection request on the server */
            uct_sockaddr_conn_request_callback_t conn_request_cb;
            /** Callback flags to indicate where the callback can be invoked from.
             * @ref uct_cb_flags */
            uint32_t                             cb_flags;
        } sockaddr;
    } mode;

    /** Root in the statistics tree. Can be NULL. If non NULL, it will be
        a root of @a uct_iface object in the statistics tree. */
    ucs_stats_node_t                             *stats_root;
    /** How many bytes to reserve before the receive segment. */
    size_t                                       rx_headroom;

    /** Custom argument of @a err_handler. */
    void                                         *err_handler_arg;
    /** The callback to handle transport level error. */
    uct_error_handler_t                          err_handler;
    /** Callback flags to indicate where the @a err_handler callback can be
     * invoked from. @ref uct_cb_flags */
    uint32_t                                     err_handler_flags;

    /** These callbacks are only relevant for HW Tag Matching */
    /* NOTE(review): presumably the user argument for eager_cb - confirm */
    void                                         *eager_arg;
    /** Callback for tag matching unexpected eager messages */
    uct_tag_unexp_eager_cb_t                     eager_cb;
    /* NOTE(review): presumably the user argument for rndv_cb - confirm */
    void                                         *rndv_arg;
    /** Callback for tag matching unexpected rndv messages */
    uct_tag_unexp_rndv_cb_t                      rndv_cb;

    /* NOTE(review): presumably the user argument for async_event_cb - confirm */
    void                                         *async_event_arg;
    /** Callback for asynchronous event handling. The callback will be
     * invoked from UCT transport when there are new events to be
     * read by user if the iface has @ref UCT_IFACE_FLAG_EVENT_ASYNC_CB
     * capability */
    uct_async_event_cb_t                         async_event_cb;
};
1043
1044
/**
 * @ingroup UCT_RESOURCE
 * @brief Parameters for creating a UCT endpoint by @ref uct_ep_create
 *
 * Only the fields enabled in @a field_mask are used. Exactly one of @a iface
 * and @a cm must be initialized.
 */
struct uct_ep_params {
    /**
     * Mask of valid fields in this structure, using bits from
     * @ref uct_ep_params_field. Fields not specified by this mask will be
     * ignored.
     */
    uint64_t                          field_mask;

    /**
     * Interface to create the endpoint on.
     * Either @a iface or @a cm field must be initialized but not both.
     */
    uct_iface_h                       iface;

    /**
     * User data associated with the endpoint.
     */
    void                              *user_data;

    /**
     * The device address to connect to on the remote peer. This must be defined
     * together with @ref uct_ep_params_t::iface_addr to create an endpoint
     * connected to a remote interface.
     */
    const uct_device_addr_t           *dev_addr;

    /**
     * This specifies the remote address to use when creating an endpoint that
     * is connected to a remote interface.
     * @note This requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE capability.
     */
    const uct_iface_addr_t            *iface_addr;

    /**
     * The sockaddr to connect to on the remote peer. If set, @ref uct_ep_create
     * will create an endpoint for a connection to the remote peer, specified by
     * its socket address.
     * @note The interface in this routine requires the
     * @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR capability.
     */
    const ucs_sock_addr_t             *sockaddr;

    /**
     * @ref uct_cb_flags to indicate @ref uct_ep_params_t::sockaddr_pack_cb
     * behavior. If @ref uct_ep_params_t::sockaddr_pack_cb is not set, this
     * field will be ignored.
     */
    uint32_t                          sockaddr_cb_flags;

    /**
     * Callback that will be used for filling the user's private data to be
     * delivered to the remote peer by the callback on the server or client side.
     * This field is only valid if @ref uct_ep_params_t::sockaddr is set.
     * @note It is never guaranteed that the callback will be called. If, for
     * example, the endpoint goes into error state before issuing the connection
     * request, the callback will not be invoked.
     */
    uct_cm_ep_priv_data_pack_callback_t sockaddr_pack_cb;

    /**
     * The connection manager object as created by @ref uct_cm_open.
     * Either @a cm or @a iface field must be initialized but not both.
     */
    uct_cm_h                          cm;

    /**
     * Connection request that was passed to
     * @ref uct_cm_listener_conn_request_args_t::conn_request.
     * @note After a call to @ref uct_ep_create, @a params.conn_request is
     *       consumed and should not be used anymore, even if the call returns
     *       with an error.
     */
    uct_conn_request_h                conn_request;

    /**
     * Callback that will be invoked when the endpoint on the client side
     * is being connected to the server by a connection manager @ref uct_cm_h .
     */
    uct_cm_ep_client_connect_callback_t      sockaddr_cb_client;

    /**
     * Callback that will be invoked when the endpoint on the server side
     * is being connected to a client by a connection manager @ref uct_cm_h .
     */
    uct_cm_ep_server_conn_notify_callback_t  sockaddr_cb_server;

    /**
     * Callback that will be invoked when the endpoint is disconnected.
     */
    uct_ep_disconnect_cb_t              disconnect_cb;

    /**
     * Index of the path which the endpoint should use, must be in the range
     * 0..(@ref uct_iface_attr_t.dev_num_paths - 1).
     */
    unsigned                            path_index;
};
1146
1147
/**
 * @ingroup UCT_CLIENT_SERVER
 * @brief Connection manager attributes, capabilities and limitations.
 */
struct uct_cm_attr {
    /**
     * Mask of valid fields in this structure, using bits from
     * @ref uct_cm_attr_field. Fields not specified by this mask
     * will be ignored.
     */
    uint64_t    field_mask;

    /**
     * Maximal size of the connection manager's private data used for
     * connection establishment with sockaddr.
     */
    size_t      max_conn_priv;
};
1166
1167
/**
 * @ingroup UCT_CLIENT_SERVER
 * @brief UCT listener attributes, capabilities and limitations.
 */
struct uct_listener_attr {
    /**
     * Mask of valid fields in this structure, using bits from
     * @ref uct_listener_attr_field. Fields not specified by this mask
     * will be ignored.
     */
    uint64_t                field_mask;

    /**
     * Socket address on which this listener is listening.
     */
    struct sockaddr_storage sockaddr;
};
1185
1186
/**
 * @ingroup UCT_CLIENT_SERVER
 * @brief Parameters for creating a listener object @ref uct_listener_h by
 * @ref uct_listener_create
 */
struct uct_listener_params {
    /**
     * Mask of valid fields in this structure, using bits from
     * @ref uct_listener_params_field. Fields not specified by this mask
     * will be ignored.
     */
    uint64_t                                field_mask;

    /**
     * Backlog of incoming connection requests.
     * If not specified, SOMAXCONN, as defined in <sys/socket.h>, will be used.
     */
    int                                     backlog;

    /**
     * Callback function for handling incoming connection requests.
     */
    uct_cm_listener_conn_request_callback_t conn_request_cb;

    /**
     * User data associated with the listener.
     */
    void                                    *user_data;
};
1216
1217
/**
 * @ingroup UCT_MD
 * @brief  Memory domain attributes.
 *
 * This structure defines the attributes of a Memory Domain which includes
 * maximum memory that can be allocated, credentials required for accessing the memory,
 * and CPU mask indicating the proximity of CPUs.
 */
struct uct_md_attr {
    struct {
        size_t               max_alloc; /**< Maximal allocation size */
        size_t               max_reg;   /**< Maximal registration size */
        uint64_t             flags;     /**< UCT_MD_FLAG_xx */
        uint64_t             reg_mem_types; /**< Bitmap of memory types that Memory Domain can be registered with */
        uint64_t             detect_mem_types; /**< Bitmap of memory types that Memory Domain can detect if address belongs to it */
        ucs_memory_type_t    access_mem_type; /**< Memory type that Memory Domain can access */
    } cap;                              /**< Memory domain capabilities */

    ucs_linear_func_t        reg_cost;  /**< Memory registration cost estimation
                                             (time,seconds) as a linear function
                                             of the buffer size. */

    char                     component_name[UCT_COMPONENT_NAME_MAX]; /**< Component name */
    size_t                   rkey_packed_size; /**< Size of buffer needed for packed rkey */
    ucs_cpu_set_t            local_cpus;    /**< Mask of CPUs near the resource */
};
1244
1245
/**
 * @ingroup UCT_MD
 * @brief UCT MD memory attributes field mask
 *
 * The enumeration allows specifying which fields in @ref uct_md_mem_attr_t
 * are present. Each value is a single bit, so values can be combined in
 * @ref uct_md_mem_attr_t::field_mask.
 */
enum uct_md_mem_attr_field {
    UCT_MD_MEM_ATTR_FIELD_MEM_TYPE = UCS_BIT(0), /**< Indicate if memory type
                                                      is populated. E.g. CPU/GPU */
    UCT_MD_MEM_ATTR_FIELD_SYS_DEV  = UCS_BIT(1)  /**< Indicate if details of
                                                      system device backing
                                                      the pointer are populated.
                                                      E.g. NUMA/GPU */
};
1261
1262
/**
 * @ingroup UCT_MD
 * @brief  Attributes of a memory pointer.
 *
 * This structure defines the attributes of a memory pointer which may
 * include the memory type of the pointer, and the system device that backs
 * the pointer depending on the bit fields populated in field_mask.
 */
typedef struct uct_md_mem_attr {
    /**
     * Mask of valid fields in this structure, using bits from
     * @ref uct_md_mem_attr_field. Note that the field mask is
     * populated upon return from uct_md_mem_query and not set by user.
     * Subsequent use of members of the structure are valid after ensuring that
     * relevant bits in the field_mask are set.
     */
    uint64_t          field_mask;

    /**
     * The type of memory. E.g. CPU/GPU memory or some other valid type
     */
    ucs_memory_type_t mem_type;

    /**
     * Index of the system device on which the buffer resides. E.g. NUMA/GPU
     */
    ucs_sys_device_t  sys_dev;
} uct_md_mem_attr_t;
1291
1292
/**
 * @ingroup UCT_MD
 * @brief Query attributes of a given pointer
 *
 * Return attributes such as memory type, and system device for the
 * given pointer of specific length.
 *
 * @param [in]     md          Memory domain to run the query on. This function
 *                             returns an error if the md does not recognize the
 *                             pointer.
 * @param [in]     address     The address of the pointer. Must be non-NULL,
 *                             otherwise UCS_ERR_INVALID_PARAM is returned.
 * @param [in]     length      Length of the memory region to examine.
 *                             Must be nonzero, otherwise UCS_ERR_INVALID_PARAM
 *                             is returned.
 * @param [out]    mem_attr    If successful, filled with ptr attributes.
 *
 * @return Error code.
 */
ucs_status_t uct_md_mem_query(uct_md_h md, const void *address, const size_t length,
                              uct_md_mem_attr_t *mem_attr);
1314
1315
/**
 * @ingroup UCT_MD
 * @brief Describes a memory allocated by UCT.
 *
 * This structure describes the memory block which includes the address, size, and
 * Memory Domain used for allocation. This structure is passed to interface
 * and the memory is allocated by memory allocation functions @ref uct_mem_alloc.
 *
 * The @a md and @a memh fields are meaningful only when @a method is MD.
 */
typedef struct uct_allocated_memory {
    void                     *address; /**< Address of allocated memory */
    size_t                   length;   /**< Real size of allocated memory */
    uct_alloc_method_t       method;   /**< Method used to allocate the memory */
    ucs_memory_type_t        mem_type; /**< Type of allocated memory */
    uct_md_h                 md;       /**< if method==MD: MD used to allocate the memory */
    uct_mem_h                memh;     /**< if method==MD: MD memory handle */
} uct_allocated_memory_t;
1332
1333
/**
 * @ingroup UCT_MD
 * @brief Remote key with its type
 *
 * This structure describes the credentials (typically a key) and the
 * information required to access the remote memory by the communication
 * interfaces.
 */
typedef struct uct_rkey_bundle {
    uct_rkey_t               rkey;    /**< Remote key descriptor, passed to RMA functions */
    void                     *handle; /**< Handle, used internally for releasing the key */
    void                     *type;   /**< Remote key type */
} uct_rkey_bundle_t;
1346
1347
/**
 * @ingroup UCT_RESOURCE
 * @brief Completion handle.
 *
 * This structure should be allocated by the user and can be passed to communication
 * primitives. The user has to initialize both fields of the structure.
 * If the operation returns UCS_INPROGRESS, this structure will be in use by the
 * transport until the operation completes. When the operation completes, the
 * "count" field is decremented by 1, and whenever it reaches 0 - the callback
 * is called.
 *
 * Notes:
 *  - The same structure can be passed multiple times to communication functions
 *    without the need to wait for completion.
 *  - If the number of operations is smaller than the initial value of the counter,
 *    the callback will not be called at all, so it may be left undefined.
 */
struct uct_completion {
    uct_completion_callback_t func;    /**< User callback function */
    int                       count;   /**< Completion counter */
};
1368
1369
/**
 * @ingroup UCT_RESOURCE
 * @brief Pending request.
 *
 * This structure should be passed to @ref uct_ep_pending_add() and is used to
 * signal newly available resources back to the user.
 */
struct uct_pending_req {
    uct_pending_callback_t    func;   /**< User callback function */
    char                      priv[UCT_PENDING_REQ_PRIV_LEN]; /**< Used internally by UCT */
};
1381
1382
/**
 * @ingroup UCT_TAG
 * @brief Posted tag context.
 *
 * Tag context is an object which tracks a tag posted to the transport. It
 * contains callbacks for matching events on this tag.
 */
struct uct_tag_context {
    /**
     * Tag is consumed by the transport and should not be matched in software.
     *
     * @param [in]  self    Pointer to relevant context structure, which was
     *                      initially passed to @ref uct_iface_tag_recv_zcopy.
     */
    void (*tag_consumed_cb)(uct_tag_context_t *self);

    /**
     * Tag processing is completed by the transport.
     *
     * @param [in]  self    Pointer to relevant context structure, which was
     *                      initially passed to @ref uct_iface_tag_recv_zcopy.
     * @param [in]  stag    Tag from sender.
     * @param [in]  imm     Immediate data from sender. For rendezvous, it's always 0.
     * @param [in]  length  Completed length.
     * @param [in]  status  Completion status:
     * (a)   UCS_OK - Success, data placed in provided buffer.
     * (b)   UCS_ERR_TRUNCATED - Sender's length exceeds posted
     *                           buffer, no data is copied.
     * (c)   UCS_ERR_CANCELED - Canceled by user.
     */
    void (*completed_cb)(uct_tag_context_t *self, uct_tag_t stag, uint64_t imm,
                         size_t length, ucs_status_t status);

    /**
     * Tag was matched by a rendezvous request, which should be completed by
     * the protocol layer.
     *
     * @param [in]  self          Pointer to relevant context structure, which was
     *                            initially passed to @ref uct_iface_tag_recv_zcopy.
     * @param [in]  stag          Tag from sender.
     * @param [in]  header        User defined header.
     * @param [in]  header_length User defined header length in bytes.
     * @param [in]  status        Completion status.
     */
    void (*rndv_cb)(uct_tag_context_t *self, uct_tag_t stag, const void *header,
                    unsigned header_length, ucs_status_t status);

    /** A placeholder for the private data used by the transport */
    char priv[UCT_TAG_PRIV_LEN];
};
1433
1434
/* Array of strings naming the allocation methods; defined in another
 * translation unit.
 * NOTE(review): presumably indexed by uct_alloc_method_t - confirm. */
extern const char *uct_alloc_method_names[];
1436
1437
/**
 * @ingroup UCT_RESOURCE
 * @brief Query for list of components.
 *
 * Obtain the list of transport components available on the current system.
 * The returned array should be released with
 * @ref uct_release_component_list.
 *
 * @param [out] components_p      Filled with a pointer to an array of component
 *                                handles.
 * @param [out] num_components_p  Filled with the number of elements in the array.
 *
 * @return UCS_OK if successful, or UCS_ERR_NO_MEMORY if failed to allocate the
 *         array of component handles.
 */
ucs_status_t uct_query_components(uct_component_h **components_p,
                                  unsigned *num_components_p);
1453
/**
 * @ingroup UCT_RESOURCE
 * @brief Release the list of components returned from @ref uct_query_components.
 *
 * This routine releases the memory associated with the list of components
 * allocated by @ref uct_query_components.
 *
 * @param [in] components  Array of component handles to release, as returned
 *                         from @ref uct_query_components.
 */
void uct_release_component_list(uct_component_h *components);
1464
1465
/**
 * @ingroup UCT_RESOURCE
 * @brief Get component attributes
 *
 * Query various attributes of a component.
 *
 * @param [in] component          Component handle to query attributes for. The
 *                                handle can be obtained from
 *                                @ref uct_query_components.
 * @param [inout] component_attr  Filled with component attributes.
 *
 * @return UCS_OK if successful, or nonzero error code in case of failure.
 */
ucs_status_t uct_component_query(uct_component_h component,
                                 uct_component_attr_t *component_attr);
1480
1481
/**
 * @ingroup UCT_RESOURCE
 * @brief Open a memory domain.
 *
 * Open a specific memory domain. All communications and memory operations
 * are performed in the context of a specific memory domain. Therefore it
 * must be created before communication resources. The memory domain is
 * closed with @ref uct_md_close.
 *
 * @param [in]  component       Component on which to open the memory domain,
 *                              as returned from @ref uct_query_components.
 * @param [in]  md_name         Memory domain name, as returned from @ref
 *                              uct_component_query.
 * @param [in]  config          MD configuration options. Should be obtained
 *                              from uct_md_config_read() function, or point to
 *                              MD-specific structure which extends uct_md_config_t.
 * @param [out] md_p            Filled with a handle to the memory domain.
 *
 * @return Error code.
 */
ucs_status_t uct_md_open(uct_component_h component, const char *md_name,
                         const uct_md_config_t *config, uct_md_h *md_p);
1503
/**
 * @ingroup UCT_RESOURCE
 * @brief Close a memory domain.
 *
 * @param [in]  md               Memory domain to close, as returned from
 *                               @ref uct_md_open.
 */
void uct_md_close(uct_md_h md);
1511
1512
/**
 * @ingroup UCT_RESOURCE
 * @brief Query for transport resources.
 *
 * This routine queries the @ref uct_md_h "memory domain" for communication
 * resources that are available for it. The returned array should be released
 * with @ref uct_release_tl_resource_list.
 *
 * @param [in]  md              Handle to memory domain.
 * @param [out] resources_p     Filled with a pointer to an array of resource
 *                              descriptors.
 * @param [out] num_resources_p Filled with the number of resources in the array.
 *
 * @return Error code.
 */
ucs_status_t uct_md_query_tl_resources(uct_md_h md,
                                       uct_tl_resource_desc_t **resources_p,
                                       unsigned *num_resources_p);
1530
1531
/**
 * @ingroup UCT_RESOURCE
 * @brief Release the list of resources returned from @ref uct_md_query_tl_resources.
 *
 * This routine releases the memory associated with the list of resources
 * allocated by @ref uct_md_query_tl_resources.
 *
 * @param [in] resources  Array of resource descriptors to release, as returned
 *                        from @ref uct_md_query_tl_resources.
 */
void uct_release_tl_resource_list(uct_tl_resource_desc_t *resources);
1542
1543
/**
 * @ingroup UCT_CONTEXT
 * @brief Create a worker object.
 *
 * The worker represents a progress engine. Multiple progress engines can be
 * created in an application, for example to be used by multiple threads.
 * Transports can allocate separate communication resources for every worker,
 * so that every worker can be progressed independently of others.
 *
 * @param [in]  async         Context for async event handlers. Must not be NULL.
 * @param [in]  thread_mode   Thread access mode to the worker and all interfaces
 *                            and endpoints associated with it.
 * @param [out] worker_p      Filled with a pointer to the worker object.
 *
 * @return Error code.
 */
ucs_status_t uct_worker_create(ucs_async_context_t *async,
                               ucs_thread_mode_t thread_mode,
                               uct_worker_h *worker_p);
1561
1562
/**
 * @ingroup UCT_CONTEXT
 * @brief Destroy a worker object.
 *
 * @param [in]  worker        Worker object to destroy, as returned from
 *                            @ref uct_worker_create.
 */
void uct_worker_destroy(uct_worker_h worker);
1570
1571
/**
 * @ingroup UCT_CONTEXT
 * @brief Add a slow path callback function to a worker progress.
 *
 * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, this function will add a callback
 * which will be invoked every time progress is made on the worker. *id_p will
 * be updated with an id which refers to this callback and can be used in
 * @ref uct_worker_progress_unregister_safe to remove it from the progress path.
 *
 * @param [in]    worker        Handle to the worker whose progress should invoke
 *                              the callback.
 * @param [in]    func          Pointer to the callback function.
 * @param [in]    arg           Argument for the callback function.
 * @param [in]    flags         Callback flags, see @ref ucs_callbackq_flags.
 * @param [inout] id_p          Points to a location to store a callback identifier.
 *                              If *id_p is equal to UCS_CALLBACKQ_ID_NULL, a
 *                              callback will be added and *id_p will be replaced
 *                              with a callback identifier which can be subsequently
 *                              used to remove the callback. Otherwise, no callback
 *                              will be added and *id_p will be left unchanged.
 *
 * @note This function is thread safe.
 */
void uct_worker_progress_register_safe(uct_worker_h worker, ucs_callback_t func,
                                       void *arg, unsigned flags,
                                       uct_worker_cb_id_t *id_p);
1598
1599
/**
 * @ingroup UCT_CONTEXT
 * @brief Remove a slow path callback function from worker's progress.
 *
 * If *id_p is not equal to UCS_CALLBACKQ_ID_NULL, remove a callback which was
 * previously added by @ref uct_worker_progress_register_safe. *id_p will be reset
 * to UCS_CALLBACKQ_ID_NULL.
 *
 * @param [in]    worker        Handle to the worker from whose progress the
 *                              callback should be removed.
 * @param [inout] id_p          Points to a callback identifier which indicates
 *                              the callback to remove. If *id_p is not equal to
 *                              UCS_CALLBACKQ_ID_NULL, the callback will be removed
 *                              and *id_p will be reset to UCS_CALLBACKQ_ID_NULL.
 *                              If *id_p is equal to UCS_CALLBACKQ_ID_NULL, no
 *                              operation will be performed and *id_p will be
 *                              left unchanged.
 *
 * @note This function is thread safe.
 */
void uct_worker_progress_unregister_safe(uct_worker_h worker,
                                         uct_worker_cb_id_t *id_p);
1622
1623
/**
 * @ingroup UCT_RESOURCE
 * @brief Read transport-specific interface configuration.
 *
 * The returned configuration should be released with
 * uct_config_release().
 *
 * @param [in]  md            Memory domain on which the transport's interface
 *                            was registered.
 * @param [in]  tl_name       Transport name. If @e md supports
 *                            @ref UCT_MD_FLAG_SOCKADDR, the transport name
 *                            is allowed to be NULL. In this case, the configuration
 *                            returned from this routine should be passed to
 *                            @ref uct_iface_open with
 *                            @ref UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER or
 *                            @ref UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT set in
 *                            @ref uct_iface_params_t.open_mode.
 *                            In addition, if tl_name is not NULL, the configuration
 *                            returned from this routine should be passed to
 *                            @ref uct_iface_open with @ref UCT_IFACE_OPEN_MODE_DEVICE
 *                            set in @ref uct_iface_params_t.open_mode.
 * @param [in]  env_prefix    If non-NULL, search for environment variables
 *                            starting with UCT_<prefix>_. Otherwise, search
 *                            for environment variables starting with just UCT_.
 * @param [in]  filename      If non-NULL, read configuration from this file. If
 *                            the file does not exist, it will be ignored.
 * @param [out] config_p      Filled with a pointer to configuration.
 *
 * @return Error code.
 */
ucs_status_t uct_md_iface_config_read(uct_md_h md, const char *tl_name,
                                      const char *env_prefix, const char *filename,
                                      uct_iface_config_t **config_p);
1654
1655
/**
 * @ingroup UCT_RESOURCE
 * @brief Release configuration memory returned from uct_md_iface_config_read(),
 * uct_md_config_read(), or from uct_cm_config_read().
 *
 * @param [in]  config        Configuration to release, as returned from one of
 *                            the functions listed above.
 */
void uct_config_release(void *config);
1664
1665
/**
 * @ingroup UCT_CONTEXT
 * @brief Get value by name from interface configuration (@ref uct_iface_config_t),
 *        memory domain configuration (@ref uct_md_config_t)
 *        or connection manager configuration (@ref uct_cm_config_t).
 *
 * @param [in]  config        Configuration to get from.
 * @param [in]  name          Configuration variable name.
 * @param [out] value         Pointer to get value. Should be allocated/freed by
 *                            caller.
 * @param [in]  max           Available memory space at @a value pointer.
 *
 * @return UCS_OK if found, otherwise UCS_ERR_INVALID_PARAM or UCS_ERR_NO_ELEM
 *         in case of an error.
 */
ucs_status_t uct_config_get(void *config, const char *name, char *value,
                            size_t max);
1683
1684
/**
 * @ingroup UCT_CONTEXT
 * @brief Modify interface configuration (@ref uct_iface_config_t),
 *        memory domain configuration (@ref uct_md_config_t)
 *        or connection manager configuration (@ref uct_cm_config_t).
 *
 * @param [in]  config        Configuration to modify.
 * @param [in]  name          Configuration variable name.
 * @param [in]  value         Value to set for the variable @a name.
 *
 * @return Error code.
 */
ucs_status_t uct_config_modify(void *config, const char *name, const char *value);
1698
1699
/**
 * @ingroup UCT_RESOURCE
 * @brief Open a communication interface.
 *
 * The interface is closed with @ref uct_iface_close.
 *
 * @param [in]  md            Memory domain to create the interface on.
 * @param [in]  worker        Handle to worker which will be used to progress
 *                            communications on this interface.
 * @param [in]  params        User defined @ref uct_iface_params_t parameters.
 * @param [in]  config        Interface configuration options. Should be obtained
 *                            from uct_md_iface_config_read() function, or point to
 *                            transport-specific structure which extends uct_iface_config_t.
 * @param [out] iface_p       Filled with a handle to opened communication interface.
 *
 * @return Error code.
 */
ucs_status_t uct_iface_open(uct_md_h md, uct_worker_h worker,
                            const uct_iface_params_t *params,
                            const uct_iface_config_t *config,
                            uct_iface_h *iface_p);
1719
1720
1721 /**
1722 * @ingroup UCT_RESOURCE
1723 * @brief Close and destroy an interface.
1724 *
1725 * @param [in] iface Interface to close.
1726 */
1727 void uct_iface_close(uct_iface_h iface);
1728
1729
1730 /**
1731 * @ingroup UCT_RESOURCE
1732 * @brief Get interface attributes.
1733 *
1734 * @param [in] iface Interface to query.
1735 * @param [out] iface_attr Filled with interface attributes.
1736 */
1737 ucs_status_t uct_iface_query(uct_iface_h iface, uct_iface_attr_t *iface_attr);
1738
1739
1740 /**
1741 * @ingroup UCT_RESOURCE
1742 * @brief Get address of the device the interface is using.
1743 *
1744 * Get underlying device address of the interface. All interfaces using the same
1745 * device would return the same address.
1746 *
1747 * @param [in] iface Interface to query.
1748 * @param [out] addr Filled with device address. The size of the buffer
1749 * provided must be at least @ref uct_iface_attr_t::device_addr_len.
1750 */
1751 ucs_status_t uct_iface_get_device_address(uct_iface_h iface, uct_device_addr_t *addr);
1752
1753
1754 /**
1755 * @ingroup UCT_RESOURCE
1756 * @brief Get interface address.
1757 *
1758 * Requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE.
1759 *
1760 * @param [in] iface Interface to query.
1761 * @param [out] addr Filled with interface address. The size of the buffer
1762 * provided must be at least @ref uct_iface_attr_t::iface_addr_len.
1763 */
1764 ucs_status_t uct_iface_get_address(uct_iface_h iface, uct_iface_addr_t *addr);
1765
1766
1767 /**
1768 * @ingroup UCT_RESOURCE
1769 * @brief Check if remote iface address is reachable.
1770 *
1771 * This function checks if a remote address can be reached from a local interface.
1772 * If the function returns true, it does not necessarily mean a connection and/or
1773 * data transfer would succeed: the reachability check is a local operation, so
1774 * it does not detect issues such as network misconfiguration or lack of connectivity.
1775 *
1776 * @param [in] iface Interface to check reachability from.
1777 * @param [in] dev_addr Device address to check reachability to. It is NULL
1778 * if iface_attr.device_addr_len == 0, and must be non-NULL otherwise.
1779 * @param [in] iface_addr Interface address to check reachability to. It is
1780 * NULL if iface_attr.iface_addr_len == 0, and must
1781 * be non-NULL otherwise.
1782 *
1783 * @return Nonzero if reachable, 0 if not.
1784 */
1785 int uct_iface_is_reachable(const uct_iface_h iface, const uct_device_addr_t *dev_addr,
1786 const uct_iface_addr_t *iface_addr);
1787
1788
1789 /**
1790 * @ingroup UCT_RESOURCE
1791 * @brief Check if the destination endpoint is alive with respect to the UCT library.
1792 *
1793 * This function checks if the destination endpoint is alive with respect to the
1794 * UCT library. If the status of @a ep is known, either @ref UCS_OK or an error
1795 * is returned immediately. Otherwise, @ref UCS_INPROGRESS is returned,
1796 * indicating that synchronization on the status is needed. In this case, the
1797 * status will be propagated by @a comp callback.
1798 *
1799 * @param [in] ep Endpoint to check
1800 * @param [in] flags Flags that define level of check
1801 * (currently unsupported - set to 0).
1802 * @param [in] comp Handler to process status of @a ep
1803 *
1804 * @return Error code.
1805 */
1806 ucs_status_t uct_ep_check(const uct_ep_h ep, unsigned flags,
1807 uct_completion_t *comp);
1808
1809
1810 /**
1811 * @ingroup UCT_RESOURCE
1812 * @brief Obtain a notification file descriptor for polling.
1813 *
1814 * Only interfaces that support at least one of the UCT_IFACE_FLAG_EVENT* flags
1815 * will implement this function.
1816 *
1817 * @param [in] iface Interface to get the notification descriptor.
1818 * @param [out] fd_p Location to write the notification file descriptor.
1819 *
1820 * @return Error code.
1821 */
1822 ucs_status_t uct_iface_event_fd_get(uct_iface_h iface, int *fd_p);
1823
1824
1825 /**
1826 * @ingroup UCT_RESOURCE
1827 * @brief Turn on event notification for the next event.
1828 *
1829 * This routine needs to be called before waiting on each notification on this
1830 * interface, so will typically be called once the processing of the previous
1831 * event is over.
1832 *
1833 * @param [in] iface Interface to arm.
1834 * @param [in] events Events to wake up on. See @ref uct_iface_event_types
1835 *
1836 * @return ::UCS_OK The operation completed successfully. File descriptor
1837 * will be signaled by new events.
1838 * @return ::UCS_ERR_BUSY There are unprocessed events which prevent the
1839 * file descriptor from being armed.
1840 * The operation is not completed. File descriptor
1841 * will not be signaled by new events.
1842 * @return @ref ucs_status_t "Other" different error codes in case of issues.
1843 */
1844 ucs_status_t uct_iface_event_arm(uct_iface_h iface, unsigned events);
1845
1846
1847 /**
1848 * @ingroup UCT_RESOURCE
1849 * @brief Allocate memory which can be used for zero-copy communications.
1850 *
1851 * Allocate a region of memory which can be used for zero-copy data transfer or
1852 * remote access on a particular transport interface.
1853 *
1854 * @param [in] iface Interface to allocate memory on.
1855 * @param [in] length Size of memory region to allocate.
1856 * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags.
1857 * @param [in] name Allocation name, for debug purposes.
1858 * @param [out] mem Descriptor of allocated memory.
1859 *
1860 * @return UCS_OK if allocation was successful, error code otherwise.
1861 */
1862 ucs_status_t uct_iface_mem_alloc(uct_iface_h iface, size_t length, unsigned flags,
1863 const char *name, uct_allocated_memory_t *mem);
1864
1865
1866 /**
1867 * @ingroup UCT_RESOURCE
1868 * @brief Release memory allocated with @ref uct_iface_mem_alloc().
1869 *
1870 * @param [in] mem Descriptor of memory to release.
1871 */
1872 void uct_iface_mem_free(const uct_allocated_memory_t *mem);
1873
1874
1875 /**
1876 * @ingroup UCT_AM
1877 * @brief Set active message handler for the interface.
1878 *
1879 * Only one handler can be set for each active message ID, and setting a handler
1880 * replaces the previous value. If cb == NULL, the current handler is removed.
1881 *
1882 *
1883 * @param [in] iface Interface to set the active message handler for.
1884 * @param [in] id Active message id. Must be 0..UCT_AM_ID_MAX-1.
1885 * @param [in] cb Active message callback. NULL to clear.
1886 * @param [in] arg Active message argument.
1887 * @param [in] flags Required @ref uct_cb_flags "callback flags"
1888 *
1889 * @return error code if the interface does not support active messages or
1890 * requested callback flags
1891 */
1892 ucs_status_t uct_iface_set_am_handler(uct_iface_h iface, uint8_t id,
1893 uct_am_callback_t cb, void *arg, uint32_t flags);
1894
1895
1896 /**
1897 * @ingroup UCT_AM
1898 * @brief Set active message tracer for the interface.
1899 *
1900 * Sets a function which dumps active message debug information to a buffer,
1901 * which is printed every time an active message is sent or received, when
1902 * data tracing is on. Without the tracer, only transport-level information is
1903 * printed.
1904 *
1905 * @param [in] iface Interface to set the active message tracer for.
1906 * @param [in] tracer Active message tracer. NULL to clear.
1907 * @param [in] arg Tracer custom argument.
1908 */
1909 ucs_status_t uct_iface_set_am_tracer(uct_iface_h iface, uct_am_tracer_t tracer,
1910 void *arg);
1911
1912
1913 /**
1914 * @ingroup UCT_CLIENT_SERVER
1915 * @brief Accept connection request.
1916 *
1917 * @param [in] iface Transport interface which generated connection
1918 * request @a conn_request.
1919 * @param [in] conn_request Connection establishment request passed as parameter
1920 * of @ref uct_sockaddr_conn_request_callback_t.
1921 *
1922 * @return Error code as defined by @ref ucs_status_t
1923 */
1924 ucs_status_t uct_iface_accept(uct_iface_h iface,
1925 uct_conn_request_h conn_request);
1926
1927
1928 /**
1929 * @ingroup UCT_CLIENT_SERVER
1930 * @brief Reject connection request. Will invoke an error handler @ref
1931 * uct_error_handler_t on the remote transport interface, if set.
1932 *
1933 * @param [in] iface Interface which generated connection establishment
1934 * request @a conn_request.
1935 * @param [in] conn_request Connection establishment request passed as parameter
1936 * of @ref uct_sockaddr_conn_request_callback_t.
1937 *
1938 * @return Error code as defined by @ref ucs_status_t
1939 */
1940 ucs_status_t uct_iface_reject(uct_iface_h iface,
1941 uct_conn_request_h conn_request);
1942
1943
1944 /**
1945 * @ingroup UCT_RESOURCE
1946 * @brief Create new endpoint.
1947 *
1948 * Create a UCT endpoint in one of the available modes:
1949 * -# Unconnected endpoint: If no address is present in @ref uct_ep_params,
1950 * this creates an unconnected endpoint. To establish a connection to a
1951 * remote endpoint, @ref uct_ep_connect_to_ep will need to be called. Use of
1952 * this mode requires @ref uct_ep_params_t::iface has the
1953 * @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability flag. It may be obtained by
1954 * @ref uct_iface_query .
1955 * -# Connect to a remote interface: If @ref uct_ep_params_t::dev_addr and
1956 * @ref uct_ep_params_t::iface_addr are set, this will establish an endpoint
1957 * that is connected to a remote interface. This requires that
1958 * @ref uct_ep_params_t::iface has the @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE
1959 * capability flag. It may be obtained by @ref uct_iface_query.
1960 * -# Connect to a remote socket address: If @ref uct_ep_params_t::sockaddr is
1961 * set, this will create an endpoint that is connected to a remote socket.
1962 * This requires that either @ref uct_ep_params::cm, or
1963 * @ref uct_ep_params::iface will be set. In the latter case, the interface
1964 * has to support @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR flag, which can be
1965 * checked by calling @ref uct_iface_query.
1966 * @param [in] params User defined @ref uct_ep_params_t configuration for the
1967 * @a ep_p.
1968 * @param [out] ep_p Filled with handle to the new endpoint.
1969 *
1970 * @return UCS_OK The endpoint is created successfully. This does not
1971 * guarantee that the endpoint has been connected to
1972 * the destination defined in @a params; in case of failure,
1973 * the error will be reported to the interface error
1974 * handler callback provided to @ref uct_iface_open
1975 * via @ref uct_iface_params_t.err_handler.
1976 * @return Error code as defined by @ref ucs_status_t
1977 */
1978 ucs_status_t uct_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p);
1979
1980
1981 /**
1982 * @ingroup UCT_CLIENT_SERVER
1983 * @brief Initiate a disconnection of an endpoint connected to a
1984 * sockaddr by a connection manager @ref uct_cm_h.
1985 *
1986 * This non-blocking routine will send a disconnect notification on the endpoint,
1987 * so that @ref uct_ep_disconnect_cb_t will be called on the remote peer.
1988 * The remote side should also call this routine when handling the initiator's
1989 * disconnect.
1990 * After a call to this function, the given endpoint may not be used for
1991 * communications anymore.
1992 * The @ref uct_ep_flush / @ref uct_iface_flush routines will guarantee that the
1993 * disconnect notification is delivered to the remote peer.
1994 * @ref uct_ep_destroy should be called on this endpoint after invoking this
1995 * routine and after @ref uct_ep_params::disconnect_cb has been called.
1996 *
1997 * @param [in] ep Endpoint to disconnect.
1998 * @param [in] flags Reserved for future use.
1999 *
2000 * @return UCS_OK Operation has completed successfully.
2001 * UCS_ERR_BUSY The @a ep is not connected yet (either
2002 * @ref uct_cm_ep_client_connect_callback_t or
2003 * @ref uct_cm_ep_server_conn_notify_callback_t
2004 * was not invoked).
2005 * UCS_INPROGRESS The disconnect request has been initiated, but
2006 * the remote peer has not yet responded to this
2007 * request, and consequently the registered
2008 * callback @ref uct_ep_disconnect_cb_t has not
2009 * been invoked to handle the request.
2010 * UCS_ERR_NOT_CONNECTED The @a ep is disconnected locally and remotely.
2011 * Other error codes as defined by @ref ucs_status_t .
2012 */
2013 ucs_status_t uct_ep_disconnect(uct_ep_h ep, unsigned flags);
2014
2015
2016 /**
2017 * @ingroup UCT_RESOURCE
2018 * @brief Destroy an endpoint.
2019 *
2020 * @param [in] ep Endpoint to destroy.
2021 */
2022 void uct_ep_destroy(uct_ep_h ep);
2023
2024
2025 /**
2026 * @ingroup UCT_RESOURCE
2027 * @brief Get endpoint address.
2028 *
2029 * @param [in] ep Endpoint to query.
2030 * @param [out] addr Filled with endpoint address. The size of the buffer
2031 * provided must be at least @ref uct_iface_attr_t::ep_addr_len.
2032 */
2033 ucs_status_t uct_ep_get_address(uct_ep_h ep, uct_ep_addr_t *addr);
2034
2035
2036 /**
2037 * @ingroup UCT_RESOURCE
2038 * @brief Connect endpoint to a remote endpoint.
2039 *
2040 * Requires @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability.
2041 *
2042 * @param [in] ep Endpoint to connect.
2043 * @param [in] dev_addr Remote device address.
2044 * @param [in] ep_addr Remote endpoint address.
2045 */
2046 ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep, const uct_device_addr_t *dev_addr,
2047 const uct_ep_addr_t *ep_addr);
2048
2049
2050 /**
2051 * @ingroup UCT_MD
2052 * @brief Query for memory domain attributes.
2053 *
2054 * @param [in] md Memory domain to query.
2055 * @param [out] md_attr Filled with memory domain attributes.
2056 */
2057 ucs_status_t uct_md_query(uct_md_h md, uct_md_attr_t *md_attr);
2058
2059
2060 /**
2061 * @ingroup UCT_MD
2062 * @brief Allocate memory for zero-copy sends and remote access.
2063 *
2064 * Allocate memory on the memory domain. In order to use this function, MD
2065 * must support @ref UCT_MD_FLAG_ALLOC flag.
2066 *
2067 * @param [in] md Memory domain to allocate memory on.
2068 * @param [in,out] length_p Points to the size of memory to allocate. Upon successful
2069 * return, filled with the actual size that was allocated,
2070 * which may be larger than the one requested. Must be >0.
2071 * @param [in,out] address_p The address
2072 * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags.
2073 * @param [in] name Name of the allocated region, used to track memory
2074 * usage for debugging and profiling.
2075 * @param [out] memh_p Filled with handle for allocated region.
2076 */
2077 ucs_status_t uct_md_mem_alloc(uct_md_h md, size_t *length_p, void **address_p,
2078 unsigned flags, const char *name, uct_mem_h *memh_p);
2079
2080
2081 /**
2082 * @ingroup UCT_MD
2083 * @brief Release memory allocated by @ref uct_md_mem_alloc.
2084 *
2085 * @param [in] md Memory domain memory was allocated on.
2086 * @param [in] memh Memory handle, as returned from @ref uct_md_mem_alloc.
2087 */
2088 ucs_status_t uct_md_mem_free(uct_md_h md, uct_mem_h memh);
2089
2090
2091 /**
2092 * @ingroup UCT_MD
2093 * @brief Give advice about the use of memory
2094 *
2095 * This routine advises the UCT about how to handle memory range beginning at
2096 * address and size of length bytes. This call does not influence the semantics
2097 * of the application, but may influence its performance. The advice may be
2098 * ignored.
2099 *
2100 * @param [in] md Memory domain memory was allocated or registered on.
2101 * @param [in] memh Memory handle, as returned from @ref uct_md_mem_alloc
2102 * @param [in] addr Memory base address. Memory range must belong to the
2103 * @a memh
2104 * @param [in] length Length of memory to advise. Must be >0.
2105 * @param [in] advice Memory use advice as defined in the
2106 * @ref uct_mem_advice_t list
2107 */
2108 ucs_status_t uct_md_mem_advise(uct_md_h md, uct_mem_h memh, void *addr,
2109 size_t length, uct_mem_advice_t advice);
2110
2111
2112 /**
2113 * @ingroup UCT_MD
2114 * @brief Register memory for zero-copy sends and remote access.
2115 *
2116 * Register memory on the memory domain. In order to use this function, MD
2117 * must support @ref UCT_MD_FLAG_REG flag.
2118 *
2119 * @param [in] md Memory domain to register memory on.
2120 * @param [in] address Memory to register.
2121 * @param [in] length Size of memory to register. Must be >0.
2122 * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags.
2123 * @param [out] memh_p Filled with handle for registered region.
2124 */
2125 ucs_status_t uct_md_mem_reg(uct_md_h md, void *address, size_t length,
2126 unsigned flags, uct_mem_h *memh_p);
2127
2128
2129 /**
2130 * @ingroup UCT_MD
2131 * @brief Undo the operation of @ref uct_md_mem_reg().
2132 *
2133 * @param [in] md Memory domain which was used to register the memory.
2134 * @param [in] memh Local access key to memory region.
2135 */
2136 ucs_status_t uct_md_mem_dereg(uct_md_h md, uct_mem_h memh);
2137
2138
2139 /**
2140 * @ingroup UCT_MD
2141 * @brief Detect memory type
2142 *
2143 *
2144 * @param [in] md Memory domain to detect memory type
2145 * @param [in] addr Memory address to detect.
2146 * @param [in] length Size of memory
2147 * @param [out] mem_type_p Filled with memory type of the address range if
2148 * the function succeeds.
2149 * @return UCS_OK If memory type is successfully detected
2150 * UCS_ERR_INVALID_ADDR If failed to detect memory type
2151 */
2152 ucs_status_t uct_md_detect_memory_type(uct_md_h md, const void *addr,
2153 size_t length,
2154 ucs_memory_type_t *mem_type_p);
2155
2156
2157 /**
2158 * @ingroup UCT_MD
2159 * @brief Allocate memory for zero-copy communications and remote access.
2160 *
2161 * Allocate potentially registered memory. Every one of the provided allocation
2162 * methods will be used, in turn, to perform the allocation, until one succeeds.
2163 * Whenever the MD method is encountered, every one of the provided MDs will be
2164 * used, in turn, to allocate the memory, until one succeeds, or they are
2165 * exhausted. In this case the next allocation method from the initial list will
2166 * be attempted.
2167 *
2168 * @param [in] addr If @a addr is NULL, the underlying allocation routine
2169 * will choose the address at which to create the mapping.
2170 * If @a addr is non-NULL but UCT_MD_MEM_FLAG_FIXED is
2171 * not set, the address will be interpreted as a hint
2172 * as to where to establish the mapping. If @a addr is
2173 * non-NULL and UCT_MD_MEM_FLAG_FIXED is set, then
2174 * the specified address is interpreted as a requirement.
2175 * In this case, if the mapping to the exact address
2176 * cannot be made, the allocation request fails.
2177 * @param [in] min_length Minimal size to allocate. The actual size may be
2178 * larger, for example because of alignment restrictions.
2179 * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags.
2180 * @param [in] methods Array of memory allocation methods to attempt.
2181 * @param [in] num_methods Length of 'methods' array.
2182 * @param [in] mds Array of memory domains to attempt to allocate
2183 * the memory with, for MD allocation method.
2184 * @param [in] num_mds Length of 'mds' array. May be empty, in such case
2185 * 'mds' may be NULL, and MD allocation method will
2186 * be skipped.
2187 * @param [in] name Name of the allocation. Used for memory statistics.
2188 * @param [out] mem In case of success, filled with information about
2189 * the allocated memory. @ref uct_allocated_memory_t.
2190 */
2191 ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
2192 uct_alloc_method_t *methods, unsigned num_methods,
2193 uct_md_h *mds, unsigned num_mds, const char *name,
2194 uct_allocated_memory_t *mem);
2195
2196
2197 /**
2198 * @ingroup UCT_MD
2199 * @brief Release allocated memory.
2200 *
2201 * Release the memory allocated by @ref uct_mem_alloc.
2202 *
2203 * @param [in] mem Description of allocated memory, as returned from
2204 * @ref uct_mem_alloc.
2205 */
2206 ucs_status_t uct_mem_free(const uct_allocated_memory_t *mem);
2207
2208 /**
2209 * @ingroup UCT_MD
2210 * @brief Read the configuration for a memory domain.
2211 *
2212 * @param [in] component Read the configuration of this component.
2213 * @param [in] env_prefix If non-NULL, search for environment variables
2214 * starting with UCT_<prefix>_. Otherwise, search
2215 * for environment variables starting with just UCT_.
2216 * @param [in] filename If non-NULL, read configuration from this file. If
2217 * the file does not exist, it will be ignored.
2218 * @param [out] config_p Filled with a pointer to the configuration.
2219 *
2220 * @return Error code.
2221 */
2222 ucs_status_t uct_md_config_read(uct_component_h component,
2223 const char *env_prefix, const char *filename,
2224 uct_md_config_t **config_p);
2225
2226
2227
2228 /**
2229 * @ingroup UCT_MD
2230 * @brief Check if remote sock address is accessible from the memory domain.
2231 *
2232 * This function checks if a remote sock address can be accessed from a local
2233 * memory domain. Accessibility can be checked in local or remote mode.
2234 *
2235 * @param [in] md Memory domain to check accessibility from.
2236 * This memory domain must support the @ref
2237 * UCT_MD_FLAG_SOCKADDR flag.
2238 * @param [in] sockaddr Socket address to check accessibility to.
2239 * @param [in] mode Mode for checking accessibility, as defined in @ref
2240 * uct_sockaddr_accessibility_t.
2241 * Indicates if accessibility is tested on the server side -
2242 * for binding to the given sockaddr, or on the
2243 * client side - for connecting to the given remote
2244 * peer's sockaddr.
2245 *
2246 * @return Nonzero if accessible, 0 if inaccessible.
2247 */
2248 int uct_md_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr,
2249 uct_sockaddr_accessibility_t mode);
2250
2251
2252 /**
2253 * @ingroup UCT_MD
2254 *
2255 * @brief Pack a remote key.
2256 *
2257 * @param [in] md Handle to memory domain.
2258 * @param [in] memh Local key, whose remote key should be packed.
2259 * @param [out] rkey_buffer Filled with packed remote key.
2260 *
2261 * @return Error code.
2262 */
2263 ucs_status_t uct_md_mkey_pack(uct_md_h md, uct_mem_h memh, void *rkey_buffer);
2264
2265
2266 /**
2267 * @ingroup UCT_MD
2268 *
2269 * @brief Unpack a remote key.
2270 *
2271 * @param [in] component Component on which to unpack the remote key.
2272 * @param [in] rkey_buffer Packed remote key buffer.
2273 * @param [out] rkey_ob Filled with the unpacked remote key and its type.
2274 *
2275 * @note The remote key must be unpacked with the same component that was used
2276 * to pack it. For example, if a remote device address on the remote
2277 * memory domain which was used to pack the key is reachable by a
2278 * transport on a local component, then that component is eligible to
2279 * unpack the key.
2280 * If the remote key buffer cannot be unpacked with the given component,
2281 * UCS_ERR_INVALID_PARAM will be returned.
2282 *
2283 * @return Error code.
2284 */
2285 ucs_status_t uct_rkey_unpack(uct_component_h component, const void *rkey_buffer,
2286 uct_rkey_bundle_t *rkey_ob);
2287
2288
2289 /**
2290 * @ingroup UCT_MD
2291 *
2292 * @brief Get a local pointer to remote memory.
2293 *
2294 * This routine returns a local pointer to the remote memory
2295 * described by the rkey bundle. The MD must support
2296 * @ref UCT_MD_FLAG_RKEY_PTR flag.
2297 *
2298 * @param [in] component Component on which to obtain the pointer to the
2299 * remote key.
2300 * @param [in] rkey_ob A remote key bundle as returned by
2301 * the @ref uct_rkey_unpack function.
2302 * @param [in] remote_addr A remote address within the memory area described
2303 * by the rkey_ob.
2304 * @param [out] addr_p A pointer that can be used for direct access to
2305 * the remote memory.
2306 *
2307 * @note The component used to obtain a local pointer to the remote memory must
2308 * be the same component that was used to pack the remote key. See notes
2309 * section for @ref uct_rkey_unpack.
2310 *
2311 * @return Error code if the remote memory cannot be accessed directly or
2312 * the remote address is not valid.
2313 */
2314 ucs_status_t uct_rkey_ptr(uct_component_h component, uct_rkey_bundle_t *rkey_ob,
2315 uint64_t remote_addr, void **addr_p);
2316
2317
2318 /**
2319 * @ingroup UCT_MD
2320 *
2321 * @brief Release a remote key.
2322 *
2323 * @param [in] component Component which was used to unpack the remote key.
2324 * @param [in] rkey_ob Remote key to release.
2325 */
2326 ucs_status_t uct_rkey_release(uct_component_h component,
2327 const uct_rkey_bundle_t *rkey_ob);
2328
2329
2330 /**
2331 * @ingroup UCT_CONTEXT
2332 * @brief Explicit progress for UCT worker.
2333 *
2334 * This routine explicitly progresses any outstanding communication operations
2335 * and active message requests.
2336 *
2337 * @note @li In the current implementation, users @b MUST call this routine
2338 * to receive the active message requests.
2339 *
2340 * @param [in] worker Handle to worker.
2341 *
2342 * @return Nonzero if any communication was progressed, zero otherwise.
2343 */
uct_worker_progress(uct_worker_h worker)2344 UCT_INLINE_API unsigned uct_worker_progress(uct_worker_h worker)
2345 {
2346 return ucs_callbackq_dispatch(&worker->progress_q);
2347 }
2348
2349
2350 /**
2351 * @ingroup UCT_RESOURCE
2352 * @brief Flush outstanding communication operations on an interface.
2353 *
2354 * Flushes all outstanding communications issued on the interface prior to
2355 * this call. The operations are completed at the origin or at the target
2356 * as well. The exact completion semantic depends on @a flags parameter.
2357 *
2358 * @note Currently only one completion type is supported. It guarantees that
2359 * the data transfer is completed but the target buffer may not be updated yet.
2360 *
2361 * @param [in] iface Interface to flush communications from.
2362 * @param [in] flags Flags that control completion semantic (currently only
2363 * @ref UCT_FLUSH_FLAG_LOCAL is supported).
2364 * @param [inout] comp Completion handle as defined by @ref uct_completion_t.
2365 * Can be NULL, which means that the call will return the
2366 * current state of the interface and no completion will
2367 * be generated in case of outstanding communications.
2368 * If it is not NULL completion counter is decremented
2369 * by 1 when the call completes. Completion callback is
2370 * called when the counter reaches 0.
2371 *
2372 *
2373 * @return UCS_OK - No outstanding communications left.
2374 * UCS_INPROGRESS - Some communication operations are still in progress.
2375 * If non-NULL 'comp' is provided, it will be updated
2376 * upon completion of these operations.
2377 */
uct_iface_flush(uct_iface_h iface,unsigned flags,uct_completion_t * comp)2378 UCT_INLINE_API ucs_status_t uct_iface_flush(uct_iface_h iface, unsigned flags,
2379 uct_completion_t *comp)
2380 {
2381 return iface->ops.iface_flush(iface, flags, comp);
2382 }
2383
2384 /**
2385 * @ingroup UCT_RESOURCE
2386 * @brief Ensures ordering of outstanding communications on the interface.
2387 * Operations issued on the interface prior to this call are guaranteed to
2388 * be completed before any subsequent communication operations to the same
2389 * interface which follow the call to fence.
2390 *
2391 * @param [in] iface Interface to issue communications from.
2392 * @param [in] flags Flags that control ordering semantic (currently
2393 * unsupported - set to 0).
2394 * @return UCS_OK - Ordering is inserted.
2395 */
2396
uct_iface_fence(uct_iface_h iface,unsigned flags)2397 UCT_INLINE_API ucs_status_t uct_iface_fence(uct_iface_h iface, unsigned flags)
2398 {
2399 return iface->ops.iface_fence(iface, flags);
2400 }
2401
2402 /**
2403 * @ingroup UCT_AM
2404 * @brief Release AM descriptor
2405 *
2406 * Release active message descriptor @a desc, which was passed to
2407 * @ref uct_am_callback_t "the active message callback", and owned by the callee.
2408 *
2409 * @param [in] desc Descriptor to release.
2410 */
uct_iface_release_desc(void * desc)2411 UCT_INLINE_API void uct_iface_release_desc(void *desc)
2412 {
2413 uct_recv_desc_t *release_desc = uct_recv_desc(desc);
2414 release_desc->cb(release_desc, desc);
2415 }
2416
2417
2418 /**
2419 * @ingroup UCT_RMA
2420 * @brief
2421 */
uct_ep_put_short(uct_ep_h ep,const void * buffer,unsigned length,uint64_t remote_addr,uct_rkey_t rkey)2422 UCT_INLINE_API ucs_status_t uct_ep_put_short(uct_ep_h ep, const void *buffer, unsigned length,
2423 uint64_t remote_addr, uct_rkey_t rkey)
2424 {
2425 return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey);
2426 }
2427
2428
2429 /**
2430 * @ingroup UCT_RMA
2431 * @brief
2432 */
uct_ep_put_bcopy(uct_ep_h ep,uct_pack_callback_t pack_cb,void * arg,uint64_t remote_addr,uct_rkey_t rkey)2433 UCT_INLINE_API ssize_t uct_ep_put_bcopy(uct_ep_h ep, uct_pack_callback_t pack_cb,
2434 void *arg, uint64_t remote_addr,
2435 uct_rkey_t rkey)
2436 {
2437 return ep->iface->ops.ep_put_bcopy(ep, pack_cb, arg, remote_addr, rkey);
2438 }
2439
2440
2441 /**
2442 * @ingroup UCT_RMA
2443 * @brief Write data to remote memory while avoiding local memory copy
2444 *
2445 * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote
2446 * address ("gather output"). Buffers in @a iov are processed in array order.
2447 * This means that the function complete iov[0] before proceeding to
2448 * iov[1], and so on.
2449 *
2450 *
2451 * @param [in] ep Destination endpoint handle.
2452 * @param [in] iov Points to an array of @ref ::uct_iov_t structures.
2453 * The @a iov pointer must be a valid address of an array
2454 * of @ref ::uct_iov_t structures. A particular structure
2455 * pointer must be a valid address. A NULL terminated
2456 * array is not required.
2457 * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures
2458 * array. If @a iovcnt is zero, the data is considered empty.
2459 * @a iovcnt is limited by @ref uct_iface_attr_cap_put_max_iov
2460 * "uct_iface_attr::cap::put::max_iov".
2461 * @param [in] remote_addr Remote address to place the @a iov data.
2462 * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack
2463 * @param [in] comp Completion handle as defined by @ref ::uct_completion_t.
2464 *
2465 * @return UCS_INPROGRESS Some communication operations are still in progress.
2466 * If non-NULL @a comp is provided, it will be updated
2467 * upon completion of these operations.
2468 *
2469 */
uct_ep_put_zcopy(uct_ep_h ep,const uct_iov_t * iov,size_t iovcnt,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)2470 UCT_INLINE_API ucs_status_t uct_ep_put_zcopy(uct_ep_h ep,
2471 const uct_iov_t *iov, size_t iovcnt,
2472 uint64_t remote_addr, uct_rkey_t rkey,
2473 uct_completion_t *comp)
2474 {
2475 return ep->iface->ops.ep_put_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp);
2476 }
2477
2478
2479 /**
2480 * @ingroup UCT_RMA
2481 * @brief
2482 */
uct_ep_get_short(uct_ep_h ep,void * buffer,unsigned length,uint64_t remote_addr,uct_rkey_t rkey)2483 UCT_INLINE_API ucs_status_t uct_ep_get_short(uct_ep_h ep, void *buffer, unsigned length,
2484 uint64_t remote_addr, uct_rkey_t rkey)
2485 {
2486 return ep->iface->ops.ep_get_short(ep, buffer, length, remote_addr, rkey);
2487 }
2488
2489
2490 /**
2491 * @ingroup UCT_RMA
2492 * @brief
2493 */
uct_ep_get_bcopy(uct_ep_h ep,uct_unpack_callback_t unpack_cb,void * arg,size_t length,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)2494 UCT_INLINE_API ucs_status_t uct_ep_get_bcopy(uct_ep_h ep, uct_unpack_callback_t unpack_cb,
2495 void *arg, size_t length,
2496 uint64_t remote_addr, uct_rkey_t rkey,
2497 uct_completion_t *comp)
2498 {
2499 return ep->iface->ops.ep_get_bcopy(ep, unpack_cb, arg, length, remote_addr,
2500 rkey, comp);
2501 }
2502
2503
2504 /**
2505 * @ingroup UCT_RMA
2506 * @brief Read data from remote memory while avoiding local memory copy
2507 *
2508 * The output data in @a iov array of @ref ::uct_iov_t structures received from
2509 * remote address ("scatter input"). Buffers in @a iov are processed in array order.
 * This means that the function completes iov[0] before proceeding to
2511 * iov[1], and so on.
2512 *
2513 *
2514 * @param [in] ep Destination endpoint handle.
2515 * @param [in] iov Points to an array of @ref ::uct_iov_t structures.
2516 * The @a iov pointer must be a valid address of an array
2517 * of @ref ::uct_iov_t structures. A particular structure
2518 * pointer must be a valid address. A NULL terminated
2519 * array is not required.
2520 * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures
2521 * array. If @a iovcnt is zero, the data is considered empty.
2522 * @a iovcnt is limited by @ref uct_iface_attr_cap_get_max_iov
2523 * "uct_iface_attr::cap::get::max_iov".
2524 * @param [in] remote_addr Remote address of the data placed to the @a iov.
2525 * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack
2526 * @param [in] comp Completion handle as defined by @ref ::uct_completion_t.
2527 *
2528 * @return UCS_INPROGRESS Some communication operations are still in progress.
2529 * If non-NULL @a comp is provided, it will be updated
2530 * upon completion of these operations.
2531 *
2532 */
uct_ep_get_zcopy(uct_ep_h ep,const uct_iov_t * iov,size_t iovcnt,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)2533 UCT_INLINE_API ucs_status_t uct_ep_get_zcopy(uct_ep_h ep,
2534 const uct_iov_t *iov, size_t iovcnt,
2535 uint64_t remote_addr, uct_rkey_t rkey,
2536 uct_completion_t *comp)
2537 {
2538 return ep->iface->ops.ep_get_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp);
2539 }
2540
2541
2542 /**
2543 * @ingroup UCT_AM
2544 * @brief
2545 */
uct_ep_am_short(uct_ep_h ep,uint8_t id,uint64_t header,const void * payload,unsigned length)2546 UCT_INLINE_API ucs_status_t uct_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header,
2547 const void *payload, unsigned length)
2548 {
2549 return ep->iface->ops.ep_am_short(ep, id, header, payload, length);
2550 }
2551
2552
2553 /**
2554 * @ingroup UCT_AM
2555 * @brief
2556 */
uct_ep_am_bcopy(uct_ep_h ep,uint8_t id,uct_pack_callback_t pack_cb,void * arg,unsigned flags)2557 UCT_INLINE_API ssize_t uct_ep_am_bcopy(uct_ep_h ep, uint8_t id,
2558 uct_pack_callback_t pack_cb, void *arg,
2559 unsigned flags)
2560 {
2561 return ep->iface->ops.ep_am_bcopy(ep, id, pack_cb, arg, flags);
2562 }
2563
2564
2565 /**
2566 * @ingroup UCT_AM
2567 * @brief Send active message while avoiding local memory copy
2568 *
2569 * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote
2570 * side ("gather output"). Buffers in @a iov are processed in array order.
 * This means that the function completes iov[0] before proceeding to
2572 * iov[1], and so on.
2573 *
2574 *
2575 * @param [in] ep Destination endpoint handle.
2576 * @param [in] id Active message id. Must be in range 0..UCT_AM_ID_MAX-1.
2577 * @param [in] header Active message header.
2578 * @param [in] header_length Active message header length in bytes.
2579 * @param [in] iov Points to an array of @ref ::uct_iov_t structures.
2580 * The @a iov pointer must be a valid address of an array
2581 * of @ref ::uct_iov_t structures. A particular structure
2582 * pointer must be a valid address. A NULL terminated
2583 * array is not required.
2584 * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures
2585 * array. If @a iovcnt is zero, the data is considered empty.
2586 * @a iovcnt is limited by @ref uct_iface_attr_cap_am_max_iov
2587 * "uct_iface_attr::cap::am::max_iov".
2588 * @param [in] flags Active message flags, see @ref uct_msg_flags.
2589 * @param [in] comp Completion handle as defined by @ref ::uct_completion_t.
2590 *
2591 * @return UCS_OK Operation completed successfully.
2592 * @return UCS_INPROGRESS Some communication operations are still in progress.
2593 * If non-NULL @a comp is provided, it will be updated
2594 * upon completion of these operations.
2595 * @return UCS_ERR_NO_RESOURCE Could not start the operation due to lack of send
2596 * resources.
2597 *
2598 * @note If the operation returns @a UCS_INPROGRESS, the memory buffers
2599 * pointed to by @a iov array must not be modified until the operation
2600 * is completed by @a comp. @a header can be released or changed.
2601 */
uct_ep_am_zcopy(uct_ep_h ep,uint8_t id,const void * header,unsigned header_length,const uct_iov_t * iov,size_t iovcnt,unsigned flags,uct_completion_t * comp)2602 UCT_INLINE_API ucs_status_t uct_ep_am_zcopy(uct_ep_h ep, uint8_t id,
2603 const void *header,
2604 unsigned header_length,
2605 const uct_iov_t *iov, size_t iovcnt,
2606 unsigned flags,
2607 uct_completion_t *comp)
2608 {
2609 return ep->iface->ops.ep_am_zcopy(ep, id, header, header_length, iov, iovcnt,
2610 flags, comp);
2611 }
2612
2613 /**
2614 * @ingroup UCT_AMO
2615 * @brief
2616 */
uct_ep_atomic_cswap64(uct_ep_h ep,uint64_t compare,uint64_t swap,uint64_t remote_addr,uct_rkey_t rkey,uint64_t * result,uct_completion_t * comp)2617 UCT_INLINE_API ucs_status_t uct_ep_atomic_cswap64(uct_ep_h ep, uint64_t compare, uint64_t swap,
2618 uint64_t remote_addr, uct_rkey_t rkey,
2619 uint64_t *result, uct_completion_t *comp)
2620 {
2621 return ep->iface->ops.ep_atomic_cswap64(ep, compare, swap, remote_addr, rkey, result, comp);
2622 }
2623
2624
2625 /**
2626 * @ingroup UCT_AMO
2627 * @brief
2628 */
uct_ep_atomic_cswap32(uct_ep_h ep,uint32_t compare,uint32_t swap,uint64_t remote_addr,uct_rkey_t rkey,uint32_t * result,uct_completion_t * comp)2629 UCT_INLINE_API ucs_status_t uct_ep_atomic_cswap32(uct_ep_h ep, uint32_t compare, uint32_t swap,
2630 uint64_t remote_addr, uct_rkey_t rkey,
2631 uint32_t *result, uct_completion_t *comp)
2632 {
2633 return ep->iface->ops.ep_atomic_cswap32(ep, compare, swap, remote_addr, rkey, result, comp);
2634 }
2635
2636
2637 /**
2638 * @ingroup UCT_AMO
2639 * @brief
2640 */
uct_ep_atomic32_post(uct_ep_h ep,uct_atomic_op_t opcode,uint32_t value,uint64_t remote_addr,uct_rkey_t rkey)2641 UCT_INLINE_API ucs_status_t uct_ep_atomic32_post(uct_ep_h ep, uct_atomic_op_t opcode,
2642 uint32_t value, uint64_t remote_addr,
2643 uct_rkey_t rkey)
2644 {
2645 return ep->iface->ops.ep_atomic32_post(ep, opcode, value, remote_addr, rkey);
2646 }
2647
2648
2649 /**
2650 * @ingroup UCT_AMO
2651 * @brief
2652 */
uct_ep_atomic64_post(uct_ep_h ep,uct_atomic_op_t opcode,uint64_t value,uint64_t remote_addr,uct_rkey_t rkey)2653 UCT_INLINE_API ucs_status_t uct_ep_atomic64_post(uct_ep_h ep, uct_atomic_op_t opcode,
2654 uint64_t value, uint64_t remote_addr,
2655 uct_rkey_t rkey)
2656 {
2657 return ep->iface->ops.ep_atomic64_post(ep, opcode, value, remote_addr, rkey);
2658 }
2659
2660
2661 /**
2662 * @ingroup UCT_AMO
2663 * @brief
2664 */
uct_ep_atomic32_fetch(uct_ep_h ep,uct_atomic_op_t opcode,uint32_t value,uint32_t * result,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)2665 UCT_INLINE_API ucs_status_t uct_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
2666 uint32_t value, uint32_t *result,
2667 uint64_t remote_addr, uct_rkey_t rkey,
2668 uct_completion_t *comp)
2669 {
2670 return ep->iface->ops.ep_atomic32_fetch(ep, opcode, value, result,
2671 remote_addr, rkey, comp);
2672 }
2673
2674
2675 /**
2676 * @ingroup UCT_AMO
2677 * @brief
2678 */
uct_ep_atomic64_fetch(uct_ep_h ep,uct_atomic_op_t opcode,uint64_t value,uint64_t * result,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)2679 UCT_INLINE_API ucs_status_t uct_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
2680 uint64_t value, uint64_t *result,
2681 uint64_t remote_addr, uct_rkey_t rkey,
2682 uct_completion_t *comp)
2683 {
2684 return ep->iface->ops.ep_atomic64_fetch(ep, opcode, value, result,
2685 remote_addr, rkey, comp);
2686 }
2687
2688
2689 /**
2690 * @ingroup UCT_RESOURCE
2691 * @brief Add a pending request to an endpoint.
2692 *
2693 * Add a pending request to the endpoint pending queue. The request will be
2694 * dispatched when the endpoint could potentially have additional send resources.
2695 *
2696 * @param [in] ep Endpoint to add the pending request to.
2697 * @param [in] req Pending request, which would be dispatched when more
2698 * resources become available. The user is expected to initialize
2699 * the "func" field.
2700 * After being passed to the function, the request is owned by UCT,
2701 * until the callback is called and returns UCS_OK.
2702 * @param [in] flags Flags that control pending request processing (see @ref uct_cb_flags)
2703 *
2704 * @return UCS_OK - request added to pending queue
2705 * UCS_ERR_BUSY - request was not added to pending queue, because send
2706 * resources are available now. The user is advised to
2707 * retry.
2708 */
uct_ep_pending_add(uct_ep_h ep,uct_pending_req_t * req,unsigned flags)2709 UCT_INLINE_API ucs_status_t uct_ep_pending_add(uct_ep_h ep,
2710 uct_pending_req_t *req,
2711 unsigned flags)
2712 {
2713 return ep->iface->ops.ep_pending_add(ep, req, flags);
2714 }
2715
2716
2717 /**
2718 * @ingroup UCT_RESOURCE
2719 * @brief Remove all pending requests from an endpoint.
2720 *
2721 * Remove pending requests from the given endpoint and pass them to the provided
2722 * callback function. The callback return value is ignored.
2723 *
2724 * @param [in] ep Endpoint to remove pending requests from.
2725 * @param [in] cb Callback to pass the removed requests to.
2726 * @param [in] arg Argument to pass to the @a cb callback.
2727 */
uct_ep_pending_purge(uct_ep_h ep,uct_pending_purge_callback_t cb,void * arg)2728 UCT_INLINE_API void uct_ep_pending_purge(uct_ep_h ep,
2729 uct_pending_purge_callback_t cb,
2730 void *arg)
2731 {
2732 ep->iface->ops.ep_pending_purge(ep, cb, arg);
2733 }
2734
2735
2736 /**
2737 * @ingroup UCT_RESOURCE
2738 * @brief Flush outstanding communication operations on an endpoint.
2739 *
2740 * Flushes all outstanding communications issued on the endpoint prior to
2741 * this call. The operations are completed at the origin or at the target
2742 * as well. The exact completion semantic depends on @a flags parameter.
2743 *
2744 * @param [in] ep Endpoint to flush communications from.
2745 * @param [in] flags Flags @ref uct_flush_flags that control completion
2746 * semantic.
2747 * @param [inout] comp Completion handle as defined by @ref uct_completion_t.
2748 * Can be NULL, which means that the call will return the
2749 * current state of the endpoint and no completion will
2750 * be generated in case of outstanding communications.
2751 * If it is not NULL completion counter is decremented
2752 * by 1 when the call completes. Completion callback is
2753 * called when the counter reaches 0.
2754 *
2755 * @return UCS_OK - No outstanding communications left.
2756 * UCS_ERR_NO_RESOURCE - Flush operation could not be initiated. A subsequent
2757 * call to @ref uct_ep_pending_add would add a pending
2758 * operation, which provides an opportunity to retry
2759 * the flush.
2760 * UCS_INPROGRESS - Some communication operations are still in progress.
2761 * If non-NULL 'comp' is provided, it will be updated
2762 * upon completion of these operations.
2763 */
uct_ep_flush(uct_ep_h ep,unsigned flags,uct_completion_t * comp)2764 UCT_INLINE_API ucs_status_t uct_ep_flush(uct_ep_h ep, unsigned flags,
2765 uct_completion_t *comp)
2766 {
2767 return ep->iface->ops.ep_flush(ep, flags, comp);
2768 }
2769
2770
2771 /**
2772 * @ingroup UCT_RESOURCE
2773 * @brief Ensures ordering of outstanding communications on the endpoint.
2774 * Operations issued on the endpoint prior to this call are guaranteed to
2775 * be completed before any subsequent communication operations to the same
2776 * endpoint which follow the call to fence.
2777 *
2778 * @param [in] ep Endpoint to issue communications from.
2779 * @param [in] flags Flags that control ordering semantic (currently
2780 * unsupported - set to 0).
2781 * @return UCS_OK - Ordering is inserted.
2782 */
uct_ep_fence(uct_ep_h ep,unsigned flags)2783 UCT_INLINE_API ucs_status_t uct_ep_fence(uct_ep_h ep, unsigned flags)
2784 {
2785 return ep->iface->ops.ep_fence(ep, flags);
2786 }
2787
2788
2789 /**
2790 * @ingroup UCT_TAG
2791 * @brief Short eager tagged-send operation.
2792 *
2793 * This routine sends a message using @ref uct_short_protocol_desc "short"
2794 * eager protocol. Eager protocol means that the whole data is sent to the peer
2795 * immediately without any preceding notification.
 * The data is provided as buffer and its length, and must not be larger than the
2797 * corresponding @a max_short value in @ref uct_iface_attr.
2798 * The immediate value delivered to the receiver is implicitly equal to 0.
2799 * If it's required to pass nonzero imm value, @ref uct_ep_tag_eager_bcopy
2800 * should be used.
2801 *
2802 * @param [in] ep Destination endpoint handle.
2803 * @param [in] tag Tag to use for the eager message.
2804 * @param [in] data Data to send.
2805 * @param [in] length Data length.
2806 *
2807 * @return UCS_OK - operation completed successfully.
2808 * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of
2809 * send resources.
2810 */
uct_ep_tag_eager_short(uct_ep_h ep,uct_tag_t tag,const void * data,size_t length)2811 UCT_INLINE_API ucs_status_t uct_ep_tag_eager_short(uct_ep_h ep, uct_tag_t tag,
2812 const void *data, size_t length)
2813 {
2814 return ep->iface->ops.ep_tag_eager_short(ep, tag, data, length);
2815 }
2816
2817
2818 /**
2819 * @ingroup UCT_TAG
2820 * @brief Bcopy eager tagged-send operation.
2821 *
2822 * This routine sends a message using @ref uct_bcopy_protocol_desc "bcopy"
2823 * eager protocol. Eager protocol means that the whole data is sent to the peer
2824 * immediately without any preceding notification.
2825 * Custom data callback is used to copy the data to the network buffers.
2826 *
 * @note The resulting data length must not be larger than the corresponding
2828 * @a max_bcopy value in @ref uct_iface_attr.
2829 *
2830 * @param [in] ep Destination endpoint handle.
2831 * @param [in] tag Tag to use for the eager message.
2832 * @param [in] imm Immediate value which will be available to the
2833 * receiver.
2834 * @param [in] pack_cb User callback to pack the data.
2835 * @param [in] arg Custom argument to @a pack_cb.
2836 * @param [in] flags Tag message flags, see @ref uct_msg_flags.
2837 *
2838 * @return >=0 - The size of the data packed by @a pack_cb.
2839 * @return otherwise - Error code.
2840 */
uct_ep_tag_eager_bcopy(uct_ep_h ep,uct_tag_t tag,uint64_t imm,uct_pack_callback_t pack_cb,void * arg,unsigned flags)2841 UCT_INLINE_API ssize_t uct_ep_tag_eager_bcopy(uct_ep_h ep, uct_tag_t tag,
2842 uint64_t imm,
2843 uct_pack_callback_t pack_cb,
2844 void *arg, unsigned flags)
2845 {
2846 return ep->iface->ops.ep_tag_eager_bcopy(ep, tag, imm, pack_cb, arg, flags);
2847 }
2848
2849
2850 /**
2851 * @ingroup UCT_TAG
2852 * @brief Zcopy eager tagged-send operation.
2853 *
2854 * This routine sends a message using @ref uct_zcopy_protocol_desc "zcopy"
2855 * eager protocol. Eager protocol means that the whole data is sent to the peer
2856 * immediately without any preceding notification.
2857 * The input data (which has to be previously registered) in @a iov array of
2858 * @ref uct_iov_t structures sent to remote side ("gather output"). Buffers in
 * @a iov are processed in array order, so the function completes @a iov[0]
2860 * before proceeding to @a iov[1], and so on.
2861 *
 * @note The resulting data length must not be larger than the corresponding
2863 * @a max_zcopy value in @ref uct_iface_attr.
2864 *
2865 * @param [in] ep Destination endpoint handle.
2866 * @param [in] tag Tag to use for the eager message.
2867 * @param [in] imm Immediate value which will be available to the
2868 * receiver.
2869 * @param [in] iov Points to an array of @ref uct_iov_t structures.
2870 * A particular structure pointer must be a valid address.
2871 * A NULL terminated array is not required.
2872 * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero, the
2873 * data is considered empty. Note that @a iovcnt is
2874 * limited by the corresponding @a max_iov value in
2875 * @ref uct_iface_attr.
2876 * @param [in] flags Tag message flags, see @ref uct_msg_flags.
2877 * @param [in] comp Completion callback which will be called when the data
2878 * is reliably received by the peer, and the buffer
2879 * can be reused or invalidated.
2880 *
2881 * @return UCS_OK - operation completed successfully.
2882 * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of
2883 * send resources.
2884 * @return UCS_INPROGRESS - operation started, and @a comp will be used to
2885 * notify when it's completed.
2886 */
uct_ep_tag_eager_zcopy(uct_ep_h ep,uct_tag_t tag,uint64_t imm,const uct_iov_t * iov,size_t iovcnt,unsigned flags,uct_completion_t * comp)2887 UCT_INLINE_API ucs_status_t uct_ep_tag_eager_zcopy(uct_ep_h ep, uct_tag_t tag,
2888 uint64_t imm,
2889 const uct_iov_t *iov,
2890 size_t iovcnt,
2891 unsigned flags,
2892 uct_completion_t *comp)
2893 {
2894 return ep->iface->ops.ep_tag_eager_zcopy(ep, tag, imm, iov, iovcnt, flags,
2895 comp);
2896 }
2897
2898
2899 /**
2900 * @ingroup UCT_TAG
2901 * @brief Rendezvous tagged-send operation.
2902 *
2903 * This routine sends a message using rendezvous protocol. Rendezvous protocol
2904 * means that only a small notification is sent at first, and the data itself
2905 * is transferred later (when there is a match) to avoid extra memory copy.
2906 *
2907 * @note The header will be available to the receiver in case of unexpected
2908 * rendezvous operation only, i.e. the peer has not posted tag for this
 *       message yet (by means of @ref uct_iface_tag_recv_zcopy) when it
 *       arrives.
2911 *
2912 * @param [in] ep Destination endpoint handle.
 * @param [in]  tag           Tag to use for the rendezvous message.
2914 * @param [in] header User defined header.
2915 * @param [in] header_length User defined header length in bytes. Note that
2916 * it is limited by the corresponding @a max_hdr
2917 * value in @ref uct_iface_attr.
2918 * @param [in] iov Points to an array of @ref uct_iov_t structures.
2919 * A particular structure pointer must be valid
2920 * address. A NULL terminated array is not required.
2921 * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero,
2922 * the data is considered empty. Note that @a iovcnt
2923 * is limited by the corresponding @a max_iov value
2924 * in @ref uct_iface_attr.
2925 * @param [in] flags Tag message flags, see @ref uct_msg_flags.
2926 * @param [in] comp Completion callback which will be called when the
2927 * data is reliably received by the peer, and the
2928 * buffer can be reused or invalidated.
2929 *
2930 * @return >=0 - The operation is in progress and the return value is a
2931 * handle which can be used to cancel the outstanding
2932 * rendezvous operation.
2933 * @return otherwise - Error code.
2934 */
uct_ep_tag_rndv_zcopy(uct_ep_h ep,uct_tag_t tag,const void * header,unsigned header_length,const uct_iov_t * iov,size_t iovcnt,unsigned flags,uct_completion_t * comp)2935 UCT_INLINE_API ucs_status_ptr_t uct_ep_tag_rndv_zcopy(uct_ep_h ep, uct_tag_t tag,
2936 const void *header,
2937 unsigned header_length,
2938 const uct_iov_t *iov,
2939 size_t iovcnt,
2940 unsigned flags,
2941 uct_completion_t *comp)
2942 {
2943 return ep->iface->ops.ep_tag_rndv_zcopy(ep, tag, header, header_length,
2944 iov, iovcnt, flags, comp);
2945 }
2946
2947
2948 /**
2949 * @ingroup UCT_TAG
2950 * @brief Cancel outstanding rendezvous operation.
2951 *
 * This routine signals the underlying transport to disregard the outstanding
2953 * operation without calling completion callback provided in
2954 * @ref uct_ep_tag_rndv_zcopy.
2955 *
2956 * @note The operation handle should be valid at the time the routine is
2957 * invoked. I.e. it should be a handle of the real operation which is not
2958 * completed yet.
2959 *
2960 * @param [in] ep Destination endpoint handle.
2961 * @param [in] op Rendezvous operation handle, as returned from
2962 * @ref uct_ep_tag_rndv_zcopy.
2963 *
2964 * @return UCS_OK - The operation has been canceled.
2965 */
uct_ep_tag_rndv_cancel(uct_ep_h ep,void * op)2966 UCT_INLINE_API ucs_status_t uct_ep_tag_rndv_cancel(uct_ep_h ep, void *op)
2967 {
2968 return ep->iface->ops.ep_tag_rndv_cancel(ep, op);
2969 }
2970
2971
2972 /**
2973 * @ingroup UCT_TAG
2974 * @brief Send software rendezvous request.
2975 *
2976 * This routine sends a rendezvous request only, which indicates that the data
2977 * transfer should be completed in software.
2978 *
2979 * @param [in] ep Destination endpoint handle.
2980 * @param [in] tag Tag to use for matching.
2981 * @param [in] header User defined header
2982 * @param [in] header_length User defined header length in bytes. Note that it
2983 * is limited by the corresponding @a max_hdr value
2984 * in @ref uct_iface_attr.
2985 * @param [in] flags Tag message flags, see @ref uct_msg_flags.
2986 *
2987 * @return UCS_OK - operation completed successfully.
2988 * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of
2989 * send resources.
2990 */
uct_ep_tag_rndv_request(uct_ep_h ep,uct_tag_t tag,const void * header,unsigned header_length,unsigned flags)2991 UCT_INLINE_API ucs_status_t uct_ep_tag_rndv_request(uct_ep_h ep, uct_tag_t tag,
2992 const void* header,
2993 unsigned header_length,
2994 unsigned flags)
2995 {
2996 return ep->iface->ops.ep_tag_rndv_request(ep, tag, header, header_length,
2997 flags);
2998 }
2999
3000
3001 /**
3002 * @ingroup UCT_TAG
3003 * @brief Post a tag to a transport interface.
3004 *
3005 * This routine posts a tag to be matched on a transport interface. When a
3006 * message with the corresponding tag arrives it is stored in the user buffer
3007 * (described by @a iov and @a iovcnt) directly. The operation completion is
3008 * reported using callbacks on the @a ctx structure.
3009 *
3010 * @param [in] iface Interface to post the tag on.
3011 * @param [in] tag Tag to expect.
3012 * @param [in] tag_mask Mask which specifies what bits of the tag to
3013 * compare.
3014 * @param [in] iov Points to an array of @ref ::uct_iov_t structures.
3015 * The @a iov pointer must be a valid address of an array
3016 * of @ref ::uct_iov_t structures. A particular structure
3017 * pointer must be a valid address. A NULL terminated
3018 * array is not required.
3019 * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures
3020 * array. If @a iovcnt is zero, the data is considered empty.
3021 * @a iovcnt is limited by @ref uct_iface_attr_cap_tag_recv_iov
3022 * "uct_iface_attr::cap::tag::max_iov".
3023 * @param [inout] ctx Context associated with this particular tag, "priv" field
3024 * in this structure is used to track the state internally.
3025 *
3026 * @return UCS_OK - The tag is posted to the transport.
3027 * @return UCS_ERR_NO_RESOURCE - Could not start the operation due to lack of
3028 * resources.
3029 * @return UCS_ERR_EXCEEDS_LIMIT - No more room for tags in the transport.
3030 */
uct_iface_tag_recv_zcopy(uct_iface_h iface,uct_tag_t tag,uct_tag_t tag_mask,const uct_iov_t * iov,size_t iovcnt,uct_tag_context_t * ctx)3031 UCT_INLINE_API ucs_status_t uct_iface_tag_recv_zcopy(uct_iface_h iface,
3032 uct_tag_t tag,
3033 uct_tag_t tag_mask,
3034 const uct_iov_t *iov,
3035 size_t iovcnt,
3036 uct_tag_context_t *ctx)
3037 {
3038 return iface->ops.iface_tag_recv_zcopy(iface, tag, tag_mask, iov, iovcnt, ctx);
3039 }
3040
3041
3042 /**
3043 * @ingroup UCT_TAG
3044 * @brief Cancel a posted tag.
3045 *
3046 * This routine cancels a tag, which was previously posted by
3047 * @ref uct_iface_tag_recv_zcopy. The tag would be either matched or canceled,
3048 * in a bounded time, regardless of the peer actions. The original completion
3049 * callback of the tag would be called with the status if @a force is not set.
3050 *
3051 * @param [in] iface Interface to cancel the tag on.
3052 * @param [in] ctx Tag context which was used for posting the tag. If
3053 * force is 0, @a ctx->completed_cb will be called with
3054 * either UCS_OK which means the tag was matched and data
3055 * received despite the cancel request, or
3056 * UCS_ERR_CANCELED which means the tag was successfully
3057 * canceled before it was matched.
3058 * @param [in] force Whether to report completions to @a ctx->completed_cb.
3059 * If nonzero, the cancel is assumed to be successful,
3060 * and the callback is not called.
3061 *
3062 * @return UCS_OK - The tag is canceled in the transport.
3063 */
uct_iface_tag_recv_cancel(uct_iface_h iface,uct_tag_context_t * ctx,int force)3064 UCT_INLINE_API ucs_status_t uct_iface_tag_recv_cancel(uct_iface_h iface,
3065 uct_tag_context_t *ctx,
3066 int force)
3067 {
3068 return iface->ops.iface_tag_recv_cancel(iface, ctx, force);
3069 }
3070
3071
3072 /**
3073 * @ingroup UCT_RESOURCE
3074 * @brief Enable synchronous progress for the interface
3075 *
3076 * Notify the transport that it should actively progress communications during
3077 * @ref uct_worker_progress().
3078 *
3079 * When the interface is created, its progress is initially disabled.
3080 *
3081 * @param [in] iface The interface to enable progress.
3082 * @param [in] flags The type of progress to enable as defined by
3083 * @ref uct_progress_types
3084 *
3085 * @note This function is not thread safe with respect to
 *       @ref uct_worker_progress(), unless the flag
3087 * @ref UCT_PROGRESS_THREAD_SAFE is specified.
3088 *
3089 */
uct_iface_progress_enable(uct_iface_h iface,unsigned flags)3090 UCT_INLINE_API void uct_iface_progress_enable(uct_iface_h iface, unsigned flags)
3091 {
3092 iface->ops.iface_progress_enable(iface, flags);
3093 }
3094
3095
3096 /**
3097 * @ingroup UCT_RESOURCE
3098 * @brief Disable synchronous progress for the interface
3099 *
3100 * Notify the transport that it should not progress its communications during
3101 * @ref uct_worker_progress(). Thus the latency of other transports may be
3102 * improved.
3103 *
3104 * By default, progress is disabled when the interface is created.
3105 *
3106 * @param [in] iface The interface to disable progress.
3107 * @param [in] flags The type of progress to disable as defined by
3108 * @ref uct_progress_types.
3109 *
3110 * @note This function is not thread safe with respect to
 *       @ref uct_worker_progress(), unless the flag
3112 * @ref UCT_PROGRESS_THREAD_SAFE is specified.
3113 *
3114 */
uct_iface_progress_disable(uct_iface_h iface,unsigned flags)3115 UCT_INLINE_API void uct_iface_progress_disable(uct_iface_h iface, unsigned flags)
3116 {
3117 iface->ops.iface_progress_disable(iface, flags);
3118 }
3119
3120
3121 /**
3122 * @ingroup UCT_RESOURCE
3123 * @brief Perform a progress on an interface.
3124 */
uct_iface_progress(uct_iface_h iface)3125 UCT_INLINE_API unsigned uct_iface_progress(uct_iface_h iface)
3126 {
3127 return iface->ops.iface_progress(iface);
3128 }
3129
3130
3131 /**
3132 * @ingroup UCT_CLIENT_SERVER
3133 * @brief Open a connection manager.
3134 *
3135 * Open a connection manager. All client server connection
3136 * establishment operations are performed in the context of a specific
3137 * connection manager.
3138 * @note This is an alternative API for
3139 * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER and
3140 * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT .
3141 *
3142 * @param [in] component Component on which to open the connection manager,
3143 * as returned from @ref uct_query_components.
3144 * @param [in] worker Worker on which to open the connection manager.
3145 * @param [in] config CM configuration options. Either obtained
3146 * from @ref uct_cm_config_read() function, or pointer
3147 * to CM-specific structure that extends
3148 * @ref uct_cm_config_t.
3149 * @param [out] cm_p Filled with a handle to the connection manager.
3150 *
3151 * @return Error code.
3152 */
3153 ucs_status_t uct_cm_open(uct_component_h component, uct_worker_h worker,
3154 const uct_cm_config_t *config, uct_cm_h *cm_p);
3155
3156
3157 /**
3158 * @ingroup UCT_CLIENT_SERVER
3159 * @brief Close a connection manager.
3160 *
3161 * @param [in] cm Connection manager to close.
3162 */
3163 void uct_cm_close(uct_cm_h cm);
3164
3165
3166 /**
3167 * @ingroup UCT_CLIENT_SERVER
3168 * @brief Get connection manager attributes.
3169 *
3170 * This routine queries the @ref uct_cm_h "cm" for its attributes
3171 * @ref uct_cm_attr_t.
3172 *
3173 * @param [in] cm Connection manager to query.
3174 * @param [out] cm_attr Filled with connection manager attributes.
3175 */
3176 ucs_status_t uct_cm_query(uct_cm_h cm, uct_cm_attr_t *cm_attr);
3177
3178
3179 /**
3180 * @ingroup UCT_CLIENT_SERVER
3181 * @brief Read the configuration for a connection manager.
3182 *
3183 * @param [in] component Read the configuration of the connection manager
3184 * on this component.
3185 * @param [in] env_prefix If non-NULL, search for environment variables
3186 * starting with this UCT_<prefix>_. Otherwise, search
3187 * for environment variables starting with just UCT_.
3188 * @param [in] filename If non-NULL, read configuration from this file. If
3189 * the file does not exist, or exists but cannot be
3190 * opened or read, it will be ignored.
3191 * @param [out] config_p Filled with a pointer to the configuration.
3192 *
3193 * @return Error code.
3194 */
3195 ucs_status_t uct_cm_config_read(uct_component_h component,
3196 const char *env_prefix, const char *filename,
3197 uct_cm_config_t **config_p);
3198
3199
3200 /**
3201 * @ingroup UCT_CLIENT_SERVER
3202 * @brief Notify the server about client-side connection establishment.
3203 *
3204 * This routine should be called on the client side after the client completed
3205 * establishing its connection to the server. The routine will send a
3206 * notification message to the server indicating that the client is connected.
3207 *
3208 * @param [in] ep The connected endpoint on the client side.
3209 *
3210 * @return Error code.
3211 */
3212 ucs_status_t uct_cm_client_ep_conn_notify(uct_ep_h ep);
3213
3214
3215 /**
3216 * @ingroup UCT_CLIENT_SERVER
3217 * @brief Create a new transport listener object.
3218 *
3219 * This routine creates a new listener on the given CM which will start
3220 * listening on a given sockaddr.
3221 *
3222 * @param [in] cm Connection manager on which to open the listener.
3223 * This cm should not be closed as long as there are
3224 * open listeners on it.
3225 * @param [in] saddr The socket address to listen on.
3226 * @param [in] socklen The saddr length.
3227 * @param [in] params User defined @ref uct_listener_params_t
3228 * configurations for the @a listener_p.
3229 * @param [out] listener_p Filled with handle to the new listener.
3230 *
3231 * @return Error code.
3232 */
3233 ucs_status_t uct_listener_create(uct_cm_h cm, const struct sockaddr *saddr,
3234 socklen_t socklen,
3235 const uct_listener_params_t *params,
3236 uct_listener_h *listener_p);
3237
3238
3239 /**
3240 * @ingroup UCT_CLIENT_SERVER
3241 * @brief Destroy a transport listener.
3242 *
3243 * @param [in] listener Listener to destroy.
3244 */
3245 void uct_listener_destroy(uct_listener_h listener);
3246
3247
3248 /**
3249 * @ingroup UCT_CLIENT_SERVER
3250 * @brief Reject a connection request.
3251 *
3252 * This routine can be invoked on the server side. It rejects a connection request
3253 * from the client.
3254 *
3255 * @param [in] listener Listener which will reject the connection request.
3256 * @param [in] conn_request Connection establishment request passed as parameter
3257 * of @ref uct_cm_listener_conn_request_callback_t in
3258 * @ref uct_cm_listener_conn_request_args_t::conn_request.
3259 *
3260 *
3261 * @return Error code as defined by @ref ucs_status_t
3262 */
3263 ucs_status_t uct_listener_reject(uct_listener_h listener,
3264 uct_conn_request_h conn_request);
3265
3266
3267 /**
3268 * @ingroup UCT_CLIENT_SERVER
3269 * @brief Get attributes specific to a particular listener.
3270 *
3271 * This routine queries the @ref uct_listener_h "listener" for its attributes
3272 * @ref uct_listener_attr_t.
3273 *
3274 * @param [in] listener Listener object to query.
3275 * @param [out] listener_attr Filled with attributes of the listener.
3276 *
3277 * @return Error code as defined by @ref ucs_status_t
3278 */
3279 ucs_status_t uct_listener_query(uct_listener_h listener,
3280 uct_listener_attr_t *listener_attr);
3281
3282
3283 /**
3284 * @example uct_hello_world.c
3285 * UCT hello world client / server example utility.
3286 */
3287
3288 END_C_DECLS
3289
3290 #endif
3291