1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2014.  ALL RIGHTS RESERVED.
3 * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
4 * Copyright (C) The University of Tennessee and The University
5 *               of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED.
6 * See file LICENSE for terms.
7 */
8 
9 #ifndef UCX_LIBPERF_H
10 #define UCX_LIBPERF_H
11 
12 #include <ucs/sys/compiler.h>
13 
14 BEGIN_C_DECLS
15 
16 /** @file libperf.h */
17 
18 #include <sys/uio.h>
19 #include <uct/api/uct.h>
20 #include <ucp/api/ucp.h>
21 #include <ucs/sys/math.h>
22 #include <ucs/sys/stubs.h>
23 #include <ucs/type/status.h>
24 
25 
/**
 * Which API a performance test exercises (see the 'api' test parameter).
 */
typedef enum {
    UCX_PERF_API_UCT = 0, /* Test the UCT API */
    UCX_PERF_API_UCP,     /* Test the UCP API */
    UCX_PERF_API_LAST     /* Count of the API values listed above */
} ucx_perf_api_t;
31 
32 
/**
 * Command that a performance test performs (see the 'command' test
 * parameter).
 *
 * NOTE(review): values without a comment are described only by their names
 * in this header; confirm exact semantics against the test implementations.
 */
typedef enum {
    UCX_PERF_CMD_AM = 0,   /* Active message */
    UCX_PERF_CMD_PUT,
    UCX_PERF_CMD_GET,
    UCX_PERF_CMD_ADD,
    UCX_PERF_CMD_FADD,
    UCX_PERF_CMD_SWAP,
    UCX_PERF_CMD_CSWAP,
    UCX_PERF_CMD_TAG,      /* Tag test */
    UCX_PERF_CMD_TAG_SYNC,
    UCX_PERF_CMD_STREAM,   /* Stream test */
    UCX_PERF_CMD_LAST      /* Count of the commands listed above */
} ucx_perf_cmd_t;
46 
47 
/**
 * Communication type of a performance test (see the 'test_type' test
 * parameter).
 */
typedef enum {
    UCX_PERF_TEST_TYPE_PINGPONG = 0, /* Ping-pong mode */
    UCX_PERF_TEST_TYPE_STREAM_UNI,   /* Stream in one direction only */
    UCX_PERF_TEST_TYPE_STREAM_BI,    /* Stream in both directions */
    UCX_PERF_TEST_TYPE_LAST          /* Count of the test types listed above */
} ucx_perf_test_type_t;
54 
55 
/**
 * Datatype selector for the send and receive sides of a UCP test (see the
 * 'ucp.send_datatype' and 'ucp.recv_datatype' test parameters).
 *
 * Unlike the other enumerations in this header, there is no _LAST entry.
 */
typedef enum {
    UCP_PERF_DATATYPE_CONTIG = 0, /* NOTE(review): presumably one contiguous buffer */
    UCP_PERF_DATATYPE_IOV         /* IOV entries; see the 'iov_stride' parameter */
} ucp_perf_datatype_t;
60 
61 
/**
 * Data layout used by a UCT test (see the 'uct.data_layout' test parameter).
 *
 * NOTE(review): the names suggest the UCT short / bcopy / zcopy send
 * variants; confirm against the UCT test implementation.
 */
typedef enum {
    UCT_PERF_DATA_LAYOUT_SHORT = 0,
    UCT_PERF_DATA_LAYOUT_BCOPY,
    UCT_PERF_DATA_LAYOUT_ZCOPY,
    UCT_PERF_DATA_LAYOUT_LAST  /* Count of the layouts listed above */
} uct_perf_data_layout_t;
68 
69 
/**
 * How the test waits (see the 'wait_mode' test parameter).
 */
typedef enum {
    UCX_PERF_WAIT_MODE_PROGRESS = 0, /* Wait by repeatedly calling progress */
    UCX_PERF_WAIT_MODE_SLEEP,        /* Wait by going to sleep */
    UCX_PERF_WAIT_MODE_SPIN,         /* Wait by spinning, without calling progress */
    UCX_PERF_WAIT_MODE_LAST          /* Count of the wait modes listed above */
} ucx_perf_wait_mode_t;
76 
77 
78 enum ucx_perf_test_flags {
79     UCX_PERF_TEST_FLAG_VALIDATE         = UCS_BIT(1), /* Validate data. Affects performance. */
80     UCX_PERF_TEST_FLAG_ONE_SIDED        = UCS_BIT(2), /* For tests which involves only one side,
81                                                          the responder should not call progress(). */
82     UCX_PERF_TEST_FLAG_MAP_NONBLOCK     = UCS_BIT(3), /* Map memory in non-blocking mode */
83     UCX_PERF_TEST_FLAG_TAG_WILDCARD     = UCS_BIT(4), /* For tag tests, use wildcard mask */
84     UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE  = UCS_BIT(5), /* For tag tests, use probe to get unexpected receive */
85     UCX_PERF_TEST_FLAG_VERBOSE          = UCS_BIT(7), /* Print error messages */
86     UCX_PERF_TEST_FLAG_STREAM_RECV_DATA = UCS_BIT(8), /* For stream tests, use recv data API */
87     UCX_PERF_TEST_FLAG_FLUSH_EP         = UCS_BIT(9)  /* Issue flush on endpoint instead of worker */
88 };
89 
90 
/**
 * Limits for UCT performance tests.
 */
enum {
    /* Maximal flow-control window (upper bound for 'uct.fc_window') */
    UCT_PERF_TEST_MAX_FC_WINDOW = 127
};
94 
95 
/*
 * printf-style format/argument pair identifying a UCT test as
 * "<tl_name>/<dev_name>".
 *
 * UCT_PERF_TEST_PARAMS_ARG expands to two comma-separated arguments (which is
 * why the expansion as a whole cannot be parenthesized) and evaluates
 * _params twice. _params must be a pointer to a structure that has
 * 'uct.tl_name' and 'uct.dev_name' string members.
 */
#define UCT_PERF_TEST_PARAMS_FMT \
    "%s/%s"
#define UCT_PERF_TEST_PARAMS_ARG(_params) \
    (_params)->uct.tl_name, (_params)->uct.dev_name
99 
100 
/**
 * Performance counter type: an unsigned 64-bit integer, used in this header
 * for iteration and byte counts.
 */
typedef uint64_t ucx_perf_counter_t;


/**
 * Performance test result.
 *
 * Time values are in seconds.
 * Size values are in bytes.
 */
typedef struct ucx_perf_result {
    ucx_perf_counter_t iters;
    double             elapsed_time;
    ucx_perf_counter_t bytes;

    /* The same three statistics are kept for latency, bandwidth and
       message rate */
    struct {
        double typical;
        double moment_average; /* Average since last report */
        double total_average;  /* Average of the whole test */
    } latency, bandwidth, msgrate;
} ucx_perf_result_t;
124 
125 
/*
 * Signatures of the callbacks that make up ucx_perf_rte_t. Every RTE callback
 * receives the opaque RTE group handle as its first argument.
 */

/* Progress callback; handed to the barrier function together with the 'arg'
   value that is its only parameter. */
typedef void (*ucx_perf_rte_progress_cb_t)(void *arg);

/* @return Group size */
typedef unsigned (*ucx_perf_rte_group_size_func_t)(void *rte_group);
/* @return Caller's index within the group */
typedef unsigned (*ucx_perf_rte_group_index_func_t)(void *rte_group);
/* Barrier. Takes a progress callback and the argument for it.
   NOTE(review): presumably 'progress' is invoked while waiting - confirm
   against the RTE implementations. */
typedef void (*ucx_perf_rte_barrier_func_t)(void *rte_group,
                                            ucx_perf_rte_progress_cb_t progress,
                                            void *arg);
/* Data exchange: post a vector of 'iovcnt' iovec entries. 'req' is a pointer
   to a request handle (NOTE(review): presumably set by the callee). */
typedef void (*ucx_perf_rte_post_vec_func_t)(void *rte_group,
                                             const struct iovec *iovec,
                                             int iovcnt, void **req);
/* Data exchange: receive from group member 'src' into 'buffer', whose
   capacity is 'max' (NOTE(review): presumably in bytes). */
typedef void (*ucx_perf_rte_recv_func_t)(void *rte_group, unsigned src,
                                         void *buffer, size_t max, void *req);
/* Data exchange: takes a request handle (NOTE(review): presumably the one
   produced by the post_vec callback - confirm). */
typedef void (*ucx_perf_rte_exchange_vec_func_t)(void *rte_group, void *req);
/* Handle results: receives a test result, a custom argument, and two flags
   telling whether the result is final and whether the test is
   multi-threaded. */
typedef void (*ucx_perf_rte_report_func_t)(void *rte_group,
                                           const ucx_perf_result_t *result,
                                           void *arg, int is_final,
                                           int is_multi_thread);
143 
/**
 * RTE used to bring up the test: a table of callbacks, each of which takes
 * the opaque RTE group handle as its first argument.
 */
typedef struct ucx_perf_rte {
    /* @return Group size */
    ucx_perf_rte_group_size_func_t   group_size;

    /* @return My index within the group */
    ucx_perf_rte_group_index_func_t  group_index;

    /* Barrier; takes a progress callback and its argument */
    ucx_perf_rte_barrier_func_t      barrier;

    /* Direct modex: the three callbacks used to exchange data.
       NOTE(review): the expected calling order of post_vec / recv /
       exchange_vec is not visible in this header - confirm with callers. */
    ucx_perf_rte_post_vec_func_t     post_vec;
    ucx_perf_rte_recv_func_t         recv;
    ucx_perf_rte_exchange_vec_func_t exchange_vec;

    /* Handle results */
    ucx_perf_rte_report_func_t       report;

} ucx_perf_rte_t;
166 
167 
/**
 * Describes a performance test.
 *
 * The top-level fields are common to all tests. The 'uct' and 'ucp'
 * sub-structures hold the settings named after the respective API; 'api'
 * selects which API is tested.
 */
typedef struct ucx_perf_params {
    ucx_perf_api_t         api;             /* Which API to test */
    ucx_perf_cmd_t         command;         /* Command to perform */
    ucx_perf_test_type_t   test_type;       /* Test communication type */
    ucs_thread_mode_t      thread_mode;     /* Thread mode for communication objects */
    unsigned               thread_count;    /* Number of threads in the test program */
    ucs_async_mode_t       async_mode;      /* How async progress and locking are done */
    ucx_perf_wait_mode_t   wait_mode;       /* How to wait */
    ucs_memory_type_t      send_mem_type;   /* Send memory type */
    ucs_memory_type_t      recv_mem_type;   /* Recv memory type */
    unsigned               flags;           /* See ucx_perf_test_flags. */

    size_t                 *msg_size_list;  /* Test message sizes list. The size
                                               of the array is in msg_size_cnt */
    size_t                 msg_size_cnt;    /* Number of message sizes in
                                               message sizes list */
    size_t                 iov_stride;      /* Distance between starting address
                                               of consecutive IOV entries. It is
                                               similar to UCT uct_iov_t type stride */
    size_t                 am_hdr_size;     /* Active message header size (included in message size) */
    size_t                 alignment;       /* Message buffer alignment */
    unsigned               max_outstanding; /* Maximal number of outstanding sends */
    ucx_perf_counter_t     warmup_iter;     /* Number of warm-up iterations */
    ucx_perf_counter_t     max_iter;        /* Iterations limit, 0 - unlimited */
    double                 max_time;        /* Time limit (seconds), 0 - unlimited */
    double                 report_interval; /* Interval at which to call the report callback */

    void                   *rte_group;      /* Opaque RTE group handle */
    ucx_perf_rte_t         *rte;            /* RTE functions used to exchange data */
    void                   *report_arg;     /* Custom argument for report function */

    struct {
        char                   dev_name[UCT_DEVICE_NAME_MAX]; /* Device name to use */
        char                   tl_name[UCT_TL_NAME_MAX];      /* Transport to use */
        char                   md_name[UCT_MD_NAME_MAX];      /* Memory domain name to use */
        uct_perf_data_layout_t data_layout; /* Data layout to use */
        unsigned               fc_window;   /* Window size for flow control <= UCT_PERF_TEST_MAX_FC_WINDOW */
    } uct;

    struct {
        unsigned               nonblocking_mode; /* TBD */
        ucp_perf_datatype_t    send_datatype;    /* Datatype used on the send side */
        ucp_perf_datatype_t    recv_datatype;    /* Datatype used on the receive side */
    } ucp;

} ucx_perf_params_t;
217 
218 
/*
 * Allocators for each memory type.
 *
 * ucx_perf_allocator_t is opaque in this header (forward declaration only).
 * The array is defined elsewhere, so its length is not visible here.
 * NOTE(review): presumably indexed by ucs_memory_type_t - confirm at the
 * definition.
 */
typedef struct ucx_perf_allocator ucx_perf_allocator_t;
extern const ucx_perf_allocator_t *ucx_perf_mem_type_allocators[];
222 
223 
/**
 * Initialize performance testing framework. May be called multiple times.
 *
 * Takes no arguments and returns nothing. The parameter list is spelled
 * (void) so that this is a real prototype in C: with an empty list "()" the
 * compiler would not diagnose a call that passes arguments.
 */
void ucx_perf_global_init(void);
228 
229 
/**
 * Run a performance test.
 *
 * The API under test (UCT or UCP) is selected by the 'api' field of the
 * parameters.
 *
 * @param [in]  params  Description of the test to run; not modified through
 *                      this pointer.
 * @param [out] result  Test result structure. NOTE(review): presumably
 *                      filled with the final measurements - confirm in the
 *                      implementation.
 *
 * @return Status code of the run. NOTE(review): presumably UCS_OK on
 *         success - confirm in the implementation.
 */
ucs_status_t ucx_perf_run(const ucx_perf_params_t *params,
                          ucx_perf_result_t *result);
235 
236 
237 END_C_DECLS
238 
#endif /* UCX_LIBPERF_H */
240