1 /** 2 * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. 3 * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. 4 * Copyright (C) The University of Tennessee and The University 5 * of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED. 6 * See file LICENSE for terms. 7 */ 8 9 #ifndef UCX_LIBPERF_H 10 #define UCX_LIBPERF_H 11 12 #include <ucs/sys/compiler.h> 13 14 BEGIN_C_DECLS 15 16 /** @file libperf.h */ 17 18 #include <sys/uio.h> 19 #include <uct/api/uct.h> 20 #include <ucp/api/ucp.h> 21 #include <ucs/sys/math.h> 22 #include <ucs/sys/stubs.h> 23 #include <ucs/type/status.h> 24 25 26 typedef enum { 27 UCX_PERF_API_UCT, 28 UCX_PERF_API_UCP, 29 UCX_PERF_API_LAST 30 } ucx_perf_api_t; 31 32 33 typedef enum { 34 UCX_PERF_CMD_AM, 35 UCX_PERF_CMD_PUT, 36 UCX_PERF_CMD_GET, 37 UCX_PERF_CMD_ADD, 38 UCX_PERF_CMD_FADD, 39 UCX_PERF_CMD_SWAP, 40 UCX_PERF_CMD_CSWAP, 41 UCX_PERF_CMD_TAG, 42 UCX_PERF_CMD_TAG_SYNC, 43 UCX_PERF_CMD_STREAM, 44 UCX_PERF_CMD_LAST 45 } ucx_perf_cmd_t; 46 47 48 typedef enum { 49 UCX_PERF_TEST_TYPE_PINGPONG, /* Ping-pong mode */ 50 UCX_PERF_TEST_TYPE_STREAM_UNI, /* Unidirectional stream */ 51 UCX_PERF_TEST_TYPE_STREAM_BI, /* Bidirectional stream */ 52 UCX_PERF_TEST_TYPE_LAST 53 } ucx_perf_test_type_t; 54 55 56 typedef enum { 57 UCP_PERF_DATATYPE_CONTIG, 58 UCP_PERF_DATATYPE_IOV, 59 } ucp_perf_datatype_t; 60 61 62 typedef enum { 63 UCT_PERF_DATA_LAYOUT_SHORT, 64 UCT_PERF_DATA_LAYOUT_BCOPY, 65 UCT_PERF_DATA_LAYOUT_ZCOPY, 66 UCT_PERF_DATA_LAYOUT_LAST 67 } uct_perf_data_layout_t; 68 69 70 typedef enum { 71 UCX_PERF_WAIT_MODE_PROGRESS, /* Repeatedly call progress */ 72 UCX_PERF_WAIT_MODE_SLEEP, /* Go to sleep */ 73 UCX_PERF_WAIT_MODE_SPIN, /* Spin without calling progress */ 74 UCX_PERF_WAIT_MODE_LAST 75 } ucx_perf_wait_mode_t; 76 77 78 enum ucx_perf_test_flags { 79 UCX_PERF_TEST_FLAG_VALIDATE = UCS_BIT(1), /* Validate data. Affects performance. */ 80 UCX_PERF_TEST_FLAG_ONE_SIDED = UCS_BIT(2), /* For tests which involves only one side, 81 the responder should not call progress(). */ 82 UCX_PERF_TEST_FLAG_MAP_NONBLOCK = UCS_BIT(3), /* Map memory in non-blocking mode */ 83 UCX_PERF_TEST_FLAG_TAG_WILDCARD = UCS_BIT(4), /* For tag tests, use wildcard mask */ 84 UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE = UCS_BIT(5), /* For tag tests, use probe to get unexpected receive */ 85 UCX_PERF_TEST_FLAG_VERBOSE = UCS_BIT(7), /* Print error messages */ 86 UCX_PERF_TEST_FLAG_STREAM_RECV_DATA = UCS_BIT(8), /* For stream tests, use recv data API */ 87 UCX_PERF_TEST_FLAG_FLUSH_EP = UCS_BIT(9) /* Issue flush on endpoint instead of worker */ 88 }; 89 90 91 enum { 92 UCT_PERF_TEST_MAX_FC_WINDOW = 127 /* Maximal flow-control window */ 93 }; 94 95 96 #define UCT_PERF_TEST_PARAMS_FMT "%s/%s" 97 #define UCT_PERF_TEST_PARAMS_ARG(_params) (_params)->uct.tl_name, \ 98 (_params)->uct.dev_name 99 100 101 /** 102 * Performance counter type. 103 */ 104 typedef uint64_t ucx_perf_counter_t; 105 106 107 /* 108 * Performance test result. 109 * 110 * Time values are in seconds. 111 * Size values are in bytes. 112 */ 113 typedef struct ucx_perf_result { 114 ucx_perf_counter_t iters; 115 double elapsed_time; 116 ucx_perf_counter_t bytes; 117 struct { 118 double typical; 119 double moment_average; /* Average since last report */ 120 double total_average; /* Average of the whole test */ 121 } 122 latency, bandwidth, msgrate; 123 } ucx_perf_result_t; 124 125 126 typedef void (*ucx_perf_rte_progress_cb_t)(void *arg); 127 128 typedef unsigned (*ucx_perf_rte_group_size_func_t)(void *rte_group); 129 typedef unsigned (*ucx_perf_rte_group_index_func_t)(void *rte_group); 130 typedef void (*ucx_perf_rte_barrier_func_t)(void *rte_group, 131 ucx_perf_rte_progress_cb_t progress, 132 void *arg); 133 typedef void (*ucx_perf_rte_post_vec_func_t)(void *rte_group, 134 const struct iovec *iovec, 135 int iovcnt, void **req); 136 typedef void (*ucx_perf_rte_recv_func_t)(void *rte_group, unsigned src, 137 void *buffer, size_t max, void *req); 138 typedef void (*ucx_perf_rte_exchange_vec_func_t)(void *rte_group, void *req); 139 typedef void (*ucx_perf_rte_report_func_t)(void *rte_group, 140 const ucx_perf_result_t *result, 141 void *arg, int is_final, 142 int is_multi_thread); 143 144 /** 145 * RTE used to bring-up the test 146 */ 147 typedef struct ucx_perf_rte { 148 /* @return Group size */ 149 ucx_perf_rte_group_size_func_t group_size; 150 151 /* @return My index within the group */ 152 ucx_perf_rte_group_index_func_t group_index; 153 154 /* Barrier */ 155 ucx_perf_rte_barrier_func_t barrier; 156 157 /* Direct modex */ 158 ucx_perf_rte_post_vec_func_t post_vec; 159 ucx_perf_rte_recv_func_t recv; 160 ucx_perf_rte_exchange_vec_func_t exchange_vec; 161 162 /* Handle results */ 163 ucx_perf_rte_report_func_t report; 164 165 } ucx_perf_rte_t; 166 167 168 /** 169 * Describes a performance test. 170 */ 171 typedef struct ucx_perf_params { 172 ucx_perf_api_t api; /* Which API to test */ 173 ucx_perf_cmd_t command; /* Command to perform */ 174 ucx_perf_test_type_t test_type; /* Test communication type */ 175 ucs_thread_mode_t thread_mode; /* Thread mode for communication objects */ 176 unsigned thread_count; /* Number of threads in the test program */ 177 ucs_async_mode_t async_mode; /* how async progress and locking is done */ 178 ucx_perf_wait_mode_t wait_mode; /* How to wait */ 179 ucs_memory_type_t send_mem_type; /* Send memory type */ 180 ucs_memory_type_t recv_mem_type; /* Recv memory type */ 181 unsigned flags; /* See ucx_perf_test_flags. */ 182 183 size_t *msg_size_list; /* Test message sizes list. The size 184 of the array is in msg_size_cnt */ 185 size_t msg_size_cnt; /* Number of message sizes in 186 message sizes list */ 187 size_t iov_stride; /* Distance between starting address 188 of consecutive IOV entries. It is 189 similar to UCT uct_iov_t type stride */ 190 size_t am_hdr_size; /* Active message header size (included in message size) */ 191 size_t alignment; /* Message buffer alignment */ 192 unsigned max_outstanding; /* Maximal number of outstanding sends */ 193 ucx_perf_counter_t warmup_iter; /* Number of warm-up iterations */ 194 ucx_perf_counter_t max_iter; /* Iterations limit, 0 - unlimited */ 195 double max_time; /* Time limit (seconds), 0 - unlimited */ 196 double report_interval; /* Interval at which to call the report callback */ 197 198 void *rte_group; /* Opaque RTE group handle */ 199 ucx_perf_rte_t *rte; /* RTE functions used to exchange data */ 200 void *report_arg; /* Custom argument for report function */ 201 202 struct { 203 char dev_name[UCT_DEVICE_NAME_MAX]; /* Device name to use */ 204 char tl_name[UCT_TL_NAME_MAX]; /* Transport to use */ 205 char md_name[UCT_MD_NAME_MAX]; /* Memory domain name to use */ 206 uct_perf_data_layout_t data_layout; /* Data layout to use */ 207 unsigned fc_window; /* Window size for flow control <= UCX_PERF_TEST_MAX_FC_WINDOW */ 208 } uct; 209 210 struct { 211 unsigned nonblocking_mode; /* TBD */ 212 ucp_perf_datatype_t send_datatype; 213 ucp_perf_datatype_t recv_datatype; 214 } ucp; 215 216 } ucx_perf_params_t; 217 218 219 /* Allocators for each memory type */ 220 typedef struct ucx_perf_allocator ucx_perf_allocator_t; 221 extern const ucx_perf_allocator_t* ucx_perf_mem_type_allocators[]; 222 223 224 /** 225 * Initialize performance testing framework. May be called multiple times. 226 */ 227 void ucx_perf_global_init(); 228 229 230 /** 231 * Run a UCT performance test. 232 */ 233 ucs_status_t ucx_perf_run(const ucx_perf_params_t *params, 234 ucx_perf_result_t *result); 235 236 237 END_C_DECLS 238 239 #endif /* UCX_PERF_H_ */ 240