1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
3 *
4 * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
5 * See file LICENSE for terms.
6 */
7
8 #include "ucp_test.h"
9
10 #include <gtest/common/test_perf.h>
11
12
/*
 * Scale factor equal to 1 / (1024 * 1024). It is used by the test table in
 * this file as the multiplier for the entries reported in "MB/sec".
 * Written as a parenthesized constant expression so that it does not need a
 * pow() call (nor a <cmath> include) and stays safe inside larger expressions.
 */
#define MB                           (1.0 / (1024.0 * 1024.0))

/*
 * When the CPU model is UCS_CPU_MODEL_ARM_AARCH64, the envelope test multiplies
 * each test's 'max' by this factor and divides its 'min' by it.
 */
#define UCP_ARM_PERF_TEST_MULTIPLIER 2
15 class test_ucp_perf : public ucp_test, public test_perf {
16 protected:
init()17 virtual void init() {
18 test_base::init(); /* Skip entities creation in ucp_test */
19 ucs_log_push_handler(log_handler);
20 }
21
cleanup()22 virtual void cleanup() {
23 ucs_log_pop_handler();
24 test_base::cleanup();
25 }
26
27 static ucs_log_func_rc_t
log_handler(const char * file,unsigned line,const char * function,ucs_log_level_t level,const ucs_log_component_config_t * comp_conf,const char * message,va_list ap)28 log_handler(const char *file, unsigned line, const char *function,
29 ucs_log_level_t level,
30 const ucs_log_component_config_t *comp_conf,
31 const char *message, va_list ap) {
32 // Ignore errors that transport cannot reach peer
33 if (level == UCS_LOG_LEVEL_ERROR) {
34 std::string err_str = format_message(message, ap);
35 if (strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNREACHABLE)) ||
36 strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNSUPPORTED))) {
37 UCS_TEST_MESSAGE << err_str;
38 return UCS_LOG_FUNC_RC_STOP;
39 }
40 }
41 return UCS_LOG_FUNC_RC_CONTINUE;
42 }
43
44 const static test_spec tests[];
45 };
46
47
48 const test_perf::test_spec test_ucp_perf::tests[] =
49 {
50 { "tag latency", "usec",
51 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
52 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
53 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
54 0 },
55
56 { "tag iov latency", "usec",
57 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
58 UCP_PERF_DATATYPE_IOV, 8192, 3, { 1024, 1024, 1024 }, 1, 100000lu,
59 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
60 0 },
61
62 { "tag mr", "Mpps",
63 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
64 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
65 ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
66 0 },
67
68 { "tag sync mr", "Mpps",
69 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI,
70 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 200000lu,
71 ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.05, 100.0, 0},
72
73 { "tag wild mr", "Mpps",
74 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
75 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
76 ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
77 UCX_PERF_TEST_FLAG_TAG_WILDCARD },
78
79 { "tag bw", "MB/sec",
80 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
81 UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 1, 100000lu,
82 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 },
83
84 { "tag bw_zcopy_multi", "MB/sec",
85 UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
86 UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 16, 100000lu,
87 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 },
88
89 { "put latency", "usec",
90 UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG,
91 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
92 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
93 0 },
94
95 { "put rate", "Mpps",
96 UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
97 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
98 ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 100.0,
99 0 },
100
101 { "put bw", "MB/sec",
102 UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
103 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu,
104 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
105 0 },
106
107 { "get latency", "usec",
108 UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
109 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
110 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
111 0 },
112
113 { "get bw", "MB/sec",
114 UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
115 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
116 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
117 0 },
118
119 { "stream latency", "usec",
120 UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
121 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
122 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, 0 },
123
124 { "stream bw", "MB/sec",
125 UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
126 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
127 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, 0 },
128
129 { "stream recv-data latency", "usec",
130 UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
131 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
132 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
133 UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },
134
135 { "stream recv-data bw", "MB/sec",
136 UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
137 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
138 ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
139 UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },
140
141 { "atomic add rate", "Mpps",
142 UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
143 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 1000000lu,
144 ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 500.0,
145 0 },
146
147 { "atomic fadd latency", "usec",
148 UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
149 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
150 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
151 0 },
152
153 { "atomic swap latency", "usec",
154 UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
155 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
156 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
157 0 },
158
159 { "atomic cswap latency", "usec",
160 UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
161 UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
162 ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
163 0 },
164
165 { NULL }
166 };
167
168
/*
 * Runs every entry of test_ucp_perf::tests, with UCX_TLS set to the textual
 * form of the test parameter and UCX_WARN_INVALID_CONFIG set to "no" for the
 * duration of the test body.
 */
UCS_TEST_P(test_ucp_perf, envelope) {
    /* With tcp: no performance check, and at most 1000 iterations per test */
    const bool over_tcp   = has_transport("tcp");
    bool       check_perf = !over_tcp;
    size_t     max_iter   = over_tcp ? 1000lu :
                            std::numeric_limits<size_t>::max();

    std::stringstream tls_stream;
    tls_stream << GetParam();
    /* coverity[tainted_string_argument] */
    ucs::scoped_setenv tls("UCX_TLS", tls_stream.str().c_str());
    ucs::scoped_setenv warn_invalid("UCX_WARN_INVALID_CONFIG", "no");

    /* Run all tests; the table ends at the entry with a NULL title */
    for (size_t idx = 0; tests[idx].title != NULL; ++idx) {
        /* Work on a copy so the shared table is never modified */
        test_spec spec = tests[idx];

        /* On AArch64, scale the spec's min/max by the ARM multiplier */
        if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) {
            spec.min /= UCP_ARM_PERF_TEST_MULTIPLIER;
            spec.max *= UCP_ARM_PERF_TEST_MULTIPLIER;
        }

        spec.iters = ucs_min(spec.iters, max_iter);
        run_test(spec, 0, check_perf, "", "");
    }
}

/* Instantiate the parameterized test case through the UCP test helper macro */
UCP_INSTANTIATE_TEST_CASE(test_ucp_perf)
198