1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2014.  ALL RIGHTS RESERVED.
3 *
4 * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
5 * See file LICENSE for terms.
6 */
7 
8 #include "ucp_test.h"
9 
10 #include <gtest/common/test_perf.h>
11 
12 
/*
 * Normalization factor used by the "MB/sec" entries of the test table:
 * 1 / (1024 * 1024). Written as a parenthesized constant expression so that it
 * needs no math-library call and can be used in static initializers.
 */
#define MB   (1.0 / (1024.0 * 1024.0))

/*
 * Factor by which the accepted [min, max] range of every test is widened when
 * running on an AArch64 CPU (max is multiplied by it, min is divided by it).
 */
#define UCP_ARM_PERF_TEST_MULTIPLIER 2
15 class test_ucp_perf : public ucp_test, public test_perf {
16 protected:
init()17     virtual void init() {
18         test_base::init(); /* Skip entities creation in ucp_test */
19         ucs_log_push_handler(log_handler);
20     }
21 
cleanup()22     virtual void cleanup() {
23         ucs_log_pop_handler();
24         test_base::cleanup();
25     }
26 
27     static ucs_log_func_rc_t
log_handler(const char * file,unsigned line,const char * function,ucs_log_level_t level,const ucs_log_component_config_t * comp_conf,const char * message,va_list ap)28     log_handler(const char *file, unsigned line, const char *function,
29                 ucs_log_level_t level,
30                 const ucs_log_component_config_t *comp_conf,
31                 const char *message, va_list ap) {
32         // Ignore errors that transport cannot reach peer
33         if (level == UCS_LOG_LEVEL_ERROR) {
34             std::string err_str = format_message(message, ap);
35             if (strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNREACHABLE)) ||
36                 strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNSUPPORTED))) {
37                 UCS_TEST_MESSAGE << err_str;
38                 return UCS_LOG_FUNC_RC_STOP;
39             }
40         }
41         return UCS_LOG_FUNC_RC_CONTINUE;
42     }
43 
44     const static test_spec tests[];
45 };
46 
47 
48 const test_perf::test_spec test_ucp_perf::tests[] =
49 {
50   { "tag latency", "usec",
51     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
52     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
53     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
54     0 },
55 
56   { "tag iov latency", "usec",
57     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
58     UCP_PERF_DATATYPE_IOV, 8192, 3, { 1024, 1024, 1024 }, 1, 100000lu,
59     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
60     0 },
61 
62   { "tag mr", "Mpps",
63     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
64     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
65     ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
66     0 },
67 
68   { "tag sync mr", "Mpps",
69     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI,
70     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 200000lu,
71     ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.05, 100.0, 0},
72 
73   { "tag wild mr", "Mpps",
74     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
75     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
76     ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
77     UCX_PERF_TEST_FLAG_TAG_WILDCARD },
78 
79   { "tag bw", "MB/sec",
80     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
81     UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 1, 100000lu,
82     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 },
83 
84   { "tag bw_zcopy_multi", "MB/sec",
85     UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
86     UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 16, 100000lu,
87     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 },
88 
89   { "put latency", "usec",
90     UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG,
91     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
92     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
93     0 },
94 
95   { "put rate", "Mpps",
96     UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
97     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
98     ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 100.0,
99     0 },
100 
101   { "put bw", "MB/sec",
102     UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
103     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu,
104     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
105     0 },
106 
107   { "get latency", "usec",
108     UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
109     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
110     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
111     0 },
112 
113   { "get bw", "MB/sec",
114     UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
115     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
116     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
117     0 },
118 
119   { "stream latency", "usec",
120     UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
121     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
122     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, 0 },
123 
124   { "stream bw", "MB/sec",
125     UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
126     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
127     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, 0 },
128 
129   { "stream recv-data latency", "usec",
130     UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
131     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
132     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
133     UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },
134 
135   { "stream recv-data bw", "MB/sec",
136     UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
137     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
138     ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
139     UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },
140 
141   { "atomic add rate", "Mpps",
142     UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
143     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 1000000lu,
144     ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 500.0,
145     0 },
146 
147   { "atomic fadd latency", "usec",
148     UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
149     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
150     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
151     0 },
152 
153   { "atomic swap latency", "usec",
154     UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
155     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
156     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
157     0 },
158 
159   { "atomic cswap latency", "usec",
160     UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
161     UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
162     ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
163     0 },
164 
165   { NULL }
166 };
167 
168 
/*
 * Runs every case of test_ucp_perf::tests, with UCX_TLS set to the string form
 * of the test parameter.
 *
 * When the "tcp" transport is in use, the check_perf argument of run_test() is
 * false and the iteration count of every case is capped at 1000. On AArch64
 * CPUs the accepted [min, max] range of every case is widened by
 * UCP_ARM_PERF_TEST_MULTIPLIER.
 */
UCS_TEST_P(test_ucp_perf, envelope) {
    /* Queried before the environment is overridden below */
    const bool over_tcp   = has_transport("tcp");
    const bool check_perf = !over_tcp;
    size_t max_iter       = over_tcp ? 1000lu :
                                       std::numeric_limits<size_t>::max();

    std::stringstream tls_names;
    tls_names << GetParam();
    /* coverity[tainted_string_argument] */
    ucs::scoped_setenv tls("UCX_TLS", tls_names.str().c_str());
    ucs::scoped_setenv warn_invalid("UCX_WARN_INVALID_CONFIG", "no");

    /* Walk the table until the NULL-title terminator */
    const test_spec *spec = tests;
    while (spec->title != NULL) {
        /* Work on a copy, so the shared table is never modified */
        test_spec tuned = *spec;

        if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) {
            tuned.min /= UCP_ARM_PERF_TEST_MULTIPLIER;
            tuned.max *= UCP_ARM_PERF_TEST_MULTIPLIER;
        }

        tuned.iters = ucs_min(tuned.iters, max_iter);
        run_test(tuned, 0, check_perf, "", "");
        ++spec;
    }
}

UCP_INSTANTIATE_TEST_CASE(test_ucp_perf)
198