1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2015.  ALL RIGHTS RESERVED.
3 * Copyright (C) The University of Tennessee and The University
4 *               of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
5 *
6 * See file LICENSE for terms.
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #  include "config.h"
11 #endif
12 
13 #include <tools/perf/lib/libperf_int.h>
14 
15 extern "C" {
16 #include <ucs/debug/log.h>
17 #include <ucs/sys/math.h>
18 #include <ucs/sys/sys.h>
19 }
20 #include <ucs/sys/preprocessor.h>
21 
22 #include <limits>
23 
24 
25 template <ucx_perf_cmd_t CMD, ucx_perf_test_type_t TYPE, unsigned FLAGS>
26 class ucp_perf_test_runner {
27 public:
28     static const ucp_tag_t TAG      = 0x1337a880u;
29     static const ucp_tag_t TAG_MASK = (FLAGS & UCX_PERF_TEST_FLAG_TAG_WILDCARD) ?
30                                       0 : (ucp_tag_t)-1;
31 
32     typedef uint8_t psn_t;
33 
ucp_perf_test_runner(ucx_perf_context_t & perf)34     ucp_perf_test_runner(ucx_perf_context_t &perf) :
35         m_perf(perf),
36         m_outstanding(0),
37         m_max_outstanding(m_perf.params.max_outstanding)
38 
39     {
40         ucs_assert_always(m_max_outstanding > 0);
41     }
42 
create_iov_buffer(ucp_dt_iov_t * iov,void * buffer)43     void create_iov_buffer(ucp_dt_iov_t *iov, void *buffer)
44     {
45         size_t iov_length_it, iov_it;
46         const size_t iovcnt = m_perf.params.msg_size_cnt;
47 
48         ucs_assert(NULL != m_perf.params.msg_size_list);
49         ucs_assert(iovcnt > 0);
50 
51         iov_length_it = 0;
52         for (iov_it = 0; iov_it < iovcnt; ++iov_it) {
53             iov[iov_it].buffer = (char *)buffer + iov_length_it;
54             iov[iov_it].length = m_perf.params.msg_size_list[iov_it];
55 
56             if (m_perf.params.iov_stride) {
57                 iov_length_it += m_perf.params.iov_stride;
58             } else {
59                 iov_length_it += iov[iov_it].length;
60             }
61         }
62     }
63 
ucp_perf_test_get_datatype(ucp_perf_datatype_t datatype,ucp_dt_iov_t * iov,size_t * length,void ** buffer_p)64     ucp_datatype_t ucp_perf_test_get_datatype(ucp_perf_datatype_t datatype, ucp_dt_iov_t *iov,
65                                               size_t *length, void **buffer_p)
66     {
67         ucp_datatype_t type = ucp_dt_make_contig(1);
68         if (UCP_PERF_DATATYPE_IOV == datatype) {
69             *buffer_p = iov;
70             *length   = m_perf.params.msg_size_cnt;
71             type      = ucp_dt_make_iov();
72         }
73         return type;
74     }
75     /**
76      * Make ucp_dt_iov_t iov[msg_size_cnt] array with pointer elements to
77      * original buffer
78      */
ucp_perf_test_prepare_iov_buffers()79     void ucp_perf_test_prepare_iov_buffers()
80     {
81         if (UCP_PERF_DATATYPE_IOV == m_perf.params.ucp.send_datatype) {
82             create_iov_buffer(m_perf.ucp.send_iov, m_perf.send_buffer);
83         }
84         if (UCP_PERF_DATATYPE_IOV == m_perf.params.ucp.recv_datatype) {
85             create_iov_buffer(m_perf.ucp.recv_iov, m_perf.recv_buffer);
86         }
87     }
88 
progress_responder()89     void UCS_F_ALWAYS_INLINE progress_responder() {
90         if (!(FLAGS & UCX_PERF_TEST_FLAG_ONE_SIDED) &&
91             !(m_perf.params.flags & UCX_PERF_TEST_FLAG_ONE_SIDED))
92         {
93             ucp_worker_progress(m_perf.ucp.worker);
94         }
95     }
96 
progress_requestor()97     void UCS_F_ALWAYS_INLINE progress_requestor() {
98         ucp_worker_progress(m_perf.ucp.worker);
99     }
100 
wait_stream_recv(void * request)101     ssize_t UCS_F_ALWAYS_INLINE wait_stream_recv(void *request)
102     {
103         size_t       length;
104         ucs_status_t status;
105 
106         ucs_assert(UCS_PTR_IS_PTR(request));
107 
108         while ((status = ucp_stream_recv_request_test(request, &length)) ==
109                 UCS_INPROGRESS) {
110             progress_responder();
111         }
112         ucp_request_release(request);
113 
114         return ucs_likely(status == UCS_OK) ? length : status;
115     }
116 
send_cb(void * request,ucs_status_t status)117     static void send_cb(void *request, ucs_status_t status)
118     {
119         ucp_perf_request_t *r      = reinterpret_cast<ucp_perf_request_t*>(
120                                           request);
121         ucp_perf_test_runner *test = (ucp_perf_test_runner*)r->context;
122 
123         test->op_completed();
124         r->context = NULL;
125         ucp_request_free(request);
126     }
127 
tag_recv_cb(void * request,ucs_status_t status,ucp_tag_recv_info_t * info)128     static void tag_recv_cb(void *request, ucs_status_t status,
129                             ucp_tag_recv_info_t *info)
130     {
131         ucp_perf_request_t *r = reinterpret_cast<ucp_perf_request_t*>(request);
132         ucp_perf_test_runner *test;
133 
134         /* if the request is completed during tag_recv_nb(), the context is
135          * still NULL */
136         if (r->context == NULL) {
137             return;
138         }
139 
140         test = (ucp_perf_test_runner*)r->context;
141         test->op_completed();
142         r->context = NULL;
143         ucp_request_free(request);
144     }
145 
wait_window(unsigned n,bool is_requestor)146     void UCS_F_ALWAYS_INLINE wait_window(unsigned n, bool is_requestor)
147     {
148         while (m_outstanding >= (m_max_outstanding - n + 1)) {
149             if (is_requestor) {
150                 progress_requestor();
151             } else {
152                 progress_responder();
153             }
154         }
155     }
156 
157     ucs_status_t UCS_F_ALWAYS_INLINE
send(ucp_ep_h ep,void * buffer,unsigned length,ucp_datatype_t datatype,uint8_t sn,uint64_t remote_addr,ucp_rkey_h rkey)158     send(ucp_ep_h ep, void *buffer, unsigned length, ucp_datatype_t datatype,
159          uint8_t sn, uint64_t remote_addr, ucp_rkey_h rkey)
160     {
161         void *request;
162 
163         /* coverity[switch_selector_expr_is_constant] */
164         switch (CMD) {
165         case UCX_PERF_CMD_TAG:
166         case UCX_PERF_CMD_TAG_SYNC:
167         case UCX_PERF_CMD_STREAM:
168             wait_window(1, true);
169             /* coverity[switch_selector_expr_is_constant] */
170             switch (CMD) {
171             case UCX_PERF_CMD_TAG:
172                 request = ucp_tag_send_nb(ep, buffer, length, datatype, TAG,
173                                           send_cb);
174                 break;
175             case UCX_PERF_CMD_TAG_SYNC:
176                 request = ucp_tag_send_sync_nb(ep, buffer, length, datatype, TAG,
177                                                send_cb);
178                 break;
179             case UCX_PERF_CMD_STREAM:
180                 request = ucp_stream_send_nb(ep, buffer, length, datatype,
181                                              send_cb, 0);
182                 break;
183             default:
184                 request = UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
185                 break;
186             }
187             if (ucs_likely(!UCS_PTR_IS_PTR(request))) {
188                 return UCS_PTR_STATUS(request);
189             }
190             reinterpret_cast<ucp_perf_request_t*>(request)->context = this;
191             op_started();
192             return UCS_OK;
193         case UCX_PERF_CMD_PUT:
194             *((uint8_t*)buffer + length - 1) = sn;
195             return ucp_put(ep, buffer, length, remote_addr, rkey);
196         case UCX_PERF_CMD_GET:
197             return ucp_get(ep, buffer, length, remote_addr, rkey);
198         case UCX_PERF_CMD_ADD:
199             if (length == sizeof(uint32_t)) {
200                 return ucp_atomic_add32(ep, 1, remote_addr, rkey);
201             } else if (length == sizeof(uint64_t)) {
202                 return ucp_atomic_add64(ep, 1, remote_addr, rkey);
203             } else {
204                 return UCS_ERR_INVALID_PARAM;
205             }
206         case UCX_PERF_CMD_FADD:
207             if (length == sizeof(uint32_t)) {
208                 return ucp_atomic_fadd32(ep, 0, remote_addr, rkey, (uint32_t*)buffer);
209             } else if (length == sizeof(uint64_t)) {
210                 return ucp_atomic_fadd64(ep, 0, remote_addr, rkey, (uint64_t*)buffer);
211             } else {
212                 return UCS_ERR_INVALID_PARAM;
213             }
214         case UCX_PERF_CMD_SWAP:
215             if (length == sizeof(uint32_t)) {
216                 return ucp_atomic_swap32(ep, 0, remote_addr, rkey, (uint32_t*)buffer);
217             } else if (length == sizeof(uint64_t)) {
218                 return ucp_atomic_swap64(ep, 0, remote_addr, rkey, (uint64_t*)buffer);
219             } else {
220                 return UCS_ERR_INVALID_PARAM;
221             }
222         case UCX_PERF_CMD_CSWAP:
223             if (length == sizeof(uint32_t)) {
224                 return ucp_atomic_cswap32(ep, 0, 0, remote_addr, rkey, (uint32_t*)buffer);
225             } else if (length == sizeof(uint64_t)) {
226                 return ucp_atomic_cswap64(ep, 0, 0, remote_addr, rkey, (uint64_t*)buffer);
227             } else {
228                 return UCS_ERR_INVALID_PARAM;
229             }
230         default:
231             return UCS_ERR_INVALID_PARAM;
232         }
233     }
234 
235     ucs_status_t UCS_F_ALWAYS_INLINE
recv(ucp_worker_h worker,ucp_ep_h ep,void * buffer,unsigned length,ucp_datatype_t datatype,uint8_t sn)236     recv(ucp_worker_h worker, ucp_ep_h ep, void *buffer, unsigned length,
237          ucp_datatype_t datatype, uint8_t sn)
238     {
239         volatile uint8_t *ptr;
240         void *request;
241 
242         /* coverity[switch_selector_expr_is_constant] */
243         switch (CMD) {
244         case UCX_PERF_CMD_TAG:
245         case UCX_PERF_CMD_TAG_SYNC:
246             wait_window(1, false);
247             if (FLAGS & UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) {
248                 ucp_tag_recv_info_t tag_info;
249                 while (ucp_tag_probe_nb(worker, TAG, TAG_MASK, 0, &tag_info) == NULL) {
250                     progress_responder();
251                 }
252             }
253             request = ucp_tag_recv_nb(worker, buffer, length, datatype, TAG, TAG_MASK,
254                                       tag_recv_cb);
255             if (ucs_likely(!UCS_PTR_IS_PTR(request))) {
256                 return UCS_PTR_STATUS(request);
257             }
258             if (ucp_request_is_completed(request)) {
259                 /* request is already completed and callback was called */
260                 ucp_request_free(request);
261                 return UCS_OK;
262             }
263             reinterpret_cast<ucp_perf_request_t*>(request)->context = this;
264             op_started();
265             return UCS_OK;
266         case UCX_PERF_CMD_PUT:
267             /* coverity[switch_selector_expr_is_constant] */
268             switch (TYPE) {
269             case UCX_PERF_TEST_TYPE_PINGPONG:
270                 ptr = (volatile uint8_t*)buffer + length - 1;
271                 while (*ptr != sn) {
272                     progress_responder();
273                 }
274                 return UCS_OK;
275             case UCX_PERF_TEST_TYPE_STREAM_UNI:
276                 return UCS_OK;
277             default:
278                 return UCS_ERR_INVALID_PARAM;
279             }
280         case UCX_PERF_CMD_GET:
281         case UCX_PERF_CMD_ADD:
282         case UCX_PERF_CMD_FADD:
283         case UCX_PERF_CMD_SWAP:
284         case UCX_PERF_CMD_CSWAP:
285             /* coverity[switch_selector_expr_is_constant] */
286             switch (TYPE) {
287             case UCX_PERF_TEST_TYPE_STREAM_UNI:
288                 progress_responder();
289                 return UCS_OK;
290             default:
291                 return UCS_ERR_INVALID_PARAM;
292             }
293         case UCX_PERF_CMD_STREAM:
294             if (FLAGS & UCX_PERF_TEST_FLAG_STREAM_RECV_DATA) {
295                 return recv_stream_data(ep, length, datatype);
296             } else {
297                 return recv_stream(ep, buffer, length, datatype);
298             }
299         default:
300             return UCS_ERR_INVALID_PARAM;
301         }
302     }
303 
flush()304     void flush()
305     {
306         if (m_perf.params.flags & UCX_PERF_TEST_FLAG_FLUSH_EP) {
307             ucp_ep_flush(m_perf.ucp.ep);
308         } else {
309             ucp_worker_flush(m_perf.ucp.worker);
310         }
311     }
312 
run_pingpong()313     ucs_status_t run_pingpong()
314     {
315         const psn_t unknown_psn = std::numeric_limits<psn_t>::max();
316         unsigned my_index;
317         ucp_worker_h worker;
318         ucp_ep_h ep;
319         void *send_buffer, *recv_buffer;
320         ucp_datatype_t send_datatype, recv_datatype;
321         uint64_t remote_addr;
322         uint8_t sn;
323         ucp_rkey_h rkey;
324         size_t length, send_length, recv_length;
325 
326         length        = ucx_perf_get_message_size(&m_perf.params);
327         ucs_assert(length >= sizeof(psn_t));
328 
329         ucp_perf_test_prepare_iov_buffers();
330 
331         if (CMD == UCX_PERF_CMD_PUT) {
332             m_perf.allocator->memcpy((psn_t*)m_perf.recv_buffer + length - 1,
333                                      m_perf.allocator->mem_type,
334                                      &unknown_psn, UCS_MEMORY_TYPE_HOST,
335                                      sizeof(unknown_psn));
336         }
337 
338         ucp_perf_barrier(&m_perf);
339 
340         my_index      = rte_call(&m_perf, group_index);
341 
342         ucx_perf_test_start_clock(&m_perf);
343 
344         ucx_perf_omp_barrier(&m_perf);
345 
346         send_buffer   = m_perf.send_buffer;
347         recv_buffer   = m_perf.recv_buffer;
348         worker        = m_perf.ucp.worker;
349         ep            = m_perf.ucp.ep;
350         remote_addr   = m_perf.ucp.remote_addr;
351         rkey          = m_perf.ucp.rkey;
352         sn            = 0;
353         send_length   = length;
354         recv_length   = length;
355         send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype,
356                                                    m_perf.ucp.send_iov, &send_length,
357                                                    &send_buffer);
358         recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype,
359                                                    m_perf.ucp.recv_iov, &recv_length,
360                                                    &recv_buffer);
361 
362         if (my_index == 0) {
363             UCX_PERF_TEST_FOREACH(&m_perf) {
364                 send(ep, send_buffer, send_length, send_datatype, sn, remote_addr, rkey);
365                 recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn);
366                 ucx_perf_update(&m_perf, 1, length);
367                 ++sn;
368             }
369         } else if (my_index == 1) {
370             UCX_PERF_TEST_FOREACH(&m_perf) {
371                 recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn);
372                 send(ep, send_buffer, send_length, send_datatype, sn, remote_addr, rkey);
373                 ucx_perf_update(&m_perf, 1, length);
374                 ++sn;
375             }
376         }
377 
378         wait_window(m_max_outstanding, true);
379         flush();
380 
381         ucx_perf_omp_barrier(&m_perf);
382 
383         ucx_perf_get_time(&m_perf);
384         ucp_perf_barrier(&m_perf);
385         return UCS_OK;
386     }
387 
run_stream_uni()388     ucs_status_t run_stream_uni()
389     {
390         unsigned my_index;
391         ucp_worker_h worker;
392         ucp_ep_h ep;
393         void *send_buffer, *recv_buffer;
394         ucp_datatype_t send_datatype, recv_datatype;
395         uint64_t remote_addr;
396         ucp_rkey_h rkey;
397         size_t length, send_length, recv_length;
398         uint8_t sn;
399 
400         length        = ucx_perf_get_message_size(&m_perf.params);
401         ucs_assert(length >= sizeof(psn_t));
402 
403         ucp_perf_test_prepare_iov_buffers();
404 
405         ucp_perf_barrier(&m_perf);
406 
407         my_index      = rte_call(&m_perf, group_index);
408 
409         ucx_perf_test_start_clock(&m_perf);
410 
411         ucx_perf_omp_barrier(&m_perf);
412 
413         send_buffer   = m_perf.send_buffer;
414         recv_buffer   = m_perf.recv_buffer;
415         worker        = m_perf.ucp.worker;
416         ep            = m_perf.ucp.ep;
417         remote_addr   = m_perf.ucp.remote_addr;
418         rkey          = m_perf.ucp.rkey;
419         sn            = 0;
420         send_length   = length;
421         recv_length   = length;
422         send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype,
423                                                    m_perf.ucp.send_iov, &send_length,
424                                                    &send_buffer);
425         recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype,
426                                                    m_perf.ucp.recv_iov, &recv_length,
427                                                    &recv_buffer);
428 
429         if (my_index == 0) {
430             UCX_PERF_TEST_FOREACH(&m_perf) {
431                 recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn);
432                 ucx_perf_update(&m_perf, 1, length);
433                 ++sn;
434             }
435         } else if (my_index == 1) {
436             UCX_PERF_TEST_FOREACH(&m_perf) {
437                 send(ep, send_buffer, send_length, send_datatype, sn,
438                      remote_addr, rkey);
439                 ucx_perf_update(&m_perf, 1, length);
440                 ++sn;
441             }
442         }
443 
444         wait_window(m_max_outstanding, true);
445         flush();
446 
447         ucx_perf_omp_barrier(&m_perf);
448 
449         ucx_perf_get_time(&m_perf);
450 
451         ucp_perf_barrier(&m_perf);
452         return UCS_OK;
453     }
454 
run()455     ucs_status_t run()
456     {
457         /* coverity[switch_selector_expr_is_constant] */
458         switch (TYPE) {
459         case UCX_PERF_TEST_TYPE_PINGPONG:
460             return run_pingpong();
461         case UCX_PERF_TEST_TYPE_STREAM_UNI:
462             return run_stream_uni();
463         case UCX_PERF_TEST_TYPE_STREAM_BI:
464         default:
465             return UCS_ERR_INVALID_PARAM;
466         }
467     }
468 
469 private:
470     ucs_status_t UCS_F_ALWAYS_INLINE
recv_stream_data(ucp_ep_h ep,unsigned length,ucp_datatype_t datatype)471     recv_stream_data(ucp_ep_h ep, unsigned length, ucp_datatype_t datatype)
472     {
473         void *data;
474         size_t data_length;
475         size_t total = 0;
476 
477         do {
478             progress_responder();
479             data = ucp_stream_recv_data_nb(ep, &data_length);
480             if (ucs_likely(UCS_PTR_IS_PTR(data))) {
481                 total += data_length;
482                 ucp_stream_data_release(ep, data);
483             }
484         } while ((total < length) && !UCS_PTR_IS_ERR(data));
485 
486         return UCS_PTR_IS_ERR(data) ? UCS_PTR_STATUS(data) : UCS_OK;
487     }
488 
489     ucs_status_t UCS_F_ALWAYS_INLINE
recv_stream(ucp_ep_h ep,void * buf,unsigned length,ucp_datatype_t datatype)490     recv_stream(ucp_ep_h ep, void *buf, unsigned length, ucp_datatype_t datatype)
491     {
492         ssize_t  total = 0;
493         void    *rreq;
494         size_t   rlength;
495         ssize_t  rlength_s;
496 
497         do {
498             rreq = ucp_stream_recv_nb(ep, (char *)buf + total, length - total,
499                                       datatype,
500                                       (ucp_stream_recv_callback_t)ucs_empty_function,
501                                       &rlength, 0);
502             if (ucs_likely(rreq == NULL)) {
503                 total += rlength;
504             } else if (UCS_PTR_IS_PTR(rreq)) {
505                 rlength_s = wait_stream_recv(rreq);
506                 if (ucs_unlikely(rlength_s < 0)) {
507                     return ucs_status_t(rlength_s);
508                 }
509                 total += rlength_s;
510             } else {
511                 return UCS_PTR_STATUS(rreq);
512             }
513         } while (total < length);
514 
515         return UCS_OK;
516     }
517 
op_started()518     void UCS_F_ALWAYS_INLINE op_started()
519     {
520         ++m_outstanding;
521     }
522 
op_completed()523     void UCS_F_ALWAYS_INLINE op_completed()
524     {
525         --m_outstanding;
526     }
527 
528     ucx_perf_context_t &m_perf;
529     unsigned           m_outstanding;
530     const unsigned     m_max_outstanding;
531 };
532 
533 
/*
 * Run ucp_perf_test_runner<_cmd, _type, _flags> and return its result from
 * the enclosing function when the run-time parameters of _perf match: the
 * command equals _cmd, the test type equals _type, and the flag bits selected
 * by _mask equal _flags. Otherwise falls through to the next statement.
 *
 * Deliberately a bare "if { ... }" (not do/while): the TEST_CASE_ALL_* macros
 * chain several invocations back-to-back without separators.
 */
#define TEST_CASE(_perf, _cmd, _type, _flags, _mask) \
    if (((_perf)->params.command == (_cmd)) && \
        ((_perf)->params.test_type == (_type)) && \
        (((_perf)->params.flags & (_mask)) == (_flags))) \
    { \
        ucp_perf_test_runner<_cmd, _type, _flags> r(*(_perf)); \
        return r.run(); \
    }
542 
/*
 * Expand TEST_CASE for one (command, test type) tuple, once per value of the
 * UCX_PERF_TEST_FLAG_STREAM_RECV_DATA flag: first cleared, then set.
 */
#define TEST_CASE_ALL_STREAM(_perf, _case) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              0, \
              UCX_PERF_TEST_FLAG_STREAM_RECV_DATA) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              UCX_PERF_TEST_FLAG_STREAM_RECV_DATA, \
              UCX_PERF_TEST_FLAG_STREAM_RECV_DATA)
550 
/*
 * Expand TEST_CASE for one (command, test type) tuple, once per combination
 * of the two tag flags (wildcard, unexpected-probe): none, wildcard only,
 * probe only, both. The mask always covers both flags.
 */
#define TEST_CASE_ALL_TAG(_perf, _case) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              0, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE, \
              UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE)
564 
/*
 * Expand TEST_CASE for one (command, test type) tuple, once per value of the
 * UCX_PERF_TEST_FLAG_ONE_SIDED flag: first cleared, then set.
 */
#define TEST_CASE_ALL_OSD(_perf, _case) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              0, \
              UCX_PERF_TEST_FLAG_ONE_SIDED) \
    TEST_CASE(_perf, \
              UCS_PP_TUPLE_0 _case, \
              UCS_PP_TUPLE_1 _case, \
              UCX_PERF_TEST_FLAG_ONE_SIDED, \
              UCX_PERF_TEST_FLAG_ONE_SIDED)
570 
ucp_perf_test_dispatch(ucx_perf_context_t * perf)571 ucs_status_t ucp_perf_test_dispatch(ucx_perf_context_t *perf)
572 {
573     UCS_PP_FOREACH(TEST_CASE_ALL_OSD, perf,
574         (UCX_PERF_CMD_PUT,   UCX_PERF_TEST_TYPE_PINGPONG),
575         (UCX_PERF_CMD_PUT,   UCX_PERF_TEST_TYPE_STREAM_UNI),
576         (UCX_PERF_CMD_GET,   UCX_PERF_TEST_TYPE_STREAM_UNI),
577         (UCX_PERF_CMD_ADD,   UCX_PERF_TEST_TYPE_STREAM_UNI),
578         (UCX_PERF_CMD_FADD,  UCX_PERF_TEST_TYPE_STREAM_UNI),
579         (UCX_PERF_CMD_SWAP,  UCX_PERF_TEST_TYPE_STREAM_UNI),
580         (UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI)
581         );
582 
583     UCS_PP_FOREACH(TEST_CASE_ALL_TAG, perf,
584         (UCX_PERF_CMD_TAG,      UCX_PERF_TEST_TYPE_PINGPONG),
585         (UCX_PERF_CMD_TAG,      UCX_PERF_TEST_TYPE_STREAM_UNI),
586         (UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_PINGPONG),
587         (UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI)
588         );
589 
590     UCS_PP_FOREACH(TEST_CASE_ALL_STREAM, perf,
591         (UCX_PERF_CMD_STREAM,   UCX_PERF_TEST_TYPE_STREAM_UNI),
592         (UCX_PERF_CMD_STREAM,   UCX_PERF_TEST_TYPE_PINGPONG)
593         );
594 
595     ucs_error("Invalid test case: %d/%d/0x%x",
596               perf->params.command, perf->params.test_type,
597               perf->params.flags);
598     return UCS_ERR_INVALID_PARAM;
599 }
600