1 /*
2 * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
3 * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
4 * See file LICENSE for terms.
5 */
6
7 #ifdef HAVE_CONFIG_H
8 # include "config.h"
9 #endif
10
11 #include "rocm_ipc_ep.h"
12 #include "rocm_ipc_iface.h"
13 #include "rocm_ipc_md.h"
14
15 #include <uct/rocm/base/rocm_base.h>
16 #include <uct/base/uct_iov.inl>
17
/*
 * Endpoint constructor.
 *
 * Initializes the uct_base_ep_t super class on the owning interface, records
 * the peer process id read from params->iface_addr, and creates the
 * per-endpoint remote memory-handle cache, named "dest:<pid>".
 *
 * Returns UCS_OK on success, or the status reported by
 * uct_rocm_ipc_create_cache() if the cache could not be created.
 */
static UCS_CLASS_INIT_FUNC(uct_rocm_ipc_ep_t, const uct_ep_params_t *params)
{
    uct_rocm_ipc_iface_t *iface = ucs_derived_of(params->iface,
                                                 uct_rocm_ipc_iface_t);
    char target_name[64];
    ucs_status_t status;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);

    /* The interface address is read as the pid of the peer process */
    self->remote_pid = *(const pid_t*)params->iface_addr;

    /* Reuse the stored pid instead of re-reading the address through a cast
     * that drops const; the explicit int cast matches the %d conversion */
    snprintf(target_name, sizeof(target_name), "dest:%d",
             (int)self->remote_pid);

    status = uct_rocm_ipc_create_cache(&self->remote_memh_cache, target_name);
    if (status != UCS_OK) {
        ucs_error("could not create rocm ipc cache: %s",
                  ucs_status_string(status));
        return status;
    }

    return UCS_OK;
}
38
UCS_CLASS_CLEANUP_FUNC(uct_rocm_ipc_ep_t)39 static UCS_CLASS_CLEANUP_FUNC(uct_rocm_ipc_ep_t)
40 {
41 uct_rocm_ipc_destroy_cache(self->remote_memh_cache);
42 }
43
44 UCS_CLASS_DEFINE(uct_rocm_ipc_ep_t, uct_base_ep_t);
45 UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_ipc_ep_t, uct_ep_t, const uct_ep_params_t *);
46 UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_ipc_ep_t, uct_ep_t);
47
/*
 * Data-path trace helper: forwards the caller's format string and arguments
 * to ucs_trace_data(), appending the remote address (printed as hex) and the
 * rkey (printed with an explicit sign) to the message.
 */
#define uct_rocm_ipc_trace_data(_remote_addr, _rkey, _fmt, ...) \
    ucs_trace_data(_fmt " to %" PRIx64 "(%+ld)", ## __VA_ARGS__, \
                   (_remote_addr), (_rkey))
51
uct_rocm_ipc_ep_zcopy(uct_ep_h tl_ep,uint64_t remote_addr,const uct_iov_t * iov,uct_rocm_ipc_key_t * key,uct_completion_t * comp,int is_put)52 ucs_status_t uct_rocm_ipc_ep_zcopy(uct_ep_h tl_ep,
53 uint64_t remote_addr,
54 const uct_iov_t *iov,
55 uct_rocm_ipc_key_t *key,
56 uct_completion_t *comp,
57 int is_put)
58 {
59 uct_rocm_ipc_ep_t *ep = ucs_derived_of(tl_ep, uct_rocm_ipc_ep_t);
60 hsa_status_t status;
61 hsa_agent_t local_agent;
62 size_t size = uct_iov_get_length(iov);
63 ucs_status_t ret = UCS_OK;
64 void *base_addr, *local_addr = iov->buffer;
65 uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rocm_ipc_iface_t);
66 void *remote_base_addr, *remote_copy_addr;
67 void *dst_addr, *src_addr;
68 uct_rocm_ipc_signal_desc_t *rocm_ipc_signal;
69
70 /* no data to deliver */
71 if (!size)
72 return UCS_OK;
73
74 if ((remote_addr < key->address) ||
75 (remote_addr + size > key->address + key->length)) {
76 ucs_error("remote addr %lx/%lx out of range %lx/%lx",
77 remote_addr, size, key->address, key->length);
78 return UCS_ERR_INVALID_PARAM;
79 }
80
81 status = uct_rocm_base_get_ptr_info(local_addr, size, &base_addr,
82 NULL, &local_agent);
83 if (status != HSA_STATUS_SUCCESS) {
84 ucs_error("local addr %p/%lx is not ROCM memory", local_addr, size);
85 return UCS_ERR_INVALID_ADDR;
86 }
87
88 ret = uct_rocm_ipc_cache_map_memhandle((void *)ep->remote_memh_cache, key,
89 &remote_base_addr);
90 if (ret != UCS_OK) {
91 ucs_error("fail to attach ipc mem %p %d\n", (void *)key->address, ret);
92 return ret;
93 }
94
95 remote_copy_addr = UCS_PTR_BYTE_OFFSET(remote_base_addr,
96 remote_addr - key->address);
97 if (is_put) {
98 dst_addr = remote_copy_addr;
99 src_addr = local_addr;
100 }
101 else {
102 dst_addr = local_addr;
103 src_addr = remote_copy_addr;
104 }
105
106 rocm_ipc_signal = ucs_mpool_get(&iface->signal_pool);
107 hsa_signal_store_screlease(rocm_ipc_signal->signal, 1);
108
109 status = hsa_amd_memory_async_copy(dst_addr, local_agent,
110 src_addr, local_agent,
111 size, 0, NULL,
112 rocm_ipc_signal->signal);
113
114 if (status != HSA_STATUS_SUCCESS) {
115 ucs_error("copy error");
116 ucs_mpool_put(rocm_ipc_signal);
117 return UCS_ERR_IO_ERROR;
118 }
119
120 rocm_ipc_signal->comp = comp;
121 rocm_ipc_signal->mapped_addr = remote_base_addr;
122 ucs_queue_push(&iface->signal_queue, &rocm_ipc_signal->queue);
123
124 ucs_trace("rocm async copy issued :%p remote:%p, local:%p len:%ld",
125 rocm_ipc_signal, (void *)remote_addr, local_addr, size);
126
127 return UCS_INPROGRESS;
128 }
129
uct_rocm_ipc_ep_put_zcopy(uct_ep_h tl_ep,const uct_iov_t * iov,size_t iovcnt,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)130 ucs_status_t uct_rocm_ipc_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
131 uint64_t remote_addr, uct_rkey_t rkey,
132 uct_completion_t *comp)
133 {
134 ucs_status_t ret;
135 uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *)rkey;
136
137 ret = uct_rocm_ipc_ep_zcopy(tl_ep, remote_addr, iov, key, comp, 1);
138
139 UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY,
140 uct_iov_total_length(iov, iovcnt));
141 uct_rocm_ipc_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]",
142 uct_iov_total_length(iov, iovcnt));
143
144 return ret;
145 }
146
uct_rocm_ipc_ep_get_zcopy(uct_ep_h tl_ep,const uct_iov_t * iov,size_t iovcnt,uint64_t remote_addr,uct_rkey_t rkey,uct_completion_t * comp)147 ucs_status_t uct_rocm_ipc_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
148 uint64_t remote_addr, uct_rkey_t rkey,
149 uct_completion_t *comp)
150 {
151 ucs_status_t ret;
152 uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *)rkey;
153
154 ret = uct_rocm_ipc_ep_zcopy(tl_ep, remote_addr, iov, key, comp, 0);
155
156 UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY,
157 uct_iov_total_length(iov, iovcnt));
158 uct_rocm_ipc_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]",
159 uct_iov_total_length(iov, iovcnt));
160
161 return ret;
162 }
163