1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
4 * reserved.
5 * 2014 Mellanox Technologies, Inc.
6 * All rights reserved.
7 * Copyright (c) 2015 Intel, Inc. All rights reserved.
8 * $COPYRIGHT$
9 *
10 * Additional copyrights may follow
11 *
12 * $HEADER$
13 *
14 */
15
16 #if !defined(MCA_OOB_UD_REQ_H)
17 #define MCA_OOB_UD_REQ_H
18
19 #include "oob_ud_peer.h"
20
21 #include "orte_config.h"
22 #include "orte/types.h"
23 #include "opal/threads/condition.h"
24 #include "opal/mca/event/event.h"
25 #include "opal/class/opal_free_list.h"
26 #include "orte/mca/rml/rml.h"
27
28 #include <infiniband/verbs.h>
29
30 #include "oob_ud_qp.h"
31
32 struct mca_oob_ud_peer_t;
33
/* Kind of request: a receive or a send. */
typedef enum mca_oob_ud_req_type_t {
    MCA_OOB_UD_REQ_RECV,
    MCA_OOB_UD_REQ_SEND
} mca_oob_ud_req_type_t;
39
/* State values a request can be in (stored in mca_oob_ud_req_t::state). */
typedef enum mca_oob_ud_req_state_t {
    MCA_OOB_UD_REQ_ACTIVE,
    MCA_OOB_UD_REQ_PENDING,
    MCA_OOB_UD_REQ_COMPLETE
} mca_oob_ud_req_state_t;
46
/*
 * Selects how the payload of a request is described; see the req_data
 * union of mca_oob_ud_req_t (iov vs. buf members).
 */
typedef enum mca_oob_ud_req_data_type_t {
    MCA_OOB_UD_REQ_IOV,
    MCA_OOB_UD_REQ_BUF,
    MCA_OOB_UD_REQ_TR
} mca_oob_ud_req_data_type_t;
53
/*
 * Message type carried in mca_oob_ud_msg_hdr_t::msg_type. The numeric
 * values are fixed explicitly and must not be renumbered.
 */
typedef enum mca_oob_ud_msg_type_t {
    MCA_OOB_UD_MSG_REQUEST  = 37,
    MCA_OOB_UD_MSG_REPLY    = 38,
    MCA_OOB_UD_MSG_COMPLETE = 39,
    MCA_OOB_UD_MSG_PING     = 40,
    MCA_OOB_UD_MSG_ACK      = 41,
    MCA_OOB_UD_MSG_NACK     = 42,
    MCA_OOB_UD_MSG_DATA_OK  = 43,
    MCA_OOB_UD_MSG_END      = 44
} mca_oob_ud_msg_type_t;
65
66 struct mca_oob_ud_msg_hdr_t {
67 mca_oob_ud_msg_type_t msg_type;
68
69 void *msg_rem_ctx;
70 void *msg_lcl_ctx;
71
72 orte_process_name_t msg_origin;
73 orte_process_name_t msg_target;
74 int msg_channel;
75 int msg_seq_num;
76
77 uint64_t msg_id;
78
79 struct {
80 /* the receiver can get the qpn and lid from the work completion */
81 uint32_t qkey;
82 orte_process_name_t name;
83 uint8_t port_num;
84 } ra;
85
86 union {
87 struct {
88 int tag;
89 int data_len;
90 int mtu;
91 bool data_follows;
92 bool data_iovec_used;
93 } req;
94 struct {
95 uint32_t qpn;
96 int data_len;
97 int tag;
98 int mtu;
99 } rep;
100 } msg_data;
101 };
102 typedef struct mca_oob_ud_msg_hdr_t mca_oob_ud_msg_hdr_t;
103
104 struct mca_oob_ud_req_t {
105 opal_list_item_t super;
106
107 mca_oob_ud_req_type_t type;
108 mca_oob_ud_req_state_t state;
109
110 union {
111 struct ibv_send_wr *send;
112 struct ibv_recv_wr *recv;
113 } req_wr;
114
115 /* storage for ib grh */
116 struct ibv_grh *req_grh;
117 struct ibv_mr *req_grh_mr;
118
119 struct ibv_sge *req_sge;
120
121 /* negotiated mtu */
122 int req_mtu;
123 uint32_t req_rem_qpn;
124 int req_rem_data_len;
125
126 int req_packet_count;
127
128 struct mca_oob_ud_peer_t *req_peer;
129 struct mca_oob_ud_port_t *req_port;
130 struct mca_oob_ud_qp_t *req_qp;
131
132 /* remote context (request or response) */
133 void *req_rem_ctx;
134
135 /* retry timer */
136 struct {
137 opal_event_t event;
138 struct timeval value;
139 } timer;
140
141 /* user request */
142 orte_process_name_t req_target;
143 orte_process_name_t req_origin;
144
145 mca_oob_ud_req_data_type_t req_data_type;
146 union {
147 struct {
148 struct ibv_mr **mr;
149 struct iovec *uiov;
150 int count;
151 }iov;
152 struct {
153 struct ibv_mr *mr;
154 char *p;
155 int size;
156 }buf;
157 }req_data;
158
159 int req_tag;
160 int req_channel;
161 int req_seq_num;
162 int req_rc;
163
164 void *req_cbdata;
165
166 /* what list is this request in */
167 opal_list_t *req_list;
168
169 bool req_is_eager;
170
171 orte_rml_send_t *rml_msg;
172 };
173
174 typedef struct mca_oob_ud_req_t mca_oob_ud_req_t;
175 OBJ_CLASS_DECLARATION(mca_oob_ud_req_t);
176
/* Delivery status of a message (mca_oob_ud_msg_t::status). */
typedef enum mca_oob_ud_status_t {
    MCA_OOB_UD_MSG_STATUS_POSTED,    /* message posted */
    MCA_OOB_UD_MSG_STATUS_COMPLETE,  /* remote side received the message (ack'd) */
    MCA_OOB_UD_MSG_STATUS_TIMEOUT,   /* request message timed out */
    MCA_OOB_UD_MSG_STATUS_ERROR      /* other failure */
} mca_oob_ud_status_t;
188
189 struct mca_oob_ud_msg_t {
190 opal_free_list_item_t super;
191
192 struct ibv_send_wr wr;
193 struct ibv_sge sge;
194 mca_oob_ud_msg_hdr_t *hdr;
195 struct ibv_mr *mr;
196
197 /* qp this request was sent over */
198 struct mca_oob_ud_qp_t *qp;
199 struct mca_oob_ud_port_t *port;
200
201 opal_mutex_t lock;
202 opal_condition_t status_changed;
203 mca_oob_ud_status_t status;
204
205 bool persist;
206 mca_oob_ud_req_t *req;
207
208 void (*cbfunc) (struct mca_oob_ud_msg_t *, int);
209
210 struct mca_oob_ud_peer_t *peer;
211 };
212 typedef struct mca_oob_ud_msg_t mca_oob_ud_msg_t;
213 OBJ_CLASS_DECLARATION(mca_oob_ud_msg_t);
214
/**
 * Allocate the zero-filled landing buffer for an incoming message.
 *
 * For iovec requests (MCA_OOB_UD_REQ_IOV) the lengths of all iovec entries
 * except the last are subtracted from req_rem_data_len; the last entry then
 * receives a freshly allocated buffer of the remaining size (iov_len and
 * iov_base are both overwritten). For every other data type a single buffer
 * of req_rem_data_len bytes is stored in req_data.buf.p and its size in
 * req_data.buf.size.
 *
 * The allocated memory is not freed here; ownership passes to the request.
 *
 * @param recv_req  request whose req_rem_data_len, req_data_type and (for
 *                  iovec requests) req_data.iov.uiov / count are already set
 *
 * @return ORTE_SUCCESS on success. ORTE_ERROR if the remote length is
 *         negative, an iovec request has no entries, the leading iovec
 *         entries are larger than the remote length, or allocation fails.
 */
static inline int mca_oob_ud_recv_alloc (mca_oob_ud_req_t *recv_req)
{
    size_t alloc_size;
    int iov_index;

    /* a negative length would turn into a huge unsigned allocation size */
    if (recv_req->req_rem_data_len < 0) {
        return ORTE_ERROR;
    }
    alloc_size = (size_t) recv_req->req_rem_data_len;

    if (MCA_OOB_UD_REQ_IOV == recv_req->req_data_type) {
        struct iovec *uiov = recv_req->req_data.iov.uiov;
        const int iov_count = recv_req->req_data.iov.count;

        /* the last entry is written below, so there must be at least one */
        if (NULL == uiov || iov_count < 1) {
            return ORTE_ERROR;
        }

        for (iov_index = 0 ; iov_index < iov_count - 1 ; ++iov_index) {
            /* refuse to let the unsigned remainder wrap around */
            if (uiov[iov_index].iov_len > alloc_size) {
                return ORTE_ERROR;
            }
            alloc_size -= uiov[iov_index].iov_len;
        }

        /* iov_index now refers to the last entry */
        uiov[iov_index].iov_len  = alloc_size;
        uiov[iov_index].iov_base = calloc (alloc_size, 1);

        if (NULL == uiov[iov_index].iov_base) {
            return ORTE_ERROR;
        }
    } else {
        recv_req->req_data.buf.p = (char *) calloc (alloc_size, sizeof(char));
        if (NULL == recv_req->req_data.buf.p) {
            return ORTE_ERROR;
        }
        recv_req->req_data.buf.size = recv_req->req_rem_data_len;
    }

    return ORTE_SUCCESS;
}
240
/*
 * Deregister the memory region(s) attached to a request and clear the
 * stored pointers.
 *
 * - MCA_OOB_UD_REQ_IOV requests: every non-NULL entry of req_data.iov.mr is
 *   passed to ibv_dereg_mr(), then the mr array itself is freed and set to
 *   NULL.
 * - any other data type: req_data.buf.mr is deregistered (if set) and
 *   cleared.
 *
 * The result of ibv_dereg_mr() is deliberately ignored.
 *
 * Usage notes:
 * - the iovec branch uses a loop counter named `i`, which must be declared
 *   as an integer variable in the calling scope;
 * - `req` is evaluated several times, so pass an expression without side
 *   effects;
 * - the expansion is a single statement (do { } while (0)); terminate the
 *   invocation with a semicolon. This makes it safe inside unbraced
 *   if/else constructs.
 */
#define MCA_OOB_UD_REQ_DEREG_MR(req)                                    \
    do {                                                                \
        if (MCA_OOB_UD_REQ_IOV == (req)->req_data_type) {               \
            if ((req)->req_data.iov.mr) {                               \
                for (i = 0 ; i < (req)->req_data.iov.count ; ++i) {     \
                    if ((req)->req_data.iov.mr[i]) {                    \
                        (void) ibv_dereg_mr ((req)->req_data.iov.mr[i]); \
                        (req)->req_data.iov.mr[i] = NULL;               \
                    }                                                   \
                }                                                       \
                free ((req)->req_data.iov.mr);                          \
                (req)->req_data.iov.mr = NULL;                          \
            }                                                           \
        } else {                                                        \
            if ((req)->req_data.buf.mr) {                               \
                (void) ibv_dereg_mr ((req)->req_data.buf.mr);           \
                (req)->req_data.buf.mr = NULL;                          \
            }                                                           \
        }                                                               \
    } while (0)
259
260 int mca_oob_ud_msg_get (struct mca_oob_ud_port_t *port, mca_oob_ud_req_t *req,
261 mca_oob_ud_qp_t *qp, mca_oob_ud_peer_t *peer, bool persist,
262 mca_oob_ud_msg_t **msgp);
263 int mca_oob_ud_msg_init (opal_free_list_item_t *item, void *context);
264 void mca_oob_ud_msg_return (mca_oob_ud_msg_t *msg);
265
266
267 void mca_oob_ud_req_timer_set (mca_oob_ud_req_t *req, const struct timeval *timeout,
268 int max_tries, void (*cb)(evutil_socket_t, short, void *));
269
270 int mca_oob_ud_msg_post_send (mca_oob_ud_msg_t *msg);
271 int mca_oob_ud_msg_wait (mca_oob_ud_msg_t *msg);
272
273 int mca_oob_ud_msg_status_update (mca_oob_ud_msg_t *msg, mca_oob_ud_status_t status);
274
275 void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc);
276 void mca_oob_ud_req_abort (mca_oob_ud_req_t *req);
277
278 void mca_oob_ud_req_append_to_list (mca_oob_ud_req_t *req, opal_list_t *list);
279 bool mca_oob_ud_req_is_in_list (mca_oob_ud_req_t *req, opal_list_t *list);
280
281 #endif
282