/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

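/*
 * Hello-header protocol version: the major revision sits in the high
 * nibble and the minor in the low nibble, so 0x22 advertises SDP
 * version 2.2.
 */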
#define SDP_MAJV_MINV 0x22

SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type InfiniBand");

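/* Sizes of the hello (HH) and hello-ack (HAH) messages on the wire. */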
enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};

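/*
 * Asynchronous QP event callback.  SDP has no recovery to do here, so the
 * event is only logged against the owning socket.
 */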
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
	struct socket *sk = data;

	sdp_dbg(sk, "QP Event: %s (%d)\n", ib_event_msg(event->event),
	    event->event);
}

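/*
 * Return the device's scatter/gather entry limit.  The value is cached in
 * a function-local static, so the first device queried decides the answer
 * for every later device; callers clamp it against SDP_MAX_SEND_SGES and
 * SDP_MAX_RECV_SGES anyway.
 */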
static int
sdp_get_max_dev_sge(struct ib_device *dev)
{
	struct ib_device_attr *device_attr;
	static int max_sges = -1;

	if (max_sges > 0)
		goto out;

	device_attr = &dev->attrs;
	max_sges = device_attr->max_sge;

out:
	return max_sges;
}

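/*
 * Create the completion rings and the RC queue pair that back an SDP
 * socket.  Resources acquired before a failure are released in reverse
 * order.
 */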
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.qp_context = sk,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}

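/*
 * Passive-side connect: a hello message arrived on a listening socket.
 * Spawn a child socket, give it a QP, and seed its credits and buffer
 * sizes from the peer's hello header.
 */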
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}

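/*
 * Active-side connect: the peer answered our hello with a hello-ack.
 * Adopt its advertised credits and receive size and mark the socket
 * connected.
 */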
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}

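/*
 * Final step of the handshake: move the socket to ESTABLISHED and wake
 * anyone sleeping in connect(2), unless the socket was dropped meanwhile.
 */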
static int
sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
{
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;

	sdp_set_default_moderation(ssk);

	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);

	if ((ssk->flags & SDP_DROPPED) == 0)
		soisconnected(sk);
	SDP_WUNLOCK(ssk);
	return 0;
}

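/*
 * The peer disconnected.  A socket still in SYN_RECEIVED is promoted to
 * ESTABLISHED first; if data has already arrived the close is treated as
 * orderly, otherwise the disconnect is reported as a connection reset.
 */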
static int
sdp_disconnected_handler(struct socket *sk)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(sk);
	sdp_dbg(sk, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
		sdp_connected_handler(sk, NULL);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

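/*
 * Central RDMA CM event dispatcher for SDP sockets.  A non-zero return
 * tells the RDMA CM to give up on the id, so fatal events also detach the
 * id from the socket and deliver the error through sdp_notify().
 */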
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;
	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		    event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
		    -EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		if (sdp_link_layer_ib_only &&
		    rdma_node_get_transport(id->device->node_type) ==
		    RDMA_TRANSPORT_IB &&
		    rdma_port_get_link_layer(id->device, id->port_num) !=
		    IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
			    "is allowed\n",
			    rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
		    rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
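		/*
		 * ipv_cap carries the IP version in its high nibble, so
		 * 0x40 announces an IPv4 address.
		 */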
		hh.ipv_cap = 0x40;
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
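		/*
		 * The child socket exists; answer with a hello-ack that
		 * advertises our receive credits and buffer size.
		 */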
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for InfiniBand tear down\n",
			    __func__);
		}
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
				    "TCPS_CLOSE_WAIT taking reference to "
				    "let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		    event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %s (%d) done. status %d\n",
	    rdma_event_msg(event->event), event->event, rc);

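	/*
	 * On a fatal event, detach the cm_id from the socket before
	 * delivering the error; sdp_notify() appears to return non-NULL
	 * only when it leaves the socket lock held, hence the conditional
	 * unlock.
	 */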
	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}