1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id$
35  */
36 #include "sdp.h"
37 
/*
 * Value stored in the majv_minv field of the Hello and Hello Ack messages
 * built in sdp_cma_handler().
 * NOTE(review): presumably encodes SDP protocol version 2.2 (major in the
 * high nibble, minor in the low nibble) -- confirm against the SDP spec.
 */
#define SDP_MAJV_MINV 0x22

/*
 * Tunable, declared with the value 1.  When non-zero, sdp_cma_handler()
 * fails address resolution with -ENETUNREACH for IB-transport devices whose
 * port link layer is not InfiniBand.
 */
SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type Infiniband");

/*
 * NOTE(review): presumably the expected sizes in bytes of struct sdp_hh and
 * struct sdp_hah; neither constant is referenced in the code visible here.
 */
enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};
47 
48 static void
49 sdp_qp_event_handler(struct ib_event *event, void *data)
50 {
51 	struct socket *sk = data;
52 
53 	sdp_dbg(sk, "QP Event: %s (%d)", ib_event_msg(event->event),
54 	    event->event);
55 }
56 
57 static int
58 sdp_get_max_dev_sge(struct ib_device *dev)
59 {
60 	struct ib_device_attr *device_attr;
61 	static int max_sges = -1;
62 
63 	if (max_sges > 0)
64 		goto out;
65 
66 	device_attr = &dev->attrs;
67 	max_sges = device_attr->max_sge;
68 
69 out:
70 	return max_sges;
71 }
72 
73 static int
74 sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
75 {
76 	struct ib_qp_init_attr qp_init_attr = {
77 		.event_handler = sdp_qp_event_handler,
78 		.qp_context = sk,
79 		.cap.max_send_wr = SDP_TX_SIZE,
80 		.cap.max_recv_wr = SDP_RX_SIZE,
81         	.sq_sig_type = IB_SIGNAL_REQ_WR,
82         	.qp_type = IB_QPT_RC,
83 	};
84 	struct ib_device *device = id->device;
85 	struct sdp_sock *ssk;
86 	int rc;
87 
88 	sdp_dbg(sk, "%s\n", __func__);
89 
90 	ssk = sdp_sk(sk);
91 	ssk->max_sge = sdp_get_max_dev_sge(device);
92 	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);
93 
94 	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
95 	sdp_dbg(sk, "Setting max send sge to: %d\n",
96 	    qp_init_attr.cap.max_send_sge);
97 
98 	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
99 	sdp_dbg(sk, "Setting max recv sge to: %d\n",
100 	    qp_init_attr.cap.max_recv_sge);
101 
102 	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
103 	if (!ssk->sdp_dev) {
104 		sdp_warn(sk, "SDP not available on device %s\n", device->name);
105 		rc = -ENODEV;
106 		goto err_rx;
107 	}
108 
109 	rc = sdp_rx_ring_create(ssk, device);
110 	if (rc)
111 		goto err_rx;
112 
113 	rc = sdp_tx_ring_create(ssk, device);
114 	if (rc)
115 		goto err_tx;
116 
117 	qp_init_attr.recv_cq = ssk->rx_ring.cq;
118 	qp_init_attr.send_cq = ssk->tx_ring.cq;
119 
120 	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
121 	if (rc) {
122 		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
123 		goto err_qp;
124 	}
125 	ssk->qp = id->qp;
126 	ssk->ib_device = device;
127 	ssk->qp_active = 1;
128 	ssk->context.device = device;
129 
130 	sdp_dbg(sk, "%s done\n", __func__);
131 	return 0;
132 
133 err_qp:
134 	sdp_tx_ring_destroy(ssk);
135 err_tx:
136 	sdp_rx_ring_destroy(ssk);
137 err_rx:
138 	return rc;
139 }
140 
141 static int
142 sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
143     struct rdma_cm_event *event)
144 {
145 	struct sockaddr_in *src_addr;
146 	struct sockaddr_in *dst_addr;
147 	struct socket *child;
148 	const struct sdp_hh *h;
149 	struct sdp_sock *ssk;
150 	int rc;
151 
152 	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);
153 
154 	h = event->param.conn.private_data;
155 	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
156 
157 	if (!h->max_adverts)
158 		return -EINVAL;
159 
160 	child = sonewconn(sk, SS_ISCONNECTED);
161 	if (!child)
162 		return -ENOMEM;
163 
164 	ssk = sdp_sk(child);
165 	rc = sdp_init_qp(child, id);
166 	if (rc)
167 		return rc;
168 	SDP_WLOCK(ssk);
169 	id->context = ssk;
170 	ssk->id = id;
171 	ssk->socket = child;
172 	ssk->cred = crhold(child->so_cred);
173 	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
174 	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
175 	ssk->fport = dst_addr->sin_port;
176 	ssk->faddr = dst_addr->sin_addr.s_addr;
177 	ssk->lport = src_addr->sin_port;
178 	ssk->max_bufs = ntohs(h->bsdh.bufs);
179 	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
180 	ssk->min_bufs = tx_credits(ssk) / 4;
181 	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
182 	sdp_init_buffers(ssk, rcvbuf_initial_size);
183 	ssk->state = TCPS_SYN_RECEIVED;
184 	SDP_WUNLOCK(ssk);
185 
186 	return 0;
187 }
188 
189 static int
190 sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
191     struct rdma_cm_event *event)
192 {
193 	const struct sdp_hah *h;
194 	struct sockaddr_in *dst_addr;
195 	struct sdp_sock *ssk;
196 	sdp_dbg(sk, "%s\n", __func__);
197 
198 	ssk = sdp_sk(sk);
199 	SDP_WLOCK(ssk);
200 	ssk->state = TCPS_ESTABLISHED;
201 	sdp_set_default_moderation(ssk);
202 	if (ssk->flags & SDP_DROPPED) {
203 		SDP_WUNLOCK(ssk);
204 		return 0;
205 	}
206 	if (sk->so_options & SO_KEEPALIVE)
207 		sdp_start_keepalive_timer(sk);
208 	h = event->param.conn.private_data;
209 	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
210 	ssk->max_bufs = ntohs(h->bsdh.bufs);
211 	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
212 	ssk->min_bufs = tx_credits(ssk) / 4;
213 	ssk->xmit_size_goal =
214 		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
215 	ssk->poll_cq = 1;
216 
217 	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
218 	ssk->fport = dst_addr->sin_port;
219 	ssk->faddr = dst_addr->sin_addr.s_addr;
220 	soisconnected(sk);
221 	SDP_WUNLOCK(ssk);
222 
223 	return 0;
224 }
225 
226 static int
227 sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
228 {
229 	struct sdp_sock *ssk;
230 
231 	sdp_dbg(sk, "%s\n", __func__);
232 
233 	ssk = sdp_sk(sk);
234 	SDP_WLOCK(ssk);
235 	ssk->state = TCPS_ESTABLISHED;
236 
237 	sdp_set_default_moderation(ssk);
238 
239 	if (sk->so_options & SO_KEEPALIVE)
240 		sdp_start_keepalive_timer(sk);
241 
242 	if ((ssk->flags & SDP_DROPPED) == 0)
243 		soisconnected(sk);
244 	SDP_WUNLOCK(ssk);
245 	return 0;
246 }
247 
248 static int
249 sdp_disconnected_handler(struct socket *sk)
250 {
251 	struct sdp_sock *ssk;
252 
253 	ssk = sdp_sk(sk);
254 	sdp_dbg(sk, "%s\n", __func__);
255 
256 	SDP_WLOCK_ASSERT(ssk);
257 	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
258 		sdp_connected_handler(sk, NULL);
259 
260 		if (rcv_nxt(ssk))
261 			return 0;
262 	}
263 
264 	return -ECONNRESET;
265 }
266 
/*
 * RDMA connection-manager event callback for SDP sockets.
 *
 * id:    the CM id the event is reported on; id->context is expected to
 *        point at the owning struct sdp_sock.
 * event: the event to handle.
 *
 * Dispatches on event->event.  When handling produces a non-zero result and
 * the socket still refers to `id`, the id is detached from the socket and
 * the error is reported through sdp_notify().
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): a non-zero return presumably makes the RDMA CM destroy
 * `id` -- confirm against the rdma_cm documentation.
 */
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	/*
	 * No socket state attached to this id (any more): ignore the event,
	 * except that a connect request is refused with -EINVAL.
	 */
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		       	event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		/*
		 * With sdp_link_layer_ib_only set, refuse IB-transport
		 * devices whose port link layer is not InfiniBand.
		 */
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/*
		 * Active side: create the QP and rings, then issue the
		 * connect request carrying the SDP Hello as private data.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		/* max_adverts is assigned the same value again further down. */
		hh.max_adverts = 1;
		hh.ipv_cap = 0x40;
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		/* Advertise the number of receive buffers posted so far. */
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		hh.max_adverts = 0x1;
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/*
		 * Passive side: sdp_connect_handler() creates the child
		 * socket and points id->context at it; the request is then
		 * accepted with an SDP Hello Ack as private data.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		/* From here on ssk is the child; sk is still the listener. */
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		/*
		 * On failure detach the id from the child right away; since
		 * ssk->id is then NULL, the error path at the bottom only
		 * takes and releases the lock without calling sdp_notify().
		 */
		if (rc) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}
		ssk->qp_active = 0;
		/* The lock is dropped around the rdma_disconnect() call. */
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			/*
			 * This branch only logs; no reference is taken
			 * anywhere in this function.
			 */
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCPS_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
			}
			/* Any failure here is reported as -EPIPE. */
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %s (%d) done. status %d\n",
	    rdma_event_msg(event->event), event->event, rc);

	/*
	 * Error path: if the socket still refers to this id, detach the id
	 * and hand the (positive) error to sdp_notify().
	 * NOTE(review): sdp_notify() presumably returns non-zero only when
	 * the socket is still locked and must be unlocked here -- confirm.
	 */
	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}
464