1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id$
35  */
36 #include "sdp.h"
37 
/* SDP protocol version (major/minor nibbles) advertised in Hello/HelloAck. */
#define SDP_MAJV_MINV 0x22

/* Tunable: when non-zero, refuse connections over non-IB link layers. */
SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type Infiniband");

/*
 * On-the-wire sizes of the SDP Hello (HH) and Hello-Ack (HAH) private
 * data carried in the CM REQ/REP — presumably per the SDP annex of the
 * IB spec; not referenced in this chunk, TODO confirm against users.
 */
enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};
47 
/*
 * Asynchronous QP event callback registered in ib_qp_init_attr.
 * SDP deliberately ignores these events; the callback exists only
 * because the verbs layer requires a non-NULL handler.
 */
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
}
52 
53 static int
54 sdp_get_max_dev_sge(struct ib_device *dev)
55 {
56 	struct ib_device_attr *device_attr;
57 	static int max_sges = -1;
58 
59 	if (max_sges > 0)
60 		goto out;
61 
62 	device_attr = &dev->attrs;
63 	max_sges = device_attr->max_sge;
64 
65 out:
66 	return max_sges;
67 }
68 
69 static int
70 sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
71 {
72 	struct ib_qp_init_attr qp_init_attr = {
73 		.event_handler = sdp_qp_event_handler,
74 		.cap.max_send_wr = SDP_TX_SIZE,
75 		.cap.max_recv_wr = SDP_RX_SIZE,
76         	.sq_sig_type = IB_SIGNAL_REQ_WR,
77         	.qp_type = IB_QPT_RC,
78 	};
79 	struct ib_device *device = id->device;
80 	struct sdp_sock *ssk;
81 	int rc;
82 
83 	sdp_dbg(sk, "%s\n", __func__);
84 
85 	ssk = sdp_sk(sk);
86 	ssk->max_sge = sdp_get_max_dev_sge(device);
87 	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);
88 
89 	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
90 	sdp_dbg(sk, "Setting max send sge to: %d\n",
91 	    qp_init_attr.cap.max_send_sge);
92 
93 	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
94 	sdp_dbg(sk, "Setting max recv sge to: %d\n",
95 	    qp_init_attr.cap.max_recv_sge);
96 
97 	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
98 	if (!ssk->sdp_dev) {
99 		sdp_warn(sk, "SDP not available on device %s\n", device->name);
100 		rc = -ENODEV;
101 		goto err_rx;
102 	}
103 
104 	rc = sdp_rx_ring_create(ssk, device);
105 	if (rc)
106 		goto err_rx;
107 
108 	rc = sdp_tx_ring_create(ssk, device);
109 	if (rc)
110 		goto err_tx;
111 
112 	qp_init_attr.recv_cq = ssk->rx_ring.cq;
113 	qp_init_attr.send_cq = ssk->tx_ring.cq;
114 
115 	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
116 	if (rc) {
117 		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
118 		goto err_qp;
119 	}
120 	ssk->qp = id->qp;
121 	ssk->ib_device = device;
122 	ssk->qp_active = 1;
123 	ssk->context.device = device;
124 
125 	sdp_dbg(sk, "%s done\n", __func__);
126 	return 0;
127 
128 err_qp:
129 	sdp_tx_ring_destroy(ssk);
130 err_tx:
131 	sdp_rx_ring_destroy(ssk);
132 err_rx:
133 	return rc;
134 }
135 
/*
 * Handle an incoming RDMA_CM_EVENT_CONNECT_REQUEST: parse the peer's
 * SDP Hello header from the CM private data, spawn a child socket via
 * sonewconn(), create its QP, and record addressing/credit state.
 * Returns 0 on success or a negative errno (caller then rejects the CM
 * request).
 */
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	/* Peer's Hello message rides in the CM REQ private data. */
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	/* The SDP Hello must advertise at least one advertisement. */
	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		/*
		 * NOTE(review): the child socket from sonewconn() appears to
		 * be left behind on this path — verify whether it needs an
		 * explicit abort/teardown here to avoid a leak.
		 */
		return rc;
	SDP_WLOCK(ssk);
	/* Bind the CM id and the child socket to each other. */
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	/* Seed our TX credits from the peer's advertised buffer count. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}
183 
/*
 * Handle RDMA_CM_EVENT_CONNECT_RESPONSE on the active (connecting)
 * side: absorb the peer's HelloAck (credits, receive size), record the
 * foreign address, and mark the socket connected.  Always returns 0; if
 * the socket was already dropped, only the state transition is made.
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;
	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	/* Socket torn down while the CM exchange was in flight: stop here. */
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	/* Peer's HelloAck rides in the CM REP private data. */
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	/* Seed TX credits from the peer's advertised buffer count. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}
220 
221 static int
222 sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
223 {
224 	struct sdp_sock *ssk;
225 
226 	sdp_dbg(sk, "%s\n", __func__);
227 
228 	ssk = sdp_sk(sk);
229 	SDP_WLOCK(ssk);
230 	ssk->state = TCPS_ESTABLISHED;
231 
232 	sdp_set_default_moderation(ssk);
233 
234 	if (sk->so_options & SO_KEEPALIVE)
235 		sdp_start_keepalive_timer(sk);
236 
237 	if ((ssk->flags & SDP_DROPPED) == 0)
238 		soisconnected(sk);
239 	SDP_WUNLOCK(ssk);
240 	return 0;
241 }
242 
243 static int
244 sdp_disconnected_handler(struct socket *sk)
245 {
246 	struct sdp_sock *ssk;
247 
248 	ssk = sdp_sk(sk);
249 	sdp_dbg(sk, "%s\n", __func__);
250 
251 	SDP_WLOCK_ASSERT(ssk);
252 	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
253 		sdp_connected_handler(sk, NULL);
254 
255 		if (rcv_nxt(ssk))
256 			return 0;
257 	}
258 
259 	return -ECONNRESET;
260 }
261 
262 int
263 sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
264 {
265 	struct rdma_conn_param conn_param;
266 	struct socket *sk;
267 	struct sdp_sock *ssk;
268 	struct sdp_hah hah;
269 	struct sdp_hh hh;
270 
271 	int rc = 0;
272 
273 	ssk = id->context;
274 	sk = NULL;
275 	if (ssk)
276 		sk = ssk->socket;
277 	if (!ssk || !sk || !ssk->id) {
278 		sdp_dbg(sk,
279 		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
280 		       	event->event, ssk, sk, id);
281 		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
282 			-EINVAL : 0;
283 	}
284 
285 	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
286 	switch (event->event) {
287 	case RDMA_CM_EVENT_ADDR_RESOLVED:
288 		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");
289 
290 		if (sdp_link_layer_ib_only &&
291 			rdma_node_get_transport(id->device->node_type) ==
292 				RDMA_TRANSPORT_IB &&
293 			rdma_port_get_link_layer(id->device, id->port_num) !=
294 				IB_LINK_LAYER_INFINIBAND) {
295 			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
296 				"is allowed\n",
297 				rdma_port_get_link_layer(id->device, id->port_num));
298 			rc = -ENETUNREACH;
299 			break;
300 		}
301 
302 		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
303 		break;
304 	case RDMA_CM_EVENT_ADDR_ERROR:
305 		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
306 		rc = -ENETUNREACH;
307 		break;
308 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
309 		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
310 		rc = sdp_init_qp(sk, id);
311 		if (rc)
312 			break;
313 		atomic_set(&sdp_sk(sk)->remote_credits,
314 				rx_ring_posted(sdp_sk(sk)));
315 		memset(&hh, 0, sizeof hh);
316 		hh.bsdh.mid = SDP_MID_HELLO;
317 		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
318 		hh.max_adverts = 1;
319 		hh.ipv_cap = 0x40;
320 		hh.majv_minv = SDP_MAJV_MINV;
321 		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
322 		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
323 		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
324 		hh.max_adverts = 0x1;
325 		sdp_sk(sk)->laddr =
326 			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
327 		memset(&conn_param, 0, sizeof conn_param);
328 		conn_param.private_data_len = sizeof hh;
329 		conn_param.private_data = &hh;
330 		conn_param.responder_resources = 4 /* TODO */;
331 		conn_param.initiator_depth = 4 /* TODO */;
332 		conn_param.retry_count = SDP_RETRY_COUNT;
333 		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
334 		rc = rdma_connect(id, &conn_param);
335 		break;
336 	case RDMA_CM_EVENT_ROUTE_ERROR:
337 		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
338 		rc = -ETIMEDOUT;
339 		break;
340 	case RDMA_CM_EVENT_CONNECT_REQUEST:
341 		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
342 		rc = sdp_connect_handler(sk, id, event);
343 		if (rc) {
344 			sdp_dbg(sk, "Destroying qp\n");
345 			rdma_reject(id, NULL, 0);
346 			break;
347 		}
348 		ssk = id->context;
349 		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
350 		memset(&hah, 0, sizeof hah);
351 		hah.bsdh.mid = SDP_MID_HELLO_ACK;
352 		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
353 		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
354 		hah.majv_minv = SDP_MAJV_MINV;
355 		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
356 					    but just in case */
357 		hah.actrcvsz = htonl(ssk->recv_bytes);
358 		memset(&conn_param, 0, sizeof conn_param);
359 		conn_param.private_data_len = sizeof hah;
360 		conn_param.private_data = &hah;
361 		conn_param.responder_resources = 4 /* TODO */;
362 		conn_param.initiator_depth = 4 /* TODO */;
363 		conn_param.retry_count = SDP_RETRY_COUNT;
364 		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
365 		rc = rdma_accept(id, &conn_param);
366 		if (rc) {
367 			ssk->id = NULL;
368 			id->qp = NULL;
369 			id->context = NULL;
370 		}
371 		break;
372 	case RDMA_CM_EVENT_CONNECT_RESPONSE:
373 		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
374 		rc = sdp_response_handler(sk, id, event);
375 		if (rc) {
376 			sdp_dbg(sk, "Destroying qp\n");
377 			rdma_reject(id, NULL, 0);
378 		} else
379 			rc = rdma_accept(id, NULL);
380 		break;
381 	case RDMA_CM_EVENT_CONNECT_ERROR:
382 		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
383 		rc = -ETIMEDOUT;
384 		break;
385 	case RDMA_CM_EVENT_UNREACHABLE:
386 		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
387 		rc = -ENETUNREACH;
388 		break;
389 	case RDMA_CM_EVENT_REJECTED:
390 		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
391 		rc = -ECONNREFUSED;
392 		break;
393 	case RDMA_CM_EVENT_ESTABLISHED:
394 		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
395 		sdp_sk(sk)->laddr =
396 			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
397 		rc = sdp_connected_handler(sk, event);
398 		break;
399 	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
400 		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
401 
402 		SDP_WLOCK(ssk);
403 		if (ssk->state == TCPS_LAST_ACK) {
404 			sdp_cancel_dreq_wait_timeout(ssk);
405 
406 			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
407 				__func__);
408 		}
409 		ssk->qp_active = 0;
410 		SDP_WUNLOCK(ssk);
411 		rdma_disconnect(id);
412 		SDP_WLOCK(ssk);
413 		if (ssk->state != TCPS_TIME_WAIT) {
414 			if (ssk->state == TCPS_CLOSE_WAIT) {
415 				sdp_dbg(sk, "IB teardown while in "
416 					"TCPS_CLOSE_WAIT taking reference to "
417 					"let close() finish the work\n");
418 			}
419 			rc = sdp_disconnected_handler(sk);
420 			if (rc)
421 				rc = -EPIPE;
422 		}
423 		SDP_WUNLOCK(ssk);
424 		break;
425 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
426 		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
427 		SDP_WLOCK(ssk);
428 		rc = sdp_disconnected_handler(sk);
429 		SDP_WUNLOCK(ssk);
430 		break;
431 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
432 		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
433 		rc = -ENETRESET;
434 		break;
435 	default:
436 		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
437 		       event->event);
438 		rc = -ECONNABORTED;
439 		break;
440 	}
441 
442 	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);
443 
444 	if (rc) {
445 		SDP_WLOCK(ssk);
446 		if (ssk->id == id) {
447 			ssk->id = NULL;
448 			id->qp = NULL;
449 			id->context = NULL;
450 			if (sdp_notify(ssk, -rc))
451 				SDP_WUNLOCK(ssk);
452 		} else
453 			SDP_WUNLOCK(ssk);
454 	}
455 
456 	return rc;
457 }
458