xref: /linux/net/mptcp/protocol.c (revision 1891c4a0)
1f870fa0bSMat Martineau // SPDX-License-Identifier: GPL-2.0
2f870fa0bSMat Martineau /* Multipath TCP
3f870fa0bSMat Martineau  *
4f870fa0bSMat Martineau  * Copyright (c) 2017 - 2019, Intel Corporation.
5f870fa0bSMat Martineau  */
6f870fa0bSMat Martineau 
7f870fa0bSMat Martineau #define pr_fmt(fmt) "MPTCP: " fmt
8f870fa0bSMat Martineau 
9f870fa0bSMat Martineau #include <linux/kernel.h>
10f870fa0bSMat Martineau #include <linux/module.h>
11f870fa0bSMat Martineau #include <linux/netdevice.h>
12f870fa0bSMat Martineau #include <net/sock.h>
13f870fa0bSMat Martineau #include <net/inet_common.h>
14f870fa0bSMat Martineau #include <net/inet_hashtables.h>
15f870fa0bSMat Martineau #include <net/protocol.h>
16f870fa0bSMat Martineau #include <net/tcp.h>
17cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
18cf7da0d6SPeter Krystad #include <net/transp_v6.h>
19cf7da0d6SPeter Krystad #endif
20f870fa0bSMat Martineau #include <net/mptcp.h>
21f870fa0bSMat Martineau #include "protocol.h"
22f870fa0bSMat Martineau 
/* Sentinel 'state' argument for __mptcp_socket_create(): when passed, the
 * msk socket state is left untouched.
 */
#define MPTCP_SAME_STATE TCP_MAX_STATES
242303f994SPeter Krystad 
252303f994SPeter Krystad /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
262303f994SPeter Krystad  * completed yet or has failed, return the subflow socket.
272303f994SPeter Krystad  * Otherwise return NULL.
282303f994SPeter Krystad  */
292303f994SPeter Krystad static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
302303f994SPeter Krystad {
31cec37a6eSPeter Krystad 	if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack)
322303f994SPeter Krystad 		return NULL;
332303f994SPeter Krystad 
342303f994SPeter Krystad 	return msk->subflow;
352303f994SPeter Krystad }
362303f994SPeter Krystad 
37cec37a6eSPeter Krystad /* if msk has a single subflow, and the mp_capable handshake is failed,
38cec37a6eSPeter Krystad  * return it.
39cec37a6eSPeter Krystad  * Otherwise returns NULL
40cec37a6eSPeter Krystad  */
41cec37a6eSPeter Krystad static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
42cec37a6eSPeter Krystad {
43cec37a6eSPeter Krystad 	struct socket *ssock = __mptcp_nmpc_socket(msk);
44cec37a6eSPeter Krystad 
45cec37a6eSPeter Krystad 	sock_owned_by_me((const struct sock *)msk);
46cec37a6eSPeter Krystad 
47cec37a6eSPeter Krystad 	if (!ssock || sk_is_mptcp(ssock->sk))
48cec37a6eSPeter Krystad 		return NULL;
49cec37a6eSPeter Krystad 
50cec37a6eSPeter Krystad 	return ssock;
51cec37a6eSPeter Krystad }
52cec37a6eSPeter Krystad 
532303f994SPeter Krystad static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
542303f994SPeter Krystad {
552303f994SPeter Krystad 	return ((struct sock *)msk)->sk_state == TCP_CLOSE;
562303f994SPeter Krystad }
572303f994SPeter Krystad 
582303f994SPeter Krystad static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
592303f994SPeter Krystad {
602303f994SPeter Krystad 	struct mptcp_subflow_context *subflow;
612303f994SPeter Krystad 	struct sock *sk = (struct sock *)msk;
622303f994SPeter Krystad 	struct socket *ssock;
632303f994SPeter Krystad 	int err;
642303f994SPeter Krystad 
652303f994SPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
662303f994SPeter Krystad 	if (ssock)
672303f994SPeter Krystad 		goto set_state;
682303f994SPeter Krystad 
692303f994SPeter Krystad 	if (!__mptcp_can_create_subflow(msk))
702303f994SPeter Krystad 		return ERR_PTR(-EINVAL);
712303f994SPeter Krystad 
722303f994SPeter Krystad 	err = mptcp_subflow_create_socket(sk, &ssock);
732303f994SPeter Krystad 	if (err)
742303f994SPeter Krystad 		return ERR_PTR(err);
752303f994SPeter Krystad 
762303f994SPeter Krystad 	msk->subflow = ssock;
772303f994SPeter Krystad 	subflow = mptcp_subflow_ctx(ssock->sk);
78cec37a6eSPeter Krystad 	list_add(&subflow->node, &msk->conn_list);
792303f994SPeter Krystad 	subflow->request_mptcp = 1;
802303f994SPeter Krystad 
812303f994SPeter Krystad set_state:
822303f994SPeter Krystad 	if (state != MPTCP_SAME_STATE)
832303f994SPeter Krystad 		inet_sk_state_store(sk, state);
842303f994SPeter Krystad 	return ssock;
852303f994SPeter Krystad }
862303f994SPeter Krystad 
87cec37a6eSPeter Krystad static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
88cec37a6eSPeter Krystad {
89cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow;
90cec37a6eSPeter Krystad 
91cec37a6eSPeter Krystad 	sock_owned_by_me((const struct sock *)msk);
92cec37a6eSPeter Krystad 
93cec37a6eSPeter Krystad 	mptcp_for_each_subflow(msk, subflow) {
94cec37a6eSPeter Krystad 		return mptcp_subflow_tcp_sock(subflow);
95cec37a6eSPeter Krystad 	}
96cec37a6eSPeter Krystad 
97cec37a6eSPeter Krystad 	return NULL;
98cec37a6eSPeter Krystad }
99cec37a6eSPeter Krystad 
1006d0060f6SMat Martineau static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
1016d0060f6SMat Martineau {
1026d0060f6SMat Martineau 	if (!msk->cached_ext)
1036d0060f6SMat Martineau 		msk->cached_ext = __skb_ext_alloc();
1046d0060f6SMat Martineau 
1056d0060f6SMat Martineau 	return !!msk->cached_ext;
1066d0060f6SMat Martineau }
1076d0060f6SMat Martineau 
1086d0060f6SMat Martineau static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
1096d0060f6SMat Martineau 			      struct msghdr *msg, long *timeo)
1106d0060f6SMat Martineau {
1116d0060f6SMat Martineau 	int mss_now = 0, size_goal = 0, ret = 0;
1126d0060f6SMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
1136d0060f6SMat Martineau 	struct mptcp_ext *mpext = NULL;
1146d0060f6SMat Martineau 	struct page_frag *pfrag;
1156d0060f6SMat Martineau 	struct sk_buff *skb;
1166d0060f6SMat Martineau 	size_t psize;
1176d0060f6SMat Martineau 
1186d0060f6SMat Martineau 	/* use the mptcp page cache so that we can easily move the data
1196d0060f6SMat Martineau 	 * from one substream to another, but do per subflow memory accounting
1206d0060f6SMat Martineau 	 */
1216d0060f6SMat Martineau 	pfrag = sk_page_frag(sk);
1226d0060f6SMat Martineau 	while (!sk_page_frag_refill(ssk, pfrag) ||
1236d0060f6SMat Martineau 	       !mptcp_ext_cache_refill(msk)) {
1246d0060f6SMat Martineau 		ret = sk_stream_wait_memory(ssk, timeo);
1256d0060f6SMat Martineau 		if (ret)
1266d0060f6SMat Martineau 			return ret;
1276d0060f6SMat Martineau 	}
1286d0060f6SMat Martineau 
1296d0060f6SMat Martineau 	/* compute copy limit */
1306d0060f6SMat Martineau 	mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
1316d0060f6SMat Martineau 	psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
1326d0060f6SMat Martineau 
1336d0060f6SMat Martineau 	pr_debug("left=%zu", msg_data_left(msg));
1346d0060f6SMat Martineau 	psize = copy_page_from_iter(pfrag->page, pfrag->offset,
1356d0060f6SMat Martineau 				    min_t(size_t, msg_data_left(msg), psize),
1366d0060f6SMat Martineau 				    &msg->msg_iter);
1376d0060f6SMat Martineau 	pr_debug("left=%zu", msg_data_left(msg));
1386d0060f6SMat Martineau 	if (!psize)
1396d0060f6SMat Martineau 		return -EINVAL;
1406d0060f6SMat Martineau 
1416d0060f6SMat Martineau 	/* Mark the end of the previous write so the beginning of the
1426d0060f6SMat Martineau 	 * next write (with its own mptcp skb extension data) is not
1436d0060f6SMat Martineau 	 * collapsed.
1446d0060f6SMat Martineau 	 */
1456d0060f6SMat Martineau 	skb = tcp_write_queue_tail(ssk);
1466d0060f6SMat Martineau 	if (skb)
1476d0060f6SMat Martineau 		TCP_SKB_CB(skb)->eor = 1;
1486d0060f6SMat Martineau 
1496d0060f6SMat Martineau 	ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
1506d0060f6SMat Martineau 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST);
1516d0060f6SMat Martineau 	if (ret <= 0)
1526d0060f6SMat Martineau 		return ret;
1536d0060f6SMat Martineau 	if (unlikely(ret < psize))
1546d0060f6SMat Martineau 		iov_iter_revert(&msg->msg_iter, psize - ret);
1556d0060f6SMat Martineau 
1566d0060f6SMat Martineau 	skb = tcp_write_queue_tail(ssk);
1576d0060f6SMat Martineau 	mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
1586d0060f6SMat Martineau 	msk->cached_ext = NULL;
1596d0060f6SMat Martineau 
1606d0060f6SMat Martineau 	memset(mpext, 0, sizeof(*mpext));
1616d0060f6SMat Martineau 	mpext->data_seq = msk->write_seq;
1626d0060f6SMat Martineau 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
1636d0060f6SMat Martineau 	mpext->data_len = ret;
1646d0060f6SMat Martineau 	mpext->use_map = 1;
1656d0060f6SMat Martineau 	mpext->dsn64 = 1;
1666d0060f6SMat Martineau 
1676d0060f6SMat Martineau 	pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
1686d0060f6SMat Martineau 		 mpext->data_seq, mpext->subflow_seq, mpext->data_len,
1696d0060f6SMat Martineau 		 mpext->dsn64);
1706d0060f6SMat Martineau 
1716d0060f6SMat Martineau 	pfrag->offset += ret;
1726d0060f6SMat Martineau 	msk->write_seq += ret;
1736d0060f6SMat Martineau 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
1746d0060f6SMat Martineau 
1756d0060f6SMat Martineau 	tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
1766d0060f6SMat Martineau 	return ret;
1776d0060f6SMat Martineau }
1786d0060f6SMat Martineau 
179*1891c4a0SFlorian Westphal static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
180*1891c4a0SFlorian Westphal {
181*1891c4a0SFlorian Westphal 	struct socket *sock;
182*1891c4a0SFlorian Westphal 
183*1891c4a0SFlorian Westphal 	if (likely(sk_stream_is_writeable(ssk)))
184*1891c4a0SFlorian Westphal 		return;
185*1891c4a0SFlorian Westphal 
186*1891c4a0SFlorian Westphal 	sock = READ_ONCE(ssk->sk_socket);
187*1891c4a0SFlorian Westphal 
188*1891c4a0SFlorian Westphal 	if (sock) {
189*1891c4a0SFlorian Westphal 		clear_bit(MPTCP_SEND_SPACE, &msk->flags);
190*1891c4a0SFlorian Westphal 		smp_mb__after_atomic();
191*1891c4a0SFlorian Westphal 		/* set NOSPACE only after clearing SEND_SPACE flag */
192*1891c4a0SFlorian Westphal 		set_bit(SOCK_NOSPACE, &sock->flags);
193*1891c4a0SFlorian Westphal 	}
194*1891c4a0SFlorian Westphal }
195*1891c4a0SFlorian Westphal 
196f870fa0bSMat Martineau static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
197f870fa0bSMat Martineau {
198f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
199cec37a6eSPeter Krystad 	struct socket *ssock;
2006d0060f6SMat Martineau 	size_t copied = 0;
201cec37a6eSPeter Krystad 	struct sock *ssk;
2026d0060f6SMat Martineau 	int ret = 0;
2036d0060f6SMat Martineau 	long timeo;
204f870fa0bSMat Martineau 
205f870fa0bSMat Martineau 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
206f870fa0bSMat Martineau 		return -EOPNOTSUPP;
207f870fa0bSMat Martineau 
208cec37a6eSPeter Krystad 	lock_sock(sk);
209cec37a6eSPeter Krystad 	ssock = __mptcp_tcp_fallback(msk);
210cec37a6eSPeter Krystad 	if (ssock) {
211cec37a6eSPeter Krystad 		pr_debug("fallback passthrough");
212cec37a6eSPeter Krystad 		ret = sock_sendmsg(ssock, msg);
213cec37a6eSPeter Krystad 		release_sock(sk);
214cec37a6eSPeter Krystad 		return ret;
215cec37a6eSPeter Krystad 	}
216cec37a6eSPeter Krystad 
2176d0060f6SMat Martineau 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2186d0060f6SMat Martineau 
219cec37a6eSPeter Krystad 	ssk = mptcp_subflow_get(msk);
220cec37a6eSPeter Krystad 	if (!ssk) {
221cec37a6eSPeter Krystad 		release_sock(sk);
222cec37a6eSPeter Krystad 		return -ENOTCONN;
223cec37a6eSPeter Krystad 	}
224cec37a6eSPeter Krystad 
2256d0060f6SMat Martineau 	pr_debug("conn_list->subflow=%p", ssk);
226cec37a6eSPeter Krystad 
2276d0060f6SMat Martineau 	lock_sock(ssk);
2286d0060f6SMat Martineau 	while (msg_data_left(msg)) {
2296d0060f6SMat Martineau 		ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
2306d0060f6SMat Martineau 		if (ret < 0)
2316d0060f6SMat Martineau 			break;
2326d0060f6SMat Martineau 
2336d0060f6SMat Martineau 		copied += ret;
2346d0060f6SMat Martineau 	}
2356d0060f6SMat Martineau 
2366d0060f6SMat Martineau 	if (copied > 0)
2376d0060f6SMat Martineau 		ret = copied;
2386d0060f6SMat Martineau 
239*1891c4a0SFlorian Westphal 	ssk_check_wmem(msk, ssk);
2406d0060f6SMat Martineau 	release_sock(ssk);
241cec37a6eSPeter Krystad 	release_sock(sk);
242cec37a6eSPeter Krystad 	return ret;
243f870fa0bSMat Martineau }
244f870fa0bSMat Martineau 
245648ef4b8SMat Martineau int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
246648ef4b8SMat Martineau 		     unsigned int offset, size_t len)
247648ef4b8SMat Martineau {
248648ef4b8SMat Martineau 	struct mptcp_read_arg *arg = desc->arg.data;
249648ef4b8SMat Martineau 	size_t copy_len;
250648ef4b8SMat Martineau 
251648ef4b8SMat Martineau 	copy_len = min(desc->count, len);
252648ef4b8SMat Martineau 
253648ef4b8SMat Martineau 	if (likely(arg->msg)) {
254648ef4b8SMat Martineau 		int err;
255648ef4b8SMat Martineau 
256648ef4b8SMat Martineau 		err = skb_copy_datagram_msg(skb, offset, arg->msg, copy_len);
257648ef4b8SMat Martineau 		if (err) {
258648ef4b8SMat Martineau 			pr_debug("error path");
259648ef4b8SMat Martineau 			desc->error = err;
260648ef4b8SMat Martineau 			return err;
261648ef4b8SMat Martineau 		}
262648ef4b8SMat Martineau 	} else {
263648ef4b8SMat Martineau 		pr_debug("Flushing skb payload");
264648ef4b8SMat Martineau 	}
265648ef4b8SMat Martineau 
266648ef4b8SMat Martineau 	desc->count -= copy_len;
267648ef4b8SMat Martineau 
268648ef4b8SMat Martineau 	pr_debug("consumed %zu bytes, %zu left", copy_len, desc->count);
269648ef4b8SMat Martineau 	return copy_len;
270648ef4b8SMat Martineau }
271648ef4b8SMat Martineau 
272f870fa0bSMat Martineau static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
273f870fa0bSMat Martineau 			 int nonblock, int flags, int *addr_len)
274f870fa0bSMat Martineau {
275f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
276cec37a6eSPeter Krystad 	struct socket *ssock;
277cec37a6eSPeter Krystad 	struct sock *ssk;
278cec37a6eSPeter Krystad 	int copied = 0;
279f870fa0bSMat Martineau 
280f870fa0bSMat Martineau 	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
281f870fa0bSMat Martineau 		return -EOPNOTSUPP;
282f870fa0bSMat Martineau 
283cec37a6eSPeter Krystad 	lock_sock(sk);
284cec37a6eSPeter Krystad 	ssock = __mptcp_tcp_fallback(msk);
285cec37a6eSPeter Krystad 	if (ssock) {
286cec37a6eSPeter Krystad 		pr_debug("fallback-read subflow=%p",
287cec37a6eSPeter Krystad 			 mptcp_subflow_ctx(ssock->sk));
288cec37a6eSPeter Krystad 		copied = sock_recvmsg(ssock, msg, flags);
289cec37a6eSPeter Krystad 		release_sock(sk);
290cec37a6eSPeter Krystad 		return copied;
291cec37a6eSPeter Krystad 	}
292cec37a6eSPeter Krystad 
293cec37a6eSPeter Krystad 	ssk = mptcp_subflow_get(msk);
294cec37a6eSPeter Krystad 	if (!ssk) {
295cec37a6eSPeter Krystad 		release_sock(sk);
296cec37a6eSPeter Krystad 		return -ENOTCONN;
297cec37a6eSPeter Krystad 	}
298cec37a6eSPeter Krystad 
299cec37a6eSPeter Krystad 	copied = sock_recvmsg(ssk->sk_socket, msg, flags);
300cec37a6eSPeter Krystad 
301cec37a6eSPeter Krystad 	release_sock(sk);
302cec37a6eSPeter Krystad 
303cec37a6eSPeter Krystad 	return copied;
304cec37a6eSPeter Krystad }
305cec37a6eSPeter Krystad 
306cec37a6eSPeter Krystad /* subflow sockets can be either outgoing (connect) or incoming
307cec37a6eSPeter Krystad  * (accept).
308cec37a6eSPeter Krystad  *
309cec37a6eSPeter Krystad  * Outgoing subflows use in-kernel sockets.
310cec37a6eSPeter Krystad  * Incoming subflows do not have their own 'struct socket' allocated,
311cec37a6eSPeter Krystad  * so we need to use tcp_close() after detaching them from the mptcp
312cec37a6eSPeter Krystad  * parent socket.
313cec37a6eSPeter Krystad  */
314cec37a6eSPeter Krystad static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
315cec37a6eSPeter Krystad 			      struct mptcp_subflow_context *subflow,
316cec37a6eSPeter Krystad 			      long timeout)
317cec37a6eSPeter Krystad {
318cec37a6eSPeter Krystad 	struct socket *sock = READ_ONCE(ssk->sk_socket);
319cec37a6eSPeter Krystad 
320cec37a6eSPeter Krystad 	list_del(&subflow->node);
321cec37a6eSPeter Krystad 
322cec37a6eSPeter Krystad 	if (sock && sock != sk->sk_socket) {
323cec37a6eSPeter Krystad 		/* outgoing subflow */
324cec37a6eSPeter Krystad 		sock_release(sock);
325cec37a6eSPeter Krystad 	} else {
326cec37a6eSPeter Krystad 		/* incoming subflow */
327cec37a6eSPeter Krystad 		tcp_close(ssk, timeout);
328cec37a6eSPeter Krystad 	}
329f870fa0bSMat Martineau }
330f870fa0bSMat Martineau 
331f870fa0bSMat Martineau static int mptcp_init_sock(struct sock *sk)
332f870fa0bSMat Martineau {
333cec37a6eSPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
334cec37a6eSPeter Krystad 
335cec37a6eSPeter Krystad 	INIT_LIST_HEAD(&msk->conn_list);
336*1891c4a0SFlorian Westphal 	__set_bit(MPTCP_SEND_SPACE, &msk->flags);
337cec37a6eSPeter Krystad 
338f870fa0bSMat Martineau 	return 0;
339f870fa0bSMat Martineau }
340f870fa0bSMat Martineau 
34121498490SPeter Krystad static void mptcp_subflow_shutdown(struct sock *ssk, int how)
34221498490SPeter Krystad {
34321498490SPeter Krystad 	lock_sock(ssk);
34421498490SPeter Krystad 
34521498490SPeter Krystad 	switch (ssk->sk_state) {
34621498490SPeter Krystad 	case TCP_LISTEN:
34721498490SPeter Krystad 		if (!(how & RCV_SHUTDOWN))
34821498490SPeter Krystad 			break;
34921498490SPeter Krystad 		/* fall through */
35021498490SPeter Krystad 	case TCP_SYN_SENT:
35121498490SPeter Krystad 		tcp_disconnect(ssk, O_NONBLOCK);
35221498490SPeter Krystad 		break;
35321498490SPeter Krystad 	default:
35421498490SPeter Krystad 		ssk->sk_shutdown |= how;
35521498490SPeter Krystad 		tcp_shutdown(ssk, how);
35621498490SPeter Krystad 		break;
35721498490SPeter Krystad 	}
35821498490SPeter Krystad 
35921498490SPeter Krystad 	/* Wake up anyone sleeping in poll. */
36021498490SPeter Krystad 	ssk->sk_state_change(ssk);
36121498490SPeter Krystad 	release_sock(ssk);
36221498490SPeter Krystad }
36321498490SPeter Krystad 
364f870fa0bSMat Martineau static void mptcp_close(struct sock *sk, long timeout)
365f870fa0bSMat Martineau {
366cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow, *tmp;
367f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
368f870fa0bSMat Martineau 
36979c0949eSPeter Krystad 	mptcp_token_destroy(msk->token);
370f870fa0bSMat Martineau 	inet_sk_state_store(sk, TCP_CLOSE);
371f870fa0bSMat Martineau 
372cec37a6eSPeter Krystad 	lock_sock(sk);
373cec37a6eSPeter Krystad 
374cec37a6eSPeter Krystad 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
375cec37a6eSPeter Krystad 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
376cec37a6eSPeter Krystad 
377cec37a6eSPeter Krystad 		__mptcp_close_ssk(sk, ssk, subflow, timeout);
378f870fa0bSMat Martineau 	}
379f870fa0bSMat Martineau 
3806d0060f6SMat Martineau 	if (msk->cached_ext)
3816d0060f6SMat Martineau 		__skb_ext_put(msk->cached_ext);
382cec37a6eSPeter Krystad 	release_sock(sk);
383cec37a6eSPeter Krystad 	sk_common_release(sk);
384f870fa0bSMat Martineau }
385f870fa0bSMat Martineau 
386cf7da0d6SPeter Krystad static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
387cf7da0d6SPeter Krystad {
388cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
389cf7da0d6SPeter Krystad 	const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
390cf7da0d6SPeter Krystad 	struct ipv6_pinfo *msk6 = inet6_sk(msk);
391cf7da0d6SPeter Krystad 
392cf7da0d6SPeter Krystad 	msk->sk_v6_daddr = ssk->sk_v6_daddr;
393cf7da0d6SPeter Krystad 	msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;
394cf7da0d6SPeter Krystad 
395cf7da0d6SPeter Krystad 	if (msk6 && ssk6) {
396cf7da0d6SPeter Krystad 		msk6->saddr = ssk6->saddr;
397cf7da0d6SPeter Krystad 		msk6->flow_label = ssk6->flow_label;
398cf7da0d6SPeter Krystad 	}
399cf7da0d6SPeter Krystad #endif
400cf7da0d6SPeter Krystad 
401cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_num = inet_sk(ssk)->inet_num;
402cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_dport = inet_sk(ssk)->inet_dport;
403cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_sport = inet_sk(ssk)->inet_sport;
404cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_daddr = inet_sk(ssk)->inet_daddr;
405cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_saddr = inet_sk(ssk)->inet_saddr;
406cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
407cf7da0d6SPeter Krystad }
408cf7da0d6SPeter Krystad 
409cf7da0d6SPeter Krystad static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
410cf7da0d6SPeter Krystad 				 bool kern)
411cf7da0d6SPeter Krystad {
412cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
413cf7da0d6SPeter Krystad 	struct socket *listener;
414cf7da0d6SPeter Krystad 	struct sock *newsk;
415cf7da0d6SPeter Krystad 
416cf7da0d6SPeter Krystad 	listener = __mptcp_nmpc_socket(msk);
417cf7da0d6SPeter Krystad 	if (WARN_ON_ONCE(!listener)) {
418cf7da0d6SPeter Krystad 		*err = -EINVAL;
419cf7da0d6SPeter Krystad 		return NULL;
420cf7da0d6SPeter Krystad 	}
421cf7da0d6SPeter Krystad 
422cf7da0d6SPeter Krystad 	pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
423cf7da0d6SPeter Krystad 	newsk = inet_csk_accept(listener->sk, flags, err, kern);
424cf7da0d6SPeter Krystad 	if (!newsk)
425cf7da0d6SPeter Krystad 		return NULL;
426cf7da0d6SPeter Krystad 
427cf7da0d6SPeter Krystad 	pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
428cf7da0d6SPeter Krystad 
429cf7da0d6SPeter Krystad 	if (sk_is_mptcp(newsk)) {
430cf7da0d6SPeter Krystad 		struct mptcp_subflow_context *subflow;
431cf7da0d6SPeter Krystad 		struct sock *new_mptcp_sock;
432cf7da0d6SPeter Krystad 		struct sock *ssk = newsk;
4336d0060f6SMat Martineau 		u64 ack_seq;
434cf7da0d6SPeter Krystad 
435cf7da0d6SPeter Krystad 		subflow = mptcp_subflow_ctx(newsk);
436cf7da0d6SPeter Krystad 		lock_sock(sk);
437cf7da0d6SPeter Krystad 
438cf7da0d6SPeter Krystad 		local_bh_disable();
439cf7da0d6SPeter Krystad 		new_mptcp_sock = sk_clone_lock(sk, GFP_ATOMIC);
440cf7da0d6SPeter Krystad 		if (!new_mptcp_sock) {
441cf7da0d6SPeter Krystad 			*err = -ENOBUFS;
442cf7da0d6SPeter Krystad 			local_bh_enable();
443cf7da0d6SPeter Krystad 			release_sock(sk);
44421498490SPeter Krystad 			mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1);
445cf7da0d6SPeter Krystad 			tcp_close(newsk, 0);
446cf7da0d6SPeter Krystad 			return NULL;
447cf7da0d6SPeter Krystad 		}
448cf7da0d6SPeter Krystad 
449cf7da0d6SPeter Krystad 		mptcp_init_sock(new_mptcp_sock);
450cf7da0d6SPeter Krystad 
451cf7da0d6SPeter Krystad 		msk = mptcp_sk(new_mptcp_sock);
452cf7da0d6SPeter Krystad 		msk->remote_key = subflow->remote_key;
453cf7da0d6SPeter Krystad 		msk->local_key = subflow->local_key;
45479c0949eSPeter Krystad 		msk->token = subflow->token;
455cf7da0d6SPeter Krystad 		msk->subflow = NULL;
456cf7da0d6SPeter Krystad 
45779c0949eSPeter Krystad 		mptcp_token_update_accept(newsk, new_mptcp_sock);
4586d0060f6SMat Martineau 
4596d0060f6SMat Martineau 		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
4606d0060f6SMat Martineau 		msk->write_seq = subflow->idsn + 1;
4616d0060f6SMat Martineau 		ack_seq++;
4626d0060f6SMat Martineau 		msk->ack_seq = ack_seq;
463648ef4b8SMat Martineau 		subflow->map_seq = ack_seq;
464648ef4b8SMat Martineau 		subflow->map_subflow_seq = 1;
4656d0060f6SMat Martineau 		subflow->rel_write_seq = 1;
466648ef4b8SMat Martineau 		subflow->tcp_sock = ssk;
467cf7da0d6SPeter Krystad 		newsk = new_mptcp_sock;
468cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(newsk, ssk);
469cf7da0d6SPeter Krystad 		list_add(&subflow->node, &msk->conn_list);
470cf7da0d6SPeter Krystad 
471cf7da0d6SPeter Krystad 		/* will be fully established at mptcp_stream_accept()
472cf7da0d6SPeter Krystad 		 * completion.
473cf7da0d6SPeter Krystad 		 */
474cf7da0d6SPeter Krystad 		inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
475cf7da0d6SPeter Krystad 		bh_unlock_sock(new_mptcp_sock);
476cf7da0d6SPeter Krystad 		local_bh_enable();
477cf7da0d6SPeter Krystad 		release_sock(sk);
478cf7da0d6SPeter Krystad 	}
479cf7da0d6SPeter Krystad 
480cf7da0d6SPeter Krystad 	return newsk;
481cf7da0d6SPeter Krystad }
482cf7da0d6SPeter Krystad 
/* ->destroy() hook of mptcp_prot; intentionally a no-op. */
static void mptcp_destroy(struct sock *sk)
{
	/* nothing to tear down here */
}
48679c0949eSPeter Krystad 
487717e79c8SPeter Krystad static int mptcp_setsockopt(struct sock *sk, int level, int optname,
488717e79c8SPeter Krystad 			    char __user *uoptval, unsigned int optlen)
489717e79c8SPeter Krystad {
490717e79c8SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
491717e79c8SPeter Krystad 	char __kernel *optval;
492717e79c8SPeter Krystad 	int ret = -EOPNOTSUPP;
493717e79c8SPeter Krystad 	struct socket *ssock;
494717e79c8SPeter Krystad 
495717e79c8SPeter Krystad 	/* will be treated as __user in tcp_setsockopt */
496717e79c8SPeter Krystad 	optval = (char __kernel __force *)uoptval;
497717e79c8SPeter Krystad 
498717e79c8SPeter Krystad 	pr_debug("msk=%p", msk);
499717e79c8SPeter Krystad 
500717e79c8SPeter Krystad 	/* @@ the meaning of setsockopt() when the socket is connected and
501717e79c8SPeter Krystad 	 * there are multiple subflows is not defined.
502717e79c8SPeter Krystad 	 */
503717e79c8SPeter Krystad 	lock_sock(sk);
504717e79c8SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
505717e79c8SPeter Krystad 	if (!IS_ERR(ssock)) {
506717e79c8SPeter Krystad 		pr_debug("subflow=%p", ssock->sk);
507717e79c8SPeter Krystad 		ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
508717e79c8SPeter Krystad 	}
509717e79c8SPeter Krystad 	release_sock(sk);
510717e79c8SPeter Krystad 
511717e79c8SPeter Krystad 	return ret;
512717e79c8SPeter Krystad }
513717e79c8SPeter Krystad 
514717e79c8SPeter Krystad static int mptcp_getsockopt(struct sock *sk, int level, int optname,
515717e79c8SPeter Krystad 			    char __user *uoptval, int __user *uoption)
516717e79c8SPeter Krystad {
517717e79c8SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
518717e79c8SPeter Krystad 	char __kernel *optval;
519717e79c8SPeter Krystad 	int ret = -EOPNOTSUPP;
520717e79c8SPeter Krystad 	int __kernel *option;
521717e79c8SPeter Krystad 	struct socket *ssock;
522717e79c8SPeter Krystad 
523717e79c8SPeter Krystad 	/* will be treated as __user in tcp_getsockopt */
524717e79c8SPeter Krystad 	optval = (char __kernel __force *)uoptval;
525717e79c8SPeter Krystad 	option = (int __kernel __force *)uoption;
526717e79c8SPeter Krystad 
527717e79c8SPeter Krystad 	pr_debug("msk=%p", msk);
528717e79c8SPeter Krystad 
529717e79c8SPeter Krystad 	/* @@ the meaning of getsockopt() when the socket is connected and
530717e79c8SPeter Krystad 	 * there are multiple subflows is not defined.
531717e79c8SPeter Krystad 	 */
532717e79c8SPeter Krystad 	lock_sock(sk);
533717e79c8SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
534717e79c8SPeter Krystad 	if (!IS_ERR(ssock)) {
535717e79c8SPeter Krystad 		pr_debug("subflow=%p", ssock->sk);
536717e79c8SPeter Krystad 		ret = kernel_getsockopt(ssock, level, optname, optval, option);
537717e79c8SPeter Krystad 	}
538717e79c8SPeter Krystad 	release_sock(sk);
539717e79c8SPeter Krystad 
540717e79c8SPeter Krystad 	return ret;
541717e79c8SPeter Krystad }
542717e79c8SPeter Krystad 
543cec37a6eSPeter Krystad static int mptcp_get_port(struct sock *sk, unsigned short snum)
544f870fa0bSMat Martineau {
545f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
546cec37a6eSPeter Krystad 	struct socket *ssock;
547f870fa0bSMat Martineau 
548cec37a6eSPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
549cec37a6eSPeter Krystad 	pr_debug("msk=%p, subflow=%p", msk, ssock);
550cec37a6eSPeter Krystad 	if (WARN_ON_ONCE(!ssock))
551cec37a6eSPeter Krystad 		return -EINVAL;
552f870fa0bSMat Martineau 
553cec37a6eSPeter Krystad 	return inet_csk_get_port(ssock->sk, snum);
554cec37a6eSPeter Krystad }
555f870fa0bSMat Martineau 
556cec37a6eSPeter Krystad void mptcp_finish_connect(struct sock *ssk)
557cec37a6eSPeter Krystad {
558cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow;
559cec37a6eSPeter Krystad 	struct mptcp_sock *msk;
560cec37a6eSPeter Krystad 	struct sock *sk;
5616d0060f6SMat Martineau 	u64 ack_seq;
562f870fa0bSMat Martineau 
563cec37a6eSPeter Krystad 	subflow = mptcp_subflow_ctx(ssk);
564f870fa0bSMat Martineau 
565cec37a6eSPeter Krystad 	if (!subflow->mp_capable)
566cec37a6eSPeter Krystad 		return;
567cec37a6eSPeter Krystad 
568cec37a6eSPeter Krystad 	sk = subflow->conn;
569cec37a6eSPeter Krystad 	msk = mptcp_sk(sk);
570cec37a6eSPeter Krystad 
571648ef4b8SMat Martineau 	pr_debug("msk=%p, token=%u", sk, subflow->token);
572648ef4b8SMat Martineau 
5736d0060f6SMat Martineau 	mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
5746d0060f6SMat Martineau 	ack_seq++;
575648ef4b8SMat Martineau 	subflow->map_seq = ack_seq;
576648ef4b8SMat Martineau 	subflow->map_subflow_seq = 1;
5776d0060f6SMat Martineau 	subflow->rel_write_seq = 1;
5786d0060f6SMat Martineau 
579cec37a6eSPeter Krystad 	/* the socket is not connected yet, no msk/subflow ops can access/race
580cec37a6eSPeter Krystad 	 * accessing the field below
581cec37a6eSPeter Krystad 	 */
582cec37a6eSPeter Krystad 	WRITE_ONCE(msk->remote_key, subflow->remote_key);
583cec37a6eSPeter Krystad 	WRITE_ONCE(msk->local_key, subflow->local_key);
58479c0949eSPeter Krystad 	WRITE_ONCE(msk->token, subflow->token);
5856d0060f6SMat Martineau 	WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
5866d0060f6SMat Martineau 	WRITE_ONCE(msk->ack_seq, ack_seq);
587f870fa0bSMat Martineau }
588f870fa0bSMat Martineau 
589cf7da0d6SPeter Krystad static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
590cf7da0d6SPeter Krystad {
591cf7da0d6SPeter Krystad 	write_lock_bh(&sk->sk_callback_lock);
592cf7da0d6SPeter Krystad 	rcu_assign_pointer(sk->sk_wq, &parent->wq);
593cf7da0d6SPeter Krystad 	sk_set_socket(sk, parent);
594cf7da0d6SPeter Krystad 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
595cf7da0d6SPeter Krystad 	write_unlock_bh(&sk->sk_callback_lock);
596cf7da0d6SPeter Krystad }
597cf7da0d6SPeter Krystad 
598*1891c4a0SFlorian Westphal static bool mptcp_memory_free(const struct sock *sk, int wake)
599*1891c4a0SFlorian Westphal {
600*1891c4a0SFlorian Westphal 	struct mptcp_sock *msk = mptcp_sk(sk);
601*1891c4a0SFlorian Westphal 
602*1891c4a0SFlorian Westphal 	return wake ? test_bit(MPTCP_SEND_SPACE, &msk->flags) : true;
603*1891c4a0SFlorian Westphal }
604*1891c4a0SFlorian Westphal 
605f870fa0bSMat Martineau static struct proto mptcp_prot = {
606f870fa0bSMat Martineau 	.name		= "MPTCP",
607f870fa0bSMat Martineau 	.owner		= THIS_MODULE,
608f870fa0bSMat Martineau 	.init		= mptcp_init_sock,
609f870fa0bSMat Martineau 	.close		= mptcp_close,
610cf7da0d6SPeter Krystad 	.accept		= mptcp_accept,
611717e79c8SPeter Krystad 	.setsockopt	= mptcp_setsockopt,
612717e79c8SPeter Krystad 	.getsockopt	= mptcp_getsockopt,
613f870fa0bSMat Martineau 	.shutdown	= tcp_shutdown,
61479c0949eSPeter Krystad 	.destroy	= mptcp_destroy,
615f870fa0bSMat Martineau 	.sendmsg	= mptcp_sendmsg,
616f870fa0bSMat Martineau 	.recvmsg	= mptcp_recvmsg,
617f870fa0bSMat Martineau 	.hash		= inet_hash,
618f870fa0bSMat Martineau 	.unhash		= inet_unhash,
619cec37a6eSPeter Krystad 	.get_port	= mptcp_get_port,
620*1891c4a0SFlorian Westphal 	.stream_memory_free	= mptcp_memory_free,
621f870fa0bSMat Martineau 	.obj_size	= sizeof(struct mptcp_sock),
622f870fa0bSMat Martineau 	.no_autobind	= true,
623f870fa0bSMat Martineau };
624f870fa0bSMat Martineau 
6252303f994SPeter Krystad static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
6262303f994SPeter Krystad {
6272303f994SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
6282303f994SPeter Krystad 	struct socket *ssock;
629cf7da0d6SPeter Krystad 	int err;
6302303f994SPeter Krystad 
6312303f994SPeter Krystad 	lock_sock(sock->sk);
6322303f994SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
6332303f994SPeter Krystad 	if (IS_ERR(ssock)) {
6342303f994SPeter Krystad 		err = PTR_ERR(ssock);
6352303f994SPeter Krystad 		goto unlock;
6362303f994SPeter Krystad 	}
6372303f994SPeter Krystad 
6382303f994SPeter Krystad 	err = ssock->ops->bind(ssock, uaddr, addr_len);
639cf7da0d6SPeter Krystad 	if (!err)
640cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(sock->sk, ssock->sk);
6412303f994SPeter Krystad 
6422303f994SPeter Krystad unlock:
6432303f994SPeter Krystad 	release_sock(sock->sk);
6442303f994SPeter Krystad 	return err;
6452303f994SPeter Krystad }
6462303f994SPeter Krystad 
6472303f994SPeter Krystad static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
6482303f994SPeter Krystad 				int addr_len, int flags)
6492303f994SPeter Krystad {
6502303f994SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
6512303f994SPeter Krystad 	struct socket *ssock;
6522303f994SPeter Krystad 	int err;
6532303f994SPeter Krystad 
6542303f994SPeter Krystad 	lock_sock(sock->sk);
6552303f994SPeter Krystad 	ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
6562303f994SPeter Krystad 	if (IS_ERR(ssock)) {
6572303f994SPeter Krystad 		err = PTR_ERR(ssock);
6582303f994SPeter Krystad 		goto unlock;
6592303f994SPeter Krystad 	}
6602303f994SPeter Krystad 
661cf7da0d6SPeter Krystad #ifdef CONFIG_TCP_MD5SIG
662cf7da0d6SPeter Krystad 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
663cf7da0d6SPeter Krystad 	 * TCP option space.
664cf7da0d6SPeter Krystad 	 */
665cf7da0d6SPeter Krystad 	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
666cf7da0d6SPeter Krystad 		mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
667cf7da0d6SPeter Krystad #endif
668cf7da0d6SPeter Krystad 
6692303f994SPeter Krystad 	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
6702303f994SPeter Krystad 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
671cf7da0d6SPeter Krystad 	mptcp_copy_inaddrs(sock->sk, ssock->sk);
6722303f994SPeter Krystad 
6732303f994SPeter Krystad unlock:
6742303f994SPeter Krystad 	release_sock(sock->sk);
6752303f994SPeter Krystad 	return err;
6762303f994SPeter Krystad }
6772303f994SPeter Krystad 
678cf7da0d6SPeter Krystad static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
679cf7da0d6SPeter Krystad 			    int peer)
680cf7da0d6SPeter Krystad {
681cf7da0d6SPeter Krystad 	if (sock->sk->sk_prot == &tcp_prot) {
682cf7da0d6SPeter Krystad 		/* we are being invoked from __sys_accept4, after
683cf7da0d6SPeter Krystad 		 * mptcp_accept() has just accepted a non-mp-capable
684cf7da0d6SPeter Krystad 		 * flow: sk is a tcp_sk, not an mptcp one.
685cf7da0d6SPeter Krystad 		 *
686cf7da0d6SPeter Krystad 		 * Hand the socket over to tcp so all further socket ops
687cf7da0d6SPeter Krystad 		 * bypass mptcp.
688cf7da0d6SPeter Krystad 		 */
689cf7da0d6SPeter Krystad 		sock->ops = &inet_stream_ops;
690cf7da0d6SPeter Krystad 	}
691cf7da0d6SPeter Krystad 
692cf7da0d6SPeter Krystad 	return inet_getname(sock, uaddr, peer);
693cf7da0d6SPeter Krystad }
694cf7da0d6SPeter Krystad 
695cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
696cf7da0d6SPeter Krystad static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
697cf7da0d6SPeter Krystad 			    int peer)
698cf7da0d6SPeter Krystad {
699cf7da0d6SPeter Krystad 	if (sock->sk->sk_prot == &tcpv6_prot) {
700cf7da0d6SPeter Krystad 		/* we are being invoked from __sys_accept4 after
701cf7da0d6SPeter Krystad 		 * mptcp_accept() has accepted a non-mp-capable
702cf7da0d6SPeter Krystad 		 * subflow: sk is a tcp_sk, not mptcp.
703cf7da0d6SPeter Krystad 		 *
704cf7da0d6SPeter Krystad 		 * Hand the socket over to tcp so all further
705cf7da0d6SPeter Krystad 		 * socket ops bypass mptcp.
706cf7da0d6SPeter Krystad 		 */
707cf7da0d6SPeter Krystad 		sock->ops = &inet6_stream_ops;
708cf7da0d6SPeter Krystad 	}
709cf7da0d6SPeter Krystad 
710cf7da0d6SPeter Krystad 	return inet6_getname(sock, uaddr, peer);
711cf7da0d6SPeter Krystad }
712cf7da0d6SPeter Krystad #endif
713cf7da0d6SPeter Krystad 
714cf7da0d6SPeter Krystad static int mptcp_listen(struct socket *sock, int backlog)
715cf7da0d6SPeter Krystad {
716cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
717cf7da0d6SPeter Krystad 	struct socket *ssock;
718cf7da0d6SPeter Krystad 	int err;
719cf7da0d6SPeter Krystad 
720cf7da0d6SPeter Krystad 	pr_debug("msk=%p", msk);
721cf7da0d6SPeter Krystad 
722cf7da0d6SPeter Krystad 	lock_sock(sock->sk);
723cf7da0d6SPeter Krystad 	ssock = __mptcp_socket_create(msk, TCP_LISTEN);
724cf7da0d6SPeter Krystad 	if (IS_ERR(ssock)) {
725cf7da0d6SPeter Krystad 		err = PTR_ERR(ssock);
726cf7da0d6SPeter Krystad 		goto unlock;
727cf7da0d6SPeter Krystad 	}
728cf7da0d6SPeter Krystad 
729cf7da0d6SPeter Krystad 	err = ssock->ops->listen(ssock, backlog);
730cf7da0d6SPeter Krystad 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
731cf7da0d6SPeter Krystad 	if (!err)
732cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(sock->sk, ssock->sk);
733cf7da0d6SPeter Krystad 
734cf7da0d6SPeter Krystad unlock:
735cf7da0d6SPeter Krystad 	release_sock(sock->sk);
736cf7da0d6SPeter Krystad 	return err;
737cf7da0d6SPeter Krystad }
738cf7da0d6SPeter Krystad 
739cf7da0d6SPeter Krystad static bool is_tcp_proto(const struct proto *p)
740cf7da0d6SPeter Krystad {
741cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
742cf7da0d6SPeter Krystad 	return p == &tcp_prot || p == &tcpv6_prot;
743cf7da0d6SPeter Krystad #else
744cf7da0d6SPeter Krystad 	return p == &tcp_prot;
745cf7da0d6SPeter Krystad #endif
746cf7da0d6SPeter Krystad }
747cf7da0d6SPeter Krystad 
748cf7da0d6SPeter Krystad static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
749cf7da0d6SPeter Krystad 			       int flags, bool kern)
750cf7da0d6SPeter Krystad {
751cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
752cf7da0d6SPeter Krystad 	struct socket *ssock;
753cf7da0d6SPeter Krystad 	int err;
754cf7da0d6SPeter Krystad 
755cf7da0d6SPeter Krystad 	pr_debug("msk=%p", msk);
756cf7da0d6SPeter Krystad 
757cf7da0d6SPeter Krystad 	lock_sock(sock->sk);
758cf7da0d6SPeter Krystad 	if (sock->sk->sk_state != TCP_LISTEN)
759cf7da0d6SPeter Krystad 		goto unlock_fail;
760cf7da0d6SPeter Krystad 
761cf7da0d6SPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
762cf7da0d6SPeter Krystad 	if (!ssock)
763cf7da0d6SPeter Krystad 		goto unlock_fail;
764cf7da0d6SPeter Krystad 
765cf7da0d6SPeter Krystad 	sock_hold(ssock->sk);
766cf7da0d6SPeter Krystad 	release_sock(sock->sk);
767cf7da0d6SPeter Krystad 
768cf7da0d6SPeter Krystad 	err = ssock->ops->accept(sock, newsock, flags, kern);
769cf7da0d6SPeter Krystad 	if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
770cf7da0d6SPeter Krystad 		struct mptcp_sock *msk = mptcp_sk(newsock->sk);
771cf7da0d6SPeter Krystad 		struct mptcp_subflow_context *subflow;
772cf7da0d6SPeter Krystad 
773cf7da0d6SPeter Krystad 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
774cf7da0d6SPeter Krystad 		 * This is needed so NOSPACE flag can be set from tcp stack.
775cf7da0d6SPeter Krystad 		 */
776cf7da0d6SPeter Krystad 		list_for_each_entry(subflow, &msk->conn_list, node) {
777cf7da0d6SPeter Krystad 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
778cf7da0d6SPeter Krystad 
779cf7da0d6SPeter Krystad 			if (!ssk->sk_socket)
780cf7da0d6SPeter Krystad 				mptcp_sock_graft(ssk, newsock);
781cf7da0d6SPeter Krystad 		}
782cf7da0d6SPeter Krystad 
783cf7da0d6SPeter Krystad 		inet_sk_state_store(newsock->sk, TCP_ESTABLISHED);
784cf7da0d6SPeter Krystad 	}
785cf7da0d6SPeter Krystad 
786cf7da0d6SPeter Krystad 	sock_put(ssock->sk);
787cf7da0d6SPeter Krystad 	return err;
788cf7da0d6SPeter Krystad 
789cf7da0d6SPeter Krystad unlock_fail:
790cf7da0d6SPeter Krystad 	release_sock(sock->sk);
791cf7da0d6SPeter Krystad 	return -EINVAL;
792cf7da0d6SPeter Krystad }
793cf7da0d6SPeter Krystad 
7942303f994SPeter Krystad static __poll_t mptcp_poll(struct file *file, struct socket *sock,
7952303f994SPeter Krystad 			   struct poll_table_struct *wait)
7962303f994SPeter Krystad {
797*1891c4a0SFlorian Westphal 	const struct mptcp_sock *msk;
798*1891c4a0SFlorian Westphal 	struct sock *sk = sock->sk;
799*1891c4a0SFlorian Westphal 	struct socket *ssock;
8002303f994SPeter Krystad 	__poll_t mask = 0;
8012303f994SPeter Krystad 
802*1891c4a0SFlorian Westphal 	msk = mptcp_sk(sk);
803*1891c4a0SFlorian Westphal 	lock_sock(sk);
804*1891c4a0SFlorian Westphal 	ssock = __mptcp_nmpc_socket(msk);
805*1891c4a0SFlorian Westphal 	if (ssock) {
806*1891c4a0SFlorian Westphal 		mask = ssock->ops->poll(file, ssock, wait);
807*1891c4a0SFlorian Westphal 		release_sock(sk);
808*1891c4a0SFlorian Westphal 		return mask;
809*1891c4a0SFlorian Westphal 	}
810*1891c4a0SFlorian Westphal 
811*1891c4a0SFlorian Westphal 	release_sock(sk);
812*1891c4a0SFlorian Westphal 	sock_poll_wait(file, sock, wait);
813*1891c4a0SFlorian Westphal 	lock_sock(sk);
814*1891c4a0SFlorian Westphal 
815*1891c4a0SFlorian Westphal 	if (test_bit(MPTCP_DATA_READY, &msk->flags))
816*1891c4a0SFlorian Westphal 		mask = EPOLLIN | EPOLLRDNORM;
817*1891c4a0SFlorian Westphal 	if (sk_stream_is_writeable(sk) &&
818*1891c4a0SFlorian Westphal 	    test_bit(MPTCP_SEND_SPACE, &msk->flags))
819*1891c4a0SFlorian Westphal 		mask |= EPOLLOUT | EPOLLWRNORM;
820*1891c4a0SFlorian Westphal 	if (sk->sk_shutdown & RCV_SHUTDOWN)
821*1891c4a0SFlorian Westphal 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
822*1891c4a0SFlorian Westphal 
823*1891c4a0SFlorian Westphal 	release_sock(sk);
824*1891c4a0SFlorian Westphal 
8252303f994SPeter Krystad 	return mask;
8262303f994SPeter Krystad }
8272303f994SPeter Krystad 
82821498490SPeter Krystad static int mptcp_shutdown(struct socket *sock, int how)
82921498490SPeter Krystad {
83021498490SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
83121498490SPeter Krystad 	struct mptcp_subflow_context *subflow;
83221498490SPeter Krystad 	int ret = 0;
83321498490SPeter Krystad 
83421498490SPeter Krystad 	pr_debug("sk=%p, how=%d", msk, how);
83521498490SPeter Krystad 
83621498490SPeter Krystad 	lock_sock(sock->sk);
83721498490SPeter Krystad 
83821498490SPeter Krystad 	if (how == SHUT_WR || how == SHUT_RDWR)
83921498490SPeter Krystad 		inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
84021498490SPeter Krystad 
84121498490SPeter Krystad 	how++;
84221498490SPeter Krystad 
84321498490SPeter Krystad 	if ((how & ~SHUTDOWN_MASK) || !how) {
84421498490SPeter Krystad 		ret = -EINVAL;
84521498490SPeter Krystad 		goto out_unlock;
84621498490SPeter Krystad 	}
84721498490SPeter Krystad 
84821498490SPeter Krystad 	if (sock->state == SS_CONNECTING) {
84921498490SPeter Krystad 		if ((1 << sock->sk->sk_state) &
85021498490SPeter Krystad 		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
85121498490SPeter Krystad 			sock->state = SS_DISCONNECTING;
85221498490SPeter Krystad 		else
85321498490SPeter Krystad 			sock->state = SS_CONNECTED;
85421498490SPeter Krystad 	}
85521498490SPeter Krystad 
85621498490SPeter Krystad 	mptcp_for_each_subflow(msk, subflow) {
85721498490SPeter Krystad 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
85821498490SPeter Krystad 
85921498490SPeter Krystad 		mptcp_subflow_shutdown(tcp_sk, how);
86021498490SPeter Krystad 	}
86121498490SPeter Krystad 
86221498490SPeter Krystad out_unlock:
86321498490SPeter Krystad 	release_sock(sock->sk);
86421498490SPeter Krystad 
86521498490SPeter Krystad 	return ret;
86621498490SPeter Krystad }
86721498490SPeter Krystad 
8682303f994SPeter Krystad static struct proto_ops mptcp_stream_ops;
8692303f994SPeter Krystad 
870f870fa0bSMat Martineau static struct inet_protosw mptcp_protosw = {
871f870fa0bSMat Martineau 	.type		= SOCK_STREAM,
872f870fa0bSMat Martineau 	.protocol	= IPPROTO_MPTCP,
873f870fa0bSMat Martineau 	.prot		= &mptcp_prot,
8742303f994SPeter Krystad 	.ops		= &mptcp_stream_ops,
8752303f994SPeter Krystad 	.flags		= INET_PROTOSW_ICSK,
876f870fa0bSMat Martineau };
877f870fa0bSMat Martineau 
878f870fa0bSMat Martineau void __init mptcp_init(void)
879f870fa0bSMat Martineau {
8802303f994SPeter Krystad 	mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
8812303f994SPeter Krystad 	mptcp_stream_ops = inet_stream_ops;
8822303f994SPeter Krystad 	mptcp_stream_ops.bind = mptcp_bind;
8832303f994SPeter Krystad 	mptcp_stream_ops.connect = mptcp_stream_connect;
8842303f994SPeter Krystad 	mptcp_stream_ops.poll = mptcp_poll;
885cf7da0d6SPeter Krystad 	mptcp_stream_ops.accept = mptcp_stream_accept;
886cf7da0d6SPeter Krystad 	mptcp_stream_ops.getname = mptcp_v4_getname;
887cf7da0d6SPeter Krystad 	mptcp_stream_ops.listen = mptcp_listen;
88821498490SPeter Krystad 	mptcp_stream_ops.shutdown = mptcp_shutdown;
8892303f994SPeter Krystad 
8902303f994SPeter Krystad 	mptcp_subflow_init();
8912303f994SPeter Krystad 
892f870fa0bSMat Martineau 	if (proto_register(&mptcp_prot, 1) != 0)
893f870fa0bSMat Martineau 		panic("Failed to register MPTCP proto.\n");
894f870fa0bSMat Martineau 
895f870fa0bSMat Martineau 	inet_register_protosw(&mptcp_protosw);
896f870fa0bSMat Martineau }
897f870fa0bSMat Martineau 
898f870fa0bSMat Martineau #if IS_ENABLED(CONFIG_MPTCP_IPV6)
8992303f994SPeter Krystad static struct proto_ops mptcp_v6_stream_ops;
900f870fa0bSMat Martineau static struct proto mptcp_v6_prot;
901f870fa0bSMat Martineau 
90279c0949eSPeter Krystad static void mptcp_v6_destroy(struct sock *sk)
90379c0949eSPeter Krystad {
90479c0949eSPeter Krystad 	mptcp_destroy(sk);
90579c0949eSPeter Krystad 	inet6_destroy_sock(sk);
90679c0949eSPeter Krystad }
90779c0949eSPeter Krystad 
908f870fa0bSMat Martineau static struct inet_protosw mptcp_v6_protosw = {
909f870fa0bSMat Martineau 	.type		= SOCK_STREAM,
910f870fa0bSMat Martineau 	.protocol	= IPPROTO_MPTCP,
911f870fa0bSMat Martineau 	.prot		= &mptcp_v6_prot,
9122303f994SPeter Krystad 	.ops		= &mptcp_v6_stream_ops,
913f870fa0bSMat Martineau 	.flags		= INET_PROTOSW_ICSK,
914f870fa0bSMat Martineau };
915f870fa0bSMat Martineau 
916f870fa0bSMat Martineau int mptcpv6_init(void)
917f870fa0bSMat Martineau {
918f870fa0bSMat Martineau 	int err;
919f870fa0bSMat Martineau 
920f870fa0bSMat Martineau 	mptcp_v6_prot = mptcp_prot;
921f870fa0bSMat Martineau 	strcpy(mptcp_v6_prot.name, "MPTCPv6");
922f870fa0bSMat Martineau 	mptcp_v6_prot.slab = NULL;
92379c0949eSPeter Krystad 	mptcp_v6_prot.destroy = mptcp_v6_destroy;
924f870fa0bSMat Martineau 	mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) +
925f870fa0bSMat Martineau 				 sizeof(struct ipv6_pinfo);
926f870fa0bSMat Martineau 
927f870fa0bSMat Martineau 	err = proto_register(&mptcp_v6_prot, 1);
928f870fa0bSMat Martineau 	if (err)
929f870fa0bSMat Martineau 		return err;
930f870fa0bSMat Martineau 
9312303f994SPeter Krystad 	mptcp_v6_stream_ops = inet6_stream_ops;
9322303f994SPeter Krystad 	mptcp_v6_stream_ops.bind = mptcp_bind;
9332303f994SPeter Krystad 	mptcp_v6_stream_ops.connect = mptcp_stream_connect;
9342303f994SPeter Krystad 	mptcp_v6_stream_ops.poll = mptcp_poll;
935cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.accept = mptcp_stream_accept;
936cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.getname = mptcp_v6_getname;
937cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.listen = mptcp_listen;
93821498490SPeter Krystad 	mptcp_v6_stream_ops.shutdown = mptcp_shutdown;
9392303f994SPeter Krystad 
940f870fa0bSMat Martineau 	err = inet6_register_protosw(&mptcp_v6_protosw);
941f870fa0bSMat Martineau 	if (err)
942f870fa0bSMat Martineau 		proto_unregister(&mptcp_v6_prot);
943f870fa0bSMat Martineau 
944f870fa0bSMat Martineau 	return err;
945f870fa0bSMat Martineau }
946f870fa0bSMat Martineau #endif
947