xref: /linux/net/unix/unix_bpf.c (revision 16b2f264)
1c6382918SCong Wang // SPDX-License-Identifier: GPL-2.0
2c6382918SCong Wang /* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
3c6382918SCong Wang 
4c6382918SCong Wang #include <linux/skmsg.h>
5c6382918SCong Wang #include <linux/bpf.h>
6c6382918SCong Wang #include <net/sock.h>
7c6382918SCong Wang #include <net/af_unix.h>
8c6382918SCong Wang 
/*
 * Evaluates to true when any of the three places that can hold received
 * data is non-empty: the socket's own receive queue, the psock's ingress
 * skb queue, or the psock's ingress message list.
 *
 * Both arguments are parenthesized so that callers may pass arbitrary
 * pointer expressions, not only plain identifiers.
 */
#define unix_sk_has_data(__sk, __psock)					\
		({	!skb_queue_empty(&(__sk)->sk_receive_queue) ||	\
			!skb_queue_empty(&(__psock)->ingress_skb) ||	\
			!list_empty(&(__psock)->ingress_msg);		\
		})
149825d866SCong Wang 
unix_msg_wait_data(struct sock * sk,struct sk_psock * psock,long timeo)159825d866SCong Wang static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
169825d866SCong Wang 			      long timeo)
179825d866SCong Wang {
189825d866SCong Wang 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
199825d866SCong Wang 	struct unix_sock *u = unix_sk(sk);
209825d866SCong Wang 	int ret = 0;
219825d866SCong Wang 
229825d866SCong Wang 	if (sk->sk_shutdown & RCV_SHUTDOWN)
239825d866SCong Wang 		return 1;
249825d866SCong Wang 
259825d866SCong Wang 	if (!timeo)
269825d866SCong Wang 		return ret;
279825d866SCong Wang 
289825d866SCong Wang 	add_wait_queue(sk_sleep(sk), &wait);
299825d866SCong Wang 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
309825d866SCong Wang 	if (!unix_sk_has_data(sk, psock)) {
319825d866SCong Wang 		mutex_unlock(&u->iolock);
329825d866SCong Wang 		wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
339825d866SCong Wang 		mutex_lock(&u->iolock);
349825d866SCong Wang 		ret = unix_sk_has_data(sk, psock);
359825d866SCong Wang 	}
369825d866SCong Wang 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
379825d866SCong Wang 	remove_wait_queue(sk_sleep(sk), &wait);
389825d866SCong Wang 	return ret;
399825d866SCong Wang }
409825d866SCong Wang 
/*
 * Hand the receive off to the regular AF_UNIX path: the datagram receive
 * routine for SOCK_DGRAM sockets, the stream receive routine for every
 * other socket type.
 */
static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t len, int flags)
{
	return sk->sk_type == SOCK_DGRAM ?
		__unix_dgram_recvmsg(sk, msg, len, flags) :
		__unix_stream_recvmsg(sk, msg, len, flags);
}
4994531cfcSJiang Wang 
/*
 * recvmsg handler installed in the BPF variants of the AF_UNIX protos.
 *
 * Data queued on the psock is delivered through sk_msg_recvmsg(). The
 * regular AF_UNIX receive path (__unix_recvmsg()) is used instead when:
 *   - the socket has no psock,
 *   - the socket receive queue has data while the psock queue is empty, or
 *   - a wait reported data but the psock queue is still empty.
 *
 * Returns 0 for a zero-length request, the value returned by whichever
 * receive path was used, or -EAGAIN when nothing was copied and the wait
 * reported no data. @addr_len is not used here.
 */
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
			    size_t len, int flags, int *addr_len)
{
	struct unix_sock *usk = unix_sk(sk);
	struct sk_psock *psock;
	bool use_native;
	int copied = 0;

	if (!len)
		return 0;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return __unix_recvmsg(sk, msg, len, flags);

	mutex_lock(&usk->iolock);

	/* Data only on the socket queue: nothing for the psock path to do. */
	use_native = !skb_queue_empty(&sk->sk_receive_queue) &&
		     sk_psock_queue_empty(psock);

	while (!use_native) {
		long timeo;

		copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
		if (copied)
			break;

		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
		if (!unix_msg_wait_data(sk, psock, timeo)) {
			copied = -EAGAIN;
			break;
		}

		/*
		 * Something arrived. Retry the psock path if that is where
		 * it landed, otherwise fall back to the regular receive.
		 */
		if (sk_psock_queue_empty(psock))
			use_native = true;
	}

	/* The lock and the psock reference are dropped before any fallback. */
	mutex_unlock(&usk->iolock);
	sk_psock_put(sk, psock);

	if (use_native)
		return __unix_recvmsg(sk, msg, len, flags);
	return copied;
}
939825d866SCong Wang 
9494531cfcSJiang Wang static struct proto *unix_dgram_prot_saved __read_mostly;
9594531cfcSJiang Wang static DEFINE_SPINLOCK(unix_dgram_prot_lock);
9694531cfcSJiang Wang static struct proto unix_dgram_bpf_prot;
97c6382918SCong Wang 
9894531cfcSJiang Wang static struct proto *unix_stream_prot_saved __read_mostly;
9994531cfcSJiang Wang static DEFINE_SPINLOCK(unix_stream_prot_lock);
10094531cfcSJiang Wang static struct proto unix_stream_bpf_prot;
10194531cfcSJiang Wang 
unix_dgram_bpf_rebuild_protos(struct proto * prot,const struct proto * base)10294531cfcSJiang Wang static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
103c6382918SCong Wang {
104c6382918SCong Wang 	*prot        = *base;
105c6382918SCong Wang 	prot->close  = sock_map_close;
10694531cfcSJiang Wang 	prot->recvmsg = unix_bpf_recvmsg;
107af493388SCong Wang 	prot->sock_is_readable = sk_msg_is_readable;
108c6382918SCong Wang }
109c6382918SCong Wang 
unix_stream_bpf_rebuild_protos(struct proto * prot,const struct proto * base)11094531cfcSJiang Wang static void unix_stream_bpf_rebuild_protos(struct proto *prot,
11194531cfcSJiang Wang 					   const struct proto *base)
112c6382918SCong Wang {
11394531cfcSJiang Wang 	*prot        = *base;
11494531cfcSJiang Wang 	prot->close  = sock_map_close;
11594531cfcSJiang Wang 	prot->recvmsg = unix_bpf_recvmsg;
116af493388SCong Wang 	prot->sock_is_readable = sk_msg_is_readable;
11794531cfcSJiang Wang 	prot->unhash  = sock_map_unhash;
118c6382918SCong Wang }
11994531cfcSJiang Wang 
unix_dgram_bpf_check_needs_rebuild(struct proto * ops)12094531cfcSJiang Wang static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
12194531cfcSJiang Wang {
12294531cfcSJiang Wang 	if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
12394531cfcSJiang Wang 		spin_lock_bh(&unix_dgram_prot_lock);
12494531cfcSJiang Wang 		if (likely(ops != unix_dgram_prot_saved)) {
12594531cfcSJiang Wang 			unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
12694531cfcSJiang Wang 			smp_store_release(&unix_dgram_prot_saved, ops);
12794531cfcSJiang Wang 		}
12894531cfcSJiang Wang 		spin_unlock_bh(&unix_dgram_prot_lock);
129c6382918SCong Wang 	}
130c6382918SCong Wang }
131c6382918SCong Wang 
unix_stream_bpf_check_needs_rebuild(struct proto * ops)13294531cfcSJiang Wang static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
13394531cfcSJiang Wang {
13494531cfcSJiang Wang 	if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
13594531cfcSJiang Wang 		spin_lock_bh(&unix_stream_prot_lock);
13694531cfcSJiang Wang 		if (likely(ops != unix_stream_prot_saved)) {
13794531cfcSJiang Wang 			unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
13894531cfcSJiang Wang 			smp_store_release(&unix_stream_prot_saved, ops);
13994531cfcSJiang Wang 		}
14094531cfcSJiang Wang 		spin_unlock_bh(&unix_stream_prot_lock);
14194531cfcSJiang Wang 	}
14294531cfcSJiang Wang }
14394531cfcSJiang Wang 
/*
 * Switch a datagram socket between its original proto and the BPF proto.
 *
 * With @restore set, the write_space callback and proto saved in @psock
 * are put back on @sk. Otherwise the BPF proto is refreshed against
 * psock->sk_proto if necessary and installed on @sk.
 *
 * Returns 0 on success, -EOPNOTSUPP if @sk is not SOCK_DGRAM.
 */
int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	if (sk->sk_type != SOCK_DGRAM)
		return -EOPNOTSUPP;

	if (!restore) {
		unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
		sock_replace_proto(sk, &unix_dgram_bpf_prot);
		return 0;
	}

	sk->sk_write_space = psock->saved_write_space;
	sock_replace_proto(sk, psock->sk_proto);
	return 0;
}

15994531cfcSJiang Wang 
/*
 * Switch a stream socket between its original proto and the BPF proto.
 *
 * With @restore set, the write_space callback and proto saved in @psock
 * are put back on @sk. Otherwise a reference on the peer socket is stored
 * in psock->sk_pair (once), the BPF proto is refreshed against
 * psock->sk_proto if necessary, and the BPF proto is installed on @sk.
 *
 * Returns 0 on success, -EINVAL if @sk has no peer to take a reference on.
 */
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	struct sock *sk_pair;

	/* Restore does not decrement the sk_pair reference yet because we must
	 * keep a reference to the socket until after an RCU grace period
	 * and any pending sends have completed.
	 */
	if (restore) {
		sk->sk_write_space = psock->saved_write_space;
		sock_replace_proto(sk, psock->sk_proto);
		return 0;
	}

	/* psock_update_sk_prot can be called multiple times if psock is
	 * added to multiple maps and/or slots in the same map. There is
	 * also an edge case where replacing a psock with itself can trigger
	 * an extra psock_update_sk_prot during the insert process. So it
	 * must be safe to do multiple calls. Here we need to ensure we don't
	 * increment the refcnt through sock_hold many times. There will only
	 * be a single matching destroy operation.
	 */
	if (!psock->sk_pair) {
		/* The peer may be missing, or may go away concurrently.
		 * unix_peer_get() reads the peer and takes the reference
		 * under the unix state lock and returns NULL when there is
		 * no peer, so a NULL or already-released socket is never
		 * passed to sock_hold().
		 */
		sk_pair = unix_peer_get(sk);
		if (unlikely(!sk_pair))
			return -EINVAL;
		psock->sk_pair = sk_pair;
	}

	unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
	sock_replace_proto(sk, &unix_stream_bpf_prot);
	return 0;
}

192c6382918SCong Wang 
unix_bpf_build_proto(void)193c6382918SCong Wang void __init unix_bpf_build_proto(void)
194c6382918SCong Wang {
19594531cfcSJiang Wang 	unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
19694531cfcSJiang Wang 	unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
19794531cfcSJiang Wang 
198c6382918SCong Wang }
199