// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
3c6382918SCong Wang
4c6382918SCong Wang #include <linux/skmsg.h>
5c6382918SCong Wang #include <linux/bpf.h>
6c6382918SCong Wang #include <net/sock.h>
7c6382918SCong Wang #include <net/af_unix.h>
8c6382918SCong Wang
/*
 * unix_sk_has_data - is there anything queued for this socket?
 *
 * Evaluates to non-zero when at least one of the following is non-empty:
 * the socket receive queue of @__sk, the ingress_skb queue of @__psock,
 * or the ingress_msg list of @__psock.
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * pointer expressions can be passed safely.
 */
#define unix_sk_has_data(__sk, __psock)					\
		({	!skb_queue_empty(&(__sk)->sk_receive_queue) ||	\
			!skb_queue_empty(&(__psock)->ingress_skb) ||	\
			!list_empty(&(__psock)->ingress_msg);		\
		})
149825d866SCong Wang
unix_msg_wait_data(struct sock * sk,struct sk_psock * psock,long timeo)159825d866SCong Wang static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
169825d866SCong Wang long timeo)
179825d866SCong Wang {
189825d866SCong Wang DEFINE_WAIT_FUNC(wait, woken_wake_function);
199825d866SCong Wang struct unix_sock *u = unix_sk(sk);
209825d866SCong Wang int ret = 0;
219825d866SCong Wang
229825d866SCong Wang if (sk->sk_shutdown & RCV_SHUTDOWN)
239825d866SCong Wang return 1;
249825d866SCong Wang
259825d866SCong Wang if (!timeo)
269825d866SCong Wang return ret;
279825d866SCong Wang
289825d866SCong Wang add_wait_queue(sk_sleep(sk), &wait);
299825d866SCong Wang sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
309825d866SCong Wang if (!unix_sk_has_data(sk, psock)) {
319825d866SCong Wang mutex_unlock(&u->iolock);
329825d866SCong Wang wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
339825d866SCong Wang mutex_lock(&u->iolock);
349825d866SCong Wang ret = unix_sk_has_data(sk, psock);
359825d866SCong Wang }
369825d866SCong Wang sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
379825d866SCong Wang remove_wait_queue(sk_sleep(sk), &wait);
389825d866SCong Wang return ret;
399825d866SCong Wang }
409825d866SCong Wang
/*
 * __unix_recvmsg - dispatch to the regular AF_UNIX receive routine
 *
 * SOCK_DGRAM sockets go to __unix_dgram_recvmsg(); every other socket type
 * goes to __unix_stream_recvmsg().  The callee's return value is passed
 * through unchanged.
 */
static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t len, int flags)
{
	if (sk->sk_type != SOCK_DGRAM)
		return __unix_stream_recvmsg(sk, msg, len, flags);

	return __unix_dgram_recvmsg(sk, msg, len, flags);
}
4994531cfcSJiang Wang
/*
 * unix_bpf_recvmsg - recvmsg handler installed in the BPF-enabled protos
 * @sk:       socket being read
 * @msg:      destination message header
 * @len:      maximum number of bytes to receive
 * @flags:    MSG_* flags
 * @addr_len: not used by this function
 *
 * Returns 0 for a zero-length request.  Falls back to __unix_recvmsg() when
 * no psock can be obtained, when the socket receive queue holds skbs while
 * sk_psock_queue_empty() reports the psock as empty, or when waiting showed
 * data but the psock is still empty.  Otherwise returns whatever
 * sk_msg_recvmsg() produced, or -EAGAIN when unix_msg_wait_data() reported
 * no data.
 *
 * The unix socket's iolock is held around the psock path and released
 * before every return; the psock reference is dropped on every path that
 * obtained one.
 */
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
			    size_t len, int flags, int *addr_len)
{
	struct unix_sock *u = unix_sk(sk);
	struct sk_psock *psock;
	long timeo;
	int copied;

	if (!len)
		return 0;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return __unix_recvmsg(sk, msg, len, flags);

	mutex_lock(&u->iolock);
	if (!skb_queue_empty(&sk->sk_receive_queue) &&
	    sk_psock_queue_empty(psock))
		goto fallback;

	for (;;) {
		copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
		if (copied)
			break;

		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
		if (!unix_msg_wait_data(sk, psock, timeo)) {
			copied = -EAGAIN;
			break;
		}

		/* Data was reported but the psock has none of it. */
		if (sk_psock_queue_empty(psock))
			goto fallback;
	}

	mutex_unlock(&u->iolock);
	sk_psock_put(sk, psock);
	return copied;

fallback:
	mutex_unlock(&u->iolock);
	sk_psock_put(sk, psock);
	return __unix_recvmsg(sk, msg, len, flags);
}
939825d866SCong Wang
9494531cfcSJiang Wang static struct proto *unix_dgram_prot_saved __read_mostly;
9594531cfcSJiang Wang static DEFINE_SPINLOCK(unix_dgram_prot_lock);
9694531cfcSJiang Wang static struct proto unix_dgram_bpf_prot;
97c6382918SCong Wang
9894531cfcSJiang Wang static struct proto *unix_stream_prot_saved __read_mostly;
9994531cfcSJiang Wang static DEFINE_SPINLOCK(unix_stream_prot_lock);
10094531cfcSJiang Wang static struct proto unix_stream_bpf_prot;
10194531cfcSJiang Wang
unix_dgram_bpf_rebuild_protos(struct proto * prot,const struct proto * base)10294531cfcSJiang Wang static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
103c6382918SCong Wang {
104c6382918SCong Wang *prot = *base;
105c6382918SCong Wang prot->close = sock_map_close;
10694531cfcSJiang Wang prot->recvmsg = unix_bpf_recvmsg;
107af493388SCong Wang prot->sock_is_readable = sk_msg_is_readable;
108c6382918SCong Wang }
109c6382918SCong Wang
unix_stream_bpf_rebuild_protos(struct proto * prot,const struct proto * base)11094531cfcSJiang Wang static void unix_stream_bpf_rebuild_protos(struct proto *prot,
11194531cfcSJiang Wang const struct proto *base)
112c6382918SCong Wang {
11394531cfcSJiang Wang *prot = *base;
11494531cfcSJiang Wang prot->close = sock_map_close;
11594531cfcSJiang Wang prot->recvmsg = unix_bpf_recvmsg;
116af493388SCong Wang prot->sock_is_readable = sk_msg_is_readable;
11794531cfcSJiang Wang prot->unhash = sock_map_unhash;
118c6382918SCong Wang }
11994531cfcSJiang Wang
unix_dgram_bpf_check_needs_rebuild(struct proto * ops)12094531cfcSJiang Wang static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
12194531cfcSJiang Wang {
12294531cfcSJiang Wang if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
12394531cfcSJiang Wang spin_lock_bh(&unix_dgram_prot_lock);
12494531cfcSJiang Wang if (likely(ops != unix_dgram_prot_saved)) {
12594531cfcSJiang Wang unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
12694531cfcSJiang Wang smp_store_release(&unix_dgram_prot_saved, ops);
12794531cfcSJiang Wang }
12894531cfcSJiang Wang spin_unlock_bh(&unix_dgram_prot_lock);
129c6382918SCong Wang }
130c6382918SCong Wang }
131c6382918SCong Wang
unix_stream_bpf_check_needs_rebuild(struct proto * ops)13294531cfcSJiang Wang static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
13394531cfcSJiang Wang {
13494531cfcSJiang Wang if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
13594531cfcSJiang Wang spin_lock_bh(&unix_stream_prot_lock);
13694531cfcSJiang Wang if (likely(ops != unix_stream_prot_saved)) {
13794531cfcSJiang Wang unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
13894531cfcSJiang Wang smp_store_release(&unix_stream_prot_saved, ops);
13994531cfcSJiang Wang }
14094531cfcSJiang Wang spin_unlock_bh(&unix_stream_prot_lock);
14194531cfcSJiang Wang }
14294531cfcSJiang Wang }
14394531cfcSJiang Wang
/*
 * unix_dgram_bpf_update_proto - install or remove the datagram BPF proto
 * @sk:      socket to update
 * @psock:   psock holding the saved proto and write_space callback
 * @restore: true to put the saved callbacks back, false to install the
 *           BPF proto
 *
 * Returns -EOPNOTSUPP when @sk is not a SOCK_DGRAM socket, 0 otherwise.
 */
int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	if (sk->sk_type != SOCK_DGRAM)
		return -EOPNOTSUPP;

	if (!restore) {
		unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
		sock_replace_proto(sk, &unix_dgram_bpf_prot);
		return 0;
	}

	sk->sk_write_space = psock->saved_write_space;
	sock_replace_proto(sk, psock->sk_proto);
	return 0;
}
15994531cfcSJiang Wang
/*
 * unix_stream_bpf_update_proto - install or remove the stream BPF proto
 * @sk:      socket to update
 * @psock:   psock holding the saved proto, write_space callback and the
 *           peer reference
 * @restore: true to put the saved callbacks back, false to install the
 *           BPF proto
 *
 * Returns 0 on success, or -EINVAL when the BPF proto is being installed
 * and @sk has no peer to take a reference on.
 */
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	struct sock *sk_pair;

	/* Restore does not decrement the sk_pair reference yet because we must
	 * keep a reference to the socket until after an RCU grace period
	 * and any pending sends have completed.
	 */
	if (restore) {
		sk->sk_write_space = psock->saved_write_space;
		sock_replace_proto(sk, psock->sk_proto);
		return 0;
	}

	/* psock_update_sk_prot can be called multiple times if psock is
	 * added to multiple maps and/or slots in the same map. There is
	 * also an edge case where replacing a psock with itself can trigger
	 * an extra psock_update_sk_prot during the insert process. So it
	 * must be safe to do multiple calls. Here we need to ensure we don't
	 * increment the refcnt through sock_hold many times. There will only
	 * be a single matching destroy operation.
	 */
	if (!psock->sk_pair) {
		/* The peer may already be gone; unix_peer_get() returns a
		 * held peer or NULL, so a missing peer is reported as an
		 * error instead of being dereferenced.
		 */
		sk_pair = unix_peer_get(sk);
		if (unlikely(!sk_pair))
			return -EINVAL;
		psock->sk_pair = sk_pair;
	}

	unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
	sock_replace_proto(sk, &unix_stream_bpf_prot);
	return 0;
}
192c6382918SCong Wang
unix_bpf_build_proto(void)193c6382918SCong Wang void __init unix_bpf_build_proto(void)
194c6382918SCong Wang {
19594531cfcSJiang Wang unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
19694531cfcSJiang Wang unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
19794531cfcSJiang Wang
198c6382918SCong Wang }
199