xref: /openbsd/sys/netinet/ip_divert.c (revision c8a0ef6c)
1 /*      $OpenBSD: ip_divert.c,v 1.95 2024/03/05 09:45:13 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/in_pcb.h>
37 #include <netinet/ip_divert.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/ip_icmp.h>
41 
42 #include <net/pfvar.h>
43 
/* PCB table for divert sockets; initialised in divert_init(). */
struct	inpcbtable	divbtable;
/*
 * Statistics counters; allocated in divert_init() and read back by
 * divert_sysctl_divstat().
 */
struct	cpumem		*divcounters;

/*
 * Default socket buffer reservations handed to soreserve() in
 * divert_attach().  The compile-time defaults can be overridden by
 * defining the macros beforehand; the variables are also exposed through
 * divertctl_vars.
 */
#ifndef DIVERT_SENDSPACE
#define DIVERT_SENDSPACE	(65536 + 100)
#endif
u_int   divert_sendspace = DIVERT_SENDSPACE;
#ifndef DIVERT_RECVSPACE
#define DIVERT_RECVSPACE	(65536 + 100)
#endif
u_int   divert_recvspace = DIVERT_RECVSPACE;

/* Default size passed to in_pcbinit() for the divert PCB table. */
#ifndef DIVERTHASHSIZE
#define DIVERTHASHSIZE	128
#endif
59 
/*
 * Integer sysctl variables served by the default case of divert_sysctl()
 * through sysctl_bounded_arr().  Each entry gives the sysctl id and the
 * variable it maps to; the trailing two values are presumably the accepted
 * minimum and maximum -- confirm against struct sysctl_bounded_args.
 */
const struct sysctl_bounded_args divertctl_vars[] = {
	{ DIVERTCTL_RECVSPACE, &divert_recvspace, 0, INT_MAX },
	{ DIVERTCTL_SENDSPACE, &divert_sendspace, 0, INT_MAX },
};
64 
/*
 * User request dispatch table for divert sockets.  The divert_* handlers
 * are defined in this file; in_control, in_sockaddr and in_peeraddr are
 * not defined here and come from elsewhere in the tree.
 */
const struct pr_usrreqs divert_usrreqs = {
	.pru_attach	= divert_attach,
	.pru_detach	= divert_detach,
	.pru_lock	= divert_lock,
	.pru_unlock	= divert_unlock,
	.pru_locked	= divert_locked,
	.pru_bind	= divert_bind,
	.pru_shutdown	= divert_shutdown,
	.pru_send	= divert_send,
	.pru_control	= in_control,
	.pru_sockaddr	= in_sockaddr,
	.pru_peeraddr	= in_peeraddr,
};
78 
/* Size handed to in_pcbinit() for divbtable in divert_init(). */
int divbhashsize = DIVERTHASHSIZE;

/* Reinjection path used by divert_send(); defined below. */
int	divert_output(struct inpcb *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
83 void
divert_init(void)84 divert_init(void)
85 {
86 	in_pcbinit(&divbtable, divbhashsize);
87 	divcounters = counters_alloc(divs_ncounters);
88 }
89 
90 int
divert_output(struct inpcb * inp,struct mbuf * m,struct mbuf * nam,struct mbuf * control)91 divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
92     struct mbuf *control)
93 {
94 	struct sockaddr_in *sin;
95 	int error, min_hdrlen, off, dir;
96 	struct ip *ip;
97 
98 	m_freem(control);
99 
100 	if ((error = in_nam2sin(nam, &sin)))
101 		goto fail;
102 
103 	if (m->m_pkthdr.len > IP_MAXPACKET) {
104 		error = EMSGSIZE;
105 		goto fail;
106 	}
107 
108 	m = rip_chkhdr(m, NULL);
109 	if (m == NULL) {
110 		error = EINVAL;
111 		goto fail;
112 	}
113 
114 	ip = mtod(m, struct ip *);
115 	off = ip->ip_hl << 2;
116 
117 	dir = (sin->sin_addr.s_addr == INADDR_ANY ? PF_OUT : PF_IN);
118 
119 	switch (ip->ip_p) {
120 	case IPPROTO_TCP:
121 		min_hdrlen = sizeof(struct tcphdr);
122 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
123 		break;
124 	case IPPROTO_UDP:
125 		min_hdrlen = sizeof(struct udphdr);
126 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
127 		break;
128 	case IPPROTO_ICMP:
129 		min_hdrlen = ICMP_MINLEN;
130 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
131 		break;
132 	default:
133 		min_hdrlen = 0;
134 		break;
135 	}
136 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen) {
137 		error = EINVAL;
138 		goto fail;
139 	}
140 
141 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
142 
143 	if (dir == PF_IN) {
144 		struct rtentry *rt;
145 		struct ifnet *ifp;
146 
147 		rt = rtalloc(sintosa(sin), 0, inp->inp_rtableid);
148 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
149 			rtfree(rt);
150 			error = EADDRNOTAVAIL;
151 			goto fail;
152 		}
153 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
154 		rtfree(rt);
155 
156 		/*
157 		 * Recalculate IP and protocol checksums for the inbound packet
158 		 * since the userspace application may have modified the packet
159 		 * prior to reinjection.
160 		 */
161 		in_hdr_cksum_out(m, NULL);
162 		in_proto_cksum_out(m, NULL);
163 
164 		ifp = if_get(m->m_pkthdr.ph_ifidx);
165 		if (ifp == NULL) {
166 			error = ENETDOWN;
167 			goto fail;
168 		}
169 		ipv4_input(ifp, m);
170 		if_put(ifp);
171 	} else {
172 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
173 
174 		error = ip_output(m, NULL, &inp->inp_route,
175 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0);
176 	}
177 
178 	divstat_inc(divs_opackets);
179 	return (error);
180 
181 fail:
182 	m_freem(m);
183 	divstat_inc(divs_errors);
184 	return (error);
185 }
186 
187 void
divert_packet(struct mbuf * m,int dir,u_int16_t divert_port)188 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
189 {
190 	struct inpcb *inp = NULL;
191 	struct socket *so;
192 	struct sockaddr_in sin;
193 
194 	divstat_inc(divs_ipackets);
195 
196 	if (m->m_len < sizeof(struct ip) &&
197 	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
198 		divstat_inc(divs_errors);
199 		goto bad;
200 	}
201 
202 	mtx_enter(&divbtable.inpt_mtx);
203 	TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
204 		if (inp->inp_lport != divert_port)
205 			continue;
206 		in_pcbref(inp);
207 		break;
208 	}
209 	mtx_leave(&divbtable.inpt_mtx);
210 	if (inp == NULL) {
211 		divstat_inc(divs_noport);
212 		goto bad;
213 	}
214 
215 	memset(&sin, 0, sizeof(sin));
216 	sin.sin_family = AF_INET;
217 	sin.sin_len = sizeof(sin);
218 
219 	if (dir == PF_IN) {
220 		struct ifaddr *ifa;
221 		struct ifnet *ifp;
222 
223 		ifp = if_get(m->m_pkthdr.ph_ifidx);
224 		if (ifp == NULL) {
225 			divstat_inc(divs_errors);
226 			goto bad;
227 		}
228 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
229 			if (ifa->ifa_addr->sa_family != AF_INET)
230 				continue;
231 			sin.sin_addr = satosin(ifa->ifa_addr)->sin_addr;
232 			break;
233 		}
234 		if_put(ifp);
235 	} else {
236 		/*
237 		 * Calculate IP and protocol checksums for outbound packet
238 		 * diverted to userland.  pf rule diverts before cksum offload.
239 		 */
240 		in_hdr_cksum_out(m, NULL);
241 		in_proto_cksum_out(m, NULL);
242 	}
243 
244 	so = inp->inp_socket;
245 	mtx_enter(&so->so_rcv.sb_mtx);
246 	if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
247 		mtx_leave(&so->so_rcv.sb_mtx);
248 		divstat_inc(divs_fullsock);
249 		goto bad;
250 	}
251 	mtx_leave(&so->so_rcv.sb_mtx);
252 	sorwakeup(so);
253 
254 	in_pcbunref(inp);
255 	return;
256 
257  bad:
258 	if (inp != NULL)
259 		in_pcbunref(inp);
260 	m_freem(m);
261 }
262 
263 int
divert_attach(struct socket * so,int proto,int wait)264 divert_attach(struct socket *so, int proto, int wait)
265 {
266 	int error;
267 
268 	if (so->so_pcb != NULL)
269 		return EINVAL;
270 	if ((so->so_state & SS_PRIV) == 0)
271 		return EACCES;
272 
273 	error = in_pcballoc(so, &divbtable, wait);
274 	if (error)
275 		return error;
276 
277 	error = soreserve(so, divert_sendspace, divert_recvspace);
278 	if (error)
279 		return error;
280 
281 	sotoinpcb(so)->inp_flags |= INP_HDRINCL;
282 	return (0);
283 }
284 
285 int
divert_detach(struct socket * so)286 divert_detach(struct socket *so)
287 {
288 	struct inpcb *inp = sotoinpcb(so);
289 
290 	soassertlocked(so);
291 
292 	if (inp == NULL)
293 		return (EINVAL);
294 
295 	in_pcbdetach(inp);
296 	return (0);
297 }
298 
299 void
divert_lock(struct socket * so)300 divert_lock(struct socket *so)
301 {
302 	struct inpcb *inp = sotoinpcb(so);
303 
304 	NET_ASSERT_LOCKED();
305 	mtx_enter(&inp->inp_mtx);
306 }
307 
308 void
divert_unlock(struct socket * so)309 divert_unlock(struct socket *so)
310 {
311 	struct inpcb *inp = sotoinpcb(so);
312 
313 	NET_ASSERT_LOCKED();
314 	mtx_leave(&inp->inp_mtx);
315 }
316 
317 int
divert_locked(struct socket * so)318 divert_locked(struct socket *so)
319 {
320 	struct inpcb *inp = sotoinpcb(so);
321 
322 	return mtx_owned(&inp->inp_mtx);
323 }
324 
/*
 * Bind a divert socket to the address in addr by handing the request to
 * in_pcbbind().  The socket must be locked.  Returns whatever
 * in_pcbbind() returns.
 */
int
divert_bind(struct socket *so, struct mbuf *addr, struct proc *p)
{
	soassertlocked(so);
	return in_pcbbind(sotoinpcb(so), addr, p);
}
333 
/*
 * Shut down the sending side of a divert socket via socantsendmore().
 * The socket must be locked.  Always returns 0.
 */
int
divert_shutdown(struct socket *so)
{
	soassertlocked(so);

	socantsendmore(so);

	return (0);
}
341 
/*
 * Send entry point for divert sockets: pass the packet, the destination
 * address and the control data to divert_output() together with the
 * socket's PCB.  The socket must be locked.  Returns the result of
 * divert_output().
 */
int
divert_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert_output(sotoinpcb(so), m, addr, control));
}
351 
352 int
divert_sysctl_divstat(void * oldp,size_t * oldlenp,void * newp)353 divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp)
354 {
355 	uint64_t counters[divs_ncounters];
356 	struct divstat divstat;
357 	u_long *words = (u_long *)&divstat;
358 	int i;
359 
360 	CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long)));
361 	memset(&divstat, 0, sizeof divstat);
362 	counters_read(divcounters, counters, nitems(counters), NULL);
363 
364 	for (i = 0; i < nitems(counters); i++)
365 		words[i] = (u_long)counters[i];
366 
367 	return (sysctl_rdstruct(oldp, oldlenp, newp,
368 	    &divstat, sizeof(divstat)));
369 }
370 
371 /*
372  * Sysctl for divert variables.
373  */
374 int
divert_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)375 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
376     size_t newlen)
377 {
378 	int error;
379 
380 	/* All sysctl names at this level are terminal. */
381 	if (namelen != 1)
382 		return (ENOTDIR);
383 
384 	switch (name[0]) {
385 	case DIVERTCTL_STATS:
386 		return (divert_sysctl_divstat(oldp, oldlenp, newp));
387 	default:
388 		NET_LOCK();
389 		error = sysctl_bounded_arr(divertctl_vars,
390 		    nitems(divertctl_vars), name, namelen, oldp, oldlenp, newp,
391 		    newlen);
392 		NET_UNLOCK();
393 		return (error);
394 	}
395 	/* NOTREACHED */
396 }
397