xref: /openbsd/sys/netinet6/ip6_divert.c (revision 3370674d)
1 /*      $OpenBSD: ip6_divert.c,v 1.98 2025/01/23 12:51:51 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet6/in6_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/ip6.h>
37 #include <netinet6/ip6_var.h>
38 #include <netinet/in_pcb.h>
39 #include <netinet/ip_divert.h>
40 #include <netinet6/ip6_divert.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/icmp6.h>
44 
45 #include <net/pfvar.h>
46 
/*
 * Locks used to protect data:
 *	a	atomic
 */

/* PCB table holding all divert6 sockets; initialized in divert6_init(). */
struct	inpcbtable	divb6table;
/* Statistics counters (div6s_*); allocated in divert6_init(). */
struct	cpumem		*div6counters;

/*
 * Default socket buffer reservations applied by divert6_attach().
 * Both values are read with atomic_load_int() and are exposed through
 * the divert6ctl_vars sysctl table.
 */
#ifndef DIVERT_SENDSPACE
#define DIVERT_SENDSPACE	(65536 + 100)
#endif
u_int   divert6_sendspace = DIVERT_SENDSPACE;	/* [a] */
#ifndef DIVERT_RECVSPACE
#define DIVERT_RECVSPACE	(65536 + 100)
#endif
u_int   divert6_recvspace = DIVERT_RECVSPACE;	/* [a] */

/* Default for divb6hashsize, the size passed to in_pcbinit(). */
#ifndef DIVERTHASHSIZE
#define DIVERTHASHSIZE	128
#endif
67 
/*
 * Integer sysctl variables handled by sysctl_bounded_arr() in
 * divert6_sysctl().  Each entry names the sysctl identifier, the
 * variable it maps to, and the bounds 0 and INT_MAX.
 */
const struct sysctl_bounded_args divert6ctl_vars[] = {
	{ DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX },
	{ DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX },
};
72 
/*
 * User request handlers for divert6 sockets.  Only attach and send are
 * implemented in this file; detach, bind and shutdown use the divert_*
 * handlers, which are defined elsewhere (presumably the IPv4 divert
 * code declared in <netinet/ip_divert.h> -- confirm).
 */
const struct pr_usrreqs divert6_usrreqs = {
	.pru_attach	= divert6_attach,
	.pru_detach	= divert_detach,
	.pru_bind	= divert_bind,
	.pru_shutdown	= divert_shutdown,
	.pru_send	= divert6_send,
	.pru_control	= in6_control,
	.pru_sockaddr	= in6_sockaddr,
	.pru_peeraddr	= in6_peeraddr,
};
83 
/* Hash size handed to in_pcbinit() when divb6table is set up. */
int divb6hashsize = DIVERTHASHSIZE;

/* Reinjection path used by divert6_send(); defined below. */
int	divert6_output(struct inpcb *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
88 
89 void
90 divert6_init(void)
91 {
92 	in_pcbinit(&divb6table, divb6hashsize);
93 	div6counters = counters_alloc(div6s_ncounters);
94 }
95 
96 int
97 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
98     struct mbuf *control)
99 {
100 	struct sockaddr_in6 *sin6;
101 	int error, min_hdrlen, nxt, off, dir;
102 	struct ip6_hdr *ip6;
103 
104 	m_freem(control);
105 
106 	if ((error = in6_nam2sin6(nam, &sin6)))
107 		goto fail;
108 
109 	/* Do basic sanity checks. */
110 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
111 		goto fail;
112 	if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
113 		/* m_pullup() has freed the mbuf, so just return. */
114 		div6stat_inc(div6s_errors);
115 		return (ENOBUFS);
116 	}
117 	ip6 = mtod(m, struct ip6_hdr *);
118 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
119 		goto fail;
120 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
121 		goto fail;
122 
123 	/*
124 	 * Recalculate the protocol checksum since the userspace application
125 	 * may have modified the packet prior to reinjection.
126 	 */
127 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
128 	if (off < sizeof(struct ip6_hdr))
129 		goto fail;
130 
131 	dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? PF_OUT : PF_IN);
132 
133 	switch (nxt) {
134 	case IPPROTO_TCP:
135 		min_hdrlen = sizeof(struct tcphdr);
136 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
137 		break;
138 	case IPPROTO_UDP:
139 		min_hdrlen = sizeof(struct udphdr);
140 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
141 		break;
142 	case IPPROTO_ICMPV6:
143 		min_hdrlen = sizeof(struct icmp6_hdr);
144 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
145 		break;
146 	default:
147 		min_hdrlen = 0;
148 		break;
149 	}
150 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
151 		goto fail;
152 
153 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
154 
155 	if (dir == PF_IN) {
156 		struct rtentry *rt;
157 		struct ifnet *ifp;
158 
159 		rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid);
160 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
161 			rtfree(rt);
162 			error = EADDRNOTAVAIL;
163 			goto fail;
164 		}
165 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
166 		rtfree(rt);
167 
168 		/*
169 		 * Recalculate the protocol checksum for the inbound packet
170 		 * since the userspace application may have modified the packet
171 		 * prior to reinjection.
172 		 */
173 		in6_proto_cksum_out(m, NULL);
174 
175 		ifp = if_get(m->m_pkthdr.ph_ifidx);
176 		if (ifp == NULL) {
177 			error = ENETDOWN;
178 			goto fail;
179 		}
180 		ipv6_input(ifp, m);
181 		if_put(ifp);
182 	} else {
183 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
184 
185 		error = ip6_output(m, NULL, &inp->inp_route,
186 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
187 	}
188 
189 	div6stat_inc(div6s_opackets);
190 	return (error);
191 
192 fail:
193 	div6stat_inc(div6s_errors);
194 	m_freem(m);
195 	return (error ? error : EINVAL);
196 }
197 
198 void
199 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
200 {
201 	struct inpcb *inp = NULL;
202 	struct socket *so;
203 	struct sockaddr_in6 sin6;
204 
205 	div6stat_inc(div6s_ipackets);
206 
207 	if (m->m_len < sizeof(struct ip6_hdr) &&
208 	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
209 		div6stat_inc(div6s_errors);
210 		goto bad;
211 	}
212 
213 	mtx_enter(&divb6table.inpt_mtx);
214 	TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
215 		if (inp->inp_lport != divert_port)
216 			continue;
217 		in_pcbref(inp);
218 		break;
219 	}
220 	mtx_leave(&divb6table.inpt_mtx);
221 	if (inp == NULL) {
222 		div6stat_inc(div6s_noport);
223 		goto bad;
224 	}
225 
226 	memset(&sin6, 0, sizeof(sin6));
227 	sin6.sin6_family = AF_INET6;
228 	sin6.sin6_len = sizeof(sin6);
229 
230 	if (dir == PF_IN) {
231 		struct ifaddr *ifa;
232 		struct ifnet *ifp;
233 
234 		ifp = if_get(m->m_pkthdr.ph_ifidx);
235 		if (ifp == NULL) {
236 			div6stat_inc(div6s_errors);
237 			goto bad;
238 		}
239 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
240 			if (ifa->ifa_addr->sa_family != AF_INET6)
241 				continue;
242 			sin6.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr;
243 			break;
244 		}
245 		if_put(ifp);
246 	} else {
247 		/*
248 		 * Calculate protocol checksum for outbound packet diverted
249 		 * to userland.  pf out rule diverts before cksum offload.
250 		 */
251 		in6_proto_cksum_out(m, NULL);
252 	}
253 
254 	so = inp->inp_socket;
255 	mtx_enter(&so->so_rcv.sb_mtx);
256 	if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
257 		mtx_leave(&so->so_rcv.sb_mtx);
258 		div6stat_inc(div6s_fullsock);
259 		goto bad;
260 	}
261 	mtx_leave(&so->so_rcv.sb_mtx);
262 	sorwakeup(so);
263 
264 	in_pcbunref(inp);
265 	return;
266 
267  bad:
268 	if (inp != NULL)
269 		in_pcbunref(inp);
270 	m_freem(m);
271 }
272 
273 int
274 divert6_attach(struct socket *so, int proto, int wait)
275 {
276 	int error;
277 
278 	if (so->so_pcb != NULL)
279 		return EINVAL;
280 	if ((so->so_state & SS_PRIV) == 0)
281 		return EACCES;
282 
283 	error = soreserve(so, atomic_load_int(&divert6_sendspace),
284 	    atomic_load_int(&divert6_recvspace));
285 	if (error)
286 		return (error);
287 	error = in_pcballoc(so, &divb6table, wait);
288 	if (error)
289 		return (error);
290 
291 	return (0);
292 }
293 
/*
 * pru_send handler: with the socket lock asserted, pass the packet,
 * address and control mbufs to divert6_output() for the socket's PCB.
 */
int
divert6_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert6_output(sotoinpcb(so), m, addr, control));
}
303 
304 int
305 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp)
306 {
307 	uint64_t counters[div6s_ncounters];
308 	struct div6stat div6stat;
309 	u_long *words = (u_long *)&div6stat;
310 	int i;
311 
312 	CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long)));
313 
314 	counters_read(div6counters, counters, nitems(counters), NULL);
315 
316 	for (i = 0; i < nitems(counters); i++)
317 		words[i] = (u_long)counters[i];
318 
319 	return (sysctl_rdstruct(oldp, oldlenp, newp,
320 	    &div6stat, sizeof(div6stat)));
321 }
322 
323 /*
324  * Sysctl for divert variables.
325  */
326 int
327 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
328     void *newp, size_t newlen)
329 {
330 	/* All sysctl names at this level are terminal. */
331 	if (namelen != 1)
332 		return (ENOTDIR);
333 
334 	switch (name[0]) {
335 	case DIVERT6CTL_STATS:
336 		return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
337 	default:
338 		return (sysctl_bounded_arr(divert6ctl_vars,
339 		    nitems(divert6ctl_vars), name, namelen, oldp, oldlenp,
340 		    newp, newlen));
341 	}
342 	/* NOTREACHED */
343 }
344