xref: /openbsd/sys/netinet6/ip6_divert.c (revision 5dea098c)
1 /*      $OpenBSD: ip6_divert.c,v 1.95 2024/02/13 12:22:09 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet6/in6_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/ip6.h>
37 #include <netinet6/ip6_var.h>
38 #include <netinet/in_pcb.h>
39 #include <netinet/ip_divert.h>
40 #include <netinet6/ip6_divert.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/icmp6.h>
44 
45 #include <net/pfvar.h>
46 
47 struct	inpcbtable	divb6table;
48 struct	cpumem		*div6counters;
49 
50 #ifndef DIVERT_SENDSPACE
51 #define DIVERT_SENDSPACE	(65536 + 100)
52 #endif
53 u_int   divert6_sendspace = DIVERT_SENDSPACE;
54 #ifndef DIVERT_RECVSPACE
55 #define DIVERT_RECVSPACE	(65536 + 100)
56 #endif
57 u_int   divert6_recvspace = DIVERT_RECVSPACE;
58 
59 #ifndef DIVERTHASHSIZE
60 #define DIVERTHASHSIZE	128
61 #endif
62 
63 const struct sysctl_bounded_args divert6ctl_vars[] = {
64 	{ DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX },
65 	{ DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX },
66 };
67 
68 const struct pr_usrreqs divert6_usrreqs = {
69 	.pru_attach	= divert6_attach,
70 	.pru_detach	= divert_detach,
71 	.pru_lock	= divert_lock,
72 	.pru_unlock	= divert_unlock,
73 	.pru_locked	= divert_locked,
74 	.pru_bind	= divert_bind,
75 	.pru_shutdown	= divert_shutdown,
76 	.pru_send	= divert6_send,
77 	.pru_control	= in6_control,
78 	.pru_sockaddr	= in6_sockaddr,
79 	.pru_peeraddr	= in6_peeraddr,
80 };
81 
82 int divb6hashsize = DIVERTHASHSIZE;
83 
84 int	divert6_output(struct inpcb *, struct mbuf *, struct mbuf *,
85 	    struct mbuf *);
86 
87 void
88 divert6_init(void)
89 {
90 	in_pcbinit(&divb6table, divb6hashsize);
91 	div6counters = counters_alloc(div6s_ncounters);
92 }
93 
94 int
95 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
96     struct mbuf *control)
97 {
98 	struct sockaddr_in6 *sin6;
99 	int error, min_hdrlen, nxt, off, dir;
100 	struct ip6_hdr *ip6;
101 
102 	m_freem(control);
103 
104 	if ((error = in6_nam2sin6(nam, &sin6)))
105 		goto fail;
106 
107 	/* Do basic sanity checks. */
108 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
109 		goto fail;
110 	if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
111 		/* m_pullup() has freed the mbuf, so just return. */
112 		div6stat_inc(div6s_errors);
113 		return (ENOBUFS);
114 	}
115 	ip6 = mtod(m, struct ip6_hdr *);
116 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
117 		goto fail;
118 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
119 		goto fail;
120 
121 	/*
122 	 * Recalculate the protocol checksum since the userspace application
123 	 * may have modified the packet prior to reinjection.
124 	 */
125 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
126 	if (off < sizeof(struct ip6_hdr))
127 		goto fail;
128 
129 	dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? PF_OUT : PF_IN);
130 
131 	switch (nxt) {
132 	case IPPROTO_TCP:
133 		min_hdrlen = sizeof(struct tcphdr);
134 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
135 		break;
136 	case IPPROTO_UDP:
137 		min_hdrlen = sizeof(struct udphdr);
138 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
139 		break;
140 	case IPPROTO_ICMPV6:
141 		min_hdrlen = sizeof(struct icmp6_hdr);
142 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
143 		break;
144 	default:
145 		min_hdrlen = 0;
146 		break;
147 	}
148 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
149 		goto fail;
150 
151 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
152 
153 	if (dir == PF_IN) {
154 		struct rtentry *rt;
155 		struct ifnet *ifp;
156 
157 		rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid);
158 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
159 			rtfree(rt);
160 			error = EADDRNOTAVAIL;
161 			goto fail;
162 		}
163 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
164 		rtfree(rt);
165 
166 		/*
167 		 * Recalculate the protocol checksum for the inbound packet
168 		 * since the userspace application may have modified the packet
169 		 * prior to reinjection.
170 		 */
171 		in6_proto_cksum_out(m, NULL);
172 
173 		ifp = if_get(m->m_pkthdr.ph_ifidx);
174 		if (ifp == NULL) {
175 			error = ENETDOWN;
176 			goto fail;
177 		}
178 		ipv6_input(ifp, m);
179 		if_put(ifp);
180 	} else {
181 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
182 
183 		error = ip6_output(m, NULL, &inp->inp_route,
184 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
185 	}
186 
187 	div6stat_inc(div6s_opackets);
188 	return (error);
189 
190 fail:
191 	div6stat_inc(div6s_errors);
192 	m_freem(m);
193 	return (error ? error : EINVAL);
194 }
195 
196 void
197 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
198 {
199 	struct inpcb *inp = NULL;
200 	struct socket *so;
201 	struct sockaddr_in6 sin6;
202 
203 	div6stat_inc(div6s_ipackets);
204 
205 	if (m->m_len < sizeof(struct ip6_hdr) &&
206 	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
207 		div6stat_inc(div6s_errors);
208 		goto bad;
209 	}
210 
211 	mtx_enter(&divb6table.inpt_mtx);
212 	TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
213 		if (inp->inp_lport != divert_port)
214 			continue;
215 		in_pcbref(inp);
216 		break;
217 	}
218 	mtx_leave(&divb6table.inpt_mtx);
219 	if (inp == NULL) {
220 		div6stat_inc(div6s_noport);
221 		goto bad;
222 	}
223 
224 	memset(&sin6, 0, sizeof(sin6));
225 	sin6.sin6_family = AF_INET6;
226 	sin6.sin6_len = sizeof(sin6);
227 
228 	if (dir == PF_IN) {
229 		struct ifaddr *ifa;
230 		struct ifnet *ifp;
231 
232 		ifp = if_get(m->m_pkthdr.ph_ifidx);
233 		if (ifp == NULL) {
234 			div6stat_inc(div6s_errors);
235 			goto bad;
236 		}
237 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
238 			if (ifa->ifa_addr->sa_family != AF_INET6)
239 				continue;
240 			sin6.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr;
241 			break;
242 		}
243 		if_put(ifp);
244 	} else {
245 		/*
246 		 * Calculate protocol checksum for outbound packet diverted
247 		 * to userland.  pf out rule diverts before cksum offload.
248 		 */
249 		in6_proto_cksum_out(m, NULL);
250 	}
251 
252 	so = inp->inp_socket;
253 	mtx_enter(&so->so_rcv.sb_mtx);
254 	if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
255 		mtx_leave(&so->so_rcv.sb_mtx);
256 		div6stat_inc(div6s_fullsock);
257 		goto bad;
258 	}
259 	mtx_leave(&so->so_rcv.sb_mtx);
260 	sorwakeup(so);
261 
262 	in_pcbunref(inp);
263 	return;
264 
265  bad:
266 	if (inp != NULL)
267 		in_pcbunref(inp);
268 	m_freem(m);
269 }
270 
271 int
272 divert6_attach(struct socket *so, int proto, int wait)
273 {
274 	int error;
275 
276 	if (so->so_pcb != NULL)
277 		return EINVAL;
278 	if ((so->so_state & SS_PRIV) == 0)
279 		return EACCES;
280 
281 	error = in_pcballoc(so, &divb6table, wait);
282 	if (error)
283 		return (error);
284 
285 	error = soreserve(so, divert6_sendspace, divert6_recvspace);
286 	if (error)
287 		return (error);
288 
289 	return (0);
290 }
291 
/*
 * pru_send handler: assert that the socket is locked, then pass the
 * packet, address and control mbufs to divert6_output() together with
 * the socket's PCB.  Returns whatever divert6_output() returns.
 */
int
divert6_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert6_output(sotoinpcb(so), m, addr, control));
}
301 
302 int
303 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp)
304 {
305 	uint64_t counters[div6s_ncounters];
306 	struct div6stat div6stat;
307 	u_long *words = (u_long *)&div6stat;
308 	int i;
309 
310 	CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long)));
311 
312 	counters_read(div6counters, counters, nitems(counters), NULL);
313 
314 	for (i = 0; i < nitems(counters); i++)
315 		words[i] = (u_long)counters[i];
316 
317 	return (sysctl_rdstruct(oldp, oldlenp, newp,
318 	    &div6stat, sizeof(div6stat)));
319 }
320 
321 /*
322  * Sysctl for divert variables.
323  */
324 int
325 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
326     void *newp, size_t newlen)
327 {
328 	int error;
329 
330 	/* All sysctl names at this level are terminal. */
331 	if (namelen != 1)
332 		return (ENOTDIR);
333 
334 	switch (name[0]) {
335 	case DIVERT6CTL_STATS:
336 		return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
337 	default:
338 		NET_LOCK();
339 		error = sysctl_bounded_arr(divert6ctl_vars,
340 		    nitems(divert6ctl_vars), name, namelen, oldp, oldlenp,
341 		    newp, newlen);
342 		NET_UNLOCK();
343 		return (error);
344 	}
345 	/* NOTREACHED */
346 }
347