xref: /openbsd/sys/netinet6/ip6_divert.c (revision 3bef86f7)
1 /*      $OpenBSD: ip6_divert.c,v 1.91 2024/01/01 18:52:09 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip_var.h>
35 #include <netinet/in_pcb.h>
36 #include <netinet/ip_divert.h>
37 #include <netinet/ip6.h>
38 #include <netinet6/in6_var.h>
39 #include <netinet6/ip6_divert.h>
40 #include <netinet/tcp.h>
41 #include <netinet/udp.h>
42 #include <netinet/icmp6.h>
43 
44 #include <net/pfvar.h>
45 
/*
 * divb6table: PCB table for divert6 sockets.  It is initialised in
 * divert6_init(), new PCBs are allocated from it in divert6_attach() and
 * divert6_packet() walks its queue to find the PCB bound to a divert port.
 *
 * div6counters: statistics counters, allocated in divert6_init(), bumped
 * through div6stat_inc() and exported by divert6_sysctl_div6stat().
 */
struct	inpcbtable	divb6table;
struct	cpumem		*div6counters;
48 
/*
 * Default send and receive buffer reservations handed to soreserve() when a
 * divert6 socket is attached.  The defaults can be overridden at build time
 * by predefining DIVERT_SENDSPACE / DIVERT_RECVSPACE, and the variables are
 * also listed in divert6ctl_vars[] for the sysctl handler below.
 */
#ifndef DIVERT_SENDSPACE
#define DIVERT_SENDSPACE	(65536 + 100)
#endif
u_int   divert6_sendspace = DIVERT_SENDSPACE;
#ifndef DIVERT_RECVSPACE
#define DIVERT_RECVSPACE	(65536 + 100)
#endif
u_int   divert6_recvspace = DIVERT_RECVSPACE;
57 
/*
 * Default value of divb6hashsize, which divert6_init() passes to
 * in_pcbinit().  May be overridden at build time.
 */
#ifndef DIVERTHASHSIZE
#define DIVERTHASHSIZE	128
#endif
61 
/*
 * Integer variables served by divert6_sysctl() through sysctl_bounded_arr().
 * NOTE(review): each entry presumably reads { sysctl name, variable, lower
 * bound, upper bound } -- confirm against struct sysctl_bounded_args.
 */
const struct sysctl_bounded_args divert6ctl_vars[] = {
	{ DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX },
	{ DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX },
};
66 
/*
 * User request handlers for divert6 sockets.  Only attach and send are
 * implemented in this file; detach, lock, unlock, bind and shutdown reuse
 * the divert_*() handlers, and control, sockaddr and peeraddr use the
 * in6_*() routines.
 */
const struct pr_usrreqs divert6_usrreqs = {
	.pru_attach	= divert6_attach,
	.pru_detach	= divert_detach,
	.pru_lock	= divert_lock,
	.pru_unlock	= divert_unlock,
	.pru_bind	= divert_bind,
	.pru_shutdown	= divert_shutdown,
	.pru_send	= divert6_send,
	.pru_control	= in6_control,
	.pru_sockaddr	= in6_sockaddr,
	.pru_peeraddr	= in6_peeraddr,
};
79 
/* Hash size passed to in_pcbinit() for divb6table in divert6_init(). */
int divb6hashsize = DIVERTHASHSIZE;
81 
/* Forward declaration; defined below and called from divert6_send(). */
int	divert6_output(struct inpcb *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
84 
85 void
86 divert6_init(void)
87 {
88 	in_pcbinit(&divb6table, divb6hashsize);
89 	div6counters = counters_alloc(div6s_ncounters);
90 }
91 
92 int
93 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
94     struct mbuf *control)
95 {
96 	struct sockaddr_in6 *sin6;
97 	int error, min_hdrlen, nxt, off, dir;
98 	struct ip6_hdr *ip6;
99 
100 	m_freem(control);
101 
102 	if ((error = in6_nam2sin6(nam, &sin6)))
103 		goto fail;
104 
105 	/* Do basic sanity checks. */
106 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
107 		goto fail;
108 	if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
109 		/* m_pullup() has freed the mbuf, so just return. */
110 		div6stat_inc(div6s_errors);
111 		return (ENOBUFS);
112 	}
113 	ip6 = mtod(m, struct ip6_hdr *);
114 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
115 		goto fail;
116 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
117 		goto fail;
118 
119 	/*
120 	 * Recalculate the protocol checksum since the userspace application
121 	 * may have modified the packet prior to reinjection.
122 	 */
123 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
124 	if (off < sizeof(struct ip6_hdr))
125 		goto fail;
126 
127 	dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? PF_OUT : PF_IN);
128 
129 	switch (nxt) {
130 	case IPPROTO_TCP:
131 		min_hdrlen = sizeof(struct tcphdr);
132 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
133 		break;
134 	case IPPROTO_UDP:
135 		min_hdrlen = sizeof(struct udphdr);
136 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
137 		break;
138 	case IPPROTO_ICMPV6:
139 		min_hdrlen = sizeof(struct icmp6_hdr);
140 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
141 		break;
142 	default:
143 		min_hdrlen = 0;
144 		break;
145 	}
146 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
147 		goto fail;
148 
149 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
150 
151 	if (dir == PF_IN) {
152 		struct rtentry *rt;
153 		struct ifnet *ifp;
154 
155 		rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid);
156 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
157 			rtfree(rt);
158 			error = EADDRNOTAVAIL;
159 			goto fail;
160 		}
161 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
162 		rtfree(rt);
163 
164 		/*
165 		 * Recalculate the protocol checksum for the inbound packet
166 		 * since the userspace application may have modified the packet
167 		 * prior to reinjection.
168 		 */
169 		in6_proto_cksum_out(m, NULL);
170 
171 		ifp = if_get(m->m_pkthdr.ph_ifidx);
172 		if (ifp == NULL) {
173 			error = ENETDOWN;
174 			goto fail;
175 		}
176 		ipv6_input(ifp, m);
177 		if_put(ifp);
178 	} else {
179 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
180 
181 		error = ip6_output(m, NULL, &inp->inp_route6,
182 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
183 	}
184 
185 	div6stat_inc(div6s_opackets);
186 	return (error);
187 
188 fail:
189 	div6stat_inc(div6s_errors);
190 	m_freem(m);
191 	return (error ? error : EINVAL);
192 }
193 
194 void
195 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
196 {
197 	struct inpcb *inp = NULL;
198 	struct socket *so;
199 	struct sockaddr_in6 sin6;
200 
201 	div6stat_inc(div6s_ipackets);
202 
203 	if (m->m_len < sizeof(struct ip6_hdr) &&
204 	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
205 		div6stat_inc(div6s_errors);
206 		goto bad;
207 	}
208 
209 	mtx_enter(&divb6table.inpt_mtx);
210 	TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
211 		if (inp->inp_lport != divert_port)
212 			continue;
213 		in_pcbref(inp);
214 		break;
215 	}
216 	mtx_leave(&divb6table.inpt_mtx);
217 	if (inp == NULL) {
218 		div6stat_inc(div6s_noport);
219 		goto bad;
220 	}
221 
222 	memset(&sin6, 0, sizeof(sin6));
223 	sin6.sin6_family = AF_INET6;
224 	sin6.sin6_len = sizeof(sin6);
225 
226 	if (dir == PF_IN) {
227 		struct ifaddr *ifa;
228 		struct ifnet *ifp;
229 
230 		ifp = if_get(m->m_pkthdr.ph_ifidx);
231 		if (ifp == NULL) {
232 			div6stat_inc(div6s_errors);
233 			goto bad;
234 		}
235 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
236 			if (ifa->ifa_addr->sa_family != AF_INET6)
237 				continue;
238 			sin6.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr;
239 			break;
240 		}
241 		if_put(ifp);
242 	} else {
243 		/*
244 		 * Calculate protocol checksum for outbound packet diverted
245 		 * to userland.  pf out rule diverts before cksum offload.
246 		 */
247 		in6_proto_cksum_out(m, NULL);
248 	}
249 
250 	mtx_enter(&inp->inp_mtx);
251 	so = inp->inp_socket;
252 	if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
253 		mtx_leave(&inp->inp_mtx);
254 		div6stat_inc(div6s_fullsock);
255 		goto bad;
256 	}
257 	mtx_leave(&inp->inp_mtx);
258 	sorwakeup(so);
259 
260 	in_pcbunref(inp);
261 	return;
262 
263  bad:
264 	if (inp != NULL)
265 		in_pcbunref(inp);
266 	m_freem(m);
267 }
268 
269 int
270 divert6_attach(struct socket *so, int proto, int wait)
271 {
272 	int error;
273 
274 	if (so->so_pcb != NULL)
275 		return EINVAL;
276 	if ((so->so_state & SS_PRIV) == 0)
277 		return EACCES;
278 
279 	error = in_pcballoc(so, &divb6table, wait);
280 	if (error)
281 		return (error);
282 
283 	error = soreserve(so, divert6_sendspace, divert6_recvspace);
284 	if (error)
285 		return (error);
286 
287 	return (0);
288 }
289 
/*
 * pru_send handler for divert6 sockets: asserts that the socket is locked
 * and passes the packet, destination address and control data on to
 * divert6_output() together with the socket's PCB.  Returns whatever
 * divert6_output() returns.
 */
int
divert6_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert6_output(sotoinpcb(so), m, addr, control));
}
299 
300 int
301 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp)
302 {
303 	uint64_t counters[div6s_ncounters];
304 	struct div6stat div6stat;
305 	u_long *words = (u_long *)&div6stat;
306 	int i;
307 
308 	CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long)));
309 
310 	counters_read(div6counters, counters, nitems(counters), NULL);
311 
312 	for (i = 0; i < nitems(counters); i++)
313 		words[i] = (u_long)counters[i];
314 
315 	return (sysctl_rdstruct(oldp, oldlenp, newp,
316 	    &div6stat, sizeof(div6stat)));
317 }
318 
319 /*
320  * Sysctl for divert variables.
321  */
322 int
323 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
324     void *newp, size_t newlen)
325 {
326 	int error;
327 
328 	/* All sysctl names at this level are terminal. */
329 	if (namelen != 1)
330 		return (ENOTDIR);
331 
332 	switch (name[0]) {
333 	case DIVERT6CTL_STATS:
334 		return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
335 	default:
336 		NET_LOCK();
337 		error = sysctl_bounded_arr(divert6ctl_vars,
338 		    nitems(divert6ctl_vars), name, namelen, oldp, oldlenp,
339 		    newp, newlen);
340 		NET_UNLOCK();
341 		return (error);
342 	}
343 	/* NOTREACHED */
344 }
345