xref: /openbsd/sys/netinet6/ip6_divert.c (revision 771fbea0)
1 /*      $OpenBSD: ip6_divert.c,v 1.63 2020/11/16 06:38:20 gnezdo Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip_var.h>
35 #include <netinet/in_pcb.h>
36 #include <netinet/ip6.h>
37 #include <netinet6/in6_var.h>
38 #include <netinet6/ip6_divert.h>
39 #include <netinet/tcp.h>
40 #include <netinet/udp.h>
41 #include <netinet/icmp6.h>
42 
43 #include <net/pfvar.h>
44 
45 struct	inpcbtable	divb6table;
46 struct	cpumem		*div6counters;
47 
48 #ifndef DIVERT_SENDSPACE
49 #define DIVERT_SENDSPACE	(65536 + 100)
50 #endif
51 u_int   divert6_sendspace = DIVERT_SENDSPACE;
52 #ifndef DIVERT_RECVSPACE
53 #define DIVERT_RECVSPACE	(65536 + 100)
54 #endif
55 u_int   divert6_recvspace = DIVERT_RECVSPACE;
56 
57 #ifndef DIVERTHASHSIZE
58 #define DIVERTHASHSIZE	128
59 #endif
60 
61 const struct sysctl_bounded_args divert6ctl_vars[] = {
62 	{ DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX },
63 	{ DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX },
64 };
65 
66 int divb6hashsize = DIVERTHASHSIZE;
67 
68 int	divert6_output(struct inpcb *, struct mbuf *, struct mbuf *,
69 	    struct mbuf *);
70 
71 void
72 divert6_init(void)
73 {
74 	in_pcbinit(&divb6table, divb6hashsize);
75 	div6counters = counters_alloc(div6s_ncounters);
76 }
77 
78 int
79 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
80     struct mbuf *control)
81 {
82 	struct sockaddr_in6 *sin6;
83 	int error, min_hdrlen, nxt, off, dir;
84 	struct ip6_hdr *ip6;
85 
86 	m_freem(control);
87 
88 	if ((error = in6_nam2sin6(nam, &sin6)))
89 		goto fail;
90 
91 	/* Do basic sanity checks. */
92 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
93 		goto fail;
94 	if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
95 		/* m_pullup() has freed the mbuf, so just return. */
96 		div6stat_inc(div6s_errors);
97 		return (ENOBUFS);
98 	}
99 	ip6 = mtod(m, struct ip6_hdr *);
100 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
101 		goto fail;
102 	if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
103 		goto fail;
104 
105 	/*
106 	 * Recalculate the protocol checksum since the userspace application
107 	 * may have modified the packet prior to reinjection.
108 	 */
109 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
110 	if (off < sizeof(struct ip6_hdr))
111 		goto fail;
112 
113 	dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? PF_OUT : PF_IN);
114 
115 	switch (nxt) {
116 	case IPPROTO_TCP:
117 		min_hdrlen = sizeof(struct tcphdr);
118 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
119 		break;
120 	case IPPROTO_UDP:
121 		min_hdrlen = sizeof(struct udphdr);
122 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
123 		break;
124 	case IPPROTO_ICMPV6:
125 		min_hdrlen = sizeof(struct icmp6_hdr);
126 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
127 		break;
128 	default:
129 		min_hdrlen = 0;
130 		break;
131 	}
132 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
133 		goto fail;
134 
135 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
136 
137 	if (dir == PF_IN) {
138 		struct rtentry *rt;
139 		struct ifnet *ifp;
140 
141 		rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid);
142 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
143 			rtfree(rt);
144 			error = EADDRNOTAVAIL;
145 			goto fail;
146 		}
147 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
148 		rtfree(rt);
149 
150 		/*
151 		 * Recalculate the protocol checksum for the inbound packet
152 		 * since the userspace application may have modified the packet
153 		 * prior to reinjection.
154 		 */
155 		in6_proto_cksum_out(m, NULL);
156 
157 		ifp = if_get(m->m_pkthdr.ph_ifidx);
158 		if (ifp == NULL) {
159 			error = ENETDOWN;
160 			goto fail;
161 		}
162 		ipv6_input(ifp, m);
163 		if_put(ifp);
164 	} else {
165 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
166 
167 		error = ip6_output(m, NULL, &inp->inp_route6,
168 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
169 	}
170 
171 	div6stat_inc(div6s_opackets);
172 	return (error);
173 
174 fail:
175 	div6stat_inc(div6s_errors);
176 	m_freem(m);
177 	return (error ? error : EINVAL);
178 }
179 
180 int
181 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
182 {
183 	struct inpcb *inp;
184 	struct socket *sa = NULL;
185 	struct sockaddr_in6 addr;
186 
187 	inp = NULL;
188 	div6stat_inc(div6s_ipackets);
189 
190 	if (m->m_len < sizeof(struct ip6_hdr) &&
191 	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
192 		div6stat_inc(div6s_errors);
193 		return (0);
194 	}
195 
196 	TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
197 		if (inp->inp_lport == divert_port)
198 			break;
199 	}
200 
201 	memset(&addr, 0, sizeof(addr));
202 	addr.sin6_family = AF_INET6;
203 	addr.sin6_len = sizeof(addr);
204 
205 	if (dir == PF_IN) {
206 		struct ifaddr *ifa;
207 		struct ifnet *ifp;
208 
209 		ifp = if_get(m->m_pkthdr.ph_ifidx);
210 		if (ifp == NULL) {
211 			m_freem(m);
212 			return (0);
213 		}
214 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
215 			if (ifa->ifa_addr->sa_family != AF_INET6)
216 				continue;
217 			addr.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr;
218 			break;
219 		}
220 		if_put(ifp);
221 	}
222 
223 	if (inp) {
224 		sa = inp->inp_socket;
225 		if (sbappendaddr(sa, &sa->so_rcv, sin6tosa(&addr), m, NULL) == 0) {
226 			div6stat_inc(div6s_fullsock);
227 			m_freem(m);
228 			return (0);
229 		} else {
230 			KERNEL_LOCK();
231 			sorwakeup(inp->inp_socket);
232 			KERNEL_UNLOCK();
233 		}
234 	}
235 
236 	if (sa == NULL) {
237 		div6stat_inc(div6s_noport);
238 		m_freem(m);
239 	}
240 	return (0);
241 }
242 
243 /*ARGSUSED*/
244 int
245 divert6_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *addr,
246     struct mbuf *control, struct proc *p)
247 {
248 	struct inpcb *inp = sotoinpcb(so);
249 	int error = 0;
250 
251 	if (req == PRU_CONTROL) {
252 		return (in6_control(so, (u_long)m, (caddr_t)addr,
253 		    (struct ifnet *)control));
254 	}
255 
256 	soassertlocked(so);
257 
258 	if (inp == NULL) {
259 		error = EINVAL;
260 		goto release;
261 	}
262 	switch (req) {
263 
264 	case PRU_BIND:
265 		error = in_pcbbind(inp, addr, p);
266 		break;
267 
268 	case PRU_SHUTDOWN:
269 		socantsendmore(so);
270 		break;
271 
272 	case PRU_SEND:
273 		return (divert6_output(inp, m, addr, control));
274 
275 	case PRU_ABORT:
276 		soisdisconnected(so);
277 		in_pcbdetach(inp);
278 		break;
279 
280 	case PRU_SOCKADDR:
281 		in6_setsockaddr(inp, addr);
282 		break;
283 
284 	case PRU_PEERADDR:
285 		in6_setpeeraddr(inp, addr);
286 		break;
287 
288 	case PRU_SENSE:
289 		break;
290 
291 	case PRU_LISTEN:
292 	case PRU_CONNECT:
293 	case PRU_CONNECT2:
294 	case PRU_ACCEPT:
295 	case PRU_DISCONNECT:
296 	case PRU_SENDOOB:
297 	case PRU_FASTTIMO:
298 	case PRU_SLOWTIMO:
299 	case PRU_PROTORCV:
300 	case PRU_PROTOSEND:
301 	case PRU_RCVD:
302 	case PRU_RCVOOB:
303 		error =  EOPNOTSUPP;
304 		break;
305 
306 	default:
307 		panic("%s", __func__);
308 	}
309 
310 release:
311 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
312 		m_freem(control);
313 		m_freem(m);
314 	}
315 	return (error);
316 }
317 
318 int
319 divert6_attach(struct socket *so, int proto)
320 {
321 	int error;
322 
323 	if (so->so_pcb != NULL)
324 		return EINVAL;
325 
326 	if ((so->so_state & SS_PRIV) == 0)
327 		return EACCES;
328 
329 	error = in_pcballoc(so, &divb6table);
330 	if (error)
331 		return (error);
332 
333 	error = soreserve(so, divert6_sendspace, divert6_recvspace);
334 	if (error)
335 		return (error);
336 	sotoinpcb(so)->inp_flags |= INP_HDRINCL;
337 	return (0);
338 }
339 
340 int
341 divert6_detach(struct socket *so)
342 {
343 	struct inpcb *inp = sotoinpcb(so);
344 
345 	soassertlocked(so);
346 
347 	if (inp == NULL)
348 		return (EINVAL);
349 
350 	in_pcbdetach(inp);
351 
352 	return (0);
353 }
354 
355 int
356 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp)
357 {
358 	uint64_t counters[div6s_ncounters];
359 	struct div6stat div6stat;
360 	u_long *words = (u_long *)&div6stat;
361 	int i;
362 
363 	CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long)));
364 
365 	counters_read(div6counters, counters, nitems(counters));
366 
367 	for (i = 0; i < nitems(counters); i++)
368 		words[i] = (u_long)counters[i];
369 
370 	return (sysctl_rdstruct(oldp, oldlenp, newp,
371 	    &div6stat, sizeof(div6stat)));
372 }
373 
374 /*
375  * Sysctl for divert variables.
376  */
377 int
378 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
379     void *newp, size_t newlen)
380 {
381 	int error;
382 
383 	/* All sysctl names at this level are terminal. */
384 	if (namelen != 1)
385 		return (ENOTDIR);
386 
387 	switch (name[0]) {
388 	case DIVERT6CTL_STATS:
389 		return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
390 	default:
391 		NET_LOCK();
392 		error = sysctl_bounded_arr(divert6ctl_vars,
393 		    nitems(divert6ctl_vars), name, namelen, oldp, oldlenp,
394 		    newp, newlen);
395 		NET_UNLOCK();
396 		return (error);
397 	}
398 	/* NOTREACHED */
399 }
400