xref: /openbsd/sys/netinet/ip_divert.c (revision 76d0caae)
1 /*      $OpenBSD: ip_divert.c,v 1.64 2020/11/16 06:38:20 gnezdo Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/in_pcb.h>
37 #include <netinet/ip_divert.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/ip_icmp.h>
41 
42 #include <net/pfvar.h>
43 
44 struct	inpcbtable	divbtable;
45 struct	cpumem		*divcounters;
46 
47 #ifndef DIVERT_SENDSPACE
48 #define DIVERT_SENDSPACE	(65536 + 100)
49 #endif
50 u_int   divert_sendspace = DIVERT_SENDSPACE;
51 #ifndef DIVERT_RECVSPACE
52 #define DIVERT_RECVSPACE	(65536 + 100)
53 #endif
54 u_int   divert_recvspace = DIVERT_RECVSPACE;
55 
56 #ifndef DIVERTHASHSIZE
57 #define DIVERTHASHSIZE	128
58 #endif
59 
60 const struct sysctl_bounded_args divertctl_vars[] = {
61 	{ DIVERTCTL_RECVSPACE, &divert_recvspace, 0, INT_MAX },
62 	{ DIVERTCTL_SENDSPACE, &divert_sendspace, 0, INT_MAX },
63 };
64 
65 int divbhashsize = DIVERTHASHSIZE;
66 
67 int	divert_output(struct inpcb *, struct mbuf *, struct mbuf *,
68 	    struct mbuf *);
69 void
70 divert_init(void)
71 {
72 	in_pcbinit(&divbtable, divbhashsize);
73 	divcounters = counters_alloc(divs_ncounters);
74 }
75 
76 int
77 divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
78     struct mbuf *control)
79 {
80 	struct sockaddr_in *sin;
81 	int error, min_hdrlen, off, dir;
82 	struct ip *ip;
83 
84 	m_freem(control);
85 
86 	if ((error = in_nam2sin(nam, &sin)))
87 		goto fail;
88 
89 	/* Do basic sanity checks. */
90 	if (m->m_pkthdr.len < sizeof(struct ip))
91 		goto fail;
92 	if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
93 		/* m_pullup() has freed the mbuf, so just return. */
94 		divstat_inc(divs_errors);
95 		return (ENOBUFS);
96 	}
97 	ip = mtod(m, struct ip *);
98 	if (ip->ip_v != IPVERSION)
99 		goto fail;
100 	off = ip->ip_hl << 2;
101 	if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off ||
102 	    m->m_pkthdr.len < ntohs(ip->ip_len))
103 		goto fail;
104 
105 	dir = (sin->sin_addr.s_addr == INADDR_ANY ? PF_OUT : PF_IN);
106 
107 	switch (ip->ip_p) {
108 	case IPPROTO_TCP:
109 		min_hdrlen = sizeof(struct tcphdr);
110 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
111 		break;
112 	case IPPROTO_UDP:
113 		min_hdrlen = sizeof(struct udphdr);
114 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
115 		break;
116 	case IPPROTO_ICMP:
117 		min_hdrlen = ICMP_MINLEN;
118 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
119 		break;
120 	default:
121 		min_hdrlen = 0;
122 		break;
123 	}
124 	if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
125 		goto fail;
126 
127 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
128 
129 	if (dir == PF_IN) {
130 		struct rtentry *rt;
131 		struct ifnet *ifp;
132 
133 		rt = rtalloc(sintosa(sin), 0, inp->inp_rtableid);
134 		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
135 			rtfree(rt);
136 			error = EADDRNOTAVAIL;
137 			goto fail;
138 		}
139 		m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
140 		rtfree(rt);
141 
142 		/*
143 		 * Recalculate IP and protocol checksums for the inbound packet
144 		 * since the userspace application may have modified the packet
145 		 * prior to reinjection.
146 		 */
147 		ip->ip_sum = 0;
148 		ip->ip_sum = in_cksum(m, off);
149 		in_proto_cksum_out(m, NULL);
150 
151 		ifp = if_get(m->m_pkthdr.ph_ifidx);
152 		if (ifp == NULL) {
153 			error = ENETDOWN;
154 			goto fail;
155 		}
156 		ipv4_input(ifp, m);
157 		if_put(ifp);
158 	} else {
159 		m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
160 
161 		error = ip_output(m, NULL, &inp->inp_route,
162 		    IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0);
163 	}
164 
165 	divstat_inc(divs_opackets);
166 	return (error);
167 
168 fail:
169 	m_freem(m);
170 	divstat_inc(divs_errors);
171 	return (error ? error : EINVAL);
172 }
173 
174 int
175 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
176 {
177 	struct inpcb *inp;
178 	struct socket *sa = NULL;
179 	struct sockaddr_in addr;
180 
181 	inp = NULL;
182 	divstat_inc(divs_ipackets);
183 
184 	if (m->m_len < sizeof(struct ip) &&
185 	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
186 		divstat_inc(divs_errors);
187 		return (0);
188 	}
189 
190 	TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
191 		if (inp->inp_lport == divert_port)
192 			break;
193 	}
194 
195 	memset(&addr, 0, sizeof(addr));
196 	addr.sin_family = AF_INET;
197 	addr.sin_len = sizeof(addr);
198 
199 	if (dir == PF_IN) {
200 		struct ifaddr *ifa;
201 		struct ifnet *ifp;
202 
203 		ifp = if_get(m->m_pkthdr.ph_ifidx);
204 		if (ifp == NULL) {
205 			m_freem(m);
206 			return (0);
207 		}
208 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
209 			if (ifa->ifa_addr->sa_family != AF_INET)
210 				continue;
211 			addr.sin_addr.s_addr = satosin(
212 			    ifa->ifa_addr)->sin_addr.s_addr;
213 			break;
214 		}
215 		if_put(ifp);
216 	}
217 
218 	if (inp) {
219 		sa = inp->inp_socket;
220 		if (sbappendaddr(sa, &sa->so_rcv, sintosa(&addr), m, NULL) == 0) {
221 			divstat_inc(divs_fullsock);
222 			m_freem(m);
223 			return (0);
224 		} else {
225 			KERNEL_LOCK();
226 			sorwakeup(inp->inp_socket);
227 			KERNEL_UNLOCK();
228 		}
229 	}
230 
231 	if (sa == NULL) {
232 		divstat_inc(divs_noport);
233 		m_freem(m);
234 	}
235 	return (0);
236 }
237 
238 /*ARGSUSED*/
239 int
240 divert_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *addr,
241     struct mbuf *control, struct proc *p)
242 {
243 	struct inpcb *inp = sotoinpcb(so);
244 	int error = 0;
245 
246 	if (req == PRU_CONTROL) {
247 		return (in_control(so, (u_long)m, (caddr_t)addr,
248 		    (struct ifnet *)control));
249 	}
250 
251 	soassertlocked(so);
252 
253 	if (inp == NULL) {
254 		error = EINVAL;
255 		goto release;
256 	}
257 	switch (req) {
258 
259 	case PRU_BIND:
260 		error = in_pcbbind(inp, addr, p);
261 		break;
262 
263 	case PRU_SHUTDOWN:
264 		socantsendmore(so);
265 		break;
266 
267 	case PRU_SEND:
268 		return (divert_output(inp, m, addr, control));
269 
270 	case PRU_ABORT:
271 		soisdisconnected(so);
272 		in_pcbdetach(inp);
273 		break;
274 
275 	case PRU_SOCKADDR:
276 		in_setsockaddr(inp, addr);
277 		break;
278 
279 	case PRU_PEERADDR:
280 		in_setpeeraddr(inp, addr);
281 		break;
282 
283 	case PRU_SENSE:
284 		break;
285 
286 	case PRU_LISTEN:
287 	case PRU_CONNECT:
288 	case PRU_CONNECT2:
289 	case PRU_ACCEPT:
290 	case PRU_DISCONNECT:
291 	case PRU_SENDOOB:
292 	case PRU_FASTTIMO:
293 	case PRU_SLOWTIMO:
294 	case PRU_PROTORCV:
295 	case PRU_PROTOSEND:
296 	case PRU_RCVD:
297 	case PRU_RCVOOB:
298 		error =  EOPNOTSUPP;
299 		break;
300 
301 	default:
302 		panic("divert_usrreq");
303 	}
304 
305 release:
306 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
307 		m_freem(control);
308 		m_freem(m);
309 	}
310 	return (error);
311 }
312 
313 int
314 divert_attach(struct socket *so, int proto)
315 {
316 	int error;
317 
318 	if (so->so_pcb != NULL)
319 		return EINVAL;
320 	if ((so->so_state & SS_PRIV) == 0)
321 		return EACCES;
322 
323 	error = in_pcballoc(so, &divbtable);
324 	if (error)
325 		return error;
326 
327 	error = soreserve(so, divert_sendspace, divert_recvspace);
328 	if (error)
329 		return error;
330 
331 	sotoinpcb(so)->inp_flags |= INP_HDRINCL;
332 	return (0);
333 }
334 
335 int
336 divert_detach(struct socket *so)
337 {
338 	struct inpcb *inp = sotoinpcb(so);
339 
340 	soassertlocked(so);
341 
342 	if (inp == NULL)
343 		return (EINVAL);
344 
345 	in_pcbdetach(inp);
346 	return (0);
347 }
348 
349 int
350 divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp)
351 {
352 	uint64_t counters[divs_ncounters];
353 	struct divstat divstat;
354 	u_long *words = (u_long *)&divstat;
355 	int i;
356 
357 	CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long)));
358 	memset(&divstat, 0, sizeof divstat);
359 	counters_read(divcounters, counters, nitems(counters));
360 
361 	for (i = 0; i < nitems(counters); i++)
362 		words[i] = (u_long)counters[i];
363 
364 	return (sysctl_rdstruct(oldp, oldlenp, newp,
365 	    &divstat, sizeof(divstat)));
366 }
367 
368 /*
369  * Sysctl for divert variables.
370  */
371 int
372 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
373     size_t newlen)
374 {
375 	int error;
376 
377 	/* All sysctl names at this level are terminal. */
378 	if (namelen != 1)
379 		return (ENOTDIR);
380 
381 	switch (name[0]) {
382 	case DIVERTCTL_STATS:
383 		return (divert_sysctl_divstat(oldp, oldlenp, newp));
384 	default:
385 		NET_LOCK();
386 		error = sysctl_bounded_arr(divertctl_vars,
387 		    nitems(divertctl_vars), name, namelen, oldp, oldlenp, newp,
388 		    newlen);
389 		NET_UNLOCK();
390 		return (error);
391 	}
392 	/* NOTREACHED */
393 }
394