xref: /openbsd/sys/netinet/ip_divert.c (revision 91f110e0)
1 /*      $OpenBSD: ip_divert.c,v 1.16 2014/01/09 06:29:06 tedu Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26 
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/netisr.h>
30 #include <net/pfvar.h>
31 
32 #include <netinet/in.h>
33 #include <netinet/in_systm.h>
34 #include <netinet/in_var.h>
35 #include <netinet/ip.h>
36 #include <netinet/ip_var.h>
37 #include <netinet/in_pcb.h>
38 #include <netinet/ip_divert.h>
39 #include <netinet/tcp.h>
40 #include <netinet/udp.h>
41 #include <netinet/ip_icmp.h>
42 
43 struct	inpcbtable	divbtable;
44 struct	divstat		divstat;
45 
46 #ifndef DIVERT_SENDSPACE
47 #define DIVERT_SENDSPACE	(65536 + 100)
48 #endif
49 u_int   divert_sendspace = DIVERT_SENDSPACE;
50 #ifndef DIVERT_RECVSPACE
51 #define DIVERT_RECVSPACE	(65536 + 100)
52 #endif
53 u_int   divert_recvspace = DIVERT_RECVSPACE;
54 
55 #ifndef DIVERTHASHSIZE
56 #define DIVERTHASHSIZE	128
57 #endif
58 
59 int *divertctl_vars[DIVERTCTL_MAXID] = DIVERTCTL_VARS;
60 
61 int divbhashsize = DIVERTHASHSIZE;
62 
63 static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
64 
65 void divert_detach(struct inpcb *);
66 
67 void
68 divert_init()
69 {
70 	in_pcbinit(&divbtable, divbhashsize);
71 }
72 
/*
 * Protocol input hook.  Nothing is processed here: the mbuf chain is
 * released and any further variadic arguments are ignored.
 */
void
divert_input(struct mbuf *m, ...)
{
	m_freem(m);
}
78 
79 int
80 divert_output(struct mbuf *m, ...)
81 {
82 	struct inpcb *inp;
83 	struct ifqueue *inq;
84 	struct mbuf *nam, *control;
85 	struct sockaddr_in *sin;
86 	struct socket *so;
87 	struct ifaddr *ifa;
88 	int s, error = 0, p_hdrlen = 0;
89 	va_list ap;
90 	struct ip *ip;
91 	u_int16_t off, csum = 0;
92 	u_int8_t nxt;
93 	size_t p_off = 0;
94 
95 	va_start(ap, m);
96 	inp = va_arg(ap, struct inpcb *);
97 	nam = va_arg(ap, struct mbuf *);
98 	control = va_arg(ap, struct mbuf *);
99 	va_end(ap);
100 
101 	m->m_pkthdr.rcvif = NULL;
102 	m->m_nextpkt = NULL;
103 	m->m_pkthdr.rdomain = inp->inp_rtableid;
104 
105 	if (control)
106 		m_freem(control);
107 
108 	sin = mtod(nam, struct sockaddr_in *);
109 	so = inp->inp_socket;
110 
111 	/* Do basic sanity checks. */
112 	if (m->m_pkthdr.len < sizeof(struct ip))
113 		goto fail;
114 	if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
115 		/* m_pullup() has freed the mbuf, so just return. */
116 		divstat.divs_errors++;
117 		return (ENOBUFS);
118 	}
119 	ip = mtod(m, struct ip *);
120 	if (ip->ip_v != IPVERSION)
121 		goto fail;
122 	off = ip->ip_hl << 2;
123 	if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off ||
124 	    m->m_pkthdr.len < ntohs(ip->ip_len))
125 		goto fail;
126 
127 	/*
128 	 * Recalculate IP and protocol checksums since the userspace application
129 	 * may have modified the packet prior to reinjection.
130 	 */
131 	ip->ip_sum = 0;
132 	ip->ip_sum = in_cksum(m, off);
133 	nxt = ip->ip_p;
134 	switch (ip->ip_p) {
135 	case IPPROTO_TCP:
136 		p_hdrlen = sizeof(struct tcphdr);
137 		p_off = offsetof(struct tcphdr, th_sum);
138 		break;
139 	case IPPROTO_UDP:
140 		p_hdrlen = sizeof(struct udphdr);
141 		p_off = offsetof(struct udphdr, uh_sum);
142 		break;
143 	case IPPROTO_ICMP:
144 		p_hdrlen = sizeof(struct icmp);
145 		p_off = offsetof(struct icmp, icmp_cksum);
146 		nxt = 0;
147 		break;
148 	default:
149 		/* nothing */
150 		break;
151 	}
152 	if (p_hdrlen) {
153 		if (m->m_pkthdr.len < off + p_hdrlen)
154 			goto fail;
155 
156 		if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, M_NOWAIT)))
157 			goto fail;
158 		csum = in4_cksum(m, nxt, off, m->m_pkthdr.len - off);
159 		if (ip->ip_p == IPPROTO_UDP && csum == 0)
160 			csum = 0xffff;
161 		if ((error = m_copyback(m, off + p_off, sizeof(csum), &csum, M_NOWAIT)))
162 			goto fail;
163 	}
164 
165 	m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
166 
167 	if (sin->sin_addr.s_addr != INADDR_ANY) {
168 		ipaddr.sin_addr = sin->sin_addr;
169 		ifa = ifa_ifwithaddr(sintosa(&ipaddr), m->m_pkthdr.rdomain);
170 		if (ifa == NULL) {
171 			error = EADDRNOTAVAIL;
172 			goto fail;
173 		}
174 		m->m_pkthdr.rcvif = ifa->ifa_ifp;
175 
176 		inq = &ipintrq;
177 
178 		s = splnet();
179 		IF_INPUT_ENQUEUE(inq, m);
180 		schednetisr(NETISR_IP);
181 		splx(s);
182 	} else {
183 		error = ip_output(m, (void *)NULL, &inp->inp_route,
184 		    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0)
185 		    | IP_ALLOWBROADCAST | IP_RAWOUTPUT, (void *)NULL,
186 		    (void *)NULL);
187 		if (error == EACCES)	/* translate pf(4) error for userland */
188 			error = EHOSTUNREACH;
189 	}
190 
191 	divstat.divs_opackets++;
192 	return (error);
193 
194 fail:
195 	m_freem(m);
196 	divstat.divs_errors++;
197 	return (error ? error : EINVAL);
198 }
199 
200 int
201 divert_packet(struct mbuf *m, int dir)
202 {
203 	struct inpcb *inp;
204 	struct socket *sa = NULL;
205 	struct sockaddr_in addr;
206 	struct pf_divert *divert;
207 
208 	inp = NULL;
209 	divstat.divs_ipackets++;
210 
211 	if (m->m_len < sizeof(struct ip) &&
212 	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
213 		divstat.divs_errors++;
214 		return (0);
215 	}
216 
217 	divert = pf_find_divert(m);
218 	if (divert == NULL) {
219 		divstat.divs_errors++;
220 		m_freem(m);
221 		return (0);
222 	}
223 
224 	TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
225 		if (inp->inp_lport != divert->port)
226 			continue;
227 		if (inp->inp_divertfl == 0)
228 			break;
229 		if (dir == PF_IN && !(inp->inp_divertfl & IPPROTO_DIVERT_RESP))
230 			return (-1);
231 		if (dir == PF_OUT && !(inp->inp_divertfl & IPPROTO_DIVERT_INIT))
232 			return (-1);
233 		break;
234 	}
235 
236 	memset(&addr, 0, sizeof(addr));
237 	addr.sin_family = AF_INET;
238 	addr.sin_len = sizeof(addr);
239 
240 	if (dir == PF_IN) {
241 		struct ifaddr *ifa;
242 		struct ifnet *ifp;
243 
244 		ifp = m->m_pkthdr.rcvif;
245 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
246 			if (ifa->ifa_addr->sa_family != AF_INET)
247 				continue;
248 			addr.sin_addr.s_addr = ((struct sockaddr_in *)
249 			    ifa->ifa_addr)->sin_addr.s_addr;
250 			break;
251 		}
252 	}
253 	/* force checksum calculation */
254 	if (dir == PF_OUT)
255 		in_proto_cksum_out(m, NULL);
256 
257 	if (inp) {
258 		sa = inp->inp_socket;
259 		if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&addr,
260 		    m, NULL) == 0) {
261 			divstat.divs_fullsock++;
262 			m_freem(m);
263 			return (0);
264 		} else
265 			sorwakeup(inp->inp_socket);
266 	}
267 
268 	if (sa == NULL) {
269 		divstat.divs_noport++;
270 		m_freem(m);
271 	}
272 	return (0);
273 }
274 
275 /*ARGSUSED*/
276 int
277 divert_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *addr,
278     struct mbuf *control, struct proc *p)
279 {
280 	struct inpcb *inp = sotoinpcb(so);
281 	int error = 0;
282 	int s;
283 
284 	if (req == PRU_CONTROL) {
285 		return (in_control(so, (u_long)m, (caddr_t)addr,
286 		    (struct ifnet *)control));
287 	}
288 	if (inp == NULL && req != PRU_ATTACH) {
289 		error = EINVAL;
290 		goto release;
291 	}
292 	switch (req) {
293 
294 	case PRU_ATTACH:
295 		if (inp != NULL) {
296 			error = EINVAL;
297 			break;
298 		}
299 		if ((so->so_state & SS_PRIV) == 0) {
300 			error = EACCES;
301 			break;
302 		}
303 		s = splsoftnet();
304 		error = in_pcballoc(so, &divbtable);
305 		splx(s);
306 		if (error)
307 			break;
308 
309 		error = soreserve(so, divert_sendspace, divert_recvspace);
310 		if (error)
311 			break;
312 		sotoinpcb(so)->inp_flags |= INP_HDRINCL;
313 		break;
314 
315 	case PRU_DETACH:
316 		divert_detach(inp);
317 		break;
318 
319 	case PRU_BIND:
320 		s = splsoftnet();
321 		error = in_pcbbind(inp, addr, p);
322 		splx(s);
323 		break;
324 
325 	case PRU_SHUTDOWN:
326 		socantsendmore(so);
327 		break;
328 
329 	case PRU_SEND:
330 		return (divert_output(m, inp, addr, control));
331 
332 	case PRU_ABORT:
333 		soisdisconnected(so);
334 		divert_detach(inp);
335 		break;
336 
337 	case PRU_SOCKADDR:
338 		in_setsockaddr(inp, addr);
339 		break;
340 
341 	case PRU_PEERADDR:
342 		in_setpeeraddr(inp, addr);
343 		break;
344 
345 	case PRU_SENSE:
346 		return (0);
347 
348 	case PRU_LISTEN:
349 	case PRU_CONNECT:
350 	case PRU_CONNECT2:
351 	case PRU_ACCEPT:
352 	case PRU_DISCONNECT:
353 	case PRU_SENDOOB:
354 	case PRU_FASTTIMO:
355 	case PRU_SLOWTIMO:
356 	case PRU_PROTORCV:
357 	case PRU_PROTOSEND:
358 		error =  EOPNOTSUPP;
359 		break;
360 
361 	case PRU_RCVD:
362 	case PRU_RCVOOB:
363 		return (EOPNOTSUPP);	/* do not free mbuf's */
364 
365 	default:
366 		panic("divert_usrreq");
367 	}
368 
369 release:
370 	if (control) {
371 		m_freem(control);
372 	}
373 	if (m)
374 		m_freem(m);
375 	return (error);
376 }
377 
/*
 * Detach the pcb from its socket.  The call to in_pcbdetach() is made at
 * splsoftnet and the previous priority level is restored afterwards.
 */
void
divert_detach(struct inpcb *inp)
{
	int s;

	s = splsoftnet();
	in_pcbdetach(inp);
	splx(s);
}
386 
387 /*
388  * Sysctl for divert variables.
389  */
390 int
391 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
392     size_t newlen)
393 {
394 	/* All sysctl names at this level are terminal. */
395 	if (namelen != 1)
396 		return (ENOTDIR);
397 
398 	switch (name[0]) {
399 	case DIVERTCTL_SENDSPACE:
400 		return (sysctl_int(oldp, oldlenp, newp, newlen,
401 		    &divert_sendspace));
402 	case DIVERTCTL_RECVSPACE:
403 		return (sysctl_int(oldp, oldlenp, newp, newlen,
404 		    &divert_recvspace));
405 	case DIVERTCTL_STATS:
406 		if (newp != NULL)
407 			return (EPERM);
408 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
409 		    &divstat, sizeof(divstat)));
410 	default:
411 		if (name[0] < DIVERTCTL_MAXID)
412 			return sysctl_int_arr(divertctl_vars, name, namelen,
413 			    oldp, oldlenp, newp, newlen);
414 
415 		return (ENOPROTOOPT);
416 	}
417 	/* NOTREACHED */
418 }
419