1 /* $OpenBSD: ip_divert.c,v 1.95 2024/03/05 09:45:13 bluhm Exp $ */
2
3 /*
4 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31
32 #include <netinet/in.h>
33 #include <netinet/in_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/in_pcb.h>
37 #include <netinet/ip_divert.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/ip_icmp.h>
41
42 #include <net/pfvar.h>
43
44 struct inpcbtable divbtable;
45 struct cpumem *divcounters;
46
47 #ifndef DIVERT_SENDSPACE
48 #define DIVERT_SENDSPACE (65536 + 100)
49 #endif
50 u_int divert_sendspace = DIVERT_SENDSPACE;
51 #ifndef DIVERT_RECVSPACE
52 #define DIVERT_RECVSPACE (65536 + 100)
53 #endif
54 u_int divert_recvspace = DIVERT_RECVSPACE;
55
56 #ifndef DIVERTHASHSIZE
57 #define DIVERTHASHSIZE 128
58 #endif
59
60 const struct sysctl_bounded_args divertctl_vars[] = {
61 { DIVERTCTL_RECVSPACE, &divert_recvspace, 0, INT_MAX },
62 { DIVERTCTL_SENDSPACE, &divert_sendspace, 0, INT_MAX },
63 };
64
65 const struct pr_usrreqs divert_usrreqs = {
66 .pru_attach = divert_attach,
67 .pru_detach = divert_detach,
68 .pru_lock = divert_lock,
69 .pru_unlock = divert_unlock,
70 .pru_locked = divert_locked,
71 .pru_bind = divert_bind,
72 .pru_shutdown = divert_shutdown,
73 .pru_send = divert_send,
74 .pru_control = in_control,
75 .pru_sockaddr = in_sockaddr,
76 .pru_peeraddr = in_peeraddr,
77 };
78
79 int divbhashsize = DIVERTHASHSIZE;
80
81 int divert_output(struct inpcb *, struct mbuf *, struct mbuf *,
82 struct mbuf *);
83 void
divert_init(void)84 divert_init(void)
85 {
86 in_pcbinit(&divbtable, divbhashsize);
87 divcounters = counters_alloc(divs_ncounters);
88 }
89
90 int
divert_output(struct inpcb * inp,struct mbuf * m,struct mbuf * nam,struct mbuf * control)91 divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
92 struct mbuf *control)
93 {
94 struct sockaddr_in *sin;
95 int error, min_hdrlen, off, dir;
96 struct ip *ip;
97
98 m_freem(control);
99
100 if ((error = in_nam2sin(nam, &sin)))
101 goto fail;
102
103 if (m->m_pkthdr.len > IP_MAXPACKET) {
104 error = EMSGSIZE;
105 goto fail;
106 }
107
108 m = rip_chkhdr(m, NULL);
109 if (m == NULL) {
110 error = EINVAL;
111 goto fail;
112 }
113
114 ip = mtod(m, struct ip *);
115 off = ip->ip_hl << 2;
116
117 dir = (sin->sin_addr.s_addr == INADDR_ANY ? PF_OUT : PF_IN);
118
119 switch (ip->ip_p) {
120 case IPPROTO_TCP:
121 min_hdrlen = sizeof(struct tcphdr);
122 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
123 break;
124 case IPPROTO_UDP:
125 min_hdrlen = sizeof(struct udphdr);
126 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
127 break;
128 case IPPROTO_ICMP:
129 min_hdrlen = ICMP_MINLEN;
130 m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
131 break;
132 default:
133 min_hdrlen = 0;
134 break;
135 }
136 if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen) {
137 error = EINVAL;
138 goto fail;
139 }
140
141 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
142
143 if (dir == PF_IN) {
144 struct rtentry *rt;
145 struct ifnet *ifp;
146
147 rt = rtalloc(sintosa(sin), 0, inp->inp_rtableid);
148 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
149 rtfree(rt);
150 error = EADDRNOTAVAIL;
151 goto fail;
152 }
153 m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
154 rtfree(rt);
155
156 /*
157 * Recalculate IP and protocol checksums for the inbound packet
158 * since the userspace application may have modified the packet
159 * prior to reinjection.
160 */
161 in_hdr_cksum_out(m, NULL);
162 in_proto_cksum_out(m, NULL);
163
164 ifp = if_get(m->m_pkthdr.ph_ifidx);
165 if (ifp == NULL) {
166 error = ENETDOWN;
167 goto fail;
168 }
169 ipv4_input(ifp, m);
170 if_put(ifp);
171 } else {
172 m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
173
174 error = ip_output(m, NULL, &inp->inp_route,
175 IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0);
176 }
177
178 divstat_inc(divs_opackets);
179 return (error);
180
181 fail:
182 m_freem(m);
183 divstat_inc(divs_errors);
184 return (error);
185 }
186
187 void
divert_packet(struct mbuf * m,int dir,u_int16_t divert_port)188 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port)
189 {
190 struct inpcb *inp = NULL;
191 struct socket *so;
192 struct sockaddr_in sin;
193
194 divstat_inc(divs_ipackets);
195
196 if (m->m_len < sizeof(struct ip) &&
197 (m = m_pullup(m, sizeof(struct ip))) == NULL) {
198 divstat_inc(divs_errors);
199 goto bad;
200 }
201
202 mtx_enter(&divbtable.inpt_mtx);
203 TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
204 if (inp->inp_lport != divert_port)
205 continue;
206 in_pcbref(inp);
207 break;
208 }
209 mtx_leave(&divbtable.inpt_mtx);
210 if (inp == NULL) {
211 divstat_inc(divs_noport);
212 goto bad;
213 }
214
215 memset(&sin, 0, sizeof(sin));
216 sin.sin_family = AF_INET;
217 sin.sin_len = sizeof(sin);
218
219 if (dir == PF_IN) {
220 struct ifaddr *ifa;
221 struct ifnet *ifp;
222
223 ifp = if_get(m->m_pkthdr.ph_ifidx);
224 if (ifp == NULL) {
225 divstat_inc(divs_errors);
226 goto bad;
227 }
228 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
229 if (ifa->ifa_addr->sa_family != AF_INET)
230 continue;
231 sin.sin_addr = satosin(ifa->ifa_addr)->sin_addr;
232 break;
233 }
234 if_put(ifp);
235 } else {
236 /*
237 * Calculate IP and protocol checksums for outbound packet
238 * diverted to userland. pf rule diverts before cksum offload.
239 */
240 in_hdr_cksum_out(m, NULL);
241 in_proto_cksum_out(m, NULL);
242 }
243
244 so = inp->inp_socket;
245 mtx_enter(&so->so_rcv.sb_mtx);
246 if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
247 mtx_leave(&so->so_rcv.sb_mtx);
248 divstat_inc(divs_fullsock);
249 goto bad;
250 }
251 mtx_leave(&so->so_rcv.sb_mtx);
252 sorwakeup(so);
253
254 in_pcbunref(inp);
255 return;
256
257 bad:
258 if (inp != NULL)
259 in_pcbunref(inp);
260 m_freem(m);
261 }
262
263 int
divert_attach(struct socket * so,int proto,int wait)264 divert_attach(struct socket *so, int proto, int wait)
265 {
266 int error;
267
268 if (so->so_pcb != NULL)
269 return EINVAL;
270 if ((so->so_state & SS_PRIV) == 0)
271 return EACCES;
272
273 error = in_pcballoc(so, &divbtable, wait);
274 if (error)
275 return error;
276
277 error = soreserve(so, divert_sendspace, divert_recvspace);
278 if (error)
279 return error;
280
281 sotoinpcb(so)->inp_flags |= INP_HDRINCL;
282 return (0);
283 }
284
285 int
divert_detach(struct socket * so)286 divert_detach(struct socket *so)
287 {
288 struct inpcb *inp = sotoinpcb(so);
289
290 soassertlocked(so);
291
292 if (inp == NULL)
293 return (EINVAL);
294
295 in_pcbdetach(inp);
296 return (0);
297 }
298
299 void
divert_lock(struct socket * so)300 divert_lock(struct socket *so)
301 {
302 struct inpcb *inp = sotoinpcb(so);
303
304 NET_ASSERT_LOCKED();
305 mtx_enter(&inp->inp_mtx);
306 }
307
308 void
divert_unlock(struct socket * so)309 divert_unlock(struct socket *so)
310 {
311 struct inpcb *inp = sotoinpcb(so);
312
313 NET_ASSERT_LOCKED();
314 mtx_leave(&inp->inp_mtx);
315 }
316
317 int
divert_locked(struct socket * so)318 divert_locked(struct socket *so)
319 {
320 struct inpcb *inp = sotoinpcb(so);
321
322 return mtx_owned(&inp->inp_mtx);
323 }
324
/*
 * Bind a divert socket to the address in addr on behalf of process p.
 * The socket must be locked by the caller; the work and the return
 * value are those of in_pcbbind().
 */
int
divert_bind(struct socket *so, struct mbuf *addr, struct proc *p)
{
	soassertlocked(so);
	return in_pcbbind(sotoinpcb(so), addr, p);
}
333
/*
 * Shut down the sending side of a divert socket by calling
 * socantsendmore().  The socket must be locked by the caller.
 * Always returns 0.
 */
int
divert_shutdown(struct socket *so)
{
	soassertlocked(so);

	socantsendmore(so);

	return (0);
}
341
/*
 * Send request for divert sockets: pass the packet, the address and the
 * control data on to divert_output() together with the socket's PCB.
 * The socket must be locked by the caller.  Returns divert_output()'s
 * result.
 */
int
divert_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert_output(sotoinpcb(so), m, addr, control));
}
351
352 int
divert_sysctl_divstat(void * oldp,size_t * oldlenp,void * newp)353 divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp)
354 {
355 uint64_t counters[divs_ncounters];
356 struct divstat divstat;
357 u_long *words = (u_long *)&divstat;
358 int i;
359
360 CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long)));
361 memset(&divstat, 0, sizeof divstat);
362 counters_read(divcounters, counters, nitems(counters), NULL);
363
364 for (i = 0; i < nitems(counters); i++)
365 words[i] = (u_long)counters[i];
366
367 return (sysctl_rdstruct(oldp, oldlenp, newp,
368 &divstat, sizeof(divstat)));
369 }
370
371 /*
372 * Sysctl for divert variables.
373 */
374 int
divert_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)375 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
376 size_t newlen)
377 {
378 int error;
379
380 /* All sysctl names at this level are terminal. */
381 if (namelen != 1)
382 return (ENOTDIR);
383
384 switch (name[0]) {
385 case DIVERTCTL_STATS:
386 return (divert_sysctl_divstat(oldp, oldlenp, newp));
387 default:
388 NET_LOCK();
389 error = sysctl_bounded_arr(divertctl_vars,
390 nitems(divertctl_vars), name, namelen, oldp, oldlenp, newp,
391 newlen);
392 NET_UNLOCK();
393 return (error);
394 }
395 /* NOTREACHED */
396 }
397