1 /* $OpenBSD: ip6_divert.c,v 1.98 2025/01/23 12:51:51 bluhm Exp $ */
2
3 /*
4 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/protosw.h>
23 #include <sys/socket.h>
24 #include <sys/socketvar.h>
25 #include <sys/sysctl.h>
26
27 #include <net/if.h>
28 #include <net/route.h>
29 #include <net/if_var.h>
30 #include <net/netisr.h>
31
32 #include <netinet/in.h>
33 #include <netinet6/in6_var.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/ip6.h>
37 #include <netinet6/ip6_var.h>
38 #include <netinet/in_pcb.h>
39 #include <netinet/ip_divert.h>
40 #include <netinet6/ip6_divert.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/icmp6.h>
44
45 #include <net/pfvar.h>
46
47 /*
48 * Locks used to protect data:
49 * a atomic
50 */
51
/* PCB table for divert6 sockets; set up in divert6_init(). */
struct inpcbtable divb6table;
/*
 * Counter storage for the div6s_* statistics; allocated in divert6_init()
 * and read out by divert6_sysctl_div6stat().
 */
struct cpumem *div6counters;

/*
 * Default socket buffer reservations used by divert6_attach().  The
 * defaults can be overridden at build time, and the variables are
 * exposed through divert6ctl_vars for runtime tuning.
 */
#ifndef DIVERT_SENDSPACE
#define DIVERT_SENDSPACE	(65536 + 100)
#endif
u_int	divert6_sendspace = DIVERT_SENDSPACE;	/* [a] */
#ifndef DIVERT_RECVSPACE
#define DIVERT_RECVSPACE	(65536 + 100)
#endif
u_int	divert6_recvspace = DIVERT_RECVSPACE;	/* [a] */

/* Build-time default for divb6hashsize. */
#ifndef DIVERTHASHSIZE
#define DIVERTHASHSIZE	128
#endif
67
/*
 * Table handed to sysctl_bounded_arr() by divert6_sysctl(): one entry per
 * tunable, pairing a DIVERT6CTL_* name with the variable it controls.
 * NOTE(review): the trailing 0 and INT_MAX are presumably the accepted
 * minimum and maximum -- confirm against struct sysctl_bounded_args.
 */
const struct sysctl_bounded_args divert6ctl_vars[] = {
	{ DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX },
	{ DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX },
};
72
/*
 * User request handlers for divert6 sockets.  Attach and send are the
 * IPv6-specific functions defined in this file; detach, bind and shutdown
 * reuse the divert_* handlers, and the remaining entries point at the
 * generic in6_* routines.
 */
const struct pr_usrreqs divert6_usrreqs = {
	.pru_attach	= divert6_attach,
	.pru_detach	= divert_detach,
	.pru_bind	= divert_bind,
	.pru_shutdown	= divert_shutdown,
	.pru_send	= divert6_send,
	.pru_control	= in6_control,
	.pru_sockaddr	= in6_sockaddr,
	.pru_peeraddr	= in6_peeraddr,
};
83
/* Hash size passed to in_pcbinit() for divb6table. */
int divb6hashsize = DIVERTHASHSIZE;

/* Reinjection path behind divert6_send(); defined below. */
int	divert6_output(struct inpcb *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
88
89 void
divert6_init(void)90 divert6_init(void)
91 {
92 in_pcbinit(&divb6table, divb6hashsize);
93 div6counters = counters_alloc(div6s_ncounters);
94 }
95
96 int
divert6_output(struct inpcb * inp,struct mbuf * m,struct mbuf * nam,struct mbuf * control)97 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam,
98 struct mbuf *control)
99 {
100 struct sockaddr_in6 *sin6;
101 int error, min_hdrlen, nxt, off, dir;
102 struct ip6_hdr *ip6;
103
104 m_freem(control);
105
106 if ((error = in6_nam2sin6(nam, &sin6)))
107 goto fail;
108
109 /* Do basic sanity checks. */
110 if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
111 goto fail;
112 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
113 /* m_pullup() has freed the mbuf, so just return. */
114 div6stat_inc(div6s_errors);
115 return (ENOBUFS);
116 }
117 ip6 = mtod(m, struct ip6_hdr *);
118 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
119 goto fail;
120 if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen))
121 goto fail;
122
123 /*
124 * Recalculate the protocol checksum since the userspace application
125 * may have modified the packet prior to reinjection.
126 */
127 off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
128 if (off < sizeof(struct ip6_hdr))
129 goto fail;
130
131 dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? PF_OUT : PF_IN);
132
133 switch (nxt) {
134 case IPPROTO_TCP:
135 min_hdrlen = sizeof(struct tcphdr);
136 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
137 break;
138 case IPPROTO_UDP:
139 min_hdrlen = sizeof(struct udphdr);
140 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
141 break;
142 case IPPROTO_ICMPV6:
143 min_hdrlen = sizeof(struct icmp6_hdr);
144 m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT;
145 break;
146 default:
147 min_hdrlen = 0;
148 break;
149 }
150 if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen)
151 goto fail;
152
153 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET;
154
155 if (dir == PF_IN) {
156 struct rtentry *rt;
157 struct ifnet *ifp;
158
159 rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid);
160 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
161 rtfree(rt);
162 error = EADDRNOTAVAIL;
163 goto fail;
164 }
165 m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
166 rtfree(rt);
167
168 /*
169 * Recalculate the protocol checksum for the inbound packet
170 * since the userspace application may have modified the packet
171 * prior to reinjection.
172 */
173 in6_proto_cksum_out(m, NULL);
174
175 ifp = if_get(m->m_pkthdr.ph_ifidx);
176 if (ifp == NULL) {
177 error = ENETDOWN;
178 goto fail;
179 }
180 ipv6_input(ifp, m);
181 if_put(ifp);
182 } else {
183 m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
184
185 error = ip6_output(m, NULL, &inp->inp_route,
186 IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
187 }
188
189 div6stat_inc(div6s_opackets);
190 return (error);
191
192 fail:
193 div6stat_inc(div6s_errors);
194 m_freem(m);
195 return (error ? error : EINVAL);
196 }
197
198 void
divert6_packet(struct mbuf * m,int dir,u_int16_t divert_port)199 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port)
200 {
201 struct inpcb *inp = NULL;
202 struct socket *so;
203 struct sockaddr_in6 sin6;
204
205 div6stat_inc(div6s_ipackets);
206
207 if (m->m_len < sizeof(struct ip6_hdr) &&
208 (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
209 div6stat_inc(div6s_errors);
210 goto bad;
211 }
212
213 mtx_enter(&divb6table.inpt_mtx);
214 TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
215 if (inp->inp_lport != divert_port)
216 continue;
217 in_pcbref(inp);
218 break;
219 }
220 mtx_leave(&divb6table.inpt_mtx);
221 if (inp == NULL) {
222 div6stat_inc(div6s_noport);
223 goto bad;
224 }
225
226 memset(&sin6, 0, sizeof(sin6));
227 sin6.sin6_family = AF_INET6;
228 sin6.sin6_len = sizeof(sin6);
229
230 if (dir == PF_IN) {
231 struct ifaddr *ifa;
232 struct ifnet *ifp;
233
234 ifp = if_get(m->m_pkthdr.ph_ifidx);
235 if (ifp == NULL) {
236 div6stat_inc(div6s_errors);
237 goto bad;
238 }
239 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
240 if (ifa->ifa_addr->sa_family != AF_INET6)
241 continue;
242 sin6.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr;
243 break;
244 }
245 if_put(ifp);
246 } else {
247 /*
248 * Calculate protocol checksum for outbound packet diverted
249 * to userland. pf out rule diverts before cksum offload.
250 */
251 in6_proto_cksum_out(m, NULL);
252 }
253
254 so = inp->inp_socket;
255 mtx_enter(&so->so_rcv.sb_mtx);
256 if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
257 mtx_leave(&so->so_rcv.sb_mtx);
258 div6stat_inc(div6s_fullsock);
259 goto bad;
260 }
261 mtx_leave(&so->so_rcv.sb_mtx);
262 sorwakeup(so);
263
264 in_pcbunref(inp);
265 return;
266
267 bad:
268 if (inp != NULL)
269 in_pcbunref(inp);
270 m_freem(m);
271 }
272
273 int
divert6_attach(struct socket * so,int proto,int wait)274 divert6_attach(struct socket *so, int proto, int wait)
275 {
276 int error;
277
278 if (so->so_pcb != NULL)
279 return EINVAL;
280 if ((so->so_state & SS_PRIV) == 0)
281 return EACCES;
282
283 error = soreserve(so, atomic_load_int(&divert6_sendspace),
284 atomic_load_int(&divert6_recvspace));
285 if (error)
286 return (error);
287 error = in_pcballoc(so, &divb6table, wait);
288 if (error)
289 return (error);
290
291 return (0);
292 }
293
/*
 * Send handler for divert6 sockets: asserts that the socket is locked
 * and passes the packet, address and control mbufs to divert6_output()
 * together with the socket's pcb.  Returns whatever divert6_output()
 * returns.
 */
int
divert6_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	soassertlocked(so);

	return (divert6_output(sotoinpcb(so), m, addr, control));
}
303
304 int
divert6_sysctl_div6stat(void * oldp,size_t * oldlenp,void * newp)305 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp)
306 {
307 uint64_t counters[div6s_ncounters];
308 struct div6stat div6stat;
309 u_long *words = (u_long *)&div6stat;
310 int i;
311
312 CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long)));
313
314 counters_read(div6counters, counters, nitems(counters), NULL);
315
316 for (i = 0; i < nitems(counters); i++)
317 words[i] = (u_long)counters[i];
318
319 return (sysctl_rdstruct(oldp, oldlenp, newp,
320 &div6stat, sizeof(div6stat)));
321 }
322
323 /*
324 * Sysctl for divert variables.
325 */
326 int
divert6_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)327 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
328 void *newp, size_t newlen)
329 {
330 /* All sysctl names at this level are terminal. */
331 if (namelen != 1)
332 return (ENOTDIR);
333
334 switch (name[0]) {
335 case DIVERT6CTL_STATS:
336 return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
337 default:
338 return (sysctl_bounded_arr(divert6ctl_vars,
339 nitems(divert6ctl_vars), name, namelen, oldp, oldlenp,
340 newp, newlen));
341 }
342 /* NOTREACHED */
343 }
344