/*	$OpenBSD: ip6_divert.c,v 1.98 2025/01/23 12:51:51 bluhm Exp $ */

/*
 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet6/in6_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_divert.h>
#include <netinet6/ip6_divert.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/icmp6.h>

#include <net/pfvar.h>

/*
 * Locks used to protect data:
 *	a	atomic
 */

/* PCB table for divert6 sockets; set up by in_pcbinit() in divert6_init(). */
struct	inpcbtable	divb6table;
/* Statistics counters; allocated by counters_alloc() in divert6_init(). */
struct	cpumem		*div6counters;

/*
 * Send and receive buffer sizes handed to soreserve() when a divert6
 * socket is attached.  Both are read with atomic_load_int() in
 * divert6_attach() and are exposed through divert6ctl_vars.
 */
#ifndef DIVERT_SENDSPACE
#define DIVERT_SENDSPACE	(65536 + 100)
#endif
u_int	divert6_sendspace = DIVERT_SENDSPACE;	/* [a] */
#ifndef DIVERT_RECVSPACE
#define DIVERT_RECVSPACE	(65536 + 100)
#endif
u_int	divert6_recvspace = DIVERT_RECVSPACE;	/* [a] */

64 #ifndef DIVERTHASHSIZE 65 #define DIVERTHASHSIZE 128 66 #endif 67 68 const struct sysctl_bounded_args divert6ctl_vars[] = { 69 { DIVERT6CTL_RECVSPACE, &divert6_recvspace, 0, INT_MAX }, 70 { DIVERT6CTL_SENDSPACE, &divert6_sendspace, 0, INT_MAX }, 71 }; 72 73 const struct pr_usrreqs divert6_usrreqs = { 74 .pru_attach = divert6_attach, 75 .pru_detach = divert_detach, 76 .pru_bind = divert_bind, 77 .pru_shutdown = divert_shutdown, 78 .pru_send = divert6_send, 79 .pru_control = in6_control, 80 .pru_sockaddr = in6_sockaddr, 81 .pru_peeraddr = in6_peeraddr, 82 }; 83 84 int divb6hashsize = DIVERTHASHSIZE; 85 86 int divert6_output(struct inpcb *, struct mbuf *, struct mbuf *, 87 struct mbuf *); 88 89 void 90 divert6_init(void) 91 { 92 in_pcbinit(&divb6table, divb6hashsize); 93 div6counters = counters_alloc(div6s_ncounters); 94 } 95 96 int 97 divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam, 98 struct mbuf *control) 99 { 100 struct sockaddr_in6 *sin6; 101 int error, min_hdrlen, nxt, off, dir; 102 struct ip6_hdr *ip6; 103 104 m_freem(control); 105 106 if ((error = in6_nam2sin6(nam, &sin6))) 107 goto fail; 108 109 /* Do basic sanity checks. */ 110 if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) 111 goto fail; 112 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { 113 /* m_pullup() has freed the mbuf, so just return. */ 114 div6stat_inc(div6s_errors); 115 return (ENOBUFS); 116 } 117 ip6 = mtod(m, struct ip6_hdr *); 118 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) 119 goto fail; 120 if (m->m_pkthdr.len < sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen)) 121 goto fail; 122 123 /* 124 * Recalculate the protocol checksum since the userspace application 125 * may have modified the packet prior to reinjection. 126 */ 127 off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); 128 if (off < sizeof(struct ip6_hdr)) 129 goto fail; 130 131 dir = (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ? 
PF_OUT : PF_IN); 132 133 switch (nxt) { 134 case IPPROTO_TCP: 135 min_hdrlen = sizeof(struct tcphdr); 136 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 137 break; 138 case IPPROTO_UDP: 139 min_hdrlen = sizeof(struct udphdr); 140 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 141 break; 142 case IPPROTO_ICMPV6: 143 min_hdrlen = sizeof(struct icmp6_hdr); 144 m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; 145 break; 146 default: 147 min_hdrlen = 0; 148 break; 149 } 150 if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen) 151 goto fail; 152 153 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET; 154 155 if (dir == PF_IN) { 156 struct rtentry *rt; 157 struct ifnet *ifp; 158 159 rt = rtalloc(sin6tosa(sin6), 0, inp->inp_rtableid); 160 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 161 rtfree(rt); 162 error = EADDRNOTAVAIL; 163 goto fail; 164 } 165 m->m_pkthdr.ph_ifidx = rt->rt_ifidx; 166 rtfree(rt); 167 168 /* 169 * Recalculate the protocol checksum for the inbound packet 170 * since the userspace application may have modified the packet 171 * prior to reinjection. 172 */ 173 in6_proto_cksum_out(m, NULL); 174 175 ifp = if_get(m->m_pkthdr.ph_ifidx); 176 if (ifp == NULL) { 177 error = ENETDOWN; 178 goto fail; 179 } 180 ipv6_input(ifp, m); 181 if_put(ifp); 182 } else { 183 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 184 185 error = ip6_output(m, NULL, &inp->inp_route, 186 IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL); 187 } 188 189 div6stat_inc(div6s_opackets); 190 return (error); 191 192 fail: 193 div6stat_inc(div6s_errors); 194 m_freem(m); 195 return (error ? 
error : EINVAL); 196 } 197 198 void 199 divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port) 200 { 201 struct inpcb *inp = NULL; 202 struct socket *so; 203 struct sockaddr_in6 sin6; 204 205 div6stat_inc(div6s_ipackets); 206 207 if (m->m_len < sizeof(struct ip6_hdr) && 208 (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { 209 div6stat_inc(div6s_errors); 210 goto bad; 211 } 212 213 mtx_enter(&divb6table.inpt_mtx); 214 TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) { 215 if (inp->inp_lport != divert_port) 216 continue; 217 in_pcbref(inp); 218 break; 219 } 220 mtx_leave(&divb6table.inpt_mtx); 221 if (inp == NULL) { 222 div6stat_inc(div6s_noport); 223 goto bad; 224 } 225 226 memset(&sin6, 0, sizeof(sin6)); 227 sin6.sin6_family = AF_INET6; 228 sin6.sin6_len = sizeof(sin6); 229 230 if (dir == PF_IN) { 231 struct ifaddr *ifa; 232 struct ifnet *ifp; 233 234 ifp = if_get(m->m_pkthdr.ph_ifidx); 235 if (ifp == NULL) { 236 div6stat_inc(div6s_errors); 237 goto bad; 238 } 239 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 240 if (ifa->ifa_addr->sa_family != AF_INET6) 241 continue; 242 sin6.sin6_addr = satosin6(ifa->ifa_addr)->sin6_addr; 243 break; 244 } 245 if_put(ifp); 246 } else { 247 /* 248 * Calculate protocol checksum for outbound packet diverted 249 * to userland. pf out rule diverts before cksum offload. 
250 */ 251 in6_proto_cksum_out(m, NULL); 252 } 253 254 so = inp->inp_socket; 255 mtx_enter(&so->so_rcv.sb_mtx); 256 if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) { 257 mtx_leave(&so->so_rcv.sb_mtx); 258 div6stat_inc(div6s_fullsock); 259 goto bad; 260 } 261 mtx_leave(&so->so_rcv.sb_mtx); 262 sorwakeup(so); 263 264 in_pcbunref(inp); 265 return; 266 267 bad: 268 if (inp != NULL) 269 in_pcbunref(inp); 270 m_freem(m); 271 } 272 273 int 274 divert6_attach(struct socket *so, int proto, int wait) 275 { 276 int error; 277 278 if (so->so_pcb != NULL) 279 return EINVAL; 280 if ((so->so_state & SS_PRIV) == 0) 281 return EACCES; 282 283 error = soreserve(so, atomic_load_int(&divert6_sendspace), 284 atomic_load_int(&divert6_recvspace)); 285 if (error) 286 return (error); 287 error = in_pcballoc(so, &divb6table, wait); 288 if (error) 289 return (error); 290 291 return (0); 292 } 293 294 int 295 divert6_send(struct socket *so, struct mbuf *m, struct mbuf *addr, 296 struct mbuf *control) 297 { 298 struct inpcb *inp = sotoinpcb(so); 299 300 soassertlocked(so); 301 return (divert6_output(inp, m, addr, control)); 302 } 303 304 int 305 divert6_sysctl_div6stat(void *oldp, size_t *oldlenp, void *newp) 306 { 307 uint64_t counters[div6s_ncounters]; 308 struct div6stat div6stat; 309 u_long *words = (u_long *)&div6stat; 310 int i; 311 312 CTASSERT(sizeof(div6stat) == (nitems(counters) * sizeof(u_long))); 313 314 counters_read(div6counters, counters, nitems(counters), NULL); 315 316 for (i = 0; i < nitems(counters); i++) 317 words[i] = (u_long)counters[i]; 318 319 return (sysctl_rdstruct(oldp, oldlenp, newp, 320 &div6stat, sizeof(div6stat))); 321 } 322 323 /* 324 * Sysctl for divert variables. 325 */ 326 int 327 divert6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, 328 void *newp, size_t newlen) 329 { 330 /* All sysctl names at this level are terminal. 
*/ 331 if (namelen != 1) 332 return (ENOTDIR); 333 334 switch (name[0]) { 335 case DIVERT6CTL_STATS: 336 return (divert6_sysctl_div6stat(oldp, oldlenp, newp)); 337 default: 338 return (sysctl_bounded_arr(divert6ctl_vars, 339 nitems(divert6ctl_vars), name, namelen, oldp, oldlenp, 340 newp, newlen)); 341 } 342 /* NOTREACHED */ 343 } 344