1 /* $OpenBSD: ip_divert.c,v 1.64 2020/11/16 06:38:20 gnezdo Exp $ */ 2 3 /* 4 * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/mbuf.h> 22 #include <sys/protosw.h> 23 #include <sys/socket.h> 24 #include <sys/socketvar.h> 25 #include <sys/sysctl.h> 26 27 #include <net/if.h> 28 #include <net/route.h> 29 #include <net/if_var.h> 30 #include <net/netisr.h> 31 32 #include <netinet/in.h> 33 #include <netinet/in_var.h> 34 #include <netinet/ip.h> 35 #include <netinet/ip_var.h> 36 #include <netinet/in_pcb.h> 37 #include <netinet/ip_divert.h> 38 #include <netinet/tcp.h> 39 #include <netinet/udp.h> 40 #include <netinet/ip_icmp.h> 41 42 #include <net/pfvar.h> 43 44 struct inpcbtable divbtable; 45 struct cpumem *divcounters; 46 47 #ifndef DIVERT_SENDSPACE 48 #define DIVERT_SENDSPACE (65536 + 100) 49 #endif 50 u_int divert_sendspace = DIVERT_SENDSPACE; 51 #ifndef DIVERT_RECVSPACE 52 #define DIVERT_RECVSPACE (65536 + 100) 53 #endif 54 u_int divert_recvspace = DIVERT_RECVSPACE; 55 56 #ifndef DIVERTHASHSIZE 57 #define DIVERTHASHSIZE 128 58 #endif 59 60 const struct sysctl_bounded_args divertctl_vars[] = { 61 { DIVERTCTL_RECVSPACE, &divert_recvspace, 0, INT_MAX }, 62 { DIVERTCTL_SENDSPACE, &divert_sendspace, 0, INT_MAX }, 63 }; 64 65 int divbhashsize = DIVERTHASHSIZE; 66 67 int divert_output(struct inpcb *, struct mbuf *, struct mbuf *, 68 struct mbuf *); 69 void 70 divert_init(void) 71 { 72 in_pcbinit(&divbtable, divbhashsize); 73 divcounters = counters_alloc(divs_ncounters); 74 } 75 76 int 77 divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam, 78 struct mbuf *control) 79 { 80 struct sockaddr_in *sin; 81 int error, min_hdrlen, off, dir; 82 struct ip *ip; 83 84 m_freem(control); 85 86 if ((error = in_nam2sin(nam, &sin))) 87 goto fail; 88 89 /* Do basic sanity checks. */ 90 if (m->m_pkthdr.len < sizeof(struct ip)) 91 goto fail; 92 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) { 93 /* m_pullup() has freed the mbuf, so just return. */ 94 divstat_inc(divs_errors); 95 return (ENOBUFS); 96 } 97 ip = mtod(m, struct ip *); 98 if (ip->ip_v != IPVERSION) 99 goto fail; 100 off = ip->ip_hl << 2; 101 if (off < sizeof(struct ip) || ntohs(ip->ip_len) < off || 102 m->m_pkthdr.len < ntohs(ip->ip_len)) 103 goto fail; 104 105 dir = (sin->sin_addr.s_addr == INADDR_ANY ? PF_OUT : PF_IN); 106 107 switch (ip->ip_p) { 108 case IPPROTO_TCP: 109 min_hdrlen = sizeof(struct tcphdr); 110 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 111 break; 112 case IPPROTO_UDP: 113 min_hdrlen = sizeof(struct udphdr); 114 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 115 break; 116 case IPPROTO_ICMP: 117 min_hdrlen = ICMP_MINLEN; 118 m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; 119 break; 120 default: 121 min_hdrlen = 0; 122 break; 123 } 124 if (min_hdrlen && m->m_pkthdr.len < off + min_hdrlen) 125 goto fail; 126 127 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED_PACKET; 128 129 if (dir == PF_IN) { 130 struct rtentry *rt; 131 struct ifnet *ifp; 132 133 rt = rtalloc(sintosa(sin), 0, inp->inp_rtableid); 134 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 135 rtfree(rt); 136 error = EADDRNOTAVAIL; 137 goto fail; 138 } 139 m->m_pkthdr.ph_ifidx = rt->rt_ifidx; 140 rtfree(rt); 141 142 /* 143 * Recalculate IP and protocol checksums for the inbound packet 144 * since the userspace application may have modified the packet 145 * prior to reinjection. 146 */ 147 ip->ip_sum = 0; 148 ip->ip_sum = in_cksum(m, off); 149 in_proto_cksum_out(m, NULL); 150 151 ifp = if_get(m->m_pkthdr.ph_ifidx); 152 if (ifp == NULL) { 153 error = ENETDOWN; 154 goto fail; 155 } 156 ipv4_input(ifp, m); 157 if_put(ifp); 158 } else { 159 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 160 161 error = ip_output(m, NULL, &inp->inp_route, 162 IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0); 163 } 164 165 divstat_inc(divs_opackets); 166 return (error); 167 168 fail: 169 m_freem(m); 170 divstat_inc(divs_errors); 171 return (error ? error : EINVAL); 172 } 173 174 int 175 divert_packet(struct mbuf *m, int dir, u_int16_t divert_port) 176 { 177 struct inpcb *inp; 178 struct socket *sa = NULL; 179 struct sockaddr_in addr; 180 181 inp = NULL; 182 divstat_inc(divs_ipackets); 183 184 if (m->m_len < sizeof(struct ip) && 185 (m = m_pullup(m, sizeof(struct ip))) == NULL) { 186 divstat_inc(divs_errors); 187 return (0); 188 } 189 190 TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) { 191 if (inp->inp_lport == divert_port) 192 break; 193 } 194 195 memset(&addr, 0, sizeof(addr)); 196 addr.sin_family = AF_INET; 197 addr.sin_len = sizeof(addr); 198 199 if (dir == PF_IN) { 200 struct ifaddr *ifa; 201 struct ifnet *ifp; 202 203 ifp = if_get(m->m_pkthdr.ph_ifidx); 204 if (ifp == NULL) { 205 m_freem(m); 206 return (0); 207 } 208 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 209 if (ifa->ifa_addr->sa_family != AF_INET) 210 continue; 211 addr.sin_addr.s_addr = satosin( 212 ifa->ifa_addr)->sin_addr.s_addr; 213 break; 214 } 215 if_put(ifp); 216 } 217 218 if (inp) { 219 sa = inp->inp_socket; 220 if (sbappendaddr(sa, &sa->so_rcv, sintosa(&addr), m, NULL) == 0) { 221 divstat_inc(divs_fullsock); 222 m_freem(m); 223 return (0); 224 } else { 225 KERNEL_LOCK(); 226 sorwakeup(inp->inp_socket); 227 KERNEL_UNLOCK(); 228 } 229 } 230 231 if (sa == NULL) { 232 divstat_inc(divs_noport); 233 m_freem(m); 234 } 235 return (0); 236 } 237 238 /*ARGSUSED*/ 239 int 240 divert_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *addr, 241 struct mbuf *control, struct proc *p) 242 { 243 struct inpcb *inp = sotoinpcb(so); 244 int error = 0; 245 246 if (req == PRU_CONTROL) { 247 return (in_control(so, (u_long)m, (caddr_t)addr, 248 (struct ifnet *)control)); 249 } 250 251 soassertlocked(so); 252 253 if (inp == NULL) { 254 error = EINVAL; 255 goto release; 256 } 257 switch (req) { 258 259 case PRU_BIND: 260 error = in_pcbbind(inp, addr, p); 261 break; 262 263 case PRU_SHUTDOWN: 264 socantsendmore(so); 265 break; 266 267 case PRU_SEND: 268 return (divert_output(inp, m, addr, control)); 269 270 case PRU_ABORT: 271 soisdisconnected(so); 272 in_pcbdetach(inp); 273 break; 274 275 case PRU_SOCKADDR: 276 in_setsockaddr(inp, addr); 277 break; 278 279 case PRU_PEERADDR: 280 in_setpeeraddr(inp, addr); 281 break; 282 283 case PRU_SENSE: 284 break; 285 286 case PRU_LISTEN: 287 case PRU_CONNECT: 288 case PRU_CONNECT2: 289 case PRU_ACCEPT: 290 case PRU_DISCONNECT: 291 case PRU_SENDOOB: 292 case PRU_FASTTIMO: 293 case PRU_SLOWTIMO: 294 case PRU_PROTORCV: 295 case PRU_PROTOSEND: 296 case PRU_RCVD: 297 case PRU_RCVOOB: 298 error = EOPNOTSUPP; 299 break; 300 301 default: 302 panic("divert_usrreq"); 303 } 304 305 release: 306 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 307 m_freem(control); 308 m_freem(m); 309 } 310 return (error); 311 } 312 313 int 314 divert_attach(struct socket *so, int proto) 315 { 316 int error; 317 318 if (so->so_pcb != NULL) 319 return EINVAL; 320 if ((so->so_state & SS_PRIV) == 0) 321 return EACCES; 322 323 error = in_pcballoc(so, &divbtable); 324 if (error) 325 return error; 326 327 error = soreserve(so, divert_sendspace, divert_recvspace); 328 if (error) 329 return error; 330 331 sotoinpcb(so)->inp_flags |= INP_HDRINCL; 332 return (0); 333 } 334 335 int 336 divert_detach(struct socket *so) 337 { 338 struct inpcb *inp = sotoinpcb(so); 339 340 soassertlocked(so); 341 342 if (inp == NULL) 343 return (EINVAL); 344 345 in_pcbdetach(inp); 346 return (0); 347 } 348 349 int 350 divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp) 351 { 352 uint64_t counters[divs_ncounters]; 353 struct divstat divstat; 354 u_long *words = (u_long *)&divstat; 355 int i; 356 357 CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long))); 358 memset(&divstat, 0, sizeof divstat); 359 counters_read(divcounters, counters, nitems(counters)); 360 361 for (i = 0; i < nitems(counters); i++) 362 words[i] = (u_long)counters[i]; 363 364 return (sysctl_rdstruct(oldp, oldlenp, newp, 365 &divstat, sizeof(divstat))); 366 } 367 368 /* 369 * Sysctl for divert variables. 370 */ 371 int 372 divert_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 373 size_t newlen) 374 { 375 int error; 376 377 /* All sysctl names at this level are terminal. */ 378 if (namelen != 1) 379 return (ENOTDIR); 380 381 switch (name[0]) { 382 case DIVERTCTL_STATS: 383 return (divert_sysctl_divstat(oldp, oldlenp, newp)); 384 default: 385 NET_LOCK(); 386 error = sysctl_bounded_arr(divertctl_vars, 387 nitems(divertctl_vars), name, namelen, oldp, oldlenp, newp, 388 newlen); 389 NET_UNLOCK(); 390 return (error); 391 } 392 /* NOTREACHED */ 393 } 394