1 /* $OpenBSD: raw_ip.c,v 1.156 2024/02/11 18:14:26 mvs Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_mroute.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_icmp.h>

#include <net/pfvar.h>

#include "pf.h"

/*
 * PCB table for raw IPv4 sockets: rip_attach() allocates PCBs in it and
 * rip_input() walks it to find receivers.
 */
struct inpcbtable rawcbtable;

/*
 * Nominal space allocated to a raw ip socket.
 * These values seed rip_sendspace and rip_recvspace, which rip_attach()
 * passes to soreserve().
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192

/*
 * Raw interface to IP protocol.
 *
 * User-request dispatch table for raw IP sockets.  Most entries point at
 * the rip_*() handlers in this file; the control, sockaddr and peeraddr
 * requests use the generic in_*() routines.
 */

const struct pr_usrreqs rip_usrreqs = {
	.pru_attach	= rip_attach,
	.pru_detach	= rip_detach,
	.pru_lock	= rip_lock,
	.pru_unlock	= rip_unlock,
	.pru_locked	= rip_locked,
	.pru_bind	= rip_bind,
	.pru_connect	= rip_connect,
	.pru_disconnect	= rip_disconnect,
	.pru_shutdown	= rip_shutdown,
	.pru_send	= rip_send,
	.pru_control	= in_control,
	.pru_sockaddr	= in_sockaddr,
	.pru_peeraddr	= in_peeraddr,
};

/*
 * Initialize raw connection block q.
124 */ 125 void 126 rip_init(void) 127 { 128 in_pcbinit(&rawcbtable, 1); 129 } 130 131 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); 132 133 int 134 rip_input(struct mbuf **mp, int *offp, int proto, int af) 135 { 136 struct mbuf *m = *mp; 137 struct ip *ip = mtod(m, struct ip *); 138 struct inpcb *inp; 139 SIMPLEQ_HEAD(, inpcb) inpcblist; 140 struct in_addr *key; 141 struct counters_ref ref; 142 uint64_t *counters; 143 struct sockaddr_in ripsrc; 144 145 KASSERT(af == AF_INET); 146 147 memset(&ripsrc, 0, sizeof(ripsrc)); 148 ripsrc.sin_family = AF_INET; 149 ripsrc.sin_len = sizeof(ripsrc); 150 ripsrc.sin_addr = ip->ip_src; 151 152 key = &ip->ip_dst; 153 #if NPF > 0 154 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 155 struct pf_divert *divert; 156 157 divert = pf_find_divert(m); 158 KASSERT(divert != NULL); 159 switch (divert->type) { 160 case PF_DIVERT_TO: 161 key = &divert->addr.v4; 162 break; 163 case PF_DIVERT_REPLY: 164 break; 165 default: 166 panic("%s: unknown divert type %d, mbuf %p, divert %p", 167 __func__, divert->type, m, divert); 168 } 169 } 170 #endif 171 SIMPLEQ_INIT(&inpcblist); 172 rw_enter_write(&rawcbtable.inpt_notify); 173 mtx_enter(&rawcbtable.inpt_mtx); 174 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 175 KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); 176 177 if (inp->inp_socket->so_rcv.sb_state & SS_CANTRCVMORE) 178 continue; 179 if (rtable_l2(inp->inp_rtableid) != 180 rtable_l2(m->m_pkthdr.ph_rtableid)) 181 continue; 182 183 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 184 continue; 185 if (inp->inp_laddr.s_addr && 186 inp->inp_laddr.s_addr != key->s_addr) 187 continue; 188 if (inp->inp_faddr.s_addr && 189 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 190 continue; 191 192 in_pcbref(inp); 193 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); 194 } 195 mtx_leave(&rawcbtable.inpt_mtx); 196 197 if (SIMPLEQ_EMPTY(&inpcblist)) { 198 rw_exit_write(&rawcbtable.inpt_notify); 199 200 if (ip->ip_p != IPPROTO_ICMP) 201 
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 202 0, 0); 203 else 204 m_freem(m); 205 206 counters = counters_enter(&ref, ipcounters); 207 counters[ips_noproto]++; 208 counters[ips_delivered]--; 209 counters_leave(&ref, ipcounters); 210 211 return IPPROTO_DONE; 212 } 213 214 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { 215 struct mbuf *n, *opts = NULL; 216 217 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); 218 if (SIMPLEQ_EMPTY(&inpcblist)) 219 n = m; 220 else 221 n = m_copym(m, 0, M_COPYALL, M_NOWAIT); 222 if (n != NULL) { 223 struct socket *so = inp->inp_socket; 224 int ret; 225 226 if (inp->inp_flags & INP_CONTROLOPTS || 227 so->so_options & SO_TIMESTAMP) 228 ip_savecontrol(inp, &opts, ip, n); 229 230 mtx_enter(&so->so_rcv.sb_mtx); 231 ret = sbappendaddr(so, &so->so_rcv, 232 sintosa(&ripsrc), n, opts); 233 mtx_leave(&so->so_rcv.sb_mtx); 234 235 if (ret == 0) { 236 /* should notify about lost packet */ 237 m_freem(n); 238 m_freem(opts); 239 } else 240 sorwakeup(so); 241 } 242 in_pcbunref(inp); 243 } 244 rw_exit_write(&rawcbtable.inpt_notify); 245 246 return IPPROTO_DONE; 247 } 248 249 /* 250 * Generate IP header and pass packet to ip_output. 251 * Tack on options user may have setup with control call. 252 */ 253 int 254 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 255 struct mbuf *control) 256 { 257 struct sockaddr_in *dst = satosin(dstaddr); 258 struct ip *ip; 259 struct inpcb *inp; 260 int flags, error; 261 262 inp = sotoinpcb(so); 263 flags = IP_ALLOWBROADCAST; 264 265 /* 266 * If the user handed us a complete IP packet, use it. 267 * Otherwise, allocate an mbuf for a header and fill it in. 
268 */ 269 if ((inp->inp_flags & INP_HDRINCL) == 0) { 270 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 271 m_freem(m); 272 return (EMSGSIZE); 273 } 274 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 275 if (!m) 276 return (ENOBUFS); 277 ip = mtod(m, struct ip *); 278 ip->ip_tos = inp->inp_ip.ip_tos; 279 ip->ip_off = htons(0); 280 ip->ip_p = inp->inp_ip.ip_p; 281 ip->ip_len = htons(m->m_pkthdr.len); 282 ip->ip_src.s_addr = INADDR_ANY; 283 ip->ip_dst = dst->sin_addr; 284 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 285 } else { 286 if (m->m_pkthdr.len > IP_MAXPACKET) { 287 m_freem(m); 288 return (EMSGSIZE); 289 } 290 291 m = rip_chkhdr(m, inp->inp_options); 292 if (m == NULL) 293 return (EINVAL); 294 295 ip = mtod(m, struct ip *); 296 if (ip->ip_id == 0) 297 ip->ip_id = htons(ip_randomid()); 298 dst->sin_addr = ip->ip_dst; 299 300 /* XXX prevent ip_output from overwriting header fields */ 301 flags |= IP_RAWOUTPUT; 302 ipstat_inc(ips_rawout); 303 } 304 305 if (ip->ip_src.s_addr == INADDR_ANY) { 306 error = in_pcbselsrc(&ip->ip_src, dst, inp); 307 if (error != 0) 308 return (error); 309 } 310 311 #ifdef INET6 312 /* 313 * A thought: Even though raw IP shouldn't be able to set IPv6 314 * multicast options, if it does, the last parameter to 315 * ip_output should be guarded against v6/v4 problems. 
316 */ 317 #endif 318 /* force routing table */ 319 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 320 321 #if NPF > 0 322 if (inp->inp_socket->so_state & SS_ISCONNECTED && 323 ip->ip_p != IPPROTO_ICMP) 324 pf_mbuf_link_inpcb(m, inp); 325 #endif 326 327 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 328 inp->inp_moptions, inp->inp_seclevel, 0); 329 return (error); 330 } 331 332 struct mbuf * 333 rip_chkhdr(struct mbuf *m, struct mbuf *options) 334 { 335 struct ip *ip; 336 int hlen, opt, optlen, cnt; 337 u_char *cp; 338 339 if (m->m_pkthdr.len < sizeof(struct ip)) { 340 m_freem(m); 341 return NULL; 342 } 343 344 m = m_pullup(m, sizeof (struct ip)); 345 if (m == NULL) 346 return NULL; 347 348 ip = mtod(m, struct ip *); 349 hlen = ip->ip_hl << 2; 350 351 /* Don't allow packet length sizes that will crash. */ 352 if (hlen < sizeof (struct ip) || 353 ntohs(ip->ip_len) < hlen || 354 ntohs(ip->ip_len) != m->m_pkthdr.len) { 355 m_freem(m); 356 return NULL; 357 } 358 m = m_pullup(m, hlen); 359 if (m == NULL) 360 return NULL; 361 362 ip = mtod(m, struct ip *); 363 364 if (ip->ip_v != IPVERSION) { 365 m_freem(m); 366 return NULL; 367 } 368 369 /* 370 * Don't allow both user specified and setsockopt options. 371 * If options are present verify them. 372 */ 373 if (hlen != sizeof(struct ip)) { 374 if (options) { 375 m_freem(m); 376 return NULL; 377 } else { 378 cp = (u_char *)(ip + 1); 379 cnt = hlen - sizeof(struct ip); 380 for (; cnt > 0; cnt -= optlen, cp += optlen) { 381 opt = cp[IPOPT_OPTVAL]; 382 if (opt == IPOPT_EOL) 383 break; 384 if (opt == IPOPT_NOP) 385 optlen = 1; 386 else { 387 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 388 m_freem(m); 389 return NULL; 390 } 391 optlen = cp[IPOPT_OLEN]; 392 if (optlen < IPOPT_OLEN + sizeof(*cp) || 393 optlen > cnt) { 394 m_freem(m); 395 return NULL; 396 } 397 } 398 } 399 } 400 } 401 402 return m; 403 } 404 405 /* 406 * Raw IP socket option processing. 
407 */ 408 int 409 rip_ctloutput(int op, struct socket *so, int level, int optname, 410 struct mbuf *m) 411 { 412 struct inpcb *inp = sotoinpcb(so); 413 int error; 414 415 if (level != IPPROTO_IP) 416 return (EINVAL); 417 418 switch (optname) { 419 420 case IP_HDRINCL: 421 error = 0; 422 if (op == PRCO_SETOPT) { 423 if (m == NULL || m->m_len < sizeof (int)) 424 error = EINVAL; 425 else if (*mtod(m, int *)) 426 inp->inp_flags |= INP_HDRINCL; 427 else 428 inp->inp_flags &= ~INP_HDRINCL; 429 } else { 430 m->m_len = sizeof(int); 431 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 432 } 433 return (error); 434 435 case MRT_INIT: 436 case MRT_DONE: 437 case MRT_ADD_VIF: 438 case MRT_DEL_VIF: 439 case MRT_ADD_MFC: 440 case MRT_DEL_MFC: 441 case MRT_VERSION: 442 case MRT_ASSERT: 443 case MRT_API_SUPPORT: 444 case MRT_API_CONFIG: 445 #ifdef MROUTING 446 switch (op) { 447 case PRCO_SETOPT: 448 error = ip_mrouter_set(so, optname, m); 449 break; 450 case PRCO_GETOPT: 451 error = ip_mrouter_get(so, optname, m); 452 break; 453 default: 454 error = EINVAL; 455 break; 456 } 457 return (error); 458 #else 459 return (EOPNOTSUPP); 460 #endif 461 } 462 return (ip_ctloutput(op, so, level, optname, m)); 463 } 464 465 u_long rip_sendspace = RIPSNDQ; 466 u_long rip_recvspace = RIPRCVQ; 467 468 int 469 rip_attach(struct socket *so, int proto, int wait) 470 { 471 struct inpcb *inp; 472 int error; 473 474 if (so->so_pcb) 475 panic("rip_attach"); 476 if ((so->so_state & SS_PRIV) == 0) 477 return EACCES; 478 if (proto < 0 || proto >= IPPROTO_MAX) 479 return EPROTONOSUPPORT; 480 481 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 482 return error; 483 NET_ASSERT_LOCKED(); 484 if ((error = in_pcballoc(so, &rawcbtable, wait))) 485 return error; 486 inp = sotoinpcb(so); 487 inp->inp_ip.ip_p = proto; 488 return 0; 489 } 490 491 int 492 rip_detach(struct socket *so) 493 { 494 struct inpcb *inp = sotoinpcb(so); 495 496 soassertlocked(so); 497 498 if (inp == NULL) 499 return (EINVAL); 500 
501 #ifdef MROUTING 502 if (so == ip_mrouter[inp->inp_rtableid]) 503 ip_mrouter_done(so); 504 #endif 505 in_pcbdetach(inp); 506 507 return (0); 508 } 509 510 void 511 rip_lock(struct socket *so) 512 { 513 struct inpcb *inp = sotoinpcb(so); 514 515 NET_ASSERT_LOCKED(); 516 mtx_enter(&inp->inp_mtx); 517 } 518 519 void 520 rip_unlock(struct socket *so) 521 { 522 struct inpcb *inp = sotoinpcb(so); 523 524 NET_ASSERT_LOCKED(); 525 mtx_leave(&inp->inp_mtx); 526 } 527 528 int 529 rip_locked(struct socket *so) 530 { 531 struct inpcb *inp = sotoinpcb(so); 532 533 return mtx_owned(&inp->inp_mtx); 534 } 535 536 int 537 rip_bind(struct socket *so, struct mbuf *nam, struct proc *p) 538 { 539 struct inpcb *inp = sotoinpcb(so); 540 struct sockaddr_in *addr; 541 int error; 542 543 soassertlocked(so); 544 545 if ((error = in_nam2sin(nam, &addr))) 546 return (error); 547 548 if (!((so->so_options & SO_BINDANY) || 549 addr->sin_addr.s_addr == INADDR_ANY || 550 addr->sin_addr.s_addr == INADDR_BROADCAST || 551 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 552 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) 553 return (EADDRNOTAVAIL); 554 555 mtx_enter(&rawcbtable.inpt_mtx); 556 inp->inp_laddr = addr->sin_addr; 557 mtx_leave(&rawcbtable.inpt_mtx); 558 559 return (0); 560 } 561 562 int 563 rip_connect(struct socket *so, struct mbuf *nam) 564 { 565 struct inpcb *inp = sotoinpcb(so); 566 struct sockaddr_in *addr; 567 int error; 568 569 soassertlocked(so); 570 571 if ((error = in_nam2sin(nam, &addr))) 572 return (error); 573 574 mtx_enter(&rawcbtable.inpt_mtx); 575 inp->inp_faddr = addr->sin_addr; 576 mtx_leave(&rawcbtable.inpt_mtx); 577 soisconnected(so); 578 579 return (0); 580 } 581 582 int 583 rip_disconnect(struct socket *so) 584 { 585 struct inpcb *inp = sotoinpcb(so); 586 587 soassertlocked(so); 588 589 if ((so->so_state & SS_ISCONNECTED) == 0) 590 return (ENOTCONN); 591 592 soisdisconnected(so); 593 mtx_enter(&rawcbtable.inpt_mtx); 594 inp->inp_faddr.s_addr = INADDR_ANY; 595 
mtx_leave(&rawcbtable.inpt_mtx); 596 597 return (0); 598 } 599 600 int 601 rip_shutdown(struct socket *so) 602 { 603 /* 604 * Mark the connection as being incapable of further input. 605 */ 606 607 soassertlocked(so); 608 socantsendmore(so); 609 610 return (0); 611 } 612 613 int 614 rip_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 615 struct mbuf *control) 616 { 617 struct inpcb *inp = sotoinpcb(so); 618 struct sockaddr_in dst; 619 int error; 620 621 soassertlocked(so); 622 623 /* 624 * Ship a packet out. The appropriate raw output 625 * routine handles any massaging necessary. 626 */ 627 memset(&dst, 0, sizeof(dst)); 628 dst.sin_family = AF_INET; 629 dst.sin_len = sizeof(dst); 630 if (so->so_state & SS_ISCONNECTED) { 631 if (nam) { 632 error = EISCONN; 633 goto out; 634 } 635 dst.sin_addr = inp->inp_faddr; 636 } else { 637 struct sockaddr_in *addr; 638 639 if (nam == NULL) { 640 error = ENOTCONN; 641 goto out; 642 } 643 if ((error = in_nam2sin(nam, &addr))) 644 goto out; 645 dst.sin_addr = addr->sin_addr; 646 } 647 #ifdef IPSEC 648 /* XXX Find an IPsec TDB */ 649 #endif 650 error = rip_output(m, so, sintosa(&dst), NULL); 651 m = NULL; 652 653 out: 654 m_freem(control); 655 m_freem(m); 656 657 return (error); 658 } 659