/*	$OpenBSD: in_pcb.c,v 1.106 2009/07/26 12:59:16 thib Exp $	*/
/*	$NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/pool.h>

#include <net/if.h>
#include <net/route.h>
#include <net/pfvar.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <dev/rndvar.h>

#include <sys/mount.h>
#include <nfs/nfsproto.h>

#ifdef INET6
#include <netinet6/ip6_var.h>
#endif /* INET6 */
#ifdef IPSEC
#include <netinet/ip_esp.h>
#endif /* IPSEC */

/*
 * File-scope address object with no initializer (hence all-zero).  The
 * lookup routines in this file pass it wherever an unspecified IPv4
 * address is needed as a search key.
 */
struct in_addr zeroin_addr;

/*
 * Default IPsec security levels, defined elsewhere.  in_pcballoc() copies
 * them into the inp_seclevel[] slots of every PCB it creates.
 */
extern int ipsec_auth_default_level;
extern int ipsec_esp_trans_default_level;
extern int ipsec_esp_network_default_level;
extern int ipsec_ipcomp_default_level;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
115 */ 116 int ipport_firstauto = IPPORT_RESERVED; 117 int ipport_lastauto = IPPORT_USERRESERVED; 118 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; 119 int ipport_hilastauto = IPPORT_HILASTAUTO; 120 121 struct pool inpcb_pool; 122 int inpcb_pool_initialized = 0; 123 124 #define INPCBHASH(table, faddr, fport, laddr, lport, rdom) \ 125 &(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \ 126 ntohs((fport)) + ntohs((lport)) + (rdom)) & (table->inpt_hash)] 127 128 #define IN6PCBHASH(table, faddr, fport, laddr, lport) \ 129 &(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \ 130 (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \ 131 (table->inpt_hash)] 132 133 #define INPCBLHASH(table, lport, rdom) \ 134 &(table)->inpt_lhashtbl[(ntohs((lport)) + (rdom)) & table->inpt_lhash] 135 136 void 137 in_pcbinit(table, hashsize) 138 struct inpcbtable *table; 139 int hashsize; 140 { 141 142 CIRCLEQ_INIT(&table->inpt_queue); 143 table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 144 &table->inpt_hash); 145 if (table->inpt_hashtbl == NULL) 146 panic("in_pcbinit: hashinit failed"); 147 table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, 148 &table->inpt_lhash); 149 if (table->inpt_lhashtbl == NULL) 150 panic("in_pcbinit: hashinit failed for lport"); 151 table->inpt_lastport = 0; 152 } 153 154 struct baddynamicports baddynamicports; 155 156 /* 157 * Check if the specified port is invalid for dynamic allocation. 
158 */ 159 int 160 in_baddynamic(u_int16_t port, u_int16_t proto) 161 { 162 switch (proto) { 163 case IPPROTO_TCP: 164 return (DP_ISSET(baddynamicports.tcp, port)); 165 case IPPROTO_UDP: 166 #ifdef IPSEC 167 /* Cannot preset this as it is a sysctl */ 168 if (port == udpencap_port) 169 return (1); 170 #endif 171 return (DP_ISSET(baddynamicports.udp, port)); 172 default: 173 return (0); 174 } 175 } 176 177 int 178 in_pcballoc(so, v) 179 struct socket *so; 180 void *v; 181 { 182 struct inpcbtable *table = v; 183 struct inpcb *inp; 184 int s; 185 186 if (inpcb_pool_initialized == 0) { 187 pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0, 188 "inpcbpl", NULL); 189 inpcb_pool_initialized = 1; 190 } 191 inp = pool_get(&inpcb_pool, PR_NOWAIT); 192 if (inp == NULL) 193 return (ENOBUFS); 194 bzero((caddr_t)inp, sizeof(*inp)); 195 inp->inp_table = table; 196 inp->inp_socket = so; 197 inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level; 198 inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level; 199 inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level; 200 inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level; 201 s = splnet(); 202 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); 203 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain), 204 inp, inp_lhash); 205 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport, 206 &inp->inp_laddr, inp->inp_lport, inp->inp_rdomain), inp, inp_hash); 207 splx(s); 208 so->so_pcb = inp; 209 inp->inp_hops = -1; 210 211 #ifdef INET6 212 /* 213 * Small change in this function to set the INP_IPV6 flag so routines 214 * outside pcb-specific routines don't need to use sotopf(), and all 215 * of its pointer chasing, later. 
216 */ 217 if (sotopf(so) == PF_INET6) 218 inp->inp_flags = INP_IPV6; 219 inp->in6p_cksum = -1; 220 #endif /* INET6 */ 221 return (0); 222 } 223 224 int 225 in_pcbbind(v, nam, p) 226 void *v; 227 struct mbuf *nam; 228 struct proc *p; 229 { 230 struct inpcb *inp = v; 231 struct socket *so = inp->inp_socket; 232 struct inpcbtable *table = inp->inp_table; 233 u_int16_t *lastport = &inp->inp_table->inpt_lastport; 234 struct sockaddr_in *sin; 235 u_int16_t lport = 0; 236 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 237 int error; 238 239 #ifdef INET6 240 if (sotopf(so) == PF_INET6) 241 return in6_pcbbind(inp, nam, p); 242 #endif /* INET6 */ 243 244 if (TAILQ_EMPTY(&in_ifaddr)) 245 return (EADDRNOTAVAIL); 246 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 247 return (EINVAL); 248 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && 249 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || 250 (so->so_options & SO_ACCEPTCONN) == 0)) 251 wild = INPLOOKUP_WILDCARD; 252 if (nam) { 253 sin = mtod(nam, struct sockaddr_in *); 254 if (nam->m_len != sizeof (*sin)) 255 return (EINVAL); 256 #ifdef notdef 257 /* 258 * We should check the family, but old programs 259 * incorrectly fail to initialize it. 260 */ 261 if (sin->sin_family != AF_INET) 262 return (EAFNOSUPPORT); 263 #endif 264 lport = sin->sin_port; 265 if (IN_MULTICAST(sin->sin_addr.s_addr)) { 266 /* 267 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 268 * allow complete duplication of binding if 269 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 270 * and a multicast address is bound on both 271 * new and duplicated sockets. 272 */ 273 if (so->so_options & SO_REUSEADDR) 274 reuseport = SO_REUSEADDR|SO_REUSEPORT; 275 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 276 sin->sin_port = 0; /* yech... 
*/ 277 if (!(so->so_options & SO_BINDANY) && 278 in_iawithaddr(sin->sin_addr, NULL, 279 inp->inp_rdomain) == 0) 280 return (EADDRNOTAVAIL); 281 } 282 if (lport) { 283 struct inpcb *t; 284 285 /* GROSS */ 286 if (ntohs(lport) < IPPORT_RESERVED && 287 (error = suser(p, 0))) 288 return (EACCES); 289 if (so->so_euid) { 290 t = in_pcblookup(table, &zeroin_addr, 0, 291 &sin->sin_addr, lport, INPLOOKUP_WILDCARD, 292 inp->inp_rdomain); 293 if (t && (so->so_euid != t->inp_socket->so_euid)) 294 return (EADDRINUSE); 295 } 296 t = in_pcblookup(table, &zeroin_addr, 0, 297 &sin->sin_addr, lport, wild, inp->inp_rdomain); 298 if (t && (reuseport & t->inp_socket->so_options) == 0) 299 return (EADDRINUSE); 300 } 301 inp->inp_laddr = sin->sin_addr; 302 } 303 if (lport == 0) { 304 u_int16_t first, last; 305 int count; 306 307 if (inp->inp_flags & INP_HIGHPORT) { 308 first = ipport_hifirstauto; /* sysctl */ 309 last = ipport_hilastauto; 310 } else if (inp->inp_flags & INP_LOWPORT) { 311 if ((error = suser(p, 0))) 312 return (EACCES); 313 first = IPPORT_RESERVED-1; /* 1023 */ 314 last = 600; /* not IPPORT_RESERVED/2 */ 315 } else { 316 first = ipport_firstauto; /* sysctl */ 317 last = ipport_lastauto; 318 } 319 320 /* 321 * Simple check to ensure all ports are not used up causing 322 * a deadlock here. 323 * 324 * We split the two cases (up and down) so that the direction 325 * is not being tested on each round of the loop. 326 */ 327 328 if (first > last) { 329 /* 330 * counting down 331 */ 332 count = first - last; 333 if (count) 334 *lastport = first - arc4random_uniform(count); 335 336 do { 337 if (count-- < 0) /* completely used? 
*/ 338 return (EADDRNOTAVAIL); 339 --*lastport; 340 if (*lastport > first || *lastport < last) 341 *lastport = first; 342 lport = htons(*lastport); 343 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 344 in_pcblookup(table, &zeroin_addr, 0, 345 &inp->inp_laddr, lport, wild, inp->inp_rdomain)); 346 } else { 347 /* 348 * counting up 349 */ 350 count = last - first; 351 if (count) 352 *lastport = first + arc4random_uniform(count); 353 354 do { 355 if (count-- < 0) /* completely used? */ 356 return (EADDRNOTAVAIL); 357 ++*lastport; 358 if (*lastport < first || *lastport > last) 359 *lastport = first; 360 lport = htons(*lastport); 361 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) || 362 in_pcblookup(table, &zeroin_addr, 0, 363 &inp->inp_laddr, lport, wild, inp->inp_rdomain)); 364 } 365 } 366 inp->inp_lport = lport; 367 in_pcbrehash(inp); 368 return (0); 369 } 370 371 /* 372 * Connect from a socket to a specified address. 373 * Both address and port must be specified in argument sin. 374 * If don't have a local address for this socket yet, 375 * then pick one. 376 */ 377 int 378 in_pcbconnect(v, nam) 379 void *v; 380 struct mbuf *nam; 381 { 382 struct inpcb *inp = v; 383 struct sockaddr_in *ifaddr = NULL; 384 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); 385 386 #ifdef INET6 387 if (sotopf(inp->inp_socket) == PF_INET6) 388 return (in6_pcbconnect(inp, nam)); 389 if ((inp->inp_flags & INP_IPV6) != 0) 390 panic("IPv6 pcb passed into in_pcbconnect"); 391 #endif /* INET6 */ 392 393 if (nam->m_len != sizeof (*sin)) 394 return (EINVAL); 395 if (sin->sin_family != AF_INET) 396 return (EAFNOSUPPORT); 397 if (sin->sin_port == 0) 398 return (EADDRNOTAVAIL); 399 if (!TAILQ_EMPTY(&in_ifaddr)) { 400 /* 401 * If the destination address is INADDR_ANY, 402 * use the primary local address. 403 * If the supplied address is INADDR_BROADCAST, 404 * and the primary interface supports broadcast, 405 * choose the broadcast address for that interface. 
406 */ 407 if (sin->sin_addr.s_addr == INADDR_ANY) 408 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr; 409 else if (sin->sin_addr.s_addr == INADDR_BROADCAST && 410 (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST)) 411 sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr; 412 } 413 if (inp->inp_laddr.s_addr == INADDR_ANY) { 414 int error; 415 ifaddr = in_selectsrc(sin, &inp->inp_route, 416 inp->inp_socket->so_options, inp->inp_moptions, &error, 417 inp->inp_rdomain); 418 if (ifaddr == NULL) { 419 if (error == 0) 420 error = EADDRNOTAVAIL; 421 return error; 422 } 423 } 424 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port, 425 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, 426 inp->inp_lport, inp->inp_rdomain) != 0) 427 return (EADDRINUSE); 428 if (inp->inp_laddr.s_addr == INADDR_ANY) { 429 if (inp->inp_lport == 0 && 430 in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL) 431 return (EADDRNOTAVAIL); 432 inp->inp_laddr = ifaddr->sin_addr; 433 } 434 inp->inp_faddr = sin->sin_addr; 435 inp->inp_fport = sin->sin_port; 436 in_pcbrehash(inp); 437 #ifdef IPSEC 438 { 439 int error; /* This is just ignored */ 440 441 /* Cause an IPsec SA to be established. 
*/ 442 ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT, 443 NULL, inp, NULL); 444 } 445 #endif 446 return (0); 447 } 448 449 void 450 in_pcbdisconnect(v) 451 void *v; 452 { 453 struct inpcb *inp = v; 454 455 switch (sotopf(inp->inp_socket)) { 456 #ifdef INET6 457 case PF_INET6: 458 inp->inp_faddr6 = in6addr_any; 459 break; 460 #endif 461 case PF_INET: 462 inp->inp_faddr.s_addr = INADDR_ANY; 463 break; 464 } 465 466 inp->inp_fport = 0; 467 in_pcbrehash(inp); 468 if (inp->inp_socket->so_state & SS_NOFDREF) 469 in_pcbdetach(inp); 470 } 471 472 void 473 in_pcbdetach(v) 474 void *v; 475 { 476 struct inpcb *inp = v; 477 struct socket *so = inp->inp_socket; 478 int s; 479 480 so->so_pcb = 0; 481 sofree(so); 482 if (inp->inp_options) 483 m_freem(inp->inp_options); 484 if (inp->inp_route.ro_rt) 485 rtfree(inp->inp_route.ro_rt); 486 #ifdef INET6 487 if (inp->inp_flags & INP_IPV6) { 488 ip6_freepcbopts(inp->inp_outputopts6); 489 ip6_freemoptions(inp->inp_moptions6); 490 } else 491 #endif 492 ip_freemoptions(inp->inp_moptions); 493 #ifdef IPSEC 494 /* IPsec cleanup here */ 495 s = spltdb(); 496 if (inp->inp_tdb_in) 497 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 498 inp, inp_tdb_in_next); 499 if (inp->inp_tdb_out) 500 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp, 501 inp_tdb_out_next); 502 if (inp->inp_ipsec_remotecred) 503 ipsp_reffree(inp->inp_ipsec_remotecred); 504 if (inp->inp_ipsec_remoteauth) 505 ipsp_reffree(inp->inp_ipsec_remoteauth); 506 if (inp->inp_ipo) 507 ipsec_delete_policy(inp->inp_ipo); 508 splx(s); 509 #endif 510 #if NPF > 0 511 if (inp->inp_pf_sk) 512 ((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL; 513 #endif 514 s = splnet(); 515 LIST_REMOVE(inp, inp_lhash); 516 LIST_REMOVE(inp, inp_hash); 517 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); 518 splx(s); 519 pool_put(&inpcb_pool, inp); 520 } 521 522 void 523 in_setsockaddr(inp, nam) 524 struct inpcb *inp; 525 struct mbuf *nam; 526 { 527 struct sockaddr_in *sin; 528 529 
nam->m_len = sizeof (*sin); 530 sin = mtod(nam, struct sockaddr_in *); 531 bzero((caddr_t)sin, sizeof (*sin)); 532 sin->sin_family = AF_INET; 533 sin->sin_len = sizeof(*sin); 534 sin->sin_port = inp->inp_lport; 535 sin->sin_addr = inp->inp_laddr; 536 } 537 538 void 539 in_setpeeraddr(inp, nam) 540 struct inpcb *inp; 541 struct mbuf *nam; 542 { 543 struct sockaddr_in *sin; 544 545 #ifdef INET6 546 if (sotopf(inp->inp_socket) == PF_INET6) { 547 in6_setpeeraddr(inp, nam); 548 return; 549 } 550 #endif /* INET6 */ 551 552 nam->m_len = sizeof (*sin); 553 sin = mtod(nam, struct sockaddr_in *); 554 bzero((caddr_t)sin, sizeof (*sin)); 555 sin->sin_family = AF_INET; 556 sin->sin_len = sizeof(*sin); 557 sin->sin_port = inp->inp_fport; 558 sin->sin_addr = inp->inp_faddr; 559 } 560 561 /* 562 * Pass some notification to all connections of a protocol 563 * associated with address dst. The "usual action" will be 564 * taken, depending on the ctlinput cmd. The caller must filter any 565 * cmds that are uninteresting (e.g., no error in the map). 566 * Call the protocol specific routine (if any) to report 567 * any errors for each matching socket. 568 * 569 * Must be called at splsoftnet. 570 */ 571 void 572 in_pcbnotifyall(table, dst, errno, notify) 573 struct inpcbtable *table; 574 struct sockaddr *dst; 575 int errno; 576 void (*notify)(struct inpcb *, int); 577 { 578 struct inpcb *inp, *oinp; 579 struct in_addr faddr; 580 581 splsoftassert(IPL_SOFTNET); 582 583 #ifdef INET6 584 /* 585 * See in6_pcbnotify() for IPv6 codepath. By the time this 586 * gets called, the addresses passed are either definitely IPv4 or 587 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses. 
588 */ 589 #endif /* INET6 */ 590 591 if (dst->sa_family != AF_INET) 592 return; 593 faddr = satosin(dst)->sin_addr; 594 if (faddr.s_addr == INADDR_ANY) 595 return; 596 597 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 598 inp != CIRCLEQ_END(&table->inpt_queue);) { 599 #ifdef INET6 600 if (inp->inp_flags & INP_IPV6) { 601 inp = CIRCLEQ_NEXT(inp, inp_queue); 602 continue; 603 } 604 #endif 605 if (inp->inp_faddr.s_addr != faddr.s_addr || 606 inp->inp_socket == 0) { 607 inp = CIRCLEQ_NEXT(inp, inp_queue); 608 continue; 609 } 610 oinp = inp; 611 inp = CIRCLEQ_NEXT(inp, inp_queue); 612 if (notify) 613 (*notify)(oinp, errno); 614 } 615 } 616 617 /* 618 * Check for alternatives when higher level complains 619 * about service problems. For now, invalidate cached 620 * routing information. If the route was created dynamically 621 * (by a redirect), time to try a default gateway again. 622 */ 623 void 624 in_losing(inp) 625 struct inpcb *inp; 626 { 627 struct rtentry *rt; 628 struct rt_addrinfo info; 629 630 if ((rt = inp->inp_route.ro_rt)) { 631 inp->inp_route.ro_rt = 0; 632 bzero((caddr_t)&info, sizeof(info)); 633 info.rti_flags = rt->rt_flags; 634 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; 635 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 636 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 637 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0, 638 inp->inp_rdomain); 639 if (rt->rt_flags & RTF_DYNAMIC) 640 (void)rtrequest1(RTM_DELETE, &info, rt->rt_priority, 641 (struct rtentry **)0, inp->inp_rdomain); 642 /* 643 * A new route can be allocated 644 * the next time output is attempted. 645 * rtfree() needs to be called in anycase because the inp 646 * is still holding a reference to rt. 647 */ 648 rtfree(rt); 649 } 650 } 651 652 /* 653 * After a routing change, flush old routing 654 * and allocate a (hopefully) better one. 
655 */ 656 void 657 in_rtchange(inp, errno) 658 struct inpcb *inp; 659 int errno; 660 { 661 if (inp->inp_route.ro_rt) { 662 rtfree(inp->inp_route.ro_rt); 663 inp->inp_route.ro_rt = 0; 664 /* 665 * A new route can be allocated the next time 666 * output is attempted. 667 */ 668 } 669 } 670 671 struct inpcb * 672 in_pcblookup(struct inpcbtable *table, void *faddrp, u_int fport_arg, void *laddrp, u_int lport_arg, int flags, u_int rdomain) 673 { 674 struct inpcb *inp, *match = 0; 675 int matchwild = 3, wildcard; 676 u_int16_t fport = fport_arg, lport = lport_arg; 677 struct in_addr faddr = *(struct in_addr *)faddrp; 678 struct in_addr laddr = *(struct in_addr *)laddrp; 679 680 for (inp = LIST_FIRST(INPCBLHASH(table, lport, rdomain)); inp; 681 inp = LIST_NEXT(inp, inp_lhash)) { 682 if (inp->inp_rdomain != rdomain) 683 continue; 684 if (inp->inp_lport != lport) 685 continue; 686 wildcard = 0; 687 #ifdef INET6 688 if (flags & INPLOOKUP_IPV6) { 689 struct in6_addr *laddr6 = (struct in6_addr *)laddrp; 690 struct in6_addr *faddr6 = (struct in6_addr *)faddrp; 691 692 if (!(inp->inp_flags & INP_IPV6)) 693 continue; 694 695 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 696 if (IN6_IS_ADDR_UNSPECIFIED(laddr6)) 697 wildcard++; 698 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6)) 699 continue; 700 } else { 701 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6)) 702 wildcard++; 703 } 704 705 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) { 706 if (IN6_IS_ADDR_UNSPECIFIED(faddr6)) 707 wildcard++; 708 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, 709 faddr6) || inp->inp_fport != fport) 710 continue; 711 } else { 712 if (!IN6_IS_ADDR_UNSPECIFIED(faddr6)) 713 wildcard++; 714 } 715 } else 716 #endif /* INET6 */ 717 { 718 #ifdef INET6 719 if (inp->inp_flags & INP_IPV6) 720 continue; 721 #endif /* INET6 */ 722 723 if (inp->inp_faddr.s_addr != INADDR_ANY) { 724 if (faddr.s_addr == INADDR_ANY) 725 wildcard++; 726 else if (inp->inp_faddr.s_addr != faddr.s_addr || 727 inp->inp_fport != fport) 
728 continue; 729 } else { 730 if (faddr.s_addr != INADDR_ANY) 731 wildcard++; 732 } 733 if (inp->inp_laddr.s_addr != INADDR_ANY) { 734 if (laddr.s_addr == INADDR_ANY) 735 wildcard++; 736 else if (inp->inp_laddr.s_addr != laddr.s_addr) 737 continue; 738 } else { 739 if (laddr.s_addr != INADDR_ANY) 740 wildcard++; 741 } 742 } 743 if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) && 744 wildcard < matchwild) { 745 match = inp; 746 if ((matchwild = wildcard) == 0) 747 break; 748 } 749 } 750 return (match); 751 } 752 753 struct rtentry * 754 in_pcbrtentry(inp) 755 struct inpcb *inp; 756 { 757 struct route *ro; 758 759 ro = &inp->inp_route; 760 761 /* 762 * No route yet, so try to acquire one. 763 */ 764 if (ro->ro_rt == NULL) { 765 #ifdef INET6 766 bzero(ro, sizeof(struct route_in6)); 767 #else 768 bzero(ro, sizeof(struct route)); 769 #endif 770 771 switch(sotopf(inp->inp_socket)) { 772 #ifdef INET6 773 case PF_INET6: 774 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 775 break; 776 ro->ro_dst.sa_family = AF_INET6; 777 ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); 778 ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr = 779 inp->inp_faddr6; 780 rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0); 781 break; 782 #endif /* INET6 */ 783 case PF_INET: 784 if (inp->inp_faddr.s_addr == INADDR_ANY) 785 break; 786 ro->ro_dst.sa_family = AF_INET; 787 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 788 satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; 789 rtalloc_mpath(ro, &inp->inp_laddr.s_addr, 0); 790 break; 791 } 792 } 793 return (ro->ro_rt); 794 } 795 796 struct sockaddr_in * 797 in_selectsrc(struct sockaddr_in *sin, struct route *ro, int soopts, 798 struct ip_moptions *mopts, int *errorp, u_int rdomain) 799 { 800 struct sockaddr_in *sin2; 801 struct in_ifaddr *ia; 802 803 ia = (struct in_ifaddr *)0; 804 /* 805 * If route is known or can be allocated now, 806 * our src addr is taken from the i/f, else punt. 
807 */ 808 if (ro->ro_rt && 809 (satosin(&ro->ro_dst)->sin_addr.s_addr != 810 sin->sin_addr.s_addr || 811 soopts & SO_DONTROUTE)) { 812 RTFREE(ro->ro_rt); 813 ro->ro_rt = (struct rtentry *)0; 814 } 815 if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/ 816 (ro->ro_rt == (struct rtentry *)0 || 817 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 818 /* No route yet, so try to acquire one */ 819 ro->ro_dst.sa_family = AF_INET; 820 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 821 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; 822 rtalloc_mpath(ro, NULL, rdomain); 823 824 /* 825 * It is important to bzero out the rest of the 826 * struct sockaddr_in when mixing v6 & v4! 827 */ 828 sin2 = (struct sockaddr_in *)&ro->ro_dst; 829 bzero(sin2->sin_zero, sizeof(sin2->sin_zero)); 830 } 831 /* 832 * If we found a route, use the address 833 * corresponding to the outgoing interface 834 * unless it is the loopback (in case a route 835 * to our address on another net goes to loopback). 836 */ 837 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 838 ia = ifatoia(ro->ro_rt->rt_ifa); 839 if (ia == 0) { 840 u_int16_t fport = sin->sin_port; 841 842 sin->sin_port = 0; 843 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin), rdomain)); 844 if (ia == 0) 845 ia = ifatoia(ifa_ifwithnet(sintosa(sin), rdomain)); 846 sin->sin_port = fport; 847 if (ia == 0) 848 ia = TAILQ_FIRST(&in_ifaddr); 849 if (ia == 0) { 850 *errorp = EADDRNOTAVAIL; 851 return NULL; 852 } 853 } 854 /* 855 * If the destination address is multicast and an outgoing 856 * interface has been set as a multicast option, use the 857 * address of that interface as our source address. 
858 */ 859 if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) { 860 struct ip_moptions *imo; 861 struct ifnet *ifp; 862 863 imo = mopts; 864 if (imo->imo_multicast_ifp != NULL) { 865 ifp = imo->imo_multicast_ifp; 866 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) 867 if (ia->ia_ifp == ifp) 868 break; 869 if (ia == 0) { 870 *errorp = EADDRNOTAVAIL; 871 return NULL; 872 } 873 } 874 } 875 return satosin(&ia->ia_addr); 876 } 877 878 void 879 in_pcbrehash(inp) 880 struct inpcb *inp; 881 { 882 struct inpcbtable *table = inp->inp_table; 883 int s; 884 885 s = splnet(); 886 LIST_REMOVE(inp, inp_lhash); 887 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain), 888 inp, inp_lhash); 889 LIST_REMOVE(inp, inp_hash); 890 #ifdef INET6 891 if (inp->inp_flags & INP_IPV6) { 892 LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6, 893 inp->inp_fport, &inp->inp_laddr6, inp->inp_lport), 894 inp, inp_hash); 895 } else { 896 #endif /* INET6 */ 897 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, 898 inp->inp_fport, &inp->inp_laddr, inp->inp_lport, 899 inp->inp_rdomain), inp, inp_hash); 900 #ifdef INET6 901 } 902 #endif /* INET6 */ 903 splx(s); 904 } 905 906 #ifdef DIAGNOSTIC 907 int in_pcbnotifymiss = 0; 908 #endif 909 910 /* 911 * The in(6)_pcbhashlookup functions are used to locate connected sockets 912 * quickly: 913 * faddr.fport <-> laddr.lport 914 * No wildcard matching is done so that listening sockets are not found. 915 * If the functions return NULL in(6)_pcblookup_listen can be used to 916 * find a listening/bound socket that may accept the connection. 917 * After those two lookups no other are necessary. 
918 */ 919 struct inpcb * 920 in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr, 921 u_int fport_arg, struct in_addr laddr, u_int lport_arg, u_int rdomain) 922 { 923 struct inpcbhead *head; 924 struct inpcb *inp; 925 u_int16_t fport = fport_arg, lport = lport_arg; 926 927 head = INPCBHASH(table, &faddr, fport, &laddr, lport, rdomain); 928 LIST_FOREACH(inp, head, inp_hash) { 929 #ifdef INET6 930 if (inp->inp_flags & INP_IPV6) 931 continue; /*XXX*/ 932 #endif 933 if (inp->inp_faddr.s_addr == faddr.s_addr && 934 inp->inp_fport == fport && 935 inp->inp_lport == lport && 936 inp->inp_laddr.s_addr == laddr.s_addr && 937 inp->inp_rdomain == rdomain) { 938 /* 939 * Move this PCB to the head of hash chain so that 940 * repeated accesses are quicker. This is analogous to 941 * the historic single-entry PCB cache. 942 */ 943 if (inp != LIST_FIRST(head)) { 944 LIST_REMOVE(inp, inp_hash); 945 LIST_INSERT_HEAD(head, inp, inp_hash); 946 } 947 break; 948 } 949 } 950 #ifdef DIAGNOSTIC 951 if (inp == NULL && in_pcbnotifymiss) { 952 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%d\n", 953 ntohl(faddr.s_addr), ntohs(fport), 954 ntohl(laddr.s_addr), ntohs(lport), rdomain); 955 } 956 #endif 957 return (inp); 958 } 959 960 #ifdef INET6 961 struct inpcb * 962 in6_pcbhashlookup(struct inpcbtable *table, struct in6_addr *faddr, 963 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg) 964 { 965 struct inpcbhead *head; 966 struct inpcb *inp; 967 u_int16_t fport = fport_arg, lport = lport_arg; 968 969 head = IN6PCBHASH(table, faddr, fport, laddr, lport); 970 LIST_FOREACH(inp, head, inp_hash) { 971 if (!(inp->inp_flags & INP_IPV6)) 972 continue; 973 if (IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && 974 inp->inp_fport == fport && inp->inp_lport == lport && 975 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr)) { 976 /* 977 * Move this PCB to the head of hash chain so that 978 * repeated accesses are quicker. 
This is analogous to 979 * the historic single-entry PCB cache. 980 */ 981 if (inp != LIST_FIRST(head)) { 982 LIST_REMOVE(inp, inp_hash); 983 LIST_INSERT_HEAD(head, inp, inp_hash); 984 } 985 break; 986 } 987 } 988 #ifdef DIAGNOSTIC 989 if (inp == NULL && in_pcbnotifymiss) { 990 printf("in6_pcbhashlookup: faddr="); 991 printf(" fport=%d laddr=", ntohs(fport)); 992 printf(" lport=%d\n", ntohs(lport)); 993 } 994 #endif 995 return (inp); 996 } 997 #endif /* INET6 */ 998 999 /* 1000 * The in(6)_pcblookup_listen functions are used to locate listening 1001 * sockets quickly. This are sockets with unspecified foreign address 1002 * and port: 1003 * *.* <-> laddr.lport 1004 * *.* <-> *.lport 1005 */ 1006 struct inpcb * 1007 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, 1008 u_int lport_arg, int reverse, struct mbuf *m, u_int rdomain) 1009 { 1010 struct inpcbhead *head; 1011 struct in_addr *key1, *key2; 1012 struct inpcb *inp; 1013 u_int16_t lport = lport_arg; 1014 1015 #if NPF > 0 1016 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1017 struct pf_divert *divert; 1018 /* XXX rdomain */ 1019 if ((divert = pf_find_divert(m)) == NULL) 1020 return (NULL); 1021 key1 = key2 = &divert->addr.ipv4; 1022 lport = divert->port; 1023 } else 1024 #endif 1025 if (reverse) { 1026 key1 = &zeroin_addr; 1027 key2 = &laddr; 1028 } else { 1029 key1 = &laddr; 1030 key2 = &zeroin_addr; 1031 } 1032 1033 head = INPCBHASH(table, &zeroin_addr, 0, key1, lport, rdomain); 1034 LIST_FOREACH(inp, head, inp_hash) { 1035 #ifdef INET6 1036 if (inp->inp_flags & INP_IPV6) 1037 continue; /*XXX*/ 1038 #endif 1039 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1040 inp->inp_laddr.s_addr == key1->s_addr && 1041 inp->inp_faddr.s_addr == INADDR_ANY && 1042 inp->inp_rdomain == rdomain) 1043 break; 1044 } 1045 if (inp == NULL && key1->s_addr != key2->s_addr) { 1046 head = INPCBHASH(table, &zeroin_addr, 0, key2, lport, rdomain); 1047 LIST_FOREACH(inp, head, inp_hash) { 1048 #ifdef INET6 
1049 if (inp->inp_flags & INP_IPV6) 1050 continue; /*XXX*/ 1051 #endif 1052 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1053 inp->inp_laddr.s_addr == key2->s_addr && 1054 inp->inp_faddr.s_addr == INADDR_ANY && 1055 inp->inp_rdomain == rdomain) 1056 break; 1057 } 1058 } 1059 #ifdef DIAGNOSTIC 1060 if (inp == NULL && in_pcbnotifymiss) { 1061 printf("in_pcblookup_listen: laddr=%08x lport=%d\n", 1062 ntohl(laddr.s_addr), ntohs(lport)); 1063 } 1064 #endif 1065 /* 1066 * Move this PCB to the head of hash chain so that 1067 * repeated accesses are quicker. This is analogous to 1068 * the historic single-entry PCB cache. 1069 */ 1070 if (inp != NULL && inp != LIST_FIRST(head)) { 1071 LIST_REMOVE(inp, inp_hash); 1072 LIST_INSERT_HEAD(head, inp, inp_hash); 1073 } 1074 return (inp); 1075 } 1076 1077 #ifdef INET6 1078 struct inpcb * 1079 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, 1080 u_int lport_arg, int reverse, struct mbuf *m) 1081 { 1082 struct inpcbhead *head; 1083 struct in6_addr *key1, *key2; 1084 struct inpcb *inp; 1085 u_int16_t lport = lport_arg; 1086 1087 #if NPF > 0 1088 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 1089 struct pf_divert *divert; 1090 1091 if ((divert = pf_find_divert(m)) == NULL) 1092 return (NULL); 1093 key1 = key2 = &divert->addr.ipv6; 1094 lport = divert->port; 1095 } else 1096 #endif 1097 if (reverse) { 1098 key1 = &zeroin6_addr; 1099 key2 = laddr; 1100 } else { 1101 key1 = laddr; 1102 key2 = &zeroin6_addr; 1103 } 1104 1105 head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport); 1106 LIST_FOREACH(inp, head, inp_hash) { 1107 if (!(inp->inp_flags & INP_IPV6)) 1108 continue; 1109 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1110 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) && 1111 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1112 break; 1113 } 1114 if (inp == NULL && ! 
IN6_ARE_ADDR_EQUAL(key1, key2)) { 1115 head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport); 1116 LIST_FOREACH(inp, head, inp_hash) { 1117 if (!(inp->inp_flags & INP_IPV6)) 1118 continue; 1119 if (inp->inp_lport == lport && inp->inp_fport == 0 && 1120 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) && 1121 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 1122 break; 1123 } 1124 } 1125 #ifdef DIAGNOSTIC 1126 if (inp == NULL && in_pcbnotifymiss) { 1127 printf("in6_pcblookup_listen: laddr= lport=%d\n", 1128 ntohs(lport)); 1129 } 1130 #endif 1131 /* 1132 * Move this PCB to the head of hash chain so that 1133 * repeated accesses are quicker. This is analogous to 1134 * the historic single-entry PCB cache. 1135 */ 1136 if (inp != NULL && inp != LIST_FIRST(head)) { 1137 LIST_REMOVE(inp, inp_hash); 1138 LIST_INSERT_HEAD(head, inp, inp_hash); 1139 } 1140 return (inp); 1141 } 1142 #endif /* INET6 */ 1143