xref: /openbsd/sys/netinet/in_pcb.c (revision 404b540a)
1 /*	$OpenBSD: in_pcb.c,v 1.106 2009/07/26 12:59:16 thib Exp $	*/
2 /*	$NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "pf.h"
72 
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/proc.h>
80 #include <sys/domain.h>
81 #include <sys/pool.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 #include <net/pfvar.h>
86 
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip_var.h>
93 #include <dev/rndvar.h>
94 
95 #include <sys/mount.h>
96 #include <nfs/nfsproto.h>
97 
98 #ifdef INET6
99 #include <netinet6/ip6_var.h>
100 #endif /* INET6 */
101 #ifdef IPSEC
102 #include <netinet/ip_esp.h>
103 #endif /* IPSEC */
104 
/*
 * All-zero IPv4 address; passed as the "unspecified" address when looking
 * up PCBs by local address/port only.
 */
struct	in_addr zeroin_addr;

/*
 * Default IPsec security levels, defined elsewhere.  in_pcballoc() copies
 * them into the inp_seclevel[] array of every newly allocated PCB.
 */
extern int ipsec_auth_default_level;
extern int ipsec_esp_trans_default_level;
extern int ipsec_esp_network_default_level;
extern int ipsec_ipcomp_default_level;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 * in_pcbbind() uses the "hi" pair when INP_HIGHPORT is set on the PCB
 * and the plain pair otherwise.
 */
int ipport_firstauto = IPPORT_RESERVED;
int ipport_lastauto = IPPORT_USERRESERVED;
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;
int ipport_hilastauto = IPPORT_HILASTAUTO;

/*
 * Pool backing all struct inpcb allocations.  It is initialized lazily
 * by the first call to in_pcballoc(), which then sets the flag below.
 */
struct pool inpcb_pool;
int inpcb_pool_initialized = 0;
123 
/*
 * Bucket selectors for the PCB hash tables.  Each macro yields a pointer
 * to the list head the given key hashes to.
 *
 * INPCBHASH   - IPv4 connection hash: foreign address, both ports and the
 *               routing domain are summed and masked with inpt_hash.
 * IN6PCBHASH  - IPv6 connection hash: words 0 and 3 of the foreign address
 *               are XORed, then both ports are added.
 * INPCBLHASH  - local-port hash: local port plus routing domain, masked
 *               with inpt_lhash.
 *
 * The laddr argument is accepted for symmetry but does not take part in
 * the hash.  Ports and addresses are expected in network byte order (they
 * are run through ntohs()/ntohl()).  Every macro argument, including
 * `table', is parenthesized so that arbitrary pointer expressions can be
 * passed safely.
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport, rdom) \
	&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \
	ntohs((fport)) + ntohs((lport)) + (rdom)) & ((table)->inpt_hash)]

#define	IN6PCBHASH(table, faddr, fport, laddr, lport) \
	&(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \
	(faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \
	((table)->inpt_hash)]

#define	INPCBLHASH(table, lport, rdom) \
	&(table)->inpt_lhashtbl[(ntohs((lport)) + (rdom)) & \
	((table)->inpt_lhash)]
135 
136 void
137 in_pcbinit(table, hashsize)
138 	struct inpcbtable *table;
139 	int hashsize;
140 {
141 
142 	CIRCLEQ_INIT(&table->inpt_queue);
143 	table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
144 	    &table->inpt_hash);
145 	if (table->inpt_hashtbl == NULL)
146 		panic("in_pcbinit: hashinit failed");
147 	table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
148 	    &table->inpt_lhash);
149 	if (table->inpt_lhashtbl == NULL)
150 		panic("in_pcbinit: hashinit failed for lport");
151 	table->inpt_lastport = 0;
152 }
153 
154 struct baddynamicports baddynamicports;
155 
156 /*
157  * Check if the specified port is invalid for dynamic allocation.
158  */
159 int
160 in_baddynamic(u_int16_t port, u_int16_t proto)
161 {
162 	switch (proto) {
163 	case IPPROTO_TCP:
164 		return (DP_ISSET(baddynamicports.tcp, port));
165 	case IPPROTO_UDP:
166 #ifdef IPSEC
167 		/* Cannot preset this as it is a sysctl */
168 		if (port == udpencap_port)
169 			return (1);
170 #endif
171 		return (DP_ISSET(baddynamicports.udp, port));
172 	default:
173 		return (0);
174 	}
175 }
176 
177 int
178 in_pcballoc(so, v)
179 	struct socket *so;
180 	void *v;
181 {
182 	struct inpcbtable *table = v;
183 	struct inpcb *inp;
184 	int s;
185 
186 	if (inpcb_pool_initialized == 0) {
187 		pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
188 		    "inpcbpl", NULL);
189 		inpcb_pool_initialized = 1;
190 	}
191 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
192 	if (inp == NULL)
193 		return (ENOBUFS);
194 	bzero((caddr_t)inp, sizeof(*inp));
195 	inp->inp_table = table;
196 	inp->inp_socket = so;
197 	inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level;
198 	inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level;
199 	inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level;
200 	inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level;
201 	s = splnet();
202 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
203 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain),
204 	    inp, inp_lhash);
205 	LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
206 	    &inp->inp_laddr, inp->inp_lport, inp->inp_rdomain), inp, inp_hash);
207 	splx(s);
208 	so->so_pcb = inp;
209 	inp->inp_hops = -1;
210 
211 #ifdef INET6
212 	/*
213 	 * Small change in this function to set the INP_IPV6 flag so routines
214 	 * outside pcb-specific routines don't need to use sotopf(), and all
215 	 * of its pointer chasing, later.
216 	 */
217 	if (sotopf(so) == PF_INET6)
218 		inp->inp_flags = INP_IPV6;
219 	inp->in6p_cksum = -1;
220 #endif /* INET6 */
221 	return (0);
222 }
223 
224 int
225 in_pcbbind(v, nam, p)
226 	void *v;
227 	struct mbuf *nam;
228 	struct proc *p;
229 {
230 	struct inpcb *inp = v;
231 	struct socket *so = inp->inp_socket;
232 	struct inpcbtable *table = inp->inp_table;
233 	u_int16_t *lastport = &inp->inp_table->inpt_lastport;
234 	struct sockaddr_in *sin;
235 	u_int16_t lport = 0;
236 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
237 	int error;
238 
239 #ifdef INET6
240 	if (sotopf(so) == PF_INET6)
241 		return in6_pcbbind(inp, nam, p);
242 #endif /* INET6 */
243 
244 	if (TAILQ_EMPTY(&in_ifaddr))
245 		return (EADDRNOTAVAIL);
246 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
247 		return (EINVAL);
248 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
249 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
250 	     (so->so_options & SO_ACCEPTCONN) == 0))
251 		wild = INPLOOKUP_WILDCARD;
252 	if (nam) {
253 		sin = mtod(nam, struct sockaddr_in *);
254 		if (nam->m_len != sizeof (*sin))
255 			return (EINVAL);
256 #ifdef notdef
257 		/*
258 		 * We should check the family, but old programs
259 		 * incorrectly fail to initialize it.
260 		 */
261 		if (sin->sin_family != AF_INET)
262 			return (EAFNOSUPPORT);
263 #endif
264 		lport = sin->sin_port;
265 		if (IN_MULTICAST(sin->sin_addr.s_addr)) {
266 			/*
267 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
268 			 * allow complete duplication of binding if
269 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
270 			 * and a multicast address is bound on both
271 			 * new and duplicated sockets.
272 			 */
273 			if (so->so_options & SO_REUSEADDR)
274 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
275 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
276 			sin->sin_port = 0;		/* yech... */
277 			if (!(so->so_options & SO_BINDANY) &&
278 			    in_iawithaddr(sin->sin_addr, NULL,
279 			    inp->inp_rdomain) == 0)
280 				return (EADDRNOTAVAIL);
281 		}
282 		if (lport) {
283 			struct inpcb *t;
284 
285 			/* GROSS */
286 			if (ntohs(lport) < IPPORT_RESERVED &&
287 			    (error = suser(p, 0)))
288 				return (EACCES);
289 			if (so->so_euid) {
290 				t = in_pcblookup(table, &zeroin_addr, 0,
291 				    &sin->sin_addr, lport, INPLOOKUP_WILDCARD,
292 				    inp->inp_rdomain);
293 				if (t && (so->so_euid != t->inp_socket->so_euid))
294 					return (EADDRINUSE);
295 			}
296 			t = in_pcblookup(table, &zeroin_addr, 0,
297 			    &sin->sin_addr, lport, wild, inp->inp_rdomain);
298 			if (t && (reuseport & t->inp_socket->so_options) == 0)
299 				return (EADDRINUSE);
300 		}
301 		inp->inp_laddr = sin->sin_addr;
302 	}
303 	if (lport == 0) {
304 		u_int16_t first, last;
305 		int count;
306 
307 		if (inp->inp_flags & INP_HIGHPORT) {
308 			first = ipport_hifirstauto;	/* sysctl */
309 			last = ipport_hilastauto;
310 		} else if (inp->inp_flags & INP_LOWPORT) {
311 			if ((error = suser(p, 0)))
312 				return (EACCES);
313 			first = IPPORT_RESERVED-1; /* 1023 */
314 			last = 600;		   /* not IPPORT_RESERVED/2 */
315 		} else {
316 			first = ipport_firstauto;	/* sysctl */
317 			last  = ipport_lastauto;
318 		}
319 
320 		/*
321 		 * Simple check to ensure all ports are not used up causing
322 		 * a deadlock here.
323 		 *
324 		 * We split the two cases (up and down) so that the direction
325 		 * is not being tested on each round of the loop.
326 		 */
327 
328 		if (first > last) {
329 			/*
330 			 * counting down
331 			 */
332 			count = first - last;
333 			if (count)
334 				*lastport = first - arc4random_uniform(count);
335 
336 			do {
337 				if (count-- < 0)	/* completely used? */
338 					return (EADDRNOTAVAIL);
339 				--*lastport;
340 				if (*lastport > first || *lastport < last)
341 					*lastport = first;
342 				lport = htons(*lastport);
343 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
344 			    in_pcblookup(table, &zeroin_addr, 0,
345 			    &inp->inp_laddr, lport, wild, inp->inp_rdomain));
346 		} else {
347 			/*
348 			 * counting up
349 			 */
350 			count = last - first;
351 			if (count)
352 				*lastport = first + arc4random_uniform(count);
353 
354 			do {
355 				if (count-- < 0)	/* completely used? */
356 					return (EADDRNOTAVAIL);
357 				++*lastport;
358 				if (*lastport < first || *lastport > last)
359 					*lastport = first;
360 				lport = htons(*lastport);
361 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
362 			    in_pcblookup(table, &zeroin_addr, 0,
363 			    &inp->inp_laddr, lport, wild, inp->inp_rdomain));
364 		}
365 	}
366 	inp->inp_lport = lport;
367 	in_pcbrehash(inp);
368 	return (0);
369 }
370 
371 /*
372  * Connect from a socket to a specified address.
373  * Both address and port must be specified in argument sin.
374  * If don't have a local address for this socket yet,
375  * then pick one.
376  */
377 int
378 in_pcbconnect(v, nam)
379 	void *v;
380 	struct mbuf *nam;
381 {
382 	struct inpcb *inp = v;
383 	struct sockaddr_in *ifaddr = NULL;
384 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
385 
386 #ifdef INET6
387 	if (sotopf(inp->inp_socket) == PF_INET6)
388 		return (in6_pcbconnect(inp, nam));
389 	if ((inp->inp_flags & INP_IPV6) != 0)
390 		panic("IPv6 pcb passed into in_pcbconnect");
391 #endif /* INET6 */
392 
393 	if (nam->m_len != sizeof (*sin))
394 		return (EINVAL);
395 	if (sin->sin_family != AF_INET)
396 		return (EAFNOSUPPORT);
397 	if (sin->sin_port == 0)
398 		return (EADDRNOTAVAIL);
399 	if (!TAILQ_EMPTY(&in_ifaddr)) {
400 		/*
401 		 * If the destination address is INADDR_ANY,
402 		 * use the primary local address.
403 		 * If the supplied address is INADDR_BROADCAST,
404 		 * and the primary interface supports broadcast,
405 		 * choose the broadcast address for that interface.
406 		 */
407 		if (sin->sin_addr.s_addr == INADDR_ANY)
408 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr;
409 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
410 		  (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST))
411 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr;
412 	}
413 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
414 		int error;
415 		ifaddr = in_selectsrc(sin, &inp->inp_route,
416 			inp->inp_socket->so_options, inp->inp_moptions, &error,
417 			inp->inp_rdomain);
418 		if (ifaddr == NULL) {
419 			if (error == 0)
420 				error = EADDRNOTAVAIL;
421 			return error;
422 		}
423 	}
424 	if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
425 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
426 	    inp->inp_lport, inp->inp_rdomain) != 0)
427 		return (EADDRINUSE);
428 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
429 		if (inp->inp_lport == 0 &&
430 		    in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL)
431 			return (EADDRNOTAVAIL);
432 		inp->inp_laddr = ifaddr->sin_addr;
433 	}
434 	inp->inp_faddr = sin->sin_addr;
435 	inp->inp_fport = sin->sin_port;
436 	in_pcbrehash(inp);
437 #ifdef IPSEC
438 	{
439 		int error; /* This is just ignored */
440 
441 		/* Cause an IPsec SA to be established. */
442 		ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT,
443 		    NULL, inp, NULL);
444 	}
445 #endif
446 	return (0);
447 }
448 
449 void
450 in_pcbdisconnect(v)
451 	void *v;
452 {
453 	struct inpcb *inp = v;
454 
455 	switch (sotopf(inp->inp_socket)) {
456 #ifdef INET6
457 	case PF_INET6:
458 		inp->inp_faddr6 = in6addr_any;
459 		break;
460 #endif
461 	case PF_INET:
462 		inp->inp_faddr.s_addr = INADDR_ANY;
463 		break;
464 	}
465 
466 	inp->inp_fport = 0;
467 	in_pcbrehash(inp);
468 	if (inp->inp_socket->so_state & SS_NOFDREF)
469 		in_pcbdetach(inp);
470 }
471 
472 void
473 in_pcbdetach(v)
474 	void *v;
475 {
476 	struct inpcb *inp = v;
477 	struct socket *so = inp->inp_socket;
478 	int s;
479 
480 	so->so_pcb = 0;
481 	sofree(so);
482 	if (inp->inp_options)
483 		m_freem(inp->inp_options);
484 	if (inp->inp_route.ro_rt)
485 		rtfree(inp->inp_route.ro_rt);
486 #ifdef INET6
487 	if (inp->inp_flags & INP_IPV6) {
488 		ip6_freepcbopts(inp->inp_outputopts6);
489 		ip6_freemoptions(inp->inp_moptions6);
490 	} else
491 #endif
492 		ip_freemoptions(inp->inp_moptions);
493 #ifdef IPSEC
494 	/* IPsec cleanup here */
495 	s = spltdb();
496 	if (inp->inp_tdb_in)
497 		TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
498 			     inp, inp_tdb_in_next);
499 	if (inp->inp_tdb_out)
500 	        TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp,
501 			     inp_tdb_out_next);
502 	if (inp->inp_ipsec_remotecred)
503 		ipsp_reffree(inp->inp_ipsec_remotecred);
504 	if (inp->inp_ipsec_remoteauth)
505 		ipsp_reffree(inp->inp_ipsec_remoteauth);
506 	if (inp->inp_ipo)
507 		ipsec_delete_policy(inp->inp_ipo);
508 	splx(s);
509 #endif
510 #if NPF > 0
511 	if (inp->inp_pf_sk)
512 		((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL;
513 #endif
514 	s = splnet();
515 	LIST_REMOVE(inp, inp_lhash);
516 	LIST_REMOVE(inp, inp_hash);
517 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
518 	splx(s);
519 	pool_put(&inpcb_pool, inp);
520 }
521 
522 void
523 in_setsockaddr(inp, nam)
524 	struct inpcb *inp;
525 	struct mbuf *nam;
526 {
527 	struct sockaddr_in *sin;
528 
529 	nam->m_len = sizeof (*sin);
530 	sin = mtod(nam, struct sockaddr_in *);
531 	bzero((caddr_t)sin, sizeof (*sin));
532 	sin->sin_family = AF_INET;
533 	sin->sin_len = sizeof(*sin);
534 	sin->sin_port = inp->inp_lport;
535 	sin->sin_addr = inp->inp_laddr;
536 }
537 
538 void
539 in_setpeeraddr(inp, nam)
540 	struct inpcb *inp;
541 	struct mbuf *nam;
542 {
543 	struct sockaddr_in *sin;
544 
545 #ifdef INET6
546 	if (sotopf(inp->inp_socket) == PF_INET6) {
547 		in6_setpeeraddr(inp, nam);
548 		return;
549 	}
550 #endif /* INET6 */
551 
552 	nam->m_len = sizeof (*sin);
553 	sin = mtod(nam, struct sockaddr_in *);
554 	bzero((caddr_t)sin, sizeof (*sin));
555 	sin->sin_family = AF_INET;
556 	sin->sin_len = sizeof(*sin);
557 	sin->sin_port = inp->inp_fport;
558 	sin->sin_addr = inp->inp_faddr;
559 }
560 
561 /*
562  * Pass some notification to all connections of a protocol
563  * associated with address dst.  The "usual action" will be
564  * taken, depending on the ctlinput cmd.  The caller must filter any
565  * cmds that are uninteresting (e.g., no error in the map).
566  * Call the protocol specific routine (if any) to report
567  * any errors for each matching socket.
568  *
569  * Must be called at splsoftnet.
570  */
571 void
572 in_pcbnotifyall(table, dst, errno, notify)
573 	struct inpcbtable *table;
574 	struct sockaddr *dst;
575 	int errno;
576 	void (*notify)(struct inpcb *, int);
577 {
578 	struct inpcb *inp, *oinp;
579 	struct in_addr faddr;
580 
581 	splsoftassert(IPL_SOFTNET);
582 
583 #ifdef INET6
584 	/*
585 	 * See in6_pcbnotify() for IPv6 codepath.  By the time this
586 	 * gets called, the addresses passed are either definitely IPv4 or
587 	 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
588 	 */
589 #endif /* INET6 */
590 
591 	if (dst->sa_family != AF_INET)
592 		return;
593 	faddr = satosin(dst)->sin_addr;
594 	if (faddr.s_addr == INADDR_ANY)
595 		return;
596 
597 	for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
598 	    inp != CIRCLEQ_END(&table->inpt_queue);) {
599 #ifdef INET6
600 		if (inp->inp_flags & INP_IPV6) {
601 			inp = CIRCLEQ_NEXT(inp, inp_queue);
602 			continue;
603 		}
604 #endif
605 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
606 		    inp->inp_socket == 0) {
607 			inp = CIRCLEQ_NEXT(inp, inp_queue);
608 			continue;
609 		}
610 		oinp = inp;
611 		inp = CIRCLEQ_NEXT(inp, inp_queue);
612 		if (notify)
613 			(*notify)(oinp, errno);
614 	}
615 }
616 
617 /*
618  * Check for alternatives when higher level complains
619  * about service problems.  For now, invalidate cached
620  * routing information.  If the route was created dynamically
621  * (by a redirect), time to try a default gateway again.
622  */
623 void
624 in_losing(inp)
625 	struct inpcb *inp;
626 {
627 	struct rtentry *rt;
628 	struct rt_addrinfo info;
629 
630 	if ((rt = inp->inp_route.ro_rt)) {
631 		inp->inp_route.ro_rt = 0;
632 		bzero((caddr_t)&info, sizeof(info));
633 		info.rti_flags = rt->rt_flags;
634 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
635 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
636 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
637 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0,
638 		    inp->inp_rdomain);
639 		if (rt->rt_flags & RTF_DYNAMIC)
640 			(void)rtrequest1(RTM_DELETE, &info, rt->rt_priority,
641 				(struct rtentry **)0, inp->inp_rdomain);
642 		/*
643 		 * A new route can be allocated
644 		 * the next time output is attempted.
645 		 * rtfree() needs to be called in anycase because the inp
646 		 * is still holding a reference to rt.
647 		 */
648 		rtfree(rt);
649 	}
650 }
651 
652 /*
653  * After a routing change, flush old routing
654  * and allocate a (hopefully) better one.
655  */
656 void
657 in_rtchange(inp, errno)
658 	struct inpcb *inp;
659 	int errno;
660 {
661 	if (inp->inp_route.ro_rt) {
662 		rtfree(inp->inp_route.ro_rt);
663 		inp->inp_route.ro_rt = 0;
664 		/*
665 		 * A new route can be allocated the next time
666 		 * output is attempted.
667 		 */
668 	}
669 }
670 
671 struct inpcb *
672 in_pcblookup(struct inpcbtable *table, void *faddrp, u_int fport_arg, void *laddrp, u_int lport_arg, int flags, u_int rdomain)
673 {
674 	struct inpcb *inp, *match = 0;
675 	int matchwild = 3, wildcard;
676 	u_int16_t fport = fport_arg, lport = lport_arg;
677 	struct in_addr faddr = *(struct in_addr *)faddrp;
678 	struct in_addr laddr = *(struct in_addr *)laddrp;
679 
680 	for (inp = LIST_FIRST(INPCBLHASH(table, lport, rdomain)); inp;
681 	    inp = LIST_NEXT(inp, inp_lhash)) {
682 		if (inp->inp_rdomain != rdomain)
683 			continue;
684 		if (inp->inp_lport != lport)
685 			continue;
686 		wildcard = 0;
687 #ifdef INET6
688 		if (flags & INPLOOKUP_IPV6) {
689 			struct in6_addr *laddr6 = (struct in6_addr *)laddrp;
690 			struct in6_addr *faddr6 = (struct in6_addr *)faddrp;
691 
692 			if (!(inp->inp_flags & INP_IPV6))
693 				continue;
694 
695 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
696 				if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
697 					wildcard++;
698 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6))
699 					continue;
700 			} else {
701 				if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
702 					wildcard++;
703 			}
704 
705 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
706 				if (IN6_IS_ADDR_UNSPECIFIED(faddr6))
707 					wildcard++;
708 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
709 				    faddr6) || inp->inp_fport != fport)
710 					continue;
711 			} else {
712 				if (!IN6_IS_ADDR_UNSPECIFIED(faddr6))
713 					wildcard++;
714 			}
715 		} else
716 #endif /* INET6 */
717 		{
718 #ifdef INET6
719 		        if (inp->inp_flags & INP_IPV6)
720 			        continue;
721 #endif /* INET6 */
722 
723 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
724 				if (faddr.s_addr == INADDR_ANY)
725 					wildcard++;
726 				else if (inp->inp_faddr.s_addr != faddr.s_addr ||
727 				    inp->inp_fport != fport)
728 					continue;
729 			} else {
730 				if (faddr.s_addr != INADDR_ANY)
731 					wildcard++;
732 			}
733 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
734 				if (laddr.s_addr == INADDR_ANY)
735 					wildcard++;
736 				else if (inp->inp_laddr.s_addr != laddr.s_addr)
737 					continue;
738 			} else {
739 				if (laddr.s_addr != INADDR_ANY)
740 					wildcard++;
741 			}
742 		}
743 		if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) &&
744 		    wildcard < matchwild) {
745 			match = inp;
746 			if ((matchwild = wildcard) == 0)
747 				break;
748 		}
749 	}
750 	return (match);
751 }
752 
753 struct rtentry *
754 in_pcbrtentry(inp)
755 	struct inpcb *inp;
756 {
757 	struct route *ro;
758 
759 	ro = &inp->inp_route;
760 
761 	/*
762 	 * No route yet, so try to acquire one.
763 	 */
764 	if (ro->ro_rt == NULL) {
765 #ifdef INET6
766 		bzero(ro, sizeof(struct route_in6));
767 #else
768 		bzero(ro, sizeof(struct route));
769 #endif
770 
771 		switch(sotopf(inp->inp_socket)) {
772 #ifdef INET6
773 		case PF_INET6:
774 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
775 				break;
776 			ro->ro_dst.sa_family = AF_INET6;
777 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
778 			((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
779 			    inp->inp_faddr6;
780 			rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0);
781 			break;
782 #endif /* INET6 */
783 		case PF_INET:
784 			if (inp->inp_faddr.s_addr == INADDR_ANY)
785 				break;
786 			ro->ro_dst.sa_family = AF_INET;
787 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
788 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
789 			rtalloc_mpath(ro, &inp->inp_laddr.s_addr, 0);
790 			break;
791 		}
792 	}
793 	return (ro->ro_rt);
794 }
795 
796 struct sockaddr_in *
797 in_selectsrc(struct sockaddr_in *sin, struct route *ro, int soopts,
798     struct ip_moptions *mopts, int *errorp, u_int rdomain)
799 {
800 	struct sockaddr_in *sin2;
801 	struct in_ifaddr *ia;
802 
803 	ia = (struct in_ifaddr *)0;
804 	/*
805 	 * If route is known or can be allocated now,
806 	 * our src addr is taken from the i/f, else punt.
807 	 */
808 	if (ro->ro_rt &&
809 	    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
810 		sin->sin_addr.s_addr ||
811 	    soopts & SO_DONTROUTE)) {
812 		RTFREE(ro->ro_rt);
813 		ro->ro_rt = (struct rtentry *)0;
814 	}
815 	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
816 	    (ro->ro_rt == (struct rtentry *)0 ||
817 	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
818 		/* No route yet, so try to acquire one */
819 		ro->ro_dst.sa_family = AF_INET;
820 		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
821 		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
822 		rtalloc_mpath(ro, NULL, rdomain);
823 
824 		/*
825 		 * It is important to bzero out the rest of the
826 		 * struct sockaddr_in when mixing v6 & v4!
827 		 */
828 		sin2 = (struct sockaddr_in *)&ro->ro_dst;
829 		bzero(sin2->sin_zero, sizeof(sin2->sin_zero));
830 	}
831 	/*
832 	 * If we found a route, use the address
833 	 * corresponding to the outgoing interface
834 	 * unless it is the loopback (in case a route
835 	 * to our address on another net goes to loopback).
836 	 */
837 	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
838 		ia = ifatoia(ro->ro_rt->rt_ifa);
839 	if (ia == 0) {
840 		u_int16_t fport = sin->sin_port;
841 
842 		sin->sin_port = 0;
843 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin), rdomain));
844 		if (ia == 0)
845 			ia = ifatoia(ifa_ifwithnet(sintosa(sin), rdomain));
846 		sin->sin_port = fport;
847 		if (ia == 0)
848 			ia = TAILQ_FIRST(&in_ifaddr);
849 		if (ia == 0) {
850 			*errorp = EADDRNOTAVAIL;
851 			return NULL;
852 		}
853 	}
854 	/*
855 	 * If the destination address is multicast and an outgoing
856 	 * interface has been set as a multicast option, use the
857 	 * address of that interface as our source address.
858 	 */
859 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
860 		struct ip_moptions *imo;
861 		struct ifnet *ifp;
862 
863 		imo = mopts;
864 		if (imo->imo_multicast_ifp != NULL) {
865 			ifp = imo->imo_multicast_ifp;
866 			TAILQ_FOREACH(ia, &in_ifaddr, ia_list)
867 				if (ia->ia_ifp == ifp)
868 					break;
869 			if (ia == 0) {
870 				*errorp = EADDRNOTAVAIL;
871 				return NULL;
872 			}
873 		}
874 	}
875 	return satosin(&ia->ia_addr);
876 }
877 
878 void
879 in_pcbrehash(inp)
880 	struct inpcb *inp;
881 {
882 	struct inpcbtable *table = inp->inp_table;
883 	int s;
884 
885 	s = splnet();
886 	LIST_REMOVE(inp, inp_lhash);
887 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain),
888 	    inp, inp_lhash);
889 	LIST_REMOVE(inp, inp_hash);
890 #ifdef INET6
891 	if (inp->inp_flags & INP_IPV6) {
892 		LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6,
893 		    inp->inp_fport, &inp->inp_laddr6, inp->inp_lport),
894 		    inp, inp_hash);
895 	} else {
896 #endif /* INET6 */
897 		LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr,
898 		    inp->inp_fport, &inp->inp_laddr, inp->inp_lport,
899 		    inp->inp_rdomain), inp, inp_hash);
900 #ifdef INET6
901 	}
902 #endif /* INET6 */
903 	splx(s);
904 }
905 
906 #ifdef DIAGNOSTIC
907 int	in_pcbnotifymiss = 0;
908 #endif
909 
910 /*
911  * The in(6)_pcbhashlookup functions are used to locate connected sockets
912  * quickly:
913  * 		faddr.fport <-> laddr.lport
914  * No wildcard matching is done so that listening sockets are not found.
915  * If the functions return NULL in(6)_pcblookup_listen can be used to
916  * find a listening/bound socket that may accept the connection.
917  * After those two lookups no other are necessary.
918  */
919 struct inpcb *
920 in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr,
921     u_int fport_arg, struct in_addr laddr, u_int lport_arg, u_int rdomain)
922 {
923 	struct inpcbhead *head;
924 	struct inpcb *inp;
925 	u_int16_t fport = fport_arg, lport = lport_arg;
926 
927 	head = INPCBHASH(table, &faddr, fport, &laddr, lport, rdomain);
928 	LIST_FOREACH(inp, head, inp_hash) {
929 #ifdef INET6
930 		if (inp->inp_flags & INP_IPV6)
931 			continue;	/*XXX*/
932 #endif
933 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
934 		    inp->inp_fport == fport &&
935 		    inp->inp_lport == lport &&
936 		    inp->inp_laddr.s_addr == laddr.s_addr &&
937 		    inp->inp_rdomain == rdomain) {
938 			/*
939 			 * Move this PCB to the head of hash chain so that
940 			 * repeated accesses are quicker.  This is analogous to
941 			 * the historic single-entry PCB cache.
942 			 */
943 			if (inp != LIST_FIRST(head)) {
944 				LIST_REMOVE(inp, inp_hash);
945 				LIST_INSERT_HEAD(head, inp, inp_hash);
946 			}
947 			break;
948 		}
949 	}
950 #ifdef DIAGNOSTIC
951 	if (inp == NULL && in_pcbnotifymiss) {
952 		printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%d\n",
953 		    ntohl(faddr.s_addr), ntohs(fport),
954 		    ntohl(laddr.s_addr), ntohs(lport), rdomain);
955 	}
956 #endif
957 	return (inp);
958 }
959 
#ifdef INET6
/*
 * IPv6 counterpart of in_pcbhashlookup(): find the connected IPv6 PCB
 * whose foreign and local address/port exactly match the arguments.
 * Returns the PCB, moved to the head of its hash chain, or NULL.
 */
struct inpcb *
in6_pcbhashlookup(struct inpcbtable *table, struct in6_addr *faddr,
    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_int16_t fport = fport_arg, lport = lport_arg;

	head = IN6PCBHASH(table, faddr, fport, laddr, lport);
	for (inp = LIST_FIRST(head); inp != NULL;
	    inp = LIST_NEXT(inp, inp_hash)) {
		if (!(inp->inp_flags & INP_IPV6))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) ||
		    inp->inp_fport != fport || inp->inp_lport != lport ||
		    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr))
			continue;

		/*
		 * Found it.  Pull the PCB to the front of its chain so
		 * that repeated lookups are quicker; this plays the role
		 * of the historic single-entry PCB cache.
		 */
		if (inp != LIST_FIRST(head)) {
			LIST_REMOVE(inp, inp_hash);
			LIST_INSERT_HEAD(head, inp, inp_hash);
		}
		break;
	}
#ifdef DIAGNOSTIC
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcbhashlookup: faddr=");
		printf(" fport=%d laddr=", ntohs(fport));
		printf(" lport=%d\n", ntohs(lport));
	}
#endif
	return (inp);
}
#endif /* INET6 */
998 
999 /*
1000  * The in(6)_pcblookup_listen functions are used to locate listening
1001  * sockets quickly.  These are sockets with unspecified foreign address
1002  * and port:
1003  *		*.*     <-> laddr.lport
1004  *		*.*     <->     *.lport
1005  */
1006 struct inpcb *
1007 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr,
1008     u_int lport_arg, int reverse, struct mbuf *m, u_int rdomain)
1009 {
1010 	struct inpcbhead *head;
1011 	struct in_addr *key1, *key2;
1012 	struct inpcb *inp;
1013 	u_int16_t lport = lport_arg;
1014 
1015 #if NPF > 0
1016 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1017 		struct pf_divert *divert;
1018 		/* XXX rdomain */
1019 		if ((divert = pf_find_divert(m)) == NULL)
1020 			return (NULL);
1021 		key1 = key2 = &divert->addr.ipv4;
1022 		lport = divert->port;
1023 	} else
1024 #endif
1025 	if (reverse) {
1026 		key1 = &zeroin_addr;
1027 		key2 = &laddr;
1028 	} else {
1029 		key1 = &laddr;
1030 		key2 = &zeroin_addr;
1031 	}
1032 
1033 	head = INPCBHASH(table, &zeroin_addr, 0, key1, lport, rdomain);
1034 	LIST_FOREACH(inp, head, inp_hash) {
1035 #ifdef INET6
1036 		if (inp->inp_flags & INP_IPV6)
1037 			continue;	/*XXX*/
1038 #endif
1039 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1040 		    inp->inp_laddr.s_addr == key1->s_addr &&
1041 		    inp->inp_faddr.s_addr == INADDR_ANY &&
1042 		    inp->inp_rdomain == rdomain)
1043 			break;
1044 	}
1045 	if (inp == NULL && key1->s_addr != key2->s_addr) {
1046 		head = INPCBHASH(table, &zeroin_addr, 0, key2, lport, rdomain);
1047 		LIST_FOREACH(inp, head, inp_hash) {
1048 #ifdef INET6
1049 			if (inp->inp_flags & INP_IPV6)
1050 				continue;	/*XXX*/
1051 #endif
1052 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1053 			    inp->inp_laddr.s_addr == key2->s_addr &&
1054 			    inp->inp_faddr.s_addr == INADDR_ANY &&
1055 			    inp->inp_rdomain == rdomain)
1056 				break;
1057 		}
1058 	}
1059 #ifdef DIAGNOSTIC
1060 	if (inp == NULL && in_pcbnotifymiss) {
1061 		printf("in_pcblookup_listen: laddr=%08x lport=%d\n",
1062 		    ntohl(laddr.s_addr), ntohs(lport));
1063 	}
1064 #endif
1065 	/*
1066 	 * Move this PCB to the head of hash chain so that
1067 	 * repeated accesses are quicker.  This is analogous to
1068 	 * the historic single-entry PCB cache.
1069 	 */
1070 	if (inp != NULL && inp != LIST_FIRST(head)) {
1071 		LIST_REMOVE(inp, inp_hash);
1072 		LIST_INSERT_HEAD(head, inp, inp_hash);
1073 	}
1074 	return (inp);
1075 }
1076 
1077 #ifdef INET6
1078 struct inpcb *
1079 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
1080     u_int lport_arg, int reverse, struct mbuf *m)
1081 {
1082 	struct inpcbhead *head;
1083 	struct in6_addr *key1, *key2;
1084 	struct inpcb *inp;
1085 	u_int16_t lport = lport_arg;
1086 
1087 #if NPF > 0
1088 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1089 		struct pf_divert *divert;
1090 
1091 		if ((divert = pf_find_divert(m)) == NULL)
1092 			return (NULL);
1093 		key1 = key2 = &divert->addr.ipv6;
1094 		lport = divert->port;
1095 	} else
1096 #endif
1097 	if (reverse) {
1098 		key1 = &zeroin6_addr;
1099 		key2 = laddr;
1100 	} else {
1101 		key1 = laddr;
1102 		key2 = &zeroin6_addr;
1103 	}
1104 
1105 	head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport);
1106 	LIST_FOREACH(inp, head, inp_hash) {
1107 		if (!(inp->inp_flags & INP_IPV6))
1108 			continue;
1109 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1110 		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) &&
1111 		    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1112 			break;
1113 	}
1114 	if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
1115 		head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport);
1116 		LIST_FOREACH(inp, head, inp_hash) {
1117 			if (!(inp->inp_flags & INP_IPV6))
1118 				continue;
1119 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1120 		    	    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) &&
1121 			    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1122 				break;
1123 		}
1124 	}
1125 #ifdef DIAGNOSTIC
1126 	if (inp == NULL && in_pcbnotifymiss) {
1127 		printf("in6_pcblookup_listen: laddr= lport=%d\n",
1128 		    ntohs(lport));
1129 	}
1130 #endif
1131 	/*
1132 	 * Move this PCB to the head of hash chain so that
1133 	 * repeated accesses are quicker.  This is analogous to
1134 	 * the historic single-entry PCB cache.
1135 	 */
1136 	if (inp != NULL && inp != LIST_FIRST(head)) {
1137 		LIST_REMOVE(inp, inp_hash);
1138 		LIST_INSERT_HEAD(head, inp, inp_hash);
1139 	}
1140 	return (inp);
1141 }
1142 #endif /* INET6 */
1143