xref: /freebsd/sys/netinet/in_pcb.c (revision 39beb93c)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993, 1995
3  *	The Regents of the University of California.
4  * Copyright (c) 2007-2008 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_ddb.h"
38 #include "opt_inet.h"
39 #include "opt_ipsec.h"
40 #include "opt_inet6.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/domain.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/priv.h>
52 #include <sys/proc.h>
53 #include <sys/jail.h>
54 #include <sys/kernel.h>
55 #include <sys/sysctl.h>
56 #include <sys/vimage.h>
57 
58 #ifdef DDB
59 #include <ddb/ddb.h>
60 #endif
61 
62 #include <vm/uma.h>
63 
64 #include <net/if.h>
65 #include <net/if_types.h>
66 #include <net/route.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/in_pcb.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/tcp_var.h>
73 #include <netinet/udp.h>
74 #include <netinet/udp_var.h>
75 #include <netinet/vinet.h>
76 #ifdef INET6
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/vinet6.h>
80 #endif /* INET6 */
81 
82 
83 #ifdef IPSEC
84 #include <netipsec/ipsec.h>
85 #include <netipsec/key.h>
86 #endif /* IPSEC */
87 
88 #include <security/mac/mac_framework.h>
89 
#ifdef VIMAGE_GLOBALS
/*
 * Bounds of the three local port ranges from which a port is picked when
 * a socket does not ask for a specific one: the low range, the default
 * range and the high range.  The values are clamped by
 * sysctl_net_ipport_check() below.
 */
int	ipport_lowfirstauto, ipport_lowlastauto;
int	ipport_firstauto, ipport_lastauto;
int	ipport_hifirstauto, ipport_hilastauto;

/*
 * Inclusive bounds of the reserved port range, which only privileged
 * credentials may bind to.  There are significant security
 * considerations that must be accounted for when changing these, but the
 * security benefits can be great.  Please be careful.
 */
int	ipport_reservedhigh, ipport_reservedlow;

/*
 * State for random ephemeral port allocation:
 *   ipport_randomized	 random allocation enabled.
 *   ipport_randomcps	 maximum number of random allocations before
 *			 switching to sequential allocation.
 *   ipport_randomtime	 minimum time to keep sequential allocation
 *			 before switching back to random.
 *   ipport_stoprandom	 when set, non-UDP allocations are sequential.
 *   ipport_tcpallocs	 count of non-UDP port allocations.
 *   ipport_tcplastcount not referenced in this part of the file;
 *			 presumably bookkeeping for ipport_tick() --
 *			 TODO confirm.
 */
int	ipport_randomized, ipport_randomcps, ipport_randomtime;
int	ipport_stoprandom;
int	ipport_tcpallocs, ipport_tcplastcount;
#endif
118 
119 #define RANGECHK(var, min, max) \
120 	if ((var) < (min)) { (var) = (min); } \
121 	else if ((var) > (max)) { (var) = (max); }
122 
123 static int
124 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
125 {
126 	INIT_VNET_INET(curvnet);
127 	int error;
128 
129 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
130 	if (error == 0) {
131 		RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
132 		RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
133 		RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
134 		RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
135 		RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
136 		RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
137 	}
138 	return (error);
139 }
140 
141 #undef RANGECHK
142 
143 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
144 
145 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
146 	lowfirst, CTLTYPE_INT|CTLFLAG_RW, ipport_lowfirstauto, 0,
147 	&sysctl_net_ipport_check, "I", "");
148 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
149 	lowlast, CTLTYPE_INT|CTLFLAG_RW, ipport_lowlastauto, 0,
150 	&sysctl_net_ipport_check, "I", "");
151 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
152 	first, CTLTYPE_INT|CTLFLAG_RW, ipport_firstauto, 0,
153 	&sysctl_net_ipport_check, "I", "");
154 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
155 	last, CTLTYPE_INT|CTLFLAG_RW, ipport_lastauto, 0,
156 	&sysctl_net_ipport_check, "I", "");
157 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
158 	hifirst, CTLTYPE_INT|CTLFLAG_RW, ipport_hifirstauto, 0,
159 	&sysctl_net_ipport_check, "I", "");
160 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
161 	hilast, CTLTYPE_INT|CTLFLAG_RW, ipport_hilastauto, 0,
162 	&sysctl_net_ipport_check, "I", "");
163 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO,
164 	reservedhigh, CTLFLAG_RW|CTLFLAG_SECURE, ipport_reservedhigh, 0, "");
165 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO, reservedlow,
166 	CTLFLAG_RW|CTLFLAG_SECURE, ipport_reservedlow, 0, "");
167 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO, randomized,
168 	CTLFLAG_RW, ipport_randomized, 0, "Enable random port allocation");
169 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO, randomcps,
170 	CTLFLAG_RW, ipport_randomcps, 0, "Maximum number of random port "
171 	"allocations before switching to a sequental one");
172 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip_portrange, OID_AUTO, randomtime,
173 	CTLFLAG_RW, ipport_randomtime, 0,
174 	"Minimum time to keep sequental port "
175 	"allocation before switching to a random one");
176 
177 /*
178  * in_pcb.c: manage the Protocol Control Blocks.
179  *
180  * NOTE: It is assumed that most of these functions will be called with
181  * the pcbinfo lock held, and often, the inpcb lock held, as these utility
182  * functions often modify hash chains or addresses in pcbs.
183  */
184 
185 /*
186  * Allocate a PCB and associate it with the socket.
187  * On success return with the PCB locked.
188  */
189 int
190 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
191 {
192 #ifdef INET6
193 	INIT_VNET_INET6(curvnet);
194 #endif
195 	struct inpcb *inp;
196 	int error;
197 
198 	INP_INFO_WLOCK_ASSERT(pcbinfo);
199 	error = 0;
200 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
201 	if (inp == NULL)
202 		return (ENOBUFS);
203 	bzero(inp, inp_zero_size);
204 	inp->inp_pcbinfo = pcbinfo;
205 	inp->inp_socket = so;
206 	inp->inp_cred = crhold(so->so_cred);
207 	inp->inp_inc.inc_fibnum = so->so_fibnum;
208 #ifdef MAC
209 	error = mac_inpcb_init(inp, M_NOWAIT);
210 	if (error != 0)
211 		goto out;
212 	SOCK_LOCK(so);
213 	mac_inpcb_create(so, inp);
214 	SOCK_UNLOCK(so);
215 #endif
216 #ifdef IPSEC
217 	error = ipsec_init_policy(so, &inp->inp_sp);
218 	if (error != 0) {
219 #ifdef MAC
220 		mac_inpcb_destroy(inp);
221 #endif
222 		goto out;
223 	}
224 #endif /*IPSEC*/
225 #ifdef INET6
226 	if (INP_SOCKAF(so) == AF_INET6) {
227 		inp->inp_vflag |= INP_IPV6PROTO;
228 		if (V_ip6_v6only)
229 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
230 	}
231 #endif
232 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
233 	pcbinfo->ipi_count++;
234 	so->so_pcb = (caddr_t)inp;
235 #ifdef INET6
236 	if (V_ip6_auto_flowlabel)
237 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
238 #endif
239 	INP_WLOCK(inp);
240 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
241 	inp->inp_refcount = 1;	/* Reference from the inpcbinfo */
242 #if defined(IPSEC) || defined(MAC)
243 out:
244 	if (error != 0) {
245 		crfree(inp->inp_cred);
246 		uma_zfree(pcbinfo->ipi_zone, inp);
247 	}
248 #endif
249 	return (error);
250 }
251 
252 int
253 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
254 {
255 	int anonport, error;
256 
257 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
258 	INP_WLOCK_ASSERT(inp);
259 
260 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
261 		return (EINVAL);
262 	anonport = inp->inp_lport == 0 && (nam == NULL ||
263 	    ((struct sockaddr_in *)nam)->sin_port == 0);
264 	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
265 	    &inp->inp_lport, cred);
266 	if (error)
267 		return (error);
268 	if (in_pcbinshash(inp) != 0) {
269 		inp->inp_laddr.s_addr = INADDR_ANY;
270 		inp->inp_lport = 0;
271 		return (EAGAIN);
272 	}
273 	if (anonport)
274 		inp->inp_flags |= INP_ANONPORT;
275 	return (0);
276 }
277 
278 /*
279  * Set up a bind operation on a PCB, performing port allocation
280  * as required, but do not actually modify the PCB. Callers can
281  * either complete the bind by setting inp_laddr/inp_lport and
282  * calling in_pcbinshash(), or they can just use the resulting
283  * port and address to authorise the sending of a once-off packet.
284  *
285  * On error, the values of *laddrp and *lportp are not changed.
286  */
287 int
288 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
289     u_short *lportp, struct ucred *cred)
290 {
291 	INIT_VNET_INET(inp->inp_vnet);
292 	struct socket *so = inp->inp_socket;
293 	unsigned short *lastport;
294 	struct sockaddr_in *sin;
295 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
296 	struct in_addr laddr;
297 	u_short lport = 0;
298 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
299 	int error;
300 	int dorandom;
301 
302 	/*
303 	 * Because no actual state changes occur here, a global write lock on
304 	 * the pcbinfo isn't required.
305 	 */
306 	INP_INFO_LOCK_ASSERT(pcbinfo);
307 	INP_LOCK_ASSERT(inp);
308 
309 	if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
310 		return (EADDRNOTAVAIL);
311 	laddr.s_addr = *laddrp;
312 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
313 		return (EINVAL);
314 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
315 		wild = INPLOOKUP_WILDCARD;
316 	if (nam == NULL) {
317 		if ((error = prison_local_ip4(cred, &laddr)) != 0)
318 			return (error);
319 	} else {
320 		sin = (struct sockaddr_in *)nam;
321 		if (nam->sa_len != sizeof (*sin))
322 			return (EINVAL);
323 #ifdef notdef
324 		/*
325 		 * We should check the family, but old programs
326 		 * incorrectly fail to initialize it.
327 		 */
328 		if (sin->sin_family != AF_INET)
329 			return (EAFNOSUPPORT);
330 #endif
331 		error = prison_local_ip4(cred, &sin->sin_addr);
332 		if (error)
333 			return (error);
334 		if (sin->sin_port != *lportp) {
335 			/* Don't allow the port to change. */
336 			if (*lportp != 0)
337 				return (EINVAL);
338 			lport = sin->sin_port;
339 		}
340 		/* NB: lport is left as 0 if the port isn't being changed. */
341 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
342 			/*
343 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
344 			 * allow complete duplication of binding if
345 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
346 			 * and a multicast address is bound on both
347 			 * new and duplicated sockets.
348 			 */
349 			if (so->so_options & SO_REUSEADDR)
350 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
351 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
352 			sin->sin_port = 0;		/* yech... */
353 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
354 			/*
355 			 * Is the address a local IP address?
356 			 * If INP_NONLOCALOK is set, then the socket may be bound
357 			 * to any endpoint address, local or not.
358 			 */
359 			if (
360 #if defined(IP_NONLOCALBIND)
361 			    ((inp->inp_flags & INP_NONLOCALOK) == 0) &&
362 #endif
363 			    (ifa_ifwithaddr((struct sockaddr *)sin) == 0))
364 				return (EADDRNOTAVAIL);
365 		}
366 		laddr = sin->sin_addr;
367 		if (lport) {
368 			struct inpcb *t;
369 			struct tcptw *tw;
370 
371 			/* GROSS */
372 			if (ntohs(lport) <= V_ipport_reservedhigh &&
373 			    ntohs(lport) >= V_ipport_reservedlow &&
374 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
375 			    0))
376 				return (EACCES);
377 			if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
378 			    priv_check_cred(inp->inp_cred,
379 			    PRIV_NETINET_REUSEPORT, 0) != 0) {
380 				t = in_pcblookup_local(pcbinfo, sin->sin_addr,
381 				    lport, INPLOOKUP_WILDCARD, cred);
382 	/*
383 	 * XXX
384 	 * This entire block sorely needs a rewrite.
385 	 */
386 				if (t &&
387 				    ((t->inp_vflag & INP_TIMEWAIT) == 0) &&
388 				    (so->so_type != SOCK_STREAM ||
389 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
390 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
391 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
392 				     (t->inp_socket->so_options &
393 					 SO_REUSEPORT) == 0) &&
394 				    (inp->inp_cred->cr_uid !=
395 				     t->inp_cred->cr_uid))
396 					return (EADDRINUSE);
397 			}
398 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
399 			    lport, wild, cred);
400 			if (t && (t->inp_vflag & INP_TIMEWAIT)) {
401 				/*
402 				 * XXXRW: If an incpb has had its timewait
403 				 * state recycled, we treat the address as
404 				 * being in use (for now).  This is better
405 				 * than a panic, but not desirable.
406 				 */
407 				tw = intotw(inp);
408 				if (tw == NULL ||
409 				    (reuseport & tw->tw_so_options) == 0)
410 					return (EADDRINUSE);
411 			} else if (t &&
412 			    (reuseport & t->inp_socket->so_options) == 0) {
413 #ifdef INET6
414 				if (ntohl(sin->sin_addr.s_addr) !=
415 				    INADDR_ANY ||
416 				    ntohl(t->inp_laddr.s_addr) !=
417 				    INADDR_ANY ||
418 				    INP_SOCKAF(so) ==
419 				    INP_SOCKAF(t->inp_socket))
420 #endif
421 				return (EADDRINUSE);
422 			}
423 		}
424 	}
425 	if (*lportp != 0)
426 		lport = *lportp;
427 	if (lport == 0) {
428 		u_short first, last, aux;
429 		int count;
430 
431 		if (inp->inp_flags & INP_HIGHPORT) {
432 			first = V_ipport_hifirstauto;	/* sysctl */
433 			last  = V_ipport_hilastauto;
434 			lastport = &pcbinfo->ipi_lasthi;
435 		} else if (inp->inp_flags & INP_LOWPORT) {
436 			error = priv_check_cred(cred,
437 			    PRIV_NETINET_RESERVEDPORT, 0);
438 			if (error)
439 				return error;
440 			first = V_ipport_lowfirstauto;	/* 1023 */
441 			last  = V_ipport_lowlastauto;	/* 600 */
442 			lastport = &pcbinfo->ipi_lastlow;
443 		} else {
444 			first = V_ipport_firstauto;	/* sysctl */
445 			last  = V_ipport_lastauto;
446 			lastport = &pcbinfo->ipi_lastport;
447 		}
448 		/*
449 		 * For UDP, use random port allocation as long as the user
450 		 * allows it.  For TCP (and as of yet unknown) connections,
451 		 * use random port allocation only if the user allows it AND
452 		 * ipport_tick() allows it.
453 		 */
454 		if (V_ipport_randomized &&
455 			(!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
456 			dorandom = 1;
457 		else
458 			dorandom = 0;
459 		/*
460 		 * It makes no sense to do random port allocation if
461 		 * we have the only port available.
462 		 */
463 		if (first == last)
464 			dorandom = 0;
465 		/* Make sure to not include UDP packets in the count. */
466 		if (pcbinfo != &V_udbinfo)
467 			V_ipport_tcpallocs++;
468 		/*
469 		 * Instead of having two loops further down counting up or down
470 		 * make sure that first is always <= last and go with only one
471 		 * code path implementing all logic.
472 		 */
473 		if (first > last) {
474 			aux = first;
475 			first = last;
476 			last = aux;
477 		}
478 
479 		if (dorandom)
480 			*lastport = first +
481 				    (arc4random() % (last - first));
482 
483 		count = last - first;
484 
485 		do {
486 			if (count-- < 0)	/* completely used? */
487 				return (EADDRNOTAVAIL);
488 			++*lastport;
489 			if (*lastport < first || *lastport > last)
490 				*lastport = first;
491 			lport = htons(*lastport);
492 		} while (in_pcblookup_local(pcbinfo, laddr,
493 		    lport, wild, cred));
494 	}
495 	*laddrp = laddr.s_addr;
496 	*lportp = lport;
497 	return (0);
498 }
499 
500 /*
501  * Connect from a socket to a specified address.
502  * Both address and port must be specified in argument sin.
503  * If don't have a local address for this socket yet,
504  * then pick one.
505  */
506 int
507 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
508 {
509 	u_short lport, fport;
510 	in_addr_t laddr, faddr;
511 	int anonport, error;
512 
513 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
514 	INP_WLOCK_ASSERT(inp);
515 
516 	lport = inp->inp_lport;
517 	laddr = inp->inp_laddr.s_addr;
518 	anonport = (lport == 0);
519 	error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
520 	    NULL, cred);
521 	if (error)
522 		return (error);
523 
524 	/* Do the initial binding of the local address if required. */
525 	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
526 		inp->inp_lport = lport;
527 		inp->inp_laddr.s_addr = laddr;
528 		if (in_pcbinshash(inp) != 0) {
529 			inp->inp_laddr.s_addr = INADDR_ANY;
530 			inp->inp_lport = 0;
531 			return (EAGAIN);
532 		}
533 	}
534 
535 	/* Commit the remaining changes. */
536 	inp->inp_lport = lport;
537 	inp->inp_laddr.s_addr = laddr;
538 	inp->inp_faddr.s_addr = faddr;
539 	inp->inp_fport = fport;
540 	in_pcbrehash(inp);
541 
542 	if (anonport)
543 		inp->inp_flags |= INP_ANONPORT;
544 	return (0);
545 }
546 
547 /*
548  * Do proper source address selection on an unbound socket in case
549  * of connect. Take jails into account as well.
550  */
551 static int
552 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
553     struct ucred *cred)
554 {
555 	struct in_ifaddr *ia;
556 	struct ifaddr *ifa;
557 	struct sockaddr *sa;
558 	struct sockaddr_in *sin;
559 	struct route sro;
560 	int error;
561 
562 	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
563 
564 	error = 0;
565 	ia = NULL;
566 	bzero(&sro, sizeof(sro));
567 
568 	sin = (struct sockaddr_in *)&sro.ro_dst;
569 	sin->sin_family = AF_INET;
570 	sin->sin_len = sizeof(struct sockaddr_in);
571 	sin->sin_addr.s_addr = faddr->s_addr;
572 
573 	/*
574 	 * If route is known our src addr is taken from the i/f,
575 	 * else punt.
576 	 *
577 	 * Find out route to destination.
578 	 */
579 	if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
580 		in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
581 
582 	/*
583 	 * If we found a route, use the address corresponding to
584 	 * the outgoing interface.
585 	 *
586 	 * Otherwise assume faddr is reachable on a directly connected
587 	 * network and try to find a corresponding interface to take
588 	 * the source address from.
589 	 */
590 	if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
591 		struct ifnet *ifp;
592 
593 		ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
594 		if (ia == NULL)
595 			ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin));
596 		if (ia == NULL) {
597 			error = ENETUNREACH;
598 			goto done;
599 		}
600 
601 		if (cred == NULL || !jailed(cred)) {
602 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
603 			goto done;
604 		}
605 
606 		ifp = ia->ia_ifp;
607 		ia = NULL;
608 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
609 
610 			sa = ifa->ifa_addr;
611 			if (sa->sa_family != AF_INET)
612 				continue;
613 			sin = (struct sockaddr_in *)sa;
614 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
615 				ia = (struct in_ifaddr *)ifa;
616 				break;
617 			}
618 		}
619 		if (ia != NULL) {
620 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
621 			goto done;
622 		}
623 
624 		/* 3. As a last resort return the 'default' jail address. */
625 		error = prison_get_ip4(cred, laddr);
626 		goto done;
627 	}
628 
629 	/*
630 	 * If the outgoing interface on the route found is not
631 	 * a loopback interface, use the address from that interface.
632 	 * In case of jails do those three steps:
633 	 * 1. check if the interface address belongs to the jail. If so use it.
634 	 * 2. check if we have any address on the outgoing interface
635 	 *    belonging to this jail. If so use it.
636 	 * 3. as a last resort return the 'default' jail address.
637 	 */
638 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
639 
640 		/* If not jailed, use the default returned. */
641 		if (cred == NULL || !jailed(cred)) {
642 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
643 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
644 			goto done;
645 		}
646 
647 		/* Jailed. */
648 		/* 1. Check if the iface address belongs to the jail. */
649 		sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
650 		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
651 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
652 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
653 			goto done;
654 		}
655 
656 		/*
657 		 * 2. Check if we have any address on the outgoing interface
658 		 *    belonging to this jail.
659 		 */
660 		TAILQ_FOREACH(ifa, &sro.ro_rt->rt_ifp->if_addrhead, ifa_link) {
661 
662 			sa = ifa->ifa_addr;
663 			if (sa->sa_family != AF_INET)
664 				continue;
665 			sin = (struct sockaddr_in *)sa;
666 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
667 				ia = (struct in_ifaddr *)ifa;
668 				break;
669 			}
670 		}
671 		if (ia != NULL) {
672 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
673 			goto done;
674 		}
675 
676 		/* 3. As a last resort return the 'default' jail address. */
677 		error = prison_get_ip4(cred, laddr);
678 		goto done;
679 	}
680 
681 	/*
682 	 * The outgoing interface is marked with 'loopback net', so a route
683 	 * to ourselves is here.
684 	 * Try to find the interface of the destination address and then
685 	 * take the address from there. That interface is not necessarily
686 	 * a loopback interface.
687 	 * In case of jails, check that it is an address of the jail
688 	 * and if we cannot find, fall back to the 'default' jail address.
689 	 */
690 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
691 		struct sockaddr_in sain;
692 
693 		bzero(&sain, sizeof(struct sockaddr_in));
694 		sain.sin_family = AF_INET;
695 		sain.sin_len = sizeof(struct sockaddr_in);
696 		sain.sin_addr.s_addr = faddr->s_addr;
697 
698 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
699 		if (ia == NULL)
700 			ia = ifatoia(ifa_ifwithnet(sintosa(&sain)));
701 
702 		if (cred == NULL || !jailed(cred)) {
703 #if __FreeBSD_version < 800000
704 			if (ia == NULL)
705 				ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
706 #endif
707 			if (ia == NULL) {
708 				error = ENETUNREACH;
709 				goto done;
710 			}
711 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
712 			goto done;
713 		}
714 
715 		/* Jailed. */
716 		if (ia != NULL) {
717 			struct ifnet *ifp;
718 
719 			ifp = ia->ia_ifp;
720 			ia = NULL;
721 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
722 
723 				sa = ifa->ifa_addr;
724 				if (sa->sa_family != AF_INET)
725 					continue;
726 				sin = (struct sockaddr_in *)sa;
727 				if (prison_check_ip4(cred,
728 				    &sin->sin_addr) == 0) {
729 					ia = (struct in_ifaddr *)ifa;
730 					break;
731 				}
732 			}
733 			if (ia != NULL) {
734 				laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
735 				goto done;
736 			}
737 		}
738 
739 		/* 3. As a last resort return the 'default' jail address. */
740 		error = prison_get_ip4(cred, laddr);
741 		goto done;
742 	}
743 
744 done:
745 	if (sro.ro_rt != NULL)
746 		RTFREE(sro.ro_rt);
747 	return (error);
748 }
749 
750 /*
751  * Set up for a connect from a socket to the specified address.
752  * On entry, *laddrp and *lportp should contain the current local
753  * address and port for the PCB; these are updated to the values
754  * that should be placed in inp_laddr and inp_lport to complete
755  * the connect.
756  *
757  * On success, *faddrp and *fportp will be set to the remote address
758  * and port. These are not updated in the error case.
759  *
760  * If the operation fails because the connection already exists,
761  * *oinpp will be set to the PCB of that connection so that the
762  * caller can decide to override it. In all other cases, *oinpp
763  * is set to NULL.
764  */
765 int
766 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
767     in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
768     struct inpcb **oinpp, struct ucred *cred)
769 {
770 	INIT_VNET_INET(inp->inp_vnet);
771 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
772 	struct in_ifaddr *ia;
773 	struct inpcb *oinp;
774 	struct in_addr laddr, faddr;
775 	u_short lport, fport;
776 	int error;
777 
778 	/*
779 	 * Because a global state change doesn't actually occur here, a read
780 	 * lock is sufficient.
781 	 */
782 	INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
783 	INP_LOCK_ASSERT(inp);
784 
785 	if (oinpp != NULL)
786 		*oinpp = NULL;
787 	if (nam->sa_len != sizeof (*sin))
788 		return (EINVAL);
789 	if (sin->sin_family != AF_INET)
790 		return (EAFNOSUPPORT);
791 	if (sin->sin_port == 0)
792 		return (EADDRNOTAVAIL);
793 	laddr.s_addr = *laddrp;
794 	lport = *lportp;
795 	faddr = sin->sin_addr;
796 	fport = sin->sin_port;
797 
798 	if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
799 		/*
800 		 * If the destination address is INADDR_ANY,
801 		 * use the primary local address.
802 		 * If the supplied address is INADDR_BROADCAST,
803 		 * and the primary interface supports broadcast,
804 		 * choose the broadcast address for that interface.
805 		 */
806 		if (faddr.s_addr == INADDR_ANY) {
807 			faddr =
808 			    IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
809 			if (cred != NULL &&
810 			    (error = prison_get_ip4(cred, &faddr)) != 0)
811 				return (error);
812 		} else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
813 		    (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
814 		    IFF_BROADCAST))
815 			faddr = satosin(&TAILQ_FIRST(
816 			    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
817 	}
818 	if (laddr.s_addr == INADDR_ANY) {
819 		error = in_pcbladdr(inp, &faddr, &laddr, cred);
820 		if (error)
821 			return (error);
822 
823 		/*
824 		 * If the destination address is multicast and an outgoing
825 		 * interface has been set as a multicast option, use the
826 		 * address of that interface as our source address.
827 		 */
828 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
829 		    inp->inp_moptions != NULL) {
830 			struct ip_moptions *imo;
831 			struct ifnet *ifp;
832 
833 			imo = inp->inp_moptions;
834 			if (imo->imo_multicast_ifp != NULL) {
835 				ifp = imo->imo_multicast_ifp;
836 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
837 					if (ia->ia_ifp == ifp)
838 						break;
839 				if (ia == NULL)
840 					return (EADDRNOTAVAIL);
841 				laddr = ia->ia_addr.sin_addr;
842 			}
843 		}
844 	}
845 
846 	oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
847 	    0, NULL);
848 	if (oinp != NULL) {
849 		if (oinpp != NULL)
850 			*oinpp = oinp;
851 		return (EADDRINUSE);
852 	}
853 	if (lport == 0) {
854 		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
855 		    cred);
856 		if (error)
857 			return (error);
858 	}
859 	*laddrp = laddr.s_addr;
860 	*lportp = lport;
861 	*faddrp = faddr.s_addr;
862 	*fportp = fport;
863 	return (0);
864 }
865 
866 void
867 in_pcbdisconnect(struct inpcb *inp)
868 {
869 
870 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
871 	INP_WLOCK_ASSERT(inp);
872 
873 	inp->inp_faddr.s_addr = INADDR_ANY;
874 	inp->inp_fport = 0;
875 	in_pcbrehash(inp);
876 }
877 
878 /*
879  * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
880  * For most protocols, this will be invoked immediately prior to calling
881  * in_pcbfree().  However, with TCP the inpcb may significantly outlive the
882  * socket, in which case in_pcbfree() is deferred.
883  */
884 void
885 in_pcbdetach(struct inpcb *inp)
886 {
887 
888 	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
889 
890 	inp->inp_socket->so_pcb = NULL;
891 	inp->inp_socket = NULL;
892 }
893 
894 /*
895  * in_pcbfree_internal() frees an inpcb that has been detached from its
896  * socket, and whose reference count has reached 0.  It will also remove the
897  * inpcb from any global lists it might remain on.
898  */
899 static void
900 in_pcbfree_internal(struct inpcb *inp)
901 {
902 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
903 
904 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
905 	KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));
906 
907 	INP_INFO_WLOCK_ASSERT(ipi);
908 	INP_WLOCK_ASSERT(inp);
909 
910 #ifdef IPSEC
911 	if (inp->inp_sp != NULL)
912 		ipsec_delete_pcbpolicy(inp);
913 #endif /* IPSEC */
914 	inp->inp_gencnt = ++ipi->ipi_gencnt;
915 	in_pcbremlists(inp);
916 #ifdef INET6
917 	if (inp->inp_vflag & INP_IPV6PROTO) {
918 		ip6_freepcbopts(inp->in6p_outputopts);
919 		ip6_freemoptions(inp->in6p_moptions);
920 	}
921 #endif
922 	if (inp->inp_options)
923 		(void)m_free(inp->inp_options);
924 	if (inp->inp_moptions != NULL)
925 		inp_freemoptions(inp->inp_moptions);
926 	inp->inp_vflag = 0;
927 	crfree(inp->inp_cred);
928 
929 #ifdef MAC
930 	mac_inpcb_destroy(inp);
931 #endif
932 	INP_WUNLOCK(inp);
933 	uma_zfree(ipi->ipi_zone, inp);
934 }
935 
936 /*
937  * in_pcbref() bumps the reference count on an inpcb in order to maintain
938  * stability of an inpcb pointer despite the inpcb lock being released.  This
939  * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
940  * but where the inpcb lock is already held.
941  *
942  * While the inpcb will not be freed, releasing the inpcb lock means that the
943  * connection's state may change, so the caller should be careful to
944  * revalidate any cached state on reacquiring the lock.  Drop the reference
945  * using in_pcbrele().
946  */
947 void
948 in_pcbref(struct inpcb *inp)
949 {
950 
951 	INP_WLOCK_ASSERT(inp);
952 
953 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
954 
955 	inp->inp_refcount++;
956 }
957 
958 /*
959  * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
960  * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
961  * return a flag indicating whether or not the inpcb remains valid.  If it is
962  * valid, we return with the inpcb lock held.
963  */
964 int
965 in_pcbrele(struct inpcb *inp)
966 {
967 #ifdef INVARIANTS
968 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
969 #endif
970 
971 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
972 
973 	INP_INFO_WLOCK_ASSERT(ipi);
974 	INP_WLOCK_ASSERT(inp);
975 
976 	inp->inp_refcount--;
977 	if (inp->inp_refcount > 0)
978 		return (0);
979 	in_pcbfree_internal(inp);
980 	return (1);
981 }
982 
983 /*
984  * Unconditionally schedule an inpcb to be freed by decrementing its
985  * reference count, which should occur only after the inpcb has been detached
986  * from its socket.  If another thread holds a temporary reference (acquired
987  * using in_pcbref()) then the free is deferred until that reference is
988  * released using in_pcbrele(), but the inpcb is still unlocked.
989  */
990 void
991 in_pcbfree(struct inpcb *inp)
992 {
993 #ifdef INVARIANTS
994 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
995 #endif
996 
997 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
998 	    __func__));
999 
1000 	INP_INFO_WLOCK_ASSERT(ipi);
1001 	INP_WLOCK_ASSERT(inp);
1002 
1003 	if (!in_pcbrele(inp))
1004 		INP_WUNLOCK(inp);
1005 }
1006 
1007 /*
1008  * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
1009  * port reservation, and preventing it from being returned by inpcb lookups.
1010  *
1011  * It is used by TCP to mark an inpcb as unused and avoid future packet
1012  * delivery or event notification when a socket remains open but TCP has
1013  * closed.  This might occur as a result of a shutdown()-initiated TCP close
1014  * or a RST on the wire, and allows the port binding to be reused while still
1015  * maintaining the invariant that so_pcb always points to a valid inpcb until
1016  * in_pcbdetach().
1017  *
1018  * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
1019  * lists, but can lead to confusing netstat output, as open sockets with
1020  * closed TCP connections will no longer appear to have their bound port
1021  * number.  An explicit flag would be better, as it would allow us to leave
1022  * the port number intact after the connection is dropped.
1023  *
1024  * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
1025  * in_pcbnotifyall() and in_pcbpurgeif0()?
1026  */
1027 void
1028 in_pcbdrop(struct inpcb *inp)
1029 {
1030 
1031 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
1032 	INP_WLOCK_ASSERT(inp);
1033 
1034 	inp->inp_vflag |= INP_DROPPED;
1035 	if (inp->inp_lport) {
1036 		struct inpcbport *phd = inp->inp_phd;
1037 
1038 		LIST_REMOVE(inp, inp_hash);
1039 		LIST_REMOVE(inp, inp_portlist);
1040 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1041 			LIST_REMOVE(phd, phd_hash);
1042 			free(phd, M_PCB);
1043 		}
1044 		inp->inp_lport = 0;
1045 	}
1046 }
1047 
1048 /*
1049  * Common routines to return the socket addresses associated with inpcbs.
1050  */
1051 struct sockaddr *
1052 in_sockaddr(in_port_t port, struct in_addr *addr_p)
1053 {
1054 	struct sockaddr_in *sin;
1055 
1056 	sin = malloc(sizeof *sin, M_SONAME,
1057 		M_WAITOK | M_ZERO);
1058 	sin->sin_family = AF_INET;
1059 	sin->sin_len = sizeof(*sin);
1060 	sin->sin_addr = *addr_p;
1061 	sin->sin_port = port;
1062 
1063 	return (struct sockaddr *)sin;
1064 }
1065 
1066 int
1067 in_getsockaddr(struct socket *so, struct sockaddr **nam)
1068 {
1069 	struct inpcb *inp;
1070 	struct in_addr addr;
1071 	in_port_t port;
1072 
1073 	inp = sotoinpcb(so);
1074 	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
1075 
1076 	INP_RLOCK(inp);
1077 	port = inp->inp_lport;
1078 	addr = inp->inp_laddr;
1079 	INP_RUNLOCK(inp);
1080 
1081 	*nam = in_sockaddr(port, &addr);
1082 	return 0;
1083 }
1084 
1085 int
1086 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1087 {
1088 	struct inpcb *inp;
1089 	struct in_addr addr;
1090 	in_port_t port;
1091 
1092 	inp = sotoinpcb(so);
1093 	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
1094 
1095 	INP_RLOCK(inp);
1096 	port = inp->inp_fport;
1097 	addr = inp->inp_faddr;
1098 	INP_RUNLOCK(inp);
1099 
1100 	*nam = in_sockaddr(port, &addr);
1101 	return 0;
1102 }
1103 
1104 void
1105 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
1106     struct inpcb *(*notify)(struct inpcb *, int))
1107 {
1108 	struct inpcb *inp, *inp_temp;
1109 
1110 	INP_INFO_WLOCK(pcbinfo);
1111 	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
1112 		INP_WLOCK(inp);
1113 #ifdef INET6
1114 		if ((inp->inp_vflag & INP_IPV4) == 0) {
1115 			INP_WUNLOCK(inp);
1116 			continue;
1117 		}
1118 #endif
1119 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
1120 		    inp->inp_socket == NULL) {
1121 			INP_WUNLOCK(inp);
1122 			continue;
1123 		}
1124 		if ((*notify)(inp, errno))
1125 			INP_WUNLOCK(inp);
1126 	}
1127 	INP_INFO_WUNLOCK(pcbinfo);
1128 }
1129 
1130 void
1131 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
1132 {
1133 	struct inpcb *inp;
1134 	struct ip_moptions *imo;
1135 	int i, gap;
1136 
1137 	INP_INFO_RLOCK(pcbinfo);
1138 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1139 		INP_WLOCK(inp);
1140 		imo = inp->inp_moptions;
1141 		if ((inp->inp_vflag & INP_IPV4) &&
1142 		    imo != NULL) {
1143 			/*
1144 			 * Unselect the outgoing interface if it is being
1145 			 * detached.
1146 			 */
1147 			if (imo->imo_multicast_ifp == ifp)
1148 				imo->imo_multicast_ifp = NULL;
1149 
1150 			/*
1151 			 * Drop multicast group membership if we joined
1152 			 * through the interface being detached.
1153 			 */
1154 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
1155 			    i++) {
1156 				if (imo->imo_membership[i]->inm_ifp == ifp) {
1157 					in_delmulti(imo->imo_membership[i]);
1158 					gap++;
1159 				} else if (gap != 0)
1160 					imo->imo_membership[i - gap] =
1161 					    imo->imo_membership[i];
1162 			}
1163 			imo->imo_num_memberships -= gap;
1164 		}
1165 		INP_WUNLOCK(inp);
1166 	}
1167 	INP_INFO_RUNLOCK(pcbinfo);
1168 }
1169 
1170 /*
1171  * Lookup a PCB based on the local address and port.
1172  */
1173 #define INP_LOOKUP_MAPPED_PCB_COST	3
1174 struct inpcb *
1175 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1176     u_short lport, int wild_okay, struct ucred *cred)
1177 {
1178 	struct inpcb *inp;
1179 #ifdef INET6
1180 	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
1181 #else
1182 	int matchwild = 3;
1183 #endif
1184 	int wildcard;
1185 
1186 	INP_INFO_LOCK_ASSERT(pcbinfo);
1187 
1188 	if (!wild_okay) {
1189 		struct inpcbhead *head;
1190 		/*
1191 		 * Look for an unconnected (wildcard foreign addr) PCB that
1192 		 * matches the local address and port we're looking for.
1193 		 */
1194 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1195 		    0, pcbinfo->ipi_hashmask)];
1196 		LIST_FOREACH(inp, head, inp_hash) {
1197 #ifdef INET6
1198 			/* XXX inp locking */
1199 			if ((inp->inp_vflag & INP_IPV4) == 0)
1200 				continue;
1201 #endif
1202 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
1203 			    inp->inp_laddr.s_addr == laddr.s_addr &&
1204 			    inp->inp_lport == lport) {
1205 				/*
1206 				 * Found?
1207 				 */
1208 				if (cred == NULL ||
1209 				    inp->inp_cred->cr_prison == cred->cr_prison)
1210 					return (inp);
1211 			}
1212 		}
1213 		/*
1214 		 * Not found.
1215 		 */
1216 		return (NULL);
1217 	} else {
1218 		struct inpcbporthead *porthash;
1219 		struct inpcbport *phd;
1220 		struct inpcb *match = NULL;
1221 		/*
1222 		 * Best fit PCB lookup.
1223 		 *
1224 		 * First see if this local port is in use by looking on the
1225 		 * port hash list.
1226 		 */
1227 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1228 		    pcbinfo->ipi_porthashmask)];
1229 		LIST_FOREACH(phd, porthash, phd_hash) {
1230 			if (phd->phd_port == lport)
1231 				break;
1232 		}
1233 		if (phd != NULL) {
1234 			/*
1235 			 * Port is in use by one or more PCBs. Look for best
1236 			 * fit.
1237 			 */
1238 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1239 				wildcard = 0;
1240 				if (cred != NULL &&
1241 				    inp->inp_cred->cr_prison != cred->cr_prison)
1242 					continue;
1243 #ifdef INET6
1244 				/* XXX inp locking */
1245 				if ((inp->inp_vflag & INP_IPV4) == 0)
1246 					continue;
1247 				/*
1248 				 * We never select the PCB that has
1249 				 * INP_IPV6 flag and is bound to :: if
1250 				 * we have another PCB which is bound
1251 				 * to 0.0.0.0.  If a PCB has the
1252 				 * INP_IPV6 flag, then we set its cost
1253 				 * higher than IPv4 only PCBs.
1254 				 *
1255 				 * Note that the case only happens
1256 				 * when a socket is bound to ::, under
1257 				 * the condition that the use of the
1258 				 * mapped address is allowed.
1259 				 */
1260 				if ((inp->inp_vflag & INP_IPV6) != 0)
1261 					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
1262 #endif
1263 				if (inp->inp_faddr.s_addr != INADDR_ANY)
1264 					wildcard++;
1265 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
1266 					if (laddr.s_addr == INADDR_ANY)
1267 						wildcard++;
1268 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
1269 						continue;
1270 				} else {
1271 					if (laddr.s_addr != INADDR_ANY)
1272 						wildcard++;
1273 				}
1274 				if (wildcard < matchwild) {
1275 					match = inp;
1276 					matchwild = wildcard;
1277 					if (matchwild == 0)
1278 						break;
1279 				}
1280 			}
1281 		}
1282 		return (match);
1283 	}
1284 }
1285 #undef INP_LOOKUP_MAPPED_PCB_COST
1286 
1287 /*
1288  * Lookup PCB in hash list.
1289  */
1290 struct inpcb *
1291 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1292     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1293     struct ifnet *ifp)
1294 {
1295 	struct inpcbhead *head;
1296 	struct inpcb *inp, *tmpinp;
1297 	u_short fport = fport_arg, lport = lport_arg;
1298 
1299 	INP_INFO_LOCK_ASSERT(pcbinfo);
1300 
1301 	/*
1302 	 * First look for an exact match.
1303 	 */
1304 	tmpinp = NULL;
1305 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1306 	    pcbinfo->ipi_hashmask)];
1307 	LIST_FOREACH(inp, head, inp_hash) {
1308 #ifdef INET6
1309 		/* XXX inp locking */
1310 		if ((inp->inp_vflag & INP_IPV4) == 0)
1311 			continue;
1312 #endif
1313 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
1314 		    inp->inp_laddr.s_addr == laddr.s_addr &&
1315 		    inp->inp_fport == fport &&
1316 		    inp->inp_lport == lport) {
1317 			/*
1318 			 * XXX We should be able to directly return
1319 			 * the inp here, without any checks.
1320 			 * Well unless both bound with SO_REUSEPORT?
1321 			 */
1322 			if (jailed(inp->inp_cred))
1323 				return (inp);
1324 			if (tmpinp == NULL)
1325 				tmpinp = inp;
1326 		}
1327 	}
1328 	if (tmpinp != NULL)
1329 		return (tmpinp);
1330 
1331 	/*
1332 	 * Then look for a wildcard match, if requested.
1333 	 */
1334 	if (wildcard == INPLOOKUP_WILDCARD) {
1335 		struct inpcb *local_wild = NULL, *local_exact = NULL;
1336 #ifdef INET6
1337 		struct inpcb *local_wild_mapped = NULL;
1338 #endif
1339 		struct inpcb *jail_wild = NULL;
1340 		int injail;
1341 
1342 		/*
1343 		 * Order of socket selection - we always prefer jails.
1344 		 *      1. jailed, non-wild.
1345 		 *      2. jailed, wild.
1346 		 *      3. non-jailed, non-wild.
1347 		 *      4. non-jailed, wild.
1348 		 */
1349 
1350 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1351 		    0, pcbinfo->ipi_hashmask)];
1352 		LIST_FOREACH(inp, head, inp_hash) {
1353 #ifdef INET6
1354 			/* XXX inp locking */
1355 			if ((inp->inp_vflag & INP_IPV4) == 0)
1356 				continue;
1357 #endif
1358 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
1359 			    inp->inp_lport != lport)
1360 				continue;
1361 
1362 			/* XXX inp locking */
1363 			if (ifp && ifp->if_type == IFT_FAITH &&
1364 			    (inp->inp_flags & INP_FAITH) == 0)
1365 				continue;
1366 
1367 			injail = jailed(inp->inp_cred);
1368 			if (injail) {
1369 				if (prison_check_ip4(inp->inp_cred,
1370 				    &laddr) != 0)
1371 					continue;
1372 			} else {
1373 				if (local_exact != NULL)
1374 					continue;
1375 			}
1376 
1377 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
1378 				if (injail)
1379 					return (inp);
1380 				else
1381 					local_exact = inp;
1382 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1383 #ifdef INET6
1384 				/* XXX inp locking, NULL check */
1385 				if (inp->inp_vflag & INP_IPV6PROTO)
1386 					local_wild_mapped = inp;
1387 				else
1388 #endif /* INET6 */
1389 					if (injail)
1390 						jail_wild = inp;
1391 					else
1392 						local_wild = inp;
1393 			}
1394 		} /* LIST_FOREACH */
1395 		if (jail_wild != NULL)
1396 			return (jail_wild);
1397 		if (local_exact != NULL)
1398 			return (local_exact);
1399 		if (local_wild != NULL)
1400 			return (local_wild);
1401 #ifdef INET6
1402 		if (local_wild_mapped != NULL)
1403 			return (local_wild_mapped);
1404 #endif /* defined(INET6) */
1405 	} /* if (wildcard == INPLOOKUP_WILDCARD) */
1406 
1407 	return (NULL);
1408 }
1409 
1410 /*
1411  * Insert PCB onto various hash lists.
1412  */
1413 int
1414 in_pcbinshash(struct inpcb *inp)
1415 {
1416 	struct inpcbhead *pcbhash;
1417 	struct inpcbporthead *pcbporthash;
1418 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1419 	struct inpcbport *phd;
1420 	u_int32_t hashkey_faddr;
1421 
1422 	INP_INFO_WLOCK_ASSERT(pcbinfo);
1423 	INP_WLOCK_ASSERT(inp);
1424 
1425 #ifdef INET6
1426 	if (inp->inp_vflag & INP_IPV6)
1427 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1428 	else
1429 #endif /* INET6 */
1430 	hashkey_faddr = inp->inp_faddr.s_addr;
1431 
1432 	pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1433 		 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1434 
1435 	pcbporthash = &pcbinfo->ipi_porthashbase[
1436 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
1437 
1438 	/*
1439 	 * Go through port list and look for a head for this lport.
1440 	 */
1441 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
1442 		if (phd->phd_port == inp->inp_lport)
1443 			break;
1444 	}
1445 	/*
1446 	 * If none exists, malloc one and tack it on.
1447 	 */
1448 	if (phd == NULL) {
1449 		phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1450 		if (phd == NULL) {
1451 			return (ENOBUFS); /* XXX */
1452 		}
1453 		phd->phd_port = inp->inp_lport;
1454 		LIST_INIT(&phd->phd_pcblist);
1455 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1456 	}
1457 	inp->inp_phd = phd;
1458 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1459 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1460 	return (0);
1461 }
1462 
1463 /*
1464  * Move PCB to the proper hash bucket when { faddr, fport } have  been
1465  * changed. NOTE: This does not handle the case of the lport changing (the
1466  * hashed port list would have to be updated as well), so the lport must
1467  * not change after in_pcbinshash() has been called.
1468  */
1469 void
1470 in_pcbrehash(struct inpcb *inp)
1471 {
1472 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1473 	struct inpcbhead *head;
1474 	u_int32_t hashkey_faddr;
1475 
1476 	INP_INFO_WLOCK_ASSERT(pcbinfo);
1477 	INP_WLOCK_ASSERT(inp);
1478 
1479 #ifdef INET6
1480 	if (inp->inp_vflag & INP_IPV6)
1481 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1482 	else
1483 #endif /* INET6 */
1484 	hashkey_faddr = inp->inp_faddr.s_addr;
1485 
1486 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1487 		inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1488 
1489 	LIST_REMOVE(inp, inp_hash);
1490 	LIST_INSERT_HEAD(head, inp, inp_hash);
1491 }
1492 
1493 /*
1494  * Remove PCB from various lists.
1495  */
1496 void
1497 in_pcbremlists(struct inpcb *inp)
1498 {
1499 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1500 
1501 	INP_INFO_WLOCK_ASSERT(pcbinfo);
1502 	INP_WLOCK_ASSERT(inp);
1503 
1504 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1505 	if (inp->inp_lport) {
1506 		struct inpcbport *phd = inp->inp_phd;
1507 
1508 		LIST_REMOVE(inp, inp_hash);
1509 		LIST_REMOVE(inp, inp_portlist);
1510 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1511 			LIST_REMOVE(phd, phd_hash);
1512 			free(phd, M_PCB);
1513 		}
1514 	}
1515 	LIST_REMOVE(inp, inp_list);
1516 	pcbinfo->ipi_count--;
1517 }
1518 
/*
 * A set label operation has occurred at the socket layer; propagate the
 * label change into the in_pcb for the socket.  This is a no-op unless the
 * kernel is built with MAC.
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	/* The inpcb write lock is taken first, then the socket lock. */
	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
#endif
}
1539 
1540 /*
1541  * ipport_tick runs once per second, determining if random port allocation
1542  * should be continued.  If more than ipport_randomcps ports have been
1543  * allocated in the last second, then we return to sequential port
1544  * allocation. We return to random allocation only once we drop below
1545  * ipport_randomcps for at least ipport_randomtime seconds.
1546  */
1547 void
1548 ipport_tick(void *xtp)
1549 {
1550 	VNET_ITERATOR_DECL(vnet_iter);
1551 
1552 	VNET_LIST_RLOCK();
1553 	VNET_FOREACH(vnet_iter) {
1554 		CURVNET_SET(vnet_iter);	/* XXX appease INVARIANTS here */
1555 		INIT_VNET_INET(vnet_iter);
1556 		if (V_ipport_tcpallocs <=
1557 		    V_ipport_tcplastcount + V_ipport_randomcps) {
1558 			if (V_ipport_stoprandom > 0)
1559 				V_ipport_stoprandom--;
1560 		} else
1561 			V_ipport_stoprandom = V_ipport_randomtime;
1562 		V_ipport_tcplastcount = V_ipport_tcpallocs;
1563 		CURVNET_RESTORE();
1564 	}
1565 	VNET_LIST_RUNLOCK();
1566 	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
1567 }
1568 
/*
 * Function form of INP_WLOCK(): acquire the write lock on an inpcb.
 */
void
inp_wlock(struct inpcb *inp)
{

	INP_WLOCK(inp);
}
1575 
/*
 * Function form of INP_WUNLOCK(): release the write lock on an inpcb.
 */
void
inp_wunlock(struct inpcb *inp)
{

	INP_WUNLOCK(inp);
}
1582 
/*
 * Function form of INP_RLOCK(): acquire the read lock on an inpcb.
 */
void
inp_rlock(struct inpcb *inp)
{

	INP_RLOCK(inp);
}
1589 
/*
 * Function form of INP_RUNLOCK(): release the read lock on an inpcb.
 */
void
inp_runlock(struct inpcb *inp)
{

	INP_RUNLOCK(inp);
}
1596 
1597 #ifdef INVARIANTS
/*
 * Function form of INP_WLOCK_ASSERT(): assert that the inpcb write lock is
 * held.  Only built with INVARIANTS.
 */
void
inp_lock_assert(struct inpcb *inp)
{

	INP_WLOCK_ASSERT(inp);
}
1604 
/*
 * Function form of INP_UNLOCK_ASSERT(): assert that the inpcb lock is not
 * held.  Only built with INVARIANTS.
 */
void
inp_unlock_assert(struct inpcb *inp)
{

	INP_UNLOCK_ASSERT(inp);
}
1611 #endif
1612 
1613 void
1614 inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
1615 {
1616 	INIT_VNET_INET(curvnet);
1617 	struct inpcb *inp;
1618 
1619 	INP_INFO_RLOCK(&V_tcbinfo);
1620 	LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
1621 		INP_WLOCK(inp);
1622 		func(inp, arg);
1623 		INP_WUNLOCK(inp);
1624 	}
1625 	INP_INFO_RUNLOCK(&V_tcbinfo);
1626 }
1627 
1628 struct socket *
1629 inp_inpcbtosocket(struct inpcb *inp)
1630 {
1631 
1632 	INP_WLOCK_ASSERT(inp);
1633 	return (inp->inp_socket);
1634 }
1635 
1636 struct tcpcb *
1637 inp_inpcbtotcpcb(struct inpcb *inp)
1638 {
1639 
1640 	INP_WLOCK_ASSERT(inp);
1641 	return ((struct tcpcb *)inp->inp_ppcb);
1642 }
1643 
1644 int
1645 inp_ip_tos_get(const struct inpcb *inp)
1646 {
1647 
1648 	return (inp->inp_ip_tos);
1649 }
1650 
1651 void
1652 inp_ip_tos_set(struct inpcb *inp, int val)
1653 {
1654 
1655 	inp->inp_ip_tos = val;
1656 }
1657 
1658 void
1659 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
1660     uint32_t *faddr, uint16_t *fp)
1661 {
1662 
1663 	INP_LOCK_ASSERT(inp);
1664 	*laddr = inp->inp_laddr.s_addr;
1665 	*faddr = inp->inp_faddr.s_addr;
1666 	*lp = inp->inp_lport;
1667 	*fp = inp->inp_fport;
1668 }
1669 
/*
 * Function form of sotoinpcb(): return the inpcb attached to a socket.
 */
struct inpcb *
so_sotoinpcb(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	return (inp);
}
1676 
/*
 * Function form of sototcpcb(): return the tcpcb attached to a socket.
 */
struct tcpcb *
so_sototcpcb(struct socket *so)
{
	struct tcpcb *tp;

	tp = sototcpcb(so);
	return (tp);
}
1683 
1684 #ifdef DDB
/*
 * Emit `indent' spaces on the debugger console.  Nothing is printed when
 * indent is zero or negative.
 */
static void
db_print_indent(int indent)
{

	for (; indent > 0; indent--)
		db_printf(" ");
}
1693 
1694 static void
1695 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
1696 {
1697 	char faddr_str[48], laddr_str[48];
1698 
1699 	db_print_indent(indent);
1700 	db_printf("%s at %p\n", name, inc);
1701 
1702 	indent += 2;
1703 
1704 #ifdef INET6
1705 	if (inc->inc_flags & INC_ISIPV6) {
1706 		/* IPv6. */
1707 		ip6_sprintf(laddr_str, &inc->inc6_laddr);
1708 		ip6_sprintf(faddr_str, &inc->inc6_faddr);
1709 	} else {
1710 #endif
1711 		/* IPv4. */
1712 		inet_ntoa_r(inc->inc_laddr, laddr_str);
1713 		inet_ntoa_r(inc->inc_faddr, faddr_str);
1714 #ifdef INET6
1715 	}
1716 #endif
1717 	db_print_indent(indent);
1718 	db_printf("inc_laddr %s   inc_lport %u\n", laddr_str,
1719 	    ntohs(inc->inc_lport));
1720 	db_print_indent(indent);
1721 	db_printf("inc_faddr %s   inc_fport %u\n", faddr_str,
1722 	    ntohs(inc->inc_fport));
1723 }
1724 
1725 static void
1726 db_print_inpflags(int inp_flags)
1727 {
1728 	int comma;
1729 
1730 	comma = 0;
1731 	if (inp_flags & INP_RECVOPTS) {
1732 		db_printf("%sINP_RECVOPTS", comma ? ", " : "");
1733 		comma = 1;
1734 	}
1735 	if (inp_flags & INP_RECVRETOPTS) {
1736 		db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
1737 		comma = 1;
1738 	}
1739 	if (inp_flags & INP_RECVDSTADDR) {
1740 		db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
1741 		comma = 1;
1742 	}
1743 	if (inp_flags & INP_HDRINCL) {
1744 		db_printf("%sINP_HDRINCL", comma ? ", " : "");
1745 		comma = 1;
1746 	}
1747 	if (inp_flags & INP_HIGHPORT) {
1748 		db_printf("%sINP_HIGHPORT", comma ? ", " : "");
1749 		comma = 1;
1750 	}
1751 	if (inp_flags & INP_LOWPORT) {
1752 		db_printf("%sINP_LOWPORT", comma ? ", " : "");
1753 		comma = 1;
1754 	}
1755 	if (inp_flags & INP_ANONPORT) {
1756 		db_printf("%sINP_ANONPORT", comma ? ", " : "");
1757 		comma = 1;
1758 	}
1759 	if (inp_flags & INP_RECVIF) {
1760 		db_printf("%sINP_RECVIF", comma ? ", " : "");
1761 		comma = 1;
1762 	}
1763 	if (inp_flags & INP_MTUDISC) {
1764 		db_printf("%sINP_MTUDISC", comma ? ", " : "");
1765 		comma = 1;
1766 	}
1767 	if (inp_flags & INP_FAITH) {
1768 		db_printf("%sINP_FAITH", comma ? ", " : "");
1769 		comma = 1;
1770 	}
1771 	if (inp_flags & INP_RECVTTL) {
1772 		db_printf("%sINP_RECVTTL", comma ? ", " : "");
1773 		comma = 1;
1774 	}
1775 	if (inp_flags & INP_DONTFRAG) {
1776 		db_printf("%sINP_DONTFRAG", comma ? ", " : "");
1777 		comma = 1;
1778 	}
1779 	if (inp_flags & IN6P_IPV6_V6ONLY) {
1780 		db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
1781 		comma = 1;
1782 	}
1783 	if (inp_flags & IN6P_PKTINFO) {
1784 		db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
1785 		comma = 1;
1786 	}
1787 	if (inp_flags & IN6P_HOPLIMIT) {
1788 		db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
1789 		comma = 1;
1790 	}
1791 	if (inp_flags & IN6P_HOPOPTS) {
1792 		db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
1793 		comma = 1;
1794 	}
1795 	if (inp_flags & IN6P_DSTOPTS) {
1796 		db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
1797 		comma = 1;
1798 	}
1799 	if (inp_flags & IN6P_RTHDR) {
1800 		db_printf("%sIN6P_RTHDR", comma ? ", " : "");
1801 		comma = 1;
1802 	}
1803 	if (inp_flags & IN6P_RTHDRDSTOPTS) {
1804 		db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
1805 		comma = 1;
1806 	}
1807 	if (inp_flags & IN6P_TCLASS) {
1808 		db_printf("%sIN6P_TCLASS", comma ? ", " : "");
1809 		comma = 1;
1810 	}
1811 	if (inp_flags & IN6P_AUTOFLOWLABEL) {
1812 		db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
1813 		comma = 1;
1814 	}
1815 	if (inp_flags & IN6P_RFC2292) {
1816 		db_printf("%sIN6P_RFC2292", comma ? ", " : "");
1817 		comma = 1;
1818 	}
1819 	if (inp_flags & IN6P_MTU) {
1820 		db_printf("IN6P_MTU%s", comma ? ", " : "");
1821 		comma = 1;
1822 	}
1823 }
1824 
1825 static void
1826 db_print_inpvflag(u_char inp_vflag)
1827 {
1828 	int comma;
1829 
1830 	comma = 0;
1831 	if (inp_vflag & INP_IPV4) {
1832 		db_printf("%sINP_IPV4", comma ? ", " : "");
1833 		comma  = 1;
1834 	}
1835 	if (inp_vflag & INP_IPV6) {
1836 		db_printf("%sINP_IPV6", comma ? ", " : "");
1837 		comma  = 1;
1838 	}
1839 	if (inp_vflag & INP_IPV6PROTO) {
1840 		db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
1841 		comma  = 1;
1842 	}
1843 	if (inp_vflag & INP_TIMEWAIT) {
1844 		db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
1845 		comma  = 1;
1846 	}
1847 	if (inp_vflag & INP_ONESBCAST) {
1848 		db_printf("%sINP_ONESBCAST", comma ? ", " : "");
1849 		comma  = 1;
1850 	}
1851 	if (inp_vflag & INP_DROPPED) {
1852 		db_printf("%sINP_DROPPED", comma ? ", " : "");
1853 		comma  = 1;
1854 	}
1855 	if (inp_vflag & INP_SOCKREF) {
1856 		db_printf("%sINP_SOCKREF", comma ? ", " : "");
1857 		comma  = 1;
1858 	}
1859 }
1860 
1861 void
1862 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
1863 {
1864 
1865 	db_print_indent(indent);
1866 	db_printf("%s at %p\n", name, inp);
1867 
1868 	indent += 2;
1869 
1870 	db_print_indent(indent);
1871 	db_printf("inp_flow: 0x%x\n", inp->inp_flow);
1872 
1873 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
1874 
1875 	db_print_indent(indent);
1876 	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
1877 	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
1878 
1879 	db_print_indent(indent);
1880 	db_printf("inp_label: %p   inp_flags: 0x%x (",
1881 	   inp->inp_label, inp->inp_flags);
1882 	db_print_inpflags(inp->inp_flags);
1883 	db_printf(")\n");
1884 
1885 	db_print_indent(indent);
1886 	db_printf("inp_sp: %p   inp_vflag: 0x%x (", inp->inp_sp,
1887 	    inp->inp_vflag);
1888 	db_print_inpvflag(inp->inp_vflag);
1889 	db_printf(")\n");
1890 
1891 	db_print_indent(indent);
1892 	db_printf("inp_ip_ttl: %d   inp_ip_p: %d   inp_ip_minttl: %d\n",
1893 	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
1894 
1895 	db_print_indent(indent);
1896 #ifdef INET6
1897 	if (inp->inp_vflag & INP_IPV6) {
1898 		db_printf("in6p_options: %p   in6p_outputopts: %p   "
1899 		    "in6p_moptions: %p\n", inp->in6p_options,
1900 		    inp->in6p_outputopts, inp->in6p_moptions);
1901 		db_printf("in6p_icmp6filt: %p   in6p_cksum %d   "
1902 		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
1903 		    inp->in6p_hops);
1904 	} else
1905 #endif
1906 	{
1907 		db_printf("inp_ip_tos: %d   inp_ip_options: %p   "
1908 		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
1909 		    inp->inp_options, inp->inp_moptions);
1910 	}
1911 
1912 	db_print_indent(indent);
1913 	db_printf("inp_phd: %p   inp_gencnt: %ju\n", inp->inp_phd,
1914 	    (uintmax_t)inp->inp_gencnt);
1915 }
1916 
1917 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
1918 {
1919 	struct inpcb *inp;
1920 
1921 	if (!have_addr) {
1922 		db_printf("usage: show inpcb <addr>\n");
1923 		return;
1924 	}
1925 	inp = (struct inpcb *)addr;
1926 
1927 	db_print_inpcb(inp, "inpcb", 0);
1928 }
1929 #endif
1930