xref: /openbsd/sys/netinet/in_pcb.h (revision 4e5e13a2)
1 /*	$OpenBSD: in_pcb.h,v 1.164 2025/01/09 16:47:24 bluhm Exp $	*/
2 /*	$NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
62  */
63 
64 #ifndef _NETINET_IN_PCB_H_
65 #define _NETINET_IN_PCB_H_
66 
67 #include <sys/queue.h>
68 #include <sys/mutex.h>
69 #include <sys/rwlock.h>
70 #include <sys/refcnt.h>
71 #include <netinet/ip6.h>
72 #include <netinet/icmp6.h>
73 #include <netinet/ip_ipsp.h>
74 
75 #include <crypto/siphash.h>
76 
77 /*
78  * Locks used to protect struct members in this file:
79  *	I	immutable after creation
80  *	N	net lock
81  *	t	inpt_mtx		pcb table mutex
82  *	L	pf_inp_mtx		link pf to inp mutex
83  *	s	so_lock			socket rwlock
84  *	f	inp_sofree_mtx		socket detach and lock
85  */
86 
87 /*
88  * The pcb table mutex guarantees that all inpcb are consistent and
89  * that bind(2) and connect(2) create unique combinations of
90  * laddr/faddr/lport/fport/rtableid.  This mutex is used to protect
91  * both address consistency and inpcb lookup during protocol input.
92  * All writes to inp_[lf]addr take table mutex.  A per socket lock is
93  * needed, so that socket layer input has a consistent view of these
94  * values.
95  *
96  * In soconnect() and sosend() a per pcb mutex cannot be used.  They
97  * eventually call IP output which takes pf lock which is a sleeping lock.
98  * Also connect(2) does a route lookup for source selection.  Route
99  * resolution happens there, which creates a route, which sends a route
100  * message, which needs route lock, which is a rw-lock.
101  *
102  * On the other hand a mutex should be used in protocol input.  It
103  * does not make sense to do a process switch per packet.  Better spin
104  * until the packet can be processed.
105  *
106  * So there are three locks.  Table mutex is for writing inp_[lf]addr/port
107  * and lookup, socket rw-lock to separate sockets in system calls, and
108  * socket buffer mutex to protect socket receive buffer.  Changing
109  * inp_[lf]addr/port takes both per socket rw-lock and global table mutex.
110  * Protocol input only reads inp_[lf]addr/port during lookup and is safe.
111  */
112 
113 struct pf_state_key;
114 
115 union inpaddru {
116 	struct in_addr iau_addr;	/* IPv4 address */
117 	struct in6_addr iau_addr6;	/* IPv6 address, shares the storage */
118 };
119 
120 /*
121  * Common structure pcb for internet protocol implementation.
122  * Here are stored pointers to local and foreign host table
123  * entries, local and foreign socket numbers, and pointers
124  * up (to a socket structure) and down (to a protocol-specific)
125  * control block.
126  */
127 struct inpcb {
128 	struct	  inpcbtable *inp_table;	/* [I] inet queue/hash table */
129 	TAILQ_ENTRY(inpcb) inp_queue;		/* [t] inet PCB queue */
130 	/* keep fields above in sync with struct inpcb_iterator */
131 	LIST_ENTRY(inpcb) inp_hash;		/* [t] local and foreign hash */
132 	LIST_ENTRY(inpcb) inp_lhash;		/* [t] local port hash */
133 	union	  inpaddru inp_faddru;		/* [t] Foreign address. */
134 	union	  inpaddru inp_laddru;		/* [t] Local address. */
135 #define	inp_faddr	inp_faddru.iau_addr	/* IPv4 foreign address */
136 #define	inp_faddr6	inp_faddru.iau_addr6	/* IPv6 foreign address */
137 #define	inp_laddr	inp_laddru.iau_addr	/* IPv4 local address */
138 #define	inp_laddr6	inp_laddru.iau_addr6	/* IPv6 local address */
139 	u_int16_t inp_fport;		/* [t] foreign port */
140 	u_int16_t inp_lport;		/* [t] local port */
141 	struct	  socket *inp_socket;	/* [f] back pointer to socket */
142 	struct	  mutex inp_sofree_mtx;	/* protect socket free, the [f] lock */
143 	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
144 	struct    route inp_route;	/* [s] cached route */
145 	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
146 	int	  inp_flags;		/* generic IP/datagram flags, INP_* and IN6P_* */
147 	union {				/* Header prototype. */
148 		struct ip hu_ip;		/* IPv4 header */
149 		struct ip6_hdr hu_ipv6;		/* IPv6 header */
150 	} inp_hu;
151 #define	inp_ip		inp_hu.hu_ip
152 #define	inp_ipv6	inp_hu.hu_ipv6
153 	union {				/* anonymous, members overlap */
154 		struct	mbuf *inp_options;		/* IPv4 options */
155 		struct	ip6_pktopts *inp_outputopts6;	/* IPv6 options */
156 	};
157 	int inp_hops;			/* NOTE(review): presumably hop limit; confirm */
158 	union {
159 		struct ip_moptions *mou_mo;
160 		struct ip6_moptions *mou_mo6;
161 	} inp_mou;
162 #define inp_moptions inp_mou.mou_mo	/* [N] IPv4 multicast options */
163 #define inp_moptions6 inp_mou.mou_mo6	/* [N] IPv6 multicast options */
164 	struct	ipsec_level   inp_seclevel;	/* [N] IPsec level of socket */
165 	u_char	inp_ip_minttl;		/* minimum TTL or drop */
166 #define inp_ip6_minhlim inp_ip_minttl	/* minimum Hop Limit or drop */
167 #define	inp_flowinfo	inp_hu.hu_ipv6.ip6_flow	/* ip6_flow of the header prototype */
168 
169 	int	inp_cksum6;		/* NOTE(review): presumably IPv6 checksum offset; confirm */
170 	struct	icmp6_filter *inp_icmp6filt;	/* ICMPv6 filter */
171 	struct	pf_state_key *inp_pf_sk; /* [L] linked pf state key */
172 	struct	mbuf *(*inp_upcall)(void *, struct mbuf *,	/* NOTE(review): per-pcb */
173 		    struct ip *, struct ip6_hdr *, void *, int); /* packet hook; confirm callers */
174 	void	*inp_upcall_arg;	/* NOTE(review): presumably handed to inp_upcall */
175 	u_int	inp_rtableid;		/* [t] routing table id */
176 	int	inp_pipex;		/* pipex indication */
177 	uint16_t inp_flowid;		/* [s] */
178 };
179 
180 LIST_HEAD(inpcbhead, inpcb);
181 
182 struct inpcb_iterator {			/* same leading layout as struct inpcb */
183 	struct	  inpcbtable *inp_table;	/* [I] always NULL, marks an iterator */
184 	TAILQ_ENTRY(inpcb) inp_queue;		/* [t] inet PCB queue */
185 	/* keep fields above in sync with struct inpcb */
186 };
187 
188 static inline int			/* 1 for an iterator entry, else 0 */
189 in_pcb_is_iterator(struct inpcb *inp)
190 {
191 	return (inp->inp_table == NULL ? 1 : 0);	/* only iterators have no table */
192 }
193 
194 struct inpcbtable {			/* one PCB queue plus two hash tables */
195 	struct mutex inpt_mtx;			/* protect queue and hash, the [t] lock */
196 	TAILQ_HEAD(inpthead, inpcb) inpt_queue;	/* [t] inet PCB queue */
197 	struct	inpcbhead *inpt_hashtbl;	/* [t] local and foreign hash */
198 	struct	inpcbhead *inpt_lhashtbl;	/* [t] local port hash */
199 	SIPHASH_KEY inpt_key, inpt_lkey;	/* [I] secrets for hashes */
200 	u_long	inpt_mask, inpt_lmask;		/* [t] hash masks */
201 	int	inpt_count, inpt_size;		/* [t] queue count, hash size */
202 };
203 
204 /* flags in inp_flags: */
205 #define	INP_RECVOPTS	0x001	/* receive incoming IP options */
206 #define	INP_RECVRETOPTS	0x002	/* receive IP options for reply */
207 #define	INP_RECVDSTADDR	0x004	/* receive IP dst address */
208 
209 #define	INP_RXDSTOPTS	INP_RECVOPTS	/* alias, same bit */
210 #define	INP_RXHOPOPTS	INP_RECVRETOPTS	/* alias, same bit */
211 #define	INP_RXINFO	INP_RECVDSTADDR	/* alias, same bit */
212 #define	INP_RXSRCRT	0x010	/* same value as INP_HIGHPORT */
213 #define	INP_HOPLIMIT	0x020	/* same value as INP_LOWPORT */
214 
215 #define	INP_HDRINCL	0x008	/* user supplies entire IP header */
216 #define	INP_HIGHPORT	0x010	/* user wants "high" port binding */
217 #define	INP_LOWPORT	0x020	/* user wants "low" port binding */
218 #define	INP_RECVIF	0x080	/* receive incoming interface */
219 #define	INP_RECVTTL	0x040	/* receive incoming IP TTL */
220 #define	INP_RECVDSTPORT	0x200	/* receive IP dst addr before rdr */
221 #define	INP_RECVRTABLE	0x400	/* receive routing table */
222 #define	INP_IPSECFLOWINFO 0x800	/* receive IPsec flow info */
223 
224 #define	INP_CONTROLOPTS	(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR| \
225 	    INP_RXSRCRT|INP_HOPLIMIT|INP_RECVIF|INP_RECVTTL|INP_RECVDSTPORT| \
226 	    INP_RECVRTABLE)
227 
228 /*
229  * These flags' values should be determined by either the transport
230  * protocol at PRU_BIND, PRU_LISTEN, PRU_CONNECT, etc, or by in_pcb*().
231  */
232 #define INP_IPV6	0x100	/* socket, proto, domain, family is PF_INET6 */
233 
234 /*
235  * Flags in inp_flags for IPV6
236  */
237 #define IN6P_HIGHPORT		INP_HIGHPORT	/* user wants "high" port */
238 #define IN6P_LOWPORT		INP_LOWPORT	/* user wants "low" port */
239 #define IN6P_RECVDSTPORT	INP_RECVDSTPORT	/* receive IP dst addr before rdr */
240 #define IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
241 #define IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
242 #define IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
243 #define IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
244 #define IN6P_RTHDR		0x100000 /* receive routing header */
245 #define IN6P_TCLASS		0x400000 /* receive traffic class value */
246 #define IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */
247 
248 #define IN6P_ANONPORT		0x4000000 /* port chosen for user */
249 #define IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
250 #define IN6P_MTU		0x80000000 /* receive path MTU; bit 31, inp_flags is an int */
251 
252 #define IN6P_MINMTU		0x20000000 /* use minimum MTU */
253 
254 #define IN6P_CONTROLOPTS	(IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
255 				 IN6P_DSTOPTS|IN6P_RTHDR|\
256 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
257 				 IN6P_MTU|IN6P_RECVDSTPORT)
258 
259 #define	INPLOOKUP_WILDCARD	1	/* NOTE(review): 1/2/4 are distinct bits, */
260 #define	INPLOOKUP_SETLOCAL	2	/* presumably OR-able lookup flags; */
261 #define	INPLOOKUP_IPV6		4	/* confirm against the callers */
262 
263 #define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)	/* so_pcb viewed as an inpcb */
264 
265 /* macros for handling bitmap of ports not to allocate dynamically */
266 #define	DP_MAPBITS	(sizeof(u_int32_t) * NBBY)	/* bits per map word */
267 #define	DP_MAPSIZE	(howmany(65536, DP_MAPBITS))	/* words covering 65536 ports */
268 #define	DP_SET(m, p)	((m)[(p) / DP_MAPBITS] |= (1U << ((p) % DP_MAPBITS)))	/* set bit p; p is evaluated twice */
269 #define	DP_CLR(m, p)	((m)[(p) / DP_MAPBITS] &= ~(1U << ((p) % DP_MAPBITS)))	/* clear bit p; p is evaluated twice */
270 #define	DP_ISSET(m, p)	((m)[(p) / DP_MAPBITS] & (1U << ((p) % DP_MAPBITS)))	/* nonzero if bit p is set; p is evaluated twice */
271 
272 /* default values for baddynamicports [see ip_init()] */
273 #define	DEFBADDYNAMICPORTS_TCP	{ \
274 	587, 749, 750, 751, 853, 871, 2049, \
275 	6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, \
276 	0 }	/* each of these lists ends with a 0 entry */
277 #define	DEFBADDYNAMICPORTS_UDP	{ 623, 664, 749, 750, 751, 2049, \
278 	3784, 3785, 7784, /* BFD/S-BFD ports */ \
279 	 0 }
280 
281 #define DEFROOTONLYPORTS_TCP { \
282 	2049, \
283 	0 }
284 #define DEFROOTONLYPORTS_UDP { \
285 	2049, \
286 	0 }
287 
288 struct baddynamicports {
289 	u_int32_t tcp[DP_MAPSIZE];	/* TCP port bitmap, see DP_SET/DP_CLR/DP_ISSET */
290 	u_int32_t udp[DP_MAPSIZE];	/* UDP port bitmap, same layout */
291 };
292 
293 #ifdef _KERNEL
294 
295 #define IN_PCBLOCK_HOLD	1	/* NOTE(review): presumably values for the int lock */
296 #define IN_PCBLOCK_GRAB	2	/* argument of the *_lock() functions; confirm */
297 
298 extern struct inpcbtable rawcbtable, rawin6pcbtable;
299 extern struct baddynamicports baddynamicports;
300 extern struct baddynamicports rootonlyports;	/* same bitmap type as baddynamicports */
301 extern int in_pcbnotifymiss;
302 
303 void	 in_init(void);
304 void	 in_losing(struct inpcb *);
305 int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
306 int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *,
307 	    struct proc *);
308 int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
309 int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
310 	    struct proc *);
311 int	 in_pcbconnect(struct inpcb *, struct mbuf *);
312 void	 in_pcbdetach(struct inpcb *);
313 struct socket *
314 	 in_pcbsolock_ref(struct inpcb *);	/* NOTE(review): result looks like the */
315 void	 in_pcbsounlock_rele(struct inpcb *, struct socket *); /* socket passed here; confirm */
316 struct inpcb *
317 	 in_pcbref(struct inpcb *);
318 void	 in_pcbunref(struct inpcb *);
319 void	 in_pcbdisconnect(struct inpcb *);
320 struct inpcb *
321 	 in_pcb_iterator(struct inpcbtable *, struct inpcb *,
322 	    struct inpcb_iterator *);
323 void	 in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
324 	    struct inpcb_iterator *);
325 struct inpcb *
326 	 in_pcblookup(struct inpcbtable *, struct in_addr,
327 			       u_int, struct in_addr, u_int, u_int);
328 struct inpcb *
329 	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
330 	    struct mbuf *, u_int);
331 #ifdef INET6	/* prototypes taking in6_addr / sockaddr_in6 */
332 uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *,
333 	    u_short, const struct in6_addr *, u_short);
334 struct inpcb *
335 	 in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
336 	    u_int, const struct in6_addr *, u_int, u_int);
337 struct inpcb *
338 	 in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
339 	    struct mbuf *, u_int);
340 int	 in6_pcbaddrisavail_lock(const struct inpcb *, struct sockaddr_in6 *,
341 	    int, struct proc *, int);
342 int	 in6_pcbaddrisavail(const struct inpcb *, struct sockaddr_in6 *, int,
343 	    struct proc *);
344 int	 in6_pcbconnect(struct inpcb *, struct mbuf *);
345 void	 in6_setsockaddr(struct inpcb *, struct mbuf *);
346 void	 in6_setpeeraddr(struct inpcb *, struct mbuf *);
347 int	 in6_sockaddr(struct socket *, struct mbuf *);
348 int	 in6_peeraddr(struct socket *, struct mbuf *);
349 #endif /* INET6 */
350 void	 in_pcbinit(struct inpcbtable *, int);
351 struct inpcb *
352 	 in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
353 	    u_int, int);
354 void	 in_pcbnotifyall(struct inpcbtable *, const struct sockaddr_in *,
355 	    u_int, int, void (*)(struct inpcb *, int));
356 void	 in_pcbrehash(struct inpcb *);
357 void	 in_rtchange(struct inpcb *, int);
358 void	 in_setpeeraddr(struct inpcb *, struct mbuf *);
359 void	 in_setsockaddr(struct inpcb *, struct mbuf *);
360 int	 in_sockaddr(struct socket *, struct mbuf *);
361 int	 in_peeraddr(struct socket *, struct mbuf *);
362 int	 in_baddynamic(u_int16_t, u_int16_t);
363 int	 in_rootonly(u_int16_t, u_int16_t);
364 int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
365 struct rtentry *
366 	in_pcbrtentry(struct inpcb *);
367 
368 /* INET6 stuff, declared outside the #ifdef INET6 region */
369 struct rtentry *
370 	in6_pcbrtentry(struct inpcb *);
371 void	in6_pcbnotify(struct inpcbtable *, const struct sockaddr_in6 *,
372 	u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
373 	void (*)(struct inpcb *, int));
374 int	in6_selecthlim(const struct inpcb *);
375 int	in_pcbset_rtableid(struct inpcb *, u_int);
376 void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
377 void	in_pcbunset_faddr(struct inpcb *);
378 void	in_pcbunset_laddr(struct inpcb *);
379 
380 #endif /* _KERNEL */
381 #endif /* _NETINET_IN_PCB_H_ */
382