xref: /openbsd/sys/netinet/in_pcb.h (revision 9ea232b5)
1 /*	$OpenBSD: in_pcb.h,v 1.150 2024/01/31 12:27:57 bluhm Exp $	*/
2 /*	$NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
62  */
63 
64 #ifndef _NETINET_IN_PCB_H_
65 #define _NETINET_IN_PCB_H_
66 
67 #include <sys/queue.h>
68 #include <sys/mutex.h>
69 #include <sys/rwlock.h>
70 #include <sys/refcnt.h>
71 #include <netinet/ip6.h>
72 #include <netinet6/ip6_var.h>
73 #include <netinet/icmp6.h>
74 #include <netinet/ip_ipsp.h>
75 
76 #include <crypto/siphash.h>
77 
78 /*
79  * Locks used to protect struct members in this file:
80  *	I	immutable after creation
81  *	N	net lock
82  *	t	inpt_mtx		pcb table mutex
83  *	y	inpt_notify		pcb table rwlock for notify
84  *	p	inp_mtx			pcb mutex
85  *	L	pf_inp_mtx		link pf to inp mutex
86  *	s	so_lock			socket rwlock
87  */
88 
89 /*
90  * The pcb table mutex guarantees that all inpcb are consistent and
91  * that bind(2) and connect(2) create unique combinations of
92  * laddr/faddr/lport/fport/rtableid.  This mutex is used to protect
93  * both address consistency and inpcb lookup during protocol input.
94  * All writes to inp_[lf]addr take table mutex.  A per socket lock is
95  * needed, so that socket layer input has a consistent view of these
96  * values.
97  *
98  * In soconnect() and sosend() the pcb mutex cannot be used.  They can
99  * eventually call IP output, which takes the pf lock, a sleeping lock.
100  * Also connect(2) does a route lookup for source selection.  There a
101  * route resolve happens, which creates a route, which sends a route
102  * message, which needs the route lock, which is a rw-lock.
103  *
104  * On the other hand a mutex should be used in protocol input.  It
105  * does not make sense to do a process switch per packet.  Better spin
106  * until the packet can be processed.
107  *
108  * So there are three locks.  Table mutex is for writing inp_[lf]addr/port
109  * and lookup, socket rw-lock to separate sockets in system calls, and
110  * pcb mutex to protect socket receive buffer.  Changing inp_[lf]addr/port
111  * takes both per socket rw-lock and global table mutex.  Protocol
112  * input only reads inp_[lf]addr/port during lookup and is safe.  System
113  * call only reads when holding socket rw-lock and is safe.  The socket
114  * layer needs pcb mutex only in soreceive().
115  *
116  * Function pru_lock() grabs the pcb mutex and its existence indicates
117  * that a protocol is MP safe.  Otherwise the exclusive net lock is
118  * used.
119  */
120 
/* Forward declaration: this header only stores a pointer to it (inp_pf_sk). */
121 struct pf_state_key;
122 
/*
 * Storage for one address, shared between the IPv6 and IPv4 views.
 * In the IPv4 view the address follows 12 bytes of padding, so it
 * starts at byte offset 12 of the union rather than at its beginning.
 */
123 union inpaddru {
124 	struct in6_addr iau_addr6;	/* IPv6 view */
125 	struct {
126 		uint8_t pad[12];	/* places inaddr at offset 12 */
127 		struct in_addr inaddr;	/* easier transition */
128 	} iau_a4u;			/* IPv4 view */
129 };
130 
131 /*
132  * Common structure pcb for internet protocol implementation.
133  * Here are stored pointers to local and foreign host table
134  * entries, local and foreign socket numbers, and pointers
135  * up (to a socket structure) and down (to a protocol-specific)
136  * control block.
137  */
138 struct inpcb {
	/*
	 * Letters in [brackets] refer to the lock legend at the top of this
	 * file.  Members that exist in an IPv4 and an IPv6 variant are
	 * unions; the inp_* #defines following each union name the variant.
	 */
139 	LIST_ENTRY(inpcb) inp_hash;		/* [t] local and foreign hash */
140 	LIST_ENTRY(inpcb) inp_lhash;		/* [t] local port hash */
141 	TAILQ_ENTRY(inpcb) inp_queue;		/* [t] inet PCB queue */
142 	SIMPLEQ_ENTRY(inpcb) inp_notify;	/* [y] notify or udp append */
143 	struct	  inpcbtable *inp_table;	/* [I] inet queue/hash table */
144 	union	  inpaddru inp_faddru;		/* [t] Foreign address. */
145 	union	  inpaddru inp_laddru;		/* [t] Local address. */
	/* Shorthand for the IPv4 and IPv6 views of the two addresses above. */
146 #define	inp_faddr	inp_faddru.iau_a4u.inaddr
147 #define	inp_faddr6	inp_faddru.iau_addr6
148 #define	inp_laddr	inp_laddru.iau_a4u.inaddr
149 #define	inp_laddr6	inp_laddru.iau_addr6
150 	u_int16_t inp_fport;		/* [t] foreign port */
151 	u_int16_t inp_lport;		/* [t] local port */
152 	struct	  socket *inp_socket;	/* [I] back pointer to socket */
153 	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
154 	union {				/* Route (notice increased size). */
155 		struct route ru_route;
156 		struct route_in6 ru_route6;
157 	} inp_ru;
158 #define	inp_route	inp_ru.ru_route
159 #define	inp_route6	inp_ru.ru_route6
160 	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
161 	struct	  mutex inp_mtx;	/* protect PCB and socket members */
162 	int	  inp_flags;		/* generic IP/datagram flags */
163 	union {				/* Header prototype. */
164 		struct ip hu_ip;
165 		struct ip6_hdr hu_ipv6;
166 	} inp_hu;
167 #define	inp_ip		inp_hu.hu_ip
168 #define	inp_ipv6	inp_hu.hu_ipv6
169 	struct	  mbuf *inp_options;	/* IP options */
170 	struct ip6_pktopts *inp_outputopts6; /* IP6 options for outgoing packets */
171 	int inp_hops;	/* NOTE(review): presumably hop limit for output -- confirm */
172 	union {				/* Multicast options, one pointer per family. */
173 		struct ip_moptions *mou_mo;
174 		struct ip6_moptions *mou_mo6;
175 	} inp_mou;
176 #define inp_moptions inp_mou.mou_mo	/* [N] IPv4 multicast options */
177 #define inp_moptions6 inp_mou.mou_mo6	/* [N] IPv6 multicast options */
178 	u_char	  inp_seclevel[4];	/* [N] IPsec level of socket */
	/* Indices into inp_seclevel[]. */
179 #define SL_AUTH           0             /* Authentication level */
180 #define SL_ESP_TRANS      1             /* ESP transport level */
181 #define SL_ESP_NETWORK    2             /* ESP network (encapsulation) level */
182 #define SL_IPCOMP         3             /* Compression level */
183 	u_char	inp_ip_minttl;		/* minimum TTL or drop */
184 #define inp_ip6_minhlim inp_ip_minttl	/* minimum Hop Limit or drop */
	/* The flow info lives inside the IPv6 header prototype. */
185 #define	inp_flowinfo	inp_hu.hu_ipv6.ip6_flow
186 
187 	int	inp_cksum6;		/* called inp_csumoffset outside the kernel */
188 #ifndef _KERNEL
189 #define inp_csumoffset	inp_cksum6
190 #endif
191 	struct	icmp6_filter *inp_icmp6filt;	/* ICMPv6 filter */
192 	struct	pf_state_key *inp_pf_sk; /* [L] */
	/*
	 * Callback receiving an mbuf plus IPv4 and IPv6 header pointers and
	 * returning an mbuf.  NOTE(review): inp_upcall_arg is presumably one
	 * of the void * arguments handed to it -- confirm against callers.
	 */
193 	struct	mbuf *(*inp_upcall)(void *, struct mbuf *,
194 		    struct ip *, struct ip6_hdr *, void *, int);
195 	void	*inp_upcall_arg;
196 	u_int	inp_rtableid;		/* [t] */
197 	int	inp_pipex;		/* pipex indication */
198 	uint16_t inp_flowid;		/* [s] */
199 };
200 
/* List head type for the hash buckets in struct inpcbtable. */
201 LIST_HEAD(inpcbhead, inpcb);
202 
/*
 * A table of PCBs: one queue holding the entries plus two hash tables,
 * one keyed on local and foreign values and one on the local port.
 * Each hash has its own SipHash key and mask.
 */
203 struct inpcbtable {
204 	struct mutex inpt_mtx;			/* protect queue and hash */
205 	struct rwlock inpt_notify;		/* protect inp_notify list */
206 	TAILQ_HEAD(inpthead, inpcb) inpt_queue;	/* [t] inet PCB queue */
207 	struct	inpcbhead *inpt_hashtbl;	/* [t] local and foreign hash */
208 	struct	inpcbhead *inpt_lhashtbl;	/* [t] local port hash */
209 	SIPHASH_KEY inpt_key, inpt_lkey;	/* [I] secrets for hashes */
210 	u_long	inpt_mask, inpt_lmask;		/* [t] hash masks */
211 	int	inpt_count, inpt_size;		/* [t] queue count, hash size */
212 };
213 
214 /* flags in inp_flags: */
215 #define	INP_RECVOPTS	0x001	/* receive incoming IP options */
216 #define	INP_RECVRETOPTS	0x002	/* receive IP options for reply */
217 #define	INP_RECVDSTADDR	0x004	/* receive IP dst address */
218 
/*
 * Alternate names.  The first three reuse the bits defined above.
 * INP_RXSRCRT and INP_HOPLIMIT have the same values as INP_HIGHPORT
 * (0x010) and INP_LOWPORT (0x020) defined below.
 */
219 #define	INP_RXDSTOPTS	INP_RECVOPTS
220 #define	INP_RXHOPOPTS	INP_RECVRETOPTS
221 #define	INP_RXINFO	INP_RECVDSTADDR
222 #define	INP_RXSRCRT	0x010
223 #define	INP_HOPLIMIT	0x020
224 
225 #define	INP_HDRINCL	0x008	/* user supplies entire IP header */
226 #define	INP_HIGHPORT	0x010	/* user wants "high" port binding */
227 #define	INP_LOWPORT	0x020	/* user wants "low" port binding */
228 #define	INP_RECVIF	0x080	/* receive incoming interface */
229 #define	INP_RECVTTL	0x040	/* receive incoming IP TTL */
230 #define	INP_RECVDSTPORT	0x200	/* receive IP dst addr before rdr */
231 #define	INP_RECVRTABLE	0x400	/* receive routing table */
232 #define	INP_IPSECFLOWINFO 0x800	/* receive IPsec flow info */
233 
/*
 * Mask of the receive option bits.  INP_HDRINCL and INP_IPSECFLOWINFO
 * are not part of it.  NOTE(review): because INP_RXSRCRT and INP_HOPLIMIT
 * are included, the mask also covers the INP_HIGHPORT and INP_LOWPORT
 * bit values -- confirm users of the mask expect that.
 */
234 #define	INP_CONTROLOPTS	(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR| \
235 	    INP_RXSRCRT|INP_HOPLIMIT|INP_RECVIF|INP_RECVTTL|INP_RECVDSTPORT| \
236 	    INP_RECVRTABLE)
237 
238 /*
239  * These flags' values should be determined by either the transport
240  * protocol at PRU_BIND, PRU_LISTEN, PRU_CONNECT, etc, or by in_pcb*().
241  */
242 #define INP_IPV6	0x100	/* socket, proto, domain, family is PF_INET6 */
243 
244 /*
245  * Flags in inp_flags for IPV6
246  */
/* The first three are aliases of the generic INP_* bits. */
247 #define IN6P_HIGHPORT		INP_HIGHPORT	/* user wants "high" port */
248 #define IN6P_LOWPORT		INP_LOWPORT	/* user wants "low" port */
249 #define IN6P_RECVDSTPORT	INP_RECVDSTPORT	/* receive IP dst addr before rdr */
/* IPv6-only bits start at 0x010000, above the generic INP_* values. */
250 #define IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
251 #define IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
252 #define IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
253 #define IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
254 #define IN6P_RTHDR		0x100000 /* receive routing header */
255 #define IN6P_TCLASS		0x400000 /* receive traffic class value */
256 #define IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */
257 
/*
 * NOTE(review): IN6P_MTU occupies bit 31 while inp_flags is declared as a
 * plain int in struct inpcb -- confirm that is intended.
 */
258 #define IN6P_ANONPORT		0x4000000 /* port chosen for user */
259 #define IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
260 #define IN6P_MTU		0x80000000 /* receive path MTU */
261 
262 #define IN6P_MINMTU		0x20000000 /* use minimum MTU */
263 
/*
 * Mask of the IPv6 option bits.  IN6P_ANONPORT, IN6P_MINMTU and the
 * high/low port aliases are not part of it.
 */
264 #define IN6P_CONTROLOPTS	(IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
265 				 IN6P_DSTOPTS|IN6P_RTHDR|\
266 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
267 				 IN6P_MTU|IN6P_RECVDSTPORT)
268 
/*
 * Three distinct bit values.  NOTE(review): presumably combined into the
 * flags argument of the pcb lookup functions -- confirm against callers.
 */
269 #define	INPLOOKUP_WILDCARD	1
270 #define	INPLOOKUP_SETLOCAL	2
271 #define	INPLOOKUP_IPV6		4
272 
/* Cast a socket's so_pcb member to struct inpcb *; no checking is done. */
273 #define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
274 
275 /* macros for handling bitmap of ports not to allocate dynamically */
/*
 * m is an array of u_int32_t map words and p a port number.  Word index
 * is p / DP_MAPBITS, bit index is p % DP_MAPBITS.  DP_SET, DP_CLR and
 * DP_ISSET each evaluate p twice, so p must not have side effects.
 * DP_ISSET yields the masked bit itself, i.e. nonzero rather than 1.
 */
276 #define	DP_MAPBITS	(sizeof(u_int32_t) * NBBY)
277 #define	DP_MAPSIZE	(howmany(65536, DP_MAPBITS))
278 #define	DP_SET(m, p)	((m)[(p) / DP_MAPBITS] |= (1U << ((p) % DP_MAPBITS)))
279 #define	DP_CLR(m, p)	((m)[(p) / DP_MAPBITS] &= ~(1U << ((p) % DP_MAPBITS)))
280 #define	DP_ISSET(m, p)	((m)[(p) / DP_MAPBITS] & (1U << ((p) % DP_MAPBITS)))
281 
282 /* default values for baddynamicports [see ip_init()] */
/*
 * Each macro expands to a brace-enclosed initializer list of port numbers
 * ending in 0.  NOTE(review): the trailing 0 presumably terminates the
 * list for the consumer -- confirm in ip_init().
 */
283 #define	DEFBADDYNAMICPORTS_TCP	{ \
284 	587, 749, 750, 751, 853, 871, 2049, \
285 	6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, \
286 	0 }
287 #define	DEFBADDYNAMICPORTS_UDP	{ 623, 664, 749, 750, 751, 2049, \
288 	3784, 3785, 7784, /* BFD/S-BFD ports */ \
289 	 0 }
290 
/* Port 2049 is the only default entry in both root-only lists. */
291 #define DEFROOTONLYPORTS_TCP { \
292 	2049, \
293 	0 }
294 #define DEFROOTONLYPORTS_UDP { \
295 	2049, \
296 	0 }
297 
/*
 * One port bitmap per protocol, each DP_MAPSIZE words long.  The DP_SET,
 * DP_CLR and DP_ISSET macros take such an array as their first argument.
 */
298 struct baddynamicports {
299 	u_int32_t tcp[DP_MAPSIZE];	/* TCP port bitmap */
300 	u_int32_t udp[DP_MAPSIZE];	/* UDP port bitmap */
301 };
302 
303 #ifdef _KERNEL
304 
/*
 * NOTE(review): presumably the values for the trailing int argument of the
 * *_lock() functions declared below -- confirm against their definitions.
 */
305 #define IN_PCBLOCK_HOLD	1
306 #define IN_PCBLOCK_GRAB	2
307 
/* Kernel globals; only declared here, defined elsewhere. */
308 extern struct inpcbtable rawcbtable, rawin6pcbtable;
309 extern struct baddynamicports baddynamicports;
310 extern struct baddynamicports rootonlyports;	/* same bitmap type as above */
311 extern int in_pcbnotifymiss;
312 
/*
 * PCB entry points for IPv4 and common code.  Only the prototypes are
 * visible in this header; see the definitions for the exact contracts.
 */
313 void	 in_init(void);
314 void	 in_losing(struct inpcb *);
315 int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
316 int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, struct proc *);
317 int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
318 int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
319 	    struct proc *);
320 int	 in_pcbconnect(struct inpcb *, struct mbuf *);
321 void	 in_pcbdetach(struct inpcb *);
/* NOTE(review): presumably take and release a reference on inp_refcnt. */
322 struct inpcb *
323 	 in_pcbref(struct inpcb *);
324 void	 in_pcbunref(struct inpcb *);
325 void	 in_pcbdisconnect(struct inpcb *);
/* Lookups take the table first and return a struct inpcb pointer. */
326 struct inpcb *
327 	 in_pcblookup(struct inpcbtable *, struct in_addr,
328 			       u_int, struct in_addr, u_int, u_int);
329 struct inpcb *
330 	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
331 	    struct mbuf *, u_int);
/* IPv6 variants; these are only declared when INET6 is defined. */
332 #ifdef INET6
333 uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *,
334 	    u_short, const struct in6_addr *, u_short);
335 struct inpcb *
336 	 in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
337 	    u_int, const struct in6_addr *, u_int, u_int);
338 struct inpcb *
339 	 in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
340 	    struct mbuf *, u_int);
/* Same parameters as in6_pcbaddrisavail() plus one trailing int. */
341 int	 in6_pcbaddrisavail_lock(const struct inpcb *, struct sockaddr_in6 *,
342 	    int, struct proc *, int);
343 int	 in6_pcbaddrisavail(const struct inpcb *, struct sockaddr_in6 *, int,
344 	    struct proc *);
345 int	 in6_pcbconnect(struct inpcb *, struct mbuf *);
346 void	 in6_setsockaddr(struct inpcb *, struct mbuf *);
347 void	 in6_setpeeraddr(struct inpcb *, struct mbuf *);
348 int	 in6_sockaddr(struct socket *, struct mbuf *);
349 int	 in6_peeraddr(struct socket *, struct mbuf *);
350 #endif /* INET6 */
/* Table setup, notification and address helpers; prototypes only. */
351 void	 in_pcbinit(struct inpcbtable *, int);
/* Takes the address as const void *, so the pointed-to type is not fixed. */
352 struct inpcb *
353 	 in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
354 	    u_int, int);
/* The last argument is a callback invoked with a pcb and an int. */
355 void	 in_pcbnotifyall(struct inpcbtable *, const struct sockaddr_in *,
356 	    u_int, int, void (*)(struct inpcb *, int));
357 void	 in_pcbrehash(struct inpcb *);
358 void	 in_rtchange(struct inpcb *, int);
359 void	 in_setpeeraddr(struct inpcb *, struct mbuf *);
360 void	 in_setsockaddr(struct inpcb *, struct mbuf *);
361 int	 in_sockaddr(struct socket *, struct mbuf *);
362 int	 in_peeraddr(struct socket *, struct mbuf *);
363 int	 in_baddynamic(u_int16_t, u_int16_t);
364 int	 in_rootonly(u_int16_t, u_int16_t);
365 int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
366 struct rtentry *
367 	in_pcbrtentry(struct inpcb *);
368 
369 /* INET6 stuff */
/*
 * Unlike the prototypes inside the #ifdef INET6 block earlier in this
 * header, these are declared unconditionally within _KERNEL.
 */
370 struct rtentry *
371 	in6_pcbrtentry(struct inpcb *);
/* The last argument is a callback invoked with a pcb and an int. */
372 void	in6_pcbnotify(struct inpcbtable *, const struct sockaddr_in6 *,
373 	u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
374 	void (*)(struct inpcb *, int));
375 int	in6_selecthlim(const struct inpcb *);
/* Setters taking the pcb to modify as their first argument. */
376 int	in_pcbset_rtableid(struct inpcb *, u_int);
377 void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
378 void	in_pcbunset_faddr(struct inpcb *);
379 void	in_pcbunset_laddr(struct inpcb *);
380 
381 #endif /* _KERNEL */
382 #endif /* _NETINET_IN_PCB_H_ */
383