xref: /freebsd/sys/sys/socketvar.h (revision b00ab754)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
32  *
33  * $FreeBSD$
34  */
35 
36 #ifndef _SYS_SOCKETVAR_H_
37 #define _SYS_SOCKETVAR_H_
38 
/*
 * Type of the socket generation counter.  The same type is reused by
 * xinpcb, xtcpcb and xunpcb.
 */
typedef uint64_t so_gen_t;
43 
44 #if defined(_KERNEL) || defined(_WANT_SOCKET)
45 #include <sys/queue.h>			/* for TAILQ macros */
46 #include <sys/selinfo.h>		/* for struct selinfo */
47 #include <sys/_lock.h>
48 #include <sys/_mutex.h>
49 #include <sys/osd.h>
50 #include <sys/_sx.h>
51 #include <sys/sockbuf.h>
52 #ifdef _KERNEL
53 #include <sys/caprights.h>
54 #include <sys/sockopt.h>
55 #endif
56 
/*
 * Forward declarations.  struct socket is declared here, ahead of the
 * so_upcall_t typedef, so that the "struct socket *" parameter of the
 * upcall prototype refers to the file-scope type no matter which headers
 * were included earlier.  Without a prior declaration the tag would be
 * introduced inside the parameter list and go out of scope with it.
 */
struct vnet;
struct socket;

/*
 * Kernel structure per socket (struct socket, defined below).
 * Contains send and receive buffer queues,
 * handle on protocol and pointer to protocol
 * private data and error information.
 */

/*
 * Function type of a socket upcall: takes the socket, an opaque argument
 * and an int, and returns an int.
 */
typedef	int so_upcall_t(struct socket *, void *, int);
68 
69 /*-
70  * Locking key to struct socket:
71  * (a) constant after allocation, no locking required.
72  * (b) locked by SOCK_LOCK(so).
73  * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
74  * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
75  * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
76  * (f) not locked since integer reads/writes are atomic.
77  * (g) used only as a sleep/wakeup address, no value.
78  * (h) locked by global mutex so_global_mtx.
79  */
80 TAILQ_HEAD(accept_queue, socket);
81 struct socket {
82 	struct mtx	so_lock;
83 	volatile u_int	so_count;	/* (b / refcount) */
84 	struct selinfo	so_rdsel;	/* (b/cr) for so_rcv/so_comp */
85 	struct selinfo	so_wrsel;	/* (b/cs) for so_snd */
86 	short	so_type;		/* (a) generic type, see socket.h */
87 	short	so_options;		/* (b) from socket call, see socket.h */
88 	short	so_linger;		/* time to linger close(2) */
89 	short	so_state;		/* (b) internal state flags SS_* */
90 	void	*so_pcb;		/* protocol control block */
91 	struct	vnet *so_vnet;		/* (a) network stack instance */
92 	struct	protosw *so_proto;	/* (a) protocol handle */
93 	short	so_timeo;		/* (g) connection timeout */
94 	u_short	so_error;		/* (f) error affecting connection */
95 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
96 					   out of band data (SIGURG) */
97 	struct	ucred *so_cred;		/* (a) user credentials */
98 	struct	label *so_label;	/* (b) MAC label for socket */
99 	/* NB: generation count must not be first. */
100 	so_gen_t so_gencnt;		/* (h) generation count */
101 	void	*so_emuldata;		/* (b) private data for emulators */
102 	struct	osd	osd;		/* Object Specific extensions */
103 	/*
104 	 * so_fibnum, so_user_cookie and friends can be used to attach
105 	 * some user-specified metadata to a socket, which then can be
106 	 * used by the kernel for various actions.
107 	 * so_user_cookie is used by ipfw/dummynet.
108 	 */
109 	int so_fibnum;		/* routing domain for this socket */
110 	uint32_t so_user_cookie;
111 
112 	int so_ts_clock;	/* type of the clock used for timestamps */
113 	uint32_t so_max_pacing_rate;	/* (f) TX rate limit in bytes/s */
114 	union {
115 		/* Regular (data flow) socket. */
116 		struct {
117 			/* (cr, cs) Receive and send buffers. */
118 			struct sockbuf		so_rcv, so_snd;
119 
120 			/* (e) Our place on accept queue. */
121 			TAILQ_ENTRY(socket)	so_list;
122 			struct socket		*so_listen;	/* (b) */
123 			enum {
124 				SQ_NONE = 0,
125 				SQ_INCOMP = 0x0800,	/* on sol_incomp */
126 				SQ_COMP = 0x1000,	/* on sol_comp */
127 			}			so_qstate;	/* (b) */
128 
129 			/* (b) cached MAC label for peer */
130 			struct	label		*so_peerlabel;
131 			u_long	so_oobmark;	/* chars to oob mark */
132 		};
133 		/*
134 		 * Listening socket, where accepts occur, is so_listen in all
135 		 * subsidiary sockets.  If so_listen is NULL, socket is not
136 		 * related to an accept.  For a listening socket itself
137 		 * sol_incomp queues partially completed connections, while
138 		 * sol_comp is a queue of connections ready to be accepted.
139 		 * If a connection is aborted and it has so_listen set, then
140 		 * it has to be pulled out of either sol_incomp or sol_comp.
141 		 * We allow connections to queue up based on current queue
142 		 * lengths and limit on number of queued connections for this
143 		 * socket.
144 		 */
145 		struct {
146 			/* (e) queue of partial unaccepted connections */
147 			struct accept_queue	sol_incomp;
148 			/* (e) queue of complete unaccepted connections */
149 			struct accept_queue	sol_comp;
150 			u_int	sol_qlen;    /* (e) sol_comp length */
151 			u_int	sol_incqlen; /* (e) sol_incomp length */
152 			u_int	sol_qlimit;  /* (e) queue limit */
153 
154 			/* accept_filter(9) optional data */
155 			struct	accept_filter	*sol_accept_filter;
156 			void	*sol_accept_filter_arg;	/* saved filter args */
157 			char	*sol_accept_filter_str;	/* saved user args */
158 
159 			/* Optional upcall, for kernel socket. */
160 			so_upcall_t	*sol_upcall;	/* (e) */
161 			void		*sol_upcallarg;	/* (e) */
162 
163 			/* Socket buffer parameters, to be copied to
164 			 * dataflow sockets, accepted from this one. */
165 			int		sol_sbrcv_lowat;
166 			int		sol_sbsnd_lowat;
167 			u_int		sol_sbrcv_hiwat;
168 			u_int		sol_sbsnd_hiwat;
169 			short		sol_sbrcv_flags;
170 			short		sol_sbsnd_flags;
171 			sbintime_t	sol_sbrcv_timeo;
172 			sbintime_t	sol_sbsnd_timeo;
173 		};
174 	};
175 };
176 #endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
177 
/*
 * Socket state bits.
 *
 * Historically, these bits were all kept in the so_state field.  Because
 * they are locked differently, they now live in several fields:
 *  - so_state holds the basic socket state, protected by the socket lock;
 *  - so_qstate holds information about the socket accept queues;
 *  - each socket buffer has its own state field with information relevant
 *    to that buffer (can't send, can't receive).
 * Many fields will be read without locks to improve performance and avoid
 * lock order issues.  However, this approach must be used with caution.
 */
#define	SS_NOFDREF		0x0001	/* no file table ref any more */
#define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
#define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
#define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
#define	SS_NBIO			0x0100	/* non-blocking ops */
#define	SS_ASYNC		0x0200	/* async i/o notify */
#define	SS_ISCONFIRMING		0x0400	/* deciding to accept connection req */
#define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
198 
/*
 * A protocol may set SS_PROTOREF on a socket to say that it wants the
 * socket to persist after pru_detach and that it will free the socket
 * itself once it is done.  Protocols should call sofree() only after
 * having set this flag in pru_detach(), and never otherwise, because
 * sofree() bypasses socket reference counting.
 */
#define	SS_PROTOREF		0x4000	/* strong protocol reference */
207 
208 #ifdef _KERNEL
209 
/*
 * Helpers for the per-socket mutex so_lock of socket `so'.
 *
 * SOCK_MTX() yields a pointer to the mutex.  Its expansion is fully
 * parenthesized so that it remains a single operand when a caller applies
 * a postfix operator to it or embeds it in a larger expression.
 * SOCK_LOCK()/SOCK_UNLOCK() acquire and release the mutex, SOCK_OWNED()
 * reports mtx_owned(), and the two *_ASSERT() macros check via
 * mtx_assert() that the mutex is (MA_OWNED) or is not (MA_NOTOWNED) held.
 */
#define	SOCK_MTX(so)		(&(so)->so_lock)
#define	SOCK_LOCK(so)		mtx_lock(&(so)->so_lock)
#define	SOCK_OWNED(so)		mtx_owned(&(so)->so_lock)
#define	SOCK_UNLOCK(so)		mtx_unlock(&(so)->so_lock)
#define	SOCK_LOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_OWNED)
#define	SOCK_UNLOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_NOTOWNED)
216 
/*
 * Listening-socket helpers.
 *
 * SOLISTENING() is true when SO_ACCEPTCONN is set in so_options.  The
 * lock, unlock and assert variants operate on the socket's so_lock mutex
 * and additionally KASSERT that the socket is listening: after the lock
 * has been taken, before it is dropped, and after ownership has been
 * asserted, respectively.  SOLISTEN_TRYLOCK() performs no such check.
 */
#define	SOLISTENING(sol)	(((sol)->so_options & SO_ACCEPTCONN) != 0)
#define	SOLISTEN_LOCK(sol)						\
	do {								\
		mtx_lock(&(sol)->so_lock);				\
		KASSERT(SOLISTENING(sol),				\
		    ("%s: %p not listening", __func__, (sol)));		\
	} while (0)
#define	SOLISTEN_TRYLOCK(sol)	mtx_trylock(&(sol)->so_lock)
#define	SOLISTEN_UNLOCK(sol)						\
	do {								\
		KASSERT(SOLISTENING(sol),				\
		    ("%s: %p not listening", __func__, (sol)));		\
		mtx_unlock(&(sol)->so_lock);				\
	} while (0)
#define	SOLISTEN_LOCK_ASSERT(sol)					\
	do {								\
		mtx_assert(&(sol)->so_lock, MA_OWNED);			\
		KASSERT(SOLISTENING(sol),				\
		    ("%s: %p not listening", __func__, (sol)));		\
	} while (0)
234 
/*
 * Macros for sockets and socket buffering.
 */

/*
 * Flags accepted by sblock().  SBL_VALID is the union of all of them.
 */
#define	SBL_WAIT	0x00000001	/* Wait if not immediately available. */
#define	SBL_NOINTR	0x00000002	/* Force non-interruptible sleep. */
#define	SBL_VALID	(SBL_WAIT | SBL_NOINTR)
245 
/*
 * Does the other side have to be notified when I/O becomes possible?
 * True when any of the notification flags is set in sb_flags.
 */
#define	sb_notify(sb)							\
	(((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | SB_UPCALL |	\
	    SB_AIO | SB_KNOTE)) != 0)
251 
/* Must a send on this socket go out all at once (PR_ATOMIC protocol)? */
#define	sosendallatonce(so)	((so)->so_proto->pr_flags & PR_ATOMIC)
255 
/*
 * Can something be read from so?
 *
 * soreadabledata(): the receive buffer holds at least sb_lowat available
 * bytes, or an error is pending on the socket.
 * soreadable(): as above, or the receive buffer is marked
 * SBS_CANTRCVMORE.
 */
#define	soreadabledata(so)						\
	(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define	soreadable(so)							\
	(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
261 
/*
 * Can something be written to so?  True when
 *  - the send buffer has at least sb_lowat bytes of space and the socket
 *    is either connected or its protocol does not require a connection, or
 *  - the send buffer is marked SBS_CANTSENDMORE, or
 *  - an error is pending on the socket.
 */
#define	sowriteable(so)							\
	((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&		\
	    (((so)->so_state & SS_ISCONNECTED) ||			\
	    ((so)->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||	\
	    ((so)->so_snd.sb_state & SBS_CANTSENDMORE) ||		\
	    (so)->so_error)
269 
/*
 * soref()/sorele() reference-count the socket structure.
 *
 * soref() may be called without owning the socket lock, but the caller
 * must then own something that holds the socket, and so_count must not
 * be 0.  The socket still has to be closed explicitly, but dropping the
 * last reference frees the structure.
 *
 * sorele() asserts that the socket lock is held.  If the reference being
 * dropped is not the last one it releases the socket lock; otherwise it
 * hands the socket to sofree() without unlocking it first.
 */
#define	soref(so)	refcount_acquire(&(so)->so_count)
#define	sorele(so)							\
	do {								\
		SOCK_LOCK_ASSERT(so);					\
		if (!refcount_release(&(so)->so_count))			\
			SOCK_UNLOCK(so);				\
		else							\
			sofree(so);					\
	} while (0)
285 
/*
 * sorwakeup() and sowwakeup() take the socket buffer lock so that the
 * notification test and the wakeup are not performed non-atomically.
 * sowakeup() is responsible for releasing that lock whenever it is
 * called, so the macros unlock only on the path that does not call into
 * sowakeup().  Any new code that invokes the underlying sowakeup()
 * primitives directly must maintain the same semantics.
 *
 * The *_locked() variants assert that the buffer lock is already held.
 */
#define	sorwakeup_locked(so)						\
	do {								\
		SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);			\
		if (!sb_notify(&(so)->so_rcv))				\
			SOCKBUF_UNLOCK(&(so)->so_rcv);			\
		else							\
			sowakeup((so), &(so)->so_rcv);			\
	} while (0)

#define	sorwakeup(so)							\
	do {								\
		SOCKBUF_LOCK(&(so)->so_rcv);				\
		sorwakeup_locked(so);					\
	} while (0)

#define	sowwakeup_locked(so)						\
	do {								\
		SOCKBUF_LOCK_ASSERT(&(so)->so_snd);			\
		if (!sb_notify(&(so)->so_snd))				\
			SOCKBUF_UNLOCK(&(so)->so_snd);			\
		else							\
			sowakeup((so), &(so)->so_snd);			\
	} while (0)

#define	sowwakeup(so)							\
	do {								\
		SOCKBUF_LOCK(&(so)->so_snd);				\
		sowwakeup_locked(so);					\
	} while (0)
319 
/*
 * Descriptor of one accept filter: a name of at most 16 bytes, three
 * function pointers and the linkage for a singly-linked list of filters.
 */
struct accept_filter {
	char	accf_name[16];
	/* Same parameter and return types as so_upcall_t. */
	int	(*accf_callback)(struct socket *so, void *arg, int waitflag);
	void	*(*accf_create)(struct socket *so, char *arg);
	void	(*accf_destroy)(struct socket *so);
	SLIST_ENTRY(accept_filter) accf_next;
};
330 
/* The malloc types are declared only if MALLOC_DECLARE is defined. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ACCF);
MALLOC_DECLARE(M_PCB);
MALLOC_DECLARE(M_SONAME);
#endif /* MALLOC_DECLARE */
336 
/*
 * Identifiers of the socket-specific helper hook points.
 * The sequence must stay free of holes because hook registration is a
 * loop; HHOOK_SOCKET_LAST names the final identifier.
 */
#define	HHOOK_SOCKET_OPT		0
#define	HHOOK_SOCKET_CREATE		1
#define	HHOOK_SOCKET_RCV		2
#define	HHOOK_SOCKET_SND		3
#define	HHOOK_FILT_SOREAD		4
#define	HHOOK_FILT_SOWRITE		5
#define	HHOOK_SOCKET_CLOSE		6
#define	HHOOK_SOCKET_LAST		HHOOK_SOCKET_CLOSE
349 
/*
 * Data bundle for the socket helper hooks: a socket, an mbuf, a
 * hook-point-specific context pointer and a status value.
 */
struct socket_hhook_data {
	struct socket	*so;
	struct mbuf	*m;
	void		*hctx;		/* hook point specific data */
	int		status;
};
356 
357 extern int	maxsockets;
358 extern u_long	sb_max;
359 extern so_gen_t so_gencnt;
360 
/* Forward declarations of types used only through pointers below. */
struct file;
struct filecaps;
struct filedesc;
struct mbuf;
struct sockaddr;
struct ucred;
struct uio;
368 
/* Values of the 'which' argument for socket upcalls. */
#define	SO_RCV		1
#define	SO_SND		2

/* Values returned by socket upcalls. */
#define	SU_OK		0
#define	SU_ISCONNECTED	1
376 
377 /*
378  * From uipc_socket and friends
379  */
380 int	getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
381 int	getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
382 	    struct file **fpp, u_int *fflagp, struct filecaps *havecaps);
383 void	soabort(struct socket *so);
384 int	soaccept(struct socket *so, struct sockaddr **nam);
385 void	soaio_enqueue(struct task *task);
386 void	soaio_rcv(void *context, int pending);
387 void	soaio_snd(void *context, int pending);
388 int	socheckuid(struct socket *so, uid_t uid);
389 int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
390 int	sobindat(int fd, struct socket *so, struct sockaddr *nam,
391 	    struct thread *td);
392 int	soclose(struct socket *so);
393 int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
394 int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
395 	    struct thread *td);
396 int	soconnect2(struct socket *so1, struct socket *so2);
397 int	socreate(int dom, struct socket **aso, int type, int proto,
398 	    struct ucred *cred, struct thread *td);
399 int	sodisconnect(struct socket *so);
400 struct	sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
401 void	sofree(struct socket *so);
402 void	sohasoutofband(struct socket *so);
403 int	solisten(struct socket *so, int backlog, struct thread *td);
404 void	solisten_proto(struct socket *so, int backlog);
405 int	solisten_proto_check(struct socket *so);
406 int	solisten_dequeue(struct socket *, struct socket **, int);
407 struct socket *
408 	sonewconn(struct socket *head, int connstatus);
409 struct socket *
410 	sopeeloff(struct socket *);
411 int	sopoll(struct socket *so, int events, struct ucred *active_cred,
412 	    struct thread *td);
413 int	sopoll_generic(struct socket *so, int events,
414 	    struct ucred *active_cred, struct thread *td);
415 int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
416 	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
417 int	soreceive_stream(struct socket *so, struct sockaddr **paddr,
418 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
419 	    int *flagsp);
420 int	soreceive_dgram(struct socket *so, struct sockaddr **paddr,
421 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
422 	    int *flagsp);
423 int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
424 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
425 	    int *flagsp);
426 int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
427 void	sorflush(struct socket *so);
428 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
429 	    struct mbuf *top, struct mbuf *control, int flags,
430 	    struct thread *td);
431 int	sosend_dgram(struct socket *so, struct sockaddr *addr,
432 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
433 	    int flags, struct thread *td);
434 int	sosend_generic(struct socket *so, struct sockaddr *addr,
435 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
436 	    int flags, struct thread *td);
437 int	soshutdown(struct socket *so, int how);
438 void	soupcall_clear(struct socket *, int);
439 void	soupcall_set(struct socket *, int, so_upcall_t, void *);
440 void	solisten_upcall_set(struct socket *, so_upcall_t, void *);
441 void	sowakeup(struct socket *so, struct sockbuf *sb);
442 void	sowakeup_aio(struct socket *so, struct sockbuf *sb);
443 void	solisten_wakeup(struct socket *);
444 int	selsocket(struct socket *so, int events, struct timeval *tv,
445 	    struct thread *td);
446 void	soisconnected(struct socket *so);
447 void	soisconnecting(struct socket *so);
448 void	soisdisconnected(struct socket *so);
449 void	soisdisconnecting(struct socket *so);
450 void	socantrcvmore(struct socket *so);
451 void	socantrcvmore_locked(struct socket *so);
452 void	socantsendmore(struct socket *so);
453 void	socantsendmore_locked(struct socket *so);
454 
/*
 * Accept filter functions.  The sysctl declaration and the generic module
 * event handler are visible only when ACCEPT_FILTER_MOD is defined.
 */
int	accept_filt_add(struct accept_filter *filt);
int	accept_filt_del(char *name);
struct accept_filter *
	accept_filt_get(char *name);
#ifdef ACCEPT_FILTER_MOD
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_accf);
#endif /* SYSCTL_DECL */
int	accept_filt_generic_mod_event(module_t mod, int event, void *data);
#endif /* ACCEPT_FILTER_MOD */
467 
468 #endif /* _KERNEL */
469 
/*
 * Representation of a socket as exported from the kernel to utilities
 * via sysctl(3).  Members are declared with fixed-width types wherever
 * the structure does not carry a pointer, size_t, pid_t or uid_t.
 */
struct xsocket {
	size_t		xso_len;	/* length of this structure */
	/*
	 * Each kernel address shares storage with an int64_t member
	 * (presumably so the slot is 64 bits wide whatever the pointer
	 * size is -- confirm against the consumers).
	 */
	union {
		void	*xso_so;	/* kernel address of struct socket */
		int64_t ph_so;
	};
	union {
		void 	*so_pcb;	/* kernel address of struct inpcb */
		int64_t ph_pcb;
	};
	uint64_t	so_oobmark;
	int64_t		so_spare64[8];
	int32_t		xso_protocol;
	int32_t		xso_family;
	uint32_t	so_qlen;
	uint32_t	so_incqlen;
	uint32_t	so_qlimit;
	pid_t		so_pgid;
	uid_t		so_uid;
	int32_t		so_spare32[8];
	int16_t		so_type;
	int16_t		so_options;
	int16_t		so_linger;
	int16_t		so_state;
	int16_t		so_timeo;
	uint16_t	so_error;
	/* Exported form of a socket buffer; one each for receive and send. */
	struct xsockbuf {
		uint32_t	sb_cc;
		uint32_t	sb_hiwat;
		uint32_t	sb_mbcnt;
		uint32_t	sb_mcnt;
		uint32_t	sb_ccnt;
		uint32_t	sb_mbmax;
		int32_t		sb_lowat;
		int32_t		sb_timeo;
		int16_t		sb_flags;
	} so_rcv, so_snd;
};
511 
#ifdef _KERNEL
/* Kernel-only converters into the exported xsocket / xsockbuf forms. */
void	sotoxsocket(struct socket *so, struct xsocket *xso);
void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
#endif /* _KERNEL */
516 
/*
 * State bits of a socket buffer.  These are exported via libprocstat(3).
 */
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */
523 
524 #endif /* !_SYS_SOCKETVAR_H_ */
525