xref: /openbsd/sys/sys/socketvar.h (revision e3b7649a)
1 /*	$OpenBSD: socketvar.h,v 1.134 2024/09/09 07:38:45 mvs Exp $	*/
2 /*	$NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)socketvar.h	8.1 (Berkeley) 6/2/93
33  */
34 
35 #ifndef _SYS_SOCKETVAR_H_
36 #define _SYS_SOCKETVAR_H_
37 
38 #include <sys/event.h>
39 #include <sys/queue.h>
40 #include <sys/sigio.h>				/* for struct sigio_ref */
41 #include <sys/task.h>
42 #include <sys/timeout.h>
43 #include <sys/mutex.h>
44 #include <sys/rwlock.h>
45 #include <sys/refcnt.h>
46 
47 #ifndef	_SOCKLEN_T_DEFINED_
48 #define	_SOCKLEN_T_DEFINED_
49 typedef	__socklen_t	socklen_t;	/* length type for network syscalls */
50 #endif
51 
52 TAILQ_HEAD(soqhead, socket);
53 
54 /*
55  * Locks used to protect global data and struct members:
56  *	I	immutable after creation
57  *	a	atomic
58  *	mr	sb_mtx of so_rcv buffer
59  *	ms	sb_mtx of so_snd buffer
60  *	m	sb_mtx
61  *	br	sblock() of so_rcv buffer
62  *	bs	sblock() of so_snd buffer
63  *	s	solock()
64  */
65 
66 /*
67  * XXXSMP: tcp(4) sockets rely on exclusive solock() for all the cases.
68  */
69 
70 /*
71  * Variables for socket splicing, allocated only when needed.
72  */
73 struct sosplice {
74 	struct	socket *ssp_socket;	/* [mr ms] send data to drain socket */
75 	struct	socket *ssp_soback;	/* [ms ms] back ref to source socket */
76 	off_t	ssp_len;		/* [mr] number of bytes spliced */
77 	off_t	ssp_max;		/* [I] maximum number of bytes */
78 	struct	timeval ssp_idletv;	/* [I] idle timeout */
79 	struct	timeout ssp_idleto;
80 	struct	task ssp_task;		/* task for somove */
81 };
82 
83 /*
84  * Variables for socket buffering.
85  */
86 struct sockbuf {
87 	struct rwlock sb_lock;
88 	struct mutex  sb_mtx;
89 /* The following fields are all zeroed on flush. */
90 #define	sb_startzero	sb_cc
91 	u_long	sb_cc;			/* [m] actual chars in buffer */
92 	u_long	sb_datacc;		/* [m] data only chars in buffer */
93 	u_long	sb_hiwat;		/* [m] max actual char count */
94 	u_long  sb_wat;			/* [m] default watermark */
95 	u_long	sb_mbcnt;		/* [m] chars of mbufs used */
96 	u_long	sb_mbmax;		/* [m] max chars of mbufs to use */
97 	long	sb_lowat;		/* [m] low water mark */
98 	struct mbuf *sb_mb;		/* [m] the mbuf chain */
99 	struct mbuf *sb_mbtail;		/* [m] the last mbuf in the chain */
100 	struct mbuf *sb_lastrecord;	/* [m] first mbuf of last record in
101 					    socket buffer */
102 	short	sb_flags;		/* [m] flags, see below */
103 /* End area that is zeroed on flush. */
104 #define	sb_endzero	sb_flags
105 	short	sb_state;		/* [m] socket state on sockbuf */
106 	uint64_t sb_timeo_nsecs;	/* [m] timeout for read/write */
107 	struct klist sb_klist;		/* [m] list of knotes */
108 };
109 
110 #define SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
111 #define SB_WAIT		0x0001		/* someone is waiting for data/space */
112 #define SB_ASYNC	0x0002		/* ASYNC I/O, need signals */
113 #define SB_SPLICE	0x0004		/* buffer is splice source or drain */
114 #define SB_NOINTR	0x0008		/* operations not interruptible */
115 #define SB_MTXLOCK	0x0010		/* sblock() doesn't need solock() */
116 
117 /*
118  * Kernel structure per socket.
119  * Contains send and receive buffer queues,
120  * handle on protocol and pointer to protocol
121  * private data and error information.
122  */
123 struct socket {
124 	const struct protosw *so_proto;	/* [I] protocol handle */
125 	struct rwlock so_lock;		/* this socket lock */
126 	struct refcnt so_refcnt;	/* references to this socket */
127 	void	*so_pcb;		/* [s] protocol control block */
128 	u_int	so_state;		/* [s] internal state flags SS_*,
129 					    see below */
130 	short	so_type;		/* [I] generic type, see socket.h */
131 	short	so_options;		/* [s] from socket call, see
132 					    socket.h */
133 	short	so_linger;		/* [s] time to linger while closing */
134 /*
135  * Variables for connection queueing.
136  * Socket where accepts occur is so_head in all subsidiary sockets.
137  * If so_head is 0, socket is not related to an accept.
138  * For head socket so_q0 queues partially completed connections,
139  * while so_q is a queue of connections ready to be accepted.
140  * If a connection is aborted and it has so_head set, then
141  * it has to be pulled out of either so_q0 or so_q.
142  * We allow connections to queue up based on current queue lengths
143  * and limit on number of queued connections for this socket.
144  *
145  * Connections queue relies on both socket locks of listening and
146  * unaccepted sockets. Socket lock of listening socket should be
147  * always taken first.
148  */
149 	struct	socket	*so_head;	/* [s] back pointer to accept socket */
150 	struct	soqhead	*so_onq;	/* [s] queue (q or q0) that we're on */
151 	struct	soqhead	so_q0;		/* [s] queue of partial connections */
152 	struct	soqhead	so_q;		/* [s] queue of incoming connections */
153 	struct	sigio_ref so_sigio;	/* async I/O registration */
154 	TAILQ_ENTRY(socket) so_qe;	/* [s] our queue entry (q or q0) */
155 	short	so_q0len;		/* [s] partials on so_q0 */
156 	short	so_qlen;		/* [s] number of connections on so_q */
157 	short	so_qlimit;		/* [s] max number queued connections */
158 	short	so_timeo;		/* [s] connection timeout */
159 	u_long	so_oobmark;		/* [mr] chars to oob mark */
160 	u_int	so_error;		/* [a] error affecting connection */
161 
162 	struct sosplice *so_sp;		/* [s br] */
163 
164 	struct sockbuf so_rcv;
165 	struct sockbuf so_snd;
166 
167 	void	(*so_upcall)(struct socket *, caddr_t, int); /* [s] */
168 	caddr_t	so_upcallarg;		/* [s] Arg for above */
169 	uid_t	so_euid;		/* [I] who opened the socket */
170 	uid_t	so_ruid;		/* [I] */
171 	gid_t	so_egid;		/* [I] */
172 	gid_t	so_rgid;		/* [I] */
173 	pid_t	so_cpid;		/* [I] pid of process that opened
174 					    socket */
175 };
176 
177 /*
178  * Socket state bits.
179  *
180  * NOTE: The following states should be used with corresponding socket's
181  * buffer `sb_state' only:
182  *
183  *	SS_CANTSENDMORE		with `so_snd'
184  *	SS_ISSENDING		with `so_snd'
185  *	SS_CANTRCVMORE		with `so_rcv'
186  *	SS_RCVATMARK		with `so_rcv'
187  */
188 
189 #define	SS_NOFDREF		0x001	/* no file table ref any more */
190 #define	SS_ISCONNECTED		0x002	/* socket connected to a peer */
191 #define	SS_ISCONNECTING		0x004	/* in process of connecting to peer */
192 #define	SS_ISDISCONNECTING	0x008	/* in process of disconnecting */
193 #define	SS_CANTSENDMORE		0x010	/* can't send more data to peer */
194 #define	SS_CANTRCVMORE		0x020	/* can't receive more data from peer */
195 #define	SS_RCVATMARK		0x040	/* at mark on input */
196 #define	SS_ISDISCONNECTED	0x800	/* socket disconnected from peer */
197 
198 #define	SS_PRIV			0x080	/* privileged for broadcast, raw... */
199 #define	SS_CONNECTOUT		0x1000	/* connect, not accept, at this end */
200 #define	SS_ISSENDING		0x2000	/* hint for lower layer */
201 #define	SS_DNS			0x4000	/* created using SOCK_DNS socket(2) */
202 #define	SS_YP			0x8000	/* created using ypconnect(2) */
203 
204 #ifdef _KERNEL
205 
206 #include <sys/protosw.h>
207 #include <lib/libkern/libkern.h>
208 
209 void	soassertlocked(struct socket *);
210 void	soassertlocked_readonly(struct socket *);
211 
212 static inline void
soref(struct socket * so)213 soref(struct socket *so)
214 {
215 	refcnt_take(&so->so_refcnt);
216 }
217 
218 static inline void
sorele(struct socket * so)219 sorele(struct socket *so)
220 {
221 	refcnt_rele_wake(&so->so_refcnt);
222 }
223 
224 /*
225  * Macros for sockets and socket buffering.
226  */
227 
/*
 * isspliced():     so has splicing state and a drain socket (ssp_socket).
 * issplicedback(): so has splicing state and a source socket (ssp_soback).
 */
#define isspliced(so)							\
	((so)->so_sp != NULL && (so)->so_sp->ssp_socket != NULL)
#define issplicedback(so)						\
	((so)->so_sp != NULL && (so)->so_sp->ssp_soback != NULL)
230 
231 static inline void
sb_mtx_lock(struct sockbuf * sb)232 sb_mtx_lock(struct sockbuf *sb)
233 {
234 	if (sb->sb_flags & SB_MTXLOCK)
235 		mtx_enter(&sb->sb_mtx);
236 }
237 
238 static inline void
sb_mtx_unlock(struct sockbuf * sb)239 sb_mtx_unlock(struct sockbuf *sb)
240 {
241 	if (sb->sb_flags & SB_MTXLOCK)
242 		mtx_leave(&sb->sb_mtx);
243 }
244 
245 void	sbmtxassertlocked(struct socket *so, struct sockbuf *);
246 
247 /*
248  * Do we need to notify the other side when I/O is possible?
249  */
250 static inline int
sb_notify(struct socket * so,struct sockbuf * sb)251 sb_notify(struct socket *so, struct sockbuf *sb)
252 {
253 	int rv;
254 
255 	soassertlocked(so);
256 
257 	mtx_enter(&sb->sb_mtx);
258 	rv = ((sb->sb_flags & (SB_WAIT|SB_ASYNC|SB_SPLICE)) != 0 ||
259 	    !klist_empty(&sb->sb_klist));
260 	mtx_leave(&sb->sb_mtx);
261 
262 	return rv;
263 }
264 
265 /*
266  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
267  * This is problematical if the fields are unsigned, as the space might
268  * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
269  * overflow and return 0.
270  */
271 
272 static inline long
sbspace_locked(struct socket * so,struct sockbuf * sb)273 sbspace_locked(struct socket *so, struct sockbuf *sb)
274 {
275 	if (sb->sb_flags & SB_MTXLOCK)
276 		sbmtxassertlocked(so, sb);
277 	else
278 		soassertlocked_readonly(so);
279 
280 	return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
281 }
282 
/*
 * Same as sbspace_locked(), but takes and releases the buffer mutex
 * around the computation via sb_mtx_lock()/sb_mtx_unlock().
 */
static inline long
sbspace(struct socket *so, struct sockbuf *sb)
{
	long avail;

	sb_mtx_lock(sb);
	avail = sbspace_locked(so, sb);
	sb_mtx_unlock(sb);

	return avail;
}
294 
/*
 * Do we have to send all at once on a socket?
 * Evaluates to the PR_ATOMIC bit of the protocol's pr_flags.
 */
#define	sosendallatonce(so)	((so)->so_proto->pr_flags & PR_ATOMIC)
298 
/*
 * Are we sending on this socket?
 * Evaluates to the SS_ISSENDING bit of the send buffer's sb_state.
 */
#define	soissending(so)		((so)->so_snd.sb_state & SS_ISSENDING)
302 
303 /* can we read something from so? */
304 static inline int
soreadable(struct socket * so)305 soreadable(struct socket *so)
306 {
307 	soassertlocked_readonly(so);
308 	if (isspliced(so))
309 		return 0;
310 	return (so->so_rcv.sb_state & SS_CANTRCVMORE) || so->so_qlen ||
311 	    so->so_error || so->so_rcv.sb_cc >= so->so_rcv.sb_lowat;
312 }
313 
314 /* can we write something to so? */
315 static inline int
sowriteable(struct socket * so)316 sowriteable(struct socket *so)
317 {
318 	soassertlocked_readonly(so);
319 	return ((sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat &&
320 	    ((so->so_state & SS_ISCONNECTED) ||
321 	    (so->so_proto->pr_flags & PR_CONNREQUIRED)==0)) ||
322 	    (so->so_snd.sb_state & SS_CANTSENDMORE) || so->so_error);
323 }
324 
325 /* adjust counters in sb reflecting allocation of m */
326 static inline void
sballoc(struct socket * so,struct sockbuf * sb,struct mbuf * m)327 sballoc(struct socket *so, struct sockbuf *sb, struct mbuf *m)
328 {
329 	sb->sb_cc += m->m_len;
330 	if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
331 		sb->sb_datacc += m->m_len;
332 	sb->sb_mbcnt += MSIZE;
333 	if (m->m_flags & M_EXT)
334 		sb->sb_mbcnt += m->m_ext.ext_size;
335 }
336 
337 /* adjust counters in sb reflecting freeing of m */
338 static inline void
sbfree(struct socket * so,struct sockbuf * sb,struct mbuf * m)339 sbfree(struct socket *so, struct sockbuf *sb, struct mbuf *m)
340 {
341 	sb->sb_cc -= m->m_len;
342 	if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
343 		sb->sb_datacc -= m->m_len;
344 	sb->sb_mbcnt -= MSIZE;
345 	if (m->m_flags & M_EXT)
346 		sb->sb_mbcnt -= m->m_ext.ext_size;
347 }
348 
349 /*
350  * Flags to sblock()
351  */
352 #define SBL_WAIT	0x01	/* Wait if lock not immediately available. */
353 #define SBL_NOINTR	0x02	/* Enforce non-interruptible sleep. */
354 
355 /*
356  * Set lock on sockbuf sb; sleep if lock is already held.
357  * Unless SB_NOINTR is set on sockbuf or SBL_NOINTR passed,
358  * sleep is interruptible. Returns error without lock if
359  * sleep is interrupted.
360  */
361 int sblock(struct sockbuf *, int);
362 
363 /* release lock on sockbuf sb */
364 void sbunlock(struct sockbuf *);
365 
366 static inline void
sbassertlocked(struct sockbuf * sb)367 sbassertlocked(struct sockbuf *sb)
368 {
369 	rw_assert_wrlock(&sb->sb_lock);
370 }
371 
/*
 * When the mbuf chain of `sb' is empty (sb_mb == NULL), clear the
 * sb_mbtail and sb_lastrecord pointers as well; a non-empty buffer is
 * left alone.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb != NULL)					\
		break;							\
	(sb)->sb_lastrecord = NULL;					\
	(sb)->sb_mbtail = NULL;						\
} while (/*CONSTCOND*/0)
378 
379 extern u_long sb_max;
380 
381 extern struct pool	socket_pool;
382 
383 struct mbuf;
384 struct sockaddr;
385 struct proc;
386 struct msghdr;
387 struct stat;
388 struct knote;
389 
390 /*
391  * File operations on sockets.
392  */
393 int	soo_read(struct file *, struct uio *, int);
394 int	soo_write(struct file *, struct uio *, int);
395 int	soo_ioctl(struct file *, u_long, caddr_t, struct proc *);
396 int	soo_kqfilter(struct file *, struct knote *);
397 int 	soo_close(struct file *, struct proc *);
398 int	soo_stat(struct file *, struct stat *, struct proc *);
399 void	sbappend(struct socket *, struct sockbuf *, struct mbuf *);
400 void	sbappendstream(struct socket *, struct sockbuf *, struct mbuf *);
401 int	sbappendaddr(struct socket *, struct sockbuf *,
402 	    const struct sockaddr *, struct mbuf *, struct mbuf *);
403 int	sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *,
404 	    struct mbuf *);
405 void	sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *);
406 void	sbcompress(struct socket *, struct sockbuf *, struct mbuf *,
407 	    struct mbuf *);
408 struct mbuf *
409 	sbcreatecontrol(const void *, size_t, int, int);
410 void	sbdrop(struct socket *, struct sockbuf *, int);
411 void	sbdroprecord(struct socket *, struct sockbuf *);
412 void	sbflush(struct socket *, struct sockbuf *);
413 void	sbrelease(struct socket *, struct sockbuf *);
414 int	sbcheckreserve(u_long, u_long);
415 int	sbchecklowmem(void);
416 int	sbreserve(struct socket *, struct sockbuf *, u_long);
417 int	sbwait(struct socket *, struct sockbuf *);
418 void	soinit(void);
419 void	soabort(struct socket *);
420 int	soaccept(struct socket *, struct mbuf *);
421 int	sobind(struct socket *, struct mbuf *, struct proc *);
422 void	socantrcvmore(struct socket *);
423 void	socantsendmore(struct socket *);
424 int	soclose(struct socket *, int);
425 int	soconnect(struct socket *, struct mbuf *);
426 int	soconnect2(struct socket *, struct socket *);
427 int	socreate(int, struct socket **, int, int);
428 int	sodisconnect(struct socket *);
429 struct socket *soalloc(const struct protosw *, int);
430 void	sofree(struct socket *, int);
431 int	sogetopt(struct socket *, int, int, struct mbuf *);
432 void	sohasoutofband(struct socket *);
433 void	soisconnected(struct socket *);
434 void	soisconnecting(struct socket *);
435 void	soisdisconnected(struct socket *);
436 void	soisdisconnecting(struct socket *);
437 int	solisten(struct socket *, int);
438 struct socket *sonewconn(struct socket *, int, int);
439 void	soqinsque(struct socket *, struct socket *, int);
440 int	soqremque(struct socket *, int);
441 int	soreceive(struct socket *, struct mbuf **, struct uio *,
442 	    struct mbuf **, struct mbuf **, int *, socklen_t);
443 int	soreserve(struct socket *, u_long, u_long);
444 int	sosend(struct socket *, struct mbuf *, struct uio *,
445 	    struct mbuf *, struct mbuf *, int);
446 int	sosetopt(struct socket *, int, int, struct mbuf *);
447 int	soshutdown(struct socket *, int);
448 void	sowakeup(struct socket *, struct sockbuf *);
449 void	sorwakeup(struct socket *);
450 void	sowwakeup(struct socket *);
451 int	sockargs(struct mbuf **, const void *, size_t, int);
452 
453 int	sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
454 void	solock(struct socket *);
455 void	solock_shared(struct socket *);
456 int	solock_persocket(struct socket *);
457 void	solock_pair(struct socket *, struct socket *);
458 void	sounlock(struct socket *);
459 void	sounlock_shared(struct socket *);
460 
461 int	sendit(struct proc *, int, struct msghdr *, int, register_t *);
462 int	recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
463 int	doaccept(struct proc *, int, struct sockaddr *, socklen_t *, int,
464 	    register_t *);
465 
466 #ifdef SOCKBUF_DEBUG
467 void	sblastrecordchk(struct sockbuf *, const char *);
468 #define	SBLASTRECORDCHK(sb, where)	sblastrecordchk((sb), (where))
469 
470 void	sblastmbufchk(struct sockbuf *, const char *);
471 #define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
472 void	sbcheck(struct socket *, struct sockbuf *);
473 #define	SBCHECK(so, sb)			sbcheck((so), (sb))
474 #else
475 #define	SBLASTRECORDCHK(sb, where)	/* nothing */
476 #define	SBLASTMBUFCHK(sb, where)	/* nothing */
477 #define	SBCHECK(so, sb)			/* nothing */
478 #endif /* SOCKBUF_DEBUG */
479 
480 #endif /* _KERNEL */
481 
482 #endif /* _SYS_SOCKETVAR_H_ */
483