1 /* $OpenBSD: socketvar.h,v 1.134 2024/09/09 07:38:45 mvs Exp $ */
2 /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */
3
4 /*-
5 * Copyright (c) 1982, 1986, 1990, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)socketvar.h 8.1 (Berkeley) 6/2/93
33 */
34
35 #ifndef _SYS_SOCKETVAR_H_
36 #define _SYS_SOCKETVAR_H_
37
38 #include <sys/event.h>
39 #include <sys/queue.h>
40 #include <sys/sigio.h> /* for struct sigio_ref */
41 #include <sys/task.h>
42 #include <sys/timeout.h>
43 #include <sys/mutex.h>
44 #include <sys/rwlock.h>
45 #include <sys/refcnt.h>
46
47 #ifndef _SOCKLEN_T_DEFINED_
48 #define _SOCKLEN_T_DEFINED_
49 typedef __socklen_t socklen_t; /* length type for network syscalls */
50 #endif
51
52 TAILQ_HEAD(soqhead, socket);
53
/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	a	atomic
 *	mr	sb_mtx of so_rcv buffer
 *	ms	sb_mtx of so_snd buffer
 *	m	sb_mtx
 *	br	sblock() of so_rcv buffer
 *	bs	sblock() of so_snd buffer
 *	s	solock()
 */
65
66 /*
67 * XXXSMP: tcp(4) sockets rely on exclusive solock() for all the cases.
68 */
69
70 /*
71 * Variables for socket splicing, allocated only when needed.
72 */
73 struct sosplice {
74 struct socket *ssp_socket; /* [mr ms] send data to drain socket */
75 struct socket *ssp_soback; /* [ms ms] back ref to source socket */
76 off_t ssp_len; /* [mr] number of bytes spliced */
77 off_t ssp_max; /* [I] maximum number of bytes */
78 struct timeval ssp_idletv; /* [I] idle timeout */
79 struct timeout ssp_idleto;
80 struct task ssp_task; /* task for somove */
81 };
82
83 /*
84 * Variables for socket buffering.
85 */
86 struct sockbuf {
87 struct rwlock sb_lock;
88 struct mutex sb_mtx;
89 /* The following fields are all zeroed on flush. */
90 #define sb_startzero sb_cc
91 u_long sb_cc; /* [m] actual chars in buffer */
92 u_long sb_datacc; /* [m] data only chars in buffer */
93 u_long sb_hiwat; /* [m] max actual char count */
94 u_long sb_wat; /* [m] default watermark */
95 u_long sb_mbcnt; /* [m] chars of mbufs used */
96 u_long sb_mbmax; /* [m] max chars of mbufs to use */
97 long sb_lowat; /* [m] low water mark */
98 struct mbuf *sb_mb; /* [m] the mbuf chain */
99 struct mbuf *sb_mbtail; /* [m] the last mbuf in the chain */
100 struct mbuf *sb_lastrecord; /* [m] first mbuf of last record in
101 socket buffer */
102 short sb_flags; /* [m] flags, see below */
103 /* End area that is zeroed on flush. */
104 #define sb_endzero sb_flags
105 short sb_state; /* [m] socket state on sockbuf */
106 uint64_t sb_timeo_nsecs; /* [m] timeout for read/write */
107 struct klist sb_klist; /* [m] list of knotes */
108 };
109
/* Default for the maximum number of chars in a sockbuf. */
#define	SB_MAX		(2*1024*1024)

/* Bits for sb_flags. */
#define	SB_WAIT		0x0001	/* someone is waiting for data/space */
#define	SB_ASYNC	0x0002	/* ASYNC I/O, need signals */
#define	SB_SPLICE	0x0004	/* buffer is splice source or drain */
#define	SB_NOINTR	0x0008	/* operations not interruptible */
#define	SB_MTXLOCK	0x0010	/* sblock() doesn't need solock() */
116
117 /*
118 * Kernel structure per socket.
119 * Contains send and receive buffer queues,
120 * handle on protocol and pointer to protocol
121 * private data and error information.
122 */
123 struct socket {
124 const struct protosw *so_proto; /* [I] protocol handle */
125 struct rwlock so_lock; /* this socket lock */
126 struct refcnt so_refcnt; /* references to this socket */
127 void *so_pcb; /* [s] protocol control block */
128 u_int so_state; /* [s] internal state flags SS_*,
129 see below */
130 short so_type; /* [I] generic type, see socket.h */
131 short so_options; /* [s] from socket call, see
132 socket.h */
133 short so_linger; /* [s] time to linger while closing */
134 /*
135 * Variables for connection queueing.
136 * Socket where accepts occur is so_head in all subsidiary sockets.
137 * If so_head is 0, socket is not related to an accept.
138 * For head socket so_q0 queues partially completed connections,
139 * while so_q is a queue of connections ready to be accepted.
140 * If a connection is aborted and it has so_head set, then
141 * it has to be pulled out of either so_q0 or so_q.
142 * We allow connections to queue up based on current queue lengths
143 * and limit on number of queued connections for this socket.
144 *
145 * Connections queue relies on both socket locks of listening and
146 * unaccepted sockets. Socket lock of listening socket should be
147 * always taken first.
148 */
149 struct socket *so_head; /* [s] back pointer to accept socket */
150 struct soqhead *so_onq; /* [s] queue (q or q0) that we're on */
151 struct soqhead so_q0; /* [s] queue of partial connections */
152 struct soqhead so_q; /* [s] queue of incoming connections */
153 struct sigio_ref so_sigio; /* async I/O registration */
154 TAILQ_ENTRY(socket) so_qe; /* [s] our queue entry (q or q0) */
155 short so_q0len; /* [s] partials on so_q0 */
156 short so_qlen; /* [s] number of connections on so_q */
157 short so_qlimit; /* [s] max number queued connections */
158 short so_timeo; /* [s] connection timeout */
159 u_long so_oobmark; /* [mr] chars to oob mark */
160 u_int so_error; /* [a] error affecting connection */
161
162 struct sosplice *so_sp; /* [s br] */
163
164 struct sockbuf so_rcv;
165 struct sockbuf so_snd;
166
167 void (*so_upcall)(struct socket *, caddr_t, int); /* [s] */
168 caddr_t so_upcallarg; /* [s] Arg for above */
169 uid_t so_euid; /* [I] who opened the socket */
170 uid_t so_ruid; /* [I] */
171 gid_t so_egid; /* [I] */
172 gid_t so_rgid; /* [I] */
173 pid_t so_cpid; /* [I] pid of process that opened
174 socket */
175 };
176
/*
 * Socket state bits, listed in numeric order.
 *
 * NOTE: four of these states live in the `sb_state' of one particular
 * socket buffer and should be used with that buffer only:
 *
 *	SS_CANTSENDMORE		with `so_snd'
 *	SS_ISSENDING		with `so_snd'
 *	SS_CANTRCVMORE		with `so_rcv'
 *	SS_RCVATMARK		with `so_rcv'
 *
 * The values 0x0100, 0x0200 and 0x0400 are not assigned here.
 */

#define	SS_NOFDREF		0x0001	/* no file table ref any more */
#define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
#define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
#define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
#define	SS_CANTSENDMORE		0x0010	/* can't send more data to peer */
#define	SS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SS_RCVATMARK		0x0040	/* at mark on input */
#define	SS_PRIV			0x0080	/* privileged for broadcast, raw... */
#define	SS_ISDISCONNECTED	0x0800	/* socket disconnected from peer */
#define	SS_CONNECTOUT		0x1000	/* connect, not accept, at this end */
#define	SS_ISSENDING		0x2000	/* hint for lower layer */
#define	SS_DNS			0x4000	/* created using SOCK_DNS socket(2) */
#define	SS_YP			0x8000	/* created using ypconnect(2) */
203
204 #ifdef _KERNEL
205
206 #include <sys/protosw.h>
207 #include <lib/libkern/libkern.h>
208
/*
 * Socket lock assertions, implemented outside this header.  The inline
 * helpers below call them before touching socket state.
 * NOTE(review): presumably the _readonly variant accepts a weaker
 * (shared) hold of the socket lock -- confirm against the implementation.
 */
void	soassertlocked(struct socket *so);
void	soassertlocked_readonly(struct socket *so);
211
212 static inline void
soref(struct socket * so)213 soref(struct socket *so)
214 {
215 refcnt_take(&so->so_refcnt);
216 }
217
218 static inline void
sorele(struct socket * so)219 sorele(struct socket *so)
220 {
221 refcnt_rele_wake(&so->so_refcnt);
222 }
223
224 /*
225 * Macros for sockets and socket buffering.
226 */
227
/*
 * Splicing predicates.  Both evaluate to 0 when the socket has no
 * struct sosplice attached; otherwise they report whether the forward
 * (ssp_socket) or the back (ssp_soback) pointer is set.
 * The `so' argument is evaluated more than once.
 */
#define	isspliced(so)							\
	((so)->so_sp != NULL && (so)->so_sp->ssp_socket != NULL)
#define	issplicedback(so)						\
	((so)->so_sp != NULL && (so)->so_sp->ssp_soback != NULL)
230
231 static inline void
sb_mtx_lock(struct sockbuf * sb)232 sb_mtx_lock(struct sockbuf *sb)
233 {
234 if (sb->sb_flags & SB_MTXLOCK)
235 mtx_enter(&sb->sb_mtx);
236 }
237
238 static inline void
sb_mtx_unlock(struct sockbuf * sb)239 sb_mtx_unlock(struct sockbuf *sb)
240 {
241 if (sb->sb_flags & SB_MTXLOCK)
242 mtx_leave(&sb->sb_mtx);
243 }
244
/*
 * Implemented outside this header; sbspace_locked() calls it for
 * buffers flagged SB_MTXLOCK.
 */
void	sbmtxassertlocked(struct socket *so, struct sockbuf *sb);
246
247 /*
248 * Do we need to notify the other side when I/O is possible?
249 */
250 static inline int
sb_notify(struct socket * so,struct sockbuf * sb)251 sb_notify(struct socket *so, struct sockbuf *sb)
252 {
253 int rv;
254
255 soassertlocked(so);
256
257 mtx_enter(&sb->sb_mtx);
258 rv = ((sb->sb_flags & (SB_WAIT|SB_ASYNC|SB_SPLICE)) != 0 ||
259 !klist_empty(&sb->sb_klist));
260 mtx_leave(&sb->sb_mtx);
261
262 return rv;
263 }
264
265 /*
266 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
267 * This is problematical if the fields are unsigned, as the space might
268 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
269 * overflow and return 0.
270 */
271
272 static inline long
sbspace_locked(struct socket * so,struct sockbuf * sb)273 sbspace_locked(struct socket *so, struct sockbuf *sb)
274 {
275 if (sb->sb_flags & SB_MTXLOCK)
276 sbmtxassertlocked(so, sb);
277 else
278 soassertlocked_readonly(so);
279
280 return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
281 }
282
/*
 * Same result as sbspace_locked(), for callers that do not hold the
 * buffer mutex: the computation is bracketed by sb_mtx_lock() and
 * sb_mtx_unlock(), which only take sb_mtx for SB_MTXLOCK buffers.
 */
static inline long
sbspace(struct socket *so, struct sockbuf *sb)
{
	long avail;

	sb_mtx_lock(sb);
	avail = sbspace_locked(so, sb);
	sb_mtx_unlock(sb);

	return avail;
}
294
/*
 * Do we have to send all at once on a socket?
 * Evaluates to the PR_ATOMIC bit of the protocol's pr_flags (non-zero
 * when set, not normalized to 1).
 */
#define	sosendallatonce(so)	((so)->so_proto->pr_flags & PR_ATOMIC)

/*
 * Are we sending on this socket?
 * Evaluates to the SS_ISSENDING bit of the send buffer's sb_state
 * (non-zero when set, not normalized to 1).
 */
#define	soissending(so)		((so)->so_snd.sb_state & SS_ISSENDING)
302
303 /* can we read something from so? */
304 static inline int
soreadable(struct socket * so)305 soreadable(struct socket *so)
306 {
307 soassertlocked_readonly(so);
308 if (isspliced(so))
309 return 0;
310 return (so->so_rcv.sb_state & SS_CANTRCVMORE) || so->so_qlen ||
311 so->so_error || so->so_rcv.sb_cc >= so->so_rcv.sb_lowat;
312 }
313
314 /* can we write something to so? */
315 static inline int
sowriteable(struct socket * so)316 sowriteable(struct socket *so)
317 {
318 soassertlocked_readonly(so);
319 return ((sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat &&
320 ((so->so_state & SS_ISCONNECTED) ||
321 (so->so_proto->pr_flags & PR_CONNREQUIRED)==0)) ||
322 (so->so_snd.sb_state & SS_CANTSENDMORE) || so->so_error);
323 }
324
325 /* adjust counters in sb reflecting allocation of m */
326 static inline void
sballoc(struct socket * so,struct sockbuf * sb,struct mbuf * m)327 sballoc(struct socket *so, struct sockbuf *sb, struct mbuf *m)
328 {
329 sb->sb_cc += m->m_len;
330 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
331 sb->sb_datacc += m->m_len;
332 sb->sb_mbcnt += MSIZE;
333 if (m->m_flags & M_EXT)
334 sb->sb_mbcnt += m->m_ext.ext_size;
335 }
336
337 /* adjust counters in sb reflecting freeing of m */
338 static inline void
sbfree(struct socket * so,struct sockbuf * sb,struct mbuf * m)339 sbfree(struct socket *so, struct sockbuf *sb, struct mbuf *m)
340 {
341 sb->sb_cc -= m->m_len;
342 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
343 sb->sb_datacc -= m->m_len;
344 sb->sb_mbcnt -= MSIZE;
345 if (m->m_flags & M_EXT)
346 sb->sb_mbcnt -= m->m_ext.ext_size;
347 }
348
/*
 * Flags accepted by sblock().
 */
#define	SBL_WAIT	0x01	/* Wait if lock not immediately available. */
#define	SBL_NOINTR	0x02	/* Enforce non-interruptible sleep. */

/*
 * Lock sockbuf `sb', sleeping if the lock is already held.  The sleep
 * is interruptible unless SB_NOINTR is set on the sockbuf or
 * SBL_NOINTR is passed in `flags'.  If the sleep is interrupted an
 * error is returned and the lock is not held.
 */
int	sblock(struct sockbuf *sb, int flags);

/* Release the lock on sockbuf `sb'. */
void	sbunlock(struct sockbuf *sb);
365
366 static inline void
sbassertlocked(struct sockbuf * sb)367 sbassertlocked(struct sockbuf *sb)
368 {
369 rw_assert_wrlock(&sb->sb_lock);
370 }
371
/*
 * When the mbuf chain of `sb' is empty (sb_mb == NULL), also clear the
 * sb_mbtail and sb_lastrecord pointers.  A non-empty buffer is left
 * alone.  The `sb' argument is evaluated more than once.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb != NULL)					\
		break;							\
	(sb)->sb_mbtail = NULL;						\
	(sb)->sb_lastrecord = NULL;					\
} while (/*CONSTCOND*/0)
378
379 extern u_long sb_max;
380
381 extern struct pool socket_pool;
382
383 struct mbuf;
384 struct sockaddr;
385 struct proc;
386 struct msghdr;
387 struct stat;
388 struct knote;
389
390 /*
391 * File operations on sockets.
392 */
393 int soo_read(struct file *, struct uio *, int);
394 int soo_write(struct file *, struct uio *, int);
395 int soo_ioctl(struct file *, u_long, caddr_t, struct proc *);
396 int soo_kqfilter(struct file *, struct knote *);
397 int soo_close(struct file *, struct proc *);
398 int soo_stat(struct file *, struct stat *, struct proc *);
399 void sbappend(struct socket *, struct sockbuf *, struct mbuf *);
400 void sbappendstream(struct socket *, struct sockbuf *, struct mbuf *);
401 int sbappendaddr(struct socket *, struct sockbuf *,
402 const struct sockaddr *, struct mbuf *, struct mbuf *);
403 int sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *,
404 struct mbuf *);
405 void sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *);
406 void sbcompress(struct socket *, struct sockbuf *, struct mbuf *,
407 struct mbuf *);
408 struct mbuf *
409 sbcreatecontrol(const void *, size_t, int, int);
410 void sbdrop(struct socket *, struct sockbuf *, int);
411 void sbdroprecord(struct socket *, struct sockbuf *);
412 void sbflush(struct socket *, struct sockbuf *);
413 void sbrelease(struct socket *, struct sockbuf *);
414 int sbcheckreserve(u_long, u_long);
415 int sbchecklowmem(void);
416 int sbreserve(struct socket *, struct sockbuf *, u_long);
417 int sbwait(struct socket *, struct sockbuf *);
418 void soinit(void);
419 void soabort(struct socket *);
420 int soaccept(struct socket *, struct mbuf *);
421 int sobind(struct socket *, struct mbuf *, struct proc *);
422 void socantrcvmore(struct socket *);
423 void socantsendmore(struct socket *);
424 int soclose(struct socket *, int);
425 int soconnect(struct socket *, struct mbuf *);
426 int soconnect2(struct socket *, struct socket *);
427 int socreate(int, struct socket **, int, int);
428 int sodisconnect(struct socket *);
429 struct socket *soalloc(const struct protosw *, int);
430 void sofree(struct socket *, int);
431 int sogetopt(struct socket *, int, int, struct mbuf *);
432 void sohasoutofband(struct socket *);
433 void soisconnected(struct socket *);
434 void soisconnecting(struct socket *);
435 void soisdisconnected(struct socket *);
436 void soisdisconnecting(struct socket *);
437 int solisten(struct socket *, int);
438 struct socket *sonewconn(struct socket *, int, int);
439 void soqinsque(struct socket *, struct socket *, int);
440 int soqremque(struct socket *, int);
441 int soreceive(struct socket *, struct mbuf **, struct uio *,
442 struct mbuf **, struct mbuf **, int *, socklen_t);
443 int soreserve(struct socket *, u_long, u_long);
444 int sosend(struct socket *, struct mbuf *, struct uio *,
445 struct mbuf *, struct mbuf *, int);
446 int sosetopt(struct socket *, int, int, struct mbuf *);
447 int soshutdown(struct socket *, int);
448 void sowakeup(struct socket *, struct sockbuf *);
449 void sorwakeup(struct socket *);
450 void sowwakeup(struct socket *);
451 int sockargs(struct mbuf **, const void *, size_t, int);
452
453 int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
454 void solock(struct socket *);
455 void solock_shared(struct socket *);
456 int solock_persocket(struct socket *);
457 void solock_pair(struct socket *, struct socket *);
458 void sounlock(struct socket *);
459 void sounlock_shared(struct socket *);
460
461 int sendit(struct proc *, int, struct msghdr *, int, register_t *);
462 int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
463 int doaccept(struct proc *, int, struct sockaddr *, socklen_t *, int,
464 register_t *);
465
/*
 * Socket buffer consistency-check hooks.  Without SOCKBUF_DEBUG the
 * three macros expand to nothing; with it each one forwards its
 * arguments to the checking function of the same (lower-case) name.
 */
#ifndef SOCKBUF_DEBUG
#define	SBLASTRECORDCHK(sb, where)	/* nothing */
#define	SBLASTMBUFCHK(sb, where)	/* nothing */
#define	SBCHECK(so, sb)			/* nothing */
#else
void	sblastrecordchk(struct sockbuf *, const char *);
void	sblastmbufchk(struct sockbuf *, const char *);
void	sbcheck(struct socket *, struct sockbuf *);

#define	SBLASTRECORDCHK(sb, where)	sblastrecordchk((sb), (where))
#define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
#define	SBCHECK(so, sb)			sbcheck((so), (sb))
#endif /* !SOCKBUF_DEBUG */
479
480 #endif /* _KERNEL */
481
482 #endif /* _SYS_SOCKETVAR_H_ */
483