/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31 #ifndef _SYS_SOCKBUF_H_
32 #define _SYS_SOCKBUF_H_
33
/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 *
 * Each constant is a distinct single bit; together they occupy bits 0-15.
 * NOTE(review): sb_flags is declared as a short in struct sockbuf, so
 * SB_TLS_RX_RESYNC (0x8000) lands on the sign bit wherever short is 16
 * bits wide -- confirm that no caller compares sb_flags numerically.
 */
#define	SB_TLS_RX	0x01		/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
#define	SB_WAIT		0x04		/* someone is waiting for data/space */
#define	SB_SEL		0x08		/* someone is selecting */
#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20		/* someone wants an upcall */
#define	SB_NOINTR	0x40		/* operations not interruptible */
#define	SB_AIO		0x80		/* AIO operations queued */
#define	SB_KNOTE	0x100		/* kernel note attached */
#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
#define	SB_STOP		0x1000		/* backpressure indicator */
#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
#define	SB_UNUSED	0x4000		/* previously used for SB_TLS_IFNET */
#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */

/*
 * SBS_* state bits.
 * NOTE(review): presumably stored in the sb_state field of struct sockbuf --
 * confirm against the users of these constants.
 */
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */
57
58 #if defined(_KERNEL) || defined(_WANT_SOCKET)
59 #include <sys/_lock.h>
60 #include <sys/_mutex.h>
61 #include <sys/_sx.h>
62 #include <sys/_task.h>
63
#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */

/*
 * Forward declarations: only pointers to these types appear in this header,
 * so their full definitions are not needed here.
 */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;
73
74 /*
75 * Socket buffer
76 *
77 * A buffer starts with the fields that are accessed by I/O multiplexing
78 * APIs like select(2), kevent(2) or AIO and thus are shared between different
79 * buffer implementations. They are protected by the SOCK_RECVBUF_LOCK()
80 * or SOCK_SENDBUF_LOCK() of the owning socket.
81 *
82 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
83 * methods.
84 *
85 * Protocol specific implementations follow in a union.
86 */
87 struct sockbuf {
88 struct selinfo *sb_sel; /* process selecting read/write */
89 short sb_state; /* socket state on sockbuf */
90 short sb_flags; /* flags, see above */
91 u_int sb_acc; /* available chars in buffer */
92 u_int sb_ccc; /* claimed chars in buffer */
93 u_int sb_mbcnt; /* chars of mbufs used */
94 u_int sb_ctl; /* non-data chars in buffer */
95 u_int sb_hiwat; /* max actual char count */
96 u_int sb_lowat; /* low water mark */
97 u_int sb_mbmax; /* max chars of mbufs to use */
98 sbintime_t sb_timeo; /* timeout for read/write */
99 int (*sb_upcall)(struct socket *, void *, int);
100 void *sb_upcallarg;
101 TAILQ_HEAD(, kaiocb) sb_aiojobq; /* pending AIO ops */
102 struct task sb_aiotask; /* AIO task */
103 union {
104 /*
105 * Classic BSD one-size-fits-all socket buffer, capable of
106 * doing streams and datagrams. The stream part is able
107 * to perform special features:
108 * - not ready data (sendfile)
109 * - TLS
110 */
111 struct {
112 /* compat: sockbuf lock pointer */
113 struct mtx *sb_mtx;
114 /* first and last mbufs in the chain */
115 struct mbuf *sb_mb;
116 struct mbuf *sb_mbtail;
117 /* first mbuf of last record in socket buffer */
118 struct mbuf *sb_lastrecord;
119 /* pointer to data to send next (TCP */
120 struct mbuf *sb_sndptr;
121 /* pointer to first not ready buffer */
122 struct mbuf *sb_fnrdy;
123 /* byte offset of ptr into chain, used with sb_sndptr */
124 u_int sb_sndptroff;
125 /* TLS */
126 u_int sb_tlscc; /* TLS chain characters */
127 u_int sb_tlsdcc; /* characters being decrypted */
128 struct mbuf *sb_mtls; /* TLS mbuf chain */
129 struct mbuf *sb_mtlstail; /* last mbuf in TLS chain */
130 uint64_t sb_tls_seqno; /* TLS seqno */
131 struct ktls_session *sb_tls_info; /* TLS state */
132 };
133 /*
134 * PF_UNIX/SOCK_DGRAM
135 *
136 * Local protocol, thus we should buffer on the receive side
137 * only. However, in one to many configuration we don't want
138 * a single receive buffer to be shared. So we would link
139 * send buffers onto receive buffer. All the fields are locked
140 * by the receive buffer lock.
141 */
142 struct {
143 /*
144 * For receive buffer: own queue of this buffer for
145 * unconnected sends. For send buffer: queue lended
146 * to the peer receive buffer, to isolate ourselves
147 * from other senders.
148 */
149 STAILQ_HEAD(, mbuf) uxdg_mb;
150 /* For receive buffer: datagram seen via MSG_PEEK. */
151 struct mbuf *uxdg_peeked;
152 /*
153 * For receive buffer: queue of send buffers of
154 * connected peers. For send buffer: linkage on
155 * connected peer receive buffer queue.
156 */
157 union {
158 TAILQ_HEAD(, sockbuf) uxdg_conns;
159 TAILQ_ENTRY(sockbuf) uxdg_clist;
160 };
161 /* Counters for this buffer uxdg_mb chain + peeked. */
162 u_int uxdg_cc;
163 u_int uxdg_ctl;
164 u_int uxdg_mbcnt;
165 };
166 /*
167 * Netlink socket.
168 */
169 struct {
170 TAILQ_HEAD(, nl_buf) nl_queue;
171 };
172 };
173 };
174
175 #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */
176 #ifdef _KERNEL
177
/*
 * 'which' values for KPIs that operate on one buffer of a socket.
 * The enumerators keep their implicit values: SO_RCV is 0 and SO_SND is 1.
 */
typedef enum {
	SO_RCV,
	SO_SND
} sb_which;
180
/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket.  The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 *
 * SOCKBUF_MTX() yields the sb_mtx pointer of the buffer; every other macro
 * passes that pointer to the matching mtx_*() call.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
194
/*
 * Socket buffer private mbuf(9) flags.
 * These are aliases for M_PROTO1 and M_PROTO2, which must be defined by the
 * includer before the aliases are used.
 */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
201
202 void sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
203 void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
204 void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
205 void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
206 int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
207 struct mbuf *m0, struct mbuf *control);
208 int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
209 struct mbuf *m0, struct mbuf *control);
210 int sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
211 const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
212 void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
213 struct mbuf *control, int flags);
214 void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
215 struct mbuf *control, int flags);
216 void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
217 void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
218 void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
219 struct mbuf *
220 sbcreatecontrol(const void *p, u_int size, int type, int level,
221 int wait);
222 void sbdestroy(struct socket *, sb_which);
223 void sbdrop(struct sockbuf *sb, int len);
224 void sbdrop_locked(struct sockbuf *sb, int len);
225 struct mbuf *
226 sbcut_locked(struct sockbuf *sb, int len);
227 void sbdroprecord(struct sockbuf *sb);
228 void sbdroprecord_locked(struct sockbuf *sb);
229 void sbflush(struct sockbuf *sb);
230 void sbflush_locked(struct sockbuf *sb);
231 void sbrelease(struct socket *, sb_which);
232 void sbrelease_locked(struct socket *, sb_which);
233 int sbsetopt(struct socket *so, struct sockopt *);
234 bool sbreserve_locked(struct socket *so, sb_which which, u_long cc,
235 struct thread *td);
236 bool sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
237 u_long buf_max, struct thread *td);
238 void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
239 struct mbuf *
240 sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
241 struct mbuf *
242 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
243 int sbwait(struct socket *, sb_which);
244 void sballoc(struct sockbuf *, struct mbuf *);
245 void sbfree(struct sockbuf *, struct mbuf *);
246 void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
247 void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
248 int sbready(struct sockbuf *, struct mbuf *, int);
249
250 /*
251 * Return how much data is available to be taken out of socket
252 * buffer right now.
253 */
254 static inline u_int
sbavail(struct sockbuf * sb)255 sbavail(struct sockbuf *sb)
256 {
257
258 #if 0
259 SOCKBUF_LOCK_ASSERT(sb);
260 #endif
261 return (sb->sb_acc);
262 }
263
264 /*
265 * Return how much data sits there in the socket buffer
266 * It might be that some data is not yet ready to be read.
267 */
268 static inline u_int
sbused(struct sockbuf * sb)269 sbused(struct sockbuf *sb)
270 {
271
272 #if 0
273 SOCKBUF_LOCK_ASSERT(sb);
274 #endif
275 return (sb->sb_ccc);
276 }
277
278 /*
279 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
280 * This is problematical if the fields are unsigned, as the space might
281 * still be negative (ccc > hiwat or mbcnt > mbmax).
282 */
283 static inline long
sbspace(struct sockbuf * sb)284 sbspace(struct sockbuf *sb)
285 {
286 int bleft, mleft; /* size should match sockbuf fields */
287
288 #if 0
289 SOCKBUF_LOCK_ASSERT(sb);
290 #endif
291
292 if (sb->sb_flags & SB_STOP)
293 return(0);
294
295 bleft = sb->sb_hiwat - sb->sb_ccc;
296 mleft = sb->sb_mbmax - sb->sb_mbcnt;
297
298 return ((bleft < mleft) ? bleft : mleft);
299 }
300
/*
 * If the buffer's mbuf chain is empty (sb_mb == NULL), also clear the
 * sb_mbtail and sb_lastrecord pointers; otherwise leave them untouched.
 * The argument is expanded more than once, so it must not have side effects.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_mbtail = NULL;					\
		(sb)->sb_lastrecord = NULL;				\
	}								\
} while (/*CONSTCOND*/0)
307
/*
 * Consistency-check hooks.  With SOCKBUF_DEBUG defined, each macro calls the
 * matching checker with the caller's __FILE__ and __LINE__.  Without it, the
 * macros expand to empty statements and their argument is not evaluated.
 */
#ifdef SOCKBUF_DEBUG
void	sblastrecordchk(struct sockbuf *sb, const char *file, int line);
void	sblastmbufchk(struct sockbuf *sb, const char *file, int line);
void	sbcheck(struct sockbuf *sb, const char *file, int line);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#else
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#endif /* SOCKBUF_DEBUG */
320
321 #endif /* _KERNEL */
322
323 #endif /* _SYS_SOCKBUF_H_ */
324