1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 32 * 33 * $FreeBSD$ 34 */ 35 36 #ifndef _SYS_SOCKETVAR_H_ 37 #define _SYS_SOCKETVAR_H_ 38 39 /* 40 * Socket generation count type. Also used in xinpcb, xtcpcb, xunpcb. 41 */ 42 typedef uint64_t so_gen_t; 43 44 #if defined(_KERNEL) || defined(_WANT_SOCKET) 45 #include <sys/queue.h> /* for TAILQ macros */ 46 #include <sys/selinfo.h> /* for struct selinfo */ 47 #include <sys/_lock.h> 48 #include <sys/_mutex.h> 49 #include <sys/osd.h> 50 #include <sys/_sx.h> 51 #include <sys/sockbuf.h> 52 #ifdef _KERNEL 53 #include <sys/caprights.h> 54 #include <sys/sockopt.h> 55 #endif 56 57 struct vnet; 58 59 /* 60 * Kernel structure per socket. 61 * Contains send and receive buffer queues, 62 * handle on protocol and pointer to protocol 63 * private data and error information. 64 */ 65 typedef int so_upcall_t(struct socket *, void *, int); 66 typedef void so_dtor_t(struct socket *); 67 68 struct socket; 69 70 enum socket_qstate { 71 SQ_NONE = 0, 72 SQ_INCOMP = 0x0800, /* on sol_incomp */ 73 SQ_COMP = 0x1000, /* on sol_comp */ 74 }; 75 76 /*- 77 * Locking key to struct socket: 78 * (a) constant after allocation, no locking required. 79 * (b) locked by SOCK_LOCK(so). 80 * (cr) locked by SOCKBUF_LOCK(&so->so_rcv). 81 * (cs) locked by SOCKBUF_LOCK(&so->so_snd). 82 * (e) locked by SOLISTEN_LOCK() of corresponding listening socket. 83 * (f) not locked since integer reads/writes are atomic. 84 * (g) used only as a sleep/wakeup address, no value. 85 * (h) locked by global mutex so_global_mtx. 86 */ 87 TAILQ_HEAD(accept_queue, socket); 88 struct socket { 89 struct mtx so_lock; 90 volatile u_int so_count; /* (b / refcount) */ 91 struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */ 92 struct selinfo so_wrsel; /* (b/cs) for so_snd */ 93 short so_type; /* (a) generic type, see socket.h */ 94 int so_options; /* (b) from socket call, see socket.h */ 95 short so_linger; /* time to linger close(2) */ 96 short so_state; /* (b) internal state flags SS_* */ 97 void *so_pcb; /* protocol control block */ 98 struct vnet *so_vnet; /* (a) network stack instance */ 99 struct protosw *so_proto; /* (a) protocol handle */ 100 short so_timeo; /* (g) connection timeout */ 101 u_short so_error; /* (f) error affecting connection */ 102 struct sigio *so_sigio; /* [sg] information for async I/O or 103 out of band data (SIGURG) */ 104 struct ucred *so_cred; /* (a) user credentials */ 105 struct label *so_label; /* (b) MAC label for socket */ 106 /* NB: generation count must not be first. */ 107 so_gen_t so_gencnt; /* (h) generation count */ 108 void *so_emuldata; /* (b) private data for emulators */ 109 so_dtor_t *so_dtor; /* (b) optional destructor */ 110 struct osd osd; /* Object Specific extensions */ 111 /* 112 * so_fibnum, so_user_cookie and friends can be used to attach 113 * some user-specified metadata to a socket, which then can be 114 * used by the kernel for various actions. 115 * so_user_cookie is used by ipfw/dummynet. 116 */ 117 int so_fibnum; /* routing domain for this socket */ 118 uint32_t so_user_cookie; 119 120 int so_ts_clock; /* type of the clock used for timestamps */ 121 uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ 122 union { 123 /* Regular (data flow) socket. */ 124 struct { 125 /* (cr, cs) Receive and send buffers. */ 126 struct sockbuf so_rcv, so_snd; 127 128 /* (e) Our place on accept queue. */ 129 TAILQ_ENTRY(socket) so_list; 130 struct socket *so_listen; /* (b) */ 131 enum socket_qstate so_qstate; /* (b) */ 132 /* (b) cached MAC label for peer */ 133 struct label *so_peerlabel; 134 u_long so_oobmark; /* chars to oob mark */ 135 }; 136 /* 137 * Listening socket, where accepts occur, is so_listen in all 138 * subsidiary sockets. If so_listen is NULL, socket is not 139 * related to an accept. For a listening socket itself 140 * sol_incomp queues partially completed connections, while 141 * sol_comp is a queue of connections ready to be accepted. 142 * If a connection is aborted and it has so_listen set, then 143 * it has to be pulled out of either sol_incomp or sol_comp. 144 * We allow connections to queue up based on current queue 145 * lengths and limit on number of queued connections for this 146 * socket. 147 */ 148 struct { 149 /* (e) queue of partial unaccepted connections */ 150 struct accept_queue sol_incomp; 151 /* (e) queue of complete unaccepted connections */ 152 struct accept_queue sol_comp; 153 u_int sol_qlen; /* (e) sol_comp length */ 154 u_int sol_incqlen; /* (e) sol_incomp length */ 155 u_int sol_qlimit; /* (e) queue limit */ 156 157 /* accept_filter(9) optional data */ 158 struct accept_filter *sol_accept_filter; 159 void *sol_accept_filter_arg; /* saved filter args */ 160 char *sol_accept_filter_str; /* saved user args */ 161 162 /* Optional upcall, for kernel socket. */ 163 so_upcall_t *sol_upcall; /* (e) */ 164 void *sol_upcallarg; /* (e) */ 165 166 /* Socket buffer parameters, to be copied to 167 * dataflow sockets, accepted from this one. */ 168 int sol_sbrcv_lowat; 169 int sol_sbsnd_lowat; 170 u_int sol_sbrcv_hiwat; 171 u_int sol_sbsnd_hiwat; 172 short sol_sbrcv_flags; 173 short sol_sbsnd_flags; 174 sbintime_t sol_sbrcv_timeo; 175 sbintime_t sol_sbsnd_timeo; 176 177 /* Information tracking listen queue overflows. */ 178 struct timeval sol_lastover; /* (e) */ 179 int sol_overcount; /* (e) */ 180 }; 181 }; 182 }; 183 #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ 184 185 /* 186 * Socket state bits. 187 * 188 * Historically, these bits were all kept in the so_state field. 189 * They are now split into separate, lock-specific fields. 190 * so_state maintains basic socket state protected by the socket lock. 191 * so_qstate holds information about the socket accept queues. 192 * Each socket buffer also has a state field holding information 193 * relevant to that socket buffer (can't send, rcv). 194 * Many fields will be read without locks to improve performance and avoid 195 * lock order issues. However, this approach must be used with caution. 196 */ 197 #define SS_NOFDREF 0x0001 /* no file table ref any more */ 198 #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ 199 #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ 200 #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ 201 #define SS_NBIO 0x0100 /* non-blocking ops */ 202 #define SS_ASYNC 0x0200 /* async i/o notify */ 203 #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ 204 #define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ 205 206 /* 207 * Protocols can mark a socket as SS_PROTOREF to indicate that, following 208 * pru_detach, they still want the socket to persist, and will free it 209 * themselves when they are done. Protocols should only ever call sofree() 210 * following setting this flag in pru_detach(), and never otherwise, as 211 * sofree() bypasses socket reference counting. 212 */ 213 #define SS_PROTOREF 0x4000 /* strong protocol reference */ 214 215 #ifdef _KERNEL 216 217 #define SOCK_MTX(so) &(so)->so_lock 218 #define SOCK_LOCK(so) mtx_lock(&(so)->so_lock) 219 #define SOCK_OWNED(so) mtx_owned(&(so)->so_lock) 220 #define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock) 221 #define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED) 222 #define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED) 223 224 #define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0) 225 #define SOLISTEN_LOCK(sol) do { \ 226 mtx_lock(&(sol)->so_lock); \ 227 KASSERT(SOLISTENING(sol), \ 228 ("%s: %p not listening", __func__, (sol))); \ 229 } while (0) 230 #define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock) 231 #define SOLISTEN_UNLOCK(sol) do { \ 232 KASSERT(SOLISTENING(sol), \ 233 ("%s: %p not listening", __func__, (sol))); \ 234 mtx_unlock(&(sol)->so_lock); \ 235 } while (0) 236 #define SOLISTEN_LOCK_ASSERT(sol) do { \ 237 mtx_assert(&(sol)->so_lock, MA_OWNED); \ 238 KASSERT(SOLISTENING(sol), \ 239 ("%s: %p not listening", __func__, (sol))); \ 240 } while (0) 241 242 /* 243 * Macros for sockets and socket buffering. 244 */ 245 246 /* 247 * Flags to sblock(). 248 */ 249 #define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ 250 #define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ 251 #define SBL_VALID (SBL_WAIT | SBL_NOINTR) 252 253 /* 254 * Do we need to notify the other side when I/O is possible? 255 */ 256 #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ 257 SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) 258 259 /* do we have to send all at once on a socket? */ 260 #define sosendallatonce(so) \ 261 ((so)->so_proto->pr_flags & PR_ATOMIC) 262 263 /* can we read something from so? */ 264 #define soreadabledata(so) \ 265 (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error) 266 #define soreadable(so) \ 267 (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) 268 269 /* can we write something to so? */ 270 #define sowriteable(so) \ 271 ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ 272 (((so)->so_state&SS_ISCONNECTED) || \ 273 ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ 274 ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ 275 (so)->so_error) 276 277 /* 278 * soref()/sorele() ref-count the socket structure. 279 * soref() may be called without owning socket lock, but in that case a 280 * caller must own something that holds socket, and so_count must be not 0. 281 * Note that you must still explicitly close the socket, but the last ref 282 * count will free the structure. 283 */ 284 #define soref(so) refcount_acquire(&(so)->so_count) 285 #define sorele(so) do { \ 286 SOCK_LOCK_ASSERT(so); \ 287 if (refcount_release(&(so)->so_count)) \ 288 sofree(so); \ 289 else \ 290 SOCK_UNLOCK(so); \ 291 } while (0) 292 293 /* 294 * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to 295 * avoid a non-atomic test-and-wakeup. However, sowakeup is 296 * responsible for releasing the lock if it is called. We unlock only 297 * if we don't call into sowakeup. If any code is introduced that 298 * directly invokes the underlying sowakeup() primitives, it must 299 * maintain the same semantics. 300 */ 301 #define sorwakeup_locked(so) do { \ 302 SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ 303 if (sb_notify(&(so)->so_rcv)) \ 304 sowakeup((so), &(so)->so_rcv); \ 305 else \ 306 SOCKBUF_UNLOCK(&(so)->so_rcv); \ 307 } while (0) 308 309 #define sorwakeup(so) do { \ 310 SOCKBUF_LOCK(&(so)->so_rcv); \ 311 sorwakeup_locked(so); \ 312 } while (0) 313 314 #define sowwakeup_locked(so) do { \ 315 SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ 316 if (sb_notify(&(so)->so_snd)) \ 317 sowakeup((so), &(so)->so_snd); \ 318 else \ 319 SOCKBUF_UNLOCK(&(so)->so_snd); \ 320 } while (0) 321 322 #define sowwakeup(so) do { \ 323 SOCKBUF_LOCK(&(so)->so_snd); \ 324 sowwakeup_locked(so); \ 325 } while (0) 326 327 struct accept_filter { 328 char accf_name[16]; 329 int (*accf_callback) 330 (struct socket *so, void *arg, int waitflag); 331 void * (*accf_create) 332 (struct socket *so, char *arg); 333 void (*accf_destroy) 334 (struct socket *so); 335 SLIST_ENTRY(accept_filter) accf_next; 336 }; 337 338 #define ACCEPT_FILTER_DEFINE(modname, filtname, cb, create, destroy, ver) \ 339 static struct accept_filter modname##_filter = { \ 340 .accf_name = filtname, \ 341 .accf_callback = cb, \ 342 .accf_create = create, \ 343 .accf_destroy = destroy, \ 344 }; \ 345 static moduledata_t modname##_mod = { \ 346 .name = __XSTRING(modname), \ 347 .evhand = accept_filt_generic_mod_event, \ 348 .priv = &modname##_filter, \ 349 }; \ 350 DECLARE_MODULE(modname, modname##_mod, SI_SUB_DRIVERS, \ 351 SI_ORDER_MIDDLE); \ 352 MODULE_VERSION(modname, ver) 353 354 #ifdef MALLOC_DECLARE 355 MALLOC_DECLARE(M_ACCF); 356 MALLOC_DECLARE(M_PCB); 357 MALLOC_DECLARE(M_SONAME); 358 #endif 359 360 /* 361 * Socket specific helper hook point identifiers 362 * Do not leave holes in the sequence, hook registration is a loop. 363 */ 364 #define HHOOK_SOCKET_OPT 0 365 #define HHOOK_SOCKET_CREATE 1 366 #define HHOOK_SOCKET_RCV 2 367 #define HHOOK_SOCKET_SND 3 368 #define HHOOK_FILT_SOREAD 4 369 #define HHOOK_FILT_SOWRITE 5 370 #define HHOOK_SOCKET_CLOSE 6 371 #define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE 372 373 struct socket_hhook_data { 374 struct socket *so; 375 struct mbuf *m; 376 void *hctx; /* hook point specific data*/ 377 int status; 378 }; 379 380 extern int maxsockets; 381 extern u_long sb_max; 382 extern so_gen_t so_gencnt; 383 384 struct file; 385 struct filecaps; 386 struct filedesc; 387 struct mbuf; 388 struct sockaddr; 389 struct ucred; 390 struct uio; 391 392 /* 'which' values for socket upcalls. */ 393 #define SO_RCV 1 394 #define SO_SND 2 395 396 /* Return values for socket upcalls. */ 397 #define SU_OK 0 398 #define SU_ISCONNECTED 1 399 400 /* 401 * From uipc_socket and friends 402 */ 403 int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, 404 size_t len); 405 int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, 406 struct file **fpp, u_int *fflagp, struct filecaps *havecaps); 407 void soabort(struct socket *so); 408 int soaccept(struct socket *so, struct sockaddr **nam); 409 void soaio_enqueue(struct task *task); 410 void soaio_rcv(void *context, int pending); 411 void soaio_snd(void *context, int pending); 412 int socheckuid(struct socket *so, uid_t uid); 413 int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); 414 int sobindat(int fd, struct socket *so, struct sockaddr *nam, 415 struct thread *td); 416 int soclose(struct socket *so); 417 int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); 418 int soconnectat(int fd, struct socket *so, struct sockaddr *nam, 419 struct thread *td); 420 int soconnect2(struct socket *so1, struct socket *so2); 421 int socreate(int dom, struct socket **aso, int type, int proto, 422 struct ucred *cred, struct thread *td); 423 int sodisconnect(struct socket *so); 424 void sodtor_set(struct socket *, so_dtor_t *); 425 struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); 426 void sofree(struct socket *so); 427 void sohasoutofband(struct socket *so); 428 int solisten(struct socket *so, int backlog, struct thread *td); 429 void solisten_proto(struct socket *so, int backlog); 430 int solisten_proto_check(struct socket *so); 431 int solisten_dequeue(struct socket *, struct socket **, int); 432 struct socket * 433 sonewconn(struct socket *head, int connstatus); 434 struct socket * 435 sopeeloff(struct socket *); 436 int sopoll(struct socket *so, int events, struct ucred *active_cred, 437 struct thread *td); 438 int sopoll_generic(struct socket *so, int events, 439 struct ucred *active_cred, struct thread *td); 440 int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, 441 struct mbuf **mp0, struct mbuf **controlp, int *flagsp); 442 int soreceive_stream(struct socket *so, struct sockaddr **paddr, 443 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, 444 int *flagsp); 445 int soreceive_dgram(struct socket *so, struct sockaddr **paddr, 446 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, 447 int *flagsp); 448 int soreceive_generic(struct socket *so, struct sockaddr **paddr, 449 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, 450 int *flagsp); 451 int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); 452 void sorflush(struct socket *so); 453 int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 454 struct mbuf *top, struct mbuf *control, int flags, 455 struct thread *td); 456 int sosend_dgram(struct socket *so, struct sockaddr *addr, 457 struct uio *uio, struct mbuf *top, struct mbuf *control, 458 int flags, struct thread *td); 459 int sosend_generic(struct socket *so, struct sockaddr *addr, 460 struct uio *uio, struct mbuf *top, struct mbuf *control, 461 int flags, struct thread *td); 462 int soshutdown(struct socket *so, int how); 463 void soupcall_clear(struct socket *, int); 464 void soupcall_set(struct socket *, int, so_upcall_t, void *); 465 void solisten_upcall_set(struct socket *, so_upcall_t, void *); 466 void sowakeup(struct socket *so, struct sockbuf *sb); 467 void sowakeup_aio(struct socket *so, struct sockbuf *sb); 468 void solisten_wakeup(struct socket *); 469 int selsocket(struct socket *so, int events, struct timeval *tv, 470 struct thread *td); 471 void soisconnected(struct socket *so); 472 void soisconnecting(struct socket *so); 473 void soisdisconnected(struct socket *so); 474 void soisdisconnecting(struct socket *so); 475 void socantrcvmore(struct socket *so); 476 void socantrcvmore_locked(struct socket *so); 477 void socantsendmore(struct socket *so); 478 void socantsendmore_locked(struct socket *so); 479 480 /* 481 * Accept filter functions (duh). 482 */ 483 int accept_filt_add(struct accept_filter *filt); 484 int accept_filt_del(char *name); 485 struct accept_filter *accept_filt_get(char *name); 486 #ifdef ACCEPT_FILTER_MOD 487 #ifdef SYSCTL_DECL 488 SYSCTL_DECL(_net_inet_accf); 489 #endif 490 int accept_filt_generic_mod_event(module_t mod, int event, void *data); 491 #endif 492 493 #endif /* _KERNEL */ 494 495 /* 496 * Structure to export socket from kernel to utilities, via sysctl(3). 497 */ 498 struct xsocket { 499 ksize_t xso_len; /* length of this structure */ 500 kvaddr_t xso_so; /* kernel address of struct socket */ 501 kvaddr_t so_pcb; /* kernel address of struct inpcb */ 502 uint64_t so_oobmark; 503 int64_t so_spare64[8]; 504 int32_t xso_protocol; 505 int32_t xso_family; 506 uint32_t so_qlen; 507 uint32_t so_incqlen; 508 uint32_t so_qlimit; 509 pid_t so_pgid; 510 uid_t so_uid; 511 int32_t so_spare32[8]; 512 int16_t so_type; 513 int16_t so_options; 514 int16_t so_linger; 515 int16_t so_state; 516 int16_t so_timeo; 517 uint16_t so_error; 518 struct xsockbuf { 519 uint32_t sb_cc; 520 uint32_t sb_hiwat; 521 uint32_t sb_mbcnt; 522 uint32_t sb_mcnt; 523 uint32_t sb_ccnt; 524 uint32_t sb_mbmax; 525 int32_t sb_lowat; 526 int32_t sb_timeo; 527 int16_t sb_flags; 528 } so_rcv, so_snd; 529 }; 530 531 #ifdef _KERNEL 532 void sotoxsocket(struct socket *so, struct xsocket *xso); 533 void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); 534 #endif 535 536 /* 537 * Socket buffer state bits. Exported via libprocstat(3). 538 */ 539 #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ 540 #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ 541 #define SBS_RCVATMARK 0x0040 /* at mark on input */ 542 543 #endif /* !_SYS_SOCKETVAR_H_ */ 544