xref: /freebsd/share/man/man9/socket.9 (revision e0c4386e)
1.\"-
2.\" Copyright (c) 2006 Robert N. M. Watson
3.\" Copyright (c) 2014 Benjamin J. Kaduk
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.Dd September 6, 2022
28.Dt SOCKET 9
29.Os
30.Sh NAME
31.Nm socket
32.Nd "kernel socket interface"
33.Sh SYNOPSIS
34.In sys/socket.h
35.In sys/socketvar.h
36.Ft void
37.Fn soabort "struct socket *so"
38.Ft int
39.Fn soaccept "struct socket *so" "struct sockaddr *nam"
40.Ft int
41.Fn socheckuid "struct socket *so" "uid_t uid"
42.Ft int
43.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
44.Ft void
45.Fn soclose "struct socket *so"
46.Ft int
47.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
48.Ft int
49.Fo socreate
50.Fa "int dom" "struct socket **aso" "int type" "int proto"
51.Fa "struct ucred *cred" "struct thread *td"
52.Fc
53.Ft int
54.Fn sodisconnect "struct socket *so"
55.Ft void
56.Fo sodtor_set
57.Fa "struct socket *so"
58.Fa "void (*func)(struct socket *)"
59.Fc
60.Ft struct sockaddr *
61.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
62.Ft void
63.Fn sofree "struct socket *so"
64.Ft void
65.Fn sohasoutofband "struct socket *so"
66.Ft int
67.Fn solisten "struct socket *so" "int backlog" "struct thread *td"
68.Ft void
69.Fn solisten_proto "struct socket *so" "int backlog"
70.Ft int
71.Fn solisten_proto_check "struct socket *so"
72.Ft struct socket *
73.Fn sonewconn "struct socket *head" "int connstatus"
74.Ft int
75.Fo sopoll
76.Fa "struct socket *so" "int events" "struct ucred *active_cred"
77.Fa "struct thread *td"
78.Fc
79.Ft int
80.Fo sopoll_generic
81.Fa "struct socket *so" "int events" "struct ucred *active_cred"
82.Fa "struct thread *td"
83.Fc
84.Ft int
85.Fo soreceive
86.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
87.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
88.Fc
89.Ft int
90.Fo soreceive_stream
91.Fa "struct socket *so" "struct sockaddr **paddr"
92.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
93.Fa "int *flagsp"
94.Fc
95.Ft int
96.Fo soreceive_dgram
97.Fa "struct socket *so" "struct sockaddr **paddr"
98.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
99.Fa "int *flagsp"
100.Fc
101.Ft int
102.Fo soreceive_generic
103.Fa "struct socket *so" "struct sockaddr **paddr"
104.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
105.Fa "int *flagsp"
106.Fc
107.Ft int
108.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
109.Ft void
110.Fn sorflush "struct socket *so"
111.Ft int
112.Fo sosend
113.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
114.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
115.Fc
116.Ft int
117.Fo sosend_dgram
118.Fa "struct socket *so" "struct sockaddr *addr"
119.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
120.Fa "int flags" "struct thread *td"
121.Fc
122.Ft int
123.Fo sosend_generic
124.Fa "struct socket *so" "struct sockaddr *addr"
125.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
126.Fa "int flags" "struct thread *td"
127.Fc
128.Ft int
129.Fn soshutdown "struct socket *so" "int how"
130.Ft void
131.Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
132.Ft void
133.Fn soupcall_clear "struct socket *so" "int which"
134.Ft void
135.Fo soupcall_set
136.Fa "struct socket *so" "int which"
137.Fa "int (*func)(struct socket *, void *, int)" "void *arg"
138.Fc
139.Ft void
140.Fn sowakeup "struct socket *so" "struct sockbuf *sb"
141.In sys/sockopt.h
142.Ft int
143.Fn sosetopt "struct socket *so" "struct sockopt *sopt"
144.Ft int
145.Fn sogetopt "struct socket *so" "struct sockopt *sopt"
146.Ft int
147.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
148.Ft int
149.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
150.Sh DESCRIPTION
151The kernel
152.Nm
153programming interface permits in-kernel consumers to interact with
154local and network socket objects in a manner similar to that permitted using
155the
156.Xr socket 2
157user API.
158These interfaces are appropriate for use by distributed file systems and
159other network-aware kernel services.
160While the user API operates on file descriptors, the kernel interfaces
161operate directly on
162.Vt "struct socket"
163pointers.
164Some portions of the kernel API exist only to implement the user API,
165and are not expected to be used by kernel code.
166The portions of the socket API used by socket consumers and
167implementations of network protocols will differ; some routines
168are only useful for protocol implementors.
169.Pp
170Except where otherwise indicated,
171.Nm
172functions may sleep, and are not appropriate for use in an interrupt thread
173context or while holding non-sleepable kernel locks.
174.Ss Creating and Destroying Sockets
175A new socket may be created using
176.Fn socreate .
177As with
178.Xr socket 2 ,
179arguments specify the requested domain, type, and protocol via
180.Fa dom , type ,
181and
182.Fa proto .
183The socket is returned via
184.Fa aso
185on success.
186In addition, the credential used to authorize operations associated with the
187socket will be passed via
188.Fa cred
189(and will be cached for the lifetime of the socket), and the thread
190performing the operation via
191.Fa td .
192.Em Warning :
193authorization of the socket creation operation will be performed
194using the thread credential for some protocols (such as raw sockets).
195.Pp
196Sockets may be closed and freed using
197.Fn soclose ,
198which has similar semantics to
199.Xr close 2 .
200.Pp
201In certain circumstances, it is appropriate to destroy a socket without
202waiting for it to disconnect, for which
203.Fn soabort
204is used.
205This is only appropriate for incoming connections which are in a
206partially connected state.
207It must be called on an unreferenced socket, by the thread which
208removed the socket from its listen queue, to prevent races.
209It will call into protocol code, so no socket locks may be held
210over the call.
211The caller of
212.Fn soabort
213is responsible for setting the VNET context.
214The normal path to freeing a socket is
215.Fn sofree ,
216which handles reference counting on the socket.
217It should be called whenever a reference is released, and also whenever
218reference flags are cleared in socket or protocol code.
219Calls to
220.Fn sofree
221should not be made from outside the socket layer; outside callers
222should use
223.Fn soclose
224instead.
225.Ss Connections and Addresses
226The
227.Fn sobind
228function is equivalent to the
229.Xr bind 2
230system call, and binds the socket
231.Fa so
232to the address
233.Fa nam .
234The operation will be authorized using the credential on thread
235.Fa td .
236.Pp
237The
238.Fn soconnect
239function is equivalent to the
240.Xr connect 2
241system call, and initiates a connection on the socket
242.Fa so
243to the address
244.Fa nam .
245The operation will be authorized using the credential on thread
246.Fa td .
247Unlike the user system call,
248.Fn soconnect
249returns immediately; the caller may
250.Xr msleep 9
251on
252.Fa so->so_timeo
253while holding the socket mutex and waiting for the
254.Dv SS_ISCONNECTING
255flag to clear or
256.Fa so->so_error
257to become non-zero.
258If
259.Fn soconnect
260fails, the caller must manually clear the
261.Dv SS_ISCONNECTING
262flag.
263.Pp
264A call to
265.Fn sodisconnect
266disconnects the socket without closing it.
267.Pp
268The
269.Fn soshutdown
270function is equivalent to the
271.Xr shutdown 2
272system call, and causes part or all of a connection on a socket to be closed
273down.
274.Pp
275Sockets are transitioned from non-listening status to listening with
276.Fn solisten .
277.Ss Socket Options
278The
279.Fn sogetopt
280function is equivalent to the
281.Xr getsockopt 2
282system call, and retrieves a socket option on socket
283.Fa so .
284The
285.Fn sosetopt
286function is equivalent to the
287.Xr setsockopt 2
288system call, and sets a socket option on socket
289.Fa so .
290.Pp
291The second argument in both
292.Fn sogetopt
293and
294.Fn sosetopt
295is the
296.Fa sopt
297pointer to a
298.Vt "struct sockopt"
299describing the socket option operation.
300The caller-allocated structure must be zeroed, and then have its fields
301initialized to specify socket option operation arguments:
302.Bl -tag -width ".Va sopt_valsize"
303.It Va sopt_dir
304Set to
305.Dv SOPT_SET
306or
307.Dv SOPT_GET
308depending on whether this is a get or set operation.
309.It Va sopt_level
310Specify the level in the network stack the operation is targeted at; for
311example,
312.Dv SOL_SOCKET .
313.It Va sopt_name
314Specify the name of the socket option to get or set.
315.It Va sopt_val
316Kernel space pointer to the argument value for the socket option.
317.It Va sopt_valsize
318Size of the argument value in bytes.
319.El
320.Ss Socket Upcalls
321In order for the owner of a socket to be notified when the socket
322is ready to send or receive data, an upcall may be registered on
323the socket.
324The upcall is a function that will be called by the socket framework
325when a socket buffer associated with the given socket is ready for
326reading or writing.
327.Fn soupcall_set
328is used to register a socket upcall.
329The function
330.Va func
331is registered, and the pointer
332.Va arg
333will be passed as its second argument when it is called by the framework.
334The possible values for
335.Va which
336are
337.Dv SO_RCV
338and
339.Dv SO_SND ,
340which register upcalls for receive and send events, respectively.
341The upcall function
342.Fn func
343must return either
344.Dv SU_OK
345or
346.Dv SU_ISCONNECTED ,
347depending on whether or not a call to
348.Fn soisconnected
349should be made by the socket framework after the upcall returns.
350The upcall
351.Va func
352cannot call
353.Fn soisconnected
354itself due to lock ordering with the socket buffer lock.
355Only
356.Dv SO_RCV
357upcalls should return
358.Dv SU_ISCONNECTED .
359When a
360.Dv SO_RCV
361upcall returns
362.Dv SU_ISCONNECTED ,
363the upcall will be removed from the socket.
364.Pp
365Upcalls are removed from their socket by
366.Fn soupcall_clear .
367The
368.Va which
369argument again specifies whether the sending or receiving upcall is to
370be cleared, with
371.Dv SO_RCV
372or
373.Dv SO_SND .
374.Ss Socket Destructor Callback
375A kernel system can use the
376.Fn sodtor_set
377function to set a destructor for a socket.
378The destructor is called when the socket is about to be freed.
379The destructor is called before the protocol detach routine.
380The destructor can serve as a callback to initiate additional cleanup actions.
381.Ss Socket I/O
382The
383.Fn soreceive
384function is equivalent to the
385.Xr recvmsg 2
386system call, and attempts to receive bytes of data from the socket
387.Fa so ,
388optionally blocking while waiting for data if none is ready to read.
389Data may be retrieved directly to kernel or user memory via the
390.Fa uio
391argument, or as an mbuf chain returned to the caller via
392.Fa mp0 ,
393avoiding a data copy.
394The
395.Fa uio
396must always be
397.Pf non- Dv NULL .
398If
399.Fa mp0
400is
401.Pf non- Dv NULL ,
402only the
403.Fa uio_resid
404of
405.Fa uio
406is used.
407The caller may optionally retrieve a socket address on a protocol with the
408.Dv PR_ADDR
409capability by providing storage via a
410.Pf non- Dv NULL
411.Fa psa
412argument.
413The caller may optionally retrieve control data mbufs via a
414.Pf non- Dv NULL
415.Fa controlp
416argument.
417Optional flags may be passed to
418.Fn soreceive
419via a
420.Pf non- Dv NULL
421.Fa flagsp
422argument, and use the same flag name space as the
423.Xr recvmsg 2
424system call.
425.Pp
426The
427.Fn sosend
428function is equivalent to the
429.Xr sendmsg 2
430system call, and attempts to send bytes of data via the socket
431.Fa so ,
432optionally blocking if data cannot be immediately sent.
433Data may be sent directly from kernel or user memory via the
434.Fa uio
435argument, or as an mbuf chain via
436.Fa top ,
437avoiding a data copy.
438Only one of the
439.Fa uio
440or
441.Fa top
442pointers may be
443.Pf non- Dv NULL .
444An optional destination address may be specified via a
445.Pf non- Dv NULL
446.Fa addr
447argument, which may result in an implicit connect if supported by the
448protocol.
449The caller may optionally send control data mbufs via a
450.Pf non- Dv NULL
451.Fa control
452argument.
453Flags may be passed to
454.Fn sosend
455using the
456.Fa flags
457argument, and use the same flag name space as the
458.Xr sendmsg 2
459system call.
460.Pp
461Kernel callers running in an interrupt thread context, or with a mutex held,
462will wish to use non-blocking sockets and pass the
463.Dv MSG_DONTWAIT
464flag in order to prevent these functions from sleeping.
465.Pp
466A socket can be queried for readability, writability, out-of-band data,
467or end-of-file using
468.Fn sopoll .
469The possible values for
470.Va events
471are as for
472.Xr poll 2 ,
473with symbolic values
474.Dv POLLIN ,
475.Dv POLLPRI ,
476.Dv POLLOUT ,
477.Dv POLLRDNORM ,
478.Dv POLLWRNORM ,
479.Dv POLLRDBAND ,
480and
481.Dv POLLINIGNEOF
482taken from
483.In sys/poll.h .
484.Pp
485Calls to
486.Fn soaccept
487pass through to the protocol's accept routine to accept an incoming connection.
488.Ss Socket Utility Functions
489The uid of a socket's credential may be compared against a
490.Va uid
491with
492.Fn socheckuid .
493.Pp
494A copy of an existing
495.Vt struct sockaddr
496may be made using
497.Fn sodupsockaddr .
498.Pp
499Protocol implementations notify the socket layer of the arrival of
500out-of-band data using
501.Fn sohasoutofband ,
502so that the socket layer can notify socket consumers of the available data.
503.Pp
504An
505.Dq external-format
506version of a
507.Vt struct socket
508can be created using
509.Fn sotoxsocket ,
510suitable for isolating user code from changes in the kernel structure.
511.Ss Protocol Implementations
512Protocols must supply an implementation for
513.Fn solisten ;
514such protocol implementations can call back into the socket layer using
515.Fn solisten_proto_check
516and
517.Fn solisten_proto
518to check and set the socket-layer listen state.
519These callbacks are provided so that the protocol implementation
520can order the socket layer and protocol locks as necessary.
521Protocols must supply an implementation of
522.Fn soreceive ;
523the functions
524.Fn soreceive_stream ,
525.Fn soreceive_dgram ,
526and
527.Fn soreceive_generic
528are supplied for use by such implementations.
529.Pp
530Protocol implementations can use
531.Fn sonewconn
532to create a socket and attach protocol state to that socket.
533This can be used to create new sockets available for
534.Fn soaccept
535on a listen socket.
536The returned socket has a reference count of zero.
537.Pp
538Protocols must supply an implementation for
539.Fn sopoll ;
540.Fn sopoll_generic
541is provided for use by protocol implementations.
542.Pp
543The functions
544.Fn sosend_dgram
545and
546.Fn sosend_generic
547are supplied to assist in protocol implementations of
548.Fn sosend .
549.Pp
550When a protocol creates a new socket structure, it is necessary to
551reserve socket buffer space for that socket, by calling
552.Fn soreserve .
553The rough inverse of this reservation is performed by
554.Fn sorflush ,
555which is called automatically by the socket framework.
556.Pp
557When a protocol needs to wake up threads waiting for the socket to
558become ready to read or write, variants of
559.Fn sowakeup
560are used.
561The
562.Fn sowakeup
563function should not be called directly by protocol code; instead, use the
564wrappers
565.Fn sorwakeup ,
566.Fn sorwakeup_locked ,
567.Fn sowwakeup ,
568and
569.Fn sowwakeup_locked
570for readers and writers, with the corresponding socket buffer lock
571not already locked, or already held, respectively.
572.Pp
573The functions
574.Fn sooptcopyin
575and
576.Fn sooptcopyout
577are useful for transferring
578.Vt struct sockopt
579data between user and kernel code.
580.Sh SEE ALSO
581.Xr bind 2 ,
582.Xr close 2 ,
583.Xr connect 2 ,
584.Xr getsockopt 2 ,
585.Xr recv 2 ,
586.Xr send 2 ,
587.Xr setsockopt 2 ,
588.Xr shutdown 2 ,
589.Xr socket 2 ,
590.Xr ng_ksocket 4 ,
591.Xr intr_event 9 ,
592.Xr msleep 9 ,
593.Xr ucred 9
594.Sh HISTORY
595The
596.Xr socket 2
597system call appeared in
598.Bx 4.2 .
599This manual page was introduced in
600.Fx 7.0 .
601.Sh AUTHORS
602This manual page was written by
603.An Robert Watson
604and
605.An Benjamin Kaduk .
606.Sh BUGS
607The use of explicitly passed credentials, credentials hung from explicitly
608passed threads, the credential on
609.Dv curthread ,
610and the cached credential from
611socket creation time is inconsistent, and may lead to unexpected behaviour.
612It is possible that several of the
613.Fa td
614arguments should be
615.Fa cred
616arguments, or simply not be present at all.
617.Pp
618The caller may need to manually clear
619.Dv SS_ISCONNECTING
620if
621.Fn soconnect
622returns an error.
623.Pp
624The
625.Dv MSG_DONTWAIT
626flag is not implemented for
627.Fn sosend ,
628and may not always work with
629.Fn soreceive
630when zero copy sockets are enabled.
631.Pp
632This manual page does not describe how to register socket upcalls or monitor
633a socket for readability/writability without using blocking I/O.
634.Pp
635The
636.Fn soref
637and
638.Fn sorele
639functions are not described, and in most cases should not be used, due to
640confusing and potentially incorrect interactions when
641.Fn sorele
642is last called after
643.Fn soclose .
644