/* xref: /illumos-gate/usr/src/uts/common/fs/sockfs/socktpi.h (revision 15f90b02) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2022 Garrett D'Amore
 */

#ifndef _SOCKFS_SOCKTPI_H
#define	_SOCKFS_SOCKTPI_H

#ifdef	__cplusplus
extern "C" {
#endif

340f1702c5SYu Xiangning /*
350f1702c5SYu Xiangning  * Internal representation used for addresses.
360f1702c5SYu Xiangning  */
370f1702c5SYu Xiangning struct soaddr {
380f1702c5SYu Xiangning 	struct sockaddr	*soa_sa;	/* Actual address */
390f1702c5SYu Xiangning 	t_uscalar_t	soa_len;	/* Length in bytes for kmem_free */
400f1702c5SYu Xiangning 	t_uscalar_t	soa_maxlen;	/* Allocated length */
410f1702c5SYu Xiangning };
/* Maximum size address for transports that have ADDR_size == 1 */
#define	SOA_DEFSIZE	128

/* Forward declaration; this header only uses pointers to struct sonode. */
struct sonode;

/*
 * TPI Sockets
 * ======================
 *
 * A TPI socket can be created by the TPI socket module, or as a
 * result of fallback. In either case, the TPI related information is
 * stored in a sotpi_info_t. Sockets that are TPI based from the
 * beginning will use a sotpi_sonode_t, but in the fallback case the
 * sotpi_info_t will be allocated when needed. However, the so_priv
 * field in the sonode will always point to the sotpi_info_t, and the
 * structure should only be accessed via so_priv. Use SOTOTPI().
 *
 * A TPI socket always corresponds to a VCHR stream representing the
 * transport provider (e.g. /dev/tcp). This information is retrieved
 * from the kernel socket configuration table and accessible via
 * so_sockparams->sp_sdev_info.  sockfs uses this to perform
 * VOP_ACCESS checks before allowing an open of the transport
 * provider.
 *
 * AF_UNIX Sockets
 * -------------------------
 *
 * When an AF_UNIX socket is bound to a pathname, sockfs creates a
 * VSOCK vnode in the underlying file system. However, the vnodeops
 * etc in this VNODE remain those of the underlying file system.
 * Sockfs uses the v_stream pointer in the underlying file system
 * VSOCK node to find the sonode bound to the pathname. The bound
 * pathname vnode is accessed through sti_ux_bound_vp.
 *
 * Out of Band Data Handling
 * -------------------------
 *
 * The counts (sti_oobcnt and sti_oobsigcnt) track the number of
 * urgent indications that are (logically) queued on the stream head
 * read queue. The urgent data is queued on the stream head
 * as follows.
 *
 * In the normal case the SIGURG is not generated until
 * the T_EXDATA_IND arrives at the stream head. However, transports
 * that have an early indication that urgent data is pending
 * (e.g. TCP receiving a "new" urgent pointer value) can send up
 * an M_PCSIG/SIGURG message to generate the signal early.
 *
 * The mark is indicated by either:
 *  - a T_EXDATA_IND (with no M_DATA b_cont) with MSGMARK set.
 *    When this message is consumed by sorecvmsg the socket layer
 *    sets SS_RCVATMARK until data has been consumed past the mark.
 *  - a message with MSGMARKNEXT set (indicating that the
 *    first byte of the next message constitutes the mark). When
 *    the last byte of the MSGMARKNEXT message is consumed in
 *    the stream head the stream head sets STRATMARK. This flag
 *    is cleared when at least one byte is read. (Note that
 *    the MSGMARKNEXT messages can be of zero length when there
 *    is no previous data to which the marknext can be attached.)
 *
 * While the T_EXDATA_IND method is the common case which is used
 * with all TPI transports, the MSGMARKNEXT method is needed to
 * indicate the mark when e.g. the TCP urgent byte has not been
 * received yet but the TCP urgent pointer has made TCP generate
 * the M_PCSIG/SIGURG.
 *
 * The signal (the M_PCSIG carrying the SIGURG) and the mark
 * indication can not be delivered as a single message, since
 * the signal should be delivered as high priority and any mark
 * indication must flow with the data. This implies that immediately
 * when the SIGURG has been delivered if the stream head queue is
 * empty it is impossible to determine if this will be the position
 * of the mark. This race condition is resolved by using MSGNOTMARKNEXT
 * messages and the STRNOTATMARK flag in the stream head. The
 * SIOCATMARK code calls the stream head to wait for either a
 * non-empty queue or one of the STR*ATMARK flags being set.
 * This implies that any transport that is sending M_PCSIG(SIGURG)
 * should send the appropriate MSGNOTMARKNEXT message (which can be
 * zero length) after sending an M_PCSIG to prevent SIOCATMARK
 * from sleeping unnecessarily.
 */

/* Value stored in sotpi_info_t.sti_magic. */
#define	SOTPI_INFO_MAGIC	0x12345678

1260f1702c5SYu Xiangning /*
1270f1702c5SYu Xiangning  * Information used by TPI/STREAMS sockets
1280f1702c5SYu Xiangning  */
1290f1702c5SYu Xiangning typedef struct sotpi_info {
1300f1702c5SYu Xiangning 	/*
1310f1702c5SYu Xiangning 	 * These fields are initialized once.
1320f1702c5SYu Xiangning 	 */
1330f1702c5SYu Xiangning 	uint32_t	sti_magic;	/* always set to SOTPI_INFO_MAGIC */
1340f1702c5SYu Xiangning 	dev_t		sti_dev;	/* device the sonode represents */
1350f1702c5SYu Xiangning 
1360f1702c5SYu Xiangning 	struct sockparams *sti_orig_sp;	/* in case of fallback; the orig sp */
1370f1702c5SYu Xiangning 
1380f1702c5SYu Xiangning 	kmutex_t	sti_plumb_lock;	/* serializes plumbs, and the related */
1390f1702c5SYu Xiangning 					/* so_pushcnt */
1400f1702c5SYu Xiangning 	short		sti_pushcnt;	/* Number of modules above "sockmod" */
1410f1702c5SYu Xiangning 
1420f1702c5SYu Xiangning 	kcondvar_t	sti_ack_cv;	/* wait for TPI acks */
1430f1702c5SYu Xiangning 
1440f1702c5SYu Xiangning 	uint8_t
1450f1702c5SYu Xiangning 		sti_laddr_valid : 1,	/* sti_laddr valid for user */
1460f1702c5SYu Xiangning 		sti_faddr_valid : 1,	/* sti_faddr valid for user */
1470f1702c5SYu Xiangning 		sti_faddr_noxlate : 1,	/* No xlation of faddr for AF_UNIX */
1480f1702c5SYu Xiangning 
1490f1702c5SYu Xiangning 		sti_direct : 1,		/* transport is directly below */
1500f1702c5SYu Xiangning 
1510f1702c5SYu Xiangning 		sti_pad_to_bit7 : 4;
1520f1702c5SYu Xiangning 
1530f1702c5SYu Xiangning 	mblk_t	*sti_ack_mp;		/* TPI ack received from below */
1540f1702c5SYu Xiangning 	mblk_t	*sti_unbind_mp;		/* Preallocated T_UNBIND_REQ message */
1550f1702c5SYu Xiangning 
1560f1702c5SYu Xiangning 	time_t  sti_atime;		/* time of last access */
1570f1702c5SYu Xiangning 	time_t  sti_mtime;		/* time of last modification */
1580f1702c5SYu Xiangning 	time_t  sti_ctime;		/* time of last attributes change */
1590f1702c5SYu Xiangning 
1600f1702c5SYu Xiangning 	ushort_t sti_delayed_error;	/* From T_uderror_ind */
1610f1702c5SYu Xiangning 	mblk_t	*sti_eaddr_mp;		/* for so_delayed_error */
1620f1702c5SYu Xiangning 					/* put here for delayed processing  */
1630f1702c5SYu Xiangning 
1640f1702c5SYu Xiangning 	mblk_t	*sti_conn_ind_head;	/* b_next list of T_CONN_IND */
1650f1702c5SYu Xiangning 	mblk_t	*sti_conn_ind_tail;
1660f1702c5SYu Xiangning 
1670f1702c5SYu Xiangning 	uint_t	sti_oobsigcnt;		/* Number of SIGURG generated */
1680f1702c5SYu Xiangning 	uint_t	sti_oobcnt;		/* Number of T_EXDATA_IND queued */
1690f1702c5SYu Xiangning 
1700f1702c5SYu Xiangning 	/* From T_info_ack */
1710f1702c5SYu Xiangning 	t_uscalar_t	sti_tsdu_size;
1720f1702c5SYu Xiangning 	t_uscalar_t	sti_etsdu_size;
1730f1702c5SYu Xiangning 	t_scalar_t	sti_addr_size;
1740f1702c5SYu Xiangning 	t_uscalar_t	sti_opt_size;
1750f1702c5SYu Xiangning 	t_uscalar_t	sti_tidu_size;
1760f1702c5SYu Xiangning 	t_scalar_t	sti_serv_type;
1770f1702c5SYu Xiangning 
1780f1702c5SYu Xiangning 	/* From T_capability_ack */
1790f1702c5SYu Xiangning 	t_uscalar_t	sti_acceptor_id;
1800f1702c5SYu Xiangning 
1810f1702c5SYu Xiangning 	/* Internal provider information */
1820f1702c5SYu Xiangning 	struct tpi_provinfo	*sti_provinfo;
1830f1702c5SYu Xiangning 
1840f1702c5SYu Xiangning 	/*
1850f1702c5SYu Xiangning 	 * The local and remote addresses have multiple purposes
1860f1702c5SYu Xiangning 	 * but one of the key reasons for their existence and careful
1870f1702c5SYu Xiangning 	 * tracking in sockfs is to support getsockname and getpeername
1880f1702c5SYu Xiangning 	 * when the transport does not handle the TI_GET*NAME ioctls
1890f1702c5SYu Xiangning 	 * and caching when it does (signalled by valid bits in so_state).
1900f1702c5SYu Xiangning 	 * When all transports support the new TPI (with T_ADDR_REQ)
1910f1702c5SYu Xiangning 	 * we can revisit this code.
1920f1702c5SYu Xiangning 	 *
1930f1702c5SYu Xiangning 	 * The other usage of sti_faddr is to keep the "connected to"
1940f1702c5SYu Xiangning 	 * address for datagram sockets.
1950f1702c5SYu Xiangning 	 *
1960f1702c5SYu Xiangning 	 * Finally, for AF_UNIX both local and remote addresses are used
1970f1702c5SYu Xiangning 	 * to record the sockaddr_un since we use a separate namespace
1980f1702c5SYu Xiangning 	 * in the loopback transport.
1990f1702c5SYu Xiangning 	 */
2000f1702c5SYu Xiangning 	struct soaddr sti_laddr;	/* Local address */
2010f1702c5SYu Xiangning 	struct soaddr sti_faddr;	/* Peer address */
2020f1702c5SYu Xiangning #define	sti_laddr_sa		sti_laddr.soa_sa
2030f1702c5SYu Xiangning #define	sti_faddr_sa		sti_faddr.soa_sa
2040f1702c5SYu Xiangning #define	sti_laddr_len		sti_laddr.soa_len
2050f1702c5SYu Xiangning #define	sti_faddr_len		sti_faddr.soa_len
2060f1702c5SYu Xiangning #define	sti_laddr_maxlen	sti_laddr.soa_maxlen
2070f1702c5SYu Xiangning #define	sti_faddr_maxlen	sti_faddr.soa_maxlen
2080f1702c5SYu Xiangning 
2090f1702c5SYu Xiangning 	/*
2100f1702c5SYu Xiangning 	 * For AF_UNIX sockets:
2110f1702c5SYu Xiangning 	 *
2120f1702c5SYu Xiangning 	 * sti_ux_laddr/faddr records the internal addresses used with the
2130f1702c5SYu Xiangning 	 * transport. sti_ux_vp and v_stream->sd_vnode form the
2140f1702c5SYu Xiangning 	 * cross-linkage between the underlying fs vnode corresponding
2150f1702c5SYu Xiangning 	 * to the bound sockaddr_un and the socket node.
216f012ee0cSGordon Ross 	 *
217f012ee0cSGordon Ross 	 * sti_ux_taddr holds the result of translations done in
218f012ee0cSGordon Ross 	 * so_ux_addr_xlate(), which may or may not be the same as
219f012ee0cSGordon Ross 	 * sti_ux_faddr (which is our connected peer address).
2200f1702c5SYu Xiangning 	 */
2210f1702c5SYu Xiangning 	struct so_ux_addr sti_ux_laddr; /* laddr bound with the transport */
222f012ee0cSGordon Ross 	struct so_ux_addr sti_ux_faddr; /* connected peer address */
223f012ee0cSGordon Ross 	struct so_ux_addr sti_ux_taddr; /* temporary address for sendmsg */
2240f1702c5SYu Xiangning 	struct vnode	*sti_ux_bound_vp; /* bound AF_UNIX file system vnode */
2250f1702c5SYu Xiangning 	struct sonode	*sti_next_so;	/* next sonode on socklist	*/
2260f1702c5SYu Xiangning 	struct sonode	*sti_prev_so;	/* previous sonode on socklist	*/
2270f1702c5SYu Xiangning 	mblk_t	*sti_discon_ind_mp;	/* T_DISCON_IND received from below */
2280f1702c5SYu Xiangning } sotpi_info_t;
2290f1702c5SYu Xiangning 
2300f1702c5SYu Xiangning struct T_capability_ack;
2310f1702c5SYu Xiangning 
2320f1702c5SYu Xiangning extern sonodeops_t sotpi_sonodeops;
2330f1702c5SYu Xiangning 
2340f1702c5SYu Xiangning extern int	socktpi_init(void);
23541174437SAnders Persson extern int	sotpi_convert_sonode(struct sonode *, struct sockparams *,
23641174437SAnders Persson 		    boolean_t *, queue_t **, struct cred *);
23741174437SAnders Persson extern void	sotpi_revert_sonode(struct sonode *, struct cred *);
2380f1702c5SYu Xiangning extern void	sotpi_update_state(struct sonode *, struct T_capability_ack *,
2390f1702c5SYu Xiangning 		    struct sockaddr *, socklen_t, struct sockaddr *, socklen_t,
2400f1702c5SYu Xiangning 		    short);
2410f1702c5SYu Xiangning 
2420f1702c5SYu Xiangning extern sotpi_info_t	*sotpi_sototpi(struct sonode *);
2430f1702c5SYu Xiangning #ifdef DEBUG
2440f1702c5SYu Xiangning #define	SOTOTPI(so)	(sotpi_sototpi(so))
2450f1702c5SYu Xiangning #else
2460f1702c5SYu Xiangning #define	SOTOTPI(so)	((sotpi_info_t *)(so)->so_priv)
2470f1702c5SYu Xiangning #endif
2480f1702c5SYu Xiangning 
/* for consumers outside sockfs: always the direct so_priv cast */
#define	_SOTOTPI(so)	((sotpi_info_t *)(so)->so_priv)

#ifdef	__cplusplus
}
#endif

#endif /* _SOCKFS_SOCKTPI_H */