xref: /illumos-gate/usr/src/uts/common/inet/udp/udp.c (revision 4022e346)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2018, Joyent, Inc.
26  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27  * Copyright 2024 Oxide Computer Company
28  */
29 /* Copyright (c) 1990 Mentat Inc. */
30 
31 #include <sys/sysmacros.h>
32 #include <sys/types.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/strlog.h>
36 #include <sys/strsun.h>
37 #define	_SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/timod.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/strsubr.h>
43 #include <sys/suntpi.h>
44 #include <sys/xti_inet.h>
45 #include <sys/kmem.h>
46 #include <sys/cred_impl.h>
47 #include <sys/policy.h>
48 #include <sys/priv.h>
49 #include <sys/ucred.h>
50 #include <sys/zone.h>
51 
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/sockio.h>
55 #include <sys/vtrace.h>
56 #include <sys/sdt.h>
57 #include <sys/debug.h>
58 #include <sys/isa_defs.h>
59 #include <sys/random.h>
60 #include <netinet/in.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/udp.h>
64 
65 #include <inet/common.h>
66 #include <inet/ip.h>
67 #include <inet/ip_impl.h>
68 #include <inet/ipsec_impl.h>
69 #include <inet/ip6.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_multi.h>
73 #include <inet/ip_ndp.h>
74 #include <inet/proto_set.h>
75 #include <inet/mib2.h>
76 #include <inet/optcom.h>
77 #include <inet/snmpcom.h>
78 #include <inet/kstatcom.h>
79 #include <inet/ipclassifier.h>
80 #include <sys/squeue_impl.h>
81 #include <inet/ipnet.h>
82 #include <sys/vxlan.h>
83 #include <inet/inet_hash.h>
84 
85 #include <sys/tsol/label.h>
86 #include <sys/tsol/tnet.h>
87 #include <rpc/pmap_prot.h>
88 
89 #include <inet/udp_impl.h>
90 
91 /*
92  * Synchronization notes:
93  *
94  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
95  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
96  * protects the contents of the udp_t. uf_lock protects the address and the
97  * fanout information.
98  * The lock order is conn_lock -> uf_lock.
99  *
100  * The fanout lock uf_lock:
101  * When a UDP endpoint is bound to a local port, it is inserted into
102  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
103  * The size of the array is controlled by the udp_bind_fanout_size variable.
104  * This variable can be changed in /etc/system if the default value is
105  * not large enough.  Each bind hash bucket is protected by a per bucket
106  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
107  * structure and a few other fields in the udp_t. A UDP endpoint is removed
108  * from the bind hash list only when it is being unbound or being closed.
109  * The per bucket lock also protects a UDP endpoint's state changes.
110  *
111  * Plumbing notes:
112  * UDP is always a device driver. For compatibility with mibopen() code
113  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
114  * dummy module.
115  *
 * The above implies that we don't support any intermediate module
 * residing between /dev/ip and udp -- in fact, we have never supported
 * such a scenario, as the inter-layer communication semantics have
 * always been private.
120  */
121 
122 /* For /etc/system control */
123 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
124 
125 static void	udp_addr_req(queue_t *q, mblk_t *mp);
126 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
127 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
128 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
129 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
130     const in6_addr_t *, in_port_t, uint32_t);
131 static void	udp_capability_req(queue_t *q, mblk_t *mp);
132 static int	udp_tpi_close(queue_t *q, int flags, cred_t *);
133 static void	udp_close_free(conn_t *);
134 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
135 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
136 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
137     int sys_error);
138 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
139     t_scalar_t tlierr, int sys_error);
140 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
141 		    cred_t *cr);
142 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
143 		    char *value, caddr_t cp, cred_t *cr);
144 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
145 		    char *value, caddr_t cp, cred_t *cr);
146 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
147 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
148     ip_recv_attr_t *ira);
149 static void	udp_info_req(queue_t *q, mblk_t *mp);
150 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
151 static int	udp_lrput(queue_t *, mblk_t *);
152 static int	udp_lwput(queue_t *, mblk_t *);
153 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
154 		    cred_t *credp, boolean_t isv6);
155 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
156 		    cred_t *credp);
157 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
158 		    cred_t *credp);
159 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
160 int		udp_opt_set(conn_t *connp, uint_t optset_context,
161 		    int level, int name, uint_t inlen,
162 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
163 		    void *thisdg_attrs, cred_t *cr);
164 int		udp_opt_get(conn_t *connp, int level, int name,
165 		    uchar_t *ptr);
166 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
167 		    pid_t pid);
168 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
169     pid_t pid, ip_xmit_attr_t *ixa);
170 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
171 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
172 		    ip_xmit_attr_t *ixa);
173 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
174     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
175     int *);
176 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
177     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
178 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
179 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
180 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
181 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
182     boolean_t random);
183 static void	udp_wput_other(queue_t *q, mblk_t *mp);
184 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
185 static int	udp_wput_fallback(queue_t *q, mblk_t *mp);
186 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
187 
188 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
189 static void	udp_stack_fini(netstackid_t stackid, void *arg);
190 
191 /* Common routines for TPI and socket module */
192 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
193 
194 /* Common routine for TPI and socket module */
195 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
196 static void	udp_do_close(conn_t *);
197 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
198     boolean_t);
199 static int	udp_do_unbind(conn_t *);
200 
201 int		udp_getsockname(sock_lower_handle_t,
202     struct sockaddr *, socklen_t *, cred_t *);
203 int		udp_getpeername(sock_lower_handle_t,
204     struct sockaddr *, socklen_t *, cred_t *);
205 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
206     cred_t *, pid_t);
207 
208 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
209 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The result of the hook is stored in err, which is reset to 0 first.
 * cp is evaluated more than once, so it must not have side effects.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
247 
248 static struct module_info udp_mod_info =  {
249 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
250 };
251 
252 /*
253  * Entry points for UDP as a device.
254  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
255  */
256 static struct qinit udp_rinitv4 = {
257 	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
258 };
259 
260 static struct qinit udp_rinitv6 = {
261 	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
262 };
263 
264 static struct qinit udp_winit = {
265 	udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info
266 };
267 
268 /* UDP entry point during fallback */
269 struct qinit udp_fallback_sock_winit = {
270 	udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
271 };
272 
273 /*
274  * UDP needs to handle I_LINK and I_PLINK since ifconfig
275  * likes to use it as a place to hang the various streams.
276  */
277 static struct qinit udp_lrinit = {
278 	udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
279 };
280 
281 static struct qinit udp_lwinit = {
282 	udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
283 };
284 
285 /* For AF_INET aka /dev/udp */
286 struct streamtab udpinfov4 = {
287 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
288 };
289 
290 /* For AF_INET6 aka /dev/udp6 */
291 struct streamtab udpinfov6 = {
292 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
293 };
294 
295 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
296 
297 /* Default structure copied into T_INFO_ACK messages */
298 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
299 	T_INFO_ACK,
300 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
301 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
302 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
303 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
304 	sizeof (sin_t),	/* ADDR_size. */
305 	0,		/* OPT_size - not initialized here */
306 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
307 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
308 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
309 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
310 };
311 
312 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
313 
314 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
315 	T_INFO_ACK,
316 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
317 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
318 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
319 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
320 	sizeof (sin6_t), /* ADDR_size. */
321 	0,		/* OPT_size - not initialized here */
322 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
323 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
324 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
325 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
326 };
327 
328 /*
329  * UDP tunables related declarations. Definitions are in udp_tunables.c
330  */
331 extern mod_prop_info_t udp_propinfo_tbl[];
332 extern int udp_propinfo_count;
333 
334 /* Setable in /etc/system */
335 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
336 uint32_t udp_random_anon_port = 1;
337 
338 /*
339  * Hook functions to enable cluster networking.
340  * On non-clustered systems these vectors must always be NULL
341  */
342 
343 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
344     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
345     void *args) = NULL;
346 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
347     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
348     void *args) = NULL;
349 
350 typedef union T_primitives *t_primp_t;
351 
352 /*
353  * Various protocols that encapsulate UDP have no real use for the source port.
354  * Instead, they want to vary the source port to provide better equal-cost
355  * multipathing and other systems that use fanout. Consider something like
356  * VXLAN. If you're actually sending multiple different streams to a single
357  * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
358  * SRC Port, DST Port) will always be the same.
359  *
360  * Here, we return a port to hash this to, if we know how to hash it. If for
361  * some reason we can't perform an L4 hash, then we just return the default
362  * value, usually the default port. After we determine the hash we transform it
363  * so that it's in the range of [ min, max ].
364  *
365  * We'd like to avoid a pull up for the sake of performing the hash. If the
366  * first mblk_t doesn't have the full protocol header, then we just send it to
367  * the default. If for some reason we have an encapsulated packet that has its
368  * protocol header in different parts of an mblk_t, then we'll go with the
369  * default port. This means that that if a driver isn't consistent about how it
370  * generates the frames for a given flow, it will not always be consistently
371  * hashed. That should be an uncommon event.
372  */
373 uint16_t
udp_srcport_hash(mblk_t * mp,int type,uint16_t min,uint16_t max,uint16_t def)374 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
375     uint16_t def)
376 {
377 	size_t szused = 0;
378 	ip6_t *ip6h;
379 	ipha_t *ipha;
380 	uint16_t sap;
381 	uint64_t hash;
382 	uint32_t mod;
383 
384 	ASSERT(min <= max);
385 
386 	if (type != UDP_HASH_VXLAN)
387 		return (def);
388 
389 	if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
390 		return (def);
391 
392 	/*
393 	 * The following logic is VXLAN specific to get at the header, if we
394 	 * have formats, eg. GENEVE, then we should ignore this.
395 	 *
396 	 * The kernel overlay device often puts a first mblk_t for the data
397 	 * which is just the encap. If so, then we're going to use that and try
398 	 * to avoid a pull up.
399 	 */
400 	if (MBLKL(mp) == VXLAN_HDR_LEN) {
401 		if (mp->b_cont == NULL)
402 			return (def);
403 		mp = mp->b_cont;
404 	} else if (MBLKL(mp) < VXLAN_HDR_LEN) {
405 		return (def);
406 	} else {
407 		szused = VXLAN_HDR_LEN;
408 	}
409 
410 	/* Can we hold a MAC header? */
411 	if (MBLKL(mp) + szused < sizeof (struct ether_header))
412 		return (def);
413 
414 	/*
415 	 * We need to lie about the starting offset into the message block for
416 	 * convenience. Undo it at the end. We know that inet_pkt_hash() won't
417 	 * modify the mblk_t.
418 	 */
419 	mp->b_rptr += szused;
420 	hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
421 	    INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
422 	mp->b_rptr -= szused;
423 
424 	if (hash == 0)
425 		return (def);
426 
427 	mod = max - min + 1;
428 	return ((hash % mod) + min);
429 }
430 
431 /*
432  * Return the next anonymous port in the privileged port range for
433  * bind checking.
434  *
435  * Trusted Extension (TX) notes: TX allows administrator to mark or
436  * reserve ports as Multilevel ports (MLP). MLP has special function
437  * on TX systems. Once a port is made MLP, it's not available as
438  * ordinary port. This creates "holes" in the port name space. It
439  * may be necessary to skip the "holes" find a suitable anon port.
440  */
441 static in_port_t
udp_get_next_priv_port(udp_t * udp)442 udp_get_next_priv_port(udp_t *udp)
443 {
444 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
445 	in_port_t nextport;
446 	boolean_t restart = B_FALSE;
447 	udp_stack_t *us = udp->udp_us;
448 
449 retry:
450 	if (next_priv_port < us->us_min_anonpriv_port ||
451 	    next_priv_port >= IPPORT_RESERVED) {
452 		next_priv_port = IPPORT_RESERVED - 1;
453 		if (restart)
454 			return (0);
455 		restart = B_TRUE;
456 	}
457 
458 	if (is_system_labeled() &&
459 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
460 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
461 		next_priv_port = nextport;
462 		goto retry;
463 	}
464 
465 	return (next_priv_port--);
466 }
467 
468 /*
469  * Hash list removal routine for udp_t structures.
470  */
471 static void
udp_bind_hash_remove(udp_t * udp,boolean_t caller_holds_lock)472 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
473 {
474 	udp_t		*udpnext;
475 	kmutex_t	*lockp;
476 	udp_stack_t	*us = udp->udp_us;
477 	conn_t		*connp = udp->udp_connp;
478 
479 	if (udp->udp_ptpbhn == NULL)
480 		return;
481 
482 	/*
483 	 * Extract the lock pointer in case there are concurrent
484 	 * hash_remove's for this instance.
485 	 */
486 	ASSERT(connp->conn_lport != 0);
487 	if (!caller_holds_lock) {
488 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
489 		    us->us_bind_fanout_size)].uf_lock;
490 		ASSERT(lockp != NULL);
491 		mutex_enter(lockp);
492 	}
493 	if (udp->udp_ptpbhn != NULL) {
494 		udpnext = udp->udp_bind_hash;
495 		if (udpnext != NULL) {
496 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
497 			udp->udp_bind_hash = NULL;
498 		}
499 		*udp->udp_ptpbhn = udpnext;
500 		udp->udp_ptpbhn = NULL;
501 	}
502 	if (!caller_holds_lock) {
503 		mutex_exit(lockp);
504 	}
505 }
506 
507 static void
udp_bind_hash_insert(udp_fanout_t * uf,udp_t * udp)508 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
509 {
510 	conn_t	*connp = udp->udp_connp;
511 	udp_t	**udpp;
512 	udp_t	*udpnext;
513 	conn_t	*connext;
514 
515 	ASSERT(MUTEX_HELD(&uf->uf_lock));
516 	ASSERT(udp->udp_ptpbhn == NULL);
517 	udpp = &uf->uf_udp;
518 	udpnext = udpp[0];
519 	if (udpnext != NULL) {
520 		/*
521 		 * If the new udp bound to the INADDR_ANY address
522 		 * and the first one in the list is not bound to
523 		 * INADDR_ANY we skip all entries until we find the
524 		 * first one bound to INADDR_ANY.
525 		 * This makes sure that applications binding to a
526 		 * specific address get preference over those binding to
527 		 * INADDR_ANY.
528 		 */
529 		connext = udpnext->udp_connp;
530 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
531 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
532 			while ((udpnext = udpp[0]) != NULL &&
533 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
534 				udpp = &(udpnext->udp_bind_hash);
535 			}
536 			if (udpnext != NULL)
537 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
538 		} else {
539 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
540 		}
541 	}
542 	udp->udp_bind_hash = udpnext;
543 	udp->udp_ptpbhn = udpp;
544 	udpp[0] = udp;
545 }
546 
547 /*
548  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
549  * passed to udp_wput.
550  * It associates a port number and local address with the stream.
551  * It calls IP to verify the local IP address, and calls IP to insert
552  * the conn_t in the fanout table.
553  * If everything is ok it then sends the T_BIND_ACK back up.
554  *
555  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
556  * without setting SO_REUSEADDR. This is needed so that they
557  * can be viewed as two independent transport protocols.
558  * However, anonymouns ports are allocated from the same range to avoid
559  * duplicating the us->us_next_port_to_try.
560  */
561 static void
udp_tpi_bind(queue_t * q,mblk_t * mp)562 udp_tpi_bind(queue_t *q, mblk_t *mp)
563 {
564 	sin_t		*sin;
565 	sin6_t		*sin6;
566 	mblk_t		*mp1;
567 	struct T_bind_req *tbr;
568 	conn_t		*connp;
569 	udp_t		*udp;
570 	int		error;
571 	struct sockaddr	*sa;
572 	cred_t		*cr;
573 
574 	/*
575 	 * All Solaris components should pass a db_credp
576 	 * for this TPI message, hence we ASSERT.
577 	 * But in case there is some other M_PROTO that looks
578 	 * like a TPI message sent by some other kernel
579 	 * component, we check and return an error.
580 	 */
581 	cr = msg_getcred(mp, NULL);
582 	ASSERT(cr != NULL);
583 	if (cr == NULL) {
584 		udp_err_ack(q, mp, TSYSERR, EINVAL);
585 		return;
586 	}
587 
588 	connp = Q_TO_CONN(q);
589 	udp = connp->conn_udp;
590 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
591 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
592 		    "udp_bind: bad req, len %u",
593 		    (uint_t)(mp->b_wptr - mp->b_rptr));
594 		udp_err_ack(q, mp, TPROTO, 0);
595 		return;
596 	}
597 	if (udp->udp_state != TS_UNBND) {
598 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
599 		    "udp_bind: bad state, %u", udp->udp_state);
600 		udp_err_ack(q, mp, TOUTSTATE, 0);
601 		return;
602 	}
603 	/*
604 	 * Reallocate the message to make sure we have enough room for an
605 	 * address.
606 	 */
607 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
608 	if (mp1 == NULL) {
609 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
610 		return;
611 	}
612 
613 	mp = mp1;
614 
615 	/* Reset the message type in preparation for shipping it back. */
616 	DB_TYPE(mp) = M_PCPROTO;
617 
618 	tbr = (struct T_bind_req *)mp->b_rptr;
619 	switch (tbr->ADDR_length) {
620 	case 0:			/* Request for a generic port */
621 		tbr->ADDR_offset = sizeof (struct T_bind_req);
622 		if (connp->conn_family == AF_INET) {
623 			tbr->ADDR_length = sizeof (sin_t);
624 			sin = (sin_t *)&tbr[1];
625 			*sin = sin_null;
626 			sin->sin_family = AF_INET;
627 			mp->b_wptr = (uchar_t *)&sin[1];
628 			sa = (struct sockaddr *)sin;
629 		} else {
630 			ASSERT(connp->conn_family == AF_INET6);
631 			tbr->ADDR_length = sizeof (sin6_t);
632 			sin6 = (sin6_t *)&tbr[1];
633 			*sin6 = sin6_null;
634 			sin6->sin6_family = AF_INET6;
635 			mp->b_wptr = (uchar_t *)&sin6[1];
636 			sa = (struct sockaddr *)sin6;
637 		}
638 		break;
639 
640 	case sizeof (sin_t):	/* Complete IPv4 address */
641 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
642 		    sizeof (sin_t));
643 		if (sa == NULL || !OK_32PTR((char *)sa)) {
644 			udp_err_ack(q, mp, TSYSERR, EINVAL);
645 			return;
646 		}
647 		if (connp->conn_family != AF_INET ||
648 		    sa->sa_family != AF_INET) {
649 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
650 			return;
651 		}
652 		break;
653 
654 	case sizeof (sin6_t):	/* complete IPv6 address */
655 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
656 		    sizeof (sin6_t));
657 		if (sa == NULL || !OK_32PTR((char *)sa)) {
658 			udp_err_ack(q, mp, TSYSERR, EINVAL);
659 			return;
660 		}
661 		if (connp->conn_family != AF_INET6 ||
662 		    sa->sa_family != AF_INET6) {
663 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
664 			return;
665 		}
666 		break;
667 
668 	default:		/* Invalid request */
669 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
670 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
671 		udp_err_ack(q, mp, TBADADDR, 0);
672 		return;
673 	}
674 
675 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
676 	    tbr->PRIM_type != O_T_BIND_REQ);
677 
678 	if (error != 0) {
679 		if (error > 0) {
680 			udp_err_ack(q, mp, TSYSERR, error);
681 		} else {
682 			udp_err_ack(q, mp, -error, 0);
683 		}
684 	} else {
685 		tbr->PRIM_type = T_BIND_ACK;
686 		qreply(q, mp);
687 	}
688 }
689 
690 /*
691  * This routine handles each T_CONN_REQ message passed to udp.  It
692  * associates a default destination address with the stream.
693  *
694  * After various error checks are completed, udp_connect() lays
695  * the target address and port into the composite header template.
696  * Then we ask IP for information, including a source address if we didn't
697  * already have one. Finally we send up the T_OK_ACK reply message.
698  */
699 static void
udp_tpi_connect(queue_t * q,mblk_t * mp)700 udp_tpi_connect(queue_t *q, mblk_t *mp)
701 {
702 	conn_t	*connp = Q_TO_CONN(q);
703 	int	error;
704 	socklen_t	len;
705 	struct sockaddr		*sa;
706 	struct T_conn_req	*tcr;
707 	cred_t		*cr;
708 	pid_t		pid;
709 	/*
710 	 * All Solaris components should pass a db_credp
711 	 * for this TPI message, hence we ASSERT.
712 	 * But in case there is some other M_PROTO that looks
713 	 * like a TPI message sent by some other kernel
714 	 * component, we check and return an error.
715 	 */
716 	cr = msg_getcred(mp, &pid);
717 	ASSERT(cr != NULL);
718 	if (cr == NULL) {
719 		udp_err_ack(q, mp, TSYSERR, EINVAL);
720 		return;
721 	}
722 
723 	tcr = (struct T_conn_req *)mp->b_rptr;
724 
725 	/* A bit of sanity checking */
726 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
727 		udp_err_ack(q, mp, TPROTO, 0);
728 		return;
729 	}
730 
731 	if (tcr->OPT_length != 0) {
732 		udp_err_ack(q, mp, TBADOPT, 0);
733 		return;
734 	}
735 
736 	/*
737 	 * Determine packet type based on type of address passed in
738 	 * the request should contain an IPv4 or IPv6 address.
739 	 * Make sure that address family matches the type of
740 	 * family of the address passed down.
741 	 */
742 	len = tcr->DEST_length;
743 	switch (tcr->DEST_length) {
744 	default:
745 		udp_err_ack(q, mp, TBADADDR, 0);
746 		return;
747 
748 	case sizeof (sin_t):
749 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
750 		    sizeof (sin_t));
751 		break;
752 
753 	case sizeof (sin6_t):
754 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
755 		    sizeof (sin6_t));
756 		break;
757 	}
758 
759 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
760 	if (error != 0) {
761 		udp_err_ack(q, mp, TSYSERR, error);
762 		return;
763 	}
764 
765 	error = udp_do_connect(connp, sa, len, cr, pid);
766 	if (error != 0) {
767 		if (error < 0)
768 			udp_err_ack(q, mp, -error, 0);
769 		else
770 			udp_err_ack(q, mp, TSYSERR, error);
771 	} else {
772 		mblk_t	*mp1;
773 		/*
774 		 * We have to send a connection confirmation to
775 		 * keep TLI happy.
776 		 */
777 		if (connp->conn_family == AF_INET) {
778 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
779 			    sizeof (sin_t), NULL, 0);
780 		} else {
781 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
782 			    sizeof (sin6_t), NULL, 0);
783 		}
784 		if (mp1 == NULL) {
785 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
786 			return;
787 		}
788 
789 		/*
790 		 * Send ok_ack for T_CONN_REQ
791 		 */
792 		mp = mi_tpi_ok_ack_alloc(mp);
793 		if (mp == NULL) {
794 			/* Unable to reuse the T_CONN_REQ for the ack. */
795 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
796 			return;
797 		}
798 
799 		putnext(connp->conn_rq, mp);
800 		putnext(connp->conn_rq, mp1);
801 	}
802 }
803 
804 /* ARGSUSED */
805 static int
udp_tpi_close(queue_t * q,int flags,cred_t * credp __unused)806 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused)
807 {
808 	conn_t	*connp;
809 
810 	if (flags & SO_FALLBACK) {
811 		/*
812 		 * stream is being closed while in fallback
813 		 * simply free the resources that were allocated
814 		 */
815 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
816 		qprocsoff(q);
817 		goto done;
818 	}
819 
820 	connp = Q_TO_CONN(q);
821 	udp_do_close(connp);
822 done:
823 	q->q_ptr = WR(q)->q_ptr = NULL;
824 	return (0);
825 }
826 
827 static void
udp_close_free(conn_t * connp)828 udp_close_free(conn_t *connp)
829 {
830 	udp_t *udp = connp->conn_udp;
831 
832 	/* If there are any options associated with the stream, free them. */
833 	if (udp->udp_recv_ipp.ipp_fields != 0)
834 		ip_pkt_free(&udp->udp_recv_ipp);
835 
836 	/*
837 	 * Clear any fields which the kmem_cache constructor clears.
838 	 * Only udp_connp needs to be preserved.
839 	 * TBD: We should make this more efficient to avoid clearing
840 	 * everything.
841 	 */
842 	ASSERT(udp->udp_connp == connp);
843 	bzero(udp, sizeof (udp_t));
844 	udp->udp_connp = connp;
845 }
846 
847 static int
udp_do_disconnect(conn_t * connp)848 udp_do_disconnect(conn_t *connp)
849 {
850 	udp_t	*udp;
851 	udp_fanout_t *udpf;
852 	udp_stack_t *us;
853 	int	error;
854 
855 	udp = connp->conn_udp;
856 	us = udp->udp_us;
857 	mutex_enter(&connp->conn_lock);
858 	if (udp->udp_state != TS_DATA_XFER) {
859 		mutex_exit(&connp->conn_lock);
860 		return (-TOUTSTATE);
861 	}
862 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
863 	    us->us_bind_fanout_size)];
864 	mutex_enter(&udpf->uf_lock);
865 	if (connp->conn_mcbc_bind)
866 		connp->conn_saddr_v6 = ipv6_all_zeros;
867 	else
868 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
869 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
870 	connp->conn_faddr_v6 = ipv6_all_zeros;
871 	connp->conn_fport = 0;
872 	udp->udp_state = TS_IDLE;
873 	mutex_exit(&udpf->uf_lock);
874 
875 	/* Remove any remnants of mapped address binding */
876 	if (connp->conn_family == AF_INET6)
877 		connp->conn_ipversion = IPV6_VERSION;
878 
879 	connp->conn_v6lastdst = ipv6_all_zeros;
880 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
881 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
882 	mutex_exit(&connp->conn_lock);
883 	if (error != 0)
884 		return (error);
885 
886 	/*
887 	 * Tell IP to remove the full binding and revert
888 	 * to the local address binding.
889 	 */
890 	return (ip_laddr_fanout_insert(connp));
891 }
892 
893 static void
udp_tpi_disconnect(queue_t * q,mblk_t * mp)894 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
895 {
896 	conn_t	*connp = Q_TO_CONN(q);
897 	int	error;
898 
899 	/*
900 	 * Allocate the largest primitive we need to send back
901 	 * T_error_ack is > than T_ok_ack
902 	 */
903 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
904 	if (mp == NULL) {
905 		/* Unable to reuse the T_DISCON_REQ for the ack. */
906 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
907 		return;
908 	}
909 
910 	error = udp_do_disconnect(connp);
911 
912 	if (error != 0) {
913 		if (error < 0) {
914 			udp_err_ack(q, mp, -error, 0);
915 		} else {
916 			udp_err_ack(q, mp, TSYSERR, error);
917 		}
918 	} else {
919 		mp = mi_tpi_ok_ack_alloc(mp);
920 		ASSERT(mp != NULL);
921 		qreply(q, mp);
922 	}
923 }
924 
925 int
udp_disconnect(conn_t * connp)926 udp_disconnect(conn_t *connp)
927 {
928 	int error;
929 
930 	connp->conn_dgram_errind = B_FALSE;
931 	error = udp_do_disconnect(connp);
932 	if (error < 0)
933 		error = proto_tlitosyserr(-error);
934 
935 	return (error);
936 }
937 
938 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
939 static void
udp_err_ack(queue_t * q,mblk_t * mp,t_scalar_t t_error,int sys_error)940 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
941 {
942 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
943 		qreply(q, mp);
944 }
945 
946 /* Shorthand to generate and send TPI error acks to our client */
947 static void
udp_err_ack_prim(queue_t * q,mblk_t * mp,t_scalar_t primitive,t_scalar_t t_error,int sys_error)948 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
949     t_scalar_t t_error, int sys_error)
950 {
951 	struct T_error_ack	*teackp;
952 
953 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
954 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
955 		teackp = (struct T_error_ack *)mp->b_rptr;
956 		teackp->ERROR_prim = primitive;
957 		teackp->TLI_error = t_error;
958 		teackp->UNIX_error = sys_error;
959 		qreply(q, mp);
960 	}
961 }
962 
963 /* At minimum we need 4 bytes of UDP header */
964 #define	ICMP_MIN_UDP_HDR	4
965 
966 /*
967  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
968  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
969  * Assumes that IP has pulled up everything up to and including the ICMP header.
970  */
971 /* ARGSUSED2 */
972 static void
udp_icmp_input(void * arg1,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)973 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
974 {
975 	conn_t		*connp = (conn_t *)arg1;
976 	icmph_t		*icmph;
977 	ipha_t		*ipha;
978 	int		iph_hdr_length;
979 	udpha_t		*udpha;
980 	sin_t		sin;
981 	sin6_t		sin6;
982 	mblk_t		*mp1;
983 	int		error = 0;
984 	udp_t		*udp = connp->conn_udp;
985 
986 	ipha = (ipha_t *)mp->b_rptr;
987 
988 	ASSERT(OK_32PTR(mp->b_rptr));
989 
990 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
991 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
992 		udp_icmp_error_ipv6(connp, mp, ira);
993 		return;
994 	}
995 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
996 
997 	/* Skip past the outer IP and ICMP headers */
998 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
999 	iph_hdr_length = ira->ira_ip_hdr_length;
1000 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1001 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
1002 
1003 	/* Skip past the inner IP and find the ULP header */
1004 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1005 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1006 
1007 	switch (icmph->icmph_type) {
1008 	case ICMP_DEST_UNREACHABLE:
1009 		switch (icmph->icmph_code) {
1010 		case ICMP_FRAGMENTATION_NEEDED: {
1011 			ipha_t		*ipha;
1012 			ip_xmit_attr_t	*ixa;
1013 			/*
1014 			 * IP has already adjusted the path MTU.
1015 			 * But we need to adjust DF for IPv4.
1016 			 */
1017 			if (connp->conn_ipversion != IPV4_VERSION)
1018 				break;
1019 
1020 			ixa = conn_get_ixa(connp, B_FALSE);
1021 			if (ixa == NULL || ixa->ixa_ire == NULL) {
1022 				/*
1023 				 * Some other thread holds conn_ixa. We will
1024 				 * redo this on the next ICMP too big.
1025 				 */
1026 				if (ixa != NULL)
1027 					ixa_refrele(ixa);
1028 				break;
1029 			}
1030 			(void) ip_get_pmtu(ixa);
1031 
1032 			mutex_enter(&connp->conn_lock);
1033 			ipha = (ipha_t *)connp->conn_ht_iphc;
1034 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1035 				ipha->ipha_fragment_offset_and_flags |=
1036 				    IPH_DF_HTONS;
1037 			} else {
1038 				ipha->ipha_fragment_offset_and_flags &=
1039 				    ~IPH_DF_HTONS;
1040 			}
1041 			mutex_exit(&connp->conn_lock);
1042 			ixa_refrele(ixa);
1043 			break;
1044 		}
1045 		case ICMP_PORT_UNREACHABLE:
1046 		case ICMP_PROTOCOL_UNREACHABLE:
1047 			error = ECONNREFUSED;
1048 			break;
1049 		default:
1050 			/* Transient errors */
1051 			break;
1052 		}
1053 		break;
1054 	default:
1055 		/* Transient errors */
1056 		break;
1057 	}
1058 	if (error == 0) {
1059 		freemsg(mp);
1060 		return;
1061 	}
1062 
1063 	/*
1064 	 * Deliver T_UDERROR_IND when the application has asked for it.
1065 	 * The socket layer enables this automatically when connected.
1066 	 */
1067 	if (!connp->conn_dgram_errind) {
1068 		freemsg(mp);
1069 		return;
1070 	}
1071 
1072 	switch (connp->conn_family) {
1073 	case AF_INET:
1074 		sin = sin_null;
1075 		sin.sin_family = AF_INET;
1076 		sin.sin_addr.s_addr = ipha->ipha_dst;
1077 		sin.sin_port = udpha->uha_dst_port;
1078 		if (IPCL_IS_NONSTR(connp)) {
1079 			mutex_enter(&connp->conn_lock);
1080 			if (udp->udp_state == TS_DATA_XFER) {
1081 				if (sin.sin_port == connp->conn_fport &&
1082 				    sin.sin_addr.s_addr ==
1083 				    connp->conn_faddr_v4) {
1084 					mutex_exit(&connp->conn_lock);
1085 					(*connp->conn_upcalls->su_set_error)
1086 					    (connp->conn_upper_handle, error);
1087 					goto done;
1088 				}
1089 			} else {
1090 				udp->udp_delayed_error = error;
1091 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1092 			}
1093 			mutex_exit(&connp->conn_lock);
1094 		} else {
1095 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1096 			    NULL, 0, error);
1097 			if (mp1 != NULL)
1098 				putnext(connp->conn_rq, mp1);
1099 		}
1100 		break;
1101 	case AF_INET6:
1102 		sin6 = sin6_null;
1103 		sin6.sin6_family = AF_INET6;
1104 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1105 		sin6.sin6_port = udpha->uha_dst_port;
1106 		if (IPCL_IS_NONSTR(connp)) {
1107 			mutex_enter(&connp->conn_lock);
1108 			if (udp->udp_state == TS_DATA_XFER) {
1109 				if (sin6.sin6_port == connp->conn_fport &&
1110 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1111 				    &connp->conn_faddr_v6)) {
1112 					mutex_exit(&connp->conn_lock);
1113 					(*connp->conn_upcalls->su_set_error)
1114 					    (connp->conn_upper_handle, error);
1115 					goto done;
1116 				}
1117 			} else {
1118 				udp->udp_delayed_error = error;
1119 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1120 			}
1121 			mutex_exit(&connp->conn_lock);
1122 		} else {
1123 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1124 			    NULL, 0, error);
1125 			if (mp1 != NULL)
1126 				putnext(connp->conn_rq, mp1);
1127 		}
1128 		break;
1129 	}
1130 done:
1131 	freemsg(mp);
1132 }
1133 
1134 /*
1135  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1136  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1137  * Assumes that IP has pulled up all the extension headers as well as the
1138  * ICMPv6 header.
1139  */
1140 static void
udp_icmp_error_ipv6(conn_t * connp,mblk_t * mp,ip_recv_attr_t * ira)1141 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1142 {
1143 	icmp6_t		*icmp6;
1144 	ip6_t		*ip6h, *outer_ip6h;
1145 	uint16_t	iph_hdr_length;
1146 	uint8_t		*nexthdrp;
1147 	udpha_t		*udpha;
1148 	sin6_t		sin6;
1149 	mblk_t		*mp1;
1150 	int		error = 0;
1151 	udp_t		*udp = connp->conn_udp;
1152 	udp_stack_t	*us = udp->udp_us;
1153 
1154 	outer_ip6h = (ip6_t *)mp->b_rptr;
1155 #ifdef DEBUG
1156 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1157 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1158 	else
1159 		iph_hdr_length = IPV6_HDR_LEN;
1160 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1161 #endif
1162 	/* Skip past the outer IP and ICMP headers */
1163 	iph_hdr_length = ira->ira_ip_hdr_length;
1164 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1165 
1166 	/* Skip past the inner IP and find the ULP header */
1167 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
1168 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1169 		freemsg(mp);
1170 		return;
1171 	}
1172 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1173 
1174 	switch (icmp6->icmp6_type) {
1175 	case ICMP6_DST_UNREACH:
1176 		switch (icmp6->icmp6_code) {
1177 		case ICMP6_DST_UNREACH_NOPORT:
1178 			error = ECONNREFUSED;
1179 			break;
1180 		case ICMP6_DST_UNREACH_ADMIN:
1181 		case ICMP6_DST_UNREACH_NOROUTE:
1182 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1183 		case ICMP6_DST_UNREACH_ADDR:
1184 			/* Transient errors */
1185 			break;
1186 		default:
1187 			break;
1188 		}
1189 		break;
1190 	case ICMP6_PACKET_TOO_BIG: {
1191 		struct T_unitdata_ind	*tudi;
1192 		struct T_opthdr		*toh;
1193 		size_t			udi_size;
1194 		mblk_t			*newmp;
1195 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1196 		    sizeof (struct ip6_mtuinfo);
1197 		sin6_t			*sin6;
1198 		struct ip6_mtuinfo	*mtuinfo;
1199 
1200 		/*
1201 		 * If the application has requested to receive path mtu
1202 		 * information, send up an empty message containing an
1203 		 * IPV6_PATHMTU ancillary data item.
1204 		 */
1205 		if (!connp->conn_ipv6_recvpathmtu)
1206 			break;
1207 
1208 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1209 		    opt_length;
1210 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1211 			UDPS_BUMP_MIB(us, udpInErrors);
1212 			break;
1213 		}
1214 
1215 		/*
1216 		 * newmp->b_cont is left to NULL on purpose.  This is an
1217 		 * empty message containing only ancillary data.
1218 		 */
1219 		newmp->b_datap->db_type = M_PROTO;
1220 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1221 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1222 		tudi->PRIM_type = T_UNITDATA_IND;
1223 		tudi->SRC_length = sizeof (sin6_t);
1224 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1225 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1226 		tudi->OPT_length = opt_length;
1227 
1228 		sin6 = (sin6_t *)&tudi[1];
1229 		bzero(sin6, sizeof (sin6_t));
1230 		sin6->sin6_family = AF_INET6;
1231 		sin6->sin6_addr = connp->conn_faddr_v6;
1232 
1233 		toh = (struct T_opthdr *)&sin6[1];
1234 		toh->level = IPPROTO_IPV6;
1235 		toh->name = IPV6_PATHMTU;
1236 		toh->len = opt_length;
1237 		toh->status = 0;
1238 
1239 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1240 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1241 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1242 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1243 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1244 		/*
1245 		 * We've consumed everything we need from the original
1246 		 * message.  Free it, then send our empty message.
1247 		 */
1248 		freemsg(mp);
1249 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1250 		return;
1251 	}
1252 	case ICMP6_TIME_EXCEEDED:
1253 		/* Transient errors */
1254 		break;
1255 	case ICMP6_PARAM_PROB:
1256 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1257 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1258 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1259 		    (uchar_t *)nexthdrp) {
1260 			error = ECONNREFUSED;
1261 			break;
1262 		}
1263 		break;
1264 	}
1265 	if (error == 0) {
1266 		freemsg(mp);
1267 		return;
1268 	}
1269 
1270 	/*
1271 	 * Deliver T_UDERROR_IND when the application has asked for it.
1272 	 * The socket layer enables this automatically when connected.
1273 	 */
1274 	if (!connp->conn_dgram_errind) {
1275 		freemsg(mp);
1276 		return;
1277 	}
1278 
1279 	sin6 = sin6_null;
1280 	sin6.sin6_family = AF_INET6;
1281 	sin6.sin6_addr = ip6h->ip6_dst;
1282 	sin6.sin6_port = udpha->uha_dst_port;
1283 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1284 
1285 	if (IPCL_IS_NONSTR(connp)) {
1286 		mutex_enter(&connp->conn_lock);
1287 		if (udp->udp_state == TS_DATA_XFER) {
1288 			if (sin6.sin6_port == connp->conn_fport &&
1289 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1290 			    &connp->conn_faddr_v6)) {
1291 				mutex_exit(&connp->conn_lock);
1292 				(*connp->conn_upcalls->su_set_error)
1293 				    (connp->conn_upper_handle, error);
1294 				goto done;
1295 			}
1296 		} else {
1297 			udp->udp_delayed_error = error;
1298 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1299 		}
1300 		mutex_exit(&connp->conn_lock);
1301 	} else {
1302 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1303 		    NULL, 0, error);
1304 		if (mp1 != NULL)
1305 			putnext(connp->conn_rq, mp1);
1306 	}
1307 done:
1308 	freemsg(mp);
1309 }
1310 
1311 /*
1312  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1313  * The local address is filled in if endpoint is bound. The remote address
1314  * is filled in if remote address has been precified ("connected endpoint")
1315  * (The concept of connected CLTS sockets is alien to published TPI
1316  *  but we support it anyway).
1317  */
1318 static void
udp_addr_req(queue_t * q,mblk_t * mp)1319 udp_addr_req(queue_t *q, mblk_t *mp)
1320 {
1321 	struct sockaddr *sa;
1322 	mblk_t	*ackmp;
1323 	struct T_addr_ack *taa;
1324 	udp_t	*udp = Q_TO_UDP(q);
1325 	conn_t	*connp = udp->udp_connp;
1326 	uint_t	addrlen;
1327 
1328 	/* Make it large enough for worst case */
1329 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1330 	    2 * sizeof (sin6_t), 1);
1331 	if (ackmp == NULL) {
1332 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1333 		return;
1334 	}
1335 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1336 
1337 	bzero(taa, sizeof (struct T_addr_ack));
1338 	ackmp->b_wptr = (uchar_t *)&taa[1];
1339 
1340 	taa->PRIM_type = T_ADDR_ACK;
1341 	ackmp->b_datap->db_type = M_PCPROTO;
1342 
1343 	if (connp->conn_family == AF_INET)
1344 		addrlen = sizeof (sin_t);
1345 	else
1346 		addrlen = sizeof (sin6_t);
1347 
1348 	mutex_enter(&connp->conn_lock);
1349 	/*
1350 	 * Note: Following code assumes 32 bit alignment of basic
1351 	 * data structures like sin_t and struct T_addr_ack.
1352 	 */
1353 	if (udp->udp_state != TS_UNBND) {
1354 		/*
1355 		 * Fill in local address first
1356 		 */
1357 		taa->LOCADDR_offset = sizeof (*taa);
1358 		taa->LOCADDR_length = addrlen;
1359 		sa = (struct sockaddr *)&taa[1];
1360 		(void) conn_getsockname(connp, sa, &addrlen);
1361 		ackmp->b_wptr += addrlen;
1362 	}
1363 	if (udp->udp_state == TS_DATA_XFER) {
1364 		/*
1365 		 * connected, fill remote address too
1366 		 */
1367 		taa->REMADDR_length = addrlen;
1368 		/* assumed 32-bit alignment */
1369 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1370 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1371 		(void) conn_getpeername(connp, sa, &addrlen);
1372 		ackmp->b_wptr += addrlen;
1373 	}
1374 	mutex_exit(&connp->conn_lock);
1375 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1376 	qreply(q, ackmp);
1377 }
1378 
1379 static void
udp_copy_info(struct T_info_ack * tap,udp_t * udp)1380 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1381 {
1382 	conn_t		*connp = udp->udp_connp;
1383 
1384 	if (connp->conn_family == AF_INET) {
1385 		*tap = udp_g_t_info_ack_ipv4;
1386 	} else {
1387 		*tap = udp_g_t_info_ack_ipv6;
1388 	}
1389 	tap->CURRENT_state = udp->udp_state;
1390 	tap->OPT_size = udp_max_optsize;
1391 }
1392 
1393 static void
udp_do_capability_ack(udp_t * udp,struct T_capability_ack * tcap,t_uscalar_t cap_bits1)1394 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1395     t_uscalar_t cap_bits1)
1396 {
1397 	tcap->CAP_bits1 = 0;
1398 
1399 	if (cap_bits1 & TC1_INFO) {
1400 		udp_copy_info(&tcap->INFO_ack, udp);
1401 		tcap->CAP_bits1 |= TC1_INFO;
1402 	}
1403 }
1404 
1405 /*
1406  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1407  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1408  * udp_g_t_info_ack.  The current state of the stream is copied from
1409  * udp_state.
1410  */
1411 static void
udp_capability_req(queue_t * q,mblk_t * mp)1412 udp_capability_req(queue_t *q, mblk_t *mp)
1413 {
1414 	t_uscalar_t		cap_bits1;
1415 	struct T_capability_ack	*tcap;
1416 	udp_t	*udp = Q_TO_UDP(q);
1417 
1418 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1419 
1420 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1421 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1422 	if (!mp)
1423 		return;
1424 
1425 	tcap = (struct T_capability_ack *)mp->b_rptr;
1426 	udp_do_capability_ack(udp, tcap, cap_bits1);
1427 
1428 	qreply(q, mp);
1429 }
1430 
1431 /*
1432  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1433  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1434  * The current state of the stream is copied from udp_state.
1435  */
1436 static void
udp_info_req(queue_t * q,mblk_t * mp)1437 udp_info_req(queue_t *q, mblk_t *mp)
1438 {
1439 	udp_t *udp = Q_TO_UDP(q);
1440 
1441 	/* Create a T_INFO_ACK message. */
1442 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1443 	    T_INFO_ACK);
1444 	if (!mp)
1445 		return;
1446 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1447 	qreply(q, mp);
1448 }
1449 
1450 /* For /dev/udp aka AF_INET open */
1451 static int
udp_openv4(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1452 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1453 {
1454 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1455 }
1456 
1457 /* For /dev/udp6 aka AF_INET6 open */
1458 static int
udp_openv6(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1459 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1460 {
1461 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1462 }
1463 
1464 /*
1465  * This is the open routine for udp.  It allocates a udp_t structure for
1466  * the stream and, on the first open of the module, creates an ND table.
1467  */
1468 static int
udp_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp,boolean_t isv6)1469 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1470     boolean_t isv6)
1471 {
1472 	udp_t		*udp;
1473 	conn_t		*connp;
1474 	dev_t		conn_dev;
1475 	vmem_t		*minor_arena;
1476 	int		err;
1477 
1478 	/* If the stream is already open, return immediately. */
1479 	if (q->q_ptr != NULL)
1480 		return (0);
1481 
1482 	if (sflag == MODOPEN)
1483 		return (EINVAL);
1484 
1485 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1486 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1487 		minor_arena = ip_minor_arena_la;
1488 	} else {
1489 		/*
1490 		 * Either minor numbers in the large arena were exhausted
1491 		 * or a non socket application is doing the open.
1492 		 * Try to allocate from the small arena.
1493 		 */
1494 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1495 			return (EBUSY);
1496 
1497 		minor_arena = ip_minor_arena_sa;
1498 	}
1499 
1500 	if (flag & SO_FALLBACK) {
1501 		/*
1502 		 * Non streams socket needs a stream to fallback to
1503 		 */
1504 		RD(q)->q_ptr = (void *)conn_dev;
1505 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1506 		WR(q)->q_ptr = (void *)minor_arena;
1507 		qprocson(q);
1508 		return (0);
1509 	}
1510 
1511 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1512 	if (connp == NULL) {
1513 		inet_minor_free(minor_arena, conn_dev);
1514 		return (err);
1515 	}
1516 	udp = connp->conn_udp;
1517 
1518 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1519 	connp->conn_dev = conn_dev;
1520 	connp->conn_minor_arena = minor_arena;
1521 
1522 	/*
1523 	 * Initialize the udp_t structure for this stream.
1524 	 */
1525 	q->q_ptr = connp;
1526 	WR(q)->q_ptr = connp;
1527 	connp->conn_rq = q;
1528 	connp->conn_wq = WR(q);
1529 
1530 	/*
1531 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1532 	 * need to lock anything.
1533 	 */
1534 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1535 	ASSERT(connp->conn_udp == udp);
1536 	ASSERT(udp->udp_connp == connp);
1537 
1538 	if (flag & SO_SOCKSTR) {
1539 		udp->udp_issocket = B_TRUE;
1540 	}
1541 
1542 	WR(q)->q_hiwat = connp->conn_sndbuf;
1543 	WR(q)->q_lowat = connp->conn_sndlowat;
1544 
1545 	qprocson(q);
1546 
1547 	/* Set the Stream head write offset and high watermark. */
1548 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1549 	(void) proto_set_rx_hiwat(q, connp,
1550 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1551 
1552 	mutex_enter(&connp->conn_lock);
1553 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1554 	mutex_exit(&connp->conn_lock);
1555 	return (0);
1556 }
1557 
1558 /*
1559  * Which UDP options OK to set through T_UNITDATA_REQ...
1560  */
1561 /* ARGSUSED */
1562 static boolean_t
udp_opt_allow_udr_set(t_scalar_t level,t_scalar_t name)1563 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1564 {
1565 	return (B_TRUE);
1566 }
1567 
1568 /*
1569  * This routine gets default values of certain options whose default
1570  * values are maintained by protcol specific code
1571  */
1572 int
udp_opt_default(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1573 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1574 {
1575 	udp_t		*udp = Q_TO_UDP(q);
1576 	udp_stack_t *us = udp->udp_us;
1577 	int *i1 = (int *)ptr;
1578 
1579 	switch (level) {
1580 	case IPPROTO_IP:
1581 		switch (name) {
1582 		case IP_MULTICAST_TTL:
1583 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1584 			return (sizeof (uchar_t));
1585 		case IP_MULTICAST_LOOP:
1586 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1587 			return (sizeof (uchar_t));
1588 		}
1589 		break;
1590 	case IPPROTO_IPV6:
1591 		switch (name) {
1592 		case IPV6_MULTICAST_HOPS:
1593 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1594 			return (sizeof (int));
1595 		case IPV6_MULTICAST_LOOP:
1596 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1597 			return (sizeof (int));
1598 		case IPV6_UNICAST_HOPS:
1599 			*i1 = us->us_ipv6_hoplimit;
1600 			return (sizeof (int));
1601 		}
1602 		break;
1603 	}
1604 	return (-1);
1605 }
1606 
1607 /*
1608  * This routine retrieves the current status of socket options.
1609  * It returns the size of the option retrieved, or -1.
1610  */
1611 int
udp_opt_get(conn_t * connp,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1612 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1613     uchar_t *ptr)
1614 {
1615 	int		*i1 = (int *)ptr;
1616 	udp_t		*udp = connp->conn_udp;
1617 	int		len;
1618 	conn_opt_arg_t	coas;
1619 	int		retval;
1620 
1621 	coas.coa_connp = connp;
1622 	coas.coa_ixa = connp->conn_ixa;
1623 	coas.coa_ipp = &connp->conn_xmit_ipp;
1624 	coas.coa_ancillary = B_FALSE;
1625 	coas.coa_changed = 0;
1626 
1627 	/*
1628 	 * We assume that the optcom framework has checked for the set
1629 	 * of levels and names that are supported, hence we don't worry
1630 	 * about rejecting based on that.
1631 	 * First check for UDP specific handling, then pass to common routine.
1632 	 */
1633 	switch (level) {
1634 	case IPPROTO_IP:
1635 		/*
1636 		 * Only allow IPv4 option processing on IPv4 sockets.
1637 		 */
1638 		if (connp->conn_family != AF_INET)
1639 			return (-1);
1640 
1641 		switch (name) {
1642 		case IP_OPTIONS:
1643 		case T_IP_OPTIONS:
1644 			mutex_enter(&connp->conn_lock);
1645 			if (!(udp->udp_recv_ipp.ipp_fields &
1646 			    IPPF_IPV4_OPTIONS)) {
1647 				mutex_exit(&connp->conn_lock);
1648 				return (0);
1649 			}
1650 
1651 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1652 			ASSERT(len != 0);
1653 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1654 			mutex_exit(&connp->conn_lock);
1655 			return (len);
1656 		}
1657 		break;
1658 	case IPPROTO_UDP:
1659 		switch (name) {
1660 		case UDP_NAT_T_ENDPOINT:
1661 			mutex_enter(&connp->conn_lock);
1662 			*i1 = udp->udp_nat_t_endpoint;
1663 			mutex_exit(&connp->conn_lock);
1664 			return (sizeof (int));
1665 		case UDP_RCVHDR:
1666 			mutex_enter(&connp->conn_lock);
1667 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1668 			mutex_exit(&connp->conn_lock);
1669 			return (sizeof (int));
1670 		case UDP_SRCPORT_HASH:
1671 			mutex_enter(&connp->conn_lock);
1672 			*i1 = udp->udp_vxlanhash;
1673 			mutex_exit(&connp->conn_lock);
1674 			return (sizeof (int));
1675 		}
1676 	}
1677 	mutex_enter(&connp->conn_lock);
1678 	retval = conn_opt_get(&coas, level, name, ptr);
1679 	mutex_exit(&connp->conn_lock);
1680 	return (retval);
1681 }
1682 
1683 /*
1684  * This routine retrieves the current status of socket options.
1685  * It returns the size of the option retrieved, or -1.
1686  */
1687 int
udp_tpi_opt_get(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1688 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1689 {
1690 	conn_t		*connp = Q_TO_CONN(q);
1691 	int		err;
1692 
1693 	err = udp_opt_get(connp, level, name, ptr);
1694 	return (err);
1695 }
1696 
1697 /*
1698  * This routine sets socket options.
1699  */
1700 int
udp_do_opt_set(conn_opt_arg_t * coa,int level,int name,uint_t inlen,uchar_t * invalp,cred_t * cr,boolean_t checkonly)1701 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1702     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1703 {
1704 	conn_t		*connp = coa->coa_connp;
1705 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1706 	udp_t		*udp = connp->conn_udp;
1707 	udp_stack_t	*us = udp->udp_us;
1708 	int		*i1 = (int *)invalp;
1709 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1710 	int		error;
1711 
1712 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1713 	/*
1714 	 * First do UDP specific sanity checks and handle UDP specific
1715 	 * options. Note that some IPPROTO_UDP options are handled
1716 	 * by conn_opt_set.
1717 	 */
1718 	switch (level) {
1719 	case SOL_SOCKET:
1720 		switch (name) {
1721 		case SO_SNDBUF:
1722 			if (*i1 > us->us_max_buf) {
1723 				return (ENOBUFS);
1724 			}
1725 			break;
1726 		case SO_RCVBUF:
1727 			if (*i1 > us->us_max_buf) {
1728 				return (ENOBUFS);
1729 			}
1730 			break;
1731 
1732 		case SCM_UCRED: {
1733 			struct ucred_s *ucr;
1734 			cred_t *newcr;
1735 			ts_label_t *tsl;
1736 
1737 			/*
1738 			 * Only sockets that have proper privileges and are
1739 			 * bound to MLPs will have any other value here, so
1740 			 * this implicitly tests for privilege to set label.
1741 			 */
1742 			if (connp->conn_mlp_type == mlptSingle)
1743 				break;
1744 
1745 			ucr = (struct ucred_s *)invalp;
1746 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1747 			    ucr->uc_labeloff < sizeof (*ucr) ||
1748 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1749 				return (EINVAL);
1750 			if (!checkonly) {
1751 				/*
1752 				 * Set ixa_tsl to the new label.
1753 				 * We assume that crgetzoneid doesn't change
1754 				 * as part of the SCM_UCRED.
1755 				 */
1756 				ASSERT(cr != NULL);
1757 				if ((tsl = crgetlabel(cr)) == NULL)
1758 					return (EINVAL);
1759 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1760 				    tsl->tsl_doi, KM_NOSLEEP);
1761 				if (newcr == NULL)
1762 					return (ENOSR);
1763 				ASSERT(newcr->cr_label != NULL);
1764 				/*
1765 				 * Move the hold on the cr_label to ixa_tsl by
1766 				 * setting cr_label to NULL. Then release newcr.
1767 				 */
1768 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1769 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1770 				newcr->cr_label = NULL;
1771 				crfree(newcr);
1772 				coa->coa_changed |= COA_HEADER_CHANGED;
1773 				coa->coa_changed |= COA_WROFF_CHANGED;
1774 			}
1775 			/* Fully handled this option. */
1776 			return (0);
1777 		}
1778 		}
1779 		break;
1780 	case IPPROTO_UDP:
1781 		switch (name) {
1782 		case UDP_NAT_T_ENDPOINT:
1783 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1784 				return (error);
1785 			}
1786 
1787 			/*
1788 			 * Use conn_family instead so we can avoid ambiguitites
1789 			 * with AF_INET6 sockets that may switch from IPv4
1790 			 * to IPv6.
1791 			 */
1792 			if (connp->conn_family != AF_INET) {
1793 				return (EAFNOSUPPORT);
1794 			}
1795 
1796 			if (!checkonly) {
1797 				mutex_enter(&connp->conn_lock);
1798 				udp->udp_nat_t_endpoint = onoff;
1799 				mutex_exit(&connp->conn_lock);
1800 				coa->coa_changed |= COA_HEADER_CHANGED;
1801 				coa->coa_changed |= COA_WROFF_CHANGED;
1802 			}
1803 			/* Fully handled this option. */
1804 			return (0);
1805 		case UDP_RCVHDR:
1806 			mutex_enter(&connp->conn_lock);
1807 			udp->udp_rcvhdr = onoff;
1808 			mutex_exit(&connp->conn_lock);
1809 			return (0);
1810 		case UDP_SRCPORT_HASH:
1811 			/*
1812 			 * This should have already been verified, but double
1813 			 * check.
1814 			 */
1815 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1816 				return (error);
1817 			}
1818 
1819 			/* First see if the val is something we understand */
1820 			if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1821 				return (EINVAL);
1822 
1823 			if (!checkonly) {
1824 				mutex_enter(&connp->conn_lock);
1825 				udp->udp_vxlanhash = *i1;
1826 				mutex_exit(&connp->conn_lock);
1827 			}
1828 			/* Fully handled this option. */
1829 			return (0);
1830 		}
1831 		break;
1832 	}
1833 	error = conn_opt_set(coa, level, name, inlen, invalp,
1834 	    checkonly, cr);
1835 	return (error);
1836 }
1837 
1838 /*
1839  * This routine sets socket options.
1840  */
1841 int
udp_opt_set(conn_t * connp,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)1842 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1843     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1844     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1845 {
1846 	udp_t		*udp = connp->conn_udp;
1847 	int		err;
1848 	conn_opt_arg_t	coas, *coa;
1849 	boolean_t	checkonly;
1850 	udp_stack_t	*us = udp->udp_us;
1851 
1852 	switch (optset_context) {
1853 	case SETFN_OPTCOM_CHECKONLY:
1854 		checkonly = B_TRUE;
1855 		/*
1856 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1857 		 * inlen != 0 implies value supplied and
1858 		 *	we have to "pretend" to set it.
1859 		 * inlen == 0 implies that there is no
1860 		 *	value part in T_CHECK request and just validation
1861 		 * done elsewhere should be enough, we just return here.
1862 		 */
1863 		if (inlen == 0) {
1864 			*outlenp = 0;
1865 			return (0);
1866 		}
1867 		break;
1868 	case SETFN_OPTCOM_NEGOTIATE:
1869 		checkonly = B_FALSE;
1870 		break;
1871 	case SETFN_UD_NEGOTIATE:
1872 	case SETFN_CONN_NEGOTIATE:
1873 		checkonly = B_FALSE;
1874 		/*
1875 		 * Negotiating local and "association-related" options
1876 		 * through T_UNITDATA_REQ.
1877 		 *
1878 		 * Following routine can filter out ones we do not
1879 		 * want to be "set" this way.
1880 		 */
1881 		if (!udp_opt_allow_udr_set(level, name)) {
1882 			*outlenp = 0;
1883 			return (EINVAL);
1884 		}
1885 		break;
1886 	default:
1887 		/*
1888 		 * We should never get here
1889 		 */
1890 		*outlenp = 0;
1891 		return (EINVAL);
1892 	}
1893 
1894 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1895 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1896 
1897 	if (thisdg_attrs != NULL) {
1898 		/* Options from T_UNITDATA_REQ */
1899 		coa = (conn_opt_arg_t *)thisdg_attrs;
1900 		ASSERT(coa->coa_connp == connp);
1901 		ASSERT(coa->coa_ixa != NULL);
1902 		ASSERT(coa->coa_ipp != NULL);
1903 		ASSERT(coa->coa_ancillary);
1904 	} else {
1905 		coa = &coas;
1906 		coas.coa_connp = connp;
1907 		/* Get a reference on conn_ixa to prevent concurrent mods */
1908 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1909 		if (coas.coa_ixa == NULL) {
1910 			*outlenp = 0;
1911 			return (ENOMEM);
1912 		}
1913 		coas.coa_ipp = &connp->conn_xmit_ipp;
1914 		coas.coa_ancillary = B_FALSE;
1915 		coas.coa_changed = 0;
1916 	}
1917 
1918 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
1919 	    cr, checkonly);
1920 	if (err != 0) {
1921 errout:
1922 		if (!coa->coa_ancillary)
1923 			ixa_refrele(coa->coa_ixa);
1924 		*outlenp = 0;
1925 		return (err);
1926 	}
1927 	/* Handle DHCPINIT here outside of lock */
1928 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1929 		uint_t	ifindex;
1930 		ill_t	*ill;
1931 
1932 		ifindex = *(uint_t *)invalp;
1933 		if (ifindex == 0) {
1934 			ill = NULL;
1935 		} else {
1936 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1937 			    coa->coa_ixa->ixa_ipst);
1938 			if (ill == NULL) {
1939 				err = ENXIO;
1940 				goto errout;
1941 			}
1942 
1943 			mutex_enter(&ill->ill_lock);
1944 			if (ill->ill_state_flags & ILL_CONDEMNED) {
1945 				mutex_exit(&ill->ill_lock);
1946 				ill_refrele(ill);
1947 				err = ENXIO;
1948 				goto errout;
1949 			}
1950 			if (IS_VNI(ill)) {
1951 				mutex_exit(&ill->ill_lock);
1952 				ill_refrele(ill);
1953 				err = EINVAL;
1954 				goto errout;
1955 			}
1956 		}
1957 		mutex_enter(&connp->conn_lock);
1958 
1959 		if (connp->conn_dhcpinit_ill != NULL) {
1960 			/*
1961 			 * We've locked the conn so conn_cleanup_ill()
1962 			 * cannot clear conn_dhcpinit_ill -- so it's
1963 			 * safe to access the ill.
1964 			 */
1965 			ill_t *oill = connp->conn_dhcpinit_ill;
1966 
1967 			ASSERT(oill->ill_dhcpinit != 0);
1968 			atomic_dec_32(&oill->ill_dhcpinit);
1969 			ill_set_inputfn(connp->conn_dhcpinit_ill);
1970 			connp->conn_dhcpinit_ill = NULL;
1971 		}
1972 
1973 		if (ill != NULL) {
1974 			connp->conn_dhcpinit_ill = ill;
1975 			atomic_inc_32(&ill->ill_dhcpinit);
1976 			ill_set_inputfn(ill);
1977 			mutex_exit(&connp->conn_lock);
1978 			mutex_exit(&ill->ill_lock);
1979 			ill_refrele(ill);
1980 		} else {
1981 			mutex_exit(&connp->conn_lock);
1982 		}
1983 	}
1984 
1985 	/*
1986 	 * Common case of OK return with outval same as inval.
1987 	 */
1988 	if (invalp != outvalp) {
1989 		/* don't trust bcopy for identical src/dst */
1990 		(void) bcopy(invalp, outvalp, inlen);
1991 	}
1992 	*outlenp = inlen;
1993 
1994 	/*
1995 	 * If this was not ancillary data, then we rebuild the headers,
1996 	 * update the IRE/NCE, and IPsec as needed.
1997 	 * Since the label depends on the destination we go through
1998 	 * ip_set_destination first.
1999 	 */
2000 	if (coa->coa_ancillary) {
2001 		return (0);
2002 	}
2003 
2004 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
2005 		in6_addr_t saddr, faddr, nexthop;
2006 		in_port_t fport;
2007 
2008 		/*
2009 		 * We clear lastdst to make sure we pick up the change
2010 		 * next time sending.
2011 		 * If we are connected we re-cache the information.
2012 		 * We ignore errors to preserve BSD behavior.
2013 		 * Note that we don't redo IPsec policy lookup here
2014 		 * since the final destination (or source) didn't change.
2015 		 */
2016 		mutex_enter(&connp->conn_lock);
2017 		connp->conn_v6lastdst = ipv6_all_zeros;
2018 
2019 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2020 		    &connp->conn_faddr_v6, &nexthop);
2021 		saddr = connp->conn_saddr_v6;
2022 		faddr = connp->conn_faddr_v6;
2023 		fport = connp->conn_fport;
2024 		mutex_exit(&connp->conn_lock);
2025 
2026 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2027 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2028 			(void) ip_attr_connect(connp, coa->coa_ixa,
2029 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
2030 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2031 		}
2032 	}
2033 
2034 	ixa_refrele(coa->coa_ixa);
2035 
2036 	if (coa->coa_changed & COA_HEADER_CHANGED) {
2037 		/*
2038 		 * Rebuild the header template if we are connected.
2039 		 * Otherwise clear conn_v6lastdst so we rebuild the header
2040 		 * in the data path.
2041 		 */
2042 		mutex_enter(&connp->conn_lock);
2043 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2044 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2045 			err = udp_build_hdr_template(connp,
2046 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2047 			    connp->conn_fport, connp->conn_flowinfo);
2048 			if (err != 0) {
2049 				mutex_exit(&connp->conn_lock);
2050 				return (err);
2051 			}
2052 		} else {
2053 			connp->conn_v6lastdst = ipv6_all_zeros;
2054 		}
2055 		mutex_exit(&connp->conn_lock);
2056 	}
2057 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2058 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2059 		    connp->conn_rcvbuf);
2060 	}
2061 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2062 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2063 	}
2064 	if (coa->coa_changed & COA_WROFF_CHANGED) {
2065 		/* Increase wroff if needed */
2066 		uint_t wroff;
2067 
2068 		mutex_enter(&connp->conn_lock);
2069 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2070 		if (udp->udp_nat_t_endpoint)
2071 			wroff += sizeof (uint32_t);
2072 		if (wroff > connp->conn_wroff) {
2073 			connp->conn_wroff = wroff;
2074 			mutex_exit(&connp->conn_lock);
2075 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2076 		} else {
2077 			mutex_exit(&connp->conn_lock);
2078 		}
2079 	}
2080 	return (err);
2081 }
2082 
2083 /* This routine sets socket options. */
2084 int
udp_tpi_opt_set(queue_t * q,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)2085 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2086     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2087     void *thisdg_attrs, cred_t *cr)
2088 {
2089 	conn_t	*connp = Q_TO_CONN(q);
2090 	int error;
2091 
2092 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2093 	    outlenp, outvalp, thisdg_attrs, cr);
2094 	return (error);
2095 }
2096 
2097 /*
2098  * Setup IP and UDP headers.
2099  * Returns NULL on allocation failure, in which case data_mp is freed.
2100  */
2101 mblk_t *
udp_prepend_hdr(conn_t * connp,ip_xmit_attr_t * ixa,const ip_pkt_t * ipp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo,mblk_t * data_mp,int * errorp)2102 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2103     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2104     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2105 {
2106 	mblk_t		*mp;
2107 	udpha_t		*udpha;
2108 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2109 	uint_t		data_len;
2110 	uint32_t	cksum;
2111 	udp_t		*udp = connp->conn_udp;
2112 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2113 	boolean_t	hash_srcport = udp->udp_vxlanhash;
2114 	uint_t		ulp_hdr_len;
2115 	uint16_t	srcport;
2116 
2117 	data_len = msgdsize(data_mp);
2118 	ulp_hdr_len = UDPH_SIZE;
2119 	if (insert_spi)
2120 		ulp_hdr_len += sizeof (uint32_t);
2121 
2122 	/*
2123 	 * If we have source port hashing going on, determine the hash before
2124 	 * we modify the mblk_t.
2125 	 */
2126 	if (hash_srcport == B_TRUE) {
2127 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2128 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2129 		    ntohs(connp->conn_lport));
2130 	}
2131 
2132 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2133 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2134 	if (mp == NULL) {
2135 		ASSERT(*errorp != 0);
2136 		return (NULL);
2137 	}
2138 
2139 	data_len += ulp_hdr_len;
2140 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2141 
2142 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2143 	if (hash_srcport == B_TRUE) {
2144 		udpha->uha_src_port = htons(srcport);
2145 	} else {
2146 		udpha->uha_src_port = connp->conn_lport;
2147 	}
2148 	udpha->uha_dst_port = dstport;
2149 	udpha->uha_checksum = 0;
2150 	udpha->uha_length = htons(data_len);
2151 
2152 	/*
2153 	 * If there was a routing option/header then conn_prepend_hdr
2154 	 * has massaged it and placed the pseudo-header checksum difference
2155 	 * in the cksum argument.
2156 	 *
2157 	 * Setup header length and prepare for ULP checksum done in IP.
2158 	 *
2159 	 * We make it easy for IP to include our pseudo header
2160 	 * by putting our length in uha_checksum.
2161 	 * The IP source, destination, and length have already been set by
2162 	 * conn_prepend_hdr.
2163 	 */
2164 	cksum += data_len;
2165 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2166 	ASSERT(cksum < 0x10000);
2167 
2168 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2169 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2170 
2171 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2172 
2173 		/* IP does the checksum if uha_checksum is non-zero */
2174 		if (us->us_do_checksum) {
2175 			if (cksum == 0)
2176 				udpha->uha_checksum = 0xffff;
2177 			else
2178 				udpha->uha_checksum = htons(cksum);
2179 		} else {
2180 			udpha->uha_checksum = 0;
2181 		}
2182 	} else {
2183 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2184 
2185 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2186 		if (cksum == 0)
2187 			udpha->uha_checksum = 0xffff;
2188 		else
2189 			udpha->uha_checksum = htons(cksum);
2190 	}
2191 
2192 	/* Insert all-0s SPI now. */
2193 	if (insert_spi)
2194 		*((uint32_t *)(udpha + 1)) = 0;
2195 
2196 	return (mp);
2197 }
2198 
2199 static int
udp_build_hdr_template(conn_t * connp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo)2200 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2201     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2202 {
2203 	udpha_t		*udpha;
2204 	int		error;
2205 
2206 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2207 	/*
2208 	 * We clear lastdst to make sure we don't use the lastdst path
2209 	 * next time sending since we might not have set v6dst yet.
2210 	 */
2211 	connp->conn_v6lastdst = ipv6_all_zeros;
2212 
2213 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2214 	    flowinfo);
2215 	if (error != 0)
2216 		return (error);
2217 
2218 	/*
2219 	 * Any routing header/option has been massaged. The checksum difference
2220 	 * is stored in conn_sum.
2221 	 */
2222 	udpha = (udpha_t *)connp->conn_ht_ulp;
2223 	udpha->uha_src_port = connp->conn_lport;
2224 	udpha->uha_dst_port = dstport;
2225 	udpha->uha_checksum = 0;
2226 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2227 	return (0);
2228 }
2229 
2230 static mblk_t *
udp_queue_fallback(udp_t * udp,mblk_t * mp)2231 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2232 {
2233 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2234 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2235 		/*
2236 		 * fallback has started but messages have not been moved yet
2237 		 */
2238 		if (udp->udp_fallback_queue_head == NULL) {
2239 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2240 			udp->udp_fallback_queue_head = mp;
2241 			udp->udp_fallback_queue_tail = mp;
2242 		} else {
2243 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2244 			udp->udp_fallback_queue_tail->b_next = mp;
2245 			udp->udp_fallback_queue_tail = mp;
2246 		}
2247 		return (NULL);
2248 	} else {
2249 		/*
2250 		 * Fallback completed, let the caller putnext() the mblk.
2251 		 */
2252 		return (mp);
2253 	}
2254 }
2255 
2256 /*
2257  * Deliver data to ULP. In case we have a socket, and it's falling back to
2258  * TPI, then we'll queue the mp for later processing.
2259  */
2260 static void
udp_ulp_recv(conn_t * connp,mblk_t * mp,uint_t len,ip_recv_attr_t * ira)2261 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2262 {
2263 	if (IPCL_IS_NONSTR(connp)) {
2264 		udp_t *udp = connp->conn_udp;
2265 		int error;
2266 
2267 		ASSERT(len == msgdsize(mp));
2268 		if ((*connp->conn_upcalls->su_recv)
2269 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2270 			mutex_enter(&udp->udp_recv_lock);
2271 			if (error == ENOSPC) {
2272 				/*
2273 				 * let's confirm while holding the lock
2274 				 */
2275 				if ((*connp->conn_upcalls->su_recv)
2276 				    (connp->conn_upper_handle, NULL, 0, 0,
2277 				    &error, NULL) < 0) {
2278 					ASSERT(error == ENOSPC);
2279 					if (error == ENOSPC) {
2280 						connp->conn_flow_cntrld =
2281 						    B_TRUE;
2282 					}
2283 				}
2284 				mutex_exit(&udp->udp_recv_lock);
2285 			} else {
2286 				ASSERT(error == EOPNOTSUPP);
2287 				mp = udp_queue_fallback(udp, mp);
2288 				mutex_exit(&udp->udp_recv_lock);
2289 				if (mp != NULL)
2290 					putnext(connp->conn_rq, mp);
2291 			}
2292 		}
2293 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2294 	} else {
2295 		if (is_system_labeled()) {
2296 			ASSERT(ira->ira_cred != NULL);
2297 			/*
2298 			 * Provide for protocols above UDP such as RPC
2299 			 * NOPID leaves db_cpid unchanged.
2300 			 */
2301 			mblk_setcred(mp, ira->ira_cred, NOPID);
2302 		}
2303 
2304 		putnext(connp->conn_rq, mp);
2305 	}
2306 }
2307 
2308 /*
2309  * This is the inbound data path.
2310  * IP has already pulled up the IP plus UDP headers and verified alignment
2311  * etc.
2312  */
2313 /* ARGSUSED2 */
2314 static void
udp_input(void * arg1,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)2315 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2316 {
2317 	conn_t			*connp = (conn_t *)arg1;
2318 	struct T_unitdata_ind	*tudi;
2319 	uchar_t			*rptr;		/* Pointer to IP header */
2320 	int			hdr_length;	/* Length of IP+UDP headers */
2321 	int			udi_size;	/* Size of T_unitdata_ind */
2322 	int			pkt_len;
2323 	udp_t			*udp;
2324 	udpha_t			*udpha;
2325 	ip_pkt_t		ipps;
2326 	ip6_t			*ip6h;
2327 	mblk_t			*mp1;
2328 	uint32_t		udp_ipv4_options_len;
2329 	crb_t			recv_ancillary;
2330 	udp_stack_t		*us;
2331 
2332 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2333 
2334 	udp = connp->conn_udp;
2335 	us = udp->udp_us;
2336 	rptr = mp->b_rptr;
2337 
2338 	ASSERT(DB_TYPE(mp) == M_DATA);
2339 	ASSERT(OK_32PTR(rptr));
2340 	ASSERT(ira->ira_pktlen == msgdsize(mp));
2341 	pkt_len = ira->ira_pktlen;
2342 
2343 	/*
2344 	 * Get a snapshot of these and allow other threads to change
2345 	 * them after that. We need the same recv_ancillary when determining
2346 	 * the size as when adding the ancillary data items.
2347 	 */
2348 	mutex_enter(&connp->conn_lock);
2349 	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2350 	recv_ancillary = connp->conn_recv_ancillary;
2351 	mutex_exit(&connp->conn_lock);
2352 
2353 	hdr_length = ira->ira_ip_hdr_length;
2354 
2355 	/*
2356 	 * IP inspected the UDP header thus all of it must be in the mblk.
2357 	 * UDP length check is performed for IPv6 packets and IPv4 packets
2358 	 * to check if the size of the packet as specified
2359 	 * by the UDP header is the same as the length derived from the IP
2360 	 * header.
2361 	 */
2362 	udpha = (udpha_t *)(rptr + hdr_length);
2363 	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2364 		goto tossit;
2365 
2366 	hdr_length += UDPH_SIZE;
2367 	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */
2368 
2369 	/* Initialize regardless of IP version */
2370 	ipps.ipp_fields = 0;
2371 
2372 	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2373 	    udp_ipv4_options_len > 0) &&
2374 	    connp->conn_family == AF_INET) {
2375 		int	err;
2376 
2377 		/*
2378 		 * Record/update udp_recv_ipp with the lock
2379 		 * held. Not needed for AF_INET6 sockets
2380 		 * since they don't support a getsockopt of IP_OPTIONS.
2381 		 */
2382 		mutex_enter(&connp->conn_lock);
2383 		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2384 		    B_TRUE);
2385 		if (err != 0) {
2386 			/* Allocation failed. Drop packet */
2387 			mutex_exit(&connp->conn_lock);
2388 			freemsg(mp);
2389 			UDPS_BUMP_MIB(us, udpInErrors);
2390 			return;
2391 		}
2392 		mutex_exit(&connp->conn_lock);
2393 	}
2394 
2395 	if (recv_ancillary.crb_all != 0) {
2396 		/*
2397 		 * Record packet information in the ip_pkt_t
2398 		 */
2399 		if (ira->ira_flags & IRAF_IS_IPV4) {
2400 			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2401 			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2402 			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2403 			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2404 
2405 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2406 		} else {
2407 			uint8_t nexthdrp;
2408 
2409 			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2410 			/*
2411 			 * IPv6 packets can only be received by applications
2412 			 * that are prepared to receive IPv6 addresses.
2413 			 * The IP fanout must ensure this.
2414 			 */
2415 			ASSERT(connp->conn_family == AF_INET6);
2416 
2417 			ip6h = (ip6_t *)rptr;
2418 
2419 			/* We don't care about the length, but need the ipp */
2420 			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2421 			    &nexthdrp);
2422 			ASSERT(hdr_length == ira->ira_ip_hdr_length);
2423 			/* Restore */
2424 			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2425 			ASSERT(nexthdrp == IPPROTO_UDP);
2426 		}
2427 	}
2428 
2429 	/*
2430 	 * This is the inbound data path.  Packets are passed upstream as
2431 	 * T_UNITDATA_IND messages.
2432 	 */
2433 	if (connp->conn_family == AF_INET) {
2434 		sin_t *sin;
2435 
2436 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2437 
2438 		/*
2439 		 * Normally only send up the source address.
2440 		 * If any ancillary data items are wanted we add those.
2441 		 */
2442 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2443 		if (recv_ancillary.crb_all != 0) {
2444 			udi_size += conn_recvancillary_size(connp,
2445 			    recv_ancillary, ira, mp, &ipps);
2446 		}
2447 
2448 		/* Allocate a message block for the T_UNITDATA_IND structure. */
2449 		mp1 = allocb(udi_size, BPRI_MED);
2450 		if (mp1 == NULL) {
2451 			freemsg(mp);
2452 			UDPS_BUMP_MIB(us, udpInErrors);
2453 			return;
2454 		}
2455 		mp1->b_cont = mp;
2456 		mp1->b_datap->db_type = M_PROTO;
2457 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2458 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2459 		tudi->PRIM_type = T_UNITDATA_IND;
2460 		tudi->SRC_length = sizeof (sin_t);
2461 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2462 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2463 		    sizeof (sin_t);
2464 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2465 		tudi->OPT_length = udi_size;
2466 		sin = (sin_t *)&tudi[1];
2467 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2468 		sin->sin_port =	udpha->uha_src_port;
2469 		sin->sin_family = connp->conn_family;
2470 		*(uint32_t *)&sin->sin_zero[0] = 0;
2471 		*(uint32_t *)&sin->sin_zero[4] = 0;
2472 
2473 		/*
2474 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA,
2475 		 * IP_RECVTTL or IP_RECVTOS has been set.
2476 		 */
2477 		if (udi_size != 0) {
2478 			conn_recvancillary_add(connp, recv_ancillary, ira,
2479 			    &ipps, (uchar_t *)&sin[1], udi_size);
2480 		}
2481 	} else {
2482 		sin6_t *sin6;
2483 
2484 		/*
2485 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2486 		 *
2487 		 * Normally we only send up the address. If receiving of any
2488 		 * optional receive side information is enabled, we also send
2489 		 * that up as options.
2490 		 */
2491 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2492 
2493 		if (recv_ancillary.crb_all != 0) {
2494 			udi_size += conn_recvancillary_size(connp,
2495 			    recv_ancillary, ira, mp, &ipps);
2496 		}
2497 
2498 		mp1 = allocb(udi_size, BPRI_MED);
2499 		if (mp1 == NULL) {
2500 			freemsg(mp);
2501 			UDPS_BUMP_MIB(us, udpInErrors);
2502 			return;
2503 		}
2504 		mp1->b_cont = mp;
2505 		mp1->b_datap->db_type = M_PROTO;
2506 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2507 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2508 		tudi->PRIM_type = T_UNITDATA_IND;
2509 		tudi->SRC_length = sizeof (sin6_t);
2510 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2511 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2512 		    sizeof (sin6_t);
2513 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2514 		tudi->OPT_length = udi_size;
2515 		sin6 = (sin6_t *)&tudi[1];
2516 		if (ira->ira_flags & IRAF_IS_IPV4) {
2517 			in6_addr_t v6dst;
2518 
2519 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2520 			    &sin6->sin6_addr);
2521 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2522 			    &v6dst);
2523 			sin6->sin6_flowinfo = 0;
2524 			sin6->sin6_scope_id = 0;
2525 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2526 			    IPCL_ZONEID(connp), us->us_netstack);
2527 		} else {
2528 			ip6h = (ip6_t *)rptr;
2529 
2530 			sin6->sin6_addr = ip6h->ip6_src;
2531 			/* No sin6_flowinfo per API */
2532 			sin6->sin6_flowinfo = 0;
2533 			/* For link-scope pass up scope id */
2534 			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2535 				sin6->sin6_scope_id = ira->ira_ruifindex;
2536 			else
2537 				sin6->sin6_scope_id = 0;
2538 			sin6->__sin6_src_id = ip_srcid_find_addr(
2539 			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
2540 			    us->us_netstack);
2541 		}
2542 		sin6->sin6_port = udpha->uha_src_port;
2543 		sin6->sin6_family = connp->conn_family;
2544 
2545 		if (udi_size != 0) {
2546 			conn_recvancillary_add(connp, recv_ancillary, ira,
2547 			    &ipps, (uchar_t *)&sin6[1], udi_size);
2548 		}
2549 	}
2550 
2551 	/*
2552 	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2553 	 * loopback traffic).
2554 	 */
2555 	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2556 	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2557 
2558 	/* Walk past the headers unless IP_RECVHDR was set. */
2559 	if (!udp->udp_rcvhdr) {
2560 		mp->b_rptr = rptr + hdr_length;
2561 		pkt_len -= hdr_length;
2562 	}
2563 
2564 	UDPS_BUMP_MIB(us, udpHCInDatagrams);
2565 	udp_ulp_recv(connp, mp1, pkt_len, ira);
2566 	return;
2567 
2568 tossit:
2569 	freemsg(mp);
2570 	UDPS_BUMP_MIB(us, udpInErrors);
2571 }
2572 
2573 /*
2574  * This routine creates a T_UDERROR_IND message and passes it upstream.
2575  * The address and options are copied from the T_UNITDATA_REQ message
2576  * passed in mp.  This message is freed.
2577  */
2578 static void
udp_ud_err(queue_t * q,mblk_t * mp,t_scalar_t err)2579 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2580 {
2581 	struct T_unitdata_req *tudr;
2582 	mblk_t	*mp1;
2583 	uchar_t *destaddr;
2584 	t_scalar_t destlen;
2585 	uchar_t	*optaddr;
2586 	t_scalar_t optlen;
2587 
2588 	if ((mp->b_wptr < mp->b_rptr) ||
2589 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2590 		goto done;
2591 	}
2592 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2593 	destaddr = mp->b_rptr + tudr->DEST_offset;
2594 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2595 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2596 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2597 		goto done;
2598 	}
2599 	optaddr = mp->b_rptr + tudr->OPT_offset;
2600 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2601 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2602 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2603 		goto done;
2604 	}
2605 	destlen = tudr->DEST_length;
2606 	optlen = tudr->OPT_length;
2607 
2608 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2609 	    (char *)optaddr, optlen, err);
2610 	if (mp1 != NULL)
2611 		qreply(q, mp1);
2612 
2613 done:
2614 	freemsg(mp);
2615 }
2616 
2617 /*
2618  * This routine removes a port number association from a stream.  It
2619  * is called by udp_wput to handle T_UNBIND_REQ messages.
2620  */
2621 static void
udp_tpi_unbind(queue_t * q,mblk_t * mp)2622 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2623 {
2624 	conn_t	*connp = Q_TO_CONN(q);
2625 	int	error;
2626 
2627 	error = udp_do_unbind(connp);
2628 	if (error) {
2629 		if (error < 0)
2630 			udp_err_ack(q, mp, -error, 0);
2631 		else
2632 			udp_err_ack(q, mp, TSYSERR, error);
2633 		return;
2634 	}
2635 
2636 	mp = mi_tpi_ok_ack_alloc(mp);
2637 	ASSERT(mp != NULL);
2638 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2639 	qreply(q, mp);
2640 }
2641 
2642 /*
2643  * Don't let port fall into the privileged range.
2644  * Since the extra privileged ports can be arbitrary we also
2645  * ensure that we exclude those from consideration.
2646  * us->us_epriv_ports is not sorted thus we loop over it until
2647  * there are no changes.
2648  */
2649 static in_port_t
udp_update_next_port(udp_t * udp,in_port_t port,boolean_t random)2650 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2651 {
2652 	int i, bump;
2653 	in_port_t nextport;
2654 	boolean_t restart = B_FALSE;
2655 	udp_stack_t *us = udp->udp_us;
2656 
2657 	if (random && udp_random_anon_port != 0) {
2658 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2659 		    sizeof (in_port_t));
2660 		/*
2661 		 * Unless changed by a sys admin, the smallest anon port
2662 		 * is 32768 and the largest anon port is 65535.  It is
2663 		 * very likely (50%) for the random port to be smaller
2664 		 * than the smallest anon port.  When that happens,
2665 		 * add port % (anon port range) to the smallest anon
2666 		 * port to get the random port.  It should fall into the
2667 		 * valid anon port range.
2668 		 */
2669 		if ((port < us->us_smallest_anon_port) ||
2670 		    (port > us->us_largest_anon_port)) {
2671 			if (us->us_smallest_anon_port ==
2672 			    us->us_largest_anon_port) {
2673 				bump = 0;
2674 			} else {
2675 				bump = port % (us->us_largest_anon_port -
2676 				    us->us_smallest_anon_port);
2677 			}
2678 
2679 			port = us->us_smallest_anon_port + bump;
2680 		}
2681 	}
2682 
2683 retry:
2684 	if (port < us->us_smallest_anon_port)
2685 		port = us->us_smallest_anon_port;
2686 
2687 	if (port > us->us_largest_anon_port) {
2688 		port = us->us_smallest_anon_port;
2689 		if (restart)
2690 			return (0);
2691 		restart = B_TRUE;
2692 	}
2693 
2694 	if (port < us->us_smallest_nonpriv_port)
2695 		port = us->us_smallest_nonpriv_port;
2696 
2697 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2698 		if (port == us->us_epriv_ports[i]) {
2699 			port++;
2700 			/*
2701 			 * Make sure that the port is in the
2702 			 * valid range.
2703 			 */
2704 			goto retry;
2705 		}
2706 	}
2707 
2708 	if (is_system_labeled() &&
2709 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2710 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2711 		port = nextport;
2712 		goto retry;
2713 	}
2714 
2715 	return (port);
2716 }
2717 
2718 /*
2719  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2720  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2721  * the TPI options, otherwise we take them from msg_control.
2722  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2723  * Always consumes mp; never consumes tudr_mp.
2724  */
2725 static int
udp_output_ancillary(conn_t * connp,sin_t * sin,sin6_t * sin6,mblk_t * mp,mblk_t * tudr_mp,struct nmsghdr * msg,cred_t * cr,pid_t pid)2726 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2727     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2728 {
2729 	udp_t		*udp = connp->conn_udp;
2730 	udp_stack_t	*us = udp->udp_us;
2731 	int		error;
2732 	ip_xmit_attr_t	*ixa;
2733 	ip_pkt_t	*ipp;
2734 	in6_addr_t	v6src;
2735 	in6_addr_t	v6dst;
2736 	in6_addr_t	v6nexthop;
2737 	in_port_t	dstport;
2738 	uint32_t	flowinfo;
2739 	uint_t		srcid;
2740 	int		is_absreq_failure = 0;
2741 	conn_opt_arg_t	coas, *coa;
2742 
2743 	ASSERT(tudr_mp != NULL || msg != NULL);
2744 
2745 	/*
2746 	 * Get ixa before checking state to handle a disconnect race.
2747 	 *
2748 	 * We need an exclusive copy of conn_ixa since the ancillary data
2749 	 * options might modify it. That copy has no pointers hence we
2750 	 * need to set them up once we've parsed the ancillary data.
2751 	 */
2752 	ixa = conn_get_ixa_exclusive(connp);
2753 	if (ixa == NULL) {
2754 		UDPS_BUMP_MIB(us, udpOutErrors);
2755 		freemsg(mp);
2756 		return (ENOMEM);
2757 	}
2758 	ASSERT(cr != NULL);
2759 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2760 	ixa->ixa_cred = cr;
2761 	ixa->ixa_cpid = pid;
2762 	if (is_system_labeled()) {
2763 		/* We need to restart with a label based on the cred */
2764 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2765 	}
2766 
2767 	/* In case previous destination was multicast or multirt */
2768 	ip_attr_newdst(ixa);
2769 
2770 	/* Get a copy of conn_xmit_ipp since the options might change it */
2771 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2772 	if (ipp == NULL) {
2773 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2774 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2775 		ixa->ixa_cpid = connp->conn_cpid;
2776 		ixa_refrele(ixa);
2777 		UDPS_BUMP_MIB(us, udpOutErrors);
2778 		freemsg(mp);
2779 		return (ENOMEM);
2780 	}
2781 	mutex_enter(&connp->conn_lock);
2782 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2783 	mutex_exit(&connp->conn_lock);
2784 	if (error != 0) {
2785 		UDPS_BUMP_MIB(us, udpOutErrors);
2786 		freemsg(mp);
2787 		goto done;
2788 	}
2789 
2790 	/*
2791 	 * Parse the options and update ixa and ipp as a result.
2792 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2793 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2794 	 */
2795 
2796 	coa = &coas;
2797 	coa->coa_connp = connp;
2798 	coa->coa_ixa = ixa;
2799 	coa->coa_ipp = ipp;
2800 	coa->coa_ancillary = B_TRUE;
2801 	coa->coa_changed = 0;
2802 
2803 	if (msg != NULL) {
2804 		error = process_auxiliary_options(connp, msg->msg_control,
2805 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2806 	} else {
2807 		struct T_unitdata_req *tudr;
2808 
2809 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2810 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2811 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2812 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2813 		    coa, &is_absreq_failure);
2814 	}
2815 	if (error != 0) {
2816 		/*
2817 		 * Note: No special action needed in this
2818 		 * module for "is_absreq_failure"
2819 		 */
2820 		freemsg(mp);
2821 		UDPS_BUMP_MIB(us, udpOutErrors);
2822 		goto done;
2823 	}
2824 	ASSERT(is_absreq_failure == 0);
2825 
2826 	mutex_enter(&connp->conn_lock);
2827 	/*
2828 	 * If laddr is unspecified then we look at sin6_src_id.
2829 	 * We will give precedence to a source address set with IPV6_PKTINFO
2830 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2831 	 * want ip_attr_connect to select a source (since it can fail) when
2832 	 * IPV6_PKTINFO is specified.
2833 	 * If this doesn't result in a source address then we get a source
2834 	 * from ip_attr_connect() below.
2835 	 */
2836 	v6src = connp->conn_saddr_v6;
2837 	if (sin != NULL) {
2838 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2839 		dstport = sin->sin_port;
2840 		flowinfo = 0;
2841 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2842 		ixa->ixa_flags |= IXAF_IS_IPV4;
2843 	} else if (sin6 != NULL) {
2844 		boolean_t v4mapped;
2845 
2846 		v6dst = sin6->sin6_addr;
2847 		dstport = sin6->sin6_port;
2848 		flowinfo = sin6->sin6_flowinfo;
2849 		srcid = sin6->__sin6_src_id;
2850 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2851 			ixa->ixa_scopeid = sin6->sin6_scope_id;
2852 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
2853 		} else {
2854 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2855 		}
2856 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2857 		if (v4mapped)
2858 			ixa->ixa_flags |= IXAF_IS_IPV4;
2859 		else
2860 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
2861 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2862 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2863 			    v4mapped, connp->conn_netstack)) {
2864 				/* Mismatch - v4mapped/v6 specified by srcid. */
2865 				mutex_exit(&connp->conn_lock);
2866 				error = EADDRNOTAVAIL;
2867 				goto failed;	/* Does freemsg() and mib. */
2868 			}
2869 		}
2870 	} else {
2871 		/* Connected case */
2872 		v6dst = connp->conn_faddr_v6;
2873 		dstport = connp->conn_fport;
2874 		flowinfo = connp->conn_flowinfo;
2875 	}
2876 	mutex_exit(&connp->conn_lock);
2877 
2878 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2879 	if (ipp->ipp_fields & IPPF_ADDR) {
2880 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
2881 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2882 				v6src = ipp->ipp_addr;
2883 		} else {
2884 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2885 				v6src = ipp->ipp_addr;
2886 		}
2887 	}
2888 
2889 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2890 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2891 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2892 
2893 	switch (error) {
2894 	case 0:
2895 		break;
2896 	case EADDRNOTAVAIL:
2897 		/*
2898 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2899 		 * Don't have the application see that errno
2900 		 */
2901 		error = ENETUNREACH;
2902 		goto failed;
2903 	case ENETDOWN:
2904 		/*
2905 		 * Have !ipif_addr_ready address; drop packet silently
2906 		 * until we can get applications to not send until we
2907 		 * are ready.
2908 		 */
2909 		error = 0;
2910 		goto failed;
2911 	case EHOSTUNREACH:
2912 	case ENETUNREACH:
2913 		if (ixa->ixa_ire != NULL) {
2914 			/*
2915 			 * Let conn_ip_output/ire_send_noroute return
2916 			 * the error and send any local ICMP error.
2917 			 */
2918 			error = 0;
2919 			break;
2920 		}
2921 		/* FALLTHRU */
2922 	default:
2923 	failed:
2924 		freemsg(mp);
2925 		UDPS_BUMP_MIB(us, udpOutErrors);
2926 		goto done;
2927 	}
2928 
2929 	/*
2930 	 * We might be going to a different destination than last time,
2931 	 * thus check that TX allows the communication and compute any
2932 	 * needed label.
2933 	 *
2934 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2935 	 * don't have to worry about concurrent threads.
2936 	 */
2937 	if (is_system_labeled()) {
2938 		/* Using UDP MLP requires SCM_UCRED from user */
2939 		if (connp->conn_mlp_type != mlptSingle &&
2940 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2941 			UDPS_BUMP_MIB(us, udpOutErrors);
2942 			error = ECONNREFUSED;
2943 			freemsg(mp);
2944 			goto done;
2945 		}
2946 		/*
2947 		 * Check whether Trusted Solaris policy allows communication
2948 		 * with this host, and pretend that the destination is
2949 		 * unreachable if not.
2950 		 * Compute any needed label and place it in ipp_label_v4/v6.
2951 		 *
2952 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
2953 		 * ipp_label_v4/v6 to form the packet.
2954 		 *
2955 		 * Tsol note: We have ipp structure local to this thread so
2956 		 * no locking is needed.
2957 		 */
2958 		error = conn_update_label(connp, ixa, &v6dst, ipp);
2959 		if (error != 0) {
2960 			freemsg(mp);
2961 			UDPS_BUMP_MIB(us, udpOutErrors);
2962 			goto done;
2963 		}
2964 	}
2965 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2966 	    flowinfo, mp, &error);
2967 	if (mp == NULL) {
2968 		ASSERT(error != 0);
2969 		UDPS_BUMP_MIB(us, udpOutErrors);
2970 		goto done;
2971 	}
2972 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
2973 		error = EMSGSIZE;
2974 		UDPS_BUMP_MIB(us, udpOutErrors);
2975 		freemsg(mp);
2976 		goto done;
2977 	}
2978 	/* We're done.  Pass the packet to ip. */
2979 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2980 
2981 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2982 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2983 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2984 
2985 	error = conn_ip_output(mp, ixa);
2986 	/* No udpOutErrors if an error since IP increases its error counter */
2987 	switch (error) {
2988 	case 0:
2989 		break;
2990 	case EWOULDBLOCK:
2991 		(void) ixa_check_drain_insert(connp, ixa);
2992 		error = 0;
2993 		break;
2994 	case EADDRNOTAVAIL:
2995 		/*
2996 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2997 		 * Don't have the application see that errno
2998 		 */
2999 		error = ENETUNREACH;
3000 		/* FALLTHRU */
3001 	default:
3002 		mutex_enter(&connp->conn_lock);
3003 		/*
3004 		 * Clear the source and v6lastdst so we call ip_attr_connect
3005 		 * for the next packet and try to pick a better source.
3006 		 */
3007 		if (connp->conn_mcbc_bind)
3008 			connp->conn_saddr_v6 = ipv6_all_zeros;
3009 		else
3010 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3011 		connp->conn_v6lastdst = ipv6_all_zeros;
3012 		mutex_exit(&connp->conn_lock);
3013 		break;
3014 	}
3015 done:
3016 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3017 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3018 	ixa->ixa_cpid = connp->conn_cpid;
3019 	ixa_refrele(ixa);
3020 	ip_pkt_free(ipp);
3021 	kmem_free(ipp, sizeof (*ipp));
3022 	return (error);
3023 }
3024 
3025 /*
3026  * Handle sending an M_DATA for a connected socket.
3027  * Handles both IPv4 and IPv6.
3028  */
3029 static int
udp_output_connected(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid)3030 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3031 {
3032 	udp_t		*udp = connp->conn_udp;
3033 	udp_stack_t	*us = udp->udp_us;
3034 	int		error;
3035 	ip_xmit_attr_t	*ixa;
3036 
3037 	/*
3038 	 * If no other thread is using conn_ixa this just gets a reference to
3039 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3040 	 */
3041 	ixa = conn_get_ixa(connp, B_FALSE);
3042 	if (ixa == NULL) {
3043 		UDPS_BUMP_MIB(us, udpOutErrors);
3044 		freemsg(mp);
3045 		return (ENOMEM);
3046 	}
3047 
3048 	ASSERT(cr != NULL);
3049 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3050 	ixa->ixa_cred = cr;
3051 	ixa->ixa_cpid = pid;
3052 
3053 	mutex_enter(&connp->conn_lock);
3054 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3055 	    connp->conn_fport, connp->conn_flowinfo, &error);
3056 
3057 	if (mp == NULL) {
3058 		ASSERT(error != 0);
3059 		mutex_exit(&connp->conn_lock);
3060 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3061 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3062 		ixa->ixa_cpid = connp->conn_cpid;
3063 		ixa_refrele(ixa);
3064 		UDPS_BUMP_MIB(us, udpOutErrors);
3065 		freemsg(mp);
3066 		return (error);
3067 	}
3068 
3069 	/*
3070 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3071 	 * safe copy, then we need to fill in any pointers in it.
3072 	 */
3073 	if (ixa->ixa_ire == NULL) {
3074 		in6_addr_t	faddr, saddr;
3075 		in6_addr_t	nexthop;
3076 		in_port_t	fport;
3077 
3078 		saddr = connp->conn_saddr_v6;
3079 		faddr = connp->conn_faddr_v6;
3080 		fport = connp->conn_fport;
3081 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3082 		mutex_exit(&connp->conn_lock);
3083 
3084 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3085 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3086 		    IPDF_IPSEC);
3087 		switch (error) {
3088 		case 0:
3089 			break;
3090 		case EADDRNOTAVAIL:
3091 			/*
3092 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3093 			 * Don't have the application see that errno
3094 			 */
3095 			error = ENETUNREACH;
3096 			goto failed;
3097 		case ENETDOWN:
3098 			/*
3099 			 * Have !ipif_addr_ready address; drop packet silently
3100 			 * until we can get applications to not send until we
3101 			 * are ready.
3102 			 */
3103 			error = 0;
3104 			goto failed;
3105 		case EHOSTUNREACH:
3106 		case ENETUNREACH:
3107 			if (ixa->ixa_ire != NULL) {
3108 				/*
3109 				 * Let conn_ip_output/ire_send_noroute return
3110 				 * the error and send any local ICMP error.
3111 				 */
3112 				error = 0;
3113 				break;
3114 			}
3115 			/* FALLTHRU */
3116 		default:
3117 		failed:
3118 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3119 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3120 			ixa->ixa_cpid = connp->conn_cpid;
3121 			ixa_refrele(ixa);
3122 			freemsg(mp);
3123 			UDPS_BUMP_MIB(us, udpOutErrors);
3124 			return (error);
3125 		}
3126 	} else {
3127 		/* Done with conn_t */
3128 		mutex_exit(&connp->conn_lock);
3129 	}
3130 	ASSERT(ixa->ixa_ire != NULL);
3131 
3132 	/* We're done.  Pass the packet to ip. */
3133 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3134 
3135 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3136 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3137 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3138 
3139 	error = conn_ip_output(mp, ixa);
3140 	/* No udpOutErrors if an error since IP increases its error counter */
3141 	switch (error) {
3142 	case 0:
3143 		break;
3144 	case EWOULDBLOCK:
3145 		(void) ixa_check_drain_insert(connp, ixa);
3146 		error = 0;
3147 		break;
3148 	case EADDRNOTAVAIL:
3149 		/*
3150 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3151 		 * Don't have the application see that errno
3152 		 */
3153 		error = ENETUNREACH;
3154 		break;
3155 	}
3156 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3157 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3158 	ixa->ixa_cpid = connp->conn_cpid;
3159 	ixa_refrele(ixa);
3160 	return (error);
3161 }
3162 
3163 /*
3164  * Handle sending an M_DATA to the last destination.
3165  * Handles both IPv4 and IPv6.
3166  *
3167  * NOTE: The caller must hold conn_lock and we drop it here.
3168  */
3169 static int
udp_output_lastdst(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3170 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3171     ip_xmit_attr_t *ixa)
3172 {
3173 	udp_t		*udp = connp->conn_udp;
3174 	udp_stack_t	*us = udp->udp_us;
3175 	int		error;
3176 
3177 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3178 	ASSERT(ixa != NULL);
3179 
3180 	ASSERT(cr != NULL);
3181 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3182 	ixa->ixa_cred = cr;
3183 	ixa->ixa_cpid = pid;
3184 
3185 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3186 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3187 
3188 	if (mp == NULL) {
3189 		ASSERT(error != 0);
3190 		mutex_exit(&connp->conn_lock);
3191 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3192 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3193 		ixa->ixa_cpid = connp->conn_cpid;
3194 		ixa_refrele(ixa);
3195 		UDPS_BUMP_MIB(us, udpOutErrors);
3196 		freemsg(mp);
3197 		return (error);
3198 	}
3199 
3200 	/*
3201 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3202 	 * safe copy, then we need to fill in any pointers in it.
3203 	 */
3204 	if (ixa->ixa_ire == NULL) {
3205 		in6_addr_t	lastdst, lastsrc;
3206 		in6_addr_t	nexthop;
3207 		in_port_t	lastport;
3208 
3209 		lastsrc = connp->conn_v6lastsrc;
3210 		lastdst = connp->conn_v6lastdst;
3211 		lastport = connp->conn_lastdstport;
3212 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3213 		mutex_exit(&connp->conn_lock);
3214 
3215 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3216 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3217 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3218 		switch (error) {
3219 		case 0:
3220 			break;
3221 		case EADDRNOTAVAIL:
3222 			/*
3223 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3224 			 * Don't have the application see that errno
3225 			 */
3226 			error = ENETUNREACH;
3227 			goto failed;
3228 		case ENETDOWN:
3229 			/*
3230 			 * Have !ipif_addr_ready address; drop packet silently
3231 			 * until we can get applications to not send until we
3232 			 * are ready.
3233 			 */
3234 			error = 0;
3235 			goto failed;
3236 		case EHOSTUNREACH:
3237 		case ENETUNREACH:
3238 			if (ixa->ixa_ire != NULL) {
3239 				/*
3240 				 * Let conn_ip_output/ire_send_noroute return
3241 				 * the error and send any local ICMP error.
3242 				 */
3243 				error = 0;
3244 				break;
3245 			}
3246 			/* FALLTHRU */
3247 		default:
3248 		failed:
3249 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3250 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3251 			ixa->ixa_cpid = connp->conn_cpid;
3252 			ixa_refrele(ixa);
3253 			freemsg(mp);
3254 			UDPS_BUMP_MIB(us, udpOutErrors);
3255 			return (error);
3256 		}
3257 	} else {
3258 		/* Done with conn_t */
3259 		mutex_exit(&connp->conn_lock);
3260 	}
3261 
3262 	/* We're done.  Pass the packet to ip. */
3263 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3264 
3265 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3266 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3267 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3268 
3269 	error = conn_ip_output(mp, ixa);
3270 	/* No udpOutErrors if an error since IP increases its error counter */
3271 	switch (error) {
3272 	case 0:
3273 		break;
3274 	case EWOULDBLOCK:
3275 		(void) ixa_check_drain_insert(connp, ixa);
3276 		error = 0;
3277 		break;
3278 	case EADDRNOTAVAIL:
3279 		/*
3280 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3281 		 * Don't have the application see that errno
3282 		 */
3283 		error = ENETUNREACH;
3284 		/* FALLTHRU */
3285 	default:
3286 		mutex_enter(&connp->conn_lock);
3287 		/*
3288 		 * Clear the source and v6lastdst so we call ip_attr_connect
3289 		 * for the next packet and try to pick a better source.
3290 		 */
3291 		if (connp->conn_mcbc_bind)
3292 			connp->conn_saddr_v6 = ipv6_all_zeros;
3293 		else
3294 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3295 		connp->conn_v6lastdst = ipv6_all_zeros;
3296 		mutex_exit(&connp->conn_lock);
3297 		break;
3298 	}
3299 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3300 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3301 	ixa->ixa_cpid = connp->conn_cpid;
3302 	ixa_refrele(ixa);
3303 	return (error);
3304 }
3305 
3306 
3307 /*
3308  * Prepend the header template and then fill in the source and
3309  * flowinfo. The caller needs to handle the destination address since
3310  * it's setting is different if rthdr or source route.
3311  *
3312  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3313  * When it returns NULL it sets errorp.
3314  */
3315 static mblk_t *
udp_prepend_header_template(conn_t * connp,ip_xmit_attr_t * ixa,mblk_t * mp,const in6_addr_t * v6src,in_port_t dstport,uint32_t flowinfo,int * errorp)3316 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3317     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3318 {
3319 	udp_t		*udp = connp->conn_udp;
3320 	udp_stack_t	*us = udp->udp_us;
3321 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3322 	boolean_t	hash_srcport = udp->udp_vxlanhash;
3323 	uint_t		pktlen;
3324 	uint_t		alloclen;
3325 	uint_t		copylen;
3326 	uint8_t		*iph;
3327 	uint_t		ip_hdr_length;
3328 	udpha_t		*udpha;
3329 	uint32_t	cksum;
3330 	ip_pkt_t	*ipp;
3331 	uint16_t	srcport;
3332 
3333 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3334 
3335 	/*
3336 	 * If we have source port hashing going on, determine the hash before
3337 	 * we modify the mblk_t.
3338 	 */
3339 	if (hash_srcport == B_TRUE) {
3340 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3341 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3342 		    ntohs(connp->conn_lport));
3343 	}
3344 
3345 	/*
3346 	 * Copy the header template and leave space for an SPI
3347 	 */
3348 	copylen = connp->conn_ht_iphc_len;
3349 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3350 	pktlen = alloclen + msgdsize(mp);
3351 	if (pktlen > IP_MAXPACKET) {
3352 		freemsg(mp);
3353 		*errorp = EMSGSIZE;
3354 		return (NULL);
3355 	}
3356 	ixa->ixa_pktlen = pktlen;
3357 
3358 	/* check/fix buffer config, setup pointers into it */
3359 	iph = mp->b_rptr - alloclen;
3360 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3361 		mblk_t *mp1;
3362 
3363 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3364 		if (mp1 == NULL) {
3365 			freemsg(mp);
3366 			*errorp = ENOMEM;
3367 			return (NULL);
3368 		}
3369 		mp1->b_wptr = DB_LIM(mp1);
3370 		mp1->b_cont = mp;
3371 		mp = mp1;
3372 		iph = (mp->b_wptr - alloclen);
3373 	}
3374 	mp->b_rptr = iph;
3375 	bcopy(connp->conn_ht_iphc, iph, copylen);
3376 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3377 
3378 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3379 	udpha = (udpha_t *)(iph + ip_hdr_length);
3380 
3381 	/*
3382 	 * Setup header length and prepare for ULP checksum done in IP.
3383 	 * udp_build_hdr_template has already massaged any routing header
3384 	 * and placed the result in conn_sum.
3385 	 *
3386 	 * We make it easy for IP to include our pseudo header
3387 	 * by putting our length in uha_checksum.
3388 	 */
3389 	cksum = pktlen - ip_hdr_length;
3390 	udpha->uha_length = htons(cksum);
3391 
3392 	cksum += connp->conn_sum;
3393 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3394 	ASSERT(cksum < 0x10000);
3395 
3396 	ipp = &connp->conn_xmit_ipp;
3397 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3398 		ipha_t	*ipha = (ipha_t *)iph;
3399 
3400 		ipha->ipha_length = htons((uint16_t)pktlen);
3401 
3402 		/* IP does the checksum if uha_checksum is non-zero */
3403 		if (us->us_do_checksum)
3404 			udpha->uha_checksum = htons(cksum);
3405 
3406 		/* if IP_PKTINFO specified an addres it wins over bind() */
3407 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3408 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3409 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3410 			ipha->ipha_src = ipp->ipp_addr_v4;
3411 		} else {
3412 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3413 		}
3414 	} else {
3415 		ip6_t *ip6h = (ip6_t *)iph;
3416 
3417 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3418 		udpha->uha_checksum = htons(cksum);
3419 
3420 		/* if IP_PKTINFO specified an addres it wins over bind() */
3421 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3422 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3423 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3424 			ip6h->ip6_src = ipp->ipp_addr;
3425 		} else {
3426 			ip6h->ip6_src = *v6src;
3427 		}
3428 		ip6h->ip6_vcf =
3429 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3430 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3431 		if (ipp->ipp_fields & IPPF_TCLASS) {
3432 			/* Overrides the class part of flowinfo */
3433 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3434 			    ipp->ipp_tclass);
3435 		}
3436 	}
3437 
3438 	/* Insert all-0s SPI now. */
3439 	if (insert_spi)
3440 		*((uint32_t *)(udpha + 1)) = 0;
3441 
3442 	udpha->uha_dst_port = dstport;
3443 	if (hash_srcport == B_TRUE)
3444 		udpha->uha_src_port = htons(srcport);
3445 
3446 	return (mp);
3447 }
3448 
3449 /*
3450  * Send a T_UDERR_IND in response to an M_DATA
3451  */
3452 static void
udp_ud_err_connected(conn_t * connp,t_scalar_t error)3453 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3454 {
3455 	struct sockaddr_storage ss;
3456 	sin_t		*sin;
3457 	sin6_t		*sin6;
3458 	struct sockaddr	*addr;
3459 	socklen_t	addrlen;
3460 	mblk_t		*mp1;
3461 
3462 	mutex_enter(&connp->conn_lock);
3463 	/* Initialize addr and addrlen as if they're passed in */
3464 	if (connp->conn_family == AF_INET) {
3465 		sin = (sin_t *)&ss;
3466 		*sin = sin_null;
3467 		sin->sin_family = AF_INET;
3468 		sin->sin_port = connp->conn_fport;
3469 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3470 		addr = (struct sockaddr *)sin;
3471 		addrlen = sizeof (*sin);
3472 	} else {
3473 		sin6 = (sin6_t *)&ss;
3474 		*sin6 = sin6_null;
3475 		sin6->sin6_family = AF_INET6;
3476 		sin6->sin6_port = connp->conn_fport;
3477 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3478 		sin6->sin6_addr = connp->conn_faddr_v6;
3479 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3480 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3481 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3482 		} else {
3483 			sin6->sin6_scope_id = 0;
3484 		}
3485 		sin6->__sin6_src_id = 0;
3486 		addr = (struct sockaddr *)sin6;
3487 		addrlen = sizeof (*sin6);
3488 	}
3489 	mutex_exit(&connp->conn_lock);
3490 
3491 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3492 	if (mp1 != NULL)
3493 		putnext(connp->conn_rq, mp1);
3494 }
3495 
3496 /*
3497  * This routine handles all messages passed downstream.  It either
3498  * consumes the message or passes it downstream; it never queues a
3499  * a message.
3500  *
3501  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3502  * is valid when we are directly beneath the stream head, and thus sockfs
3503  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3504  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3505  * connected endpoints.
3506  */
3507 int
udp_wput(queue_t * q,mblk_t * mp)3508 udp_wput(queue_t *q, mblk_t *mp)
3509 {
3510 	sin6_t		*sin6;
3511 	sin_t		*sin = NULL;
3512 	uint_t		srcid;
3513 	conn_t		*connp = Q_TO_CONN(q);
3514 	udp_t		*udp = connp->conn_udp;
3515 	int		error = 0;
3516 	struct sockaddr	*addr = NULL;
3517 	socklen_t	addrlen;
3518 	udp_stack_t	*us = udp->udp_us;
3519 	struct T_unitdata_req *tudr;
3520 	mblk_t		*data_mp;
3521 	ushort_t	ipversion;
3522 	cred_t		*cr;
3523 	pid_t		pid;
3524 
3525 	/*
3526 	 * We directly handle several cases here: T_UNITDATA_REQ message
3527 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3528 	 * socket.
3529 	 */
3530 	switch (DB_TYPE(mp)) {
3531 	case M_DATA:
3532 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3533 			/* Not connected; address is required */
3534 			UDPS_BUMP_MIB(us, udpOutErrors);
3535 			UDP_DBGSTAT(us, udp_data_notconn);
3536 			UDP_STAT(us, udp_out_err_notconn);
3537 			freemsg(mp);
3538 			return (0);
3539 		}
3540 		/*
3541 		 * All Solaris components should pass a db_credp
3542 		 * for this message, hence we ASSERT.
3543 		 * On production kernels we return an error to be robust against
3544 		 * random streams modules sitting on top of us.
3545 		 */
3546 		cr = msg_getcred(mp, &pid);
3547 		ASSERT(cr != NULL);
3548 		if (cr == NULL) {
3549 			UDPS_BUMP_MIB(us, udpOutErrors);
3550 			freemsg(mp);
3551 			return (0);
3552 		}
3553 		ASSERT(udp->udp_issocket);
3554 		UDP_DBGSTAT(us, udp_data_conn);
3555 		error = udp_output_connected(connp, mp, cr, pid);
3556 		if (error != 0) {
3557 			UDP_STAT(us, udp_out_err_output);
3558 			if (connp->conn_rq != NULL)
3559 				udp_ud_err_connected(connp, (t_scalar_t)error);
3560 #ifdef DEBUG
3561 			printf("udp_output_connected returned %d\n", error);
3562 #endif
3563 		}
3564 		return (0);
3565 
3566 	case M_PROTO:
3567 	case M_PCPROTO:
3568 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3569 		if (MBLKL(mp) < sizeof (*tudr) ||
3570 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3571 			udp_wput_other(q, mp);
3572 			return (0);
3573 		}
3574 		break;
3575 
3576 	default:
3577 		udp_wput_other(q, mp);
3578 		return (0);
3579 	}
3580 
3581 	/* Handle valid T_UNITDATA_REQ here */
3582 	data_mp = mp->b_cont;
3583 	if (data_mp == NULL) {
3584 		error = EPROTO;
3585 		goto ud_error2;
3586 	}
3587 	mp->b_cont = NULL;
3588 
3589 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3590 		error = EADDRNOTAVAIL;
3591 		goto ud_error2;
3592 	}
3593 
3594 	/*
3595 	 * All Solaris components should pass a db_credp
3596 	 * for this TPI message, hence we should ASSERT.
3597 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3598 	 * passes the options from a T_UNITDATA_IND unchanged in a
3599 	 * T_UNITDATA_REQ. While that is the right thing to do for
3600 	 * some options, SCM_UCRED being the key one, this also makes it
3601 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3602 	 */
3603 	cr = msg_getcred(mp, &pid);
3604 	if (cr == NULL) {
3605 		cr = connp->conn_cred;
3606 		pid = connp->conn_cpid;
3607 	}
3608 
3609 	/*
3610 	 * If a port has not been bound to the stream, fail.
3611 	 * This is not a problem when sockfs is directly
3612 	 * above us, because it will ensure that the socket
3613 	 * is first bound before allowing data to be sent.
3614 	 */
3615 	if (udp->udp_state == TS_UNBND) {
3616 		error = EPROTO;
3617 		goto ud_error2;
3618 	}
3619 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3620 	addrlen = tudr->DEST_length;
3621 
3622 	switch (connp->conn_family) {
3623 	case AF_INET6:
3624 		sin6 = (sin6_t *)addr;
3625 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3626 		    (sin6->sin6_family != AF_INET6)) {
3627 			error = EADDRNOTAVAIL;
3628 			goto ud_error2;
3629 		}
3630 
3631 		srcid = sin6->__sin6_src_id;
3632 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3633 			/*
3634 			 * Destination is a non-IPv4-compatible IPv6 address.
3635 			 * Send out an IPv6 format packet.
3636 			 */
3637 
3638 			/*
3639 			 * If the local address is a mapped address return
3640 			 * an error.
3641 			 * It would be possible to send an IPv6 packet but the
3642 			 * response would never make it back to the application
3643 			 * since it is bound to a mapped address.
3644 			 */
3645 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3646 				error = EADDRNOTAVAIL;
3647 				goto ud_error2;
3648 			}
3649 
3650 			UDP_DBGSTAT(us, udp_out_ipv6);
3651 
3652 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3653 				sin6->sin6_addr = ipv6_loopback;
3654 			ipversion = IPV6_VERSION;
3655 		} else {
3656 			if (connp->conn_ipv6_v6only) {
3657 				error = EADDRNOTAVAIL;
3658 				goto ud_error2;
3659 			}
3660 
3661 			/*
3662 			 * If the local address is not zero or a mapped address
3663 			 * return an error.  It would be possible to send an
3664 			 * IPv4 packet but the response would never make it
3665 			 * back to the application since it is bound to a
3666 			 * non-mapped address.
3667 			 */
3668 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3669 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3670 				error = EADDRNOTAVAIL;
3671 				goto ud_error2;
3672 			}
3673 			UDP_DBGSTAT(us, udp_out_mapped);
3674 
3675 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3676 				V4_PART_OF_V6(sin6->sin6_addr) =
3677 				    htonl(INADDR_LOOPBACK);
3678 			}
3679 			ipversion = IPV4_VERSION;
3680 		}
3681 
3682 		if (tudr->OPT_length != 0) {
3683 			/*
3684 			 * If we are connected then the destination needs to be
3685 			 * the same as the connected one.
3686 			 */
3687 			if (udp->udp_state == TS_DATA_XFER &&
3688 			    !conn_same_as_last_v6(connp, sin6)) {
3689 				error = EISCONN;
3690 				goto ud_error2;
3691 			}
3692 			UDP_STAT(us, udp_out_opt);
3693 			error = udp_output_ancillary(connp, NULL, sin6,
3694 			    data_mp, mp, NULL, cr, pid);
3695 		} else {
3696 			ip_xmit_attr_t *ixa;
3697 
3698 			/*
3699 			 * We have to allocate an ip_xmit_attr_t before we grab
3700 			 * conn_lock and we need to hold conn_lock once we've
3701 			 * checked conn_same_as_last_v6 to handle concurrent
3702 			 * send* calls on a socket.
3703 			 */
3704 			ixa = conn_get_ixa(connp, B_FALSE);
3705 			if (ixa == NULL) {
3706 				error = ENOMEM;
3707 				goto ud_error2;
3708 			}
3709 			mutex_enter(&connp->conn_lock);
3710 
3711 			if (conn_same_as_last_v6(connp, sin6) &&
3712 			    connp->conn_lastsrcid == srcid &&
3713 			    ipsec_outbound_policy_current(ixa)) {
3714 				UDP_DBGSTAT(us, udp_out_lastdst);
3715 				/* udp_output_lastdst drops conn_lock */
3716 				error = udp_output_lastdst(connp, data_mp, cr,
3717 				    pid, ixa);
3718 			} else {
3719 				UDP_DBGSTAT(us, udp_out_diffdst);
3720 				/* udp_output_newdst drops conn_lock */
3721 				error = udp_output_newdst(connp, data_mp, NULL,
3722 				    sin6, ipversion, cr, pid, ixa);
3723 			}
3724 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3725 		}
3726 		if (error == 0) {
3727 			freeb(mp);
3728 			return (0);
3729 		}
3730 		break;
3731 
3732 	case AF_INET:
3733 		sin = (sin_t *)addr;
3734 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3735 		    (sin->sin_family != AF_INET)) {
3736 			error = EADDRNOTAVAIL;
3737 			goto ud_error2;
3738 		}
3739 		UDP_DBGSTAT(us, udp_out_ipv4);
3740 		if (sin->sin_addr.s_addr == INADDR_ANY)
3741 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3742 		ipversion = IPV4_VERSION;
3743 
3744 		srcid = 0;
3745 		if (tudr->OPT_length != 0) {
3746 			/*
3747 			 * If we are connected then the destination needs to be
3748 			 * the same as the connected one.
3749 			 */
3750 			if (udp->udp_state == TS_DATA_XFER &&
3751 			    !conn_same_as_last_v4(connp, sin)) {
3752 				error = EISCONN;
3753 				goto ud_error2;
3754 			}
3755 			UDP_STAT(us, udp_out_opt);
3756 			error = udp_output_ancillary(connp, sin, NULL,
3757 			    data_mp, mp, NULL, cr, pid);
3758 		} else {
3759 			ip_xmit_attr_t *ixa;
3760 
3761 			/*
3762 			 * We have to allocate an ip_xmit_attr_t before we grab
3763 			 * conn_lock and we need to hold conn_lock once we've
3764 			 * checked conn_same_as_last_v4 to handle concurrent
3765 			 * send* calls on a socket.
3766 			 */
3767 			ixa = conn_get_ixa(connp, B_FALSE);
3768 			if (ixa == NULL) {
3769 				error = ENOMEM;
3770 				goto ud_error2;
3771 			}
3772 			mutex_enter(&connp->conn_lock);
3773 
3774 			if (conn_same_as_last_v4(connp, sin) &&
3775 			    ipsec_outbound_policy_current(ixa)) {
3776 				UDP_DBGSTAT(us, udp_out_lastdst);
3777 				/* udp_output_lastdst drops conn_lock */
3778 				error = udp_output_lastdst(connp, data_mp, cr,
3779 				    pid, ixa);
3780 			} else {
3781 				UDP_DBGSTAT(us, udp_out_diffdst);
3782 				/* udp_output_newdst drops conn_lock */
3783 				error = udp_output_newdst(connp, data_mp, sin,
3784 				    NULL, ipversion, cr, pid, ixa);
3785 			}
3786 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3787 		}
3788 		if (error == 0) {
3789 			freeb(mp);
3790 			return (0);
3791 		}
3792 		break;
3793 	}
3794 	UDP_STAT(us, udp_out_err_output);
3795 	ASSERT(mp != NULL);
3796 	/* mp is freed by the following routine */
3797 	udp_ud_err(q, mp, (t_scalar_t)error);
3798 	return (0);
3799 
3800 ud_error2:
3801 	UDPS_BUMP_MIB(us, udpOutErrors);
3802 	freemsg(data_mp);
3803 	UDP_STAT(us, udp_out_err_output);
3804 	ASSERT(mp != NULL);
3805 	/* mp is freed by the following routine */
3806 	udp_ud_err(q, mp, (t_scalar_t)error);
3807 	return (0);
3808 }
3809 
3810 /*
3811  * Handle the case of the IP address, port, flow label being different
3812  * for both IPv4 and IPv6.
3813  *
3814  * NOTE: The caller must hold conn_lock and we drop it here.
3815  */
3816 static int
udp_output_newdst(conn_t * connp,mblk_t * data_mp,sin_t * sin,sin6_t * sin6,ushort_t ipversion,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3817 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3818     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3819 {
3820 	uint_t		srcid;
3821 	uint32_t	flowinfo;
3822 	udp_t		*udp = connp->conn_udp;
3823 	int		error = 0;
3824 	ip_xmit_attr_t	*oldixa;
3825 	udp_stack_t	*us = udp->udp_us;
3826 	in6_addr_t	v6src;
3827 	in6_addr_t	v6dst;
3828 	in6_addr_t	v6nexthop;
3829 	in_port_t	dstport;
3830 
3831 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3832 	ASSERT(ixa != NULL);
3833 	/*
3834 	 * We hold conn_lock across all the use and modifications of
3835 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3836 	 * stay consistent.
3837 	 */
3838 
3839 	ASSERT(cr != NULL);
3840 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3841 	ixa->ixa_cred = cr;
3842 	ixa->ixa_cpid = pid;
3843 	if (is_system_labeled()) {
3844 		/* We need to restart with a label based on the cred */
3845 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3846 	}
3847 
3848 	/*
3849 	 * If we are connected then the destination needs to be the
3850 	 * same as the connected one, which is not the case here since we
3851 	 * checked for that above.
3852 	 */
3853 	if (udp->udp_state == TS_DATA_XFER) {
3854 		mutex_exit(&connp->conn_lock);
3855 		error = EISCONN;
3856 		goto ud_error;
3857 	}
3858 
3859 	/*
3860 	 * Before we modify the ixa at all, invalidate our most recent address
3861 	 * to assure that any subsequent call to conn_same_as_last_v6() will
3862 	 * not indicate a match: any thread that picks up conn_lock after we
3863 	 * drop it (but before we pick it up again and properly set the most
3864 	 * recent address) must not associate the ixa with the (now old) last
3865 	 * address.
3866 	 */
3867 	connp->conn_v6lastdst = ipv6_all_zeros;
3868 
3869 	/* In case previous destination was multicast or multirt */
3870 	ip_attr_newdst(ixa);
3871 
3872 	/*
3873 	 * If laddr is unspecified then we look at sin6_src_id.
3874 	 * We will give precedence to a source address set with IPV6_PKTINFO
3875 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3876 	 * want ip_attr_connect to select a source (since it can fail) when
3877 	 * IPV6_PKTINFO is specified.
3878 	 * If this doesn't result in a source address then we get a source
3879 	 * from ip_attr_connect() below.
3880 	 */
3881 	v6src = connp->conn_saddr_v6;
3882 	if (sin != NULL) {
3883 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3884 		dstport = sin->sin_port;
3885 		flowinfo = 0;
3886 		/* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3887 		srcid = 0;
3888 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3889 		ixa->ixa_flags |= IXAF_IS_IPV4;
3890 	} else {
3891 		boolean_t v4mapped;
3892 
3893 		v6dst = sin6->sin6_addr;
3894 		dstport = sin6->sin6_port;
3895 		flowinfo = sin6->sin6_flowinfo;
3896 		srcid = sin6->__sin6_src_id;
3897 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3898 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3899 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3900 		} else {
3901 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3902 		}
3903 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3904 		if (v4mapped)
3905 			ixa->ixa_flags |= IXAF_IS_IPV4;
3906 		else
3907 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3908 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3909 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3910 			    v4mapped, connp->conn_netstack)) {
3911 				/* Mismatched v4mapped/v6 specified by srcid. */
3912 				mutex_exit(&connp->conn_lock);
3913 				error = EADDRNOTAVAIL;
3914 				goto ud_error;
3915 			}
3916 		}
3917 	}
3918 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3919 	if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3920 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3921 
3922 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3923 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3924 				v6src = ipp->ipp_addr;
3925 		} else {
3926 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3927 				v6src = ipp->ipp_addr;
3928 		}
3929 	}
3930 
3931 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3932 	mutex_exit(&connp->conn_lock);
3933 
3934 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3935 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3936 	switch (error) {
3937 	case 0:
3938 		break;
3939 	case EADDRNOTAVAIL:
3940 		/*
3941 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3942 		 * Don't have the application see that errno
3943 		 */
3944 		error = ENETUNREACH;
3945 		goto failed;
3946 	case ENETDOWN:
3947 		/*
3948 		 * Have !ipif_addr_ready address; drop packet silently
3949 		 * until we can get applications to not send until we
3950 		 * are ready.
3951 		 */
3952 		error = 0;
3953 		goto failed;
3954 	case EHOSTUNREACH:
3955 	case ENETUNREACH:
3956 		if (ixa->ixa_ire != NULL) {
3957 			/*
3958 			 * Let conn_ip_output/ire_send_noroute return
3959 			 * the error and send any local ICMP error.
3960 			 */
3961 			error = 0;
3962 			break;
3963 		}
3964 		/* FALLTHRU */
3965 	failed:
3966 	default:
3967 		goto ud_error;
3968 	}
3969 
3970 
3971 	/*
3972 	 * Cluster note: we let the cluster hook know that we are sending to a
3973 	 * new address and/or port.
3974 	 */
3975 	if (cl_inet_connect2 != NULL) {
3976 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3977 		if (error != 0) {
3978 			error = EHOSTUNREACH;
3979 			goto ud_error;
3980 		}
3981 	}
3982 
3983 	mutex_enter(&connp->conn_lock);
3984 	/*
3985 	 * While we dropped the lock some other thread might have connected
3986 	 * this socket. If so we bail out with EISCONN to ensure that the
3987 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
3988 	 * and conn_*last*.
3989 	 */
3990 	if (udp->udp_state == TS_DATA_XFER) {
3991 		mutex_exit(&connp->conn_lock);
3992 		error = EISCONN;
3993 		goto ud_error;
3994 	}
3995 
3996 	/*
3997 	 * We need to rebuild the headers if
3998 	 *  - we are labeling packets (could be different for different
3999 	 *    destinations)
4000 	 *  - we have a source route (or routing header) since we need to
4001 	 *    massage that to get the pseudo-header checksum
4002 	 *  - the IP version is different than the last time
4003 	 *  - a socket option with COA_HEADER_CHANGED has been set which
4004 	 *    set conn_v6lastdst to zero.
4005 	 *
4006 	 * Otherwise the prepend function will just update the src, dst,
4007 	 * dstport, and flow label.
4008 	 */
4009 	if (is_system_labeled()) {
4010 		/* TX MLP requires SCM_UCRED and don't have that here */
4011 		if (connp->conn_mlp_type != mlptSingle) {
4012 			mutex_exit(&connp->conn_lock);
4013 			error = ECONNREFUSED;
4014 			goto ud_error;
4015 		}
4016 		/*
4017 		 * Check whether Trusted Solaris policy allows communication
4018 		 * with this host, and pretend that the destination is
4019 		 * unreachable if not.
4020 		 * Compute any needed label and place it in ipp_label_v4/v6.
4021 		 *
4022 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
4023 		 * ipp_label_v4/v6 to form the packet.
4024 		 *
4025 		 * Tsol note: Since we hold conn_lock we know no other
4026 		 * thread manipulates conn_xmit_ipp.
4027 		 */
4028 		error = conn_update_label(connp, ixa, &v6dst,
4029 		    &connp->conn_xmit_ipp);
4030 		if (error != 0) {
4031 			mutex_exit(&connp->conn_lock);
4032 			goto ud_error;
4033 		}
4034 		/* Rebuild the header template */
4035 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4036 		    flowinfo);
4037 		if (error != 0) {
4038 			mutex_exit(&connp->conn_lock);
4039 			goto ud_error;
4040 		}
4041 	} else if ((connp->conn_xmit_ipp.ipp_fields &
4042 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4043 	    ipversion != connp->conn_lastipversion ||
4044 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4045 		/* Rebuild the header template */
4046 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4047 		    flowinfo);
4048 		if (error != 0) {
4049 			mutex_exit(&connp->conn_lock);
4050 			goto ud_error;
4051 		}
4052 	} else {
4053 		/* Simply update the destination address if no source route */
4054 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4055 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
4056 
4057 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4058 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4059 				ipha->ipha_fragment_offset_and_flags |=
4060 				    IPH_DF_HTONS;
4061 			} else {
4062 				ipha->ipha_fragment_offset_and_flags &=
4063 				    ~IPH_DF_HTONS;
4064 			}
4065 		} else {
4066 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4067 			ip6h->ip6_dst = v6dst;
4068 		}
4069 	}
4070 
4071 	/*
4072 	 * Remember the dst/dstport etc which corresponds to the built header
4073 	 * template and conn_ixa.
4074 	 */
4075 	oldixa = conn_replace_ixa(connp, ixa);
4076 	connp->conn_v6lastdst = v6dst;
4077 	connp->conn_lastipversion = ipversion;
4078 	connp->conn_lastdstport = dstport;
4079 	connp->conn_lastflowinfo = flowinfo;
4080 	connp->conn_lastscopeid = ixa->ixa_scopeid;
4081 	connp->conn_lastsrcid = srcid;
4082 	/* Also remember a source to use together with lastdst */
4083 	connp->conn_v6lastsrc = v6src;
4084 
4085 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4086 	    dstport, flowinfo, &error);
4087 
4088 	/* Done with conn_t */
4089 	mutex_exit(&connp->conn_lock);
4090 	ixa_refrele(oldixa);
4091 
4092 	if (data_mp == NULL) {
4093 		ASSERT(error != 0);
4094 		goto ud_error;
4095 	}
4096 
4097 	/* We're done.  Pass the packet to ip. */
4098 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4099 
4100 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4101 	    void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4102 	    &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4103 
4104 	error = conn_ip_output(data_mp, ixa);
4105 	/* No udpOutErrors if an error since IP increases its error counter */
4106 	switch (error) {
4107 	case 0:
4108 		break;
4109 	case EWOULDBLOCK:
4110 		(void) ixa_check_drain_insert(connp, ixa);
4111 		error = 0;
4112 		break;
4113 	case EADDRNOTAVAIL:
4114 		/*
4115 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4116 		 * Don't have the application see that errno
4117 		 */
4118 		error = ENETUNREACH;
4119 		/* FALLTHRU */
4120 	default:
4121 		mutex_enter(&connp->conn_lock);
4122 		/*
4123 		 * Clear the source and v6lastdst so we call ip_attr_connect
4124 		 * for the next packet and try to pick a better source.
4125 		 */
4126 		if (connp->conn_mcbc_bind)
4127 			connp->conn_saddr_v6 = ipv6_all_zeros;
4128 		else
4129 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4130 		connp->conn_v6lastdst = ipv6_all_zeros;
4131 		mutex_exit(&connp->conn_lock);
4132 		break;
4133 	}
4134 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4135 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4136 	ixa->ixa_cpid = connp->conn_cpid;
4137 	ixa_refrele(ixa);
4138 	return (error);
4139 
4140 ud_error:
4141 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4142 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4143 	ixa->ixa_cpid = connp->conn_cpid;
4144 	ixa_refrele(ixa);
4145 
4146 	freemsg(data_mp);
4147 	UDPS_BUMP_MIB(us, udpOutErrors);
4148 	UDP_STAT(us, udp_out_err_output);
4149 	return (error);
4150 }
4151 
4152 /* ARGSUSED */
4153 static int
udp_wput_fallback(queue_t * wq,mblk_t * mp)4154 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4155 {
4156 #ifdef DEBUG
4157 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4158 #endif
4159 	freemsg(mp);
4160 	return (0);
4161 }
4162 
4163 
4164 /*
4165  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4166  */
4167 static void
udp_wput_cmdblk(queue_t * q,mblk_t * mp)4168 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4169 {
4170 	void	*data;
4171 	mblk_t	*datamp = mp->b_cont;
4172 	conn_t	*connp = Q_TO_CONN(q);
4173 	udp_t	*udp = connp->conn_udp;
4174 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4175 
4176 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4177 		cmdp->cb_error = EPROTO;
4178 		qreply(q, mp);
4179 		return;
4180 	}
4181 	data = datamp->b_rptr;
4182 
4183 	mutex_enter(&connp->conn_lock);
4184 	switch (cmdp->cb_cmd) {
4185 	case TI_GETPEERNAME:
4186 		if (udp->udp_state != TS_DATA_XFER)
4187 			cmdp->cb_error = ENOTCONN;
4188 		else
4189 			cmdp->cb_error = conn_getpeername(connp, data,
4190 			    &cmdp->cb_len);
4191 		break;
4192 	case TI_GETMYNAME:
4193 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4194 		break;
4195 	default:
4196 		cmdp->cb_error = EINVAL;
4197 		break;
4198 	}
4199 	mutex_exit(&connp->conn_lock);
4200 
4201 	qreply(q, mp);
4202 }
4203 
4204 static void
udp_use_pure_tpi(udp_t * udp)4205 udp_use_pure_tpi(udp_t *udp)
4206 {
4207 	conn_t	*connp = udp->udp_connp;
4208 
4209 	mutex_enter(&connp->conn_lock);
4210 	udp->udp_issocket = B_FALSE;
4211 	mutex_exit(&connp->conn_lock);
4212 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4213 }
4214 
4215 static void
udp_wput_other(queue_t * q,mblk_t * mp)4216 udp_wput_other(queue_t *q, mblk_t *mp)
4217 {
4218 	uchar_t	*rptr = mp->b_rptr;
4219 	struct iocblk *iocp;
4220 	conn_t	*connp = Q_TO_CONN(q);
4221 	udp_t	*udp = connp->conn_udp;
4222 	cred_t	*cr;
4223 
4224 	switch (mp->b_datap->db_type) {
4225 	case M_CMD:
4226 		udp_wput_cmdblk(q, mp);
4227 		return;
4228 
4229 	case M_PROTO:
4230 	case M_PCPROTO:
4231 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4232 			/*
4233 			 * If the message does not contain a PRIM_type,
4234 			 * throw it away.
4235 			 */
4236 			freemsg(mp);
4237 			return;
4238 		}
4239 		switch (((t_primp_t)rptr)->type) {
4240 		case T_ADDR_REQ:
4241 			udp_addr_req(q, mp);
4242 			return;
4243 		case O_T_BIND_REQ:
4244 		case T_BIND_REQ:
4245 			udp_tpi_bind(q, mp);
4246 			return;
4247 		case T_CONN_REQ:
4248 			udp_tpi_connect(q, mp);
4249 			return;
4250 		case T_CAPABILITY_REQ:
4251 			udp_capability_req(q, mp);
4252 			return;
4253 		case T_INFO_REQ:
4254 			udp_info_req(q, mp);
4255 			return;
4256 		case T_UNITDATA_REQ:
4257 			/*
4258 			 * If a T_UNITDATA_REQ gets here, the address must
4259 			 * be bad.  Valid T_UNITDATA_REQs are handled
4260 			 * in udp_wput.
4261 			 */
4262 			udp_ud_err(q, mp, EADDRNOTAVAIL);
4263 			return;
4264 		case T_UNBIND_REQ:
4265 			udp_tpi_unbind(q, mp);
4266 			return;
4267 		case T_SVR4_OPTMGMT_REQ:
4268 			/*
4269 			 * All Solaris components should pass a db_credp
4270 			 * for this TPI message, hence we ASSERT.
4271 			 * But in case there is some other M_PROTO that looks
4272 			 * like a TPI message sent by some other kernel
4273 			 * component, we check and return an error.
4274 			 */
4275 			cr = msg_getcred(mp, NULL);
4276 			ASSERT(cr != NULL);
4277 			if (cr == NULL) {
4278 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4279 				return;
4280 			}
4281 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4282 			    cr)) {
4283 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4284 			}
4285 			return;
4286 
4287 		case T_OPTMGMT_REQ:
4288 			/*
4289 			 * All Solaris components should pass a db_credp
4290 			 * for this TPI message, hence we ASSERT.
4291 			 * But in case there is some other M_PROTO that looks
4292 			 * like a TPI message sent by some other kernel
4293 			 * component, we check and return an error.
4294 			 */
4295 			cr = msg_getcred(mp, NULL);
4296 			ASSERT(cr != NULL);
4297 			if (cr == NULL) {
4298 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4299 				return;
4300 			}
4301 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4302 			return;
4303 
4304 		case T_DISCON_REQ:
4305 			udp_tpi_disconnect(q, mp);
4306 			return;
4307 
4308 		/* The following TPI message is not supported by udp. */
4309 		case O_T_CONN_RES:
4310 		case T_CONN_RES:
4311 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4312 			return;
4313 
4314 		/* The following 3 TPI requests are illegal for udp. */
4315 		case T_DATA_REQ:
4316 		case T_EXDATA_REQ:
4317 		case T_ORDREL_REQ:
4318 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4319 			return;
4320 		default:
4321 			break;
4322 		}
4323 		break;
4324 	case M_FLUSH:
4325 		if (*rptr & FLUSHW)
4326 			flushq(q, FLUSHDATA);
4327 		break;
4328 	case M_IOCTL:
4329 		iocp = (struct iocblk *)mp->b_rptr;
4330 		switch (iocp->ioc_cmd) {
4331 		case TI_GETPEERNAME:
4332 			if (udp->udp_state != TS_DATA_XFER) {
4333 				/*
4334 				 * If a default destination address has not
4335 				 * been associated with the stream, then we
4336 				 * don't know the peer's name.
4337 				 */
4338 				iocp->ioc_error = ENOTCONN;
4339 				iocp->ioc_count = 0;
4340 				mp->b_datap->db_type = M_IOCACK;
4341 				qreply(q, mp);
4342 				return;
4343 			}
4344 			/* FALLTHRU */
4345 		case TI_GETMYNAME:
4346 			/*
4347 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4348 			 * need to copyin the user's strbuf structure.
4349 			 * Processing will continue in the M_IOCDATA case
4350 			 * below.
4351 			 */
4352 			mi_copyin(q, mp, NULL,
4353 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4354 			return;
4355 		case _SIOCSOCKFALLBACK:
4356 			/*
4357 			 * Either sockmod is about to be popped and the
4358 			 * socket would now be treated as a plain stream,
4359 			 * or a module is about to be pushed so we have
4360 			 * to follow pure TPI semantics.
4361 			 */
4362 			if (!udp->udp_issocket) {
4363 				DB_TYPE(mp) = M_IOCNAK;
4364 				iocp->ioc_error = EINVAL;
4365 			} else {
4366 				udp_use_pure_tpi(udp);
4367 
4368 				DB_TYPE(mp) = M_IOCACK;
4369 				iocp->ioc_error = 0;
4370 			}
4371 			iocp->ioc_count = 0;
4372 			iocp->ioc_rval = 0;
4373 			qreply(q, mp);
4374 			return;
4375 		default:
4376 			break;
4377 		}
4378 		break;
4379 	case M_IOCDATA:
4380 		udp_wput_iocdata(q, mp);
4381 		return;
4382 	default:
4383 		/* Unrecognized messages are passed through without change. */
4384 		break;
4385 	}
4386 	ip_wput_nondata(q, mp);
4387 }
4388 
4389 /*
4390  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4391  * messages.
4392  */
4393 static void
udp_wput_iocdata(queue_t * q,mblk_t * mp)4394 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4395 {
4396 	mblk_t		*mp1;
4397 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
4398 	STRUCT_HANDLE(strbuf, sb);
4399 	uint_t		addrlen;
4400 	conn_t		*connp = Q_TO_CONN(q);
4401 	udp_t		*udp = connp->conn_udp;
4402 
4403 	/* Make sure it is one of ours. */
4404 	switch (iocp->ioc_cmd) {
4405 	case TI_GETMYNAME:
4406 	case TI_GETPEERNAME:
4407 		break;
4408 	default:
4409 		ip_wput_nondata(q, mp);
4410 		return;
4411 	}
4412 
4413 	switch (mi_copy_state(q, mp, &mp1)) {
4414 	case -1:
4415 		return;
4416 	case MI_COPY_CASE(MI_COPY_IN, 1):
4417 		break;
4418 	case MI_COPY_CASE(MI_COPY_OUT, 1):
4419 		/*
4420 		 * The address has been copied out, so now
4421 		 * copyout the strbuf.
4422 		 */
4423 		mi_copyout(q, mp);
4424 		return;
4425 	case MI_COPY_CASE(MI_COPY_OUT, 2):
4426 		/*
4427 		 * The address and strbuf have been copied out.
4428 		 * We're done, so just acknowledge the original
4429 		 * M_IOCTL.
4430 		 */
4431 		mi_copy_done(q, mp, 0);
4432 		return;
4433 	default:
4434 		/*
4435 		 * Something strange has happened, so acknowledge
4436 		 * the original M_IOCTL with an EPROTO error.
4437 		 */
4438 		mi_copy_done(q, mp, EPROTO);
4439 		return;
4440 	}
4441 
4442 	/*
4443 	 * Now we have the strbuf structure for TI_GETMYNAME
4444 	 * and TI_GETPEERNAME.  Next we copyout the requested
4445 	 * address and then we'll copyout the strbuf.
4446 	 */
4447 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4448 
4449 	if (connp->conn_family == AF_INET)
4450 		addrlen = sizeof (sin_t);
4451 	else
4452 		addrlen = sizeof (sin6_t);
4453 
4454 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
4455 		mi_copy_done(q, mp, EINVAL);
4456 		return;
4457 	}
4458 
4459 	switch (iocp->ioc_cmd) {
4460 	case TI_GETMYNAME:
4461 		break;
4462 	case TI_GETPEERNAME:
4463 		if (udp->udp_state != TS_DATA_XFER) {
4464 			mi_copy_done(q, mp, ENOTCONN);
4465 			return;
4466 		}
4467 		break;
4468 	}
4469 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4470 	if (!mp1)
4471 		return;
4472 
4473 	STRUCT_FSET(sb, len, addrlen);
4474 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4475 	case TI_GETMYNAME:
4476 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4477 		    &addrlen);
4478 		break;
4479 	case TI_GETPEERNAME:
4480 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4481 		    &addrlen);
4482 		break;
4483 	}
4484 	mp1->b_wptr += addrlen;
4485 	/* Copy out the address */
4486 	mi_copyout(q, mp);
4487 }
4488 
4489 void
udp_ddi_g_init(void)4490 udp_ddi_g_init(void)
4491 {
4492 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4493 	    udp_opt_obj.odb_opt_arr_cnt);
4494 
4495 	/*
4496 	 * We want to be informed each time a stack is created or
4497 	 * destroyed in the kernel, so we can maintain the
4498 	 * set of udp_stack_t's.
4499 	 */
4500 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4501 }
4502 
4503 void
udp_ddi_g_destroy(void)4504 udp_ddi_g_destroy(void)
4505 {
4506 	netstack_unregister(NS_UDP);
4507 }
4508 
4509 #define	INET_NAME	"ip"
4510 
4511 /*
4512  * Initialize the UDP stack instance.
4513  */
4514 static void *
udp_stack_init(netstackid_t stackid,netstack_t * ns)4515 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4516 {
4517 	udp_stack_t	*us;
4518 	int		i;
4519 	int		error = 0;
4520 	major_t		major;
4521 	size_t		arrsz;
4522 
4523 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4524 	us->us_netstack = ns;
4525 
4526 	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4527 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4528 	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4529 	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4530 
4531 	/*
4532 	 * The smallest anonymous port in the priviledged port range which UDP
4533 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4534 	 */
4535 	us->us_min_anonpriv_port = 512;
4536 
4537 	us->us_bind_fanout_size = udp_bind_fanout_size;
4538 
4539 	/* Roundup variable that might have been modified in /etc/system */
4540 	if (!ISP2(us->us_bind_fanout_size)) {
4541 		/* Not a power of two. Round up to nearest power of two */
4542 		for (i = 0; i < 31; i++) {
4543 			if (us->us_bind_fanout_size < (1 << i))
4544 				break;
4545 		}
4546 		us->us_bind_fanout_size = 1 << i;
4547 	}
4548 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4549 	    sizeof (udp_fanout_t), KM_SLEEP);
4550 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4551 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4552 		    NULL);
4553 	}
4554 
4555 	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4556 	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4557 	    KM_SLEEP);
4558 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4559 
4560 	/* Allocate the per netstack stats */
4561 	mutex_enter(&cpu_lock);
4562 	us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4563 	mutex_exit(&cpu_lock);
4564 	us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
4565 	    KM_SLEEP);
4566 	for (i = 0; i < us->us_sc_cnt; i++) {
4567 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4568 		    KM_SLEEP);
4569 	}
4570 
4571 	us->us_kstat = udp_kstat2_init(stackid);
4572 	us->us_mibkp = udp_kstat_init(stackid);
4573 
4574 	major = mod_name_to_major(INET_NAME);
4575 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
4576 	ASSERT(error == 0);
4577 	return (us);
4578 }
4579 
4580 /*
4581  * Free the UDP stack instance.
4582  */
4583 static void
udp_stack_fini(netstackid_t stackid,void * arg)4584 udp_stack_fini(netstackid_t stackid, void *arg)
4585 {
4586 	udp_stack_t *us = (udp_stack_t *)arg;
4587 	int i;
4588 
4589 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4590 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4591 	}
4592 
4593 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4594 	    sizeof (udp_fanout_t));
4595 
4596 	us->us_bind_fanout = NULL;
4597 
4598 	for (i = 0; i < us->us_sc_cnt; i++)
4599 		kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4600 	kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4601 
4602 	kmem_free(us->us_propinfo_tbl,
4603 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4604 	us->us_propinfo_tbl = NULL;
4605 
4606 	udp_kstat_fini(stackid, us->us_mibkp);
4607 	us->us_mibkp = NULL;
4608 
4609 	udp_kstat2_fini(stackid, us->us_kstat);
4610 	us->us_kstat = NULL;
4611 
4612 	mutex_destroy(&us->us_epriv_port_lock);
4613 	ldi_ident_release(us->us_ldi_ident);
4614 	kmem_free(us, sizeof (*us));
4615 }
4616 
4617 static size_t
udp_set_rcv_hiwat(udp_t * udp,size_t size)4618 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4619 {
4620 	udp_stack_t *us = udp->udp_us;
4621 
4622 	/* We add a bit of extra buffering */
4623 	size += size >> 1;
4624 	if (size > us->us_max_buf)
4625 		size = us->us_max_buf;
4626 
4627 	udp->udp_rcv_hiwat = size;
4628 	return (size);
4629 }
4630 
4631 /*
4632  * For the lower queue so that UDP can be a dummy mux.
4633  * Nobody should be sending
4634  * packets up this stream
4635  */
4636 static int
udp_lrput(queue_t * q,mblk_t * mp)4637 udp_lrput(queue_t *q, mblk_t *mp)
4638 {
4639 	switch (mp->b_datap->db_type) {
4640 	case M_FLUSH:
4641 		/* Turn around */
4642 		if (*mp->b_rptr & FLUSHW) {
4643 			*mp->b_rptr &= ~FLUSHR;
4644 			qreply(q, mp);
4645 			return (0);
4646 		}
4647 		break;
4648 	}
4649 	freemsg(mp);
4650 	return (0);
4651 }
4652 
4653 /*
4654  * For the lower queue so that UDP can be a dummy mux.
4655  * Nobody should be sending packets down this stream.
4656  */
4657 /* ARGSUSED */
4658 int
udp_lwput(queue_t * q,mblk_t * mp)4659 udp_lwput(queue_t *q, mblk_t *mp)
4660 {
4661 	freemsg(mp);
4662 	return (0);
4663 }
4664 
4665 /*
4666  * When a CPU is added, we need to allocate the per CPU stats struct.
4667  */
4668 void
udp_stack_cpu_add(udp_stack_t * us,processorid_t cpu_seqid)4669 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4670 {
4671 	int i;
4672 
4673 	if (cpu_seqid < us->us_sc_cnt)
4674 		return;
4675 	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4676 		ASSERT(us->us_sc[i] == NULL);
4677 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4678 		    KM_SLEEP);
4679 	}
4680 	membar_producer();
4681 	us->us_sc_cnt = cpu_seqid + 1;
4682 }
4683 
/*
 * The routines below are for the UDP socket module.
 */
4687 
4688 static conn_t *
udp_do_open(cred_t * credp,boolean_t isv6,int flags,int * errorp)4689 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4690 {
4691 	udp_t		*udp;
4692 	conn_t		*connp;
4693 	zoneid_t	zoneid;
4694 	netstack_t	*ns;
4695 	udp_stack_t	*us;
4696 	int		len;
4697 
4698 	ASSERT(errorp != NULL);
4699 
4700 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4701 		return (NULL);
4702 
4703 	ns = netstack_find_by_cred(credp);
4704 	ASSERT(ns != NULL);
4705 	us = ns->netstack_udp;
4706 	ASSERT(us != NULL);
4707 
4708 	/*
4709 	 * For exclusive stacks we set the zoneid to zero
4710 	 * to make UDP operate as if in the global zone.
4711 	 */
4712 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4713 		zoneid = GLOBAL_ZONEID;
4714 	else
4715 		zoneid = crgetzoneid(credp);
4716 
4717 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4718 
4719 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4720 	if (connp == NULL) {
4721 		netstack_rele(ns);
4722 		*errorp = ENOMEM;
4723 		return (NULL);
4724 	}
4725 	udp = connp->conn_udp;
4726 
4727 	/*
4728 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4729 	 * done by netstack_find_by_cred()
4730 	 */
4731 	netstack_rele(ns);
4732 
4733 	/*
4734 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4735 	 * need to lock anything.
4736 	 */
4737 	ASSERT(connp->conn_proto == IPPROTO_UDP);
4738 	ASSERT(connp->conn_udp == udp);
4739 	ASSERT(udp->udp_connp == connp);
4740 
4741 	/* Set the initial state of the stream and the privilege status. */
4742 	udp->udp_state = TS_UNBND;
4743 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4744 	if (isv6) {
4745 		connp->conn_family = AF_INET6;
4746 		connp->conn_ipversion = IPV6_VERSION;
4747 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4748 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
4749 		len = sizeof (ip6_t) + UDPH_SIZE;
4750 	} else {
4751 		connp->conn_family = AF_INET;
4752 		connp->conn_ipversion = IPV4_VERSION;
4753 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4754 		connp->conn_default_ttl = us->us_ipv4_ttl;
4755 		len = sizeof (ipha_t) + UDPH_SIZE;
4756 	}
4757 
4758 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4759 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4760 
4761 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4762 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4763 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4764 	connp->conn_ixa->ixa_zoneid = zoneid;
4765 
4766 	connp->conn_zoneid = zoneid;
4767 
4768 	/*
4769 	 * If the caller has the process-wide flag set, then default to MAC
4770 	 * exempt mode.  This allows read-down to unlabeled hosts.
4771 	 */
4772 	if (getpflags(NET_MAC_AWARE, credp) != 0)
4773 		connp->conn_mac_mode = CONN_MAC_AWARE;
4774 
4775 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4776 
4777 	udp->udp_us = us;
4778 
4779 	connp->conn_rcvbuf = us->us_recv_hiwat;
4780 	connp->conn_sndbuf = us->us_xmit_hiwat;
4781 	connp->conn_sndlowat = us->us_xmit_lowat;
4782 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4783 
4784 	connp->conn_wroff = len + us->us_wroff_extra;
4785 	connp->conn_so_type = SOCK_DGRAM;
4786 
4787 	connp->conn_recv = udp_input;
4788 	connp->conn_recvicmp = udp_icmp_input;
4789 	crhold(credp);
4790 	connp->conn_cred = credp;
4791 	connp->conn_cpid = curproc->p_pid;
4792 	connp->conn_open_time = ddi_get_lbolt64();
4793 	/* Cache things in ixa without an extra refhold */
4794 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4795 	connp->conn_ixa->ixa_cred = connp->conn_cred;
4796 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4797 	if (is_system_labeled())
4798 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4799 
4800 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4801 
4802 	if (us->us_pmtu_discovery)
4803 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4804 
4805 	return (connp);
4806 }
4807 
4808 sock_lower_handle_t
udp_create(int family,int type,int proto,sock_downcalls_t ** sock_downcalls,uint_t * smodep,int * errorp,int flags,cred_t * credp)4809 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4810     uint_t *smodep, int *errorp, int flags, cred_t *credp)
4811 {
4812 	udp_t		*udp = NULL;
4813 	udp_stack_t	*us;
4814 	conn_t		*connp;
4815 	boolean_t	isv6;
4816 
4817 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4818 	    (proto != 0 && proto != IPPROTO_UDP)) {
4819 		*errorp = EPROTONOSUPPORT;
4820 		return (NULL);
4821 	}
4822 
4823 	if (family == AF_INET6)
4824 		isv6 = B_TRUE;
4825 	else
4826 		isv6 = B_FALSE;
4827 
4828 	connp = udp_do_open(credp, isv6, flags, errorp);
4829 	if (connp == NULL)
4830 		return (NULL);
4831 
4832 	udp = connp->conn_udp;
4833 	ASSERT(udp != NULL);
4834 	us = udp->udp_us;
4835 	ASSERT(us != NULL);
4836 
4837 	udp->udp_issocket = B_TRUE;
4838 	connp->conn_flags |= IPCL_NONSTR;
4839 
4840 	/*
4841 	 * Set flow control
4842 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4843 	 * need to lock anything.
4844 	 */
4845 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4846 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4847 
4848 	connp->conn_flow_cntrld = B_FALSE;
4849 
4850 	mutex_enter(&connp->conn_lock);
4851 	connp->conn_state_flags &= ~CONN_INCIPIENT;
4852 	mutex_exit(&connp->conn_lock);
4853 
4854 	*errorp = 0;
4855 	*smodep = SM_ATOMIC;
4856 	*sock_downcalls = &sock_udp_downcalls;
4857 	return ((sock_lower_handle_t)connp);
4858 }
4859 
4860 /* ARGSUSED3 */
4861 void
udp_activate(sock_lower_handle_t proto_handle,sock_upper_handle_t sock_handle,sock_upcalls_t * sock_upcalls,int flags,cred_t * cr)4862 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4863     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4864 {
4865 	conn_t		*connp = (conn_t *)proto_handle;
4866 	struct sock_proto_props sopp;
4867 
4868 	/* All Solaris components should pass a cred for this operation. */
4869 	ASSERT(cr != NULL);
4870 
4871 	connp->conn_upcalls = sock_upcalls;
4872 	connp->conn_upper_handle = sock_handle;
4873 
4874 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4875 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4876 	sopp.sopp_wroff = connp->conn_wroff;
4877 	sopp.sopp_maxblk = INFPSZ;
4878 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4879 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
4880 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
4881 	sopp.sopp_maxpsz =
4882 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4883 	    UDP_MAXPACKET_IPV6;
4884 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4885 	    udp_mod_info.mi_minpsz;
4886 
4887 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4888 	    &sopp);
4889 }
4890 
4891 static void
udp_do_close(conn_t * connp)4892 udp_do_close(conn_t *connp)
4893 {
4894 	udp_t	*udp;
4895 
4896 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4897 	udp = connp->conn_udp;
4898 
4899 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4900 		/*
4901 		 * Running in cluster mode - register unbind information
4902 		 */
4903 		if (connp->conn_ipversion == IPV4_VERSION) {
4904 			(*cl_inet_unbind)(
4905 			    connp->conn_netstack->netstack_stackid,
4906 			    IPPROTO_UDP, AF_INET,
4907 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4908 			    (in_port_t)connp->conn_lport, NULL);
4909 		} else {
4910 			(*cl_inet_unbind)(
4911 			    connp->conn_netstack->netstack_stackid,
4912 			    IPPROTO_UDP, AF_INET6,
4913 			    (uint8_t *)&(connp->conn_laddr_v6),
4914 			    (in_port_t)connp->conn_lport, NULL);
4915 		}
4916 	}
4917 
4918 	udp_bind_hash_remove(udp, B_FALSE);
4919 
4920 	ip_quiesce_conn(connp);
4921 
4922 	if (!IPCL_IS_NONSTR(connp)) {
4923 		ASSERT(connp->conn_wq != NULL);
4924 		ASSERT(connp->conn_rq != NULL);
4925 		qprocsoff(connp->conn_rq);
4926 	}
4927 
4928 	udp_close_free(connp);
4929 
4930 	/*
4931 	 * Now we are truly single threaded on this stream, and can
4932 	 * delete the things hanging off the connp, and finally the connp.
4933 	 * We removed this connp from the fanout list, it cannot be
4934 	 * accessed thru the fanouts, and we already waited for the
4935 	 * conn_ref to drop to 0. We are already in close, so
4936 	 * there cannot be any other thread from the top. qprocsoff
4937 	 * has completed, and service has completed or won't run in
4938 	 * future.
4939 	 */
4940 	ASSERT(connp->conn_ref == 1);
4941 
4942 	if (!IPCL_IS_NONSTR(connp)) {
4943 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4944 	} else {
4945 		ip_free_helper_stream(connp);
4946 	}
4947 
4948 	connp->conn_ref--;
4949 	ipcl_conn_destroy(connp);
4950 }
4951 
4952 /* ARGSUSED1 */
4953 int
udp_close(sock_lower_handle_t proto_handle,int flags,cred_t * cr)4954 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4955 {
4956 	conn_t	*connp = (conn_t *)proto_handle;
4957 
4958 	/* All Solaris components should pass a cred for this operation. */
4959 	ASSERT(cr != NULL);
4960 
4961 	udp_do_close(connp);
4962 	return (0);
4963 }
4964 
/*
 * Bind a UDP endpoint to a local address and port.
 *
 * connp	the endpoint being bound; it must be in TS_UNBND state.
 * sa, len	the requested local address.  len selects the format and
 *		must be exactly sizeof (sin_t) or sizeof (sin6_t).  On
 *		success the port that was actually bound is written back
 *		into sa in network byte order.
 * cr		credentials used for the privileged-port check and, on a
 *		labeled system, for the multilevel port (MLP) checks.
 * bind_to_req_port_only
 *		when set and a non-zero port was requested, the bind fails
 *		with -TADDRBUSY if that port is in use instead of moving on
 *		to a different port.
 *
 * Returns 0 on success.  Failures are reported either as a positive errno
 * (EINVAL, EAFNOSUPPORT, EADDRNOTAVAIL, or whatever a helper returned) or
 * as a negated TLI error (-TBADADDR, -TACCES, -TNOADDR, -TOUTSTATE,
 * -TADDRBUSY); callers are expected to translate the negative values.
 *
 * Locking: conn_lock is taken first, then the uf_lock of the bind fanout
 * bucket for the candidate port.  The port search loop is left via "break"
 * with that bucket's uf_lock still held, and it is released only after the
 * endpoint has been inserted into the bucket.
 */
static int
udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
    boolean_t bind_to_req_port_only)
{
	sin_t		*sin;
	sin6_t		*sin6;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	int		count;
	ipaddr_t	v4src;		/* Set if AF_INET or v4-mapped */
	in6_addr_t	v6src;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	uint_t		scopeid = 0;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us = udp->udp_us;

	/*
	 * Decode the caller's address.  The length alone decides whether
	 * it is interpreted as a sin_t or a sin6_t; the family stored in
	 * the address must then agree with the family of the endpoint.
	 */
	sin = NULL;
	sin6 = NULL;
	switch (len) {
	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)sa;

		if (sin == NULL || !OK_32PTR((char *)sin))
			return (EINVAL);

		if (connp->conn_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			return (EAFNOSUPPORT);
		}
		v4src = sin->sin_addr.s_addr;
		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
		/* A specific (non-wildcard) address has to be verified. */
		if (v4src != INADDR_ANY) {
			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
			    B_TRUE);
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)sa;

		if (sin6 == NULL || !OK_32PTR((char *)sin6))
			return (EINVAL);

		if (connp->conn_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			return (EAFNOSUPPORT);
		}
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/* A v6-only socket may not bind a v4-mapped address */
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
			if (v4src != INADDR_ANY) {
				laddr_type = ip_laddr_verify_v4(v4src,
				    zoneid, ipst, B_FALSE);
			}
		} else {
			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
				/* The scope id is only used for link scope */
				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
					scopeid = sin6->sin6_scope_id;
				laddr_type = ip_laddr_verify_v6(&v6src,
				    zoneid, ipst, B_TRUE, scopeid);
			}
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", len);
		return (-TBADADDR);
	}

	/* Is the local address a valid unicast, multicast, or broadcast? */
	if (laddr_type == IPVL_BAD)
		return (EADDRNOTAVAIL);

	requested_port = port;

	/*
	 * "Requested port only" semantics apply only when the caller asked
	 * for them and also named a specific (non-zero) port.
	 */
	if (requested_port == 0 || !bind_to_req_port_only)
		bind_to_req_port_only = B_FALSE;
	else		/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (connp->conn_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * or is one of the extra privileged ports configured for
		 * this stack, make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
				return (-TACCES);
		}
	}

	/* No usable port could be selected. */
	if (port == 0)
		return (-TNOADDR);

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	mutex_enter(&connp->conn_lock);
	if (udp->udp_state != TS_UNBND) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		return (-TOUTSTATE);
	}
	/*
	 * Settle the IP version of the endpoint based on the address being
	 * bound.  An AF_INET endpoint is already IPv4; an AF_INET6 endpoint
	 * becomes IPv4 when binding a v4-mapped address and IPv6 otherwise.
	 * Since the udp is not yet in the bind hash list, we don't grab the
	 * uf_lock to change conn_ipversion.
	 */
	if (connp->conn_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
	} else {
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * no need to hold the uf_lock to set the conn_ipversion
			 * since we are not yet in the fanout list
			 */
			connp->conn_ipversion = IPV4_VERSION;
			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
		} else {
			connp->conn_ipversion = IPV6_VERSION;
			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
		}
	}

	/*
	 * If conn_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for any unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if conn_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	/*
	 * loopmax bounds the number of candidate ports tried: the size of
	 * whichever anonymous port range we are drawing from.
	 */
	count = 0;
	if (connp->conn_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);

	/*
	 * Port search loop.  Each pass examines one candidate port with the
	 * matching bucket's uf_lock held.  Every "break" out of this outer
	 * loop leaves that uf_lock held; the paths that try another port or
	 * return an error drop it first.
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;
		conn_t		*connp1;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			connp1 = udp1->udp_connp;

			/* Different ports can share a hash bucket. */
			if (lport != connp1->conn_lport)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (connp1->conn_exclbind || connp->conn_exclbind ||
			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
				if (V6_OR_V4_INADDR_ANY(
				    connp1->conn_bound_addr_v6) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(
				    &connp1->conn_bound_addr_v6,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (connp->conn_ipversion != connp1->conn_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
			    &v6src)) {
				continue;
			}
			/* Conflicting binding found; udp1 is non-NULL. */
			break;
		}

		/*
		 * With SO_REUSEADDR and an explicitly requested port, a
		 * conflict is tolerated unless it was an exclusive-bind
		 * conflict.  uf_lock stays held.
		 */
		if (!found_exclbind &&
		    (connp->conn_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 * uf_lock stays held.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first  of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			mutex_exit(&connp->conn_lock);
			return (-TADDRBUSY);
		}

		/* Pick the next candidate port to examine. */
		if (connp->conn_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			mutex_exit(&connp->conn_lock);
			return (-TNOADDR);
		}
	}

	/*
	 * Both conn_lock and udpf->uf_lock are held from here until the
	 * endpoint has been inserted into the bind hash below.
	 *
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip_attr_connect will fill in the correct
	 * address when a packet is about to be sent.
	 * If we are binding to a broadcast or multicast address then
	 * we just set the conn_bound_addr since we don't want to use
	 * that as the source address when sending.
	 */
	connp->conn_bound_addr_v6 = v6src;
	connp->conn_laddr_v6 = v6src;
	if (scopeid != 0) {
		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
		connp->conn_ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}

	switch (laddr_type) {
	case IPVL_UNICAST_UP:
	case IPVL_UNICAST_DOWN:
		connp->conn_saddr_v6 = v6src;
		connp->conn_mcbc_bind = B_FALSE;
		break;
	case IPVL_MCAST:
	case IPVL_BCAST:
		/* ip_set_destination will pick a source address later */
		connp->conn_saddr_v6 = ipv6_all_zeros;
		connp->conn_mcbc_bind = B_TRUE;
		break;
	}

	/* Any errors after this point should use late_error */
	connp->conn_lport = lport;

	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/*
	 * Initialize the T_BIND_ACK: report the bound port back to the
	 * caller by storing it (network byte order) in the caller's address.
	 */
	if (connp->conn_family == AF_INET) {
		sin->sin_port = connp->conn_lport;
	} else {
		sin6->sin6_port = connp->conn_lport;
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);
	mutex_exit(&connp->conn_lock);

	/* No locks are held across the cluster callback. */
	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (connp->conn_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
			    (in_port_t)connp->conn_lport, NULL);
		} else {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
			    (in_port_t)connp->conn_lport, NULL);
		}
	}

	/*
	 * Labeled-system (multilevel port) checks.  Every failure below
	 * drops conn_lock and unwinds through late_error, which reacquires
	 * it.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type =
		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(
		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			error = -TNOADDR;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}
		/* An anonymous port is looked up as PMAPPORT. */
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);

		/*
		 * It is a coding error to attempt to bind an MLP port
		 * without first setting SOL_SOCKET/SCM_UCRED.
		 */
		if (mlptype != mlptSingle &&
		    connp->conn_mlp_type == mlptSingle) {
			error = EINVAL;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}

		/*
		 * It is an access violation to attempt to bind an MLP port
		 * without NET_BINDMLP privilege.
		 */
		if (mlptype != mlptSingle &&
		    secpolicy_net_bindmlp(cr) != 0) {
			if (connp->conn_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			error = -TACCES;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (connp->conn_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				error = -TACCES;
				mutex_exit(&connp->conn_lock);
				goto late_error;
			}
		}
		if (connp->conn_anon_port) {
			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
			    port, B_TRUE);
			if (error != 0) {
				if (connp->conn_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				/* The helper's error is reported as -TACCES */
				error = -TACCES;
				mutex_exit(&connp->conn_lock);
				goto late_error;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/*
	 * We create an initial header template here to make a subsequent
	 * sendto have a starting point. Since conn_last_dst is zero the
	 * first sendto will always follow the 'dst changed' code path.
	 * Note that we defer massaging options and the related checksum
	 * adjustment until we have a destination address.
	 */
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto late_error;
	}
	/* Just in case */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	connp->conn_v6lastdst = ipv6_all_zeros;
	mutex_exit(&connp->conn_lock);

	/* Called without conn_lock held. */
	error = ip_laddr_fanout_insert(connp);
	if (error != 0)
		goto late_error;

	/* Bind succeeded */
	return (0);

late_error:
	/*
	 * We had already picked the port number, and then the bind failed.
	 * Entered with no locks held.  Undo the address, port, state and
	 * bind hash changes made above so the endpoint is TS_UNBND again,
	 * and return the error recorded by whoever jumped here.
	 */
	mutex_enter(&connp->conn_lock);
	udpf = &us->us_bind_fanout[
	    UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	connp->conn_saddr_v6 = ipv6_all_zeros;
	connp->conn_bound_addr_v6 = ipv6_all_zeros;
	connp->conn_laddr_v6 = ipv6_all_zeros;
	if (scopeid != 0) {
		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	udp->udp_state = TS_UNBND;
	/* Removed from the hash while conn_lport still names the bucket. */
	udp_bind_hash_remove(udp, B_TRUE);
	connp->conn_lport = 0;
	mutex_exit(&udpf->uf_lock);
	connp->conn_anon_port = B_FALSE;
	connp->conn_mlp_type = mlptSingle;

	connp->conn_v6lastdst = ipv6_all_zeros;

	/* Restore the header that was built above - different source address */
	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}
5554 
5555 int
udp_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)5556 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5557     socklen_t len, cred_t *cr)
5558 {
5559 	int		error;
5560 	conn_t		*connp;
5561 
5562 	/* All Solaris components should pass a cred for this operation. */
5563 	ASSERT(cr != NULL);
5564 
5565 	connp = (conn_t *)proto_handle;
5566 
5567 	if (sa == NULL)
5568 		error = udp_do_unbind(connp);
5569 	else
5570 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5571 
5572 	if (error < 0) {
5573 		if (error == -TOUTSTATE)
5574 			error = EINVAL;
5575 		else
5576 			error = proto_tlitosyserr(-error);
5577 	}
5578 
5579 	return (error);
5580 }
5581 
5582 static int
udp_implicit_bind(conn_t * connp,cred_t * cr)5583 udp_implicit_bind(conn_t *connp, cred_t *cr)
5584 {
5585 	sin6_t sin6addr;
5586 	sin_t *sin;
5587 	sin6_t *sin6;
5588 	socklen_t len;
5589 	int error;
5590 
5591 	/* All Solaris components should pass a cred for this operation. */
5592 	ASSERT(cr != NULL);
5593 
5594 	if (connp->conn_family == AF_INET) {
5595 		len = sizeof (struct sockaddr_in);
5596 		sin = (sin_t *)&sin6addr;
5597 		*sin = sin_null;
5598 		sin->sin_family = AF_INET;
5599 		sin->sin_addr.s_addr = INADDR_ANY;
5600 	} else {
5601 		ASSERT(connp->conn_family == AF_INET6);
5602 		len = sizeof (sin6_t);
5603 		sin6 = (sin6_t *)&sin6addr;
5604 		*sin6 = sin6_null;
5605 		sin6->sin6_family = AF_INET6;
5606 		V6_SET_ZERO(sin6->sin6_addr);
5607 	}
5608 
5609 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5610 	    cr, B_FALSE);
5611 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5612 }
5613 
5614 /*
5615  * This routine removes a port number association from a stream. It
5616  * is called by udp_unbind and udp_tpi_unbind.
5617  */
5618 static int
udp_do_unbind(conn_t * connp)5619 udp_do_unbind(conn_t *connp)
5620 {
5621 	udp_t		*udp = connp->conn_udp;
5622 	udp_fanout_t	*udpf;
5623 	udp_stack_t	*us = udp->udp_us;
5624 
5625 	if (cl_inet_unbind != NULL) {
5626 		/*
5627 		 * Running in cluster mode - register unbind information
5628 		 */
5629 		if (connp->conn_ipversion == IPV4_VERSION) {
5630 			(*cl_inet_unbind)(
5631 			    connp->conn_netstack->netstack_stackid,
5632 			    IPPROTO_UDP, AF_INET,
5633 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5634 			    (in_port_t)connp->conn_lport, NULL);
5635 		} else {
5636 			(*cl_inet_unbind)(
5637 			    connp->conn_netstack->netstack_stackid,
5638 			    IPPROTO_UDP, AF_INET6,
5639 			    (uint8_t *)&(connp->conn_laddr_v6),
5640 			    (in_port_t)connp->conn_lport, NULL);
5641 		}
5642 	}
5643 
5644 	mutex_enter(&connp->conn_lock);
5645 	/* If a bind has not been done, we can't unbind. */
5646 	if (udp->udp_state == TS_UNBND) {
5647 		mutex_exit(&connp->conn_lock);
5648 		return (-TOUTSTATE);
5649 	}
5650 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5651 	    us->us_bind_fanout_size)];
5652 	mutex_enter(&udpf->uf_lock);
5653 	udp_bind_hash_remove(udp, B_TRUE);
5654 	connp->conn_saddr_v6 = ipv6_all_zeros;
5655 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5656 	connp->conn_laddr_v6 = ipv6_all_zeros;
5657 	connp->conn_mcbc_bind = B_FALSE;
5658 	connp->conn_lport = 0;
5659 	/* In case we were also connected */
5660 	connp->conn_faddr_v6 = ipv6_all_zeros;
5661 	connp->conn_fport = 0;
5662 	mutex_exit(&udpf->uf_lock);
5663 
5664 	connp->conn_v6lastdst = ipv6_all_zeros;
5665 	udp->udp_state = TS_UNBND;
5666 
5667 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5668 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5669 	mutex_exit(&connp->conn_lock);
5670 
5671 	ip_unbind(connp);
5672 
5673 	return (0);
5674 }
5675 
5676 /*
5677  * It associates a default destination address with the stream.
5678  */
5679 static int
udp_do_connect(conn_t * connp,const struct sockaddr * sa,socklen_t len,cred_t * cr,pid_t pid)5680 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5681     cred_t *cr, pid_t pid)
5682 {
5683 	sin6_t		*sin6;
5684 	sin_t		*sin;
5685 	in6_addr_t	v6dst;
5686 	ipaddr_t	v4dst;
5687 	uint16_t	dstport;
5688 	uint32_t	flowinfo;
5689 	udp_fanout_t	*udpf;
5690 	udp_t		*udp, *udp1;
5691 	ushort_t	ipversion;
5692 	udp_stack_t	*us;
5693 	int		error;
5694 	conn_t		*connp1;
5695 	ip_xmit_attr_t	*ixa;
5696 	ip_xmit_attr_t	*oldixa;
5697 	uint_t		scopeid = 0;
5698 	uint_t		srcid = 0;
5699 	in6_addr_t	v6src = connp->conn_saddr_v6;
5700 	boolean_t	v4mapped;
5701 
5702 	udp = connp->conn_udp;
5703 	us = udp->udp_us;
5704 	sin = NULL;
5705 	sin6 = NULL;
5706 	v4dst = INADDR_ANY;
5707 	flowinfo = 0;
5708 
5709 	/*
5710 	 * Address has been verified by the caller
5711 	 */
5712 	switch (len) {
5713 	default:
5714 		/*
5715 		 * Should never happen
5716 		 */
5717 		return (EINVAL);
5718 
5719 	case sizeof (sin_t):
5720 		sin = (sin_t *)sa;
5721 		v4dst = sin->sin_addr.s_addr;
5722 		dstport = sin->sin_port;
5723 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5724 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5725 		ipversion = IPV4_VERSION;
5726 		break;
5727 
5728 	case sizeof (sin6_t):
5729 		sin6 = (sin6_t *)sa;
5730 		v6dst = sin6->sin6_addr;
5731 		dstport = sin6->sin6_port;
5732 		srcid = sin6->__sin6_src_id;
5733 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
5734 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5735 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5736 			    v4mapped, connp->conn_netstack)) {
5737 				/* Mismatch v4mapped/v6 specified by srcid. */
5738 				return (EADDRNOTAVAIL);
5739 			}
5740 		}
5741 		if (v4mapped) {
5742 			if (connp->conn_ipv6_v6only)
5743 				return (EADDRNOTAVAIL);
5744 
5745 			/*
5746 			 * Destination adress is mapped IPv6 address.
5747 			 * Source bound address should be unspecified or
5748 			 * IPv6 mapped address as well.
5749 			 */
5750 			if (!IN6_IS_ADDR_UNSPECIFIED(
5751 			    &connp->conn_bound_addr_v6) &&
5752 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5753 				return (EADDRNOTAVAIL);
5754 			}
5755 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5756 			ipversion = IPV4_VERSION;
5757 			flowinfo = 0;
5758 		} else {
5759 			ipversion = IPV6_VERSION;
5760 			flowinfo = sin6->sin6_flowinfo;
5761 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5762 				scopeid = sin6->sin6_scope_id;
5763 		}
5764 		break;
5765 	}
5766 
5767 	if (dstport == 0)
5768 		return (-TBADADDR);
5769 
5770 	/*
5771 	 * If there is a different thread using conn_ixa then we get a new
5772 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5773 	 * conn_ixa and prevent any other thread from using/changing it.
5774 	 * Once connect() is done other threads can use conn_ixa since the
5775 	 * refcnt will be back at one.
5776 	 * We defer updating conn_ixa until later to handle any concurrent
5777 	 * conn_ixa_cleanup thread.
5778 	 */
5779 	ixa = conn_get_ixa(connp, B_FALSE);
5780 	if (ixa == NULL)
5781 		return (ENOMEM);
5782 
5783 	mutex_enter(&connp->conn_lock);
5784 	/*
5785 	 * This udp_t must have bound to a port already before doing a connect.
5786 	 * Reject if a connect is in progress (we drop conn_lock during
5787 	 * udp_do_connect).
5788 	 */
5789 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5790 		mutex_exit(&connp->conn_lock);
5791 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5792 		    "udp_connect: bad state, %u", udp->udp_state);
5793 		ixa_refrele(ixa);
5794 		return (-TOUTSTATE);
5795 	}
5796 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5797 
5798 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5799 	    us->us_bind_fanout_size)];
5800 
5801 	mutex_enter(&udpf->uf_lock);
5802 	if (udp->udp_state == TS_DATA_XFER) {
5803 		/* Already connected - clear out state */
5804 		if (connp->conn_mcbc_bind)
5805 			connp->conn_saddr_v6 = ipv6_all_zeros;
5806 		else
5807 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5808 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5809 		connp->conn_faddr_v6 = ipv6_all_zeros;
5810 		connp->conn_fport = 0;
5811 		udp->udp_state = TS_IDLE;
5812 	}
5813 
5814 	connp->conn_fport = dstport;
5815 	connp->conn_ipversion = ipversion;
5816 	if (ipversion == IPV4_VERSION) {
5817 		/*
5818 		 * Interpret a zero destination to mean loopback.
5819 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5820 		 * generate the T_CONN_CON.
5821 		 */
5822 		if (v4dst == INADDR_ANY) {
5823 			v4dst = htonl(INADDR_LOOPBACK);
5824 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5825 			if (connp->conn_family == AF_INET) {
5826 				sin->sin_addr.s_addr = v4dst;
5827 			} else {
5828 				sin6->sin6_addr = v6dst;
5829 			}
5830 		}
5831 		connp->conn_faddr_v6 = v6dst;
5832 		connp->conn_flowinfo = 0;
5833 	} else {
5834 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
5835 		/*
5836 		 * Interpret a zero destination to mean loopback.
5837 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5838 		 * generate the T_CONN_CON.
5839 		 */
5840 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5841 			v6dst = ipv6_loopback;
5842 			sin6->sin6_addr = v6dst;
5843 		}
5844 		connp->conn_faddr_v6 = v6dst;
5845 		connp->conn_flowinfo = flowinfo;
5846 	}
5847 	mutex_exit(&udpf->uf_lock);
5848 
5849 	/*
5850 	 * We update our cred/cpid based on the caller of connect
5851 	 */
5852 	if (connp->conn_cred != cr) {
5853 		crhold(cr);
5854 		crfree(connp->conn_cred);
5855 		connp->conn_cred = cr;
5856 	}
5857 	connp->conn_cpid = pid;
5858 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5859 	ixa->ixa_cred = cr;
5860 	ixa->ixa_cpid = pid;
5861 	if (is_system_labeled()) {
5862 		/* We need to restart with a label based on the cred */
5863 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5864 	}
5865 
5866 	if (scopeid != 0) {
5867 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
5868 		ixa->ixa_scopeid = scopeid;
5869 		connp->conn_incoming_ifindex = scopeid;
5870 	} else {
5871 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5872 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5873 	}
5874 	/*
5875 	 * conn_connect will drop conn_lock and reacquire it.
5876 	 * To prevent a send* from messing with this udp_t while the lock
5877 	 * is dropped we set udp_state and clear conn_v6lastdst.
5878 	 * That will make all send* fail with EISCONN.
5879 	 */
5880 	connp->conn_v6lastdst = ipv6_all_zeros;
5881 	udp->udp_state = TS_WCON_CREQ;
5882 
5883 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5884 	mutex_exit(&connp->conn_lock);
5885 	if (error != 0)
5886 		goto connect_failed;
5887 
5888 	/*
5889 	 * The addresses have been verified. Time to insert in
5890 	 * the correct fanout list.
5891 	 */
5892 	error = ipcl_conn_insert(connp);
5893 	if (error != 0)
5894 		goto connect_failed;
5895 
5896 	mutex_enter(&connp->conn_lock);
5897 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5898 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5899 	if (error != 0) {
5900 		mutex_exit(&connp->conn_lock);
5901 		goto connect_failed;
5902 	}
5903 
5904 	udp->udp_state = TS_DATA_XFER;
5905 	/* Record this as the "last" send even though we haven't sent any */
5906 	connp->conn_v6lastdst = connp->conn_faddr_v6;
5907 	connp->conn_lastipversion = connp->conn_ipversion;
5908 	connp->conn_lastdstport = connp->conn_fport;
5909 	connp->conn_lastflowinfo = connp->conn_flowinfo;
5910 	connp->conn_lastscopeid = scopeid;
5911 	connp->conn_lastsrcid = srcid;
5912 	/* Also remember a source to use together with lastdst */
5913 	connp->conn_v6lastsrc = v6src;
5914 
5915 	oldixa = conn_replace_ixa(connp, ixa);
5916 	mutex_exit(&connp->conn_lock);
5917 	ixa_refrele(oldixa);
5918 
5919 	/*
5920 	 * We've picked a source address above. Now we can
5921 	 * verify that the src/port/dst/port is unique for all
5922 	 * connections in TS_DATA_XFER, skipping ourselves.
5923 	 */
5924 	mutex_enter(&udpf->uf_lock);
5925 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5926 		if (udp1->udp_state != TS_DATA_XFER)
5927 			continue;
5928 
5929 		if (udp1 == udp)
5930 			continue;
5931 
5932 		connp1 = udp1->udp_connp;
5933 		if (connp->conn_lport != connp1->conn_lport ||
5934 		    connp->conn_ipversion != connp1->conn_ipversion ||
5935 		    dstport != connp1->conn_fport ||
5936 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5937 		    &connp1->conn_laddr_v6) ||
5938 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5939 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5940 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5941 			continue;
5942 		mutex_exit(&udpf->uf_lock);
5943 		error = -TBADADDR;
5944 		goto connect_failed;
5945 	}
5946 	if (cl_inet_connect2 != NULL) {
5947 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5948 		if (error != 0) {
5949 			mutex_exit(&udpf->uf_lock);
5950 			error = -TBADADDR;
5951 			goto connect_failed;
5952 		}
5953 	}
5954 	mutex_exit(&udpf->uf_lock);
5955 
5956 	ixa_refrele(ixa);
5957 	return (0);
5958 
5959 connect_failed:
5960 	if (ixa != NULL)
5961 		ixa_refrele(ixa);
5962 	mutex_enter(&connp->conn_lock);
5963 	mutex_enter(&udpf->uf_lock);
5964 	udp->udp_state = TS_IDLE;
5965 	connp->conn_faddr_v6 = ipv6_all_zeros;
5966 	connp->conn_fport = 0;
5967 	/* In case the source address was set above */
5968 	if (connp->conn_mcbc_bind)
5969 		connp->conn_saddr_v6 = ipv6_all_zeros;
5970 	else
5971 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5972 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5973 	mutex_exit(&udpf->uf_lock);
5974 
5975 	connp->conn_v6lastdst = ipv6_all_zeros;
5976 	connp->conn_flowinfo = 0;
5977 
5978 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5979 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5980 	mutex_exit(&connp->conn_lock);
5981 	return (error);
5982 }
5983 
5984 static int
udp_connect(sock_lower_handle_t proto_handle,const struct sockaddr * sa,socklen_t len,sock_connid_t * id,cred_t * cr)5985 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5986     socklen_t len, sock_connid_t *id, cred_t *cr)
5987 {
5988 	conn_t	*connp = (conn_t *)proto_handle;
5989 	udp_t	*udp = connp->conn_udp;
5990 	int	error;
5991 	boolean_t did_bind = B_FALSE;
5992 	pid_t	pid = curproc->p_pid;
5993 
5994 	/* All Solaris components should pass a cred for this operation. */
5995 	ASSERT(cr != NULL);
5996 
5997 	if (sa == NULL) {
5998 		/*
5999 		 * Disconnect
6000 		 * Make sure we are connected
6001 		 */
6002 		if (udp->udp_state != TS_DATA_XFER)
6003 			return (EINVAL);
6004 
6005 		error = udp_disconnect(connp);
6006 		return (error);
6007 	}
6008 
6009 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
6010 	if (error != 0)
6011 		goto done;
6012 
6013 	/* do an implicit bind if necessary */
6014 	if (udp->udp_state == TS_UNBND) {
6015 		error = udp_implicit_bind(connp, cr);
6016 		/*
6017 		 * We could be racing with an actual bind, in which case
6018 		 * we would see EPROTO. We cross our fingers and try
6019 		 * to connect.
6020 		 */
6021 		if (!(error == 0 || error == EPROTO))
6022 			goto done;
6023 		did_bind = B_TRUE;
6024 	}
6025 	/*
6026 	 * set SO_DGRAM_ERRIND
6027 	 */
6028 	connp->conn_dgram_errind = B_TRUE;
6029 
6030 	error = udp_do_connect(connp, sa, len, cr, pid);
6031 
6032 	if (error != 0 && did_bind) {
6033 		int unbind_err;
6034 
6035 		unbind_err = udp_do_unbind(connp);
6036 		ASSERT(unbind_err == 0);
6037 	}
6038 
6039 	if (error == 0) {
6040 		*id = 0;
6041 		(*connp->conn_upcalls->su_connected)
6042 		    (connp->conn_upper_handle, 0, NULL, -1);
6043 	} else if (error < 0) {
6044 		error = proto_tlitosyserr(-error);
6045 	}
6046 
6047 done:
6048 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6049 		/*
6050 		 * No need to hold locks to set state
6051 		 * after connect failure socket state is undefined
6052 		 * We set the state only to imitate old sockfs behavior
6053 		 */
6054 		udp->udp_state = TS_IDLE;
6055 	}
6056 	return (error);
6057 }
6058 
6059 int
udp_send(sock_lower_handle_t proto_handle,mblk_t * mp,struct nmsghdr * msg,cred_t * cr)6060 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6061     cred_t *cr)
6062 {
6063 	sin6_t		*sin6;
6064 	sin_t		*sin = NULL;
6065 	uint_t		srcid;
6066 	conn_t		*connp = (conn_t *)proto_handle;
6067 	udp_t		*udp = connp->conn_udp;
6068 	int		error = 0;
6069 	udp_stack_t	*us = udp->udp_us;
6070 	ushort_t	ipversion;
6071 	pid_t		pid = curproc->p_pid;
6072 	ip_xmit_attr_t	*ixa;
6073 
6074 	ASSERT(DB_TYPE(mp) == M_DATA);
6075 
6076 	/* All Solaris components should pass a cred for this operation. */
6077 	ASSERT(cr != NULL);
6078 
6079 	/* do an implicit bind if necessary */
6080 	if (udp->udp_state == TS_UNBND) {
6081 		error = udp_implicit_bind(connp, cr);
6082 		/*
6083 		 * We could be racing with an actual bind, in which case
6084 		 * we would see EPROTO. We cross our fingers and try
6085 		 * to connect.
6086 		 */
6087 		if (!(error == 0 || error == EPROTO)) {
6088 			freemsg(mp);
6089 			return (error);
6090 		}
6091 	}
6092 
6093 	/* Connected? */
6094 	if (msg->msg_name == NULL) {
6095 		if (udp->udp_state != TS_DATA_XFER) {
6096 			UDPS_BUMP_MIB(us, udpOutErrors);
6097 			return (EDESTADDRREQ);
6098 		}
6099 		if (msg->msg_controllen != 0) {
6100 			error = udp_output_ancillary(connp, NULL, NULL, mp,
6101 			    NULL, msg, cr, pid);
6102 		} else {
6103 			error = udp_output_connected(connp, mp, cr, pid);
6104 		}
6105 		if (us->us_sendto_ignerr)
6106 			return (0);
6107 		else
6108 			return (error);
6109 	}
6110 	if (udp->udp_state == TS_DATA_XFER) {
6111 		UDPS_BUMP_MIB(us, udpOutErrors);
6112 		return (EISCONN);
6113 	}
6114 	error = proto_verify_ip_addr(connp->conn_family,
6115 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6116 	if (error != 0) {
6117 		UDPS_BUMP_MIB(us, udpOutErrors);
6118 		return (error);
6119 	}
6120 	switch (connp->conn_family) {
6121 	case AF_INET6:
6122 		sin6 = (sin6_t *)msg->msg_name;
6123 
6124 		srcid = sin6->__sin6_src_id;
6125 
6126 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6127 			/*
6128 			 * Destination is a non-IPv4-compatible IPv6 address.
6129 			 * Send out an IPv6 format packet.
6130 			 */
6131 
6132 			/*
6133 			 * If the local address is a mapped address return
6134 			 * an error.
6135 			 * It would be possible to send an IPv6 packet but the
6136 			 * response would never make it back to the application
6137 			 * since it is bound to a mapped address.
6138 			 */
6139 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6140 				UDPS_BUMP_MIB(us, udpOutErrors);
6141 				return (EADDRNOTAVAIL);
6142 			}
6143 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6144 				sin6->sin6_addr = ipv6_loopback;
6145 			ipversion = IPV6_VERSION;
6146 		} else {
6147 			if (connp->conn_ipv6_v6only) {
6148 				UDPS_BUMP_MIB(us, udpOutErrors);
6149 				return (EADDRNOTAVAIL);
6150 			}
6151 
6152 			/*
6153 			 * If the local address is not zero or a mapped address
6154 			 * return an error.  It would be possible to send an
6155 			 * IPv4 packet but the response would never make it
6156 			 * back to the application since it is bound to a
6157 			 * non-mapped address.
6158 			 */
6159 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6160 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6161 				UDPS_BUMP_MIB(us, udpOutErrors);
6162 				return (EADDRNOTAVAIL);
6163 			}
6164 
6165 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6166 				V4_PART_OF_V6(sin6->sin6_addr) =
6167 				    htonl(INADDR_LOOPBACK);
6168 			}
6169 			ipversion = IPV4_VERSION;
6170 		}
6171 
6172 		/*
6173 		 * We have to allocate an ip_xmit_attr_t before we grab
6174 		 * conn_lock and we need to hold conn_lock once we've check
6175 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
6176 		 * socket.
6177 		 */
6178 		if (msg->msg_controllen == 0) {
6179 			ixa = conn_get_ixa(connp, B_FALSE);
6180 			if (ixa == NULL) {
6181 				UDPS_BUMP_MIB(us, udpOutErrors);
6182 				return (ENOMEM);
6183 			}
6184 		} else {
6185 			ixa = NULL;
6186 		}
6187 		mutex_enter(&connp->conn_lock);
6188 		if (udp->udp_delayed_error != 0) {
6189 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6190 
6191 			error = udp->udp_delayed_error;
6192 			udp->udp_delayed_error = 0;
6193 
6194 			/* Compare IP address, port, and family */
6195 
6196 			if (sin6->sin6_port == sin2->sin6_port &&
6197 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6198 			    &sin2->sin6_addr) &&
6199 			    sin6->sin6_family == sin2->sin6_family) {
6200 				mutex_exit(&connp->conn_lock);
6201 				UDPS_BUMP_MIB(us, udpOutErrors);
6202 				if (ixa != NULL)
6203 					ixa_refrele(ixa);
6204 				return (error);
6205 			}
6206 		}
6207 
6208 		if (msg->msg_controllen != 0) {
6209 			mutex_exit(&connp->conn_lock);
6210 			ASSERT(ixa == NULL);
6211 			error = udp_output_ancillary(connp, NULL, sin6, mp,
6212 			    NULL, msg, cr, pid);
6213 		} else if (conn_same_as_last_v6(connp, sin6) &&
6214 		    connp->conn_lastsrcid == srcid &&
6215 		    ipsec_outbound_policy_current(ixa)) {
6216 			/* udp_output_lastdst drops conn_lock */
6217 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6218 		} else {
6219 			/* udp_output_newdst drops conn_lock */
6220 			error = udp_output_newdst(connp, mp, NULL, sin6,
6221 			    ipversion, cr, pid, ixa);
6222 		}
6223 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6224 		if (us->us_sendto_ignerr)
6225 			return (0);
6226 		else
6227 			return (error);
6228 	case AF_INET:
6229 		sin = (sin_t *)msg->msg_name;
6230 
6231 		ipversion = IPV4_VERSION;
6232 
6233 		if (sin->sin_addr.s_addr == INADDR_ANY)
6234 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6235 
6236 		/*
6237 		 * We have to allocate an ip_xmit_attr_t before we grab
6238 		 * conn_lock and we need to hold conn_lock once we've check
6239 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6240 		 */
6241 		if (msg->msg_controllen == 0) {
6242 			ixa = conn_get_ixa(connp, B_FALSE);
6243 			if (ixa == NULL) {
6244 				UDPS_BUMP_MIB(us, udpOutErrors);
6245 				return (ENOMEM);
6246 			}
6247 		} else {
6248 			ixa = NULL;
6249 		}
6250 		mutex_enter(&connp->conn_lock);
6251 		if (udp->udp_delayed_error != 0) {
6252 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6253 
6254 			error = udp->udp_delayed_error;
6255 			udp->udp_delayed_error = 0;
6256 
6257 			/* Compare IP address and port */
6258 
6259 			if (sin->sin_port == sin2->sin_port &&
6260 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6261 				mutex_exit(&connp->conn_lock);
6262 				UDPS_BUMP_MIB(us, udpOutErrors);
6263 				if (ixa != NULL)
6264 					ixa_refrele(ixa);
6265 				return (error);
6266 			}
6267 		}
6268 		if (msg->msg_controllen != 0) {
6269 			mutex_exit(&connp->conn_lock);
6270 			ASSERT(ixa == NULL);
6271 			error = udp_output_ancillary(connp, sin, NULL, mp,
6272 			    NULL, msg, cr, pid);
6273 		} else if (conn_same_as_last_v4(connp, sin) &&
6274 		    ipsec_outbound_policy_current(ixa)) {
6275 			/* udp_output_lastdst drops conn_lock */
6276 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6277 		} else {
6278 			/* udp_output_newdst drops conn_lock */
6279 			error = udp_output_newdst(connp, mp, sin, NULL,
6280 			    ipversion, cr, pid, ixa);
6281 		}
6282 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6283 		if (us->us_sendto_ignerr)
6284 			return (0);
6285 		else
6286 			return (error);
6287 	default:
6288 		return (EINVAL);
6289 	}
6290 }
6291 
6292 int
udp_fallback(sock_lower_handle_t proto_handle,queue_t * q,boolean_t issocket,so_proto_quiesced_cb_t quiesced_cb,sock_quiesce_arg_t * arg)6293 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6294     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6295     sock_quiesce_arg_t *arg)
6296 {
6297 	conn_t	*connp = (conn_t *)proto_handle;
6298 	udp_t	*udp;
6299 	struct T_capability_ack tca;
6300 	struct sockaddr_in6 laddr, faddr;
6301 	socklen_t laddrlen, faddrlen;
6302 	short opts;
6303 	struct stroptions *stropt;
6304 	mblk_t *mp, *stropt_mp;
6305 	int error;
6306 
6307 	udp = connp->conn_udp;
6308 
6309 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6310 
6311 	/*
6312 	 * setup the fallback stream that was allocated
6313 	 */
6314 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
6315 	connp->conn_minor_arena = WR(q)->q_ptr;
6316 
6317 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
6318 
6319 	WR(q)->q_qinfo = &udp_winit;
6320 
6321 	connp->conn_rq = RD(q);
6322 	connp->conn_wq = WR(q);
6323 
6324 	/* Notify stream head about options before sending up data */
6325 	stropt_mp->b_datap->db_type = M_SETOPTS;
6326 	stropt_mp->b_wptr += sizeof (*stropt);
6327 	stropt = (struct stroptions *)stropt_mp->b_rptr;
6328 	stropt->so_flags = SO_WROFF | SO_HIWAT;
6329 	stropt->so_wroff = connp->conn_wroff;
6330 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6331 	putnext(RD(q), stropt_mp);
6332 
6333 	/*
6334 	 * Free the helper stream
6335 	 */
6336 	ip_free_helper_stream(connp);
6337 
6338 	if (!issocket)
6339 		udp_use_pure_tpi(udp);
6340 
6341 	/*
6342 	 * Collect the information needed to sync with the sonode
6343 	 */
6344 	udp_do_capability_ack(udp, &tca, TC1_INFO);
6345 
6346 	laddrlen = faddrlen = sizeof (sin6_t);
6347 	(void) udp_getsockname((sock_lower_handle_t)connp,
6348 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
6349 	error = udp_getpeername((sock_lower_handle_t)connp,
6350 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
6351 	if (error != 0)
6352 		faddrlen = 0;
6353 
6354 	opts = 0;
6355 	if (connp->conn_dgram_errind)
6356 		opts |= SO_DGRAM_ERRIND;
6357 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6358 		opts |= SO_DONTROUTE;
6359 
6360 	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6361 	    (struct sockaddr *)&laddr, laddrlen,
6362 	    (struct sockaddr *)&faddr, faddrlen, opts);
6363 
6364 	mutex_enter(&udp->udp_recv_lock);
6365 	/*
6366 	 * Attempts to send data up during fallback will result in it being
6367 	 * queued in udp_t. First push up the datagrams obtained from the
6368 	 * socket, then any packets queued in udp_t.
6369 	 */
6370 	if (mp != NULL) {
6371 		mp->b_next = udp->udp_fallback_queue_head;
6372 		udp->udp_fallback_queue_head = mp;
6373 	}
6374 	while (udp->udp_fallback_queue_head != NULL) {
6375 		mp = udp->udp_fallback_queue_head;
6376 		udp->udp_fallback_queue_head = mp->b_next;
6377 		mutex_exit(&udp->udp_recv_lock);
6378 		mp->b_next = NULL;
6379 		putnext(RD(q), mp);
6380 		mutex_enter(&udp->udp_recv_lock);
6381 	}
6382 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6383 	/*
6384 	 * No longer a streams less socket
6385 	 */
6386 	mutex_enter(&connp->conn_lock);
6387 	connp->conn_flags &= ~IPCL_NONSTR;
6388 	mutex_exit(&connp->conn_lock);
6389 
6390 	mutex_exit(&udp->udp_recv_lock);
6391 
6392 	ASSERT(connp->conn_ref >= 1);
6393 
6394 	return (0);
6395 }
6396 
6397 /* ARGSUSED3 */
6398 int
udp_getpeername(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6399 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6400     socklen_t *salenp, cred_t *cr)
6401 {
6402 	conn_t	*connp = (conn_t *)proto_handle;
6403 	udp_t	*udp = connp->conn_udp;
6404 	int error;
6405 
6406 	/* All Solaris components should pass a cred for this operation. */
6407 	ASSERT(cr != NULL);
6408 
6409 	mutex_enter(&connp->conn_lock);
6410 	if (udp->udp_state != TS_DATA_XFER)
6411 		error = ENOTCONN;
6412 	else
6413 		error = conn_getpeername(connp, sa, salenp);
6414 	mutex_exit(&connp->conn_lock);
6415 	return (error);
6416 }
6417 
6418 /* ARGSUSED3 */
6419 int
udp_getsockname(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6420 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6421     socklen_t *salenp, cred_t *cr)
6422 {
6423 	conn_t	*connp = (conn_t *)proto_handle;
6424 	int error;
6425 
6426 	/* All Solaris components should pass a cred for this operation. */
6427 	ASSERT(cr != NULL);
6428 
6429 	mutex_enter(&connp->conn_lock);
6430 	error = conn_getsockname(connp, sa, salenp);
6431 	mutex_exit(&connp->conn_lock);
6432 	return (error);
6433 }
6434 
6435 int
udp_getsockopt(sock_lower_handle_t proto_handle,int level,int option_name,void * optvalp,socklen_t * optlen,cred_t * cr)6436 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6437     void *optvalp, socklen_t *optlen, cred_t *cr)
6438 {
6439 	conn_t		*connp = (conn_t *)proto_handle;
6440 	int		error;
6441 	t_uscalar_t	max_optbuf_len;
6442 	void		*optvalp_buf;
6443 	int		len;
6444 
6445 	/* All Solaris components should pass a cred for this operation. */
6446 	ASSERT(cr != NULL);
6447 
6448 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6449 	    udp_opt_obj.odb_opt_des_arr,
6450 	    udp_opt_obj.odb_opt_arr_cnt,
6451 	    B_FALSE, B_TRUE, cr);
6452 	if (error != 0) {
6453 		if (error < 0)
6454 			error = proto_tlitosyserr(-error);
6455 		return (error);
6456 	}
6457 
6458 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6459 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6460 	if (len == -1) {
6461 		kmem_free(optvalp_buf, max_optbuf_len);
6462 		return (EINVAL);
6463 	}
6464 
6465 	/*
6466 	 * update optlen and copy option value
6467 	 */
6468 	t_uscalar_t size = MIN(len, *optlen);
6469 
6470 	bcopy(optvalp_buf, optvalp, size);
6471 	bcopy(&size, optlen, sizeof (size));
6472 
6473 	kmem_free(optvalp_buf, max_optbuf_len);
6474 	return (0);
6475 }
6476 
6477 int
udp_setsockopt(sock_lower_handle_t proto_handle,int level,int option_name,const void * optvalp,socklen_t optlen,cred_t * cr)6478 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6479     const void *optvalp, socklen_t optlen, cred_t *cr)
6480 {
6481 	conn_t		*connp = (conn_t *)proto_handle;
6482 	int		error;
6483 
6484 	/* All Solaris components should pass a cred for this operation. */
6485 	ASSERT(cr != NULL);
6486 
6487 	error = proto_opt_check(level, option_name, optlen, NULL,
6488 	    udp_opt_obj.odb_opt_des_arr,
6489 	    udp_opt_obj.odb_opt_arr_cnt,
6490 	    B_TRUE, B_FALSE, cr);
6491 
6492 	if (error != 0) {
6493 		if (error < 0)
6494 			error = proto_tlitosyserr(-error);
6495 		return (error);
6496 	}
6497 
6498 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6499 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6500 	    NULL, cr);
6501 
6502 	ASSERT(error >= 0);
6503 
6504 	return (error);
6505 }
6506 
6507 void
udp_clr_flowctrl(sock_lower_handle_t proto_handle)6508 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6509 {
6510 	conn_t	*connp = (conn_t *)proto_handle;
6511 	udp_t	*udp = connp->conn_udp;
6512 
6513 	mutex_enter(&udp->udp_recv_lock);
6514 	connp->conn_flow_cntrld = B_FALSE;
6515 	mutex_exit(&udp->udp_recv_lock);
6516 }
6517 
6518 /* ARGSUSED2 */
6519 int
udp_shutdown(sock_lower_handle_t proto_handle,int how,cred_t * cr)6520 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6521 {
6522 	conn_t	*connp = (conn_t *)proto_handle;
6523 
6524 	/* All Solaris components should pass a cred for this operation. */
6525 	ASSERT(cr != NULL);
6526 
6527 	/* shut down the send side */
6528 	if (how != SHUT_RD)
6529 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6530 		    SOCK_OPCTL_SHUT_SEND, 0);
6531 	/* shut down the recv side */
6532 	if (how != SHUT_WR)
6533 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6534 		    SOCK_OPCTL_SHUT_RECV, 0);
6535 	return (0);
6536 }
6537 
6538 int
udp_ioctl(sock_lower_handle_t proto_handle,int cmd,intptr_t arg,int mode,int32_t * rvalp,cred_t * cr)6539 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6540     int mode, int32_t *rvalp, cred_t *cr)
6541 {
6542 	conn_t		*connp = (conn_t *)proto_handle;
6543 	int		error;
6544 
6545 	/* All Solaris components should pass a cred for this operation. */
6546 	ASSERT(cr != NULL);
6547 
6548 	/*
6549 	 * If we don't have a helper stream then create one.
6550 	 * ip_create_helper_stream takes care of locking the conn_t,
6551 	 * so this check for NULL is just a performance optimization.
6552 	 */
6553 	if (connp->conn_helper_info == NULL) {
6554 		udp_stack_t *us = connp->conn_udp->udp_us;
6555 
6556 		ASSERT(us->us_ldi_ident != NULL);
6557 
6558 		/*
6559 		 * Create a helper stream for non-STREAMS socket.
6560 		 */
6561 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6562 		if (error != 0) {
6563 			ip0dbg(("udp_ioctl: create of IP helper stream "
6564 			    "failed %d\n", error));
6565 			return (error);
6566 		}
6567 	}
6568 
6569 	switch (cmd) {
6570 		case _SIOCSOCKFALLBACK:
6571 		case TI_GETPEERNAME:
6572 		case TI_GETMYNAME:
6573 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6574 			    cmd));
6575 			error = EINVAL;
6576 			break;
6577 		default:
6578 			/*
6579 			 * Pass on to IP using helper stream
6580 			 */
6581 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6582 			    cmd, arg, mode, cr, rvalp);
6583 			break;
6584 	}
6585 	return (error);
6586 }
6587 
6588 /* ARGSUSED */
6589 int
udp_accept(sock_lower_handle_t lproto_handle,sock_lower_handle_t eproto_handle,sock_upper_handle_t sock_handle,cred_t * cr)6590 udp_accept(sock_lower_handle_t lproto_handle,
6591     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6592     cred_t *cr)
6593 {
6594 	return (EOPNOTSUPP);
6595 }
6596 
6597 /* ARGSUSED */
6598 int
udp_listen(sock_lower_handle_t proto_handle,int backlog,cred_t * cr)6599 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6600 {
6601 	return (EOPNOTSUPP);
6602 }
6603 
/*
 * Table of UDP's socket downcall entry points.  The initializer is
 * positional, so the order of the entries must match the member order of
 * sock_downcalls_t (declared outside this file chunk); the trailing
 * comments name the slot each entry is meant to fill.  UDP supplies no
 * handler for the send_uio, recv_uio and poll slots, which are left NULL.
 */
sock_downcalls_t sock_udp_downcalls = {
	udp_activate,		/* sd_activate */
	udp_accept,		/* sd_accept */
	udp_bind,		/* sd_bind */
	udp_listen,		/* sd_listen */
	udp_connect,		/* sd_connect */
	udp_getpeername,	/* sd_getpeername */
	udp_getsockname,	/* sd_getsockname */
	udp_getsockopt,		/* sd_getsockopt */
	udp_setsockopt,		/* sd_setsockopt */
	udp_send,		/* sd_send */
	NULL,			/* sd_send_uio */
	NULL,			/* sd_recv_uio */
	NULL,			/* sd_poll */
	udp_shutdown,		/* sd_shutdown */
	udp_clr_flowctrl,	/* sd_setflowctrl */
	udp_ioctl,		/* sd_ioctl */
	udp_close		/* sd_close */
};
6623